Hi all,

Could someone knowledgeable in Ruby's parse.y please verify that the code
below lexes/scans/tokenizes general delimited strings (%q, %Q, %r, %w, %x
and the bare %(...) form) correctly?

Thanks,

Robert

# Scans one general delimited ("percent") literal out of a source string:
# %q, %Q, %r, %w, %x followed by a delimiter, or the bare form where "%" is
# followed directly by one of the brackets ( [ { <.
#
# +cursor+ is the index of the character right after the leading "%"
# (1 by default, i.e. the literal starts at the beginning of the string).
class Tokenizer
  # Opening bracket => closing bracket that balances it. Any other delimiter
  # closes itself and does not nest.
  PAIRED_DELIMITERS = { ?( => ?), ?[ => ?], ?{ => ?}, ?< => ?> }.freeze

  # Type character following "%" => token name reported for that literal.
  TOKEN_NAMES = {
    ?r => :Regexp,
    ?q => :QString,
    ?Q => :IString,
    ?w => :ArrayOfStrings,
    ?x => :ShellCommand
  }.freeze

  attr_reader :string, :cursor, :string_length

  # @param str [String] text containing the literal
  # @param cursor [Integer] index of the character after the "%"
  def initialize(str, cursor = 1)
    @string = str
    @cursor = cursor
    @string_length = str.length
  end

  # Locates the literal that starts at +cursor+.
  #
  # @return [Array, nil] [token_name, first_content_index,
  #   last_content_index, index_after_closing_delimiter], or nil when the
  #   character at +cursor+ does not start a supported literal, when the
  #   delimiter is missing, or when the literal is never terminated.
  def delimited_string
    type_char = string[cursor]
    if PAIRED_DELIMITERS.key?(type_char)
      # Bare form, e.g. %(...): the bracket itself sits at the cursor.
      token_name = :IString
      open_pos = cursor
    else
      token_name = TOKEN_NAMES[type_char]
      return nil unless token_name
      open_pos = cursor + 1
    end

    opener = string[open_pos]
    return nil if opener.nil? # input ends before any delimiter

    close_pos = closing_delimiter_position(open_pos, opener)
    return nil unless close_pos

    [token_name, open_pos + 1, close_pos - 1, close_pos + 1]
  end

  # Prints the recognised token and the unconsumed remainder of the input,
  # or "NO MATCH!" when #delimited_string finds nothing.
  def match
    token_name, first, last, rest_start = delimited_string
    unless token_name
      puts "NO MATCH!"
      return
    end

    summary = "#{token_name}: #{string[first..last]}"
    # ljust pads to 45 columns but, unlike " " * (45 - length), does not
    # raise when the summary is already wider than that.
    puts summary.ljust(45) + "(rest: #{string[rest_start..-1].inspect})"
  end

  private

  # Finds the index of the delimiter that closes the literal opened at
  # +open_pos+. Bracket delimiters nest; a backslash escapes the character
  # after it, so an escaped delimiter neither closes nor nests the literal.
  #
  # @return [Integer, nil] index of the closing delimiter, or nil if the
  #   input ends first
  def closing_delimiter_position(open_pos, opener)
    closer = PAIRED_DELIMITERS.fetch(opener, opener)
    nests = closer != opener
    # A backslash used as the delimiter itself cannot also act as an escape.
    escapable = opener != ?\\
    depth = 1
    pos = open_pos + 1

    while pos < string_length
      c = string[pos]
      if escapable && c == ?\\
        pos += 1 # skip the escaped character as well
      elsif c == closer
        depth -= 1
        return pos if depth.zero?
      elsif nests && c == opener
        depth += 1
      end
      pos += 1
    end

    nil
  end
end

# Demo driver: feeds sample literals to Tokenizer#match, one per line of
# output. The samples are double-quoted, so #{...} and \123 are expanded by
# Ruby before the tokenizer ever sees the text.
a = 123

[
  # %q with a plain delimiter, nested brackets, and a blank as delimiter
  "%q/simple strings are ok/; #the rest...",
  "%q(nesting (really) works); #the rest...",
  "%q no_blanks_in_this_one ; #the rest...",

  # %Q and the bare bracketed form
  "%Q!\"I said 'nuts', \" I said!; #the rest...",
  "%Q{Try #{a+1}, not #{a-1}}; #the rest...",
  "%Q<Try #{a+1}, not #{a-1}>; #the rest...",
  "%<Try #{a+1}, not #{a-1}>; #the rest...",
  "%(and the (nesting) works again!); #the rest...",
  "%(\123mile you little hobbit); #the rest...",

  # regexp, word array, shell command
  "%r<[ab]+(c|d)*e{1,3}>; #the rest...",
  "%w{a|| b<> c[] d{} e()}; #the rest...",
  "%x;ls -al *.dll;; #the rest...",

  # inputs that are expected to print "NO MATCH!"
  "%%%",
  "%(this one is not terminated",
  "%!neither is this one"
].each { |sample| Tokenizer.new(sample).match }