Hi all,
Could someone knowledgeable in Ruby's parse.y please verify that the code
below lexes (scans/tokenizes) general delimited strings correctly?
Thanks,
Robert
# Locates a general delimited ("percent") literal inside a string: %q, %Q, %r,
# %w and %x followed by a delimiter, or the bare bracketed form %(...), %[...],
# %{...}, %<...>.
#
# +cursor+ is the index of the type character (or, for the bare form, of the
# opening bracket); with the default of 1 the literal's '%' sits at index 0.
# The '%' itself is never inspected.
class Tokenizer
  attr_reader :string, :cursor, :string_length

  # Opening bracket => closing bracket. Only these delimiters nest.
  BRACKET_PAIRS = { ?( => ?), ?[ => ?], ?{ => ?}, ?< => ?> }.freeze

  # Type character => token name reported for the literal.
  TOKEN_NAMES = {
    ?r => :Regexp,
    ?q => :QString,
    ?Q => :IString,
    ?w => :ArrayOfStrings,
    ?x => :ShellCommand
  }.freeze

  def initialize(str, cursor = 1)
    @string, @cursor, @string_length = str, cursor, str.length
  end

  # Scans the literal that starts at +cursor+.
  #
  # Returns nil when the character at +cursor+ is neither a known type
  # character nor an opening bracket, when the input ends before a delimiter
  # is seen, or when the literal is never terminated. Otherwise returns
  # [token_name, first, last, rest]: +first+..+last+ are the indices of the
  # literal's content (an empty range for empty content) and +rest+ is the
  # index just past the closing delimiter.
  def delimited_string
    token_name = TOKEN_NAMES[string[cursor]]
    if token_name
      opener_pos = cursor + 1
    elsif BRACKET_PAIRS.key?(string[cursor])
      # Bare form: the bracket directly follows '%' and yields an IString.
      token_name = :IString
      opener_pos = cursor
    else
      return nil
    end

    delimiter = string[opener_pos]
    # The input ended right after the type character: nothing to scan.
    return nil unless delimiter

    closer_pos = find_terminator(opener_pos, delimiter, BRACKET_PAIRS[delimiter])
    return nil unless closer_pos

    [token_name, opener_pos + 1, closer_pos - 1, closer_pos + 1]
  end

  # Prints the token name and content padded to 45 columns, followed by the
  # unconsumed remainder of the input, or "NO MATCH!" when nothing was found.
  def match
    res = delimited_string
    if res
      name, first, last, rest = res
      s = "#{name}: #{string[first..last]}"
      # ljust never raises, unlike " " * (45 - s.length) for long descriptions.
      puts "#{s.ljust(45)}(rest: #{string[rest..-1].inspect})"
    else
      puts "NO MATCH!"
    end
  end

  private

  # Returns the index of the delimiter that closes the literal opened at
  # +opener_pos+, or nil when the end of the input is reached first.
  #
  # +closer+ is the matching closing bracket when +opener+ is a bracket (nested
  # pairs are then balanced) and nil otherwise, in which case the next
  # occurrence of +opener+ terminates the literal. A backslash hides the
  # character after it, so an escaped delimiter neither terminates nor nests.
  def find_terminator(opener_pos, opener, closer)
    terminator = closer || opener
    depth = 1
    pos = opener_pos + 1
    while pos < string_length
      c = string[pos]
      if c == terminator
        depth -= 1
        return pos if depth.zero?
      elsif closer && c == opener
        depth += 1
      elsif c == ?\\
        pos += 1 # skip the escaped character
      end
      pos += 1
    end
    nil
  end
end
# Demonstration driver: feed a series of sample literals to the tokenizer, in
# order, and let it print one line per sample.
a = 123 # referenced by the interpolated samples below
[
  "%q/simple strings are ok/; #the rest...",
  "%q(nesting (really) works); #the rest...",
  "%q no_blanks_in_this_one ; #the rest...",
  "%Q!\"I said 'nuts', \" I said!; #the rest...",
  "%Q{Try #{a+1}, not #{a-1}}; #the rest...",
  "%Q<Try #{a+1}, not #{a-1}>; #the rest...",
  "%<Try #{a+1}, not #{a-1}>; #the rest...",
  "%(and the (nesting) works again!); #the rest...",
  "%(\123mile you little hobbit); #the rest...",
  "%r<[ab]+(c|d)*e{1,3}>; #the rest...",
  "%w{a|| b<> c[] d{} e()}; #the rest...",
  "%x;ls -al *.dll;; #the rest...",
  "%%%",
  "%(this one is not terminated",
  "%!neither is this one"
].each do |source|
  Tokenizer.new(source).match
end