# In article <1139916679.044875.75620 / g47g2000cwa.googlegroups.com>,
# Timothy Goddard <interfecus / gmail.com> wrote:
#
#   I just whipped this up in a bit of free time. It may be a decent
#   starting point for a pure ruby parser. Note that there is no lookahead
#   ability.

# Raised when the input contains text no token matches, or when the token
# stack cannot be reduced to a single symbol once the input is exhausted.
class ParseError < StandardError; end

# A minimal shift/reduce parser with no lookahead.
#
# Subclasses describe a grammar with the class-level +token+ and +rule+
# macros. Each class keeps its own tables; a subclass additionally sees the
# definitions of its superclasses, so sibling parsers do not affect each
# other.
class Parser
  class << self
    # Declares a token.
    #
    # regexp - Regexp (or String pattern) recognising the token; it is
    #          anchored to the start of the remaining input.
    # token  - Symbol pushed on the stack when the pattern matches.
    # block  - optional; receives the matched text and returns the token's
    #          value. Without a block the value is nil.
    def token(regexp, token, &block)
      # The non-capturing group keeps \A bound to the whole pattern, even
      # for a String pattern containing a top-level alternation ('a|b').
      own_tokens[Regexp.new("\\A(?:#{regexp})")] = token
      own_token_values[token] = block
    end

    # Declares a reduction: rule :a, :b => :ab replaces a trailing [:a, :b]
    # on the stack with :ab.
    #
    # tokens - leading symbols followed by a final Hash whose keys complete
    #          the pattern and whose first value is the resulting symbol.
    # block  - optional; called with the Array of matched values (a block
    #          with several parameters destructures it) and returns the
    #          value of the result. Without a block the value is nil.
    #
    # Raises ArgumentError when the trailing Hash is missing or empty.
    def rule(*tokens, &block)
      final = tokens.pop
      unless final.is_a?(Hash) && !final.empty?
        raise ArgumentError, 'rule must end with a `last_token => result` pair'
      end

      pattern = tokens + final.keys
      own_reductions[pattern] = final.values.first
      own_reduction_procs[pattern] = block
    end

    # Anchored Regexp => token symbol, superclass entries first.
    def tokens
      inherited_table(:tokens).merge(own_tokens)
    end

    # Token symbol => value block (or nil), superclass entries first.
    def token_values
      inherited_table(:token_values).merge(own_token_values)
    end

    # Token pattern (Array of symbols) => result symbol, superclass entries
    # first.
    def reductions
      inherited_table(:reductions).merge(own_reductions)
    end

    # Token pattern (Array of symbols) => value block (or nil), superclass
    # entries first.
    def reduction_procs
      inherited_table(:reduction_procs).merge(own_reduction_procs)
    end

    private

    def own_tokens
      @own_tokens ||= {}
    end

    def own_token_values
      @own_token_values ||= {}
    end

    def own_reductions
      @own_reductions ||= {}
    end

    def own_reduction_procs
      @own_reduction_procs ||= {}
    end

    # The named table as seen by the superclass, or an empty Hash once the
    # chain leaves the Parser hierarchy.
    def inherited_table(name)
      superclass.respond_to?(name) ? superclass.public_send(name) : {}
    end
  end

  # Parses a String, or any object responding to +read+ (read all at once),
  # using the rules defined for this parser.
  #
  # The input is never modified: strings are duplicated before tokens are
  # stripped from them. Every character must belong to a declared token;
  # nothing (not even whitespace) is skipped implicitly.
  #
  # Returns a two-element Array: the final symbol and its value.
  # Raises ParseError on an unrecognised token, or when more than one symbol
  # is left on the stack after the input has been consumed.
  def parse(input)
    text = input.respond_to?(:read) ? input.read : input.dup
    stack = []
    value_stack = []
    loop do
      token, value = retrieve_token(text)
      stack << token
      value_stack << value
      reduce_stack(stack, value_stack)
      next unless text.empty?
      raise ParseError, 'Stack failed to reduce' unless stack.length == 1

      return stack[0], value_stack[0]
    end
  end

  protected

  # Removes one token from the front of +text+ (mutating it) and returns
  # [token, value]. Tokens are tried in definition order; the first match
  # wins.
  #
  # Raises ParseError when no token matches.
  def retrieve_token(text)
    token_values = self.class.token_values
    self.class.tokens.each do |regexp, token|
      md = regexp.match(text)
      next unless md

      matched = md[0]
      # A zero-length match consumes nothing, so accepting it while input
      # remains would make the parse loop spin forever.
      next if matched.empty? && !text.empty?

      text.slice!(0, matched.length)
      converter = token_values[token]
      return [token, converter ? converter.call(matched) : nil]
    end
    raise ParseError, "Invalid token in input near #{text}"
  end

  # Repeatedly replaces the tail of +stack+ with the result of the first
  # matching rule (in definition order) until no rule matches, keeping
  # +value_stack+ aligned with it. Both arrays are modified in place.
  def reduce_stack(stack, value_stack)
    reductions = self.class.reductions
    reduction_procs = self.class.reduction_procs
    loop do
      pattern, result = reductions.find do |tokens, _|
        tokens.length <= stack.length && stack.last(tokens.length) == tokens
      end
      return unless pattern

      start = stack.length - pattern.length
      action = reduction_procs[pattern]
      reduced = action ? action.call(value_stack[start, pattern.length]) : nil
      # Assign one-element arrays: a bare Array on the right-hand side would
      # be spliced in element by element and desynchronise the two stacks.
      stack[start, pattern.length] = [result]
      value_stack[start, pattern.length] = [reduced]
    end
  end
end

# Example grammar: "foo" followed by "bar" makes a foobar, and "mega"
# followed by a foobar makes a megafoobar (the foobar text repeated 3 times).
class TestParser < Parser
  token(/foo/i, :foo) { |s| s.upcase }
  token(/bar/i, :bar) { |s| s.downcase }
  token(/mega/i, :mega) { |_s| 3 }

  rule(:foo, :bar => :foobar) { |foo, bar| foo + bar }
  rule(:mega, :foobar => :megafoobar) { |mega, foobar| foobar * mega }
end

# This is a bit like Grammar: http://grammar.rubyforge.org/0.5/
#
# Phil