--------------090601070206080908080803
Content-Type: text/plain; charset=us-ascii; format=flowed
Content-Transfer-Encoding: 7bit

Here's my solution. It builds a tree of the Gedcom nodes.

It supports a broad subset of the Gedcom specification, can output XML, 
YAML and pretty-print, has error checks and is reasonably short.

Note that the YAML representation will not reuse the IDs that were 
specified in the original Gedcom file, but rather create its own. I 
don't know if there is an easy way of making YAML use pre-specified IDs.

The XML representation uses <ref to="@ID@" /> for representing links.

The YAML and pp emitters blow up the stack when given the CPAN sample 
data. There's not too much I can do about this.

The XML emitter tries hard to make the output as pretty as possible. 
This includes trying to use a value="..." attribute when appropriate. 
(It won't get used when the value contains multi-line data.)

Data is read from ARGF, which means either standard input or filenames 
that were given on the command line.

I've also attached sample output for the file given on 
http://heiner-eichmann.de/gedcom/simple.ged

--------------090601070206080908080803
Content-Type: text/plain;
 name="gedcom.rb"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="gedcom.rb"

# Parser for GEDCOM-style line-oriented data. Gedcom.parse builds a tree of
# Gedcom::Node objects that can be pretty-printed, dumped as YAML, or
# rendered as XML via Node#to_xml.
module Gedcom
  # Raised by Gedcom.parse for malformed lines, inconsistent nesting,
  # misplaced CONC/CONT lines and pointers to undefined ids.
  class ParseError < ArgumentError; end

  # One node of the parsed tree. A Node is a Hash that maps a tag name to an
  # Array of child Nodes carrying that tag (missing keys are created on
  # access as empty Arrays).
  #
  # Attributes:
  #   value        - nil, a String, or (for pointer lines) the referenced Node
  #   origin       - the line number the node was created from
  #   special_type - "CONC"/"CONT" for continuation nodes, nil otherwise
  #   id           - the "@...@" cross-reference id of the line, or nil
  class Node < Hash
    attr_accessor :value, :origin, :special_type, :id

    # True when the node is a continuation node (special_type is set).
    def special?
      !@special_type.nil?
    end

    # origin is the line number this node stems from (nil when unknown).
    def initialize(origin = nil)
      @value = nil
      @origin = origin
      @as_plain_hash_cache = {}

      # Each missing tag gets its own fresh Array of children.
      super() { |hash, key| hash[key] = [] }
    end

    # Hash code built from the value and the children. A Node value (a
    # resolved pointer) is replaced by a fixed marker so that nodes pointing
    # at each other do not recurse endlessly.
    def hash
      [@value.is_a?(Node) ? :recursive : @value, super].hash
    end

    # Two nodes are considered equal when their hash codes are equal.
    def ==(other)
      self.hash == other.hash
    end

    # Replaces the children, value and origin of this node with those of
    # other. Returns self, like Hash#replace.
    def replace(other)
      super(other)
      # The original assigned to a local variable `origin`, so the origin
      # of the receiver was never updated.
      @value = other.value
      @origin = other.origin
      self
    end

    # The node as a bare String when it has a String value and no children,
    # otherwise as a plain Hash (see as_plain_hash).
    def as_value
      if @value.is_a?(String) && empty?
        @value
      else
        as_plain_hash
      end
    end

    def to_yaml_type
      "!map"
    end

    def to_yaml(opts = {})
      as_value.to_yaml(opts)
    end

    def inspect
      as_value.inspect
    end

    def pretty_print(q)
      as_value.pretty_print(q)
    end

    # Renders the children of this node as indented XML and returns the
    # String. At level 0 the output is wrapped in a <gedcom> root element.
    # A node whose value is another Node is rendered as <ref to="ID" />.
    def to_xml(level = 0)
      require 'cgi'
      indent = "  " * (level + 1)

      result =
        if @value.is_a?(Node)
          "#{indent}<ref to=\"#{CGI.escapeHTML(@value.id.to_s)}\" />"
        else
          map do |tag, nodes|
            nodes.map { |node| xml_element(tag.downcase, node, indent, level) }.join("\n")
          end.join("\n")
        end

      result = "<gedcom>\n#{result}\n</gedcom>" if level == 0
      result
    end

    private

    # YAML detects self-referencing structures by comparing object_ids.
    # as_plain_hash() needs to cache the Hash it creates to make that
    # check work.
    #
    # Returns a plain Hash copy of this node in which single-element child
    # Arrays are unwrapped and a non-nil value is stored under :value.
    def as_plain_hash
      key = self.hash
      return @as_plain_hash_cache[key] if @as_plain_hash_cache.include?(key)

      result = {}.merge(self)
      result.each do |tag, values|
        result[tag] = values.first if values.size == 1
      end
      result[:value] = @value unless @value.nil?

      @as_plain_hash_cache[key] = result
    end

    # Renders one child node as an XML element named xml_tag.
    def xml_element(xml_tag, node, indent, level)
      text = node.value.is_a?(String) ? CGI.escapeHTML(node.value) : nil
      id_attr = node.id.nil? ? "" : " id=\"#{CGI.escapeHTML(node.id.to_s)}\""
      open_tag = "#{indent}<#{xml_tag}#{id_attr}"
      close_tag = "</#{xml_tag}>"

      if node.value.nil? && node.empty?
        "#{open_tag} />"
      elsif node.empty? && text
        "#{open_tag}>" + text + close_tag
      elsif text && node.value.include?("\n")
        # Multi-line text cannot go into an attribute; emit it as escaped
        # element content ahead of the children.
        "#{open_tag}>\n" +
          "#{indent}  #{text}\n" +
          node.to_xml(level + 1) + "\n" +
          "#{indent}#{close_tag}"
      else
        val_attr = text ? " value=\"#{text}\"" : ""
        "#{open_tag}#{val_attr}>\n" +
          node.to_xml(level + 1) + "\n" +
          "#{indent}#{close_tag}"
      end
    end
  end

  # One input line: level, optional @id@, tag, then either a @pointer@ or
  # free text.
  LineRegexp = /^\s*(\d+)\s+(?:(@\w[^@]*@)\s+)?(\w+)(?:\s+(?:(@\w[^@]*@)|(.+)))?\s*$/

  # Parses GEDCOM-style data into a tree of Nodes.
  #
  # data - a String (or any object responding to each_line, such as an IO),
  #        or an Enumerable that yields one line per element.
  #
  # Returns the root Node; the level-0 records are its children.
  #
  # Raises ParseError when a line does not match LineRegexp, when the level
  # of a line skips ahead of its parent, when a line is nested below a
  # CONC/CONT line, when a CONC/CONT line carries an id, or when a pointer
  # refers to an id that is never defined.
  def parse(data)
    nodes = Node.new(1)
    stack = [nodes]
    node_by_id = {}
    nodes_with_refs = []

    # String#each_with_index only exists on Ruby 1.8; each_line works for
    # Strings and IO objects on every version.
    lines = data.respond_to?(:each_line) ? data.each_line : data.each

    lines.each_with_index do |raw_line, index|
      line_no = index + 1
      # Drop the line terminator so a CRLF ending does not leave a "\r"
      # at the end of captured values.
      line = raw_line.chomp

      md = LineRegexp.match(line)
      unless md
        next if line.strip.empty? # whitespace-only lines are ignored
        raise ParseError, "Parse error at line #{line_no}"
      end

      level, id, tag, value_id, value = *md.captures
      level = level.to_i
      value.gsub!("@@", "@") if value
      continuation = tag == "CONC" || tag == "CONT"

      depth = stack.size - 1
      if level > depth
        raise ParseError, "Inconsistent nesting at line #{line_no}"
      elsif level != depth
        (depth - level).times { stack.pop }
      end

      if stack.last.special?
        raise(ParseError, "Can't create sub node for special node " +
          "of type #{stack.last.special_type} " +
          "(defined at #{stack.last.origin}) at #{line_no}")
      end

      new_node = Node.new(line_no)

      if id && !id.empty?
        node_by_id[id] = new_node
        new_node.id = id
      end

      if value && !value.empty?
        new_node.value = value
      elsif value_id && !value_id.empty?
        # id is temporarily stored in value; it is resolved after all lines
        # have been read. Continuation text that merely looks like a
        # pointer is plain text and must not be resolved.
        new_node.value = value_id
        nodes_with_refs << new_node unless continuation
      end

      if continuation
        new_node.special_type = tag

        if id && !id.empty?
          raise ParseError, "#{tag} node can't have id at line #{line_no}"
        end

        str_value = value && !value.empty? ? value : value_id
        separator = tag == "CONT" ? "\n" : ""
        stack.last.value = stack.last.value.to_s + separator + str_value.to_s
      else
        stack.last[tag] << new_node
      end

      # Continuation nodes are pushed too, so that lines nested below them
      # are detected and rejected above.
      stack << new_node
    end

    nodes_with_refs.each do |node|
      ref_id = node.value
      unless node_by_id.include?(ref_id)
        raise ParseError, "Pointer to undefined node `#{ref_id}' at line #{node.origin}"
      end
      node.value = node_by_id[ref_id]
    end

    nodes
  end
  module_function :parse
end

# Command-line driver: reads GEDCOM data from ARGF (standard input or the
# files named on the command line) and prints it pretty-printed, as YAML
# and as XML. Each emitter works on a freshly parsed tree.
if __FILE__ == $0
  data = ARGF.read

  require 'pp'
  puts "Pretty-printed:"
  begin
    pp Gedcom.parse(data)
  rescue SystemStackError
    puts "Sorry, pp blowed up the stack."
  end

  require 'yaml'
  puts "", "As YAML:"
  begin
    # Kernel#y is not available in scripts on current Rubies (Psych only
    # defines it under IRB), so print the YAML dump explicitly.
    puts Gedcom.parse(data).to_yaml
  rescue SystemStackError
    puts "Sorry, YAML blowed up the stack."
  end

  puts "", "As XML:"
  puts Gedcom.parse(data).to_xml
end

--------------090601070206080908080803--