On 12/15/07, Rob Biedenharn <Rob / agileconsultingllc.com> wrote:
> On Dec 14, 2007, at 6:42 PM, Michal Suchanek wrote:
> > This one is really a tree for once.
>
>
> This was quite close to what I had at the time, so that' why the reply
> is to Michal's message.  I've also incorporated tests from Adam Shelly
> and Justin Ethier, but see the comments for how I've softened them and
> why.
>
> I moved Michal's TreeNode into AVLTree::Node since I don't believe it
> should be at the top level.  I also created an AVLTree::ExternalNode
> to duck-type with AVLTree::Node as a special kind of nil alternative
> to clean up a lot of code that would otherwise need to make a special
> case of @left or @right being nil.

I had a little time, so I've added the other two types of rebalancing, and
added a test for Tree#find {block}

I hope some people are still interested in ping-pong.
-Adam

==> avl_tree.rb <==
class AVLTree

  # Need something smarter than nil for external nodes
  class ExternalNode
    attr_accessor :parent
    def initialize(parent)
      @parent = parent
    end
    def include?(_); false; end
    def <<(_); raise RuntimeError; end
    def height; 0; end
    def balance_factor; 0; end
    def left; self; end
    def right; self; end
    def left=(node)
      raise(NotImplementedError,
            "external node of #{@parent}")
    end
    def right=(node)
      raise(NotImplementedError,
            "external node of #{@parent}")
    end
    def to_s; ''; end

    def to_a; []; end
  end


  class Node
    attr_accessor :data, :parent
    attr_reader :left, :right

    def initialize obj
      @parent = nil
      @data   = obj
      @left   = ExternalNode.new(self)
      @right  = ExternalNode.new(self)
    end

    def left=(node)

      @left = node
      node.parent = self
    end


    def right=(node)

      @right = node
      node.parent = self
    end

    def height

      [ @left.height, @right.height ].max + 1
    end

    def << node
      case node.data <=> @data
      when -1
        if Node === @left
          @left << node
        else
          self.left = node
        end
      when 0
        return self             # no dups
      when +1
        if Node === @right
          @right << node
        else
          self.right = node
        end
      end
      rebalance if balance_factor.abs > 1
    end

    def include? obj
      case obj <=> @data
      when -1 : @left.include?(obj)
      when  0 : true
      when +1 : @right.include?(obj)
      end
    end

    def to_a
      result = @left.to_a
      result << @data
      result.concat @right.to_a
      result
    end

    def to_s
      bf = case balance_factor <=> 0
           when -1 : '-' * -balance_factor
           when  0 : '.'
           when  1 : '+' * balance_factor
           end
      "[#{left} "+
        "(#{@data}{#{height}#{bf}}^#{parent.data})"+
        " #{right}]"
    end

    protected

    def balance_factor
      @right.height - @left.height
    end

    def rebalance  weight=0
      if (bf = balance_factor + 0) > 1
        puts "balancing:  #{self.to_s}"
        # right is too high
        if @right.balance_factor < 0
          # double rotate right-left - first the right subtree
          #add some weight to force rotation even if it was nearly balanced
          @right.rebalance -1
        end
        # single rotate left
        my_parent, from, to = @parent, self, @right
        temp = @right.left
        @right.left = self
        self.right = temp
        my_parent.send :replace_child, from, to
        to.parent = my_parent
        #~ else

        #~ end
      elsif bf < -1
        # left must be too high
        if @left.balance_factor > 0
          #double rotate - first force left subtree,
          @left.rebalance 1
        end
        # single rotate right
        my_parent, from, to = @parent, self, @left
        temp = @left.right
        @left.right = self
        self.left = temp
        my_parent.send :replace_child, from, to
        to.parent = my_parent
      end
       puts my_parent

    end

    def replace_child(from, to)
      if from.eql? @left
        @left = to
      elsif from.eql? @right
        @right = to
      else
        raise(ArgumentError,
              "#{from} is not a branch of #{self}")
      end
    end

  end


  def initialize
    @root = nil
  end

  def empty?

    @root.nil?
  end

  def include?(obj)
    empty? ? false : @root.include?(obj)
  end

  def <<(obj)
    raise(ArgumentError,
          "Objects added to #{self.class.name} must" +
          " respond to <=>"
          ) unless obj.respond_to?(:<=>)

    if empty?
      @root = Node.new(obj)
      @root.parent = self
    else
      @root << Node.new(obj)

    end
    self
  end

  def height
    empty? ? 0 : @root.height
  end


  # naive implementation [not O(lg N)]
  #   def [](idx)
  #     to_a[idx]
  #   end

  def to_a
    empty? ? [] : @root.to_a
  end

  # naive implementation [not O(lg N)]
  def each
    to_a.each {|e| yield e}
  end

  def to_s
    empty? ? "[]" : @root.to_s
  end

  # Indicates that parent is root in to_s
  def data; '*'; end

  protected

  def replace_child(from, to)
    if @root.eql? from
      @root = to
    else
      raise(ArgumentError,
            "#{from} is not a branch of #{self}")
    end
  end

end
__END__

==> package.rb <==
Dir[ARGV[0] || '*.rb'].each do |f|
  lines = IO.readlines(f)
  lines.unshift "==> #{f} <==\n"
  lines << "__END__\n" unless lines.last.chomp == '__END__'
  lines << "\n"
  puts lines
end
__END__

==> test_avl_tree.rb <==
#!/usr/bin/env ruby -wKU


require "test/unit"
require "avl_tree"


class TestAVLTree < Test::Unit::TestCase
  def setup
    @tree = AVLTree.new
  end


  ##################################################
  # Membership tests

  def test_tree_membership
    assert_equal(true,  @tree.empty?)
    assert_equal(false, @tree.include?(3))

    @tree << 3

    assert_equal(false, @tree.empty?)
    assert_equal(true,  @tree.include?(3))
  end


  def test_tree_should_allow_more_than_one_element
    @tree << 3
    @tree << 4


    assert(@tree.include?(4), "4 not in #{@tree}")
    assert(@tree.include?(3), "3 not in #{@tree}")

  end

  def test_tree_include_many
    0.upto(10) do |i|

      assert_equal(false, @tree.include?(i),

                   "Tree should not include #{i} yet.")
      @tree << i
      0.upto(i) do |j|

        assert_equal(true, @tree.include?(j),
                     "Tree should have 0..#{i},"+
                     " where's #{j}? ")
      end
    end
  end

  # This sits at the intersection of membership
  # and height tests.  We know one node has height 1,
  # and two nodes has height 2, so if we insert one
  # object twice and the height is 1, there must
  # only be one node in the tree.
  def test_tree_does_not_keep_duplicates
    @tree << 'a'
    @tree << 'a'
    assert_equal 1, @tree.height, "one node: #{@tree}"
  end

  ##################################################
  # Height tests

  def test_tree_height_of_one_or_two_nodes_is_N
    @tree << 5

    assert_equal 1, @tree.height, "one node: #{@tree}"
    @tree << 6
    assert_equal 2, @tree.height, "two nodes: #{@tree}"
  end

  def test_tree_height_of_three_nodes_is_two

    @tree << 5
    @tree << 6
    @tree << 7

    assert_equal 2, @tree.height, @tree.to_s
  end

  # RobB: The more precise limit given in [Knuth] is
  # used rather than that from [Wikipedia]

  def test_tree_growth_limit_is_1pt44_log_N

    (1..10).each do |i|
      @tree << i
      limit = ((1.4405 *
                Math::log(i+2.0)/Math::log(2.0)
                ) - 0.3277).ceil

      assert(@tree.height <= limit,
             "Tree of #{i} nodes is too tall by" +

             " #{@tree.height - limit}" +
             " #{@tree}")
    end

  end

  def test_balances_left
    4.downto(1) { |i| @tree << i }

    assert(@tree.height < 4,
           "expected tree height #{@tree.height} < 4")

  end

  def test_balances_right
    1.upto(4) { |i| @tree << i }

    assert(@tree.height < 4,
           "expected tree height #{@tree.height} < 4")
  end

  def test_non_sequential_insertion__part_1
    items = [ 1, 3, 2 ]
    items.each do |i|
      @tree << i
    end
    items.each do |i|
      assert_equal(true, @tree.include?(i),
                   "where is #{i}? ")
    end
  end

  ##################################################
  # Access tests (getting data back out)

  # RobB: this tests too much at one time; I sorted ary.
  def test_tree_traverse
    ary = [ 3, 5, 17, 30, 42, 54, 1, 2 ].sort

    ary.each { |n| @tree << n }
    traversal = []
    @tree.each { |n| traversal << n }

    assert_equal(ary.size, traversal.size)

    ary.each do |n|
      assert_equal(true, traversal.include?(n),

                   "#{n} was not visited in tree.")
    end
  end

  def test_tree_find
    [1,2,3,4].each{|n| @tree<<n}
    assert_equal(1, @tree.find{|v|v>0} )
    assert_equal(2, @tree.find{|v|v%2==0} )
  end



  ##################################################
  # Things that aren't tested anymore...

  # [Knuth] p.473: "The problem of deletion can be solved
  # in O(log N) steps if we approach it correctly."
  # RobB: I'm choosing to skip this capability since
  # the test was added before an actual tree structure
  # emerged.
  def future__test_remove_node

    @tree << 314
    @tree.remove(314)

    assert_equal(false, @tree.include?(314),
                 '314 still in the tree')
  end

  # RobB: While I think the spirit of this test is good,
  # it attempts to expose the implementation too much.
  # I replaced this with test_sequential_access.
  def never__test_has_branches

    [50, 17, 72].each {|n| @tree << n}

    assert_equal 50, @tree.data
    assert_equal 17, @tree.left.data
    assert_equal 72, @tree.right.data
  end

end
=begin
[Knuth] Knuth, Donald Ervin,
    The Art of Computer Programming, 2nd ed.
    Volume 3, Sorting and Searching,
    section 6.2.3 "Balanced Trees", pp. 458-478
[Wikipedia]
    AVL Tree, http://en.wikipedia.org/wiki/AVL_tree
=end
__END__




Also, I've included my package and extract scripts that can pull the
> code into separate files.  To bootstrap, go to the bottom and past the
> code for extract into a file, then run that file and give it the whole
> message as input (or just the part starting below here).
>
> I've also been very strict about the indentation and width of the
> lines so the mailer shouldn't break any lines.
>
> I'm using Knuth's "The Art of Computer Programming" as a guide where
> the benefits of an AVL Tree are defined as:
>
>   ... and it never uses more than O(log N) operations
>   to search the tree or insert an item.  In fact, we
>   shall see that thier approach also leads to a general
>   technique that is good for representing arbitrary
>   _linear lists_ of length N, so that each of the
>   following operations can be done in only O(log N)
>   units of time:
>
>       i) Find an item having a given key
>      ii) Find the k-th item, given k
>     iii) Insert an item at a specified place
>      iv) Delete a specified item
>
> I'm tending to ignore (iii) and assume that the "specified place" is
> implicit in the value of the object being inserted.  I'm also ignoring
> (iv) because it's hard (see the comment in the test file for more).
>
> As a result of (i) and (iii), I ended up rejecting duplicates and
> there's an already passing test that shows this so I stretched the
> "one new failing test" rule.
>
> -Rob
>
> Rob Biedenharn          http://agileconsultingllc.com
> Rob / AgileConsultingLLC.com
>
> ==> avl_tree.rb <==
> #!/usr/bin/env ruby -wKU
>
> class AVLTree
>
>   # Need something smarter than nil for external nodes
>   class ExternalNode
>     attr_accessor :parent
>     def initialize(parent)
>       @parent = parent
>     end
>     def include?(_); false; end
>     def <<(_); raise RuntimeError; end
>     def height; 0; end
>     def balance_factor; 0; end
>     def left; self; end
>     def right; self; end
>     def left=(node)
>       raise(NotImplementedError,
>             "external node of #{@parent}")
>     end
>     def right=(node)
>       raise(NotImplementedError,
>             "external node of #{@parent}")
>     end
>     def to_s; ''; end
>     def to_a; []; end
>   end
>
>   class Node
>     attr_accessor :data, :parent
>     attr_reader :left, :right
>
>     def initialize obj
>       @parent = nil
>       @data   = obj
>       @left   = ExternalNode.new(self)
>       @right  = ExternalNode.new(self)
>     end
>
>     def left=(node)
>       @left = node
>       node.parent = self
>     end
>
>     def right=(node)
>       @right = node
>       node.parent = self
>     end
>
>     def height
>       [ @left.height, @right.height ].max + 1
>     end
>
>     def << node
>       case node.data <=> @data
>       when -1
>         if Node === @left
>           @left << node
>         else
>           self.left = node
>         end
>       when 0
>         return self             # no dups
>       when +1
>         if Node === @right
>           @right << node
>         else
>           self.right = node
>         end
>       end
>       rebalance if balance_factor.abs > 1
>     end
>
>     def include? obj
>       case obj <=> @data
>       when -1 : @left.include?(obj)
>       when  0 : true
>       when +1 : @right.include?(obj)
>       end
>     end
>
>     def to_a
>       result = @left.to_a
>       result << @data
>       result.concat @right.to_a
>       result
>     end
>
>     def to_s
>       bf = case balance_factor <=> 0
>            when -1 : '-' * -balance_factor
>            when  0 : '.'
>            when  1 : '+' * balance_factor
>            end
>       "[#{left} "+
>         "(#{@data}{#{height}#{bf}}^#{parent.data})"+
>         " #{right}]"
>     end
>
>     protected
>
>     def balance_factor
>       @right.height - @left.height
>     end
>
>     def rebalance
>       if (bf = balance_factor) > 1
>         # right is too high
>         if @right.balance_factor > 0
>           # single rotate left
>           my_parent, from, to = @parent, self, @right
>           temp = @right.left
>           @right.left = self
>           self.right = temp
>           my_parent.send :replace_child, from, to
>           to.parent = my_parent
>         else
>           # double rotate right-left
>           raise(NotImplementedError,
>                 "double rotate right-left for #{self}")
>         end
>       elsif bf < -1
>         # left must be too high
>         if @left.balance_factor < 0
>           # single rotate right
>           my_parent, from, to = @parent, self, @left
>           temp = @left.right
>           @left.right = self
>           self.left = temp
>           my_parent.send :replace_child, from, to
>           to.parent = my_parent
>         else
>           raise(NotImplementedError,
>                 "double rotate left-right for #{self}")
>         end
>       end
>     end
>
>     def replace_child(from, to)
>       if from.eql? @left
>         @left = to
>       elsif from.eql? @right
>         @right = to
>       else
>         raise(ArgumentError,
>               "#{from} is not a branch of #{self}")
>       end
>     end
>
>   end
>
>   def initialize
>     @root = nil
>   end
>
>   def empty?
>     @root.nil?
>   end
>
>   def include?(obj)
>     empty? ? false : @root.include?(obj)
>   end
>
>   def <<(obj)
>     raise(ArgumentError,
>           "Objects added to #{self.class.name} must" +
>           " respond to <=>"
>           ) unless obj.respond_to?(:<=>)
>
>     if empty?
>       @root = Node.new(obj)
>       @root.parent = self
>     else
>       @root << Node.new(obj)
>     end
>     self
>   end
>
>   def height
>     empty? ? 0 : @root.height
>   end
>
>   # naive implementation [not O(lg N)]
>   #   def [](idx)
>   #     to_a[idx]
>   #   end
>
>   def to_a
>     empty? ? [] : @root.to_a
>   end
>
>   # naive implementation [not O(lg N)]
>   def each
>     to_a.each {|e| yield e}
>   end
>
>   def to_s
>     empty? ? "[]" : @root.to_s
>   end
>
>   # Indicates that parent is root in to_s
>   def data; '*'; end
>
>   protected
>
>   def replace_child(from, to)
>     if @root.eql? from
>       @root = to
>     else
>       raise(ArgumentError,
>             "#{from} is not a branch of #{self}")
>     end
>   end
>
> end
> __END__
>
> ==> test_avl_tree.rb <==
> #!/usr/bin/env ruby -wKU
>
> require "test/unit"
> require "avl_tree"
>
> class TestAVLTree < Test::Unit::TestCase
>   def setup
>     @tree = AVLTree.new
>   end
>
>   ##################################################
>   # Membership tests
>   def test_tree_membership
>     assert_equal(true,  @tree.empty?)
>     assert_equal(false, @tree.include?(3))
>
>     @tree << 3
>
>     assert_equal(false, @tree.empty?)
>     assert_equal(true,  @tree.include?(3))
>   end
>
>   def test_tree_should_allow_more_than_one_element
>     @tree << 3
>     @tree << 4
>
>     assert(@tree.include?(4), "4 not in #{@tree}")
>     assert(@tree.include?(3), "3 not in #{@tree}")
>   end
>
>   def test_tree_include_many
>     0.upto(10) do |i|
>       assert_equal(false, @tree.include?(i),
>                    "Tree should not include #{i} yet.")
>       @tree << i
>       0.upto(i) do |j|
>         assert_equal(true, @tree.include?(j),
>                      "Tree should have 0..#{i},"+
>                      " where's #{j}? ")
>       end
>     end
>   end
>
>   # This sits at the intersection of membership
>   # and height tests.  We know one node has height 1,
>   # and two nodes has height 2, so if we insert one
>   # object twice and the height is 1, there must
>   # only be one node in the tree.
>   def test_tree_does_not_keep_duplicates
>     @tree << 'a'
>     @tree << 'a'
>     assert_equal 1, @tree.height, "one node: #{@tree}"
>   end
>
>   ##################################################
>   # Height tests
>   def test_tree_height_of_one_or_two_nodes_is_N
>     @tree << 5
>     assert_equal 1, @tree.height, "one node: #{@tree}"
>     @tree << 6
>     assert_equal 2, @tree.height, "two nodes: #{@tree}"
>   end
>
>   def test_tree_height_of_three_nodes_is_two
>     @tree << 5
>     @tree << 6
>     @tree << 7
>     assert_equal 2, @tree.height, @tree.to_s
>   end
>
>   # RobB: The more precise limit given in [Knuth] is
>   # used rather than that from [Wikipedia]
>   def test_tree_growth_limit_is_1pt44_log_N
>     (1..10).each do |i|
>       @tree << i
>       limit = ((1.4405 *
>                 Math::log(i+2.0)/Math::log(2.0)
>                 ) - 0.3277).ceil
>       assert(@tree.height <= limit,
>              "Tree of #{i} nodes is too tall by" +
>              " #{@tree.height - limit}" +
>              " #{@tree}")
>     end
>   end
>
>   def test_balances_left
>     4.downto(1) { |i| @tree << i }
>     assert(@tree.height < 4,
>            "expected tree height #{@tree.height} < 4")
>   end
>
>   def test_balances_right
>     1.upto(4) { |i| @tree << i }
>     assert(@tree.height < 4,
>            "expected tree height #{@tree.height} < 4")
>   end
>
>   def test_non_sequential_insertion__part_1
>     items = [ 1, 3, 2 ]
>     items.each do |i|
>       @tree << i
>     end
>     items.each do |i|
>       assert_equal(true, @tree.include?(i),
>                    "where is #{i}? ")
>     end
>   end
>
>   ##################################################
>   # Access tests (getting data back out)
>
>   # RobB: this tests too much at one time; I sorted ary.
>   def test_tree_traverse
>     ary = [ 3, 5, 17, 30, 42, 54, 1, 2 ].sort
>     ary.each { |n| @tree << n }
>     traversal = []
>     @tree.each { |n| traversal << n }
>     assert_equal(ary.size, traversal.size)
>     ary.each do |n|
>       assert_equal(true, traversal.include?(n),
>                    "#{n} was not visited in tree.")
>     end
>   end
>
>   ##################################################
>   # Things that aren't tested anymore...
>
>   # [Knuth] p.473: "The problem of deletion can be solved
>   # in O(log N) steps if we approach it correctly."
>   # RobB: I'm choosing to skip this capability since
>   # the test was added before an actual tree structure
>   # emerged.
>   def future__test_remove_node
>     @tree << 314
>     @tree.remove(314)
>     assert_equal(false, @tree.include?(314),
>                  '314 still in the tree')
>   end
>
>   # RobB: While I think the spirit of this test is good,
>   # it attempts to expose the implementation too much.
>   # I replaced this with test_sequential_access.
>   def never__test_has_branches
>     [50, 17, 72].each {|n| @tree << n}
>     assert_equal 50, @tree.data
>     assert_equal 17, @tree.left.data
>     assert_equal 72, @tree.right.data
>   end
>
> end
> =begin
> [Knuth] Knuth, Donald Ervin,
>     The Art of Computer Programming, 2nd ed.
>     Volume 3, Sorting and Searching,
>     section 6.2.3 "Balanced Trees", pp. 458-478
> [Wikipedia]
>     AVL Tree, http://en.wikipedia.org/wiki/AVL_tree
> =end
> __END__
>
> ==> package <==
> #!/usr/bin/env ruby -wKU
> # -*- ruby -*-
>
> Dir[ARGV[0] || '*.rb'].each do |f|
>   lines = IO.readlines(f)
>   lines.unshift "==> #{f} <==\n"
>   lines << "__END__\n" unless lines.last.chomp == '__END__'
>   lines << "\n"
>   puts lines
> end
> __END__
>
> ==> extract <==
> #!/usr/bin/env ruby -wKU
> # -*- ruby -*-
>
> file = nil
> state = :init
> ARGF.each do |line|
>   case state
>   when :init
>     next unless line =~ /^==> (.*) <==$/
>     if File.exist?($1)
>       backup = $1+'~'
>       File.delete(backup) if File.exist?(backup)
>       File.rename($1, backup)
>     end
>     file = File.open($1, 'w')
>     state = :writing
>   when :writing
>     file.write line
>     if line.chomp == '__END__'
>       file.close
>       state = :init
>     end
>   end
> end
> __END__
>
>