I based my solution on the algorithm described here:

http://www.siggraph.org/education/materials/HyperGraph/video/mpeg/mpegfaq/huffman_tutorial.html

I use the rubytree gem for the tree, and subclassed its TreeNode into the class
FreqNode, which holds a frequency, possibly a letter (just the leaves), and a
side (either :left or :right). I made FreqNodes compare primarily based on
their frequencies so I could build a SortedSet of them and repeatedly take the
two lowest to combine. The actual tree is built in the build_tree method.

It just takes an input string, builds the tree, and outputs the compressed
data (which is slightly different from the example given in the original quiz
message because we treat ties differently. I'm pretty sure this doesn't
matter though). No extra credit, though it shouldn't be too hard to add on some
tree serialization.

irb> tree = build_tree('ABC')
irb> tree.printTree
Node ID: 16 Content: 3 Parent: ROOT Children: 2 Total Nodes: 5 Side:
    Node ID: 14 Content: 1C Parent: 16 Children: 0 Total Nodes: 1 Side: left
    Node ID: 15 Content: 2 Parent: 16 Children: 2 Total Nodes: 3 Side: right
        Node ID: 12 Content: 1A Parent: 15 Children: 0 Total Nodes: 1 Side: left
        Node ID: 13 Content: 1B Parent: 15 Children: 0 Total Nodes: 1 Side: right
irb> encode 'ABC', tree
=> "10110"

$ ./huffman_encode.rb ABRRKBAARAA
Encoded:
01011111 10010100 1100
Encoded bytes: 3

Original:
ABRRKBAARAA
Original bytes: 11

Compressed: 73%


#!/usr/bin/env ruby

require 'rubygems'
require 'tree'
require 'set'

class Tree::TreeNode
  # Yield once for each leaf node.
  def each_leaf
    self.each { |node| yield(node) if not node.hasChildren? }
  end

  # Both assume exactly 2 children
  def left_child;  children.first; end
  def right_child; children.last; end
end

# FreqNodes are TreeNodes that:
#   1) Have generated, numeric names to ensure they're all unique.
#   2) Compare based primarily on their contents.
#   3) Have as contents an array containing a frequency, and possibly a letter
#      (for the leaves).
#   4) Have a @side attribute that should be set to either :left or :right for
#      non-root nodes.
#   5) Have a few other nicities, like letter().
class FreqNode < Tree::TreeNode
  attr_accessor :side

  def initialize(content = nil)
    super(FreqNode.next_name, content)
  end

  def <=>(node)
    content_diff = @content <=> node.content
    return content_diff if not content_diff.zero?
    super(node)
  end

  # Assumes this is a leaf.
  def letter; @content[1]; end

  def to_s; super() + " Side: #{@side}"; end

  # Generate unique names because we can't add two nodes with the same name as
  # children of the same parent node.
  @next_name_num = -1
  def FreqNode.next_name
    (@next_name_num += 1).to_s
  end
end

class Set
  def shift
    elem = self.min
    self.delete elem
    elem
  end
end

# Build a SortedSet containing pairs of [freq, letter] for the given string.
def build_freqs str
  # Build hash of byte => count
  counts = Hash.new 0
  str.each_byte { |byte| counts[byte.chr] += 1 }

  # Build SortedSet of [freq, byte] pairs (lower freqs first).
  freqs = SortedSet[]
  counts.each { |bc| freqs << bc.reverse }
  freqs
end

# Build the tree for the given input string, and return the root node.
def build_tree str
  nodes = build_freqs(str).map! { |pair| FreqNode.new(pair) }

  while nodes.size > 1
    child1, child2 = nodes.shift, nodes.shift
    parent = FreqNode.new([child1.content.first + child2.content.first])
    child1.side, child2.side = :left, :right
    parent << child1
    parent << child2
    nodes << parent
  end

  nodes.min
end

# Encode the given letter using the given tree of FreqNodes.
def encode_letter letter, tree
  enc = ''

  # Find leaf with the right byte value
  node = nil
  tree.each_leaf do |leaf|
    (node = leaf; break) if leaf.letter == letter
  end

  while not node.isRoot?
    node.side == :left ? enc << '0' : enc << '1'
    node = node.parent
  end

  enc.reverse
end

# Build a tree for the given string, and encode it using that tree.
def encode str, tree = build_tree(str)
  #tree = build_tree(str)
  encoded = ''
  str.each_byte { |byte| encoded << encode_letter(byte.chr, tree) }
  encoded
end

# Decode the given string (which should consist of '0's and '1's) using the
# given tree.
def decode str, tree
  dec = ''
  i = 0
  node = tree

  str.each_byte do |byte|
    node = (byte.chr == '0' ? node.left_child : node.right_child)
    if not node.hasChildren?
      dec << node.byte_value.chr
      node = tree
    end
  end

  dec
end

class String
  # Split up the string by inserting sep between len-length chunks.
  def split_up! len, sep
    (((length / len.to_f).ceil - 1) * len).step(len, -len) do |i|
      insert i, sep
    end
    self
  end
end

# Output a binary string, splitting it up for easier reading.
def puts_binary str
  str = str.dup
  str.split_up! 40, "\n"
  str.each { |line| puts line.split_up!(8, ' ') }
end

if __FILE__ == $0
  input = ARGV.join ' '
  input_bytes = input.length
  enc = encode(input)
  enc_bytes = (enc.length / 8.0).ceil
  compressed = ((1 - enc_bytes.to_f/input_bytes) * 100).round

  puts 'Encoded:'
  puts_binary(enc)
  puts "Encoded bytes: #{enc_bytes}\n\n"

  puts "Original:\n#{input}"
  puts "Original bytes: #{input_bytes}\n\n"

  puts "Compressed: #{compressed}%"
end


-- 
Jesse Merriman
jessemerriman / warpmail.net
http://www.jessemerriman.com/