Hi, ---- Original message from Hugh Sasse on 10/31/2005 11:41 AM: >On Tue, 1 Nov 2005, Dale Martenson wrote: > >>I am not sure I understood the quiz this week. When I first read it, I thought >>it might be that one should analyze the input, build a dictionary, determine >>the repetition automatically and generate the output using a simplified form. >>My first thought was to huffman encode the dictionary based on entity >>frequency. The result was great and all :-) ... but not very readable. >> >>After rereading the quiz, I took it to mean how would you apply the DRY >>principle to generating this repetitive output. No programmatic analysis >>required. >> >You were right the first time. I was wondering how much of this >could be automated, and how good the results could be. My solution >looked as below. Note: I know the user dialogue should not appear >in the output but this was just to get it working, no time to tidy >it up as I'd wish. So I'd be interested in seeing your Huffman >approach. > > Below is my Huffman coding version. A sample of the output in included at the end to give you an idea of what gets generated. require 'rubygems' require_gem "usage" class Huffman class LeafNode attr_accessor :type, :parent, :weight, :symbol def initialize( symbol, weight ) @type = :leaf @parent = nil @symbol = symbol @weight = weight end end class InternalNode attr_accessor :type,:parent, :left_child, :right_child, :weight def initialize( node_left, node_right ) @type = :internal @parent = nil @left_child = node_left @right_child = node_right left_weight = (node_left != nil) ? node_left.weight : 0 right_weight = (node_right != nil) ? node_right.weight : 0 @weight = left_weight + right_weight @left_child.parent = self if @left_child != nil @right_child.parent = self if @right_child != nil end end attr_reader :root, :encode_table, :decode_table def initialize @root = nil @encode_table = {} @decode_table = {} end def encode( string ) original_words = string.split(/ /) all_words = original_words.sort dictionary = Hash.new(0) all_words.each do |word| dictionary[ word ] += 1 end sorted_dictionary = dictionary.sort {|a,b| a[1]<=>b[1]} generate_huffman_tree( sorted_dictionary ) walk encoding = "" original_words.each do |word| encoding += @encode_table[ word ] end encoding end def generate_huffman_tree( dictionary ) heap = [] dictionary.each do |item| heap << LeafNode.new( item[0], item[1] ) end heap.sort {|a,b| a.weight<=>b.weight} while heap.size > 1 n1 = heap.shift n2 = heap.shift heap << InternalNode.new( n1, n2 ) heap.sort {|a,b| a.weight<=>b.weight} end @root = heap.shift end def walk(node=@root, encoding='', level=0) if node != nil then if node.type == :leaf then # visit leaf # print "symbol:#{node.symbol} encoding:#{encoding} level:#{level}\n" @encode_table[ node.symbol ] = encoding @decode_table[ encoding ] = node.symbol else # must be internal walk( node.left_child, encoding+'0',level+1 ) walk( node.right_child, encoding+'1',level+1 ) end end end def decode( string ) output = '' lookup = '' string.each_byte do |b| lookup << b.chr if @decode_table.has_key?( lookup ) then output += decode_table[ lookup ] + ' ' lookup = '' end end output end end h = Huffman.new Usage.new "<infile" do |usage| print <<EOT # Undoes the work of TumbleDRYer. class WashingMachine def initialize EOT puts " @encoding = '#{h.encode(usage.infile.read)}'" puts " @decode_table = {" h.decode_table.each_pair { |k,v| puts " '#{k}' => %q{#{v}}," } puts " }" print <<EOT end def output lookup = '' @encoding.each_byte do |b| lookup << b.chr if @decode_table.has_key?( lookup ) then print @decode_table[ lookup ] + ' ' lookup = '' end end end end WashingMachine.new.output EOT end ************************************************ Example output: # Undoes the work of TumbleDRYer. class WashingMachine def initialize @encoding = '1001000010111011000110011110000001001011100110100010011111011010101001110011010110001011011111111010101001110011010110001011011111011110101001110011010110001011011111000010101001110011010110001011011111100010101001110011010110001011011110001111110001110111001011110000101000011111110110011111111000100011111111110010111000000110011110000001001011100110100010011111111011011100111001101011000101101111000111100110111001101011000101101111000010100001111111011001111111100010001111111111001011000100011001111111000010010111001101011001110100111110011101001011100110101100101010001110010001111111111001011001100011001111000000100101110011010001001111100101010100111001101011000101101111000111111000111011100101111101011010010111001101011001110100111110000010110001110011010110011001001111100011010100111001101011000101101111000010100001111110100011110100011010110100100111101000110111' @decode_table = { '111111' => %q{ CREATE}, '111100' => %q{text}, '111001' => %q{NULL, }, '110110' => %q{`authors`}, '110011' => %q{varchar(70)}, '110000' => %q{`categories`}, '101010' => %q{'0', )}, '00101' => %q{TABLE}, '111101' => %q{`name`}, '110111' => %q{; }, '110100' => %q{(`id`), }, '110001' => %q{`password`}, '101110' => %q{varchar(20)}, '101011' => %q{`author_id`}, '101000' => %q{AUTO_INCREMENT=14}, '100010' => %q{`categories_documents`}, '110101' => %q{`document`}, '101111' => %q{`nickname`}, '101100' => %q{date}, '101001' => %q{(`title`) )}, '100110' => %q{`documents`}, '100011' => %q{`filename`}, '100000' => %q{`date`}, '101101' => %q{`firstname`}, '100111' => %q{`document_id`}, '100100' => %q{ CREATE}, '100001' => %q{`contact`}, '100101' => %q{`title`}, '01010' => %q{varchar(50)}, '111010' => %q{'0', }, '01110' => %q{NOT}, '01011' => %q{'', }, '01000' => %q{KEY}, '00010' => %q{auto_increment, }, '111110' => %q{AUTO_INCREMENT=3}, '111011' => %q{(`id`) )}, '111000' => %q{`category_id`}, '110010' => %q{'0000-00-00', }, '01111' => %q{}, '01100' => %q{default}, '01001' => %q{int(11)}, '00110' => %q{( }, '00011' => %q{`description`}, '00000' => %q{`id`}, '01101' => %q{NULL}, '00111' => %q{TYPE=MyISAM}, '00100' => %q{; }, '00001' => %q{PRIMARY}, } end def output lookup = '' @encoding.each_byte do |b| lookup << b.chr if @decode_table.has_key?( lookup ) then print @decode_table[ lookup ] + ' ' lookup = '' end end end end WashingMachine.new.output