--Apple-Mail-1--1050557658
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
	charset=US-ASCII;
	delsp=yes;
	format=flowed

Begin forwarded message:

> From: "chuck sonic" <chuck.sonic / gmail.com>
> Date: April 11, 2006 3:34:08 AM CDT
> To: submission / rubyquiz.com
> Subject: Markov chain solution
>
> I'm still learning Ruby and I'm not a ruby-talk subscriber, but I
> wanted to submit
> my solution to this latest quiz. My solution uses a helper file I
> cobbled together
> in part using a code sample I found (and sanity checked) on the web.
> This helper adds
> a few class methods to Array which make the whole affair very  
> straightforward.
>
> If it matters - I didn't have a chance to work on this until tonight
> and haven't yet had a chance to see the other solutions...
>
> Thanks for organizing ruby quiz - it is an excellent resource!
>
>   Chuck
--Apple-Mail-1--1050557658
Content-Transfer-Encoding: 7bit
Content-Type: text/x-ruby-script;
	x-unix-mode=0666;
	name="markov.rb"
Content-Disposition: attachment;
	filename=markov.rb

#!/usr/bin/env ruby

require 'ArrayExtras'

# 2nd order markov chain for word generation
# this is an exercise in quick-n-dirty, not at pretty code
# chuck.sonic / gmail.com

class Markov
  # just for debugging
  attr_reader :table

  def initialize()
    @table = {}
  end

  def scan_text(filename)
    word1 = nil
    word2 = nil
    scanning = true

    File.open(filename) do |file|
      words = file.each_line do |line|
        if (scanning)
          # looking for START token
          if (line =~ /^\*\*\* START/)
            scanning = false
            word1 = nil
            word2 = nil
          end
          # don't process, just get next line
          next
        end
        if(line =~ /^\*\*\* END/) 
          # this file is done
          scanning = true
          next
        end
        #if we get here, then this is the body
        line.split.each do |curword|
          if(word1.nil? || word2.nil?)
            # getting warmed up, shift and continue
            word1, word2 = word2, curword
            next
          end
          # does the top level table have the root word?
          @table[word1] = {} unless @table.has_key?(word1)
          level1 = @table[word1]
          # does the 2nd level table have the second word?
          @table[word1][word2] = {} unless @table[word1].has_key?(word2)
          # is this a legit increment/init idiom? perhaps...
          @table[word1][word2][curword] = 
            (@table[word1][word2][curword] || 0) + 1
          word1, word2 = word2, curword
        end
      end
    end
    @table.length
  end

  def generate(numwords=120)
    word1 = @table.keys.anyone
    word2 = @table[word1].keys.anyone
    print "#{word1} #{word2} "
    (numwords-2).times do |num|
      arr = @table[word1][word2].to_a
      entry = arr.random(:last)
      word1, word2 = word2, entry.first
      print "#{word2} "
      print "\n" if num%10 == 8
    end
    print "\n"
    nil
  end
end

if __FILE__ == $0
  numwords = 100
  if(ARGV.size >= 2 and ARGV[0] == '-n')
    ARGV.shift
    numwords = ARGV.shift.to_i
    numwords = 100 unless numwords > 0
  end

  unless ARGV.size >=1 
    puts "Usage: #$0 [-n  NUMWORDS] gutenberg1.txt [gutenberg2.txt ...]"
    exit
  end

  m = Markov.new
  ARGV.each do |filename| 
    puts "Scanning file #{filename}..."
    m.scan_text(filename)
  end
  puts "\nHere is your own #{numwords} word story:\n\n"
  m.generate(numwords)
end



--Apple-Mail-1--1050557658
Content-Transfer-Encoding: 7bit
Content-Type: text/x-ruby-script;
	x-unix-mode=0666;
	name="arrayextras.rb"
Content-Disposition: attachment;
	filename=arrayextras.rb

# handy stuff
module ListUtil
    def swap!(a,b)
      self[a], self[b] = self[b], self[a]
      self
    end

    # this code found on the net at -
    # http://www.bigbold.com/snippets/posts/show/898
  # Chooses a random array element from the receiver based on the weights
  # provided. If _weights_ is nil, then each element is weighed equally.
  # 
  #   [1,2,3].random          #=> 2
  #   [1,2,3].random          #=> 1
  #   [1,2,3].random          #=> 3
  #
  # If _weights_ is an array, then each element of the receiver gets its
  # weight from the corresponding element of _weights_. Notice that it
  # favors the element with the highest weight.
  #
  #   [1,2,3].random([1,4,1]) #=> 2
  #   [1,2,3].random([1,4,1]) #=> 1
  #   [1,2,3].random([1,4,1]) #=> 2
  #   [1,2,3].random([1,4,1]) #=> 2
  #   [1,2,3].random([1,4,1]) #=> 3
  #
  # If _weights_ is a symbol, the weight array is constructed by calling
  # the appropriate method on each array element in turn. Notice that
  # it favors the longer word when using :length.
  #
  #   ['dog', 'cat', 'hippopotamus'].random(:length) #=> "hippopotamus"
  #   ['dog', 'cat', 'hippopotamus'].random(:length) #=> "dog"
  #   ['dog', 'cat', 'hippopotamus'].random(:length) #=> "hippopotamus"
  #   ['dog', 'cat', 'hippopotamus'].random(:length) #=> "hippopotamus"
  #   ['dog', 'cat', 'hippopotamus'].random(:length) #=> "cat"
  def random(weights=nil)
    return random(map {|n| n.send(weights)}) if weights.is_a? Symbol
    
    weights ||= Array.new(length, 1.0)
    total = weights.inject(0.0) {|t,w| t+w}
    point = rand * total
    
    zip(weights).each do |n,w|
      return n if w >= point
      point -= w
    end
  end
  
    def anyone(cutoff=self.length)
      self[rand(cutoff)]
    end
end

class Array
  include ListUtil
end

class String
  include ListUtil
end

# this code shows that Array.random(:foo) is probably working
if __FILE__ == $0
  ar = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
  totals = []
  10.times { |n| totals[n] = 0 }
  10000.times do
    ans = ar.random(:round)
    totals[ans] += 1
  end
  expect = 10000.0 / (0...10).to_a.inject(0) {|sum, n| sum += n}
  printf "weight : times |   t/w : error (%2.2f expected t/w)\n", expect
  printf "------------------------------\n"
  10.times do |n|
    #dif = (n.zero?) ? 0 : totals[n] - totals[n-1]
    share = (n.zero?) ? 0 : totals[n] / n.to_f
    percent = (n.zero?) ? 0 : share / expect
    printf "%6d : %5d | %3.2f : %2.2f\n", n, totals[n], share, percent
  end
end



--Apple-Mail-1--1050557658
Content-Transfer-Encoding: 7bit
Content-Type: application/octet-stream;
	x-unix-mode=0666;
	name="README"
Content-Disposition: attachment;
	filename=README

Hard coded for project gutenberg texts (looks for *** START / *** END)
Otherwise, should be self explanatory

here is a sample run

  ./markov.rb huck_finn.txt pride_and_p.txt 
  Scanning file huck_finn.txt...
  Scanning file pride_and_p.txt...

  Here is your own 100 word story:

  Odd, too. Say, boy, what's the sense of decency and virtue 
  in a quieter way. Elizabeth felt herself to stay two 
  months. I told you I ben a-drinkin'? Has I had 
  been driven from her might assist his tenants, and relieve 
  the poor. With respect to Jane--his unpardonable assurance in acknowledging, 
  though he warn't quite midnight yet, so we can LET 
  ON, to ourselves, that we wouldn't let me out. But 
  if only HALF a man--like Buck Harkness, there--shouts 'Lynch him! 
  lynch him!' you're afraid to come to be in the 
  most generous of his head, and opened his mouth--what 

Quick and dirty.
Thanks to ArrayExtras, I was able to keep non-golfing code to < 100 lines.

chuck.sonic / gmail.com
4/10/06



--Apple-Mail-1--1050557658
Content-Transfer-Encoding: 7bit
Content-Type: text/plain;
	charset=US-ASCII;
	format=flowed



--Apple-Mail-1--1050557658--