Begin forwarded message: > From: Pascal Van Cauwenberghe <pvc / nayima.be> > Date: July 11, 2007 7:27:09 AM CDT > To: submission / rubyquiz.com > Subject: Please Forward: Ruby Quiz Submission > > Hi James, > > I don't subscribe to ruby-talk, too much volume. Could you forward > my submission for the Hangman puzzle? Thanks. > > The program doesn't concern itself with UI or gameplay, but more > with strategies. The tests need a word file to run correctly. This > is available online (see URL in the code). > > Thanks for organizing the Ruby Quiz. This is the first time I send > in a solution, but I've implemented several of them before. It's a > great way to keep up the coding muscles. > > -- > Pascal Van Cauwenberghe > Nayima bvba > --- > http://www.nayima.be > http://www.xpday.net > http://www.xp.be > require 'rubygems' > gem 'rspec' > require 'spec' > > # The HangMan classes don't bother with UI. I was more interested > in the strategies that could be used > # The experiment consists of playing the game on +/- 20900 English > words of 5 letters or more and > # plotting how many wrong guesses are made. The player has the > dictionary. > > # The first strategy I thought of was a classic 'Binary chop': find > the letter that appears in about half > # of the words. Thereby, you eliminate half of the words from the > dictionary, whatever the outcome. > # This strategy succeeds in guessing the right word with 5 or fewer > wrong guesses in 86% of cases > # This is implemented in MidpointStrategy > # > # But this is not a symmetrical problem: we're not optimizing the > number of guesses, but the number of wrong guesses. > # Also, a correct guess gives us more information than a wrong > guess: we get the position(s) of the correct letter. > # This allows us to eliminate even more words. > # The ThreeQuarterpointStrategy chooses the letter that is present > in about 3/4 of the words. > # This strategy succeeds in 94 % of cases. > # > # The MostFrequentStrategy chooses the letter that appears in most > words. This strategy succeeds in almost 95% of cases. > # > # The EnglishFrequencyStrategy is much simpler: it doesn't take > advantage of the dictionary. It just guesses letters > # based on their frequency in English words. This strategy is > faster than the others but does poorly: 11% success > # > # The results can be seen at http://blog.nayima.be/wp-content/ > uploads/hangman-strategies.JPG > > # The code is mostly self-explanatory. I've had to cache a few > things (letters in Words, the number of words that contain a letter) > # to speed things up. The simulation plays about 1100 games per > minute > # > # The tests use a file words.txt that I copied from /usr/share/dict/ > words > # Each line of the file contains one word > # This file can be downloaded from http://blog.nayima.be/wp-content/ > uploads/words.txt > > module RubyQuiz > > > # A Word describes a word in the dictionary > # For optimization, the word keeps an array of all unique letters > in the word > class Word > attr_reader :word > attr_reader :letters > > # Create a word with the given string > def initialize(word) > @word = word > @letters = unique_letters_in(word) > end > > private > > def unique_letters_in(word) > bytes = [] > word.each_byte {|c| bytes << c } > bytes.uniq > end > end > > # The Dictionary contains a list of words > class Dictionary > > # Create an empty dictionary > def initialize > change_wordlist [] > @dictionary_per_size = {} > end > > # Add one word to the dictionary > def add(word) > change_wordlist(@words + [ Word.new(word) ] ) > end > > # Load all words from a text file (one word per line) > # Keep only words that are 5 ascii letters or more > # Words all all lower case > def load(file) > words = File.readlines(file) > words = words.collect {|word| Word.new(word.chomp.downcase) } > change_wordlist(words.select {|word| word.word.length >= 5 && > word.word =~ /^([a-z])+$/ }) > self > end > > def dup > other = super > other.add_words(@words) > other > end > > # Return a new Dictionary with only the words of the given length > def with_only_words_of_size(len) > other = Dictionary.new > @dictionary_per_size[len] ||= @words.select{|word| > word.word.length == len } > words = @dictionary_per_size[len] > other.add_words(words) > other > end > > # Number of Words > def length > @words.length > end > > # Access each Word individually > def [](index) > @words[index].word > end > > # Filter out all words that don't have the given length > def keep_only_words_of_length(len) > change_wordlist(@words.select{|word,letters| word.word.length > == len }) > end > > # Filter out all words that contain the given letter > def reject_words_that_contain(letter) > change_wordlist(@words.select { |word,letters| word.word.index > (letter) == nil }) > end > > # Keep only the words that match the partial solution > def keep_only_words_that_match(hangman_pattern) > pattern = Regexp.new('^' + hangman_pattern.gsub(/-/,'.') + '$') > > change_wordlist(@words.select { |word,letters| word.word =~ > pattern }) > end > > # Return the number of words in the dictionary that contain the > given letter > def words_that_contain(letter) > letter_count(letter) > end > > # Iterate over each Word > def each(&block) > @words.each(&block) > end > > > protected > > def add_words(words) > change_wordlist(words) > end > > private > > def change_wordlist(list) > @words = list > @letter_counts = nil > @dictionary_per_size = {} > end > > # Compute the number of words that a letter appears in > # Because this is an expensive operation, cache the results. > The cache is invalidated when the list of Words changes > def letter_count(letter) > @letter_counts ||= compute_counts > @letter_counts[letter[0] - ?a] > end > > def compute_counts > count = Array.new(26,0) > @words.each do |word| > word.letters.each {|c| count[c-?a] += 1} > end > count > end > end > > # Guess the letter that appears in most words > class MostFrequentStrategy > def score_for(letter,dictionary) > dictionary.length - dictionary.words_that_contain(letter) > end > end > > # Guess the letter that appears in approx half of the words > # If there's only one word left, use the letters in that word first > class MidpointStrategy > def score_for(letter,dictionary) > if dictionary.length >= 2 then > midpoint = dictionary.length / 2 > nbwords = dictionary.words_that_contain(letter) > midpoint > nbwords ? midpoint - nbwords : nbwords - midpoint > else > dictionary.length - dictionary.words_that_contain(letter) > end > end > end > > # Guess the letter that appears in approx 3/4 of the words > class ThreeQuaterpointStrategy > def score_for(letter,dictionary) > if dictionary.length >= 2 then > midpoint = 3 * dictionary.length / 4 > nbwords = dictionary.words_that_contain(letter) > midpoint > nbwords ? midpoint - nbwords : nbwords - midpoint > else > dictionary.length - dictionary.words_that_contain(letter) > end > end > end > > # Guess the letter that appears in most words, using the > frequency of letters in English words > # Doesn't take the dictionary into account > class EnglishFrequencyStrategy > def score_for(letter,dictionary) > frequencies = "earitnoslcumdphbgyfvkwxqjz" > frequencies.index(letter) > end > end > > # The HangMan solving class > # Play is simple: > # - Give the solver a puzzle to solve as a string of '-', a > dictionary and optionally a stratagy for choosing the next letter > # - Ask the solver for a guess > # - Tell the solver it took a wrong guess: the letter doesn't > appear in the solution > # - Or tell the solver it took a good guess. Give it a string > with '-' replaced with the good letter > # - Keep on going until the puzzle is solved > class HangManSolver > # The current solution > attr_reader :solution > > # Create a HangMan Solver > # puzzle should be a string of '-' as long as the word to guess > # dictionary is a Dictionary. The solver can find words that > are not in the dictionary > # strategy is an optional parameter to determine the letter > choosing strategy > # a Strategy object should implement one method score_for > (letter,dictionary) => numeric score > # the lowest scoring letter is chosen > def initialize > (puzzle,dictionary,strategy=MostFrequentStrategy.new) > @solution = puzzle.dup > @dictionary = dictionary.with_only_words_of_size(puzzle.length) > @possible_letters = ('a'..'z').to_a > @strategy = strategy > end > > def merge(answer) > for pos in 0.. / solution.length > if @solution[pos] == ?- && answer[pos] != ?- then > @solution[pos] = answer[pos] > end > end > @solution > end > > # Returns true if the solution is known > def solved? > @solution !~ /-/ > end > > # How many more Words in the dictionary are candidates? > def possibilities > @dictionary.length > end > > # Returns the letter that the solver guesses > # Uses the strategy to determine the letter with the lowest score > def guess > letters = @possible_letters.collect {|letter| [ score_for > (letter),letter ]} > letter = letters.min {|letter1,letter2| letter1 <=> letter2 } > letter[1] > end > > # Tell the solver that the letter does not appear in the solution > def wrong_guess(letter) > @possible_letters.delete(letter) > @dictionary.reject_words_that_contain(letter) > end > > # Tell the solver that the letter was a good guess, by placing > the letter in the solution > # e.g. to indicate that 'a' is a good guess for 'hangman' pass > '-a---a-' > def good_guess(pattern) > merge(pattern) > @dictionary.keep_only_words_that_match(@solution) > @possible_letters.delete(letter_in(pattern)) > end > > private > def score_for(letter) > @strategy.score_for(letter,@dictionary) > end > > def letter_in(pattern) > result = ' ' > pattern.each_byte {|char| result[0] = char if char != ?-} > result > end > end > > # The HangManPlayer can tell if the Solver made a good/wrong guess > class HangManPlayer > def initialize(word) > @solution = word.dup > end > > def evaluate(letter) > template = '' > @solution.each_byte do |char| > template << (char == letter[0] ? char : ?-) > end > template > end > end > > # The Game uses the Solver and Player to find a word, given a > dictionary and a strategy > class HangManGame > > # Finds the given word, using the dictionary and the strategy > # Returns: > # the word found > # the number of wrong guesses > # an array of guesses. Each item is a '+' (good guess) or > '-' (wrong guess) and the letter guessed > def self.solve(word,dictionary,strategy=MostFrequentStrategy.new) > guesses = [] > empty = '-' * word.length > player = HangManPlayer.new(word) > puzzle = HangManSolver.new(empty ,dictionary,strategy) > wrong_guesses = 0 > while !puzzle.solved? do > letter = puzzle.guess > pattern = player.evaluate(letter) > if pattern == empty then > puzzle.wrong_guess(letter) > wrong_guesses += 1 > guesses << "-#{letter}" > else > puzzle.good_guess(pattern) > guesses << "+#{letter}" > end > end > > return puzzle.solution,wrong_guesses,guesses > end > end > > describe Word do > it "should identify its unique letters" do > word = Word.new('banana') > word.letters.length.should eql(3) > word.letters.should include(?b) > word.letters.should include(?a) > word.letters.should include(?n) > end > end > > describe Dictionary do > it "should contain words" do > dict = Dictionary.new > > dict.add("hangman") > dict.add("packman") > dict.add("rackham") > > dict.length.should == 3 > end > > it "should select only words of a certain size" do > dict = Dictionary.new > > dict.add("hangman") > dict.add("packman") > dict.add("rackham") > dict.add("rat") > dict.add("hang") > > dict.keep_only_words_of_length(7) > dict.length.should ==(3) > end > > it "should load word files" do > dict = Dictionary.new > dict.load("words.txt") > dict.length.should eql(20905) > dict.keep_only_words_of_length(7) > dict.length.should eql(3872) > end > > it "should select words that match pattern" do > dict = Dictionary.new > dict.load("words.txt") > dict.length.should eql(20905) > dict.keep_only_words_that_match('h------') > dict.length.should eql(155) > dict.keep_only_words_that_match('ha---a-') > dict.length.should eql(11) > end > > it "should throw away words that contain a wrong letter" do > dict = Dictionary.new > dict.load("words.txt") > dict.length.should eql(20905) > dict.reject_words_that_contain('z') > dict.length.should eql(20605) > dict.reject_words_that_contain('x') > dict.length.should == 20075 > end > > it "should know how many words contain a certain letter" do > dict = Dictionary.new > dict.load("words.txt") > dict.length.should eql(20905) > > dict.words_that_contain('e').should eql(13211) > dict.words_that_contain('f').should eql(1915) > dict.words_that_contain('z').should eql(300) > dict.words_that_contain('r').should eql(10403) > end > end > > describe HangManPlayer do > > it "should evaluate guesses" do > player = HangManPlayer.new("hangman") > > player.evaluate('a').should eql('-a---a-') > player.evaluate('b').should eql('-------') > player.evaluate('m').should eql('----m--') > end > > > end > > describe HangManSolver do > it "should accept a puzzle" do > puzzle = HangManSolver.new("-------",Dictionary.new) > end > > it "should merge solutions and answers" do > puzzle = HangManSolver.new("-------",Dictionary.new) > puzzle.solution.should eql("-------") > puzzle.merge("-a---a-") > puzzle.solution.should eql("-a---a-") > puzzle.merge("----m--") > puzzle.solution.should eql("-a--ma-") > end > > it "should know when it's solved" do > puzzle = HangManSolver.new("-------",Dictionary.new) > puzzle.solved?.should be_false > > puzzle.merge('h------') > puzzle.solved?.should be_false > > puzzle.merge('-angman') > puzzle.solved?.should be_true > end > > it "should find 'hangman'" do > puzzle = HangManSolver.new("-------",Dictionary.new.load > ("words.txt")) > puzzle.possibilities.should eql(3872) > puzzle.guess.should eql('e') > puzzle.wrong_guess('e') > puzzle.possibilities.should eql(1490) > puzzle.guess.should eql('a') > puzzle.good_guess('-a---a-') > puzzle.possibilities.should eql(69) > puzzle.guess.should eql('r') > puzzle.wrong_guess('r') > puzzle.possibilities.should eql(37) > puzzle.guess.should eql('n') > puzzle.good_guess('--n---n') > puzzle.possibilities.should eql(2) > puzzle.guess.should eql('m') > puzzle.good_guess('----m--') > puzzle.possibilities.should eql(2) > puzzle.guess.should eql('d') > puzzle.wrong_guess('d') > puzzle.possibilities.should eql(1) > puzzle.guess.should eql('g') > puzzle.good_guess('---g---') > puzzle.possibilities.should eql(1) > puzzle.guess.should eql('h') > puzzle.good_guess('h------') > puzzle.possibilities.should eql(1) > puzzle.solution.should eql('hangman') > end > end > > describe HangManGame do > > it "should find words in the dictionary" do > dictionary = Dictionary.new > dictionary.load("words.txt") > solution, round = HangManGame.solve('hockey',dictionary) > > solution.should eql('hockey') > end > > it "should find words not in the dictionary" do > dictionary = Dictionary.new > dictionary.add('hockey') > dictionary.add('cyclic') > solution, round = HangManGame.solve('pascal',dictionary) > > solution.should eql('pascal') > end > > it "should find all words in the dictionary" do > # result[i] contains the number of words that made i wrong > guesses > result = Array.new(26,0) > > dictionary = Dictionary.new > dictionary.load("words.txt") > > # Change the strategy to test another type of strategy > strategy = MostFrequentStrategy.new > dictionary.each do |word| > solution,wrong_guesses,guesses = HangManGame.solve > (word.word,dictionary,strategy) > solution.should eql(word.word) > result[wrong_guesses] += 1 > end > # Uncomment to print out the number of words per number of > wrong guesses > # puts "=> #{result.inspect}" > end > > end > end > >