#!/usr/bin/env ruby
#
# q115.rb - solution to rubyquiz #115 (Mailing List Files)
# Lou Scoras <louis.j.scoras / gmail.com>
# February 28, 2007
#
# = Dependencies
#
# It felt like I was cheating a lot in this quiz since I made use of several
# great libraries to do everything for me =)  If you want to play with the
# script, you'll need to get a hold of:
#
# ActionMailer::  This was used for access to TMail.  You might be able to use
#                 TMail by itself, but I haven't tested it and rails might
#                 have made some modifications.
#
# Elif::          This handy little library reads files backwards.  This was
#                 actually a solution from a previous quiz ({64 - Port a
#                 Library}[http://www.rubyquiz.com/quiz64.html]). Plus it's
#                 from James so you know it's good stuff ;)
#
# Hpricot::       Used this little gem (no, not the kind of package) to do the
#                 scraping to get all the solutions for a quiz.  Awesome, just
#                 awesome!
#
# = The Script
#
# The messages in the archive are pretty close to being readable by TMail.
# Each page is just missing the correct mime header to let the mail parser
# know it's actually got attachments.
#
# After pulling out all the html artifacts, we still need to find the mime
# boundary.  An easy way to do this is just look for the content-disposition
# headers for the attachments and then look above them to find the boundary.
#
# 1. Look for 'Content-Disposition: attachment'
# 2. Look for the first line above that which is not a mail header -- that's
#    what elif is helping with.
# 3. That line is the mime boundary.  Add the header into the TMail object and
#    then you can read the attachments as normal
#
# = Running
#
# The script implements the command line interface mentioned in the quiz
# description.  You just give it the name of a ruby-talk message id and it
# will fetch the attachments into the current directory.  If you follow the
# number by a path you can change the output directory.
#
#     $ q115 190780 outdir
#
# As an additional feature, you can also provide the number of the quiz
# prefixed with a 'q' character.  In this case, all of the solutions will be
# downloaded and put in a subdirectory by solver.  If the solution didn't have
# any attachments it puts the message body into a file called solution.txt.

require 'action_mailer'
require 'cgi'
require 'delegate'
require 'elif'
require 'fileutils'
require 'hpricot'
require 'open-uri'
require 'tempfile'

module Quiz115
 ##
 # A message wrapped as a TMail::Mail.  The message text is copied to a
 # temporary file, scanned for a mime boundary (see MIME::BoundaryFinder) and
 # then loaded by TMail with a multipart/mixed content type added when a
 # boundary was found.  All other calls are delegated to the TMail object.
 #
 class QuizMail < DelegateClass(TMail::Mail)
   class << self
     # Lets callers point the class at a different archive.  Only a writer is
     # generated because the reader below supplies the default.
     attr_writer :archive_base_url

     ##
     # Base url that a numeric message id is appended to.
     #
     def archive_base_url
       @archive_base_url ||
         "http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/"
     end

     ##
     # Scrape the page of the given quiz and return an array of
     # [link text, href] pairs, one for every link under the '#links' element.
     #
     def solutions(quiz_number)
       page = "http://www.rubyquiz.com/quiz#{quiz_number}.html"
       # Block form so the handle is closed as soon as the page is parsed.
       doc  = open(page) { |io| Hpricot(io) }
       (doc/'#links'/'li/a').map do |link|
         [CGI.unescapeHTML(link.inner_text), link['href']]
       end
     end
   end

   ##
   # mail:: a message id (anything Integer() accepts), a url, or a path
   #        readable with open.
   #
   def initialize(mail)
     temp_path = to_temp_file(mail)
     boundary  = MIME::BoundaryFinder.new(temp_path).find_boundary

     @tmail = TMail::Mail.load(temp_path)
     if boundary
       @tmail.set_content_type('multipart', 'mixed', 'boundary' => boundary)
     end

     super(@tmail)
   end

   private

   ##
   # Copy the message text into a temporary file and return the file's path.
   #
   def to_temp_file(mail)
     # Keep the Tempfile referenced from this object: a Tempfile removes its
     # file once it is garbage collected, and only the path is handed on to
     # the boundary finder and TMail.
     @tempfile = Tempfile.new('qmail')
     @tempfile.write(fetch_message(mail))
     @tempfile.close
     @tempfile.path
   end

   ##
   # Return the raw message text for +mail+.  Archive pages and web urls are
   # stripped of html; anything else is read verbatim.
   #
   def fetch_message(mail)
     if message_id?(mail)
       url = self.class.archive_base_url + mail.to_s
       open(url) { |page| cleanse_html(page.read) }
     else
       web = %w[http https].include?(URI.parse(mail).scheme)
       # NOTE(review): Kernel#open runs a subprocess for strings that start
       # with '|'; +mail+ comes from the command line or a scraped link.
       open(mail) { |m| web ? cleanse_html(m.read) : m.read }
     end
   end

   ##
   # True when Integer() accepts +mail+, i.e. it names a ruby-talk message.
   #
   def message_id?(mail)
     Integer(mail)
     true
   rescue ArgumentError, TypeError
     false
   end

   ##
   # Drop everything up to and including the opening header div, remove the
   # remaining tags and decode html entities.
   #
   def cleanse_html(str)
     # \s+ accepts a space as well as a line break between tag and attribute.
     without_preamble = str.sub(/\A.*?<div\s+id="header">/mi, '')
     CGI.unescapeHTML(without_preamble.gsub(/<[^>]*>/m, ''))
   end
 end

 module MIME
   ##
   # Reads a saved message from the last line towards the first, looking for
   # the mime boundary that precedes an attachment's headers.
   #
   class BoundaryFinder

     ##
     # file:: path of the message; it is handed to Elif, which yields the
     #        lines in reverse order.
     #
     def initialize(file)
       @in_attachment_headers = false
       @reader = ::Elif.new(file)
     end

     ##
     # Return the mime boundary marker, or nil when the message has no
     # 'Content-Disposition: attachment' header.  Messages without
     # attachments (multipart/alternative and the like) are of no interest
     # to the quiz, so no boundary is reported for them.
     #
     def find_boundary
       while (line = @reader.gets)
         if !@in_attachment_headers
           note_attachment(line)
         elsif (marker = boundary_from(line))
           return marker
         end
       end
       nil
     end

     private

     # Remember that an attachment's header block has been entered.
     def note_attachment(line)
       return unless line =~ /^content-disposition\s*:\s*attachment/i

       puts "Found an attachment" if $DEBUG
       @in_attachment_headers = true
     end

     # Return the boundary held by +line+, or nil when the line is a mail
     # header or starts with whitespace (continuation and blank lines).
     def boundary_from(line)
       return nil if line =~ /^\S+\s*:\s*/
       return nil if line =~ /^\s+/

       puts "I think I found it...#{line}" if $DEBUG
       line.strip.gsub(/^--/, '')
     end
   end
 end
end

# Mix both modules into the top-level object so the code below can refer to
# QuizMail without the Quiz115:: prefix and call mkdir_p directly.
include Quiz115
include FileUtils

##
# Fetch one message and save its contents beneath +outdir+.
#
# mailh::  whatever QuizMail.new accepts; the script passes a message id or
#          a url.
# outdir:: directory to write into; it must already exist.
#
# Every attachment is written under its own file name.  A message without
# attachments has its body saved as solution.txt instead.  Any StandardError
# is reported on stdout and swallowed, so one bad message does not stop a
# batch download.
#
def process_mail(mailh, outdir)
  mail = QuizMail.new(mailh)
  if mail.has_attachments?
    mail.attachments.each do |attachment|
      # The file name comes from the message itself; keep only its last
      # component so it cannot point outside of outdir.
      outpath = File.join(outdir, File.basename(attachment.original_filename))
      puts "\tWriting: #{outpath}"
      # Binary mode plus write: store the attachment exactly as received,
      # with no newline translation and no newline appended.
      File.open(outpath, 'wb') { |out| out.write(attachment.read) }
    end
  else
    outfile = File.join(outdir, 'solution.txt')
    File.open(outfile, 'w') { |f| f.write(mail.body) }
  end
rescue => e
  puts "Couldn't parse mail correctly. Sorry! (E: #{e})"
end

##
# Derive a directory name from a solver's display name: lower-case it, drop
# the characters ! # $ & * ? ( ) { } and turn each run of whitespace into a
# single underscore.
#
def to_dirname(solver)
  lowered = solver.downcase
  cleaned = lowered.delete('!#$&*?(){}')
  cleaned.gsub(/\s+/, '_')
end

# Command line entry point.
#
# ARGV[0] is either a ruby-talk message id or a quiz number prefixed with
# 'q' (upper or lower case); ARGV[1] is the output directory and defaults to
# the current directory.
target      = ARGV[0]
destination = ARGV[1] || '.'

if target.nil?
  $stderr.puts "You must specify either a ruby-talk message id, or a
quiz number (prefixed by 'q')"
  exit 1
end

case target
when /\Aq/i
  # Everything after the leading 'q' is the quiz number.
  quiz_number = target[1..-1]
  puts "Fetching all solutions for quiz \##{quiz_number}"

  QuizMail.solutions(quiz_number).each do |solver, url|
    puts "Fetching solution from #{solver}."

    # Each solver gets a subdirectory of the output directory.
    solver_dir = File.join(destination, to_dirname(solver))
    mkdir_p solver_dir
    process_mail(url, solver_dir)
  end
else
  process_mail(target, destination)
end

exit 0