pkellner wrote:
> I'm trying to download images from a web page that lists them with
> HTML like what I've pasted below.  Basically, I want to iterate through
> all the <IMG> tags, grab the SRC= info, and download those files.

require 'uri'
require 'open-uri'
require 'html/htmltokenizer'

# Fetches a web page and collects the absolute URLs of the images it embeds.
#
# Typical use: call #get with the page URL, then #parse, then read #images.
class WebPage
  # URLs (as Strings) of all images found by the last successful #parse.
  # Always an Array; empty until #parse has run on a fetched page.
  attr_reader :images

  def initialize
    @images = []
  end

  # Get a web page from a specified URL.
  #
  # url - String URL of the page to download.
  #
  # Returns the page body as a String. Raises URI::InvalidURIError if the
  # URL cannot be parsed; errors from the download itself propagate.
  def get(url)
    @uri = URI.parse(url)
    # Kernel#open stopped handling URLs in Ruby 3.0; URI.open is the
    # open-uri entry point and closes the stream once the block returns.
    @body = URI.open(url, &:read)
  end

  # Parse the web page, extracting links to images.
  #
  # Each <img> tag's src is resolved against the page URL so that relative
  # links come out absolute. Does nothing if no page has been fetched yet.
  #
  # Returns nil; the result is available through #images.
  def parse
    return unless @body

    tokenizer = HTMLTokenizer.new(@body)
    found = []
    while (tag = tokenizer.getTag('img'))
      src = tag.attr_hash['src']
      # An <img> without a usable src has nothing to download.
      next if src.nil? || src.strip.empty?

      begin
        found << @uri.merge(src.strip).to_s
      rescue URI::Error
        # Deliberately skip a malformed src rather than lose every other
        # image on the page.
        next
      end
    end
    @images = found
    nil
  end
end

# Demo: fetch the Ruby home page and print the URL of every image on it.
page = WebPage.new
page.get('http://www.ruby-lang.org/en/')
page.parse
page.images.each { |image_url| puts image_url }


mathew
-- 
<URL:http://www.pobox.com/~meta/>
          WE HAVE TACOS