Lloyd Zusman <ljz / asfast.com> writes:

> I'm interested in using the rubyfilter module.  Although the
> documentation that comes with this module is quite extensive, it
> would be helpful to me to see a few working rubyfilter samples, in
> addition to those that are mentioned in the official docs.
>
> Can anyone point me to some real-world rubyfilter examples?
>
> Thanks in advance.

Hey Lloyd, almost one month later, here are the contents of my
.rdeliver file, which is used by the example rdeliver script that comes
with rubyfilter.  I'll append 'confirm_queue' afterward.  All my incoming
mail goes through this filter.  Mail from addresses that are not in my
whitelist is likely to get an automatic (and controversial) confirmation
request, to cut down on spam.

I have been meaning to package this stuff more formally, and clean it
up considerably, but have successfully procrastinated for years.  ;-)

As the author of RubyFilter, I imagine this is the most extensive use
of the software.  I think RubyMail is a little more popular than
RubyFilter.  But somebody has mailed me from one of the country code
TLD registrars saying they use RubyMail+RubyFilter to automatically
process their incoming mail requests.  :-)


----------------------------------------------------------------------

require 'rdeliver/confirm_queue'

# Shorthand for the header object of the delivery agent (agent.header).
# Used throughout this file for header matching and lookups.
def h
  agent.header
end

# Sends a copy of the agent's current message to a Gmail account through
# the SMTP server on localhost port 25, then logs the forward at level 1.
#
# The envelope sender is a dedicated "+gmailbounces" address.
#
# NOTE(review): the addresses in the posted copy of this file had been
# rewritten as "user / domain" (apparently by the list archive); that is
# not a valid SMTP address, so the "@" form is restored here.
def send_to_gmail
  # Loaded lazily, as in the original: only needed when actually forwarding.
  require 'net/smtp'
  Net::SMTP.start('localhost', 25) do |smtp|
    smtp.send_message(agent.message, 'matt+gmailbounces@lickey.com',
                      'gmatta@gmail.com')
  end
  agent.log(1, "sent to gmatta@gmail.com")
end

# Looks up every sender address of the current message in +table+.
#
# Returns a two-element array [line, address] for the first sender that
# hash_match finds in the table, or [nil, nil] when no sender matches.
def check_senders(table)
  senders(agent.message.header).each do |address|
    line = hash_match(address, table)
    return [line, address] if line
  end
  [nil, nil]
end

# Returns the addresses found in the From, Sender and Return-Path fields
# of +header+ (in that order), as collected by get_addresses.
def senders(header)
  get_addresses(%w[From Sender Return-Path], header)
end

# Finds the table entry that applies to +address+.
#
# +address+ must respond to #address and #domain; +hash+ maps downcased
# patterns to values.  Patterns are tried from most to least specific:
#
# 1. the full address ("user@host.example.com")
# 2. the exact domain prefixed with "@" ("@host.example.com")
# 3. each domain suffix prefixed with "." (".host.example.com",
#    ".example.com", ".com")
#
# Returns the value stored under the first pattern present, or nil.
def hash_match(address, hash)
  mailbox = address.address.downcase
  return hash[mailbox] if hash.key?(mailbox)

  return nil unless address.domain

  domain = address.domain.downcase
  exact_domain = "@#{domain}"
  return hash[exact_domain] if hash.key?(exact_domain)

  # Walk from the full domain towards the TLD, dropping the leftmost label
  # after each miss.
  labels = domain.split('.')
  until labels.empty?
    suffix = '.' + labels.join('.')
    return hash[suffix] if hash.key?(suffix)
    labels.shift
  end
  nil
end

# Reads a flat text file into a lookup table.
#
# Lines that are blank or whose first non-blank character is "#" are
# skipped.  Every other line is stripped and downcased and stored as both
# key and value, so the matching line can be reported later.
#
# Returns the resulting Hash.
def table_from_file(filename)
  table = {}
  File.foreach(filename) do |raw|
    next if raw =~ /^\s*\#/ || raw =~ /^\s*$/

    entry = raw.strip.downcase
    table[entry] = entry
  end
  table
end


# def corpus_save(kind)
#   if x_bogosity = h['x-bogosity']
#     h.delete('x-bogosity')
#   end
#   h.delete('x-corpus')

#   type_str = case kind
# 	     when :spam
# 	       "spam"
# 	     when :good
# 	       "good"
# 	     else
# 	       raise ArgumentError, "unknown kind #{kind.inspect}"
# 	     end

#   box = RFilter::KeyedMailbox.new(".corpus/#{type_str}")

#   agent.log(2, "Action: save to keyed mailbox #{box.path.inspect}")
#   key = box.save(agent.message)
#   h.delete('x-corpus')
#   h['X-Corpus'] = "type=#{type_str}; key=#{key}"

#   if x_bogosity
#     h['X-Bogosity'] = x_bogosity
#   end
# end

# Saves the current message to the spool file for +folder+, i.e.
# ".incoming/spools/<folder>.spool".  +continue+ is passed through
# unchanged as the second argument of agent.save.
def spool_save(folder, continue = false)
  agent.save(".incoming/spools/#{folder}.spool", continue)
end

# Tests whether the current message appears to belong to the mailing list
# whose address is +list+ ("name@domain").
#
# The name and domain are regexp-escaped and then searched for in several
# header fields: sender-style fields, List-Id / X-Mailing-List,
# Delivered-To, X-ML-Name, and finally To / Cc.
#
# Raises ArgumentError when +list+ does not split into a non-empty name
# and a non-empty domain around "@".
def is_list(list)
  name, domain = list.split(/@/).map { |part| Regexp.escape(part) }
  raise ArgumentError if name.nil? || domain.nil?
  raise ArgumentError if name.empty? || domain.empty?

  sender_fields = /^((Resent-)?Sender|Errors-To|X-Loop|(X-)?Mailing-List)$/i
  sender_value = /(owner-)?#{name}(-request|-help|-admin|-bounce|-errors|-owner)?@#{domain}/im

  h.match?(sender_fields, sender_value) ||
    h.match?(/(List-Id|X-Mailing-List)$/i, /<#{name}.#{domain}>/im) ||
    h.match?('delivered-to', /mailing list #{name}@#{domain}/im) ||
    h.match?("x-ml-name", /^\s*#{name}([^ \t]|$)/im) ||
    h.match?(/^(to|cc)$/i, /#{name}@#{domain}/im)
end

# Saves the current message to the "list.<folder>" spool when it matches
# the mailing list address +list+; otherwise does nothing and returns nil.
def save_if_list(list, folder)
  spool_save("list.#{folder}") if is_list(list)
end


# Known mailing lists as [list address, spool folder suffix] pairs.
# save_listmail files a matching message under "list.<suffix>";
# is_listmail uses the same table to detect list traffic.
#
# NOTE(review): the posted copy of this table had every address rewritten
# as "name / domain" (apparently by the list archive).  is_list splits on
# "@" and raises ArgumentError without it, so the "@" form is restored.
# The repeated email-sig entry was dropped: it made save_listmail save the
# same message twice.
LISTS = [
  ['amrita-users@walrus-ruby.org', 'amrita-users'],
  ['bitkeeper-users@bitmover.com', 'bitkeeper'],
  ['spamtools@abuse.net', 'spamtools'],
  ['mutt-users@mutt.org', 'mutt'],
  ['debian-ocaml-maint@lists.debian.org', 'ocaml'],
  ['vim@vim.org', 'vim'],
  ['dcc@calcite.rhyolite.com', 'dcc'],
  ['dcc@rhyolite.com', 'dcc'],
  ['cygwin@cygwin.com', 'cygwin'],
  ['bbdb-info@lists.sourceforge.net', 'bbdb'],
  ['ding@gnus.org', 'ding'],
  ['ding@lists.math.uh.edu', 'ding'],
  ['ding@hpc.uh.edu', 'ding'],
  ['secprog@securityfocus.com', 'secprog'],
  ['list@dsbl.org', 'dsbl'],
  ['nmh-workers@mhost.com', 'nmh-workers'],
  ['jamming@perforce.com', 'jamming'],
  ['bogofilter@aotto.com', 'bogofilter'],
  ['bogofilter-dev@aotto.com', 'bogofilter-dev'],
  ['ruby-core@ruby-lang.org', 'ruby-core'],
  ['ruby-talk@ruby-lang.org', 'ruby-talk'],
  ['help-cfengine@gnu.org', 'cfengine'],
  ['email-sig@python.org', 'email-sig'],
  ['zsh-users@sunsite.dk', 'zsh'],
  ['zsh-workers@sunsite.dk', 'zsh']
].freeze

# Saves the current message to the "list.<folder>" spool of every LISTS
# entry it matches.  Entries whose folder is nil are skipped.
# Returns LISTS (the result of #each).
def save_listmail
  LISTS.each do |list, folder|
    next if folder.nil?

    spool_save("list.#{folder}") if is_list(list)
  end
end

# Returns the first [list, folder] entry of LISTS that the current message
# matches according to is_list, or nil when it matches none.
def is_listmail
  LISTS.find { |list, _folder| is_list(list) }
end

# Filters the current message through /usr/bin/bogofilter with the
# options -e -p -u and returns whatever agent.filter returns.
def bogofilter_classify_and_update
  agent.filter('/usr/bin/bogofilter', '-e', '-p', '-u')
end

# Filters the current message through /usr/bin/bogofilter with the
# options -e -p -Sn and returns whatever agent.filter returns.
def bogofilter_reclassify_as_ham
  agent.filter('/usr/bin/bogofilter', '-e', '-p', '-Sn')
end

# Filters the current message through /usr/bin/bogofilter with the
# options -e -p -Ns and returns whatever agent.filter returns.
def bogofilter_reclassify_as_spam
  agent.filter('/usr/bin/bogofilter', '-e', '-p', '-Ns')
end

# Filters the current message through /home/matt/pkg/bin/dccproc with the
# options "-x 0 -S List-Id -w whiteclnt" and returns whatever agent.filter
# returns.
def dccproc
  agent.filter('/home/matt/pkg/bin/dccproc',
               '-x', '0',
               '-S', 'List-Id',
               '-w', 'whiteclnt')
end

# Returns true when agent.header.match finds at least one field whose name
# matches /^x-dcc-.*-metrics$/ and whose value matches /;.*=many/, i.e.
# when the returned collection is not empty.
def dccproc_bulk
  bulk_fields = agent.header.match(/^x-dcc-.*-metrics$/, /;.*=many/)
  !bulk_fields.empty?
end

# def bogofilter
#   command = %w{ /usr/local/bin/bogofilter -uep }
#   agent.filter(*command)
# end
=begin
  if h.match?('x-bogosity', /^Yes, tests=bogofilter/im)
    corpus_save(:spam)
    spool_save('spam')
  else
    corpus_save(:good)
  end
rescue RFilter::DeliveryAgent::DeliveryCommandFailure => e
  agent.log(2, "Bogofilter failure: #{e.message} : #{e.status.inspect}")
  agent.save('.incoming/bogofailures', true)
end
=end

# Returns true for two kinds of locally generated report mail, false for
# everything else:
#
# * mail to postmaster@lickey.com whose subject contains "Postfix Log" or
#   "FOUND VIRUS IN MAIL from"
# * mail to root@lickey.com whose subject contains "system check"
def skip_spamcheck
  header = agent.header

  postmaster_report =
    header.match?('to', /postmaster@lickey\.com/im) &&
    (header.match?('subject', /Postfix Log/im) ||
     header.match?('subject', /FOUND VIRUS IN MAIL from/m))
  return true if postmaster_report

  root_report =
    header.match?('to', /root@lickey\.com/) &&
    header.match?('subject', /system check/)
  root_report ? true : false
end

# def crm
#   agent.pipe("/usr/bin/crm -u /home/matt/crm procmailfilter.crm", true)
#   corpus_save(:good)
# rescue RFilter::DeliveryAgent::DeliveryCommandFailure => e
#   if e.status.exitstatus == 75
#     corpus_save(:spam)
#     spool_save('spam')
#   end
# end

# Returns the To addresses of the current header with the Cc and Bcc
# addresses appended.  The list returned by h.to is extended in place.
def destinations
  h.to.concat(h.cc).concat(h.bcc)
end

# Returns the first address parsed from the Return-Path field of +header+,
# or nil when parsing yields no address.
def return_path(header)
  RMail::Address.parse(header['Return-Path']).first
end

# Returns the first address parsed from the most specific sender field
# present in +header+.  Fields are consulted in the order Resent-Sender,
# Resent-From, Sender, From; the first one with a value wins.
def sender(header)
  value = header['Resent-Sender'] ||
          header['Resent-From'] ||
          header['Sender'] ||
          header['From']
  RMail::Address.parse(value).first
end

# Logs "<header>: <formatted address>" at level 1.  Does nothing (and
# returns nil) when +address+ is nil or false.
def log_address_header(header, address)
  agent.log(1, header + ': ' + address.format) if address
end

# Collects every address parsed from the fields of +header+ named in
# +list+, in list order.  Fields without a value are skipped.
# Returns a plain Array (empty when nothing was found).
def get_addresses(list, header)
  list.each_with_object([]) do |field, found|
    raw = header[field]
    found.concat(RMail::Address.parse(raw)) if raw
  end
end

# Returns the first address found in field +name+ of +header+, or nil.
def get_address(name, header)
  get_addresses([name], header).first
end

# Returns the recipient addresses of +header+.  The Resent-To, Resent-Cc
# and Resent-Bcc fields take precedence; To, Cc and Bcc are only consulted
# when the resent fields yield no address at all.
def recipients(header)
  resent = get_addresses(%w[Resent-To Resent-Cc Resent-Bcc], header)
  return resent unless resent.empty?

  get_addresses(%w[To Cc Bcc], header)
end

# Writes a short summary of the current message to the log at level 1:
# a separator line, the Return-Path, the sender (only when it differs
# from the Return-Path), the recipients and the subject.  The Delivered-To
# address is logged as well unless it is matt@lickey.com or its +bmi /
# +root variants.
def log_abstract
  header = agent.header
  agent.log(1, "-----------------------------------------------")

  envelope_from = return_path(header)
  from = sender(header)
  log_address_header('Return-Path', envelope_from)
  if envelope_from && from && envelope_from.address != from.address
    log_address_header('Sender', from)
  end

  recipient_list = recipients(header).map { |r| r.format }.join(", ")
  agent.log(1, 'Recipients: ' + recipient_list)
  agent.log(1, 'Subject: ' + header['Subject'].to_s)

  delivered_to = get_address('Delivered-To', header)
  return unless delivered_to
  return if delivered_to.address =~ /^matt(\+(bmi|root))?@lickey\.com$/i

  agent.log(1, 'Delivered-To: ' + delivered_to.format)
end

# Checks the senders of the current message against the address table
# stored in +file+.  On a match, logs "<name> match on <address> by
# <line>" at level 1 and returns true; otherwise returns false.
def check_list(name, file)
  line, address = check_senders(table_from_file(file))
  return false unless line

  agent.log(1, "#{name} match on #{address.address} by #{line}")
  true
end

# Returns true when a sender of the current message is listed in
# ~/.whitelist (see check_list), false otherwise.
def check_whitelist
  path = File::expand_path("~/.whitelist")
  # String#untaint was removed in Ruby 3.2; only call it where it still
  # exists so the path stays usable under taint checking on old Rubies.
  path = path.untaint if path.respond_to?(:untaint)
  check_list("whitelist", path)
end

# Returns true when a sender of the current message is listed in
# ~/.bouncelist (see check_list), false otherwise.
def check_bouncelist
  path = File::expand_path("~/.bouncelist")
  # String#untaint was removed in Ruby 3.2; only call it where it still
  # exists so the path stays usable under taint checking on old Rubies.
  path = path.untaint if path.respond_to?(:untaint)
  check_list("bouncelist", path)
end

# Address that bounces of confirmation requests are delivered to;
# confirmation_bounce compares message recipients against it.
# NOTE(review): the posted copy read "matt+confirmationbounces / lickey.com"
# (apparently archive address mangling); the "@" form is restored so the
# comparison can match a real recipient.
BOUNCE_ADDRESS = RMail::Address.new('matt+confirmationbounces@lickey.com')
# Returns true when one of the recipients of the current message equals
# BOUNCE_ADDRESS, i.e. the message is a bounce of a confirmation request.
def confirmation_bounce
  recipients(agent.header).any? { |recipient| recipient == BOUNCE_ADDRESS }
end

# Returns true unless a Received field of the current message matches
# /with.*fetchmail/, i.e. unless the mail was retrieved by fetchmail.
def is_local_mail
  fetched = agent.header.match?('received', /with.*fetchmail/)
  !fetched
end

# Decides whether the current message is exempt from the confirmation
# step.  The checks run in order and stop at the first truthy one: local
# mail, whitelisted sender, spam-check exemption, known mailing list.
# Returns that first truthy result, or the (falsy) result of is_listmail.
def skip_confirm
  exempt = is_local_mail
  exempt ||= check_whitelist
  exempt ||= skip_spamcheck
  exempt ||= is_listmail
  exempt
end

# Entry point of the filter: runs the delivery pipeline for the current
# message.  The steps run strictly in this order:
#
# 1. log a summary and process a possible confirmation reply
# 2. reject mail from bounce-listed senders
# 3. file bounces of confirmation requests in a dated mailbox
# 4. classify with bogofilter (after dropping any existing X-Bogosity)
# 5. unless exempt: run dccproc, reject bulk mail, and queue the message
#    for confirmation
# 6. save a backup copy, list copies, forward to Gmail, and deliver to
#    the inbox spool
#
# NOTE(review): this flow presumably relies on agent.reject, and on
# agent.save without a truthy second argument, ending processing of the
# message -- confirm against the RubyFilter delivery agent documentation.
def main
  #agent.defer('testing stuff')

  log_abstract
  confirm_reply
  agent.reject("Sorry, mail rejected.") if check_bouncelist

  if confirmation_bounce
    agent.log(1, 'dropping a bounce from a confirmation request')
    agent.save(Time.now.strftime("confirmationbounces-%F"))
  end

  # Discard any X-Bogosity verdict supplied with the message before
  # running our own classifier.
  agent.header.delete('x-bogosity')
  bogofilter_classify_and_update

  unless skip_confirm
    dccproc
    if dccproc_bulk
      agent.log(1, "dccproc found bulk mail")
      agent.reject("Rejecting bulk e-mail.")
    end
    agent.log(1, "doing confirmation thing")
    confirm
  end

  # Mail that got this far is wanted; retrain bogofilter if it said spam.
  bogofilter_reclassify_as_ham if agent.header.match?('x-bogosity', /^yes,/i)

  spool_save('list.ruby-talk') if agent.header.match?('subject', /to ruby-talk/)

  backup_folder = '.incoming/backup-' + Time.now.strftime('%Y-%V')
  agent.save(backup_folder, true)
  save_listmail
  send_to_gmail
  spool_save('inbox')
end

----------------------------------------------------------------------
#!/usr/bin/env ruby
#--
#   Copyright (C) 2002, 2003, 2004 Matt Armstrong.  All rights reserved.
#
#   Permission is granted for use, copying, modification,
#   distribution, and distribution of modified versions of this work
#   as long as the above copyright notice is included.

require 'util'
require 'rfilter/keyed_mailbox'

# Confirmation-queue support for the rdeliver Deliver class.
#
# #confirm parks the current message in the ".pending" keyed mailbox and,
# where appropriate, mails a confirmation request to the envelope sender.
# #confirm_reply recognises the reply to such a request (through the
# "confirm-<key>" part of the Delivered-To field), whitelists the sender
# and substitutes the parked message for the reply.
#
# NOTE(review): plain "user@domain" literals in the posted copy of this
# file had been rewritten as "user / domain" (apparently by the list
# archive); they are restored to the "@" form below.
class Deliver

  include RFilter::Deliver

  # Parks the current message in the ".pending" keyed mailbox and sends a
  # confirmation request to its Return-Path address.
  #
  # No request is sent when there is no Return-Path, when the message looks
  # like bulk or mailing-list traffic, or when it is already flagged as
  # spam; the message is parked either way.  Does nothing when the current
  # message was just released by #confirm_reply (@confirmed is set).
  def confirm
    return if @confirmed

    require 'net/smtp'

    # FIXME: define a 'cannot send confirmation' exception and use
    # that here instead of plain raise

    # FIXME: need a general "send bounce to this address" routine
    return_path = RMail::Address.parse(agent.header['return-path'])[0]
    unless return_path.nil?
      # Remember who was asked to confirm; #confirm_reply whitelists this
      # address once the confirmation arrives.
      agent.message.header.delete('X-RFilter-Confirm-Address')
      agent.message.header['X-RFilter-Confirm-Address'] = return_path.format
    end

    bogospam = agent.header.match?('X-Bogosity', /^Yes/)
    # I want any mail in the pending queue to be SPAM.  If somebody
    # confirms their message, then it'll be reclassified as HAM.
    unless bogospam
      # Steal this function from my .rdeliver file.  Ugh.
      bogofilter_reclassify_as_spam
    end

    send_confirmation = !(return_path.nil? ||
                          agent.header.match?('precedence', /bulk|list/im) ||
                          agent.header['list-unsubscribe'] ||
                          agent.header['list-id'] ||
                          agent.header['list-post'] ||
                          agent.header['list-help'] ||
                          agent.header['list-subscribe'] ||
                          agent.header['mailing-list'] ||
                          agent.header['x-ml-name'] ||
                          agent.header['x-ml-info'] ||
                          # FIXME: this is not appropriate for everyone, but fatcow.com
                          # adds this if brightmail thinks the thing is SPAM.
                          # (The posted copy read /bTrueb/, which looks like
                          # /\bTrue\b/ with the backslashes lost.)
                          agent.header.match?('X-SPAM', /\bTrue\b/) ||
                          # FIXME: this is particular to my install -- I run everything
                          # through bogofilter before we get here
                          bogospam)

    agent.header.set('X-RFilter-Confirm-Sent', send_confirmation ? "Yes" : "No")

    pending = RFilter::KeyedMailbox.new(".pending")
    key = pending.save(agent.message)
    agent.log(1, "pending message is #{key}")

    if send_confirmation
      m = RMail::Message.new
      m.header['To'] = return_path.format
      # The key travels in the address extension so that the reply's
      # Delivered-To field carries it back to #confirm_reply.
      m.header['From'] =
        "Matt Armstrong <matt+confirm-#{key.downcase}@lickey.com>"
      m.header['Subject'] = "Please confirm your message (#{key.downcase})"
      m.body = Util.confirm_bounce_body({ "return_path" => return_path.format,
                                          "headers" => agent.header.to_s })
      # FIXME: this return address is bogus, should be <>
      # FIXME: guard against shell metacharacters!
      agent.log(1, "return_path.address #{return_path.address.inspect}")
      Net::SMTP.start('localhost', 25) do |smtp|
        smtp.send_message(m, 'matt+confirmationbounces@lickey.com',
                          untainted(return_path.address))
      end
      agent.log(1, "sent confirmation request to #{return_path.format}")
    end

    # FIXME: need a AGENT.drop method
    agent.save('/dev/null')
  end

  # Returns true when the current message should not be accepted as a
  # confirmation for +key+: its Subject does not contain the key, it comes
  # from NAVER-MAILER@naver.com, or it is a multipart/report (a bounce).
  # Returns false otherwise.  The reason is logged at level 1.
  def bogus_confirmation(key)
    unless agent.header['subject'] =~ /#{key}/i
      agent.log(1, "Subject does not contain confirmation key: #{key}")
      return true
    end
    from = agent.header.from.first
    if from and from.address == 'NAVER-MAILER@naver.com'
      agent.log(1, "From: NAVER-MAILER, a bogus confirmation mail")
      return true
    end

    if agent.header.content_type == 'multipart/report'
      agent.log(1, "a multipart/report -- a bounce, a bogus confirmation mail")
      return true
    end

    return false
  end

  # Handles a reply to a confirmation request.
  #
  # Does nothing unless the Delivered-To field contains "confirm-<key>".
  # Bogus confirmations and unknown keys are rejected.  For a valid key the
  # confirming sender is appended to the whitelist, the reply is saved to
  # the 'confirmations' mailbox, and the parked message replaces the
  # current one (tagged with an X-RFilter-Confirmed field); @confirmed is
  # then set so that #confirm leaves the released message alone.
  def confirm_reply
    delivered_to = agent.header['delivered-to']
    return unless delivered_to =~ /confirm-(\w+)/im

    key = untainted(Regexp.last_match(1)).upcase

    if bogus_confirmation(key)
      agent.save('bogusconfirmations', true)
      agent.reject("invalid confirmation message")
    end

    pending = RFilter::KeyedMailbox.new(".pending")
    name =
      begin
        pending.retrieve(key)
      rescue
        # FIXME: RFilter::KeyedMailbox#retrieve should throw a
        # specific exception when the key is invalid, and we should
        # reject only in that case.  We may be rejecting for other
        # unforeseen reasons here.
        agent.reject("invalid confirmation key")
      end
    return unless name

    old_msg = File.open(name) do |f|
      RMail::Parser.new.parse(f)
    end

    # Append this dude to the whitelist
    # NOTE(review): this path is hard-coded, while the .rdeliver file
    # reads "~/.whitelist"; they only agree for user matt.
    old_header = old_msg.header
    confirmed_by = RMail::Address.parse(old_header['X-RFilter-Confirm-Address'])
    if confirmed_by.first
      agent.log(1, "appending #{confirmed_by.first.address} to whitelist")
      File.open("/home/matt/.whitelist", "a") { |f|
        f.puts confirmed_by.first.address
      }
    end

    # FIXME: make this mailbox configurable.
    agent.save('confirmations', true)

    agent.log(1, "a confirmation -- replacing current message with " +
              "original")

    # FIXME: this is a serious bug.  Why?  We should either
    # refrain from deleting the pending key until this message has
    # been delivered successfully, or we should just re-introduce
    # the message into the MTA for re-delivery.  The former option
    # requires a new mechanism that allows code to run only after
    # successful delivery, while the latter is more overhead.
    #
    # Why is this a problem?  If subsequent delivery of this
    # released message fails, it is the confirmation that will be
    # deferred.
    agent.log(1, "FIXME: this is a serious bug!")

    # Capture details of the confirmation before it is replaced.
    confirmation_from = agent.header.from.first
    params = { :msgid => agent.header['message-id'],
               :return_path => agent.header['return-path'] }
    # A reply without a usable From address passes bogus_confirmation, so
    # do not assume one exists; simply omit the parameter in that case.
    params[:from] = confirmation_from.format if confirmation_from
    params[:confirmation_key] = key

    agent.message = old_msg
    agent.header.set('X-RFilter-Confirmed', 'yes', params)
    pending.delete(key)
    @confirmed = true
  end

  private

  # Returns +value+ untainted where the running Ruby still has
  # Object#untaint (removed in Ruby 3.2), and unchanged otherwise.
  def untainted(value)
    value.respond_to?(:untaint) ? value.untaint : value
  end
end



-- 
matt