----Next_Part(Sun_Oct__5_23:46:23_2003_462)--
Content-Type: Text/Plain; charset=iso-2022-jp
Content-Transfer-Encoding: 7bit

新井です。

comm コマンドのような

Diff.comm("aaabc".split(//), "bcaaad".split(//)) {|*a|
  puts "%5s %5s %5s" % a
}


          b
          c
                a
                a
                a
    b
    c
          d

というメドを作ってみました。まず、RAA の algorithm/diff
から以下のようになりました。

require 'algorithm/diff'

module Diff
  def Diff.comm(a, b)
    mvector  iff.lcs(a, b)    # Longuest Common Subsequence
    ai  i  
    while ai < mvector.length
      bline  vector[ai]
      if bline
        if bi bline
          yield(nil,nil,b[bi])
        else
          while bi < bline
            yield(nil,b[bi],nil)
            bi + 
          end

          while bi > line
            yield(nil,nil,b[bline])
            bline + 
          end
        end
	bi + 
      else
	yield(a[ai],nil,nil)
      end
      ai + 
    end

    while ai < a.length
      yield(a[ai],nil,nil)
      ai + 
    end

    while bi < b.length
      yield(nil,b[bi],nil)
      bi + 
    end
  end
end

さらに

http://todo.is.os-omicron.org/tiki.cgi?p診f

という O(ND) アルゴリズムの実装があったので(感謝)こちらも作っ
てみました。こちらは好みにあわせて全面気靴討泙造賊くなっ
てます)。

もともとは、Excel のシート同士を比較して、差分を色づけして膝したシートを作ろうとしてました。(CSVに落して diff 取ってが面
倒なったのえ
残念ながらまだまで行きついてないのですが(ライブラリ読む
のに時間を費してしまった comm の作成はついで)、ここまで
来てふと、他に便利なライブラリがあるのではと思ったのですがご
存知の方いましたら教えてください。

# 趣旨の分かりにくいメールだ

--
新井康司 (Koji Arai)


----Next_Part(Sun_Oct__5_23:46:23_2003_462)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="patch.rb"

module Diff
  # all methods are module function
  module_function

  def diff(base, update)
    lcs  cs(base, update)

    ret  ]
    base_index  pdate_index  
    lcs.each {|match|
      if r  ake_patch_item(base, update,
                             base_index, update_index,
                             match[0] - match[2], match[0])
        ret << r
      end

      base_index    atch[1] - match[2]
      update_index  atch[1]
    }

    if r  ake_patch_item(base, update,
                           base_index, update_index,
                           base.size, update.size)
      ret << r
    end
    # ret  
    #          [base_index, update_index,
    #            [base_part, ...], [update_part, ...]
    #          ], ...
    #        ]
    return ret
  end

  def comm(base, update)
    lcs  cs(base, update)
    lcs << [update.size, update.size, update.size-base.size]

    ret  ]
    base_index  pdate_index  
    lcs.each {|match|
      base_to  atch[0] - match[2]
      update_to  atch[0]

      if update_index < update_to
        # right only
        yield(nil,update[update_index ... update_to],nil)
      end

      if base_index < base_to
        # left only
        yield(base[base_index ... base_to],nil,nil)
      end

      if match[0] < match[1]
        # identical
        yield(nil,nil,update[match[0] ... match[1]])
      end

      base_index    atch[1] - match[2]
      update_index  atch[1]
    }
  end

  def make_break_dist(base, update)
    base_reverse  ase.reverse
    vk_lastindex  base.size]

    update.reverse_each {|item|
      temp_limit  ase_reverse.index(item)
      temp_limit  k_lastindex[-1] if temp_limit > vk_lastindex[-1]
      vk_lastindex << temp_limit
    }
    vk_lastindex[update.size]  1
    break_dist  ash.new(update.size + base.size)
    diff_size  update.size - base.size)
    (-base.size..update.size).each {|k|
      i  iff_length  k + diff_size
      i   if i < 0
      i +  while (i - diff_length < k_lastindex[i])
      i - 
      break_dist[k]   + i - diff_length
    }
    break_dist[-base.size]  pdate.size
    return break_dist
  end

  # O(ND)
  def lcs_ond(base, update)
    total_size  pdate.size + base.size
    diff_size   pdate.size - base.size
    break_dist  ake_break_dist(base, update)
    v  ash.new(-1)
    v[1]  
    ret  ash.new(Array.new)
    best_way  rray.new
    vk      lag  il
    shortest_dist  otal_size

    (0 .. total_size).each {|d|
      shortest_dist - 
      flag  alse

      (-d .. d).step(2) {|k|
        next if (break_dist[k] > hortest_dist)

        if (v[k-1] < v[k+1])
          vk  [k+1];   ret[k]  et[k+1]
        else
          vk  [k-1]+1; ret[k]  et[k-1]
        end
        flag  k
        vk +  while (base[vk-k] update[vk] && update[vk])
        dist  otal_size - vk - vk + k
        if flag < vk
          ret[k] + [flag, vk, k]]
          if dist < shortest_dist
            shortest_dist  ist
            best_way       et[k]
          end
        end
        v[k]  k
      }
      return best_way unless flag
    }
    raise "LCS_ERROR"
  end

  def lcs(base, update)
    base, update  ase.to_ary, update.to_ary

    mutual_set  ash.new

    (base & update).each {|index|
      mutual_set[index]  rue
    }

    m_base,   m_index_base    urify(base  ,mutual_set)
    m_update, m_index_update  urify(update,mutual_set)

    resolve(lcs_ond(m_base, m_update), m_index_base, m_index_update)
  end

  # for speed
  # target  elem0, elem1, ...]
  #        ->[[elem1, 1], [elem3, 3], ...]
  def purify(target, mutual_set)
    ret  ]
    index  ]

    target.each_with_index {|item, i|
      if mutual_set.has_key?(item)
        ret << item
        index << i
      end
    }

    return ret, index
  end

  def resolve(lcs, index_base, index_update)
    return lcs if !index_base || !index_update

    ret  rray.new

    lcs.each {|match|
      delta  atch[2]; y1  atch[0] ; y2  atch[1] - 1
      while (y1 < 2)
        i  
        i +  while index_base[y1-delta+i] - index_base[y1-delta] i &&
                     index_update[y1+i] - index_update[y1] i &&
                     index_update[y2] < y1 + i

        ret << [index_update[y1],
                index_update[y1+i]+1,
                index_update[y1]-index_base[y1-delta]]

        y1 +  + 1
      end
    }
    return ret
  end

  def make_patch_item(base, update, base_index, update_index,
                      base_to, update_to)

    if (base_index < base_to || update_index < update_to)

      [base_index, update_index,
       base  [  base_index ...  base_to],
       update[update_index ...update_to]]
    end
  end
end

class Patch
  def initialize(*v)
    if v.size > 0
      @ary  iff.diff(*v)
    end
  end

  def self.read(log)
    new.read(log)
  end

  def other)
    @ary other.instance_variable_get(:@ary)
  end

  PATCH_HEADER_RE__  r{
        (\d+),?         # 0
        \d*
        (a|c|d)         # 1
        (\d+),?         # 2
        \d*\n

        ( (?:<\ .*?\n)* ) # 3
        (?: ---\n)?
        ( (?:>\ .*?\n)* ) # 4
        }x

  # read diff text
  def read(log)
    @ary  rray.new
    log.scan(PATCH_HEADER_RE__) {|item|
      item[1]  tem[1].intern
      item[0]  tem[0].to_i
      item[2]  tem[2].to_i

      item[0] -  if item[1] ! a
      item[2] -  if item[1] ! d

      ret  item[0], item[2]]
      out_item  tem[3]
      in_item   tem[4]

      if out_item
#        ret << out_item.gsub(/^< /, '').split("\n")
        ret << out_item.scan(/^(?:< )(.*?)$/).flatten
      else
        ret << []
      end

      if in_item
#        ret << in_item.gsub(/^> /, '').split("\n")
        ret << in_item.scan(/^(?:> )(.*?)$/).flatten
      else
        ret << []
      end

      # @ary  
      #          [base_index, update_index,
      #            [base_part, ...], [update_part, ...]
      #          ], ...
      #        ]
      @ary << ret
    }
    return self
  end

  def to_s
    @ary.collect {|item| make_diff_part(*item) }.join("\n")
  end

  def patch(target)
    ret  arget.dup
    @ary.reverse_each {|i, _, len, repl|
      ret[i, len.size]  epl
    }
    return ret
  end

  def reverse_patch(target)
    ret  arget.dup
    @ary.reverse_each {|_, i, repl, len|
      ret[i, len.size]  epl
    }
    return ret
  end

protected
  def make_diff_part(base_index, update_index, base_part, update_part)
    mode  heck_diff_part_mode(base_part.size > 0,
                                update_part.size > 0)
    unless mode
      raise "LCS Error"
    end

    ret  '
    ret << make_diff_part_header(mode :a,
                                 base_index,
                                 base_index + base_part.size)
    ret << mode.to_s
    ret << make_diff_part_header(mode :d,
                                 update_index,
                                 update_index + update_part.size)
    ret << "\n"
    base_part.each {|item|
      ret << '< ' << item.to_s << "\n"
    }

    ret << "---\n" if mode :c

    update_part.each {|item|
      ret << '> ' << item.to_s << "\n"
    }

    return ret
  end

  def make_diff_part_header(flug, index, to)
    return to.to_s if flug
    index + 
    index ! o ? index.to_s << ',' << to.to_s : index.to_s
  end

  def check_diff_part_mode(base, update)
    return :c if base && update
    return :d if base
    return :a if update
    return nil
  end
end

if $0 __FILE__
  def difftest(a, b)
    patch  atch.new(a, b)

    patch2  atch.read(patch.to_s)
    raise "dif and dif2 are not identical" if patch ! atch2

    c  atch.patch(a)
    raise unless (b c)

    patch  atch.new(b, a)

    c  atch.reverse_patch(a)
    raise unless (b c)
  end

  a  update".split(//)
  b  deptauua".split(//)

  Diff.comm(b,a) {|l,c,r|
    p [l,c,r]
  }

  difftest(b, a)

  srand(0)
  a  ]
  b  ]
  50.times {
    a << rand(10000000000000000000).to_s.split(//)
    b << rand(10000000000000000000).to_s.split(//)
  }

  begin
    require 'benchmark'
  rescue LoadError
    module Benchmark
      CAPTION  il
      def measure
        yield
      end
      module_function :measure
    end
  end

  puts Benchmark::CAPTION
  puts Benchmark.measure {
    a.size.times {|i|
      difftest(a[i], b[i])
    }
  }

  a  w(a b c)
  difftest(a, [])
  difftest(a, %w(a a b b c c))
  difftest(a, %w(a c b b c a))
  difftest(a, %w(a x b y c z))
  difftest(a, %w(c b a))
  difftest(a, %w(c c b b a a))
end

----Next_Part(Sun_Oct__5_23:46:23_2003_462)----