----Next_Part(Sun_Oct__5_23:46:23_2003_462)--
Content-Type: Text/Plain; charset=iso-2022-jp
Content-Transfer-Encoding: 7bit
¿·°æ¤Ç¤¹¡£
comm ¥³¥Þ¥ó¥É¤Î¤è¤¦¤Ê
Diff.comm("aaabc".split(//), "bcaaad".split(//)) {|*a|
puts "%5s %5s %5s" % a
}
b
c
a
a
a
b
c
d
¤È¤¤¤¦¥á¥É¤òºî¤Ã¤Æ¤ß¤Þ¤·¤¿¡£¤Þ¤º¡¢RAA ¤Î algorithm/diff
¤«¤é°Ê²¼¤Î¤è¤¦¤Ë¤Ê¤ê¤Þ¤·¤¿¡£
require 'algorithm/diff'
module Diff
def Diff.comm(a, b)
mvector iff.lcs(a, b) # Longuest Common Subsequence
ai i
while ai < mvector.length
bline vector[ai]
if bline
if bi bline
yield(nil,nil,b[bi])
else
while bi < bline
yield(nil,b[bi],nil)
bi +
end
while bi > line
yield(nil,nil,b[bline])
bline +
end
end
bi +
else
yield(a[ai],nil,nil)
end
ai +
end
while ai < a.length
yield(a[ai],nil,nil)
ai +
end
while bi < b.length
yield(nil,b[bi],nil)
bi +
end
end
end
¤µ¤é¤Ë
http://todo.is.os-omicron.org/tiki.cgi?p¿Çf
¤È¤¤¤¦ O(ND) ¥¢¥ë¥´¥ê¥º¥à¤Î¼ÂÁõ¤¬¤¢¤Ã¤¿¤Î¤Ç(´¶¼Õ)¤³¤Á¤é¤âºî¤Ã
¤Æ¤ß¤Þ¤·¤¿¡£¤³¤Á¤é¤Ï¹¥¤ß¤Ë¤¢¤ï¤»¤ÆÁ´ÌÌ µ¤·¤Æ¤Þ¤¨Â¤Â±ø¤¯¤Ê¤Ã
¤Æ¤Þ¤¹)¡£
¤â¤È¤â¤È¤Ï¡¢Excel ¤Î¥·¡¼¥ÈƱ»Î¤òÈæ³Ó¤·¤Æ¡¢º¹Ê¬¤ò¿§¤Å¤±¤·¤ÆÉ¨¤·¤¿¥·¡¼¥È¤òºî¤í¤¦¤È¤·¤Æ¤Þ¤·¤¿¡£(CSV¤ËÍ¤Æ diff ¼è¤Ã¤Æ¤¬ÌÌ
ÅÝ ¯¤Ê¤Ã¤¿¤Î¤¨Â
»Äǰ¤Ê¤¬¤é¤Þ¤À¤Þ¤Ç¹Ô¤¤Ä¤¤¤Æ¤Ê¤¤¤Î¤Ç¤¹¤¬(¥é¥¤¥Ö¥é¥êÆÉ¤à
¤Î¤Ë»þ´Ö¤òÈñ¤·¤Æ¤·¤Þ¤Ã¤¿ comm ¤ÎºîÀ®¤Ï¤Ä¤¤¤Ç)¡¢¤³¤³¤Þ¤Ç
Íè¤Æ¤Õ¤È¡¢Â¾¤ËÊØÍø¤Ê¥é¥¤¥Ö¥é¥ê¤¬¤¢¤ë¤Î¤Ç¤Ï¤È»×¤Ã¤¿¤Î¤Ç¤¹¤¬¤´
¸ÃΤÎÊý¤¤¤Þ¤·¤¿¤é¶µ¤¨¤Æ¤¯¤À¤µ¤¤¡£
# ¼ñ»Ý¤Îʬ¤«¤ê¤Ë¤¯¤¤¥á¡¼¥ë¤À
--
¿·°æ¹¯»Ê (Koji Arai)
----Next_Part(Sun_Oct__5_23:46:23_2003_462)--
Content-Type: Text/Plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="patch.rb"
module Diff
# all methods are module function
module_function
def diff(base, update)
lcs cs(base, update)
ret ]
base_index pdate_index
lcs.each {|match|
if r ake_patch_item(base, update,
base_index, update_index,
match[0] - match[2], match[0])
ret << r
end
base_index atch[1] - match[2]
update_index atch[1]
}
if r ake_patch_item(base, update,
base_index, update_index,
base.size, update.size)
ret << r
end
# ret
# [base_index, update_index,
# [base_part, ...], [update_part, ...]
# ], ...
# ]
return ret
end
def comm(base, update)
lcs cs(base, update)
lcs << [update.size, update.size, update.size-base.size]
ret ]
base_index pdate_index
lcs.each {|match|
base_to atch[0] - match[2]
update_to atch[0]
if update_index < update_to
# right only
yield(nil,update[update_index ... update_to],nil)
end
if base_index < base_to
# left only
yield(base[base_index ... base_to],nil,nil)
end
if match[0] < match[1]
# identical
yield(nil,nil,update[match[0] ... match[1]])
end
base_index atch[1] - match[2]
update_index atch[1]
}
end
def make_break_dist(base, update)
base_reverse ase.reverse
vk_lastindex base.size]
update.reverse_each {|item|
temp_limit ase_reverse.index(item)
temp_limit k_lastindex[-1] if temp_limit > vk_lastindex[-1]
vk_lastindex << temp_limit
}
vk_lastindex[update.size] 1
break_dist ash.new(update.size + base.size)
diff_size update.size - base.size)
(-base.size..update.size).each {|k|
i iff_length k + diff_size
i if i < 0
i + while (i - diff_length < k_lastindex[i])
i -
break_dist[k] + i - diff_length
}
break_dist[-base.size] pdate.size
return break_dist
end
# O(ND)
def lcs_ond(base, update)
total_size pdate.size + base.size
diff_size pdate.size - base.size
break_dist ake_break_dist(base, update)
v ash.new(-1)
v[1]
ret ash.new(Array.new)
best_way rray.new
vk lag il
shortest_dist otal_size
(0 .. total_size).each {|d|
shortest_dist -
flag alse
(-d .. d).step(2) {|k|
next if (break_dist[k] > hortest_dist)
if (v[k-1] < v[k+1])
vk [k+1]; ret[k] et[k+1]
else
vk [k-1]+1; ret[k] et[k-1]
end
flag k
vk + while (base[vk-k] update[vk] && update[vk])
dist otal_size - vk - vk + k
if flag < vk
ret[k] + [flag, vk, k]]
if dist < shortest_dist
shortest_dist ist
best_way et[k]
end
end
v[k] k
}
return best_way unless flag
}
raise "LCS_ERROR"
end
def lcs(base, update)
base, update ase.to_ary, update.to_ary
mutual_set ash.new
(base & update).each {|index|
mutual_set[index] rue
}
m_base, m_index_base urify(base ,mutual_set)
m_update, m_index_update urify(update,mutual_set)
resolve(lcs_ond(m_base, m_update), m_index_base, m_index_update)
end
# for speed
# target elem0, elem1, ...]
# ->[[elem1, 1], [elem3, 3], ...]
def purify(target, mutual_set)
ret ]
index ]
target.each_with_index {|item, i|
if mutual_set.has_key?(item)
ret << item
index << i
end
}
return ret, index
end
def resolve(lcs, index_base, index_update)
return lcs if !index_base || !index_update
ret rray.new
lcs.each {|match|
delta atch[2]; y1 atch[0] ; y2 atch[1] - 1
while (y1 < 2)
i
i + while index_base[y1-delta+i] - index_base[y1-delta] i &&
index_update[y1+i] - index_update[y1] i &&
index_update[y2] < y1 + i
ret << [index_update[y1],
index_update[y1+i]+1,
index_update[y1]-index_base[y1-delta]]
y1 + + 1
end
}
return ret
end
def make_patch_item(base, update, base_index, update_index,
base_to, update_to)
if (base_index < base_to || update_index < update_to)
[base_index, update_index,
base [ base_index ... base_to],
update[update_index ...update_to]]
end
end
end
class Patch
def initialize(*v)
if v.size > 0
@ary iff.diff(*v)
end
end
def self.read(log)
new.read(log)
end
def other)
@ary other.instance_variable_get(:@ary)
end
PATCH_HEADER_RE__ r{
(\d+),? # 0
\d*
(a|c|d) # 1
(\d+),? # 2
\d*\n
( (?:<\ .*?\n)* ) # 3
(?: ---\n)?
( (?:>\ .*?\n)* ) # 4
}x
# read diff text
def read(log)
@ary rray.new
log.scan(PATCH_HEADER_RE__) {|item|
item[1] tem[1].intern
item[0] tem[0].to_i
item[2] tem[2].to_i
item[0] - if item[1] ! a
item[2] - if item[1] ! d
ret item[0], item[2]]
out_item tem[3]
in_item tem[4]
if out_item
# ret << out_item.gsub(/^< /, '').split("\n")
ret << out_item.scan(/^(?:< )(.*?)$/).flatten
else
ret << []
end
if in_item
# ret << in_item.gsub(/^> /, '').split("\n")
ret << in_item.scan(/^(?:> )(.*?)$/).flatten
else
ret << []
end
# @ary
# [base_index, update_index,
# [base_part, ...], [update_part, ...]
# ], ...
# ]
@ary << ret
}
return self
end
def to_s
@ary.collect {|item| make_diff_part(*item) }.join("\n")
end
def patch(target)
ret arget.dup
@ary.reverse_each {|i, _, len, repl|
ret[i, len.size] epl
}
return ret
end
def reverse_patch(target)
ret arget.dup
@ary.reverse_each {|_, i, repl, len|
ret[i, len.size] epl
}
return ret
end
protected
def make_diff_part(base_index, update_index, base_part, update_part)
mode heck_diff_part_mode(base_part.size > 0,
update_part.size > 0)
unless mode
raise "LCS Error"
end
ret '
ret << make_diff_part_header(mode :a,
base_index,
base_index + base_part.size)
ret << mode.to_s
ret << make_diff_part_header(mode :d,
update_index,
update_index + update_part.size)
ret << "\n"
base_part.each {|item|
ret << '< ' << item.to_s << "\n"
}
ret << "---\n" if mode :c
update_part.each {|item|
ret << '> ' << item.to_s << "\n"
}
return ret
end
def make_diff_part_header(flug, index, to)
return to.to_s if flug
index +
index ! o ? index.to_s << ',' << to.to_s : index.to_s
end
def check_diff_part_mode(base, update)
return :c if base && update
return :d if base
return :a if update
return nil
end
end
if $0 __FILE__
def difftest(a, b)
patch atch.new(a, b)
patch2 atch.read(patch.to_s)
raise "dif and dif2 are not identical" if patch ! atch2
c atch.patch(a)
raise unless (b c)
patch atch.new(b, a)
c atch.reverse_patch(a)
raise unless (b c)
end
a update".split(//)
b deptauua".split(//)
Diff.comm(b,a) {|l,c,r|
p [l,c,r]
}
difftest(b, a)
srand(0)
a ]
b ]
50.times {
a << rand(10000000000000000000).to_s.split(//)
b << rand(10000000000000000000).to_s.split(//)
}
begin
require 'benchmark'
rescue LoadError
module Benchmark
CAPTION il
def measure
yield
end
module_function :measure
end
end
puts Benchmark::CAPTION
puts Benchmark.measure {
a.size.times {|i|
difftest(a[i], b[i])
}
}
a w(a b c)
difftest(a, [])
difftest(a, %w(a a b b c c))
difftest(a, %w(a c b b c a))
difftest(a, %w(a x b y c z))
difftest(a, %w(c b a))
difftest(a, %w(c c b b a a))
end
----Next_Part(Sun_Oct__5_23:46:23_2003_462)----