ワナベと申します。

ruby-1.9では配列内の重複要素の検出のために
内部でHashオブジェクトが生成されますが、
これをスレッドローカルに保存して使いまわすことで
オブジェクト生成とGCの回数を減らすパッチを書きました。


$ cat ../bm_array_overlap.rb
# Benchmark for the Array set operators &, | and - applied to two small
# arrays. Each operator is timed in three groups: called directly, called
# inside a newly created thread per iteration, and called directly again
# after the garbage collector has been disabled.
require 'benchmark'
# Number of iterations executed inside every report.
n = 200000
Benchmark.bm do |x|
  # Seed the random number generator with a fixed value so the generated
  # arrays are reproducible between runs.
  srand(0)
  # a and b are two independent arrays of 20 values, each produced by rand(20).
  a, b = Array.new(2) { Array.new(20) { rand(20) } }
  x.report("&") { n.times { a & b } }
  x.report("|") { n.times { a | b } }
  x.report("-") { n.times { a - b } }
  puts " --- with thread ---"
  # Same operations, but each one runs in its own new thread.
  # NOTE(review): the threads are never joined, so part of their work may
  # finish outside the report block that started them -- confirm whether
  # that is intended.
  x.report("&") { n.times { Thread.new { a & b } } }
  x.report("|") { n.times { Thread.new { a | b } } }
  x.report("-") { n.times { Thread.new { a - b } } }
  puts " --- GC.disable ---"
  # GC is switched off here and is not re-enabled before the script ends.
  GC.disable
  x.report("&") { n.times { a & b } }
  x.report("|") { n.times { a | b } }
  x.report("-") { n.times { a - b } }
end

$ ./ruby -Ilib ../bm_array_overlap.rb
      user     system      total        real
&  3.672000   0.047000   3.719000 (  3.750000)
|  5.641000   0.078000   5.719000 (  5.765625)
-  3.515000   0.031000   3.546000 (  3.546875)
 --- with thread ---
& 17.985000  22.328000  40.313000 ( 52.562500)
| 18.922000  23.719000  42.641000 ( 54.062500)
- 20.828000  23.109000  43.937000 ( 55.734375)
 --- GC.disable ---
&  3.766000   0.032000   3.798000 (  3.828125)
|  5.844000   0.109000   5.953000 (  5.984375)
-  3.609000   0.109000   3.718000 (  3.734375)

$ /usr/local/bin/ruby ../bm_array_overlap.rb
      user     system      total        real
&  5.359000   0.110000   5.469000 (  5.515625)
|  7.875000   0.109000   7.984000 (  8.015625)
-  6.875000   0.250000   7.125000 (  7.187500)
 --- with thread ---
& 18.391000  22.984000  41.375000 ( 52.625000)
| 19.547000  23.172000  42.719000 ( 54.718750)
- 20.063000  23.828000  43.891000 ( 57.203125)
 --- GC.disable ---
&  5.032000   0.094000   5.126000 (  5.187500)
|  6.515000   0.219000   6.734000 (  7.000000)
-  4.375000   0.250000   4.625000 (  9.640625)

$ svn diff array.c
Index: array.c
===================================================================
--- array.c     (revision 22023)
+++ array.c     (working copy)
@@ -2866,9 +2866,15 @@
 static VALUE
 ary_make_hash(VALUE ary1, VALUE ary2)
 {
-    VALUE hash = rb_hash_new();
+    VALUE hash = rb_thread_local_aref(rb_thread_current(), rb_intern("ary_overlap_check_hash"));
     long i;

+    if (hash == Qnil) {
+       hash = rb_hash_new();
+       rb_thread_local_aset(rb_thread_current(), rb_intern("ary_overlap_check_hash"), hash);
+    } else {
+       rb_funcall(hash, rb_intern("clear"), 0);
+    }
     for (i=0; i<RARRAY_LEN(ary1); i++) {
        rb_hash_aset(hash, RARRAY_PTR(ary1)[i], Qtrue);
     }


-- 
ワナベ