まつもと ゆきひろです

In message "Re: [ruby-dev:34560] rb_yield の高速版"
    on Thu, 1 May 2008 19:27:02 +0900, wanabe <s.wanabe / gmail.com> writes:
|
|ワナベと申します。
|
|rb_yield を連続で呼び出すときに、積んだフレームを使いまわす関数を書きました。
|試験的に Fixnum#times にだけ組み込んでいますが、ほかにも
|Array#each, Range#each などで使えると思います。
|
|ruby -e 'GC.disable;t=Time.now;(10**7).times {};p Time.now - t'
|こんな感じの簡単な計測では 4.9秒から 3.5 秒ほどに縮まりました。
|一応手元では make test と make test-all の動作を確認してあります。

これを取り込んでみようと作業してみました。結論からいうと

  * 全然速くならなかった
  * しかもtest-allで落ちる

ということで、全然ダメです。これが、5月から今までにcoreに入っ
た改善のため有効な最適化でなくなったのか、それとも私の作った
パッチに不備があるのかはまだ分かりません。また、test-allで落
ちる原因もまだ分かりません。自分のダメさ加減が実感できて悲し
いのですが、せっかく作業したので、現状のパッチを投稿しておき
ます。

diff --git a/array.c b/array.c
index e7eb280..a91fcb1 100644
--- a/array.c
+++ b/array.c
@@ -19,6 +19,9 @@ VALUE rb_cArray;
 
 static ID id_cmp;
 
+VALUE rb_fast_yield_loop(VALUE (*i_proc)(ANYARGS), VALUE val);
+VALUE rb_fast_yield(VALUE val, void *block);
+
 #define ARY_DEFAULT_SIZE 16
 #define ARY_MAX_SIZE (LONG_MAX / sizeof(VALUE))
 
@@ -1116,6 +1119,17 @@ rb_ary_insert(int argc, VALUE *argv, VALUE ary)
     return ary;
 }
 
+static VALUE
+each_i(VALUE ary, void *block)
+{
+    long i;
+
+    for (i=0; i<RARRAY_LEN(ary); i++) {
+	rb_fast_yield(RARRAY_PTR(ary)[i], block);
+    }
+    return Qnil;
+}
+
 /*
  *  call-seq:
  *     array.each {|item| block }   ->   array
@@ -1134,12 +1148,8 @@ rb_ary_insert(int argc, VALUE *argv, VALUE ary)
 VALUE
 rb_ary_each(VALUE ary)
 {
-    long i;
-
     RETURN_ENUMERATOR(ary, 0, 0);
-    for (i=0; i<RARRAY_LEN(ary); i++) {
-	rb_yield(RARRAY_PTR(ary)[i]);
-    }
+    rb_fast_yield_loop(each_i, ary);
     return ary;
 }
 
diff --git a/eval.c b/eval.c
index b39da54..fd08d0a 100644
--- a/eval.c
+++ b/eval.c
@@ -950,6 +950,32 @@ rb_obj_extend(int argc, VALUE *argv, VALUE obj)
  *  in each module to the receiver.
  */
 
+const rb_block_t *vm_yield_fast_setup(rb_thread_t *th, int argc); /* vm.c */
+void vm_yield_fast_finish(rb_thread_t *th, const rb_block_t *block); /* vm.c */
+VALUE vm_yield_fast(rb_thread_t *th, rb_block_t *block, int argc, VALUE *argv); /* vm.c */
+
+VALUE
+rb_fast_yield_loop(VALUE (*i_proc)(ANYARGS), VALUE val)
+{
+    rb_thread_t *th = GET_THREAD();
+    const rb_block_t *block = vm_yield_fast_setup(th, 1);
+    val = i_proc(val, (void*)block);
+    vm_yield_fast_finish(th, block);
+    return val;
+}
+
+VALUE
+rb_fast_yield(VALUE val, void *block)
+{
+    return vm_yield_fast(GET_THREAD(), (rb_block_t *)block, 1, &val);
+}
+
+VALUE
+rb_fast_yield_argv(int argc, VALUE *argv, void *block)
+{
+    return vm_yield_fast(GET_THREAD(), (rb_block_t *)block, argc, argv);
+}
+
 static VALUE
 top_include(int argc, VALUE *argv, VALUE self)
 {
diff --git a/numeric.c b/numeric.c
index 745a217..fbd6498 100644
--- a/numeric.c
+++ b/numeric.c
@@ -80,6 +80,9 @@ round(double x)
 }
 #endif
 
+VALUE rb_fast_yield_loop(VALUE (*i_proc)(ANYARGS), VALUE val);
+VALUE rb_fast_yield(VALUE val, void *block);
+
 static ID id_coerce, id_to_i, id_eq;
 
 VALUE rb_cNumeric;
@@ -2992,17 +2995,24 @@ int_downto(VALUE from, VALUE to)
  */
 
 static VALUE
+int_dotimes_i(VALUE num, void *block)
+{
+    long i, end;
+
+    end = FIX2LONG(num);
+    for (i=0; i<end; i++) {
+	rb_fast_yield(LONG2FIX(i), block);
+    }
+    return Qnil;
+}
+
+static VALUE
 int_dotimes(VALUE num)
 {
     RETURN_ENUMERATOR(num, 0, 0);
 
     if (FIXNUM_P(num)) {
-	long i, end;
-
-	end = FIX2LONG(num);
-	for (i=0; i<end; i++) {
-	    rb_yield(LONG2FIX(i));
-	}
+	rb_fast_yield_loop(int_dotimes_i, num);
     }
     else {
 	VALUE i = INT2FIX(0);
diff --git a/vm.c b/vm.c
index f06ba0e..6942462 100644
--- a/vm.c
+++ b/vm.c
@@ -437,10 +437,10 @@ vm_make_proc(rb_thread_t *th,
 
 /* C -> Ruby: block */
 
-static inline VALUE
-invoke_block_from_c(rb_thread_t *th, const rb_block_t *block,
+static inline void
+setup_block(rb_thread_t *th, const rb_block_t *block,
 		    VALUE self, int argc, const VALUE *argv,
-		    const rb_block_t *blockptr, const NODE *cref)
+		    const rb_block_t *blockptr)
 {
     if (BUILTIN_TYPE(block->iseq) != T_NODE) {
 	const rb_iseq_t *iseq = block->iseq;
@@ -453,8 +453,14 @@ invoke_block_from_c(rb_thread_t *th, const rb_block_t *block,
 
 	CHECK_STACK_OVERFLOW(cfp, argc + iseq->stack_max);
 
-	for (i=0; i<argc; i++) {
-	    cfp->sp[i] = argv[i];
+	if (argv) {
+	    for (i=0; i<argc; i++) {
+		cfp->sp[i] = argv[i];
+	    }
+	} else {
+	    for (i=0; i<argc; i++) {
+		cfp->sp[i] = Qnil;
+	    }
 	}
 
 	opt_pc = vm_yield_setup_args(th, iseq, argc, cfp->sp, blockptr,
@@ -464,11 +470,18 @@ invoke_block_from_c(rb_thread_t *th, const rb_block_t *block,
 		      self, GC_GUARDED_PTR(block->dfp),
 		      iseq->iseq_encoded + opt_pc, cfp->sp + arg_size, block->lfp,
 		      iseq->local_size - arg_size);
+    }
+}
 
+static inline VALUE
+invoke_block_fast(rb_thread_t *th, const rb_block_t *block,
+		  VALUE self, int argc, const VALUE *argv,
+		  const rb_block_t *blockptr, const NODE *cref)
+{
+    if (BUILTIN_TYPE(block->iseq) != T_NODE) {
 	if (cref) {
 	    th->cfp->dfp[-1] = (VALUE)cref;
 	}
-
 	return vm_eval_body(th);
     }
     else {
@@ -476,6 +489,16 @@ invoke_block_from_c(rb_thread_t *th, const rb_block_t *block,
     }
 }
 
+static inline VALUE
+invoke_block_from_c(rb_thread_t *th, const rb_block_t *block,
+		    VALUE self, int argc, const VALUE *argv,
+		    const rb_block_t *blockptr, const NODE *cref)
+{
+    setup_block(th, block, self, argc, argv, blockptr);
+    return invoke_block_fast(th, block, self, argc, argv, blockptr, cref);
+}
+
+ 
 static inline const rb_block_t *
 check_block(rb_thread_t *th)
 {
@@ -502,6 +525,56 @@ vm_yield(rb_thread_t *th, int argc, const VALUE *argv)
     return invoke_block_from_c(th, blockptr, blockptr->self, argc, argv, 0, 0);
 }
 
+const rb_block_t *
+vm_yield_fast_setup(rb_thread_t *th, int argc)
+{
+    const rb_block_t *block = check_block(th);
+    setup_block(th, block, block->self, argc, 0, 0);
+    return block;
+}
+
+VALUE
+vm_yield_fast(rb_thread_t *th, rb_block_t *block, int argc, VALUE *argv)
+{
+    int i;
+    int opt_pc;
+    VALUE retval;
+
+    if (BUILTIN_TYPE(block->iseq) != T_NODE) {
+	CHECK_STACK_OVERFLOW(th->cfp, argc + block->iseq->stack_max);
+	for (i=0; i<argc; i++) {
+	    th->cfp->sp[i] = argv[i];
+	}
+	opt_pc = vm_yield_setup_args(th, block->iseq, argc, th->cfp->sp, 0,
+				     block_proc_is_lambda(block->proc));
+	for (i=0; i<block->iseq->arg_size; i++) {
+	    th->cfp[2].sp[i] = th->cfp->sp[i];
+	}
+	th->cfp->pc = block->iseq->iseq_encoded + opt_pc;
+	th->cfp->dfp[0] = GC_GUARDED_PTR(block->dfp);
+    }
+    retval = invoke_block_fast(th, block, block->self, argc, argv, 0, 0);
+    if (BUILTIN_TYPE(block->iseq) != T_NODE) {
+	for (i=0; i<block->iseq->local_size; i++) {
+	    th->cfp->sp[i] = Qnil;
+	}
+	th->cfp--;
+	th->cfp->pc--;
+	th->cfp--;
+    }
+    return retval;
+}
+
+void
+vm_yield_fast_finish(rb_thread_t *th, const rb_block_t *block)
+{
+    if (BUILTIN_TYPE(block->iseq) != T_NODE) {
+	th->cfp++;
+	th->cfp->pc++;
+	th->cfp++;
+    }
+}
+
 VALUE
 vm_invoke_proc(rb_thread_t *th, rb_proc_t *proc, VALUE self,
 	       int argc, const VALUE *argv, rb_block_t * blockptr)
diff --git a/vm_eval.c b/vm_eval.c
index 1de5371..1c318e6 100644
--- a/vm_eval.c
+++ b/vm_eval.c
@@ -535,15 +535,25 @@ rb_yield_splat(VALUE values)
     return v;
 }
 
+VALUE rb_fast_yield_loop(VALUE (*i_proc)(ANYARGS), VALUE val);
+VALUE rb_fast_yield_argv(int argc, VALUE *argv, void *block);
+
 static VALUE
-loop_i(void)
+loop_ii(VALUE dummy, void *block)
 {
     for (;;) {
-	rb_yield_0(0, 0);
+	rb_fast_yield_argv(0, 0, block);
     }
     return Qnil;
 }
 
+static VALUE
+loop_i(void)
+{
+    rb_fast_yield_loop(loop_ii, Qnil);
+    return Qnil;
+}
+
 /*
  *  call-seq:
  *     loop {|| block }