ko1 / atdot.net wrote:
> -1 because of performance concern (indirect access).

Isn't the existing function pointer code also indirect?
I hoped a smaller ci would improve cache locality, but apparently
not enough to offset the cost of the extra indirection...

> I'm not sure how it is impact for not so smart CPUs.
> (I wondered that recent CPU doesn't care such indirection, maybe they have
> smart cache, branch prediction and so on.)

My original benchmarks for cifn were run on a Xeon with 8MB of cache,
so I think that machine was too powerful to be representative.

An AMD FX-8320 shows worse results with my original cifn patch :<
http://80x24.org/bmlog-20140920-000901.28433

I also tried using vm_call_general directly for the common case (on the
FX-8320), but the results from early benchmarks looked terrible and I
stopped early :<

diff --git a/insns.def b/insns.def
index bfa11a9..c6c03dc 100644
--- a/insns.def
+++ b/insns.def
@@ -996,7 +996,7 @@ send
     ci->blockptr = 0;
     vm_caller_setup_args(th, reg_cfp, ci);
     vm_search_method(ci, ci->recv = TOPN(ci->argc));
-    CALL_METHOD(ci);
+    CALL_GENERAL_METHOD(ci);
 }
 
 DEFINE_INSN
@@ -1025,7 +1025,7 @@ opt_send_simple
 (VALUE val) // inc += -ci->orig_argc;
 {
     vm_search_method(ci, ci->recv = TOPN(ci->argc));
-    CALL_METHOD(ci);
+    CALL_GENERAL_METHOD(ci);
 }
 
 /**
diff --git a/vm_insnhelper.h b/vm_insnhelper.h
index e53ab4f..f5d4acd 100644
--- a/vm_insnhelper.h
+++ b/vm_insnhelper.h
@@ -197,6 +197,18 @@ enum vm_cifn_type {
     } \
 } while (0)
 
+#define CALL_GENERAL_METHOD(ci) do { \
+    VALUE v = vm_call_general((th), GET_CFP(), (ci)); \
+    if (v == Qundef) { \
+	RESTORE_REGS(); \
+	NEXT_INSN(); \
+    } \
+    else { \
+	val = v; \
+    } \
+} while (0)
+
+
 /* set fastpath when cached method is *NOT* protected
  * because inline method cache does not care about receiver.
  */
@@ -239,7 +251,7 @@ enum vm_cifn_type {
 #define CALL_SIMPLE_METHOD(recv_) do { \
     ci->blockptr = 0; ci->argc = ci->orig_argc; \
     vm_search_method(ci, ci->recv = (recv_)); \
-    CALL_METHOD(ci); \
+    CALL_GENERAL_METHOD(ci); \
 } while (0)
 
 #define NEXT_CLASS_SERIAL() (++ruby_vm_class_serial)