金井 仁弘と申します。

ループ内で定数化されうるマクロを、ループの外に出すことによって、
一部メソッドの高速化を図りました。
revision 24444での修正と同様の発想です。
<http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=24444>

今回の修正は、Array, String, Structに対して行いました。
いずれも、R{ARRAY, STRING, STRUCT}_{LEN, PTR}というマクロを持っており、
特定の場合において、ループ内で定数化可能です。
ただ、[ruby-dev:24254]等で挙げられているように、
ループ中に、元の{Array, String, Struct}に破壊的変更が加わった場合は、
SEGVしてしまうので、その点は考慮しました。

ベンチマーク結果は、以下の通りです。
$ ./ruby -v
ruby 1.9.2dev (2009-09-23 trunk 25052) [i686-linux]

a : revision 25052
b : working copy
--------------------------------------------------------------------------------
                        	a	b	%
String#*             	2.06	1.85	111.35
String#split        	2.25	2.13	105.63
String#start_with?	1.93	1.79	107.82
String#end_with?  	2.9	2.86	101.4
Array#*              	2.17	2.1	103.33
Array#shuffle!     	7.76	7.62	101.84
Array#sample      	1.52	1.49	102.01
Struct#[]            	2.61	2.59	100.77
Struct#[]=          	1.37	1.36	100.74
Struct#inspect    	3.61	3.57	101.12
Struct#==          	3.46	2.11	163.98
Struct#eql?        	1.65	1.62	101.85
Struct#hash       	1.32	1.31	100.76
--------------------------------------------------------------------------------

全体で約8%、最高は、Struct#==で、約64%の高速化となります。
詳細は、自分が書いたblogエントリにもあります。
<http://d.hatena.ne.jp/CanI/20090921/1253549503>

自分の検証では、Struct#{==, eql?, inspect, hash}のループ中に、
ruby上でのメソッドの再定義によって、元のStructに破壊的変更を
加える方法は見つかりませんでした。
しかし、仮に元のStructに破壊的変更が加えられる場合は、
SEGVの原因となりえます。

・ruby上で、元のStructに破壊的変更を加えることは可能でしょうか?

上記1点も含め、よろしくお願いいたします。


なお、このパッチは、revision 24547でのパッチと同様に、
セキュリティ&プログラミングキャンプ2009の成果物です。
<http://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=rev&revision=24547>


--- array.c (revision 25052)
+++ array.c  (working copy)
@@ -2708,8 +2708,8 @@ rb_ary_concat(VALUE x, VALUE y)
 static VALUE
 rb_ary_times(VALUE ary, VALUE times)
 {
-    VALUE ary2, tmp;
-    long i, len;
+    VALUE ary2, tmp, *ptr, *ptr2;
+    long i, t, len;

     tmp = rb_check_string_type(times);
     if (!NIL_P(tmp)) {
@@ -2732,8 +2732,11 @@ rb_ary_times(VALUE ary, VALUE times)
     ary2 = ary_new(rb_obj_class(ary), len);
     ARY_SET_LEN(ary2, len);

-    for (i=0; i<len; i+=RARRAY_LEN(ary)) {
-	MEMCPY(RARRAY_PTR(ary2)+i, RARRAY_PTR(ary), VALUE, RARRAY_LEN(ary));
+    ptr = RARRAY_PTR(ary);
+    ptr2 = RARRAY_PTR(ary2);
+    t = RARRAY_LEN(ary);
+    for (i=0; i<len; i+=t) {
+	MEMCPY(ptr2+i, ptr, VALUE, t);
     }
   out:
     OBJ_INFECT(ary2, ary);
@@ -3491,14 +3494,16 @@ rb_ary_flatten(int argc, VALUE *argv, VALUE ary)
 static VALUE
 rb_ary_shuffle_bang(VALUE ary)
 {
+    VALUE *ptr;
     long i = RARRAY_LEN(ary);

     rb_ary_modify(ary);
+    ptr = RARRAY_PTR(ary);
     while (i) {
 	long j = (long)(rb_genrand_real()*i);
-	VALUE tmp = RARRAY_PTR(ary)[--i];
-	RARRAY_PTR(ary)[i] = RARRAY_PTR(ary)[j];
-	RARRAY_PTR(ary)[j] = tmp;
+	VALUE tmp = ptr[--i];
+	ptr[i] = ptr[j];
+	ptr[j] = tmp;
     }
     return ary;
 }
@@ -3576,6 +3581,7 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary)
 	return rb_ary_new3(3, ptr[i], ptr[j], ptr[k]);
     }
     if ((size_t)n < sizeof(idx)/sizeof(idx[0])) {
+	VALUE *ptr_result;
 	long sorted[sizeof(idx)/sizeof(idx[0])];
 	sorted[0] = idx[0] = (long)(rb_genrand_real()*len);
 	for (i=1; i<n; i++) {
@@ -3588,18 +3594,21 @@ rb_ary_sample(int argc, VALUE *argv, VALUE ary)
 	    sorted[j] = idx[i] = k;
 	}
 	result = rb_ary_new2(n);
+	ptr_result = RARRAY_PTR(result);
 	for (i=0; i<n; i++) {
-	    RARRAY_PTR(result)[i] = RARRAY_PTR(ary)[idx[i]];
+	    ptr_result[i] = ptr[idx[i]];
 	}
     }
     else {
+	VALUE *ptr_result;
 	result = rb_ary_new4(len, ptr);
+	ptr_result = RARRAY_PTR(result);
 	RB_GC_GUARD(ary);
 	for (i=0; i<n; i++) {
 	    j = (long)(rb_genrand_real()*(len-i)) + i;
-	    nv = RARRAY_PTR(result)[j];
-	    RARRAY_PTR(result)[j] = RARRAY_PTR(result)[i];
-	    RARRAY_PTR(result)[i] = nv;
+	    nv = ptr_result[j];
+	    ptr_result[j] = ptr_result[i];
+	    ptr_result[i] = nv;
 	}
     }
     ARY_SET_LEN(result, n);
diff --git a/string.c b/string.c
index daf6ed1..487c6de 100644
--- a/string.c
+++ b/string.c
@@ -1159,6 +1159,7 @@ rb_str_times(VALUE str, VALUE times)
 {
     VALUE str2;
     long n, len;
+    char *ptr2;

     len = NUM2LONG(times);
     if (len < 0) {
@@ -1169,16 +1170,17 @@ rb_str_times(VALUE str, VALUE times)
     }

     str2 = rb_str_new5(str, 0, len *= RSTRING_LEN(str));
+    ptr2 = RSTRING_PTR(str2);
     if (len) {
         n = RSTRING_LEN(str);
-        memcpy(RSTRING_PTR(str2), RSTRING_PTR(str), n);
+        memcpy(ptr2, RSTRING_PTR(str), n);
         while (n <= len/2) {
-            memcpy(RSTRING_PTR(str2) + n, RSTRING_PTR(str2), n);
+            memcpy(ptr2 + n, ptr2, n);
             n *= 2;
         }
-        memcpy(RSTRING_PTR(str2) + n, RSTRING_PTR(str2), len-n);
+        memcpy(ptr2 + n, ptr2, len-n);
     }
-    RSTRING_PTR(str2)[RSTRING_LEN(str2)] = '\0';
+    ptr2[RSTRING_LEN(str2)] = '\0';
     OBJ_INFECT(str2, str);
     rb_enc_cr_str_copy_for_substr(str2, str);

@@ -5697,6 +5699,7 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
     }
     else if (split_type == string) {
 	char *ptr = RSTRING_PTR(str);
+	char *temp = ptr;
 	char *eptr = RSTRING_END(str);
 	char *sptr = RSTRING_PTR(spat);
 	long slen = RSTRING_LEN(spat);
@@ -5716,13 +5719,15 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
 		ptr = t;
 		continue;
 	    }
-	    rb_ary_push(result, rb_str_subseq(str, ptr - RSTRING_PTR(str), end));
+	    rb_ary_push(result, rb_str_subseq(str, ptr - temp, end));
 	    ptr += end + slen;
 	    if (!NIL_P(limit) && lim <= ++i) break;
 	}
-	beg = ptr - RSTRING_PTR(str);
+	beg = ptr - temp;
     }
     else {
+	char *ptr = RSTRING_PTR(str);
+	long len = RSTRING_LEN(str);
 	long start = beg;
 	long idx;
 	int last_null = 0;
@@ -5731,22 +5736,22 @@ rb_str_split_m(int argc, VALUE *argv, VALUE str)
 	while ((end = rb_reg_search(spat, str, start, 0)) >= 0) {
 	    regs = RMATCH_REGS(rb_backref_get());
 	    if (start == end && BEG(0) == END(0)) {
-		if (!RSTRING_PTR(str)) {
+		if (!ptr) {
 		    rb_ary_push(result, rb_str_new("", 0));
 		    break;
 		}
 		else if (last_null == 1) {
 		    rb_ary_push(result, rb_str_subseq(str, beg,
-						      rb_enc_fast_mbclen(RSTRING_PTR(str)+beg,
-									 RSTRING_END(str),
+						      rb_enc_fast_mbclen(ptr+beg,
+									 ptr+len,
 									 enc)));
 		    beg = start;
 		}
 		else {
-                    if (RSTRING_PTR(str)+start == RSTRING_END(str))
+                    if (ptr+start == ptr+len)
                         start++;
                     else
-                        start += rb_enc_fast_mbclen(RSTRING_PTR(str)+start,RSTRING_END(str),enc);
+                        start += rb_enc_fast_mbclen(ptr+start,ptr+len,enc);
 		    last_null = 1;
 		    continue;
 		}
@@ -7048,13 +7053,15 @@ static VALUE
 rb_str_start_with(int argc, VALUE *argv, VALUE str)
 {
     int i;
+    char *ptr = RSTRING_PTR(str);
+    long len = RSTRING_LEN(str);

     for (i=0; i<argc; i++) {
 	VALUE tmp = rb_check_string_type(argv[i]);
 	if (NIL_P(tmp)) continue;
 	rb_enc_check(str, tmp);
-	if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
-	if (memcmp(RSTRING_PTR(str), RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
+	if (len < RSTRING_LEN(tmp)) continue;
+	if (memcmp(ptr, RSTRING_PTR(tmp), RSTRING_LEN(tmp)) == 0)
 	    return Qtrue;
     }
     return Qfalse;
@@ -7072,15 +7079,17 @@ rb_str_end_with(int argc, VALUE *argv, VALUE str)
 {
     int i;
     char *p, *s, *e;
+    long len;
     rb_encoding *enc;

+    p = RSTRING_PTR(str);
+    len = RSTRING_LEN(str);
     for (i=0; i<argc; i++) {
 	VALUE tmp = rb_check_string_type(argv[i]);
 	if (NIL_P(tmp)) continue;
 	enc = rb_enc_check(str, tmp);
-	if (RSTRING_LEN(str) < RSTRING_LEN(tmp)) continue;
-	p = RSTRING_PTR(str);
-        e = p + RSTRING_LEN(str);
+	if (len < RSTRING_LEN(tmp)) continue;
+        e = p + len;
 	s = e - RSTRING_LEN(tmp);
 	if (rb_enc_left_char_head(p, s, e, enc) != s)
 	    continue;
diff --git a/struct.c b/struct.c
index 4f3e508..68cad6b 100644
--- a/struct.c
+++ b/struct.c
@@ -98,14 +98,17 @@ rb_struct_members_m(VALUE obj)
 VALUE
 rb_struct_getmember(VALUE obj, ID id)
 {
-    VALUE members, slot;
-    long i;
+    VALUE members, slot, *ptr, *ptr_members;
+    long i, len;

+    ptr = RSTRUCT_PTR(obj);
     members = rb_struct_members(obj);
+    ptr_members = RARRAY_PTR(members);
     slot = ID2SYM(id);
-    for (i=0; i<RARRAY_LEN(members); i++) {
-	if (RARRAY_PTR(members)[i] == slot) {
-	    return RSTRUCT_PTR(obj)[i];
+    len = RARRAY_LEN(members);
+    for (i=0; i<len; i++) {
+	if (ptr_members[i] == slot) {
+	    return ptr[i];
 	}
     }
     rb_name_error(id, "%s is not struct member", rb_id2name(id));
@@ -156,15 +159,18 @@ rb_struct_modify(VALUE s)
 static VALUE
 rb_struct_set(VALUE obj, VALUE val)
 {
-    VALUE members, slot;
-    long i;
+    VALUE members, slot, *ptr, *ptr_members;
+    long i, len;

     members = rb_struct_members(obj);
+    ptr_members = RARRAY_PTR(members);
+    len = RARRAY_LEN(members);
     rb_struct_modify(obj);
-    for (i=0; i<RARRAY_LEN(members); i++) {
-	slot = RARRAY_PTR(members)[i];
+    ptr = RSTRUCT_PTR(obj);
+    for (i=0; i<len; i++) {
+	slot = ptr_members[i];
 	if (rb_id_attrset(SYM2ID(slot)) == rb_frame_this_func()) {
-	    return RSTRUCT_PTR(obj)[i] = val;
+	    return ptr[i] = val;
 	}
     }
     rb_name_error(rb_frame_this_func(), "`%s' is not a struct member",
@@ -175,9 +181,9 @@ rb_struct_set(VALUE obj, VALUE val)
 static VALUE
 make_struct(VALUE name, VALUE members, VALUE klass)
 {
-    VALUE nstr;
+    VALUE nstr, *ptr_members;
     ID id;
-    long i;
+    long i, len;

     OBJ_FREEZE(members);
     if (NIL_P(name)) {
@@ -204,8 +210,10 @@ make_struct(VALUE name, VALUE members, VALUE klass)
     rb_define_singleton_method(nstr, "new", rb_class_new_instance, -1);
     rb_define_singleton_method(nstr, "[]", rb_class_new_instance, -1);
     rb_define_singleton_method(nstr, "members", rb_struct_s_members_m, 0);
-    for (i=0; i< RARRAY_LEN(members); i++) {
-	ID id = SYM2ID(RARRAY_PTR(members)[i]);
+    ptr_members = RARRAY_PTR(members);
+    len = RARRAY_LEN(members);
+    for (i=0; i< len; i++) {
+	ID id = SYM2ID(ptr_members[i]);
 	if (rb_is_local_id(id) || rb_is_const_id(id)) {
 	    if (i < N_REF_FUNC) {
 		rb_define_method_id(nstr, id, ref_func[i], 0);
@@ -461,6 +469,7 @@ rb_struct_each(VALUE s)
     return s;
 }

+
 /*
  *  call-seq:
  *     struct.each_pair {|sym, obj| block }     => struct
@@ -498,7 +507,8 @@ inspect_struct(VALUE s, VALUE dummy, int recur)
 {
     VALUE cname = rb_class_name(rb_obj_class(s));
     VALUE members, str = rb_str_new2("#<struct ");
-    long i;
+    VALUE *ptr, *ptr_members;
+    long i, len;
     char first = RSTRING_PTR(cname)[0];

     if (recur || first != '#') {
@@ -509,7 +519,10 @@ inspect_struct(VALUE s, VALUE dummy, int recur)
     }

     members = rb_struct_members(s);
-    for (i=0; i<RSTRUCT_LEN(s); i++) {
+    ptr_members = RARRAY_PTR(members);
+    ptr = RSTRUCT_PTR(s);
+    len = RSTRUCT_LEN(s);
+    for (i=0; i<len; i++) {
 	VALUE slot;
 	ID id;

@@ -519,7 +532,7 @@ inspect_struct(VALUE s, VALUE dummy, int recur)
 	else if (first != '#') {
 	    rb_str_cat2(str, " ");
 	}
-	slot = RARRAY_PTR(members)[i];
+	slot = ptr_members[i];
 	id = SYM2ID(slot);
 	if (rb_is_local_id(id) || rb_is_const_id(id)) {
 	    rb_str_append(str, rb_id2str(id));
@@ -528,7 +541,7 @@ inspect_struct(VALUE s, VALUE dummy, int recur)
 	    rb_str_append(str, rb_inspect(slot));
 	}
 	rb_str_cat2(str, "=");
-	rb_str_append(str, rb_inspect(RSTRUCT_PTR(s)[i]));
+	rb_str_append(str, rb_inspect(ptr[i]));
     }
     rb_str_cat2(str, ">");
     OBJ_INFECT(str, s);
@@ -588,14 +601,16 @@ rb_struct_init_copy(VALUE copy, VALUE s)
 static VALUE
 rb_struct_aref_id(VALUE s, ID id)
 {
-    VALUE members;
+    VALUE *ptr, members, *ptr_members;
     long i, len;

+    ptr = RSTRUCT_PTR(s);
     members = rb_struct_members(s);
+    ptr_members = RARRAY_PTR(members);
     len = RARRAY_LEN(members);
     for (i=0; i<len; i++) {
-	if (SYM2ID(RARRAY_PTR(members)[i]) == id) {
-	    return RSTRUCT_PTR(s)[i];
+	if (SYM2ID(ptr_members[i]) == id) {
+	    return ptr[i];
 	}
     }
     rb_name_error(id, "no member '%s' in struct", rb_id2name(id));
@@ -644,19 +659,21 @@ rb_struct_aref(VALUE s, VALUE idx)
 static VALUE
 rb_struct_aset_id(VALUE s, ID id, VALUE val)
 {
-    VALUE members;
+    VALUE members, *ptr, *ptr_members;
     long i, len;

     members = rb_struct_members(s);
-    rb_struct_modify(s);
     len = RARRAY_LEN(members);
-    if (RSTRUCT_LEN(s) != RARRAY_LEN(members)) {
+    rb_struct_modify(s);
+    if (RSTRUCT_LEN(s) != len) {
 	rb_raise(rb_eTypeError, "struct size differs (%ld required %ld given)",
-		 RARRAY_LEN(members), RSTRUCT_LEN(s));
+		 len, RSTRUCT_LEN(s));
     }
+    ptr = RSTRUCT_PTR(s);
+    ptr_members = RARRAY_PTR(members);
     for (i=0; i<len; i++) {
-	if (SYM2ID(RARRAY_PTR(members)[i]) == id) {
-	    RSTRUCT_PTR(s)[i] = val;
+	if (SYM2ID(ptr_members[i]) == id) {
+	    ptr[i] = val;
 	    return val;
 	}
     }
@@ -771,11 +788,15 @@ rb_struct_select(int argc, VALUE *argv, VALUE s)
 static VALUE
 recursive_equal(VALUE s, VALUE s2, int recur)
 {
-    long i;
+    VALUE *ptr, *ptr2;
+    long i, len;

     if (recur) return Qtrue; /* Subtle! */
-    for (i=0; i<RSTRUCT_LEN(s); i++) {
-	if (!rb_equal(RSTRUCT_PTR(s)[i], RSTRUCT_PTR(s2)[i])) return Qfalse;
+    ptr = RSTRUCT_PTR(s);
+    ptr2 = RSTRUCT_PTR(s2);
+    len = RSTRUCT_LEN(s);
+    for (i=0; i<len; i++) {
+	if (!rb_equal(ptr[i], ptr2[i])) return Qfalse;
     }
     return Qtrue;
 }
@@ -813,14 +834,16 @@ rb_struct_equal(VALUE s, VALUE s2)
 static VALUE
 recursive_hash(VALUE s, VALUE dummy, int recur)
 {
-    long i;
+    long i, len;
     st_index_t h;
-    VALUE n;
+    VALUE n, *ptr;

     h = rb_hash_start(rb_hash(rb_obj_class(s)));
     if (!recur) {
-	for (i = 0; i < RSTRUCT_LEN(s); i++) {
-	    n = rb_hash(RSTRUCT_PTR(s)[i]);
+	ptr = RSTRUCT_PTR(s);
+	len = RSTRUCT_LEN(s);
+	for (i = 0; i < len; i++) {
+	    n = rb_hash(ptr[i]);
 	    h = rb_hash_uint(h, NUM2LONG(n));
 	}
     }
@@ -844,11 +867,15 @@ rb_struct_hash(VALUE s)
 static VALUE
 recursive_eql(VALUE s, VALUE s2, int recur)
 {
-    long i;
+    VALUE *ptr, *ptr2;
+    long i, len;

     if (recur) return Qtrue; /* Subtle! */
-    for (i=0; i<RSTRUCT_LEN(s); i++) {
-	if (!rb_eql(RSTRUCT_PTR(s)[i], RSTRUCT_PTR(s2)[i])) return Qfalse;
+    ptr = RSTRUCT_PTR(s);
+    ptr2 = RSTRUCT_PTR(s2);
+    len = RSTRUCT_LEN(s);
+    for (i=0; i<len; i++) {
+	if (!rb_eql(ptr[i], ptr2[i])) return Qfalse;
     }
     return Qtrue;
 }


--
Masahiro Kanai (CanI)
http://d.hatena.ne.jp/CanI/