2007/12/4, Nobuyoshi Nakada <nobu / ruby-lang.org>:
> I guess it would be better use st_table and st_init_numtable()
> instead of Hash.

You're right.  I have referred to the implementations of diff and
uniq which use Hash internally, but they seem to be in order to
reutilize ary_make_hash.


> Anyway, don't you have commit access yet?

No, I don't.


By the way, how about appling the same approach to flatten! as
below?

  def flatten!(*a)
    return nil unless any? {|x| x.is_a?(Array) }
    replace(flatten(*a))
  end



Index: array.c
===================================================================
--- array.c	(revision 14101)
+++ array.c	(working copy)
@@ -2755,35 +2755,48 @@
     return LONG2NUM(n);
 }

-static long
-flatten(VALUE ary, long idx, VALUE ary2, VALUE memo, int level)
+static VALUE
+flatten(VALUE ary, int level)
 {
-    VALUE id;
-    long i = idx;
-    long n, lim = idx + RARRAY_LEN(ary2);
+    long i = 0;
+    VALUE id, stack, result, tmp;
+    st_table *memo;

-    level--;
-    id = rb_obj_id(ary2);
-    if (rb_ary_includes(memo, id)) {
-	rb_raise(rb_eArgError, "tried to flatten recursive array");
-    }
-    rb_ary_push(memo, id);
-    rb_ary_splice(ary, idx, 1, ary2);
-    if (level != 0) {
-    while (i < lim) {
-	VALUE tmp;
+    stack = rb_ary_new();
+    result = rb_ary_new();
+    memo = st_init_numtable();
+    st_insert(memo, (st_data_t)rb_obj_id(ary), (st_data_t)Qtrue);

-	tmp = rb_check_array_type(rb_ary_elt(ary, i));
-	if (!NIL_P(tmp)) {
-		n = flatten(ary, i, tmp, memo, level);
-	    i += n; lim += n;
+    while (1) {
+	while (i < RARRAY_LEN(ary)) {
+	    tmp = rb_ary_elt(ary, i++);
+	    if (NIL_P(rb_check_array_type(tmp)) ||
+		(level >= 0 && RARRAY_LEN(stack) / 2 >= level)) {
+		rb_ary_push(result, tmp);
+	    }
+	    else {
+		id = rb_obj_id(tmp);
+		if (st_lookup(memo, id, 0)) {
+		    rb_raise(rb_eArgError, "tried to flatten recursive array");
+		}
+		st_insert(memo, (st_data_t)id, (st_data_t)Qtrue);
+		rb_ary_push(stack, ary);
+		rb_ary_push(stack, LONG2NUM(i));
+		ary = tmp;
+		i = 0;
+	    }
 	}
-	i++;
+	if (RARRAY_LEN(stack) == 0) {
+	    break;
+	}
+	id = rb_obj_id(ary);
+	st_delete(memo, (st_data_t*)&id, 0);
+	tmp = rb_ary_pop(stack);
+	i = NUM2LONG(tmp);
+	ary = rb_ary_pop(stack);
     }
-    }
-    rb_ary_pop(memo);

-    return lim - idx - 1;	/* returns number of increased items */
+    return result;
 }

 /*
@@ -2807,30 +2820,23 @@
 static VALUE
 rb_ary_flatten_bang(int argc, VALUE *argv, VALUE ary)
 {
-    long i = 0;
-    int mod = 0;
-    int level = -1;
-    VALUE memo = Qnil;
+    long i;
+    int mod = 0, level = -1;
     VALUE lv;

     rb_scan_args(argc, argv, "01", &lv);
     if (!NIL_P(lv)) level = NUM2INT(lv);
     if (level == 0) return ary;
-    while (i<RARRAY_LEN(ary)) {
-	VALUE ary2 = RARRAY_PTR(ary)[i];
-	VALUE tmp;
-
-	tmp = rb_check_array_type(ary2);
-	if (!NIL_P(tmp)) {
-	    if (NIL_P(memo)) {
-		memo = rb_ary_new();
-	    }
-	    i += flatten(ary, i, tmp, memo, level);
+    for (i=0; i<RARRAY_LEN(ary); i++) {
+	VALUE tmp = RARRAY_PTR(ary)[i];
+	if (!NIL_P(rb_check_array_type(tmp))) {
 	    mod = 1;
+	    break;
 	}
-	i++;
     }
     if (mod == 0) return Qnil;
+    rb_ary_replace(ary, flatten(ary, level));
+
     return ary;
 }

@@ -2855,9 +2861,18 @@
 static VALUE
 rb_ary_flatten(int argc, VALUE *argv, VALUE ary)
 {
-    ary = rb_ary_dup(ary);
-    rb_ary_flatten_bang(argc, argv, ary);
-    return ary;
+    long i = 0;
+    int level = -1;
+    VALUE result, lv;
+
+    rb_scan_args(argc, argv, "01", &lv);
+    if (!NIL_P(lv)) level = NUM2INT(lv);
+    if (level == 0) return ary;
+
+    result = flatten(ary, level);
+    if (OBJ_TAINTED(ary)) OBJ_TAINT(result);
+
+    return result;
 }

 /*

-- 
Yusuke ENDOH <mame / tsg.ne.jp>