Hi,

2007/12/3, Voroztsov Artem <artem.voroztsov / gmail.com>:
> I tried to add missed functionality:

It seems to not be able to handle a non-recursive array that
contains multiple references to one another array:


  a = []
  a = [a, a]
  p a.flatten2  #=> tried to flatten recursive array


In addition, I implemented Voroztsov's idea in C.  Please use as
a springboard for discussion.


  a = [0]; 20.times { a = [a, a] }; a.flatten

  original:  16.560000   0.580000  17.140000 ( 18.016472)
  patched:   5.720000   0.010000   5.730000 (  5.767103)

  a = [0]; 1000000.times { a = [a] }; a.flatten

  original:  N/A (more than 5 min.)
  patched:   6.340000   0.150000   6.490000 (  6.837591)



Index: array.c
===================================================================
--- array.c	(revision 14094)
+++ array.c	(working copy)
@@ -2756,7 +2756,7 @@
 }

 static long
-flatten(VALUE ary, long idx, VALUE ary2, VALUE memo, int level)
+flatten_bang(VALUE ary, long idx, VALUE ary2, VALUE memo, int level)
 {
     VALUE id;
     long i = idx;
@@ -2775,7 +2775,7 @@

 	tmp = rb_check_array_type(rb_ary_elt(ary, i));
 	if (!NIL_P(tmp)) {
-		n = flatten(ary, i, tmp, memo, level);
+		n = flatten_bang(ary, i, tmp, memo, level);
 	    i += n; lim += n;
 	}
 	i++;
@@ -2825,7 +2825,7 @@
 	    if (NIL_P(memo)) {
 		memo = rb_ary_new();
 	    }
-	    i += flatten(ary, i, tmp, memo, level);
+	    i += flatten_bang(ary, i, tmp, memo, level);
 	    mod = 1;
 	}
 	i++;
@@ -2834,6 +2834,53 @@
     return ary;
 }

+static VALUE
+flatten(VALUE ary, int level)
+{
+    long i = 0;
+    VALUE id, stack, memo, result, tmp;
+
+    stack = rb_ary_new();
+    memo = rb_hash_new();
+    result = rb_ary_new();
+    rb_hash_aset(memo, rb_obj_id(ary), Qtrue);
+
+    while (1) {
+	while (i < RARRAY_LEN(ary)) {
+	    tmp = rb_ary_elt(ary, i++);
+	    if (NIL_P(rb_check_array_type(tmp))) {
+		rb_ary_push(result, tmp);
+	    }
+	    else {
+		if (level >= 0 && RARRAY_LEN(stack) / 2 >= level) {
+		    rb_ary_push(result, tmp);
+		}
+		else {
+		    id = rb_obj_id(tmp);
+		    if (st_lookup(RHASH_TBL(memo), id, 0)) {
+			rb_raise(rb_eArgError, "tried to flatten recursive array");
+		    }
+		    rb_hash_aset(memo, id, Qtrue);
+		    rb_ary_push(stack, ary);
+		    rb_ary_push(stack, LONG2NUM(i));
+		    ary = tmp;
+		    i = 0;
+		}
+	    }
+	}
+	if (RARRAY_LEN(stack) == 0) {
+	    break;
+	}
+	id = rb_obj_id(ary);
+	st_delete(RHASH_TBL(memo), (st_data_t*)&id, 0);
+	tmp = rb_ary_pop(stack);
+	i = NUM2LONG(tmp);
+	ary = rb_ary_pop(stack);
+    }
+
+    return result;
+}
+
 /*
  *  call-seq:
  *     array.flatten -> an_array
@@ -2855,9 +2902,18 @@
 static VALUE
 rb_ary_flatten(int argc, VALUE *argv, VALUE ary)
 {
-    ary = rb_ary_dup(ary);
-    rb_ary_flatten_bang(argc, argv, ary);
-    return ary;
+    long i = 0;
+    int level = -1;
+    VALUE result, lv;
+
+    rb_scan_args(argc, argv, "01", &lv);
+    if (!NIL_P(lv)) level = NUM2INT(lv);
+    if (level == 0) return ary;
+
+    result = flatten(ary, level);
+    if (OBJ_TAINTED(ary)) OBJ_TAINT(result);
+
+    return result;
 }

 /*

-- 
Yusuke ENDOH <mame / tsg.ne.jp>