Hi, 2007/12/3, Voroztsov Artem <artem.voroztsov / gmail.com>: > I tried to add missed functionality: It seems to not be able to handle a non-recursive array that contains multiple references to one another array: a = [] a = [a, a] p a.flatten2 #=> tried to flatten recursive array In addition, I implemented Voroztsov's idea in C. Please use as a springboard for discussion. a = [0]; 20.times { a = [a, a] }; a.flatten original: 16.560000 0.580000 17.140000 ( 18.016472) patched: 5.720000 0.010000 5.730000 ( 5.767103) a = [0]; 1000000.times { a = [a] }; a.flatten original: N/A (more than 5 min.) patched: 6.340000 0.150000 6.490000 ( 6.837591) Index: array.c =================================================================== --- array.c (revision 14094) +++ array.c (working copy) @@ -2756,7 +2756,7 @@ } static long -flatten(VALUE ary, long idx, VALUE ary2, VALUE memo, int level) +flatten_bang(VALUE ary, long idx, VALUE ary2, VALUE memo, int level) { VALUE id; long i = idx; @@ -2775,7 +2775,7 @@ tmp = rb_check_array_type(rb_ary_elt(ary, i)); if (!NIL_P(tmp)) { - n = flatten(ary, i, tmp, memo, level); + n = flatten_bang(ary, i, tmp, memo, level); i += n; lim += n; } i++; @@ -2825,7 +2825,7 @@ if (NIL_P(memo)) { memo = rb_ary_new(); } - i += flatten(ary, i, tmp, memo, level); + i += flatten_bang(ary, i, tmp, memo, level); mod = 1; } i++; @@ -2834,6 +2834,53 @@ return ary; } +static VALUE +flatten(VALUE ary, int level) +{ + long i = 0; + VALUE id, stack, memo, result, tmp; + + stack = rb_ary_new(); + memo = rb_hash_new(); + result = rb_ary_new(); + rb_hash_aset(memo, rb_obj_id(ary), Qtrue); + + while (1) { + while (i < RARRAY_LEN(ary)) { + tmp = rb_ary_elt(ary, i++); + if (NIL_P(rb_check_array_type(tmp))) { + rb_ary_push(result, tmp); + } + else { + if (level >= 0 && RARRAY_LEN(stack) / 2 >= level) { + rb_ary_push(result, tmp); + } + else { + id = rb_obj_id(tmp); + if (st_lookup(RHASH_TBL(memo), id, 0)) { + rb_raise(rb_eArgError, "tried to flatten recursive array"); + } + rb_hash_aset(memo, id, Qtrue); + rb_ary_push(stack, ary); + rb_ary_push(stack, LONG2NUM(i)); + ary = tmp; + i = 0; + } + } + } + if (RARRAY_LEN(stack) == 0) { + break; + } + id = rb_obj_id(ary); + st_delete(RHASH_TBL(memo), (st_data_t*)&id, 0); + tmp = rb_ary_pop(stack); + i = NUM2LONG(tmp); + ary = rb_ary_pop(stack); + } + + return result; +} + /* * call-seq: * array.flatten -> an_array @@ -2855,9 +2902,18 @@ static VALUE rb_ary_flatten(int argc, VALUE *argv, VALUE ary) { - ary = rb_ary_dup(ary); - rb_ary_flatten_bang(argc, argv, ary); - return ary; + long i = 0; + int level = -1; + VALUE result, lv; + + rb_scan_args(argc, argv, "01", &lv); + if (!NIL_P(lv)) level = NUM2INT(lv); + if (level == 0) return ary; + + result = flatten(ary, level); + if (OBJ_TAINTED(ary)) OBJ_TAINT(result); + + return result; } /* -- Yusuke ENDOH <mame / tsg.ne.jp>