Hi,

At Tue, 4 Dec 2007 21:13:33 +0900,
Yusuke ENDOH wrote in [ruby-core:13877]:
> > I guess it would be better use st_table and st_init_numtable()
> > instead of Hash.
> 
> You're right.  I have referred to the implementations of diff and
> uniq which use Hash internally, but they seem to be in order to
> reutilize ary_make_hash.

Using st_table, you don't need to call rb_obj_id().  It's OK to
insert VALUE itself.

> > Anyway, don't you have commit access yet?
> 
> No, I don't.

You should have, I guess.

> By the way, how about appling the same approach to flatten! as
> below?
> 
>   def flatten!(*a)
>     return nil unless any? {|x| x.is_a?(Array) }
>     replace(flatten(*a))
>   end

Seems nice.

> +	    tmp = rb_ary_elt(ary, i++);
> +	    if (NIL_P(rb_check_array_type(tmp)) ||
> +		(level >= 0 && RARRAY_LEN(stack) / 2 >= level)) {
> +		rb_ary_push(result, tmp);
> +	    }

Non-nil result of rb_check_array_type() doesn't mean the
argument is an Array always.  It may invoke to_ary method and
return the result.  So you should not discard the result too.

The following is a patch from yours.


--- array.c~ 2007-12-04 21:33:47.000000000 +0900 +++ array.c 2007-12-04 21:49:20.000000000 +0900 @@ -2757,28 +2757,31 @@ rb_ary_nitems(VALUE ary) static VALUE -flatten(VALUE ary, int level) +flatten(VALUE ary, int level, int *modified) { long i = 0; - VALUE id, stack, result, tmp; + VALUE stack, result, tmp, elt; st_table *memo; + st_data_t id; stack = rb_ary_new(); result = rb_ary_new(); memo = st_init_numtable(); - st_insert(memo, (st_data_t)rb_obj_id(ary), (st_data_t)Qtrue); + st_insert(memo, (st_data_t)ary, (st_data_t)Qtrue); + *modified = 0; while (1) { while (i < RARRAY_LEN(ary)) { - tmp = rb_ary_elt(ary, i++); - if (NIL_P(rb_check_array_type(tmp)) || - (level >= 0 && RARRAY_LEN(stack) / 2 >= level)) { - rb_ary_push(result, tmp); + elt = RARRAY_PTR(ary)[i++]; + tmp = rb_check_array_type(elt); + if (NIL_P(tmp) || (level >= 0 && RARRAY_LEN(stack) / 2 >= level)) { + rb_ary_push(result, elt); } else { - id = rb_obj_id(tmp); + *modified = 1; + id = (st_data_t)tmp; if (st_lookup(memo, id, 0)) { rb_raise(rb_eArgError, "tried to flatten recursive array"); } - st_insert(memo, (st_data_t)id, (st_data_t)Qtrue); + st_insert(memo, id, (st_data_t)Qtrue); rb_ary_push(stack, ary); rb_ary_push(stack, LONG2NUM(i)); @@ -2790,6 +2793,6 @@ flatten(VALUE ary, int level) break; } - id = rb_obj_id(ary); - st_delete(memo, (st_data_t*)&id, 0); + id = (st_data_t)ary; + st_delete(memo, &id, 0); tmp = rb_ary_pop(stack); i = NUM2LONG(tmp); @@ -2821,20 +2824,14 @@ static VALUE rb_ary_flatten_bang(int argc, VALUE *argv, VALUE ary) { - long i; int mod = 0, level = -1; - VALUE lv; + VALUE result, lv; rb_scan_args(argc, argv, "01", &lv); if (!NIL_P(lv)) level = NUM2INT(lv); if (level == 0) return ary; - for (i=0; i<RARRAY_LEN(ary); i++) { - VALUE tmp = RARRAY_PTR(ary)[i]; - if (!NIL_P(rb_check_array_type(tmp))) { - mod = 1; - break; - } - } + + result = flatten(ary, level, &mod); if (mod == 0) return Qnil; - rb_ary_replace(ary, flatten(ary, level)); + rb_ary_replace(ary, result); return ary; @@ -2862,6 +2859,5 @@ static VALUE rb_ary_flatten(int argc, VALUE *argv, VALUE ary) { - long i = 0; - int level = -1; + int mod = 0, level = -1; VALUE result, lv; @@ -2870,5 +2866,5 @@ rb_ary_flatten(int argc, VALUE *argv, VA if (level == 0) return ary; - result = flatten(ary, level); + result = flatten(ary, level, &mod); if (OBJ_TAINTED(ary)) OBJ_TAINT(result);
-- Nobu Nakada