>diff -ru ruby.orig/array.c ruby/array.c
>--- ruby.orig/array.c	2005-04-20 02:09:10.000000000 +0200
>+++ ruby/array.c	2005-04-20 02:22:22.000000000 +0200
>@@ -2074,16 +2074,31 @@
> 	argv[i] = to_ary(argv[i]);
>     }
>     if (rb_block_given_p()) {
>-	for (i=0; i<RARRAY(ary)->len; i++) {
>+	if (rb_multiple_asgn_in_block_p()) {
> 	    VALUE tmp = rb_ary_new2(argc+1);
> 
>-	    rb_ary_push(tmp, rb_ary_elt(ary, i));
>-	    for (j=0; j<argc; j++) {
>-		rb_ary_push(tmp, rb_ary_elt(argv[j], i));
>+	    RARRAY(tmp)->len = argc + 1;
>+	    for (i=0; i<RARRAY(ary)->len; i++) {
>+		RARRAY(tmp)->ptr[0] = RARRAY(ary)->ptr[i]; 
>+		for (j=0; j<argc; j++) {
>+		    RARRAY(tmp)->ptr[1+j] = rb_ary_elt(argv[j], i);
>+		}
>+		rb_yield(tmp);
> 	    }
>-	    rb_yield(tmp);
>+	    return Qnil;
>+	} 
>+	else {
>+	    for (i=0; i<RARRAY(ary)->len; i++) {
>+		VALUE tmp = rb_ary_new2(argc+1);
>+
>+		rb_ary_push(tmp, rb_ary_elt(ary, i));
>+		for (j=0; j<argc; j++) {
>+		    rb_ary_push(tmp, rb_ary_elt(argv[j], i));
>+		}
>+		rb_yield(tmp);
>+	    }
>+	    return Qnil;
> 	}
>-	return Qnil;
>     }
>     len = RARRAY(ary)->len;
>     result = rb_ary_new2(len);

It doesn't look nice to have both the optimized code (masign) and the non-optimized code (array) coexist.
Instead, would something like this be possible? (This is just an idea, though.)

  1. define some internal array object. this object is never passed to the ruby world. (e.g. InternalArray)

  2. implement Array#zip using this.

    if (rb_block_given_p()) {
        VALUE temp = rb_internal_array_new2(argc+1);

        ...

        rb_yield(temp); /* If {|a| ... }, convert InternalArray to Array.
                           If {|a,b,c| ... }, pass each element of InternalArray */
    }

  3. implement rb_yield like above.

Maybe a Value object can be used as the internal array. (This doesn't work as expected right now; it's just an idea.)

/*
 * Sketch of Array#zip that fills one reusable buffer per row instead of
 * allocating a fresh Array on every iteration.
 *
 * argc/argv: the other collections to zip with; each argv entry is
 *            replaced in place by to_a(argv[k]) before any row is built.
 * ary:       the receiver whose length drives the iteration.
 *
 * Returns Qnil when a block is given (each row is passed to rb_yield),
 * otherwise the array that the rows were pushed onto.
 */
static VALUE
rb_ary_zip(argc, argv, ary)
    int argc;
    VALUE *argv;
    VALUE ary;
{
    int idx, k;
    VALUE row;
    VALUE zipped = Qnil;

    for (k=0; k<argc; k++) {
	argv[k] = to_a(argv[k]);
    }

    /* zipped doubles as the mode flag: it stays Qnil only in block mode. */
    if (!rb_block_given_p()) {
	zipped = rb_ary_new2(RARRAY(ary)->len);
    }

    /* presumably a values object with room for argc+1 slots -- TODO confirm
       what rb_values_new2() does with a 0 element pointer */
    row = rb_values_new2(argc+1, 0);
    RARRAY(row)->len = argc+1;

    for (idx=0; idx<RARRAY(ary)->len; idx++) {
	/* Slot 0 holds the receiver's element, slots 1..argc the arguments'. */
	RARRAY(row)->ptr[0] = rb_ary_elt(ary, idx);
	for (k=0; k<argc; k++) {
	    RARRAY(row)->ptr[k+1] = rb_ary_elt(argv[k], idx);
	}

	if (NIL_P(zipped)) {
	    /* NOTE(review): intended to hand the block either an Array or the
	       individual elements depending on its arity -- unverified. */
	    rb_yield(row);
	}
	else {
	    /* NOTE(review): relies on to_a() producing a new Array from the
	       shared buffer; if it does not, every stored row aliases `row`
	       -- confirm. */
	    rb_ary_push(zipped, to_a(row));
	}
    }
    return zipped;
}

//////////////////

Anyway, the current code already looks duplicated....
Fortunately, I cannot see any slowdown with this code.

Index: array.c
===================================================================
RCS file: /src/ruby/array.c,v
retrieving revision 1.171
diff -u -w -b -p -r1.171 array.c
--- array.c	4 Mar 2005 06:47:45 -0000	1.171
+++ array.c	20 Apr 2005 09:17:20 -0000
@@ -2108,34 +2108,25 @@ rb_ary_zip(argc, argv, ary)
     VALUE ary;
 {
     int i, j;
-    long len;
-    VALUE result;
+    VALUE result = Qnil;
 
     for (i=0; i<argc; i++) {
 	argv[i] = to_a(argv[i]);
     }
-    if (rb_block_given_p()) {
-	for (i=0; i<RARRAY(ary)->len; i++) {
-	    VALUE tmp = rb_ary_new2(argc+1);
-
-	    rb_ary_push(tmp, rb_ary_elt(ary, i));
-	    for (j=0; j<argc; j++) {
-		rb_ary_push(tmp, rb_ary_elt(argv[j], i));
+    if (!rb_block_given_p()) {
+	result = rb_ary_new2(RARRAY(ary)->len);
 	    }
-	    rb_yield(tmp);
-	}
-	return Qnil;
-    }
-    len = RARRAY(ary)->len;
-    result = rb_ary_new2(len);
-    for (i=0; i<len; i++) {
+    for (i=0; i<RARRAY(ary)->len; i++) {
 	VALUE tmp = rb_ary_new2(argc+1);
 
 	rb_ary_push(tmp, rb_ary_elt(ary, i));
 	for (j=0; j<argc; j++) {
 	    rb_ary_push(tmp, rb_ary_elt(argv[j], i));
 	}
+	if (!NIL_P(result))
 	rb_ary_push(result, tmp);
+	else
+	    rb_yield(tmp);
     }
     return result;
 }