Hello,

	homework for you, heroic nobu and others... :-)

	So, let's go:


Index: ruby.h
===================================================================
RCS file: /src/ruby/ruby.h,v
retrieving revision 1.69
diff -u -p -r1.69 ruby.h
--- ruby.h	2002/09/03 05:20:06	1.69
+++ ruby.h	2002/09/06 16:09:25
@@ -316,7 +316,7 @@ struct RString {
     long len;
     char *ptr;
     union {
-	int capa;
+	long capa;
 	VALUE shared;
     } aux;
 };
@@ -325,7 +325,7 @@ struct RArray {
     struct RBasic basic;
     long len;
     union {
-	int capa;
+	long capa;
 	VALUE shared;
     } aux;
     VALUE *ptr;

Nothing to say. len is LONG -> capa should be LONG as well.

Index: string.c
===================================================================
RCS file: /src/ruby/string.c,v
retrieving revision 1.117
diff -u -p -r1.117 string.c
--- string.c	2002/09/03 05:20:06	1.117
+++ string.c	2002/09/06 16:09:30
@@ -61,18 +61,18 @@ str_new(klass, ptr, len)
     if (len < 0) {
 	rb_raise(rb_eArgError, "negative string size (or size too big)");
     }
-
     str = rb_obj_alloc(klass);
     RSTRING(str)->len = len;
     RSTRING(str)->aux.capa = len;
     RSTRING(str)->ptr = ALLOC_N(char,len+1);
+
     if (ptr) {
 	memcpy(RSTRING(str)->ptr, ptr, len);
+	RSTRING(str)->ptr[len] = '\0';
     }
     else {
-	MEMZERO(RSTRING(str)->ptr, char, len);
+	MEMZERO(RSTRING(str)->ptr, char, len+1);
     }
-    RSTRING(str)->ptr[len] = '\0';
     return str;
 }

Useless move of sentinel to memcpy block and make MEMZERO go to make
sentinel for us.

@@ -217,7 +217,7 @@ rb_str_shared_replace(str, str2)
     VALUE str, str2;
 {
     if (str == str2) return;
-    if (!FL_TEST(str, ELTS_SHARED)) free(RSTRING(str)->ptr);
+    if (str_independent(str)) free(RSTRING(str)->ptr);
     if (NIL_P(str2)) {
 	RSTRING(str)->ptr = 0;
 	RSTRING(str)->len = 0;
@@ -236,7 +236,7 @@ rb_str_shared_replace(str, str2)
     RSTRING(str2)->ptr = 0;	/* abandon str2 */
     RSTRING(str2)->len = 0;
     RSTRING(str2)->aux.capa = 0;
-    FL_UNSET(str, ELTS_SHARED|STR_ASSOC);
+    FL_UNSET(str2, ELTS_SHARED|STR_ASSOC);
     if (OBJ_TAINTED(str2)) OBJ_TAINT(str);
 }

I think that we should test if we can modify str before doing replace.
BUGFIX: Flags should be unset for str2, not for str!

@@ -249,17 +249,11 @@ rb_str_associate(str, add)
 	rb_ary_concat(RSTRING(str)->aux.shared, add);
     }
     else {
-	if (FL_TEST(str, ELTS_SHARED)) {
-	    rb_str_modify(str);
-	}
-	else if (RSTRING(str)->aux.shared) {
-	    /* str_buf */
-	    if (RSTRING(str)->aux.capa != RSTRING(str)->len) {
-		RESIZE_CAPA(str, RSTRING(str)->len);
-	    }
+	rb_str_modify(str);
+	if (RSTRING(str)->aux.capa != RSTRING(str)->len) {
+	    RESIZE_CAPA(str, RSTRING(str)->len);
 	}
 	RSTRING(str)->aux.shared = add;
-	FL_UNSET(str, ELTS_SHARED);
 	FL_SET(str, STR_ASSOC);
     }
 }

I think that rb_str_modify should be called if the str isn't already
associated -> no need to clean SHARED flag then.

@@ -398,13 +392,21 @@ rb_str_format(str, arg)
     return rb_f_sprintf(2, argv);
 }
 
-static int
-str_independent(str)
+void
+rb_str_modify_check(str)
     VALUE str;
 {
     if (OBJ_FROZEN(str)) rb_error_frozen("string");
-    if (!OBJ_TAINTED(str) && rb_safe_level() >= 4)
+    if (!OBJ_TAINTED(str) && rb_safe_level() >= 4) {
 	rb_raise(rb_eSecurityError, "Insecure: can't modify string");
+    }
+}
+
+static int
+str_independent(str)
+    VALUE str;
+{
+    rb_str_modify_check(str);
     if (!FL_TEST(str, ELTS_SHARED)) return 1;
     return 0;
 }

New check and use it where it was before.

@@ -414,14 +416,18 @@ str_make_independent(str)
     VALUE str;
 {
     char *ptr;
+    long len = RSTRING(str)->len;
 
-    ptr = ALLOC_N(char, RSTRING(str)->len+1);
+    ptr = ALLOC_N(char, len+1);
     if (RSTRING(str)->ptr) {
-	memcpy(ptr, RSTRING(str)->ptr, RSTRING(str)->len);
+	memcpy(ptr, RSTRING(str)->ptr, len);
+	ptr[len] = '\0';
+    }
+    else {
+	MEMZERO(ptr, char, len+1);
     }
-    ptr[RSTRING(str)->len] = 0;
     RSTRING(str)->ptr = ptr;
-    RSTRING(str)->aux.capa = RSTRING(str)->len;
+    RSTRING(str)->aux.capa = len;
     FL_UNSET(str, ELTS_SHARED|STR_ASSOC);
 }

Just a little cleanup and use MEMZERO for nice output.

@@ -429,8 +435,9 @@ void
 rb_str_modify(str)
     VALUE str;
 {
-    if (!str_independent(str))
+    if (!str_independent(str)) {
 	str_make_independent(str);
+    }
 }

 VALUE

Ha - indent only... Sorry.

@@ -493,10 +500,6 @@ VALUE
 rb_str_dup_frozen(str)
     VALUE str;
 {
-    if (FL_TEST(str, ELTS_SHARED)) {
-	OBJ_FREEZE(RSTRING(str)->aux.shared);
-	return RSTRING(str)->aux.shared;
-    }
     if (OBJ_FROZEN(str)) return str;
     str = rb_str_dup(str);
     OBJ_FREEZE(str);

I don't understand here. This function should dup and return freezed
result, or freeze str and dup it?

@@ -511,15 +514,17 @@ rb_str_resize(str, len)
     if (len < 0) {
 	rb_raise(rb_eArgError, "negative string size (or size too big)");
     }
-	
+    rb_str_modify(str);
+    
+    if (FL_TEST(str, STR_ASSOC)) {
+	rb_bug("Discarding ASSOC in rb_str_resize");
+    }
     if (len != RSTRING(str)->len) {
-	rb_str_modify(str);
-
 	if (RSTRING(str)->len < len || RSTRING(str)->len - len > 1024) {
 	    RESIZE_CAPA(str, len);
 	}
+	MEMZERO(RSTRING(str)->ptr, char, len+1);
 	RSTRING(str)->len = len;
-	RSTRING(str)->ptr[len] = '\0';	/* sentinel */
     }
     return str;
 }

I think that modify check should be done. (Added a nice check about
discarding ASSOC. :-))

@@ -532,8 +537,14 @@ rb_str_buf_cat(str, ptr, len)
 {
     long capa, total;
 
-    if (FL_TEST(str, ELTS_SHARED)) {
-	rb_str_modify(str);
+    if (len == 0) return str;
+    if (len < 0) {
+	rb_raise(rb_eArgError, "negative string size (or size too big)");
+    }
+    rb_str_modify(str);
+    
+    if (FL_TEST(str, STR_ASSOC)) {
+	rb_bug("String with ASSOC in rb_str_buf_cat!");
     }
     capa = RSTRING(str)->aux.capa;
     total = RSTRING(str)->len+len;

Again: Modify and rb_bug when ASSOCiated.

@@ -544,8 +555,8 @@ rb_str_buf_cat(str, ptr, len)
 	RESIZE_CAPA(str, capa);
     }
     memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
-    RSTRING(str)->len = total;
     RSTRING(str)->ptr[total] = '\0'; /* sentinel */
+    RSTRING(str)->len = total;
 
     return str;
 }

Ahh, indent again, sorry.

@@ -564,23 +575,24 @@ rb_str_cat(str, ptr, len)
     const char *ptr;
     long len;
 {
+    if (len < 0) {
+	rb_raise(rb_eArgError, "negative string size (or size too big)");
+    }
     rb_str_modify(str);
-    if (len > 0) {
-	if (!FL_TEST(str, ELTS_SHARED) && !FL_TEST(str, STR_ASSOC)) {
-	    return rb_str_buf_cat(str, ptr, len);
-	}
-	RESIZE_CAPA(str, RSTRING(str)->len + len);
+
+    if (FL_TEST(str, STR_ASSOC)) {
+	REALLOC_N(RSTRING(str)->ptr, char, RSTRING(str)->len+len+1);
 	if (ptr) {
 	    memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
+	    RSTRING(str)->ptr[RSTRING(str)->len + len] = '\0'; /* sentinel */
 	}
 	else {
-	    MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, len);
+	    MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, len+1);
 	}
 	RSTRING(str)->len += len;
-	RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */
+	return str;
     }
-
-    return str;
+    return rb_str_buf_cat(str, ptr, len);
 }
 
 VALUE

Cleaned a bit.
Fix: Don't use RESIZE_CAPA when STR_ASSOC is set.

PS: I forgot to handle len == 0, will do.

@@ -597,12 +609,14 @@ rb_str_buf_append(str, str2)
 {
     long capa, len;
 
-    if (FL_TEST(str, ELTS_SHARED)) {
-	rb_str_modify(str);
+    rb_str_modify(str);
+
+    if (FL_TEST(str, STR_ASSOC)) {
+	rb_bug("String with ASSOC in rb_str_buf_append!");
     }
     capa = RSTRING(str)->aux.capa;
-
     len = RSTRING(str)->len+RSTRING(str2)->len;
+    
     if (capa <= len) {
 	while (len > capa) {
 	    capa = (capa + 1) * 2;

Again: Modify and bug when...

@@ -625,21 +639,21 @@ rb_str_append(str, str2)
 
     StringValue(str2);
     rb_str_modify(str);
+
     if (RSTRING(str2)->len > 0) {
 	len = RSTRING(str)->len+RSTRING(str2)->len;
-	if (!FL_TEST(str, ELTS_SHARED) && !FL_TEST(str, STR_ASSOC)) {
+	if (FL_TEST(str, STR_ASSOC)) {
+	    REALLOC_N(RSTRING(str)->ptr, char, len+1);
+	    memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
+		    RSTRING(str2)->ptr, RSTRING(str2)->len);
+	    RSTRING(str)->ptr[len] = '\0'; /* sentinel */
+	    RSTRING(str)->len = len;
+	}
+	else {
 	    rb_str_buf_append(str, str2);
-	    OBJ_INFECT(str, str2);
-	    return str;
 	}
-	RESIZE_CAPA(str, len);
-	memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
-	       RSTRING(str2)->ptr, RSTRING(str2)->len);
-	RSTRING(str)->len += RSTRING(str2)->len;
-	RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */
     }
     OBJ_INFECT(str, str2);
-
     return str;
 }

A little cleanup: Don't need to check SHARED flag (we did rb_str_modify)
Fix: Don't use RESIZE_CAPA when ASSOC flag is set.

@@ -1493,6 +1507,9 @@ str_gsub(argc, argv, str, bang)
     char *buf, *bp, *cp;
     int tainted = 0;
 
+    if (bang) {
+	rb_str_modify_check(str);
+    }
     if (argc == 1 && rb_block_given_p()) {
 	iter = 1;
     }

When bang is set, modify_check should be done?

@@ -1617,6 +1634,7 @@ rb_str_replace(str, str2)
 {
     if (str == str2) return str;
 
+    rb_str_modify_check(str);
     StringValue(str2);
     if (FL_TEST(str2, ELTS_SHARED)) {
 	if (str_independent(str)) {

Modify_check?

@@ -1624,16 +1642,15 @@ rb_str_replace(str, str2)
 	}
 	RSTRING(str)->len = RSTRING(str2)->len;
 	RSTRING(str)->ptr = RSTRING(str2)->ptr;
-	FL_SET(str, RBASIC(str2)->flags & (ELTS_SHARED|STR_ASSOC));
 	RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
+	FL_SET(str, ELTS_SHARED);
     }
     else {
-	rb_str_modify(str);
 	rb_str_resize(str, RSTRING(str2)->len);
 	memcpy(RSTRING(str)->ptr, RSTRING(str2)->ptr, RSTRING(str2)->len);
 	if (FL_TEST(str2, STR_ASSOC)) {
-	    FL_SET(str, RBASIC(str2)->flags & (ELTS_SHARED|STR_ASSOC));
 	    RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
+	    FL_SET(str, STR_ASSOC);
 	}
     }
 
rb_str_resize now performs rb_str_modify. No need to read flags from
str2, we already tested them.

	Enjoy!

		Michal
		
-- 
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
Michal Rokos                         Czech Technical University, Prague
E-mail:m.rokos / sh.cvut.cz      ICQ:36118339      Jabber:majkl / jabber.cz
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-