Hi,

In message "[Clenup, fixes] ruby.h, string.c"
    on 02/09/07, Michal Rokos <m.rokos / sh.cvut.cz> writes:

|--- ruby.h	2002/09/03 05:20:06	1.69
|+++ ruby.h	2002/09/06 16:09:25
|@@ -316,7 +316,7 @@ struct RString {
|     long len;
|     char *ptr;
|     union {
|-	int capa;
|+	long capa;
| 	VALUE shared;
|     } aux;
| };
|@@ -325,7 +325,7 @@ struct RArray {
|     struct RBasic basic;
|     long len;
|     union {
|-	int capa;
|+	long capa;
| 	VALUE shared;
|     } aux;
|     VALUE *ptr;
|
|Nothing to say. len is LONG -> capa should be LONG as well.

OK.

|diff -u -p -r1.117 string.c
|--- string.c	2002/09/03 05:20:06	1.117
|+++ string.c	2002/09/06 16:09:30
|@@ -61,18 +61,18 @@ str_new(klass, ptr, len)
|     if (len < 0) {
| 	rb_raise(rb_eArgError, "negative string size (or size too big)");
|     }
|-
|     str = rb_obj_alloc(klass);
|     RSTRING(str)->len = len;
|     RSTRING(str)->aux.capa = len;
|     RSTRING(str)->ptr = ALLOC_N(char,len+1);
|+
|     if (ptr) {
| 	memcpy(RSTRING(str)->ptr, ptr, len);
|+	RSTRING(str)->ptr[len] = '\0';
|     }
|     else {
|-	MEMZERO(RSTRING(str)->ptr, char, len);
|+	MEMZERO(RSTRING(str)->ptr, char, len+1);
|     }
|-    RSTRING(str)->ptr[len] = '\0';
|     return str;
| }
|
|Useless move of sentinel to memcpy block and make MEMZERO go to make
|sentinel for us.

OK.

|@@ -217,7 +217,7 @@ rb_str_shared_replace(str, str2)
|     VALUE str, str2;
| {
|     if (str == str2) return;
|-    if (!FL_TEST(str, ELTS_SHARED)) free(RSTRING(str)->ptr);
|+    if (str_independent(str)) free(RSTRING(str)->ptr);
|     if (NIL_P(str2)) {
| 	RSTRING(str)->ptr = 0;
| 	RSTRING(str)->len = 0;
|@@ -236,7 +236,7 @@ rb_str_shared_replace(str, str2)
|     RSTRING(str2)->ptr = 0;	/* abandon str2 */
|     RSTRING(str2)->len = 0;
|     RSTRING(str2)->aux.capa = 0;
|-    FL_UNSET(str, ELTS_SHARED|STR_ASSOC);
|+    FL_UNSET(str2, ELTS_SHARED|STR_ASSOC);
|     if (OBJ_TAINTED(str2)) OBJ_TAINT(str);
| }
|
|I think that we should test if we can modify str before doing replace.
|BUGFIX: Flags should be unset for str2, not for str!

str2 for rb_str_shared_replace() is always a brand-new clean string.
It's ensured, so that we don't need any extra check here.  And we
shouldn't have cleared flags for str.  We should fix this.

|@@ -249,17 +249,11 @@ rb_str_associate(str, add)
| 	rb_ary_concat(RSTRING(str)->aux.shared, add);
|     }
|     else {
|-	if (FL_TEST(str, ELTS_SHARED)) {
|-	    rb_str_modify(str);
|-	}
|-	else if (RSTRING(str)->aux.shared) {
|-	    /* str_buf */
|-	    if (RSTRING(str)->aux.capa != RSTRING(str)->len) {
|-		RESIZE_CAPA(str, RSTRING(str)->len);
|-	    }
|+	rb_str_modify(str);
|+	if (RSTRING(str)->aux.capa != RSTRING(str)->len) {
|+	    RESIZE_CAPA(str, RSTRING(str)->len);
| 	}
| 	RSTRING(str)->aux.shared = add;
|-	FL_UNSET(str, ELTS_SHARED);
| 	FL_SET(str, STR_ASSOC);
|     }
| }
|
|I think that rb_str_modify should be called if the str isn't already
|associated -> no need to clean SHARED flag then.

Probably both you and me were wrong.  Since rb_str_associate() just
associate internal data to the string, we don't need to check freeze
flag at all.  We just need to abandon shared flag and associate data
if required.

|@@ -398,13 +392,21 @@ rb_str_format(str, arg)
|     return rb_f_sprintf(2, argv);
| }
| 
|-static int
|-str_independent(str)
|+void
|+rb_str_modify_check(str)
|     VALUE str;
| {
|     if (OBJ_FROZEN(str)) rb_error_frozen("string");
|-    if (!OBJ_TAINTED(str) && rb_safe_level() >= 4)
|+    if (!OBJ_TAINTED(str) && rb_safe_level() >= 4) {
| 	rb_raise(rb_eSecurityError, "Insecure: can't modify string");
|+    }
|+}
|+
|+static int
|+str_independent(str)
|+    VALUE str;
|+{
|+    rb_str_modify_check(str);
|     if (!FL_TEST(str, ELTS_SHARED)) return 1;
|     return 0;
| }
|
|New check and use it where it was before.

I'm not sure if we need to separate these functions; see below.

|@@ -414,14 +416,18 @@ str_make_independent(str)
|     VALUE str;
| {
|     char *ptr;
|+    long len = RSTRING(str)->len;
| 
|-    ptr = ALLOC_N(char, RSTRING(str)->len+1);
|+    ptr = ALLOC_N(char, len+1);
|     if (RSTRING(str)->ptr) {
|-	memcpy(ptr, RSTRING(str)->ptr, RSTRING(str)->len);
|+	memcpy(ptr, RSTRING(str)->ptr, len);
|+	ptr[len] = '\0';
|+    }
|+    else {
|+	MEMZERO(ptr, char, len+1);
|     }
|-    ptr[RSTRING(str)->len] = 0;
|     RSTRING(str)->ptr = ptr;
|-    RSTRING(str)->aux.capa = RSTRING(str)->len;
|+    RSTRING(str)->aux.capa = len;
|     FL_UNSET(str, ELTS_SHARED|STR_ASSOC);
| }
|
|Just a little cleanup and use MEMZERO for nice output.

OK.

|@@ -429,8 +435,9 @@ void
| rb_str_modify(str)
|     VALUE str;
| {
|-    if (!str_independent(str))
|+    if (!str_independent(str)) {
| 	str_make_independent(str);
|+    }
| }
|
| VALUE
|
|Ha - indent only... Sorry.

OK.

|@@ -493,10 +500,6 @@ VALUE
| rb_str_dup_frozen(str)
|     VALUE str;
| {
|-    if (FL_TEST(str, ELTS_SHARED)) {
|-	OBJ_FREEZE(RSTRING(str)->aux.shared);
|-	return RSTRING(str)->aux.shared;
|-    }
|     if (OBJ_FROZEN(str)) return str;
|     str = rb_str_dup(str);
|     OBJ_FREEZE(str);
|
|I don't understand here. This function should dup and return freezed
|result, or freeze str and dup it?

rb_str_dup() is used internally for hash keys only, so that if we can
avoid copying, we should to gain performance.  I guess we should leave
this as it is.

|@@ -511,15 +514,17 @@ rb_str_resize(str, len)
|     if (len < 0) {
| 	rb_raise(rb_eArgError, "negative string size (or size too big)");
|     }
|-	
|+    rb_str_modify(str);
|+    
|+    if (FL_TEST(str, STR_ASSOC)) {
|+	rb_bug("Discarding ASSOC in rb_str_resize");
|+    }
|     if (len != RSTRING(str)->len) {
|-	rb_str_modify(str);
|-
| 	if (RSTRING(str)->len < len || RSTRING(str)->len - len > 1024) {
| 	    RESIZE_CAPA(str, len);
| 	}
|+	MEMZERO(RSTRING(str)->ptr, char, len+1);
| 	RSTRING(str)->len = len;
|-	RSTRING(str)->ptr[len] = '\0';	/* sentinel */
|     }
|     return str;
| }
|
|I think that modify check should be done. (Added a nice check about
|discarding ASSOC. :-))

rb_bug() is too much.  We just need to discard STR_ASSOC flag and
associated data.  Besides since rb_str_resize() is not a function to
implement a method, we don't need to call rb_str_modify() unless it
is really required.  I think we should leave this as it is as well.

|@@ -532,8 +537,14 @@ rb_str_buf_cat(str, ptr, len)
| {
|     long capa, total;
| 
|-    if (FL_TEST(str, ELTS_SHARED)) {
|-	rb_str_modify(str);
|+    if (len == 0) return str;
|+    if (len < 0) {
|+	rb_raise(rb_eArgError, "negative string size (or size too big)");
|+    }
|+    rb_str_modify(str);
|+    
|+    if (FL_TEST(str, STR_ASSOC)) {
|+	rb_bug("String with ASSOC in rb_str_buf_cat!");
|     }
|     capa = RSTRING(str)->aux.capa;
|     total = RSTRING(str)->len+len;
|
|Again: Modify and rb_bug when ASSOCiated.

See the comment above.

|@@ -544,8 +555,8 @@ rb_str_buf_cat(str, ptr, len)
| 	RESIZE_CAPA(str, capa);
|     }
|     memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
|-    RSTRING(str)->len = total;
|     RSTRING(str)->ptr[total] = '\0'; /* sentinel */
|+    RSTRING(str)->len = total;
| 
|     return str;
| }
|
|Ahh, indent again, sorry.

OK.

|@@ -564,23 +575,24 @@ rb_str_cat(str, ptr, len)
|     const char *ptr;
|     long len;
| {
|+    if (len < 0) {
|+	rb_raise(rb_eArgError, "negative string size (or size too big)");
|+    }
|     rb_str_modify(str);
|-    if (len > 0) {
|-	if (!FL_TEST(str, ELTS_SHARED) && !FL_TEST(str, STR_ASSOC)) {
|-	    return rb_str_buf_cat(str, ptr, len);
|-	}
|-	RESIZE_CAPA(str, RSTRING(str)->len + len);
|+
|+    if (FL_TEST(str, STR_ASSOC)) {
|+	REALLOC_N(RSTRING(str)->ptr, char, RSTRING(str)->len+len+1);
| 	if (ptr) {
| 	    memcpy(RSTRING(str)->ptr + RSTRING(str)->len, ptr, len);
|+	    RSTRING(str)->ptr[RSTRING(str)->len + len] = '\0'; /* sentinel */
| 	}
| 	else {
|-	    MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, len);
|+	    MEMZERO(RSTRING(str)->ptr + RSTRING(str)->len, char, len+1);
| 	}
| 	RSTRING(str)->len += len;
|-	RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */
|+	return str;
|     }
|-
|-    return str;
|+    return rb_str_buf_cat(str, ptr, len);
| }
| 
| VALUE
|
|Cleaned a bit.
|Fix: Don't use RESIZE_CAPA when STR_ASSOC is set.
|
|PS: I forgot to handle len == 0, will do.

We just need to discard.

|@@ -597,12 +609,14 @@ rb_str_buf_append(str, str2)
| {
|     long capa, len;
| 
|-    if (FL_TEST(str, ELTS_SHARED)) {
|-	rb_str_modify(str);
|+    rb_str_modify(str);
|+
|+    if (FL_TEST(str, STR_ASSOC)) {
|+	rb_bug("String with ASSOC in rb_str_buf_append!");
|     }
|     capa = RSTRING(str)->aux.capa;
|-
|     len = RSTRING(str)->len+RSTRING(str2)->len;
|+    
|     if (capa <= len) {
| 	while (len > capa) {
| 	    capa = (capa + 1) * 2;
|
|Again: Modify and bug when...

See the comments above.

|@@ -625,21 +639,21 @@ rb_str_append(str, str2)
| 
|     StringValue(str2);
|     rb_str_modify(str);
|+
|     if (RSTRING(str2)->len > 0) {
| 	len = RSTRING(str)->len+RSTRING(str2)->len;
|-	if (!FL_TEST(str, ELTS_SHARED) && !FL_TEST(str, STR_ASSOC)) {
|+	if (FL_TEST(str, STR_ASSOC)) {
|+	    REALLOC_N(RSTRING(str)->ptr, char, len+1);
|+	    memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
|+		    RSTRING(str2)->ptr, RSTRING(str2)->len);
|+	    RSTRING(str)->ptr[len] = '\0'; /* sentinel */
|+	    RSTRING(str)->len = len;
|+	}
|+	else {
| 	    rb_str_buf_append(str, str2);
|-	    OBJ_INFECT(str, str2);
|-	    return str;
| 	}
|-	RESIZE_CAPA(str, len);
|-	memcpy(RSTRING(str)->ptr + RSTRING(str)->len,
|-	       RSTRING(str2)->ptr, RSTRING(str2)->len);
|-	RSTRING(str)->len += RSTRING(str2)->len;
|-	RSTRING(str)->ptr[RSTRING(str)->len] = '\0'; /* sentinel */
|     }
|     OBJ_INFECT(str, str2);
|-
|     return str;
| }
|
|A little cleanup: Don't need to check SHARED flag (we did rb_str_modify)
|Fix: Don't use RESIZE_CAPA when ASSOC flag is set.

Ditto.

|@@ -1493,6 +1507,9 @@ str_gsub(argc, argv, str, bang)
|     char *buf, *bp, *cp;
|     int tainted = 0;
| 
|+    if (bang) {
|+	rb_str_modify_check(str);
|+    }
|     if (argc == 1 && rb_block_given_p()) {
| 	iter = 1;
|     }
|
|When bang is set, modify_check should be done?

It's already checked in the function.

|@@ -1617,6 +1634,7 @@ rb_str_replace(str, str2)
| {
|     if (str == str2) return str;
| 
|+    rb_str_modify_check(str);
|     StringValue(str2);
|     if (FL_TEST(str2, ELTS_SHARED)) {
| 	if (str_independent(str)) {
|
|Modify_check?

It's already checked in the function.

|@@ -1624,16 +1642,15 @@ rb_str_replace(str, str2)
| 	}
| 	RSTRING(str)->len = RSTRING(str2)->len;
| 	RSTRING(str)->ptr = RSTRING(str2)->ptr;
|-	FL_SET(str, RBASIC(str2)->flags & (ELTS_SHARED|STR_ASSOC));
| 	RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
|+	FL_SET(str, ELTS_SHARED);
|     }
|     else {
|-	rb_str_modify(str);
| 	rb_str_resize(str, RSTRING(str2)->len);
| 	memcpy(RSTRING(str)->ptr, RSTRING(str2)->ptr, RSTRING(str2)->len);
| 	if (FL_TEST(str2, STR_ASSOC)) {
|-	    FL_SET(str, RBASIC(str2)->flags & (ELTS_SHARED|STR_ASSOC));
| 	    RSTRING(str)->aux.shared = RSTRING(str2)->aux.shared;
|+	    FL_SET(str, STR_ASSOC);
| 	}
|     }
| 
|rb_str_resize now performs rb_str_modify. No need to read flags from
|str2, we already tested them.

Only if we do the modify above.

							matz.