Eric:

I'm not sure about the implementation of IO.copy_stream, but do you need
to add this flag for *all* T_STRING objects?  We have to take care of
this flag whenever we want to manipulate strings.


On 2017/01/31 5:40, normal wrote:
> normal	2017-01-31 05:40:18 +0900 (Tue, 31 Jan 2017)
> 
>   New Revision: 57469
> 
>   https://svn.ruby-lang.org/cgi-bin/viewvc.cgi?view=revision&revision=57469
> 
>   Log:
>     io.c: recycle garbage on write
>     
>     * string.c (STR_IS_SHARED_M): new flag to mark shared multiple times
>       (STR_SET_SHARED): set STR_IS_SHARED_M
>       (rb_str_tmp_frozen_acquire, rb_str_tmp_frozen_release): new functions
>       (str_new_frozen): set/unset STR_IS_SHARED_M as appropriate
>     * internal.h: declare new functions
>     * io.c (fwrite_arg, fwrite_do, fwrite_end): new
>       (io_fwrite): use new functions
>     
>     Introduce rb_str_tmp_frozen_acquire and rb_str_tmp_frozen_release
>     to manage a hidden, frozen string.  Reuse one bit of the embed
>     length for shared strings as STR_IS_SHARED_M to indicate a string
>     has been shared multiple times.  In the common case, the string
>     is only shared once so the object slot can be reclaimed immediately.
>     
>     minimum results in each 3 measurements. (time and size)
>     
>     Execution time (sec)
>     name                            trunk   built
>     io_copy_stream_write            0.682   0.254
>     io_copy_stream_write_socket     1.225   0.751
>     
>     Speedup ratio: compare with the result of `trunk' (greater is better)
>     name    built
>     io_copy_stream_write            2.680
>     io_copy_stream_write_socket     1.630
>     
>     Memory usage (last size) (B)
>     name                            trunk           built
>     io_copy_stream_write            95436800.000    6512640.000
>     io_copy_stream_write_socket     117628928.000   7127040.000
>     
>     Memory consuming ratio (size) with the result of `trunk' (greater is better)
>     name    built
>     io_copy_stream_write            14.654
>     io_copy_stream_write_socket     16.505
> 
>   Modified files:
>     trunk/internal.h
>     trunk/io.c
>     trunk/string.c
> Index: string.c
> ===================================================================
> --- string.c	(revision 57468)
> +++ string.c	(revision 57469)
> @@ -70,6 +70,7 @@ VALUE rb_cSymbol; https://github.com/ruby/ruby/blob/trunk/string.c#L70
>   * 1:     RSTRING_NOEMBED
>   * 2:     STR_SHARED (== ELTS_SHARED)
>   * 2-6:   RSTRING_EMBED_LEN (5 bits == 32)
> + * 6:     STR_IS_SHARED_M (shared, when RSTRING_NOEMBED==1 && klass==0)
>   * 7:     STR_TMPLOCK
>   * 8-9:   ENC_CODERANGE (2 bits)
>   * 10-16: ENCODING (7 bits == 128)
> @@ -79,6 +80,7 @@ VALUE rb_cSymbol; https://github.com/ruby/ruby/blob/trunk/string.c#L80
>   */
>  
>  #define RUBY_MAX_CHAR_LEN 16
> +#define STR_IS_SHARED_M FL_USER6
>  #define STR_TMPLOCK FL_USER7
>  #define STR_NOFREE FL_USER18
>  #define STR_FAKESTR FL_USER19
> @@ -150,6 +152,8 @@ VALUE rb_cSymbol; https://github.com/ruby/ruby/blob/trunk/string.c#L152
>      if (!FL_TEST(str, STR_FAKESTR)) { \
>  	RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
>  	FL_SET((str), STR_SHARED); \
> +	if (RBASIC_CLASS((shared_str)) == 0) /* for CoW-friendliness */ \
> +	    FL_SET_RAW((shared_str), STR_IS_SHARED_M); \
>      } \
>  } while (0)
>  
> @@ -1127,6 +1131,45 @@ rb_str_new_frozen(VALUE orig) https://github.com/ruby/ruby/blob/trunk/string.c#L1131
>      return str;
>  }
>  
> +VALUE
> +rb_str_tmp_frozen_acquire(VALUE orig)
> +{
> +    VALUE tmp;
> +
> +    if (OBJ_FROZEN_RAW(orig)) return orig;
> +
> +    tmp = str_new_frozen(0, orig);
> +    OBJ_INFECT(tmp, orig);
> +
> +    return tmp;
> +}
> +
> +void
> +rb_str_tmp_frozen_release(VALUE orig, VALUE tmp)
> +{
> +    if (RBASIC_CLASS(tmp) != 0)
> +	return;
> +
> +    if (FL_TEST_RAW(orig, STR_SHARED) &&
> +	    !FL_TEST_RAW(orig, STR_TMPLOCK|RUBY_FL_FREEZE)) {
> +	VALUE shared = RSTRING(orig)->as.heap.aux.shared;
> +
> +	if (shared == tmp && !FL_TEST_RAW(tmp, STR_IS_SHARED_M)) {
> +	    FL_UNSET_RAW(orig, STR_SHARED);
> +	    assert(RSTRING(orig)->as.heap.ptr == RSTRING(tmp)->as.heap.ptr);
> +	    assert(RSTRING(orig)->as.heap.len == RSTRING(tmp)->as.heap.len);
> +	    RSTRING(orig)->as.heap.aux.capa = RSTRING(tmp)->as.heap.aux.capa;
> +	    RBASIC(orig)->flags |= RBASIC(tmp)->flags & STR_NOFREE;
> +	    assert(OBJ_FROZEN_RAW(tmp));
> +	    rb_gc_force_recycle(tmp);
> +	}
> +    }
> +    else if (STR_EMBED_P(tmp)) {
> +	assert(OBJ_FROZEN_RAW(tmp));
> +	rb_gc_force_recycle(tmp);
> +    }
> +}
> +
>  static VALUE
>  str_new_frozen(VALUE klass, VALUE orig)
>  {
> @@ -1152,6 +1195,8 @@ str_new_frozen(VALUE klass, VALUE orig) https://github.com/ruby/ruby/blob/trunk/string.c#L1195
>  		RSTRING(str)->as.heap.len -= ofs + rest;
>  	    }
>  	    else {
> +		if (RBASIC_CLASS(shared) == 0)
> +		    FL_SET_RAW(shared, STR_IS_SHARED_M);
>  		return shared;
>  	    }
>  	}
> @@ -1171,6 +1216,8 @@ str_new_frozen(VALUE klass, VALUE orig) https://github.com/ruby/ruby/blob/trunk/string.c#L1216
>  	    RBASIC(str)->flags |= RBASIC(orig)->flags & STR_NOFREE;
>  	    RBASIC(orig)->flags &= ~STR_NOFREE;
>  	    STR_SET_SHARED(orig, str);
> +	    if (klass == 0)
> +		FL_UNSET_RAW(str, STR_IS_SHARED_M);
>  	}
>      }
>  
> Index: io.c
> ===================================================================
> --- io.c	(revision 57468)
> +++ io.c	(revision 57469)
> @@ -1419,10 +1419,40 @@ do_writeconv(VALUE str, rb_io_t *fptr, i https://github.com/ruby/ruby/blob/trunk/io.c#L1419
>      return str;
>  }
>  
> +struct fwrite_arg {
> +    VALUE orig;
> +    VALUE tmp;
> +    rb_io_t *fptr;
> +    int nosync;
> +};
> +
> +static VALUE
> +fwrite_do(VALUE arg)
> +{
> +    struct fwrite_arg *fa = (struct fwrite_arg *)arg;
> +    const char *ptr;
> +    long len;
> +
> +    RSTRING_GETMEM(fa->tmp, ptr, len);
> +
> +    return (VALUE)io_binwrite(fa->tmp, ptr, len, fa->fptr, fa->nosync);
> +}
> +
> +static VALUE
> +fwrite_end(VALUE arg)
> +{
> +    struct fwrite_arg *fa = (struct fwrite_arg *)arg;
> +
> +    rb_str_tmp_frozen_release(fa->orig, fa->tmp);
> +
> +    return Qfalse;
> +}
> +
>  static long
>  io_fwrite(VALUE str, rb_io_t *fptr, int nosync)
>  {
>      int converted = 0;
> +    struct fwrite_arg fa;
>  #ifdef _WIN32
>      if (fptr->mode & FMODE_TTY) {
>  	long len = rb_w32_write_console(str, fptr->fd);
> @@ -1432,11 +1462,13 @@ io_fwrite(VALUE str, rb_io_t *fptr, int https://github.com/ruby/ruby/blob/trunk/io.c#L1462
>      str = do_writeconv(str, fptr, &converted);
>      if (converted)
>  	OBJ_FREEZE(str);
> -    else
> -	str = rb_str_new_frozen(str);
>  
> -    return io_binwrite(str, RSTRING_PTR(str), RSTRING_LEN(str),
> -		       fptr, nosync);
> +    fa.orig = str;
> +    fa.tmp = rb_str_tmp_frozen_acquire(str);
> +    fa.fptr = fptr;
> +    fa.nosync = nosync;
> +
> +    return (long)rb_ensure(fwrite_do, (VALUE)&fa, fwrite_end, (VALUE)&fa);
>  }
>  
>  ssize_t
> Index: internal.h
> ===================================================================
> --- internal.h	(revision 57468)
> +++ internal.h	(revision 57469)
> @@ -1475,6 +1475,8 @@ VALUE rb_id_quote_unprintable(ID); https://github.com/ruby/ruby/blob/trunk/internal.h#L1475
>  char *rb_str_fill_terminator(VALUE str, const int termlen);
>  void rb_str_change_terminator_length(VALUE str, const int oldtermlen, const int termlen);
>  VALUE rb_str_locktmp_ensure(VALUE str, VALUE (*func)(VALUE), VALUE arg);
> +VALUE rb_str_tmp_frozen_acquire(VALUE str);
> +void rb_str_tmp_frozen_release(VALUE str, VALUE tmp);
>  VALUE rb_str_chomp_string(VALUE str, VALUE chomp);
>  #ifdef RUBY_ENCODING_H
>  VALUE rb_external_str_with_enc(VALUE str, rb_encoding *eenc);
> 
> --
> ML: ruby-changes / quickml.atdot.net
> Info: http://www.atdot.net/~ko1/quickml/
> 


-- 
// SASADA Koichi at atdot dot net

Unsubscribe: <mailto:ruby-core-request / ruby-lang.org?subject=unsubscribe>
<http://lists.ruby-lang.org/cgi-bin/mailman/options/ruby-core>