ts <decoux / moulon.inra.fr> wrote: (2006/02/28 19:35) >>>>>> "H" == H Yamamoto <ocean / m2.ccsnet.ne.jp> writes: > >H> Because rb_gc_mark_locations marks all objects in the range of STACK_END to rb_gc_stack_start. >H> if GC.start runs inside block, block needs more stack, so more objects can be wrongly >H> marked as alive. (As you can see, last `pause` is outside of block, so less stack is used, >H> huge array goes out of stack range, it is freed) > > Yes this is what it do. Now try it with 1.6.8 and you'll have a > surprise :-) > > Strange to see something that I've predicted but I *really* expected to see > it in another way : ruby is faster but it use more memory :-) > > > >Guy Decoux Yes, at the last `pause`, 1.6: 82MB => 62MB 1.8: 82MB => 26MB I understand 1.8 is better. Is that because 1.8 frees unused heap slots? :-) And Eric Hodel is right. ([ruby-talk:181568]) The memory the heap slots use is greater than the memory the C strings themselves use. Even after the array is freed, the heaps_slot itself still eats 26MB of memory, and we cannot reduce it because Ruby's GC cannot do compaction. //////////////////////////////////////////////////////////////////////////// Index: array.c =================================================================== RCS file: /src/ruby/array.c,v retrieving revision 1.137.2.30 diff -u -w -b -p -r1.137.2.30 array.c --- array.c 22 Dec 2005 07:08:51 -0000 1.137.2.30 +++ array.c 28 Feb 2006 04:12:46 -0000 @@ -2953,6 +2953,16 @@ rb_ary_flatten(ary) return ary; } +static VALUE +ary_capacity(VALUE ary) +{ + if (FL_TEST(ary, ELTS_SHARED)) { + return LONG2FIX(0); + } + else { + return LONG2FIX(RARRAY(ary)->aux.shared); + } +} /* Arrays are ordered, integer-indexed collections of any object. * Array indexing starts at 0, as in C or Java. 
A negative index is @@ -3050,6 +3060,8 @@ Init_Array() rb_define_method(rb_cArray, "flatten!", rb_ary_flatten_bang, 0); rb_define_method(rb_cArray, "nitems", rb_ary_nitems, 0); + rb_define_method(rb_cArray, "capacity", ary_capacity, 0); + id_cmp = rb_intern("<=>"); inspect_key = rb_intern("__inspect_key__"); } Index: gc.c =================================================================== RCS file: /src/ruby/gc.c,v retrieving revision 1.168.2.37 diff -u -w -b -p -r1.168.2.37 gc.c --- gc.c 13 Feb 2006 09:10:53 -0000 1.168.2.37 +++ gc.c 28 Feb 2006 11:23:11 -0000 @@ -323,6 +323,31 @@ static int heap_slots = HEAP_MIN_SLOTS; static RVALUE *himem, *lomem; +static VALUE +rb_gc_heaps(VALUE self) +{ + int i; + unsigned long total = 0; + + printf("heaps information\n"); + +printf("%d %d %d\n", sizeof(char), sizeof(int), sizeof(RVALUE)); + + for (i = 0; i < heaps_used; ++i) { + int j; + unsigned long used_in_heap = 0; + for (j = 0; j < heaps[i].limit; ++j) { + if (heaps[i].slot[j].as.free.flags) { + used_in_heap++; + } + } + printf("heap#%04d: used = %d / limit = %d\n", i, used_in_heap, heaps[i].limit); + total += heaps[i].limit; + } + + return LONG2FIX(total * sizeof(RVALUE)); /* byte */ +} + static void add_heap() { @@ -594,6 +619,18 @@ gc_mark_rest() } } +static VALUE watching = 1; + +#define GET_WATCHING() (watching - 1) +#define SET_WATCHING(value) (watching = value + 1) + +static VALUE +rb_gc_watch(VALUE self, VALUE obj) +{ + SET_WATCHING(obj); + return Qnil; +} + static inline int is_pointer_to_heap(ptr) void *ptr; @@ -620,8 +657,14 @@ mark_locations_array(x, n) register long n; { VALUE v; + while (n--) { v = *x; + if (GET_WATCHING()) { + if (GET_WATCHING() == v) { + printf("---> %p\n", x); + } + } if (is_pointer_to_heap((void *)v)) { gc_mark(v, 0); } @@ -1148,6 +1191,12 @@ obj_free(obj) break; } + if (GET_WATCHING()) { + if (GET_WATCHING() == obj) { + SET_WATCHING(0); + } + } + if (FL_TEST(obj, FL_EXIVAR)) { rb_free_generic_ivar((VALUE)obj); } @@ -1354,6 +1403,9 @@ 
garbage_collect() setjmp(save_regs_gc_mark); mark_locations_array((VALUE*)save_regs_gc_mark, sizeof(save_regs_gc_mark) / sizeof(VALUE *)); #if STACK_GROW_DIRECTION < 0 + if (GET_WATCHING()) { + printf("=============> %p %p\n", STACK_END, rb_gc_stack_start); + } rb_gc_mark_locations((VALUE*)STACK_END, rb_gc_stack_start); #elif STACK_GROW_DIRECTION > 0 rb_gc_mark_locations(rb_gc_stack_start, (VALUE*)STACK_END + 1); @@ -1926,6 +1978,8 @@ Init_GC() rb_define_singleton_method(rb_mGC, "enable", rb_gc_enable, 0); rb_define_singleton_method(rb_mGC, "disable", rb_gc_disable, 0); rb_define_method(rb_mGC, "garbage_collect", rb_gc_start, 0); + rb_define_singleton_method(rb_mGC, "watch", rb_gc_watch, 1); + rb_define_singleton_method(rb_mGC, "heaps", rb_gc_heaps, 0); rb_mObSpace = rb_define_module("ObjectSpace"); rb_define_module_function(rb_mObSpace, "each_object", os_each_obj, -1); Index: string.c =================================================================== RCS file: /src/ruby/string.c,v retrieving revision 1.182.2.44 diff -u -w -b -p -r1.182.2.44 string.c --- string.c 27 Oct 2005 08:19:20 -0000 1.182.2.44 +++ string.c 28 Feb 2006 04:13:06 -0000 @@ -4613,6 +4613,16 @@ rb_str_setter(val, id, var) *var = val; } +static VALUE +str_capacity(VALUE str) +{ + if (FL_TEST(str, ELTS_SHARED)) { + return LONG2FIX(0); + } + else { + return LONG2FIX(RSTRING(str)->aux.capa); + } +} /* * A <code>String</code> object holds and manipulates an arbitrary sequence of @@ -4730,6 +4740,8 @@ Init_String() rb_define_method(rb_cString, "sum", rb_str_sum, -1); + rb_define_method(rb_cString, "capacity", str_capacity, 0); /* for debug */ + rb_define_global_function("sub", rb_f_sub, -1); rb_define_global_function("gsub", rb_f_gsub, -1); //////////////////////////////////////////////////////////////////////////////// E:\ruby-cvs\win32_1_8>miniruby \a.rb heaps information heap#0000: used = 6752 / limit = 10000 heap#0001: used = 9000 / limit = 18000 heap#0002: used = 16200 / limit = 32400 heap#0003: 
used = 29160 / limit = 58320 heap#0004: used = 52488 / limit = 104976 heap#0005: used = 94478 / limit = 188956 heap#0006: used = 170060 / limit = 340120 heap#0007: used = 306108 / limit = 612216 heap#0008: used = 119259 / limit = 1101988 heaps total size = 59,207,424 String count = 800,124 capacity = 5,602,322 Array count = 7 capacity = 806,581 heaps information heap#0000: used = 6752 / limit = 10000 heap#0001: used = 9000 / limit = 18000 heap#0002: used = 16200 / limit = 32400 heap#0003: used = 29160 / limit = 58320 heap#0004: used = 52488 / limit = 104976 heap#0005: used = 94478 / limit = 188956 heap#0006: used = 170060 / limit = 340120 heap#0007: used = 306108 / limit = 612216 heap#0008: used = 119266 / limit = 1101988 heaps total size = 59,207,424 String count = 800,131 capacity = 5,602,467 Array count = 7 capacity = 806,581 heaps information heap#0000: used = 3502 / limit = 10000 heap#0001: used = 3 / limit = 1101988 # only 3 cells are used, but we cannot compact this slot. heaps total size = 26,687,712 # Oh my! String count = 126 capacity = 2,333 Array count = 6 capacity = 96 //////////////////////////////////////////////////////////////////////////////// But I didn't think RVALUE was so big. (24 bytes on my 32-bit Windows) typedef struct RVALUE { union { struct { unsigned long flags; /* always 0 for freed obj */ struct RVALUE *next; } free; struct RBasic basic; struct RObject object; struct RClass klass; struct RFloat flonum; struct RString string; struct RArray array; struct RRegexp regexp; struct RHash hash; struct RData data; struct RStruct rstruct; struct RBignum bignum; struct RFile file; struct RNode node; struct RMatch match; struct RVarmap varmap; struct SCOPE scope; } as; #ifdef GC_DEBUG char *file; int line; #endif } RVALUE; # For 64-bit platforms, I think `unsigned int flags` would be enough; otherwise, flags takes twice the memory.