In article <Pine.GSO.4.64.0709281302390.26570 / brains.eng.cse.dmu.ac.uk>, Hugh Sasse <hgs / dmu.ac.uk> writes: > I've been looking at Tom Copeland's memory allocation problem: > > http://tomcopeland.blogs.com/juniordeveloper/2007/09/tracking-down-a.html I think OpenStruct needs much more memory than Hash. It needs accessor methods for every member of every object. I implemented ObjectSpace.count_objects to count objects. % ./ruby -rpp -e 'pp ObjectSpace.count_objects' {:T_ARRAY=>1261, :T_BIGNUM=>42, :T_CLASS=>418, :T_DATA=>297, :T_FILE=>5, :T_FLOAT=>6, :T_HASH=>3, :T_ICLASS=>21, :T_MATCH=>1, :T_MODULE=>18, :T_NODE=>10918, :T_OBJECT=>7, :T_REGEXP=>5, :T_STRING=>3101, :T_VALUES=>2, :freed=>11895} This means that there are 1261 arrays, etc. ObjectSpace.count_objects can be used to count the objects required by Hash/OpenStruct. Hash: % ./ruby -e ' o = {} GC.start; c = ObjectSpace.count_objects 10.times {|i| o["foo#{i}"] = 1 GC.start; c2 = ObjectSpace.count_objects c.keys.each {|k| n = c2[k] - c[k]; print "#{k}:#{n} " if 0<n }; puts c = c2 } ' T_STRING:28 T_HASH:1 T_STRING:1 T_ARRAY:1 T_HASH:1 T_STRING:1 T_STRING:1 T_STRING:1 T_STRING:1 T_STRING:1 T_STRING:1 T_STRING:1 T_STRING:1 Hash needs no extra objects. It just needs a string for the key. Internally, Hash uses st_table. st_table needs a struct st_table_entry for each entry. The size of struct st_table_entry is 6 words (Ruby 1.9), which is 24 bytes in a 32-bit environment. 
OpenStruct: % ./ruby -rostruct -e ' o = OpenStruct.new GC.start; c = ObjectSpace.count_objects 10.times {|i| o.send("foo#{i}=", 1) GC.start; c2 = ObjectSpace.count_objects c.keys.each {|k| n = c2[k] - c[k]; print "#{k}:#{n} " if 0<n }; puts c = c2 } ' T_CLASS:1 T_STRING:30 T_ARRAY:1 T_HASH:1 T_DATA:6 T_NODE:7 T_STRING:2 T_HASH:1 T_DATA:6 T_VALUES:1 T_NODE:7 T_STRING:2 T_DATA:6 T_VALUES:1 T_NODE:7 T_STRING:2 T_DATA:6 T_VALUES:1 T_NODE:7 T_STRING:2 T_DATA:6 T_VALUES:1 T_NODE:7 T_STRING:2 T_DATA:6 T_VALUES:1 T_NODE:7 T_STRING:2 T_DATA:6 T_VALUES:1 T_NODE:7 T_STRING:2 T_DATA:6 T_VALUES:1 T_NODE:7 T_STRING:2 T_DATA:6 T_VALUES:1 T_NODE:7 T_STRING:2 T_DATA:6 T_VALUES:1 T_NODE:7 Each OpenStruct member needs 1 string, 6 data objects, 1 values object and 7 nodes, in addition to a key string and a struct st_table_entry. (OpenStruct uses a Hash.) Since an object needs 20 bytes, each member requires at least 20*(1+6+1+7) = 300 bytes. Also, T_STRING, T_DATA and T_VALUES objects may need additional memory allocated by malloc. So I guess it is not a memory leak; rather, OpenStruct is a memory eater. 
ObjectSpace.count_objects implementation: Index: gc.c =================================================================== --- gc.c (revision 13576) +++ gc.c (working copy) @@ -2145,6 +2145,70 @@ return (VALUE)((SIGNED_VALUE)obj|FIXNUM_FLAG); } +static VALUE +count_objects(VALUE klass) +{ + long counts[T_MASK+1]; + long freed = 0; + int i; + VALUE hash; + + for (i = 0; i <= T_MASK; i++) { + counts[i] = 0; + } + + for (i = 0; i < heaps_used; i++) { + RVALUE *p, *pend; + + p = heaps[i].slot; pend = p + heaps[i].limit; + for (;p < pend; p++) { + if (p->as.basic.flags) { + counts[BUILTIN_TYPE(p)]++; + } + else { + freed++; + } + } + } + + hash = rb_hash_new(); + rb_hash_aset(hash, ID2SYM(rb_intern("freed")), LONG2NUM(freed)); + for (i = 0; i <= T_MASK; i++) { + VALUE type; + switch (i) { + case T_NONE: type = ID2SYM(rb_intern("T_NONE")); break; + case T_NIL: type = ID2SYM(rb_intern("T_NIL")); break; + case T_OBJECT: type = ID2SYM(rb_intern("T_OBJECT")); break; + case T_CLASS: type = ID2SYM(rb_intern("T_CLASS")); break; + case T_ICLASS: type = ID2SYM(rb_intern("T_ICLASS")); break; + case T_MODULE: type = ID2SYM(rb_intern("T_MODULE")); break; + case T_FLOAT: type = ID2SYM(rb_intern("T_FLOAT")); break; + case T_STRING: type = ID2SYM(rb_intern("T_STRING")); break; + case T_REGEXP: type = ID2SYM(rb_intern("T_REGEXP")); break; + case T_ARRAY: type = ID2SYM(rb_intern("T_ARRAY")); break; + case T_FIXNUM: type = ID2SYM(rb_intern("T_FIXNUM")); break; + case T_HASH: type = ID2SYM(rb_intern("T_HASH")); break; + case T_STRUCT: type = ID2SYM(rb_intern("T_STRUCT")); break; + case T_BIGNUM: type = ID2SYM(rb_intern("T_BIGNUM")); break; + case T_FILE: type = ID2SYM(rb_intern("T_FILE")); break; + case T_TRUE: type = ID2SYM(rb_intern("T_TRUE")); break; + case T_FALSE: type = ID2SYM(rb_intern("T_FALSE")); break; + case T_DATA: type = ID2SYM(rb_intern("T_DATA")); break; + case T_MATCH: type = ID2SYM(rb_intern("T_MATCH")); break; + case T_SYMBOL: type = ID2SYM(rb_intern("T_SYMBOL")); break; + 
case T_VALUES: type = ID2SYM(rb_intern("T_VALUES")); break; + case T_BLOCK: type = ID2SYM(rb_intern("T_BLOCK")); break; + case T_UNDEF: type = ID2SYM(rb_intern("T_UNDEF")); break; + case T_NODE: type = ID2SYM(rb_intern("T_NODE")); break; + default: type = INT2NUM(i); break; + } + if (counts[i]) + rb_hash_aset(hash, type, LONG2NUM(counts[i])); + } + + return hash; +} + /* * The <code>GC</code> module provides an interface to Ruby's mark and * sweep garbage collection mechanism. Some of the underlying methods @@ -2190,4 +2254,6 @@ rb_define_method(rb_mKernel, "hash", rb_obj_id, 0); rb_define_method(rb_mKernel, "__id__", rb_obj_id, 0); rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0); + + rb_define_module_function(rb_mObSpace, "count_objects", count_objects, 0); } -- Tanaka Akira