Issue #8107 has been updated by tmm1 (Aman Gupta).


> Something like RubyVM.allocated_position(obj) => [file, line].

I'll defer API decisions to core, but a method under RubyVM or in the new objspace.so would be fine. I slightly prefer two separate methods, to avoid an array allocation when you're only interested in the filename.

Primarily, I am interested in feedback on the runtime flag in this patch. JRuby and Rubinius both provide allocation tracking, but MRI currently has no equivalent. This makes debugging object leaks very painful.

It sounds like a command-line option would be simpler to standardize on across implementations. I prefer it over an environment variable as well.

diff --git a/gc.c b/gc.c
index 2fc1d0c..cafebf2 100644
--- a/gc.c
+++ b/gc.c
@@ -1780,9 +1780,8 @@ rb_obj_get_metadata(VALUE obj)
  *
  *  Returns a string filename where +obj+ was allocated.
  *
- *  This method is only expected to work on C Ruby. An environment
- *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
- *  feature.
+ *  This method is only expected to work on C Ruby. Ruby must be run
+ *  with --debug-objects to enable this feature.
  */
 static VALUE
 rb_obj_sourcefile(VALUE obj)
@@ -1790,7 +1789,7 @@ rb_obj_sourcefile(VALUE obj)
     rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
 
     if (!track_metadata)
-        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+        rb_warn("__sourcefile__ requires --debug-objects");
 
     return meta ? meta->file : Qnil;
 }
@@ -1801,7 +1800,7 @@ rb_obj_sourceline(VALUE obj)
     rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
 
     if (!track_metadata)
-        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+        rb_warn("__sourceline__ requires --debug-objects");
 
     return meta ? INT2FIX(meta->line) : Qnil;
 }
@@ -3366,19 +3365,18 @@ rb_gc_disable(void)
 }
 
 void
+rb_obj_enable_metadata(void)
+{
+    track_metadata = TRUE;
+}
+
+void
 rb_gc_set_params(void)
 {
-    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr;
 
     if (rb_safe_level() > 0) return;
 
-    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
-    if (track_metadata_ptr != NULL) {
-	if (RTEST(ruby_verbose))
-	    fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
-	track_metadata = TRUE;
-    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
 	int malloc_limit_i = atoi(malloc_limit_ptr);
diff --git a/internal.h b/internal.h
index b099f24..5386f7d 100644
--- a/internal.h
+++ b/internal.h
@@ -143,6 +143,7 @@ void rb_w32_init_file(void);
 /* gc.c */
 void Init_heap(void);
 void *ruby_mimmalloc(size_t size);
+void rb_obj_enable_metadata(void);
 
 /* inits.c */
 void rb_call_inits(void);
diff --git a/ruby.c b/ruby.c
index a0b438d..095bf29 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1117,6 +1117,9 @@ proc_options(long argc, char **argv, struct cmdline_options *opt, int envopt)
 		set_source_encoding_once(opt, s, 0);
 	    }
 #endif
+	    else if (strcmp("debug-objects", s) == 0) {
+		rb_obj_enable_metadata();
+	    }
 	    else if (strcmp("version", s) == 0) {
 		if (envopt) goto noenvopt_long;
 		opt->dump |= DUMP_BIT(version);
@@ -1364,8 +1367,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt)
 	ruby_show_copyright();
     }
 
-    rb_gc_set_params();
-
     if (opt->safe_level >= 4) {
 	OBJ_TAINT(rb_argv);
 	OBJ_TAINT(GET_VM()->load_path);
@@ -1572,6 +1573,7 @@ process_options(int argc, char **argv, struct cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);
 
     rb_set_safe_level(opt->safe_level);
+    rb_gc_set_params();
 
     return iseq;
 }

----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107#change-37715

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee: 
Category: core
Target version: 


When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use, however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information.

The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way normal usage is not affected by additional memory usage, but it is still simple to enable tracking for debugging purposes without having to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__:

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;
 
 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };
 
 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif
 
+typedef struct rb_obj_metadata {
+    VALUE file;
+    unsigned short line;
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };
 
 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress;
 #define initial_heap_min_slots	initial_params.initial_heap_min_slots
 #define initial_free_min	initial_params.initial_free_min
 #define initial_growth_factor	initial_params.initial_growth_factor
+#define track_metadata	initial_params.track_metadata
 
 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)
 
@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
 	size_t i;
 	for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
 	    aligned_free(objspace->heap.sorted[i]);
 	}
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }
 
     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;
 
+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = &heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }
 
@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
 	    }
 	    else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
 		aligned_free(objspace->heap.sorted[i]);
 	    }
 	    heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }
 
+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *heap;
+
+    if (SPECIAL_CONST_P(obj))
+        return NULL;
+
+    heap = GET_HEAP_HEADER(obj);
+    if (!heap->metadata)
+        return NULL;
+
+    return &heap->metadata[NUM_IN_SLOT(obj)];
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string
+ *
+ *  Returns a string filename where +obj+ was allocated.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+
+    return meta ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);
 
     goto marking;		/* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 	rb_mark_generic_ivar(ptr);
     }
 
+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr;
 
     if (rb_safe_level() > 0) return;
 
+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+	if (RTEST(ruby_verbose))
+	    fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+	track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
 	int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);
 
+    rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1);
 
     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt)
 	return Qtrue;
     }
 
+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
 	opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
 	VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);
 
     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();
 
     return iseq;
 }




-- 
http://bugs.ruby-lang.org/