Issue #8107 has been reported by tmm1 (Aman Gupta).

----------------------------------------
Feature #8107: [patch] runtime flag to track object allocation metadata
https://bugs.ruby-lang.org/issues/8107

Author: tmm1 (Aman Gupta)
Status: Open
Priority: Normal
Assignee: 
Category: core
Target version: 


When a ruby program contains a reference leak, debugging is a lot easier if you know where each object was allocated. Tools like bleakhouse and memprof have provided this functionality in the past, but were brittle and required source/runtime patches to ruby.

Ruby already provides basic callsite tracking if you recompile ruby with GC_DEBUG. This is impractical for daily use, however, since it increases the size of the ruby heap by ~30%. There is also no API to access the debug information.

The following patch moves the GC_DEBUG file/line tracking outside of RVALUE, and adds a runtime flag (via environment variable) to enable it. This way, normal usage incurs no additional memory overhead, but it is still simple to enable tracking for debugging purposes without having to recompile ruby.

I've exposed this data via BasicObject#__sourcefile__ and BasicObject#__sourceline__:

$ ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
-e:4: warning: #__sourcefile__ requires RUBY_OBJECT_METADATA=1
-e:4: warning: #__sourceline__ requires RUBY_OBJECT_METADATA=1
[String, "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems/exceptions", nil, nil]

$ RUBY_OBJECT_METADATA=1 ruby -e'
  GC.start
  ObjectSpace.each_object.first(1).each{ |o|
    p [o.class, o, o.__sourcefile__, o.__sourceline__]
  }
'
[String, "$(datarootdir)/doc/$(PACKAGE)", "/Users/test/.rbenv/versions/2.1.0dev/lib/ruby/2.1.0/rubygems.rb", 8]

diff --git a/gc.c b/gc.c
index bd95073..2fc1d0c 100644
--- a/gc.c
+++ b/gc.c
@@ -81,6 +81,7 @@ typedef struct {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     int gc_stress;
 #endif
+    int track_metadata;
 } ruby_gc_params_t;
 
 static ruby_gc_params_t initial_params = {
@@ -91,6 +92,7 @@ static ruby_gc_params_t initial_params = {
 #if defined(ENABLE_VM_OBJSPACE) && ENABLE_VM_OBJSPACE
     FALSE,
 #endif
+    FALSE
 };
 
 #define nomem_error GET_VM()->special_exceptions[ruby_error_nomemory]
@@ -162,6 +164,11 @@ typedef struct RVALUE {
 #pragma pack(pop)
 #endif
 
+typedef struct rb_obj_metadata {
+    VALUE file; /* source file name stored by newobj() when tracking is enabled */
+    int line;   /* source line; int like rb_sourceline(), so large line numbers are not truncated */
+} rb_obj_metadata_t;
+
 struct heaps_slot {
     struct heaps_header *header;
     uintptr_t *bits;
@@ -177,6 +184,7 @@ struct heaps_header {
     RVALUE *start;
     RVALUE *end;
     size_t limit;
+    rb_obj_metadata_t *metadata;
 };
 
 struct heaps_free_bitmap {
@@ -291,6 +299,7 @@ int *ruby_initial_gc_stress_ptr = &rb_objspace.gc_stress;
 #define initial_heap_min_slots	initial_params.initial_heap_min_slots
 #define initial_free_min	initial_params.initial_free_min
 #define initial_growth_factor	initial_params.initial_growth_factor
+#define track_metadata	initial_params.track_metadata
 
 #define is_lazy_sweeping(objspace) ((objspace)->heap.sweep_slots != 0)
 
@@ -413,6 +422,8 @@ rb_objspace_free(rb_objspace_t *objspace)
     if (objspace->heap.sorted) {
 	size_t i;
 	for (i = 0; i < heaps_used; ++i) {
+            if (objspace->heap.sorted[i]->metadata)
+                free(objspace->heap.sorted[i]->metadata);
             free(objspace->heap.sorted[i]->bits);
 	    aligned_free(objspace->heap.sorted[i]);
 	}
@@ -538,6 +549,7 @@ assign_heap_slot(rb_objspace_t *objspace)
     objspace->heap.sorted[hi]->end = (p + objs);
     objspace->heap.sorted[hi]->base = heaps;
     objspace->heap.sorted[hi]->limit = objs;
+    objspace->heap.sorted[hi]->metadata = NULL;
     assert(objspace->heap.free_bitmap != NULL);
     heaps->bits = (uintptr_t *)objspace->heap.free_bitmap;
     objspace->heap.sorted[hi]->bits = (uintptr_t *)objspace->heap.free_bitmap;
@@ -667,12 +679,19 @@ newobj(VALUE klass, VALUE flags)
     }
 
     MEMZERO((void*)obj, RVALUE, 1);
-#ifdef GC_DEBUG
-    RANY(obj)->file = rb_sourcefile();
-    RANY(obj)->line = rb_sourceline();
-#endif
     objspace->total_allocated_object_num++;
 
+    if (UNLIKELY(track_metadata)) {
+        struct heaps_header *heap = GET_HEAP_HEADER(obj);
+        if (!heap->metadata)
+            heap->metadata = calloc(HEAP_OBJ_LIMIT, sizeof(rb_obj_metadata_t));
+        if (heap->metadata) {
+            rb_obj_metadata_t *meta = &heap->metadata[NUM_IN_SLOT(obj)];
+            meta->file = rb_sourcefilename();
+            meta->line = rb_sourceline();
+        }
+    }
+
     return obj;
 }
 
@@ -867,6 +886,8 @@ free_unused_heaps(rb_objspace_t *objspace)
                 last = objspace->heap.sorted[i];
 	    }
 	    else {
+                if (objspace->heap.sorted[i]->metadata)
+                    free(objspace->heap.sorted[i]->metadata);
 		aligned_free(objspace->heap.sorted[i]);
 	    }
 	    heaps_used--;
@@ -1736,6 +1757,55 @@ rb_obj_id(VALUE obj)
     return nonspecial_obj_id(obj);
 }
 
+/* Find obj's slot in its heap's metadata table; NULL if there is none. */
+static inline rb_obj_metadata_t *
+rb_obj_get_metadata(VALUE obj)
+{
+    struct heaps_header *hdr;
+    rb_obj_metadata_t *table = NULL;
+    if (!SPECIAL_CONST_P(obj)) {
+        hdr = GET_HEAP_HEADER(obj);
+        table = hdr->metadata;  /* stays NULL until newobj() allocates it */
+    }
+
+    /* Special constants and heaps without a table both yield NULL. */
+    return table ? &table[NUM_IN_SLOT(obj)] : NULL;
+}
+
+/*
+ *  Document-method: __sourcefile__
+ *
+ *  call-seq:
+ *     obj.__sourcefile__       -> string or nil
+ *
+ *  Returns the filename where +obj+ was allocated, or +nil+ if unknown.
+ *
+ *  This method is only expected to work on C Ruby. An environment
+ *  variable (RUBY_OBJECT_METADATA=1) must be set to enable this
+ *  feature.
+ */
+static VALUE
+rb_obj_sourcefile(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+
+    if (!track_metadata)
+        rb_warn("#__sourcefile__ requires RUBY_OBJECT_METADATA=1");
+    /* Slots newobj() never filled are still calloc-zeroed; report nil for them. */
+    return (meta && RTEST(meta->file)) ? meta->file : Qnil;
+}
+
+static VALUE
+rb_obj_sourceline(VALUE obj)
+{
+    rb_obj_metadata_t *meta = rb_obj_get_metadata(obj);
+    /* Backs BasicObject#__sourceline__: the line where obj was allocated, or nil. */
+    if (!track_metadata)
+        rb_warn("#__sourceline__ requires RUBY_OBJECT_METADATA=1");
+    /* No file recorded means no usable location: report nil rather than a bare 0. */
+    return (meta && RTEST(meta->file)) ? INT2FIX(meta->line) : Qnil;
+}
+
 static int
 set_zero(st_data_t key, st_data_t val, st_data_t arg)
 {
@@ -2606,6 +2676,7 @@ rb_gc_mark(VALUE ptr)
 static void
 gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 {
+    register rb_obj_metadata_t *meta;
     register RVALUE *obj = RANY(ptr);
 
     goto marking;		/* skip */
@@ -2626,6 +2697,9 @@ gc_mark_children(rb_objspace_t *objspace, VALUE ptr)
 	rb_mark_generic_ivar(ptr);
     }
 
+    if ((meta = rb_obj_get_metadata(ptr)) && RTEST(meta->file))
+        gc_mark(objspace, meta->file);
+
     switch (BUILTIN_TYPE(obj)) {
       case T_NIL:
       case T_FIXNUM:
@@ -3294,10 +3368,17 @@ rb_gc_disable(void)
 void
 rb_gc_set_params(void)
 {
+    char *track_metadata_ptr;
     char *malloc_limit_ptr, *heap_min_slots_ptr, *free_min_ptr, *growth_factor_ptr;
 
     if (rb_safe_level() > 0) return;
 
+    track_metadata_ptr = getenv("RUBY_OBJECT_METADATA");
+    if (track_metadata_ptr != NULL) {
+	if (RTEST(ruby_verbose))
+	    fprintf(stderr, "track_metadata=TRUE (FALSE)\n");
+	track_metadata = TRUE;
+    }
     malloc_limit_ptr = getenv("RUBY_GC_MALLOC_LIMIT");
     if (malloc_limit_ptr != NULL) {
 	int malloc_limit_i = atoi(malloc_limit_ptr);
@@ -4535,6 +4616,9 @@ Init_GC(void)
     rb_define_method(rb_cBasicObject, "__id__", rb_obj_id, 0);
     rb_define_method(rb_mKernel, "object_id", rb_obj_id, 0);
 
+    rb_define_method(rb_cBasicObject, "__sourcefile__", rb_obj_sourcefile, 0);
+    rb_define_method(rb_cBasicObject, "__sourceline__", rb_obj_sourceline, 0);
+
     rb_define_module_function(rb_mObSpace, "count_objects", count_objects, -1);
 
     {
diff --git a/ruby.c b/ruby.c
index 6b61162..4c7e93f 100644
--- a/ruby.c
+++ b/ruby.c
@@ -1337,6 +1337,8 @@ process_options(int argc, char **argv, struct cmdline_options *opt)
 	return Qtrue;
     }
 
+    rb_gc_set_params();
+
     if (!(opt->disable & DISABLE_BIT(rubyopt)) &&
 	opt->safe_level == 0 && (s = getenv("RUBYOPT"))) {
 	VALUE src_enc_name = opt->src.enc.name;
@@ -1570,7 +1572,6 @@ process_options(int argc, char **argv, struct cmdline_options *opt)
     rb_define_readonly_boolean("$-a", opt->do_split);
 
     rb_set_safe_level(opt->safe_level);
-    rb_gc_set_params();
 
     return iseq;
 }




-- 
http://bugs.ruby-lang.org/