Issue #8048 has been updated by tmm1 (Aman Gupta).


diff --git a/load.c b/load.c
index 6330e50..3010b18 100644
--- a/load.c
+++ b/load.c
@@ -171,7 +171,7 @@ reset_loaded_features_snapshot(void)
     rb_ary_replace(vm->loaded_features_snapshot, vm->loaded_features);
 }
 
-static VALUE
+static struct st_table *
 get_loaded_features_index_raw(void)
 {
     return GET_VM()->loaded_features_index;
@@ -186,12 +186,19 @@ get_loading_table(void)
 static void
 features_index_add_single(VALUE short_feature, VALUE offset)
 {
-    VALUE features_index, this_feature_index;
+    struct st_table *features_index;
+    VALUE this_feature_index = Qnil;
+    char *short_feature_cstr;
+
+    Check_Type(offset, T_FIXNUM);
+    Check_Type(short_feature, T_STRING);
+    short_feature_cstr = RSTRING_PTR(short_feature);
+
     features_index = get_loaded_features_index_raw();
-    this_feature_index = rb_hash_lookup(features_index, short_feature);
+    st_lookup(features_index, (st_data_t)short_feature_cstr, (st_data_t *)&this_feature_index);
 
     if (NIL_P(this_feature_index)) {
-	rb_hash_aset(features_index, short_feature, offset);
+	st_insert(features_index, (st_data_t)ruby_strdup(short_feature_cstr), (st_data_t)offset);
     }
     else if (RB_TYPE_P(this_feature_index, T_FIXNUM)) {
 	VALUE feature_indexes[2];
@@ -199,7 +206,7 @@ features_index_add_single(VALUE short_feature, VALUE offset)
 	feature_indexes[1] = offset;
 	this_feature_index = rb_ary_tmp_new(numberof(feature_indexes));
 	rb_ary_cat(this_feature_index, feature_indexes, numberof(feature_indexes));
-	rb_hash_aset(features_index, short_feature, this_feature_index);
+	st_insert(features_index, (st_data_t)short_feature_cstr, (st_data_t)this_feature_index);
     }
     else {
 	Check_Type(this_feature_index, T_ARRAY);
@@ -254,7 +261,14 @@ features_index_add(VALUE feature, VALUE offset)
     }
 }
 
-static VALUE
+static int
+loaded_features_index_clear_i(st_data_t key, st_data_t val, st_data_t arg)
+{
+    xfree((char*) key);
+    return ST_DELETE;
+}
+
+static st_table *
 get_loaded_features_index(void)
 {
     VALUE features;
@@ -264,7 +278,7 @@ get_loaded_features_index(void)
     if (!rb_ary_shared_with_p(vm->loaded_features_snapshot, vm->loaded_features)) {
 	/* The sharing was broken; something (other than us in rb_provide_feature())
 	   modified loaded_features.  Rebuild the index. */
-	rb_hash_clear(vm->loaded_features_index);
+	st_foreach(vm->loaded_features_index, loaded_features_index_clear_i, 0);
 	features = vm->loaded_features;
 	for (i = 0; i < RARRAY_LEN(features); i++) {
 	    VALUE entry, as_str;
@@ -357,10 +371,10 @@ loaded_feature_path_i(st_data_t v, st_data_t b, st_data_t f)
 static int
 rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const char **fn)
 {
-    VALUE features, features_index, feature_val, this_feature_index, v, p, load_path = 0;
+    VALUE features, feature_val, this_feature_index = Qnil, v, p, load_path = 0;
     const char *f, *e;
     long i, len, elen, n;
-    st_table *loading_tbl;
+    st_table *loading_tbl, *features_index;
     st_data_t data;
     int type;
 
@@ -379,7 +393,7 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c
     features_index = get_loaded_features_index();
 
     feature_val = rb_str_new(feature, len);
-    this_feature_index = rb_hash_lookup(features_index, feature_val);
+    st_lookup(features_index, (st_data_t)feature, (st_data_t *)&this_feature_index);
     /* We search `features` for an entry such that either
          "#{features[i]}" == "#{load_path[j]}/#{feature}#{e}"
        for some j, or
@@ -1120,8 +1134,7 @@ Init_load()
     rb_define_virtual_variable("$LOADED_FEATURES", get_loaded_features, 0);
     vm->loaded_features = rb_ary_new();
     vm->loaded_features_snapshot = rb_ary_tmp_new(0);
-    vm->loaded_features_index = rb_hash_new();
-    RBASIC(vm->loaded_features_index)->klass = 0;
+    vm->loaded_features_index = st_init_strtable();
 
     rb_define_global_function("load", rb_f_load, -1);
     rb_define_global_function("require", rb_f_require, 1);
diff --git a/vm.c b/vm.c
index 9126502..b4132cd 100644
--- a/vm.c
+++ b/vm.c
@@ -1565,7 +1565,6 @@ rb_vm_mark(void *ptr)
 	RUBY_MARK_UNLESS_NULL(vm->expanded_load_path);
 	RUBY_MARK_UNLESS_NULL(vm->loaded_features);
 	RUBY_MARK_UNLESS_NULL(vm->loaded_features_snapshot);
-	RUBY_MARK_UNLESS_NULL(vm->loaded_features_index);
 	RUBY_MARK_UNLESS_NULL(vm->top_self);
 	RUBY_MARK_UNLESS_NULL(vm->coverages);
 	rb_gc_mark_locations(vm->special_exceptions, vm->special_exceptions + ruby_special_error_count);
@@ -1573,6 +1572,9 @@ rb_vm_mark(void *ptr)
 	if (vm->loading_table) {
 	    rb_mark_tbl(vm->loading_table);
 	}
+	if (vm->loaded_features_index) {
+	    rb_mark_tbl(vm->loaded_features_index);
+	}
 
 	rb_vm_trace_mark_event_hooks(&vm->event_hooks);
 
diff --git a/vm_core.h b/vm_core.h
index 1838f2a..2ac57fc 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -364,7 +364,7 @@ typedef struct rb_vm_struct {
     VALUE expanded_load_path;
     VALUE loaded_features;
     VALUE loaded_features_snapshot;
-    VALUE loaded_features_index;
+    struct st_table *loaded_features_index;
     struct st_table *loading_table;
 
     /* signal */

----------------------------------------
Bug #8048: require() features_index bloats size of ruby heap
https://bugs.ruby-lang.org/issues/8048#change-37409

Author: tmm1 (Aman Gupta)
Status: Closed
Priority: Normal
Assignee: 
Category: 
Target version: 
ruby -v: 2.0.0-p0


The new features_index for require() in 2.0.0 creates a lot of extra ARRAY and STRING objects. In a big rails application, there are ~70k extra strings and ~70k extra arrays on the heap after boot.

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require "./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, :T_ARRAY, :T_HASH].include? k } '
1.9.3
[:NUM_LOADED_FEATURES, 2933]
{:T_STRING=>291147, :T_ARRAY=>97402, :T_HASH=>9439}

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require "./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, :T_ARRAY, :T_HASH].include? k } '
2.0.0
[:NUM_LOADED_FEATURES, 2945]
{:T_STRING=>363183, :T_ARRAY=>167871, :T_HASH=>9640}


The following patch reduces the number of arrays used by the features_index:

diff --git a/load.c b/load.c
index ea22d5b..1868518 100644
--- a/load.c
+++ b/load.c
@@ -186,11 +186,16 @@ features_index_add_single(VALUE short_feature, VALUE offset)
 {
     VALUE features_index, this_feature_index;
     features_index = get_loaded_features_index_raw();
-    if ((this_feature_index = rb_hash_lookup(features_index, short_feature)) == Qnil) {
-	this_feature_index = rb_ary_new();
-	rb_hash_aset(features_index, short_feature, this_feature_index);
+    this_feature_index = rb_hash_lookup(features_index, short_feature);
+
+    if (this_feature_index == Qnil) {
+        rb_hash_aset(features_index, short_feature, offset);
+    } else if (RB_TYPE_P(this_feature_index, T_FIXNUM)) {
+        this_feature_index = rb_ary_new3(2, this_feature_index, offset);
+        rb_hash_aset(features_index, short_feature, this_feature_index);
+    } else if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+        rb_ary_push(this_feature_index, offset);
     }
-    rb_ary_push(this_feature_index, offset);
 }
 
 /* Add to the loaded-features index all the required entries for
@@ -389,8 +394,16 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c
        or ends in '/'.  This includes both match forms above, as well
        as any distractors, so we may ignore all other entries in `features`.
      */
-    for (i = 0; this_feature_index != Qnil && i < RARRAY_LEN(this_feature_index); i++) {
-	long index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+    for (i = 0; this_feature_index != Qnil; i++) {
+	long index;
+	if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
+	  if (i >= RARRAY_LEN(this_feature_index)) break;
+	  index = FIX2LONG(rb_ary_entry(this_feature_index, i));
+	} else {
+	  if (i > 0) break;
+	  index = FIX2LONG(this_feature_index);
+	}
+
 	v = RARRAY_PTR(features)[index];
 	f = StringValuePtr(v);
 	if ((n = RSTRING_LEN(v)) < len) continue;



-- 
http://bugs.ruby-lang.org/