Hi,

At Sun, 8 Nov 2009 12:25:54 +0900,
Yusuke ENDOH wrote in [ruby-core:26612]:
> > I've been toying with allowing custom hashing from the Ruby side for the
> > past 2 days and have a semi-functional version (patch attached against
> > subversion trunk 25671 - http://gist.github.com/228773)
> 
> Unfortunately, your patch has a serious problem; it lets RHash have
> four VALUEs, which makes any object require 24 bytes (traditionally
> 20 bytes) in 32 bit envirionment.

I don't think that index_with object is necessary, why the hash
itself can't have such methods?


Index: hash.c =================================================================== --- hash.c (revision 25690) +++ hash.c (working copy) @@ -24,4 +24,6 @@ static VALUE rb_hash_s_try_convert(VALUE #define HASH_DELETED FL_USER1 #define HASH_PROC_DEFAULT FL_USER2 +#define HASH_PERMANET HASH_PROC_DEFAULT +#define RHASH_CUSTOM_P(h) (RHASH(h)->ntbl && RHASH(h)->ntbl->extended) VALUE @@ -34,5 +36,5 @@ VALUE rb_cHash; static VALUE envtbl; -static ID id_hash, id_yield, id_default; +static ID id_hash, id_yield, id_default, id_key_filter, id_custom_compare, id_custom_hash; static int @@ -110,4 +112,24 @@ static const struct st_hash_type identha }; +static VALUE +rb_hash_custom_key(VALUE hash, VALUE key) +{ + if (RHASH_CUSTOM_P(hash)) { + VALUE newkey = rb_check_funcall(hash, id_key_filter, 1, &key); + if (newkey != Qundef) key = newkey; + } + return key; +} + +static VALUE +hash_lookup(VALUE hash, VALUE *key, VALUE *val) +{ + st_data_t data; + *key = rb_hash_custom_key(hash, *key); + if (!st_lookup(RHASH(hash)->ntbl, (st_data_t)*key, &data)) return FALSE; + if (val) *val = (VALUE)data; + return TRUE; +} + typedef int st_foreach_func(st_data_t, st_data_t, st_data_t); @@ -494,5 +516,5 @@ rb_hash_aref(VALUE hash, VALUE key) VALUE val; - if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { + if (!RHASH(hash)->ntbl || !hash_lookup(hash, &key, &val)) { return rb_funcall(hash, id_default, 1, key); } @@ -505,5 +527,5 @@ rb_hash_lookup2(VALUE hash, VALUE key, V VALUE val; - if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { + if (!RHASH(hash)->ntbl || !hash_lookup(hash, &key, &val)) { return def; /* without Hash#default */ } @@ -559,5 +581,5 @@ rb_hash_fetch_m(int argc, VALUE *argv, V rb_warn("block supersedes default value argument"); } - if (!RHASH(hash)->ntbl || !st_lookup(RHASH(hash)->ntbl, key, &val)) { + if (!RHASH(hash)->ntbl || !hash_lookup(hash, &key, &val)) { if (block_given) return rb_yield(key); if (argc == 1) { @@ -1406,5 +1428,5 @@ rb_hash_has_key(VALUE hash, VALUE key) if (!RHASH(hash)->ntbl) return Qfalse; - if (st_lookup(RHASH(hash)->ntbl, key, 0)) { + if (hash_lookup(hash, &key, 0)) { return Qtrue; } @@ -1452,4 +1474,5 @@ struct equal_data { VALUE result; st_table *tbl; + VALUE custom; int eql; }; @@ -1462,4 +1485,8 @@ eql_i(VALUE key, VALUE val1, VALUE arg) if (key == Qundef) return ST_CONTINUE; + if (data->custom != Qundef) { + VALUE newkey = rb_check_funcall(data->custom, id_key_filter, 1, &key); + if (newkey != Qundef) key = newkey; + } if (!st_lookup(data->tbl, key, &val2)) { data->result = Qfalse; @@ -1514,4 +1541,5 @@ hash_equal(VALUE hash1, VALUE hash2, int data.tbl = RHASH(hash2)->ntbl; + data.custom = hash2; data.eql = eql; return rb_exec_recursive_paired(recursive_eql, hash1, hash2, (VALUE)&data); @@ -1854,4 +1882,49 @@ rb_hash_compare_by_id_p(VALUE hash) } +static int +rb_hash_custom_compare(st_data_t a, st_data_t b, st_data_t hash) +{ + VALUE args[2], c; + args[0] = (VALUE)a; + args[1] = (VALUE)b; + c = rb_check_funcall((VALUE)hash, id_custom_compare, 2, args); + if (c == Qundef) return rb_any_cmp((VALUE)a, (VALUE)b); + return rb_cmpint(c, (VALUE)a, (VALUE)b); +} + +static st_index_t +rb_hash_custom_hash(st_data_t a, st_data_t hash) +{ + VALUE args[1], c; + args[0] = (VALUE)a; + c = rb_check_funcall((VALUE)hash, id_custom_hash, 1, args); + if (c == Qundef) return rb_any_hash((VALUE)a); + return NUM2ULONG(c); +} + +static const struct st_hash_type customhash = { + rb_hash_custom_compare, + rb_hash_custom_hash, +}; + +VALUE +rb_hash_customized_p(VALUE hash) +{ + return RHASH_CUSTOM_P(hash) ? Qtrue : Qfalse; +} + +VALUE +rb_hash_customize(VALUE hash) +{ + st_table *tbl = RHASH(hash)->ntbl; + if (tbl) { + if (tbl->extended) return hash; + if (tbl->num_entries) rb_raise(rb_eArgError, "non-empty hash"); + st_free_table(tbl); + } + RHASH(hash)->ntbl = st_init_extable(&customhash, (st_data_t)hash); + return hash; +} + static int path_tainted = -1; @@ -2649,4 +2722,7 @@ Init_Hash(void) id_yield = rb_intern("yield"); id_default = rb_intern("default"); + id_key_filter = rb_intern("key_filter"); + id_custom_compare = rb_intern("custom_compare"); + id_custom_hash = rb_intern("custom_hash"); rb_cHash = rb_define_class("Hash", rb_cObject); @@ -2717,4 +2793,6 @@ Init_Hash(void) rb_define_method(rb_cHash,"compare_by_identity", rb_hash_compare_by_id, 0); rb_define_method(rb_cHash,"compare_by_identity?", rb_hash_compare_by_id_p, 0); + rb_define_method(rb_cHash,"customized?", rb_hash_customized_p, 0); + rb_define_method(rb_cHash,"customize", rb_hash_customize, 0); origenviron = environ; Index: st.c =================================================================== --- st.c (revision 25690) +++ st.c (working copy) @@ -26,4 +26,9 @@ struct st_table_entry { }; +typedef struct st_extable { + struct st_table basic; + st_data_t data; +} st_extable; + #define ST_DEFAULT_MAX_DENSITY 5 #define ST_DEFAULT_INIT_TABLE_SIZE 11 @@ -62,4 +67,5 @@ static void rehash(st_table *); #define malloc xmalloc #define calloc xcalloc +#define realloc xrealloc #define free(x) xfree(x) #endif @@ -70,8 +76,15 @@ static void rehash(st_table *); #define Calloc(n,s) (char*)calloc((n),(s)) -#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0) +#define extended_data(table) (((st_extable*)(table))->data) +#define compare_basic(table,x,y) (*table->type->compare)((x),(y)) +#define compare_extended(table,x,y) (*table->type->compare)((x),(y),extended_data(table)) +#define do_compare(table,x,y) ((table)->extended ? compare_extended(table,x,y) : compare_basic(table,x,y)) +#define EQUAL(table,x,y) ((x)==(y) || do_compare(table,x,y) == 0) /* remove cast to unsigned int in the future */ -#define do_hash(key,table) (unsigned int)(st_index_t)(*(table)->type->hash)((key)) +#define hash_basic(key,table) (*(table)->type->hash)((key)) +#define hash_extended(key,table) (*(table)->type->hash)((key),extended_data(table)) +#define do_hash(key,table) (unsigned int)(st_index_t)\ + ((table)->extended ? hash_extended(key,table) : hash_basic(key,table)) #define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins) @@ -164,6 +177,6 @@ stat_col(void) #define MAX_PACKED_NUMHASH (ST_DEFAULT_INIT_TABLE_SIZE/2) -st_table* -st_init_table_with_size(const struct st_hash_type *type, st_index_t size) +static st_table* +init_table_with_size(size_t tblsize, const struct st_hash_type *type, st_index_t size) { st_table *tbl; @@ -184,6 +197,7 @@ st_init_table_with_size(const struct st_ size = new_size(size); /* round up to prime number */ - tbl = alloc(st_table); + tbl = malloc(tblsize); tbl->type = type; + tbl->extended = 0; tbl->num_entries = 0; tbl->entries_packed = type == &type_numhash && size/2 <= MAX_PACKED_NUMHASH; @@ -197,4 +211,10 @@ st_init_table_with_size(const struct st_ st_table* +st_init_table_with_size(const struct st_hash_type *type, st_index_t size) +{ + return init_table_with_size(sizeof(st_table), type, size); +} + +st_table* st_init_table(const struct st_hash_type *type) { @@ -238,4 +258,28 @@ st_init_strcasetable_with_size(st_index_ } +st_table* +st_init_extable_with_size(const struct st_hash_type *type, st_data_t data, st_index_t size) +{ + st_table *tbl = init_table_with_size(sizeof(st_extable), type, size); + tbl->extended = 1; + extended_data(tbl) = data; + return tbl; +} + +st_table* +st_init_extable(const struct st_hash_type *type, st_data_t data) +{ + return st_init_extable_with_size(type, data, 0); +} + +st_table * +st_extend(st_table *table, const struct st_hash_type *type, st_data_t data) +{ + st_table *tbl = realloc(table, sizeof(st_extable)); + tbl->extended = 1; + extended_data(tbl) = data; + return tbl; +} + void st_clear(st_table *table) @@ -563,5 +607,5 @@ st_copy(st_table *old_table) st_index_t hash_val; - new_table = alloc(st_table); + new_table = malloc(old_table->extended ? sizeof(st_table) : sizeof(st_extable)); if (new_table == 0) { return 0; Index: include/ruby/st.h =================================================================== --- include/ruby/st.h (revision 25690) +++ include/ruby/st.h (working copy) @@ -79,5 +79,9 @@ struct st_hash_type { struct st_table { const struct st_hash_type *type; - st_index_t num_bins; + unsigned int extended : 1; +#ifdef __GNUC__ + __extension__ +#endif + st_index_t num_bins : ST_INDEX_BITS - 1; unsigned int entries_packed : 1; #ifdef __GNUC__ @@ -101,4 +105,7 @@ st_table *st_init_strtable_with_size(st_ st_table *st_init_strcasetable(void); st_table *st_init_strcasetable_with_size(st_index_t); +st_table *st_init_extable(const struct st_hash_type *, st_data_t); +st_table *st_init_extable_with_size(const struct st_hash_type *, st_data_t, st_index_t); +st_table *st_extend(st_table *, const struct st_hash_type *, st_data_t); int st_delete(st_table *, st_data_t *, st_data_t *); /* returns 0:notfound 1:deleted */ int st_delete_safe(st_table *, st_data_t *, st_data_t *, st_data_t); Index: test/ruby/test_hash.rb =================================================================== --- test/ruby/test_hash.rb (revision 25690) +++ test/ruby/test_hash.rb (working copy) @@ -869,6 +869,44 @@ class TestHash < Test::Unit::TestCase o = Object.new - def o.hash; 2<<100; end + def o.hash; 2 << 100; end assert_equal({x=>1}.hash, {x=>1}.hash) end + + class StringHash < Hash + def key_filter(key) + key.to_s + end + def initialize(*) + super.customize + end + end + + class IdentHash < Hash + def custom_compare(a, b) + p [a,b] + a.object_id == b.object_id + end + def custom_hash(a) + a.object_id + end + def initialize(*) + super.customize + end + end + + def test_stringhash + h = {"abc"=>"def"} + assert_nil(h[:abc]) + s = StringHash.new.update(h) + assert_equal("def", s[:abc]) + end + + def test_identhash + a = "abc" + h = {a=>"def"} + assert_equal("def", h["abc"]) + i = IdentHash.new.update(h) + assert_nil(i["abc"]) + assert_equal("def", h[a]) + end end
-- Nobu Nakada