なかだです。 compared_byという名前にしようかと思いましたが、比較だけではないのでidentified_byとしてみました。 marshal.cのw_encoding()も整理してあります。
Index: hash.c =================================================================== --- hash.c (revision 15489) +++ hash.c (working copy) @@ -25,4 +25,5 @@ static VALUE rb_hash_s_try_convert(VALUE #define HASH_DELETED FL_USER1 #define HASH_PROC_DEFAULT FL_USER2 +#define HASH_IDENT_BY_METHOD FL_USER3 VALUE @@ -410,5 +411,6 @@ static VALUE rb_hash_rehash(VALUE hash) { - st_table *tbl; + st_table *tbl, *ntbl; + VALUE htmp; if (RHASH(hash)->iter_lev > 0) { @@ -416,10 +418,15 @@ rb_hash_rehash(VALUE hash) } rb_hash_modify_check(hash); - if (!RHASH(hash)->ntbl) + if (!(ntbl = RHASH(hash)->ntbl)) return hash; - tbl = st_init_table_with_size(RHASH(hash)->ntbl->type, RHASH(hash)->ntbl->num_entries); + htmp = hash_alloc(0); + tbl = st_init_table_with_size(ntbl->type, ntbl->num_entries); + RHASH(htmp)->ntbl = tbl; rb_hash_foreach(hash, rb_hash_rehash_i, (st_data_t)tbl); - st_free_table(RHASH(hash)->ntbl); + ntbl = RHASH(hash)->ntbl; + RHASH(htmp)->ntbl = 0; RHASH(hash)->ntbl = tbl; + st_free_table(ntbl); + rb_gc_force_recycle(htmp); return hash; @@ -1683,21 +1690,46 @@ rb_hash_flatten(int argc, VALUE *argv, V } -static const struct st_hash_type identhash = { - st_numcmp, - st_numhash, +struct hash_ident { + ID equal, hash; +}; + +static int +compare_by_method(st_data_t x, st_data_t y, st_table *t) +{ + struct hash_ident *ids = st_extra_data(t); + VALUE other = (VALUE)y; + VALUE val = rb_funcall2((VALUE)x, ids->equal, 1, &other); + return RTEST(val); +} + +static int +hash_by_method(st_data_t x, st_table *t) +{ + struct hash_ident *ids = st_extra_data(t); + VALUE val = rb_funcall2((VALUE)x, ids->hash, 0, 0); + return NUM2INT(val); +} + +static const struct st_hash_type hash_ident_type = { + compare_by_method, + hash_by_method, }; /* * call-seq: - * hsh.compare_by_identity => hsh + * Hash.identified_by(equ, hash) => hsh * - * Makes <i>hsh</i> to compare its keys by their identity, i.e. it - * will consider exact same objects as same keys. 
+ + * Makes <i>hsh</i> which identifies its keys with +equ+ and +hash+ + * methods. + * - * h1 = { "a" => 100, "b" => 200, :c => "c" } - * h1["a"] #=> 100 - * h1.compare_by_identity - * h1.compare_by_identity? #=> true + * h1 = Hash.identified_by(:equal?, :object_id) + * h1["a"] = 100 + * key_a = h1.key(100) + * h1["b"] = 200 + * h1[:c] = "c" * h1["a"] #=> nil # different objects. + * h1[key_a] #=> 100 # same object as a key. * h1[:c] #=> "c" # same symbols are all same. * @@ -1705,30 +1737,75 @@ static const struct st_hash_type identha static VALUE -rb_hash_compare_by_id(VALUE hash) +rb_hash_s_identified_by(VALUE klass, VALUE equal_mid, VALUE hash_mid) { - rb_hash_modify(hash); - RHASH(hash)->ntbl->type = &identhash; - rb_hash_rehash(hash); - return hash; + struct hash_ident ids; + VALUE h; + st_table *tbl; + + ids.equal = rb_to_id(equal_mid); + ids.hash = rb_to_id(hash_mid); + h = hash_alloc(klass); + RBASIC(h)->flags |= HASH_IDENT_BY_METHOD; + tbl = st_init_table_extra(&hash_ident_type, sizeof(ids)); + RHASH(h)->ntbl = tbl; + *(struct hash_ident *)st_extra_data(tbl) = ids; + return h; } /* * call-seq: - * hsh.compare_by_identity? => true or false - * - * Returns <code>true</code> if <i>hsh</i> will compare its keys by - * their identity. Also see <code>Hash#compare_by_identity</code>. + * hsh.methods_to_identify => a pair of symbols or nil * + * Returns <code>Symbol</code>s to be used to identify the items if + * <i>hsh</i> is created with <code>Hash#identified_by</code>. + * Otherwise, returns nil. 
*/ -static VALUE -rb_hash_compare_by_id_p(VALUE hash) +VALUE +rb_hash_get_ident_methods(VALUE hash) { - if (!RHASH(hash)->ntbl) - return Qfalse; - if (RHASH(hash)->ntbl->type == &identhash) { - return Qtrue; - } - return Qfalse; + struct hash_ident *ids; + + if (!FL_TEST(hash, HASH_IDENT_BY_METHOD)) return Qnil; + ids = st_extra_data(RHASH(hash)->ntbl); + return rb_assoc_new(ID2SYM(ids->equal), ID2SYM(ids->hash)); +} + +ID +rb_hash_ident_methods_id(void) +{ + return rb_intern("methods_to_identify"); +} + +void +rb_hash_set_ident_method(VALUE hash, VALUE val) +{ + struct hash_ident ids; + st_table *tbl, *ntbl; + VALUE htmp; + + if (TYPE(val) != T_ARRAY || RARRAY_LEN(val) != 2 || + !SYMBOL_P(RARRAY_PTR(val)[0]) || + !SYMBOL_P(RARRAY_PTR(val)[1])) { + rb_raise(rb_eArgError, "wrong methods to identify"); + } + ids.equal = SYM2ID(RARRAY_PTR(val)[0]); + ids.hash = SYM2ID(RARRAY_PTR(val)[1]); + + ntbl = RHASH(hash)->ntbl; + htmp = hash_alloc(0); + tbl = st_init_table_extra_with_size(&hash_ident_type, + sizeof(ids), + ntbl->num_entries); + RBASIC(htmp)->flags |= HASH_IDENT_BY_METHOD; + RHASH(htmp)->ntbl = tbl; + *(struct hash_ident *)st_extra_data(tbl) = ids; + rb_hash_foreach(hash, rb_hash_rehash_i, (st_data_t)tbl); + ntbl = RHASH(hash)->ntbl; + RHASH(htmp)->ntbl = 0; + RHASH(hash)->ntbl = tbl; + RBASIC(hash)->flags |= HASH_IDENT_BY_METHOD; + st_free_table(ntbl); + rb_gc_force_recycle(htmp); } @@ -2610,6 +2687,6 @@ Init_Hash(void) rb_define_method(rb_cHash,"value?", rb_hash_has_value, 1); - rb_define_method(rb_cHash,"compare_by_identity", rb_hash_compare_by_id, 0); - rb_define_method(rb_cHash,"compare_by_identity?", rb_hash_compare_by_id_p, 0); + rb_define_singleton_method(rb_cHash, "identified_by", rb_hash_s_identified_by, 2); + rb_define_method(rb_cHash, "methods_to_identify", rb_hash_get_ident_methods, 0); #ifndef __MACOS__ /* environment variables nothing on MacOS. 
*/ Index: marshal.c =================================================================== --- marshal.c (revision 15489) +++ marshal.c (working copy) @@ -465,34 +465,91 @@ w_obj_each(ID id, VALUE value, struct du } -static void -w_encoding(VALUE obj, long num, struct dump_call_arg *arg) +ID rb_hash_ident_methods_id(void); +VALUE rb_hash_get_ident_methods(VALUE); +void rb_hash_set_ident_method(VALUE, VALUE); + +struct extra_ivar { + VALUE obj; + int count; + VALUE encoding; + union { + struct { + VALUE ident_methods; + } hash; + } as; +}; + +static int +has_extra_ivars(VALUE obj, struct extra_ivar *exivars, struct dump_arg *arg) { - int encidx = rb_enc_get_index(obj); - rb_encoding *enc = 0; - st_data_t name; + int count = 0, encidx; + rb_encoding *enc; - if (encidx <= 0 || !(enc = rb_enc_from_index(encidx))) { - w_long(num, arg->arg); - return; + if (!ENCODING_IS_ASCII8BIT(obj) && + (encidx = rb_enc_get_index(obj)) > 0 && + (enc = rb_enc_from_index(encidx)) != 0) { + st_data_t name; + + do { + if (!arg->encodings) + arg->encodings = st_init_strcasetable(); + else if (st_lookup(arg->encodings, (st_data_t)rb_enc_name(enc), &name)) + break; + name = (st_data_t)rb_str_new2(rb_enc_name(enc)); + st_insert(arg->encodings, (st_data_t)rb_enc_name(enc), name); + } while (0); + ++count; + exivars->encoding = (VALUE)name; + } + else { + exivars->encoding = 0; + } + + switch (BUILTIN_TYPE(obj)) { + case T_HASH: { + VALUE ids = rb_hash_get_ident_methods(obj); + if (RTEST(ids)) { + exivars->as.hash.ident_methods = ids; + ++count; + } + else { + exivars->as.hash.ident_methods = 0; + } + break; + } + } + + exivars->obj = obj; + return exivars->count = count; +} + +static void +w_extra_ivars(struct extra_ivar *exivars, struct dump_call_arg *arg) +{ + if (exivars->encoding) { + w_symbol(rb_id_encoding(), arg->arg); + w_object(exivars->encoding, arg->arg, arg->limit); + } + + switch (BUILTIN_TYPE(exivars->obj)) { + case T_HASH: + if (exivars->as.hash.ident_methods) { + 
w_symbol(rb_hash_ident_methods_id(), arg->arg); + w_object(exivars->as.hash.ident_methods, arg->arg, arg->limit); + } + break; } - w_long(num + 1, arg->arg); - w_symbol(rb_id_encoding(), arg->arg); - do { - if (!arg->arg->encodings) - arg->arg->encodings = st_init_strcasetable(); - else if (st_lookup(arg->arg->encodings, (st_data_t)rb_enc_name(enc), &name)) - break; - name = (st_data_t)rb_str_new2(rb_enc_name(enc)); - st_insert(arg->arg->encodings, (st_data_t)rb_enc_name(enc), name); - } while (0); - w_object(name, arg->arg, arg->limit); } static void -w_ivar(VALUE obj, st_table *tbl, struct dump_call_arg *arg) +w_ivar(struct extra_ivar *exivars, st_table *tbl, struct dump_call_arg *arg) { long num = tbl ? tbl->num_entries : 0; - w_encoding(obj, num, arg); + num += exivars->count; + w_long(num, arg->arg); + if (exivars->count) { + w_extra_ivars(exivars, arg); + } if (tbl) { st_foreach_safe(tbl, w_obj_each, (st_data_t)arg); @@ -501,17 +558,20 @@ w_ivar(VALUE obj, st_table *tbl, struct static void -w_objivar(VALUE obj, struct dump_call_arg *arg) +w_objivar(struct extra_ivar *exivars, struct dump_call_arg *arg) { - VALUE *ptr; + VALUE *ptr, obj = exivars->obj; long i, len, num; len = ROBJECT_LEN(obj); ptr = ROBJECT_PTR(obj); - num = 0; + num = exivars->count; for (i = 0; i < len; i++) if (ptr[i] != Qundef) num += 1; - w_encoding(obj, num, arg); + w_long(num, arg->arg); + if (exivars->count) { + w_extra_ivars(exivars, arg); + } if (num != 0) { rb_ivar_foreach(obj, w_obj_each, (st_data_t)arg); @@ -526,6 +586,8 @@ w_object(VALUE obj, struct dump_arg *arg st_data_t num; int hasiv = 0; -#define has_ivars(obj, ivtbl) ((ivtbl = rb_generic_ivar_table(obj)) != 0 || \ - (!SPECIAL_CONST_P(obj) && !ENCODING_IS_ASCII8BIT(obj))) + struct extra_ivar exivar; +#define has_ivars(obj, ivtbl, exivar) \ + (((ivtbl = rb_generic_ivar_table(obj)) != 0) | \ + (!SPECIAL_CONST_P(obj) && has_extra_ivars(obj, &exivar, arg))) if (limit == 0) { @@ -543,5 +605,5 @@ w_object(VALUE obj, struct 
dump_arg *arg } - if ((hasiv = has_ivars(obj, ivtbl)) != 0) { + if ((hasiv = has_ivars(obj, ivtbl, exivar)) != 0) { w_byte(TYPE_IVAR, arg); } @@ -586,4 +648,5 @@ w_object(VALUE obj, struct dump_arg *arg VALUE real_obj = obj; obj = compat->dumper(real_obj); + exivar.obj = obj; st_insert(arg->compat_tbl, (st_data_t)obj, (st_data_t)real_obj); } @@ -596,5 +659,5 @@ w_object(VALUE obj, struct dump_arg *arg w_class(TYPE_USRMARSHAL, obj, arg, Qfalse); w_object(v, arg, limit); - if (hasiv) w_ivar(obj, 0, &c_arg); + if (hasiv) w_ivar(&exivar, 0, &c_arg); return; } @@ -602,4 +665,5 @@ w_object(VALUE obj, struct dump_arg *arg VALUE v; st_table *ivtbl2 = 0; + struct extra_ivar exivar2; int hasiv2; @@ -608,5 +672,5 @@ w_object(VALUE obj, struct dump_arg *arg rb_raise(rb_eTypeError, "_dump() must return string"); } - if ((hasiv2 = has_ivars(v, ivtbl2)) != 0 && !hasiv) { + if ((hasiv2 = has_ivars(v, ivtbl2, exivar2)) != 0 && !hasiv) { w_byte(TYPE_IVAR, arg); } @@ -614,8 +678,8 @@ w_object(VALUE obj, struct dump_arg *arg w_bytes(RSTRING_PTR(v), RSTRING_LEN(v), arg); if (hasiv2) { - w_ivar(v, ivtbl2, &c_arg); + w_ivar(&exivar2, ivtbl2, &c_arg); } else if (hasiv) { - w_ivar(obj, ivtbl, &c_arg); + w_ivar(&exivar, ivtbl, &c_arg); } return; @@ -739,5 +803,5 @@ w_object(VALUE obj, struct dump_arg *arg case T_OBJECT: w_class(TYPE_OBJECT, obj, arg, Qtrue); - w_objivar(obj, &c_arg); + w_objivar(&exivar, &c_arg); break; @@ -764,5 +828,5 @@ w_object(VALUE obj, struct dump_arg *arg } if (hasiv) { - w_ivar(obj, ivtbl, &c_arg); + w_ivar(&exivar, ivtbl, &c_arg); } } @@ -1093,4 +1159,8 @@ r_ivar(VALUE obj, struct load_arg *arg) if (idx > 0) rb_enc_associate_index(obj, idx); } + else if (TYPE(obj) == T_HASH && + id == rb_hash_ident_methods_id()) { + rb_hash_set_ident_method(obj, val); + } else { rb_ivar_set(obj, id, val); Index: st.c =================================================================== --- st.c (revision 15489) +++ st.c (working copy) @@ -69,7 +69,8 @@ static void rehash(st_table 
*); #define Calloc(n,s) (char*)calloc((n),(s)) -#define EQUAL(table,x,y) ((x)==(y) || (*table->type->compare)((x),(y)) == 0) +#define EQUAL(table,x,y) ((x)==(y) || \ + (*table->type->compare)((x),(y),(table)) == 0) -#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key)) +#define do_hash(key,table) (unsigned int)(*(table)->type->hash)((key),(table)) #define do_hash_bin(key,table) (do_hash(key, table)%(table)->num_bins) @@ -154,19 +155,17 @@ stat_col() #define MAX_PACKED_NUMHASH 5 -st_table* -st_init_table_with_size(const struct st_hash_type *type, int size) -{ - st_table *tbl; - #ifdef HASH_LOG - if (init_st == 0) { - init_st = 1; - atexit(stat_col); - } +#define INIT_HASH_LOG() \ + (void)((init_st == 0) ? \ + (init_st = 1, atexit(stat_col)) : 0) +#else +#define INIT_HASH_LOG() ((void)0) #endif +static st_table * +st_table_init(st_table *tbl, const struct st_hash_type *type, int size) +{ size = new_size(size); /* round up to prime number */ - tbl = alloc(st_table); tbl->type = type; tbl->num_entries = 0; @@ -180,4 +179,23 @@ st_init_table_with_size(const struct st_ st_table* +st_init_table_with_size(const struct st_hash_type *type, int size) +{ + return st_table_init(alloc(st_table), type, size); +} + +st_table * +st_init_table_extra(const struct st_hash_type *type, int extra) +{ + return st_table_init(malloc(sizeof(st_table) + extra), type, 0); +} + +st_table * +st_init_table_extra_with_size(const struct st_hash_type *type, int extra, int size) +{ + return st_table_init(malloc(sizeof(st_table) + extra), + type, size); +} + +st_table* st_init_table(const struct st_hash_type *type) { Index: include/ruby/st.h =================================================================== --- include/ruby/st.h (revision 15489) +++ include/ruby/st.h (working copy) @@ -74,4 +74,6 @@ st_table *st_init_strtable_with_size(int st_table *st_init_strcasetable(void); st_table *st_init_strcasetable_with_size(int); +st_table *st_init_table_extra(const struct st_hash_type *, 
int); +st_table *st_init_table_extra_with_size(const struct st_hash_type *, int, int); int st_delete(st_table *, st_data_t *, st_data_t *); int st_delete_safe(st_table *, st_data_t *, st_data_t *, st_data_t); @@ -91,4 +93,6 @@ int st_strcasecmp(const char *s1, const int st_strncasecmp(const char *s1, const char *s2, size_t n); +static inline void *st_extra_data(st_table *tbl) {return tbl + 1;} + #if defined(__cplusplus) #if 0 Index: test/ruby/test_hash.rb =================================================================== --- test/ruby/test_hash.rb (revision 15511) +++ test/ruby/test_hash.rb (working copy) @@ -818,12 +818,11 @@ class TestHash < Test::Unit::TestCase end - def test_compare_by_identity - a = "foo" - assert(!{}.compare_by_identity?) - h = { a => "bar" } - assert(!h.compare_by_identity?) - h.compare_by_identity - assert(h.compare_by_identity?) - #assert_equal("bar", h[a]) + def test_identified_by + a = "foo".freeze + assert(!{}.methods_to_identify) + h = Hash.identified_by(:equal?, :object_id) + assert_equal([:equal?, :object_id], h.methods_to_identify) + h[a] = "bar" + assert_equal("bar", h[a]) assert_nil(h["foo"]) end
-- --- 僕の前にBugはない。 --- 僕の後ろにBugはできる。 中田 伸悦