なかだです。

At Wed, 12 Mar 2008 19:50:18 +0900,
NARUSE, Yui wrote in [ruby-dev:34026]:
> というわけで、int32_t, uint32_t, intptr_t, uintptr_t を定義しつつ、
> hash() をアライメント考慮させるパッチです。

inttypes.hをチェックしてないような気がします。

あとBig endianになるとやや遅くなりそうなので、一応作っておいたの
も出してみます。


Index: configure.in =================================================================== --- configure.in (revision 15758) +++ configure.in (working copy) @@ -583,5 +583,5 @@ AC_CHECK_HEADERS(stdlib.h string.h unist syscall.h pwd.h grp.h a.out.h utime.h memory.h direct.h sys/resource.h \ sys/mkdev.h sys/utime.h xti.h netinet/in_systm.h float.h ieeefp.h pthread.h \ - ucontext.h intrinsics.h langinfo.h locale.h) + ucontext.h intrinsics.h langinfo.h locale.h stdint.h) dnl Check additional types. @@ -621,4 +621,16 @@ AC_CHECK_TYPES(struct timespec) AC_CHECK_TYPE(fd_mask, [AC_DEFINE(HAVE_RB_FD_INIT, 1)]) +test ${rb_cv_type_uint32_t+set} && ac_cv_type_uint32_t=yes +AC_CHECK_TYPE(uint32_t) +if test ${ac_cv_type_uint32_t} != yes; then + AC_CACHE_CHECK([unsigned 32bit int], + rb_cv_type_uint32_t, + [for type in short int long; do + AC_COMPILE_IFELSE(AC_LANG_BOOL_COMPILE_TRY([], [sizeof($type) == 4]), + [rb_cv_type_uint32_t=$type; break], []) + done]) + AC_DEFINE(uint32_t, $rb_cv_type_uint32_t) +fi + AC_CACHE_CHECK(for stack end address, rb_cv_stack_end_address, [rb_cv_stack_end_address=no Index: string.c =================================================================== --- string.c (revision 15758) +++ string.c (working copy) @@ -26,4 +26,8 @@ #endif +#if HAVE_STDINT_H +#include <stdint.h> +#endif + VALUE rb_cString; VALUE rb_cSymbol; @@ -1686,4 +1690,11 @@ rb_str_concat(VALUE str1, VALUE str2) } +#if defined __i386__ || defined _M_IX86 || defined __ia64 +#define UNALIGNED_WORD_ACCESS 1 +#endif +#ifndef UNALIGNED_WORD_ACCESS +#define UNALIGNED_WORD_ACCESS 0 +#endif + /* MurmurHash described in http://murmurhash.googlepages.com/ */ unsigned int @@ -1695,14 +1706,93 @@ hash(const unsigned char * data, int len h += 0xdeadbeef; - while(len >= 4) { - h += *(unsigned int *)data; - h *= m; - h ^= h >> r; + if (len >= 4) { +#if !UNALIGNED_WORD_ACCESS + int align = (VALUE)data & 3; + if (align) { + uint32_t t = 0, d = 0; + int sl, sr, pack; + + switch (align) { +#ifdef WORDS_BIGENDIAN + case 1: t |= data[2]; + case 2: t |= data[1] << 8; + case 3: t |= data[0] << 16; +#else + case 1: t |= data[2] << 16; + case 2: t |= data[1] << 8; + case 3: t |= data[0]; +#endif + } + + t <<= (8 * align); + + data += 4-align; + len -= 4-align; + + sl = 8 * (4-align); + sr = 8 * align; + + while (len >= 4) { + d = *(uint32_t *)data; +#ifdef WORDS_BIGENDIAN + t = (t << sr) | (d >> sl); +#else + t = (t >> sr) | (d << sl); +#endif + h += t; + h *= m; + h ^= h >> r; + t = d; + + data += 4; + len -= 4; + } + + pack = len < align ? len : align; + d = 0; + switch (pack) { +#ifdef WORDS_BIGENDIAN + case 3: d |= data[2]; + case 2: d |= data[1] << 8; + case 1: d |= data[0] << 16; + case 0: + h += (t << sr) | (d >> sl); +#else + case 3: d |= data[2] << 16; + case 2: d |= data[1] << 8; + case 1: d |= data[0]; + case 0: + h += (t >> sr) | (d << sl); +#endif + h *= m; + h ^= h >> r; + } - data += 4; - len -= 4; + data += pack; + len -= pack; + } + else +#endif + { + do { + h += *(uint32_t *)data; + h *= m; + h ^= h >> r; + + data += 4; + len -= 4; + } while (len >= 4); + } } switch(len) { +#ifdef WORDS_BIGENDIAN + case 3: + h += data[2]; + case 2: + h += data[1] << 8; + case 1: + h += data[0] << 16; +#else case 3: h += data[2] << 16; @@ -1711,4 +1801,5 @@ hash(const unsigned char * data, int len case 1: h += data[0]; +#endif h *= m; h ^= h >> r;
-- --- 僕の前にBugはない。 --- 僕の後ろにBugはできる。 中田 伸悦