In article <20050103233811.7dd92150.sheepman / tcn.zaq.ne.jp>,
  sheepman <sheepman / tcn.zaq.ne.jp> writes:

>> 次のように、rand の結果があからさまに一様でない分布を示すことがあるようです。
>
> こんな感じでどうでしょうか。

そうすると、

% ./ruby -ve '20.times { printf "%9x\n", rand(0x1ffffffff) }'
ruby 1.8.2 (2005-01-04) [i686-linux]
 2223ec71
 28e25b5c
 37326e3d
 42e8cdd2
 27a4a874
 ed512afe
 3b7a6d2d
 1e493032
 ad1b372f
 36423f58
 5d6232b4
  97a3051
 11926c94
 aec1ef57
 cbb7f06f
 ba24d7a4
 7c77dd16
 99fd7e36
  6ce15ae
 6cfcfa17

というように、rand(0x1ffffffff) が常に 0xffffffff 以下を返すようになります。

それはそれとして、rand の問題はほかにもいくつかあるようです。

* rand(fixnum) も一様でない

% ./ruby -e '
s = 10000
t = 3
arr = [0] * t
(64*s).times {
  arr[rand(0x30000000) % t] += 1
}
arr.each {|n| puts "*" * (n / s) }
'
************************
*******************
*******************

* rand(nil) で、仮数部の下位ビットに 0 が並ぶ

% ./ruby -ve 'srand(0); 10.times { p [rand].pack("G").unpack("B*") }'
ruby 1.8.2 (2005-01-04) [i686-linux]
["0011111111100001100011111110000101010101100000000000000000000000"]
["0011111111100010111110001001010101000101111000000000000000000000"]
["0011111111100110111000101101010011001110101000000000000000000000"]
["0011111111101011000001000011100110011000000000000000000000000000"]
["0011111111100011010010011101011001101000011000000000000000000000"]
["0011111111101011011101000100101001011111011000000000000000000000"]
["0011111111100001011011111010111011011000011000000000000000000000"]
["0011111111101011000111001010111110101100111000000000000000000000"]
["0011111111011011000111010010100100000010010000000000000000000000"]
["0011111111100011111101000011101111011010011000000000000000000000"]

* 64bit マシンで rand が返す値の下位ビットが 0 になる

% ruby -ve 'srand(0); 10.times { p rand(0x3000000000000000).to_s(16) }'
ruby 1.8.2 (2004-12-06) [x86_64-linux]
"1a57d20040000000"
"1c74dfe8d0000000"
"22543f35f0000000"
"2886566400000000"
"1ceec19c90000000"
"292e6f8f10000000"
"1a27864490000000"
"28ab078350000000"
"1455dec1b0000000"
"1dee59c790000000"

* 32bit の種しか与えられないので、brute force attack に弱い

という話を直したつもりなパッチをつけます。

なお、/dev/urandom から以前よりも長く読み込み、bignum として表現する都
合上、srand の返値が

% ./ruby -e 'srand; p srand'
44554367493460933033425899290213730169575912281951203251910081025545624407566

というかんじでそれなりに長くなっています。

また、いくつか必要な関数を mt19937ar-cok.tgz からもってきました。

あと、rb_big_rand は不都合があるので使わず、直接 RBignum の中身をいじっ
ています。そして SIZEOF_BDIGITS は 2 か 4 であることを仮定していますが、
2 の場合はテストしてません。

Index: random.c
===================================================================
RCS file: /src/ruby/random.c,v
retrieving revision 1.34
diff -u -p -r1.34 random.c
--- random.c	3 Jan 2005 05:13:10 -0000	1.34
+++ random.c	4 Jan 2005 16:04:52 -0000
@@ -92,6 +92,37 @@ init_genrand(s)
     left = 1; initf = 1;
 }
 
+/* initialize by an array with array-length */
+/* init_key is the array for initializing keys */
+/* key_length is its length */
+/* slight change for C++, 2004/2/26 */
+static void
+init_by_array(unsigned long init_key[], int key_length)
+{
+    int i, j, k;
+    init_genrand(19650218UL);
+    i=1; j=0;
+    k = (N>key_length ? N : key_length);
+    for (; k; k--) {
+        state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1664525UL))
+          + init_key[j] + j; /* non linear */
+        state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
+        i++; j++;
+        if (i>=N) { state[0] = state[N-1]; i=1; }
+        if (j>=key_length) j=0;
+    }
+    for (k=N-1; k; k--) {
+        state[i] = (state[i] ^ ((state[i-1] ^ (state[i-1] >> 30)) * 1566083941UL))
+          - i; /* non linear */
+        state[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */
+        i++;
+        if (i>=N) { state[0] = state[N-1]; i=1; }
+    }
+
+    state[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ 
+    left = 1; initf = 1;
+}
+
 static void
 next_state()
 {
@@ -114,9 +145,9 @@ next_state()
     *p = p[M-N] ^ TWIST(p[0], state[0]);
 }
 
-/* generates a random number on [0,1)-real-interval */
-static double
-genrand_real()
+/* generates a random number on [0,0xffffffff]-interval */
+static unsigned long
+genrand_int32(void)
 {
     unsigned long y;
 
@@ -129,10 +160,18 @@ genrand_real()
     y ^= (y << 15) & 0xefc60000UL;
     y ^= (y >> 18);
 
-    return (double)y * (1.0/4294967296.0); 
-    /* divided by 2^32 */
+    return y;
 }
 
+/* generates a random number on [0,1) with 53-bit resolution*/
+static double
+genrand_real(void) 
+{ 
+    unsigned long a=genrand_int32()>>5, b=genrand_int32()>>6; 
+    return(a*67108864.0+b)*(1.0/9007199254740992.0); 
+} 
+/* These real versions are due to Isaku Wada, 2002/01/09 added */
+
 #undef N
 #undef M
 
@@ -151,36 +190,91 @@ genrand_real()
 #endif
 
 static int first = 1;
+static VALUE saved_seed = INT2FIX(0);
 
-static int
-rand_init(seed)
-    unsigned long seed;
+static VALUE
+rand_init(vseed)
+    VALUE vseed;
 {
-    static unsigned long saved_seed;
-    unsigned long old;
+    volatile VALUE seed;
+    VALUE old;
+    long len;
+    unsigned long *buf;
 
+    seed = rb_to_int(vseed);
+    switch (TYPE(seed)) {
+      case T_FIXNUM:
+          len = sizeof(VALUE);
+          break;
+      case T_BIGNUM:
+          len = RBIGNUM(seed)->len * SIZEOF_BDIGITS;
+          if (len == 0)
+              len = 4;
+          break;
+      default:
+          rb_raise(rb_eTypeError, "failed to convert %s into Integer",
+                   rb_obj_classname(vseed));
+    }
+    len = ((len + 3) / 4); /* round up to 32bits */
+    buf = ALLOCA_N(long, len); /* allocate longs for init_by_array */
+    memset(buf, 0, len * sizeof(long));
+    if (FIXNUM_P(seed)) {
+        buf[0] = FIX2ULONG(seed) & 0xffffffff;
+#if SIZEOF_LONG > 4
+        buf[1] = FIX2ULONG(seed) >> 32;
+#endif
+    }
+    else {
+        int i, j;
+        for (i = RBIGNUM(seed)->len-1; 0 <= i; i--) {
+            j = i * SIZEOF_BDIGITS / 4;
+#if SIZEOF_BDIGITS < SIZEOF_LONG
+            buf[j] = buf[j] << (SIZEOF_BDIGITS * 8);
+#endif
+            buf[j] |= ((BDIGIT *)RBIGNUM(seed)->digits)[i];
+        }
+    }
+    while (1 < len && buf[len-1] == 0) {
+        len--;
+    }
+    if (len <= 1) {
+        init_genrand(buf[0]);
+    }
+    else {
+        if (buf[len-1] == 1) /* remove leading-zero-guard */
+            len--;
+        init_by_array(buf, len);
+    }
     first = 0;
-    init_genrand(seed);
     old = saved_seed;
     saved_seed = seed;
 
     return old;
 }
 
-static unsigned long
+static VALUE
 random_seed()
 {
     static int n = 0;
     struct timeval tv;
-    unsigned long result;
     int fd;
-    unsigned long buf;
     struct stat statbuf;
+    BDIGIT *digits;
+    unsigned long *seed;
+    NEWOBJ(big, struct RBignum);
+    OBJSETUP(big, rb_cBignum, T_BIGNUM);
+    big->sign = 1;
+    big->len = sizeof(long) * 8 / SIZEOF_BDIGITS + 1;
+    digits = big->digits = ALLOC_N(BDIGIT, big->len);
+    seed = (unsigned long *)big->digits;
 
-    gettimeofday(&tv, 0);
-    result = tv.tv_sec ^ tv.tv_usec ^ getpid() ^ n++;
+    memset(digits, 0, big->len * SIZEOF_BDIGITS);
 
-    result += (unsigned long)&result;
+    gettimeofday(&tv, 0);
+    seed[0] = tv.tv_usec;
+    seed[1] = tv.tv_sec;
+    seed[2] = getpid() ^ (n++ << 16);
+    seed[3] = (unsigned long)&seed;
 
 #ifdef S_ISCHR
     if ((fd = open("/dev/urandom", O_RDONLY|O_NONBLOCK
@@ -192,14 +286,16 @@ random_seed()
 #endif
             )) >= 0) {
         if (fstat(fd, &statbuf) == 0 && S_ISCHR(statbuf.st_mode)) {
-            read(fd, &buf, sizeof(buf));
-            result ^= buf;
+            read(fd, &seed[4], 4 * sizeof(*seed));
         }
         close(fd);
     }
 #endif
 
-    return result;
+    /* set leading-zero-guard if need. */
+    digits[big->len-1] = digits[big->len-2] <= 1 ? 1 : 0;
+
+    return rb_big_norm((VALUE)big);
 }
 
 /*
@@ -222,19 +318,94 @@ rb_f_srand(argc, argv, obj)
     VALUE *argv;
     VALUE obj;
 {
-    VALUE sd;
-    unsigned long seed, old;
+    VALUE seed, old;
 
     rb_secure(4);
-    if (rb_scan_args(argc, argv, "01", &sd) == 0) {
+    if (rb_scan_args(argc, argv, "01", &seed) == 0) {
 	seed = random_seed();
     }
-    else {
-	seed = NUM2ULONG(sd);
-    }
     old = rand_init(seed);
 
-    return ULONG2NUM(old);
+    return old;
+}
+
+static unsigned long 
+make_mask(unsigned long x)
+{
+    x = x | x >> 1;
+    x = x | x >> 2;
+    x = x | x >> 4;
+    x = x | x >> 8;
+    x = x | x >> 16;
+#if 4 < SIZEOF_LONG
+    x = x | x >> 32;
+#endif
+    return x;
+}
+
+static unsigned long
+limited_rand(unsigned long limit)
+{
+    unsigned long mask = make_mask(limit);
+    int i;
+    unsigned long val;
+
+  retry:
+    val = 0;
+    for (i = SIZEOF_LONG/4-1; 0 <= i; i--) {
+        if (mask >> (i * 32)) {
+            val |= genrand_int32() << (i * 32);
+            val &= mask;
+            if (limit < val)
+                goto retry;
+        }
+    }
+    return val;
+}
+
+static VALUE
+limited_big_rand(struct RBignum *limit)
+{
+    unsigned long mask, lim, rnd;
+    struct RBignum *val;
+    int i, len, boundary;
+
+    len = (limit->len * SIZEOF_BDIGITS + 3) / 4;
+    val = (struct RBignum *)rb_big_clone((VALUE)limit);
+    val->sign = 1;
+#if SIZEOF_BDIGITS == 2
+# define BIG_GET32(big,i) (((BDIGIT *)(big)->digits)[(i)/2] | \
+                           ((i)/2+1 < (big)->len ? (((BDIGIT *)(big)->digits)[(i)/2+1] << 16)
+                                                 : 0))
+# define BIG_SET32(big,i,d) ((((BDIGIT *)(big)->digits)[(i)/2] = (d) & 0xffff), \
+                             ((i)/2+1 < (big)->len ? (((BDIGIT *)(big)->digits)[(i)/2+1] = (d) >> 16) \
+                                                   : 0))
+#else
+    /* SIZEOF_BDIGITS == 4 */
+# define BIG_GET32(big,i) (((BDIGIT *)(big)->digits)[i])
+# define BIG_SET32(big,i,d) (((BDIGIT *)(big)->digits)[i] = (d))
+#endif
+  retry:
+    mask = 0;
+    boundary = 1;
+    for (i = len-1; 0 <= i; i--) {
+        lim = BIG_GET32(limit, i);
+        mask = mask ? 0xffffffff : make_mask(lim);
+        if (mask) {
+            rnd = genrand_int32() & mask;
+            if (boundary) {
+                if (lim < rnd)
+                    goto retry;
+                if (rnd < lim)
+                    boundary = 0;
+            }
+        }
+        else {
+            rnd = 0;
+        }
+        BIG_SET32(val, i, rnd);
+    }
+    return rb_big_norm((VALUE)val);
 }
 
 /*
@@ -276,18 +447,26 @@ rb_f_rand(argc, argv, obj)
 	    max = (long)RFLOAT(vmax)->value;
 	    break;
 	}
-	vmax = rb_dbl2big(RFLOAT(vmax)->value);
+        if (RFLOAT(vmax)->value < 0)
+            vmax = rb_dbl2big(-RFLOAT(vmax)->value);
+        else
+            vmax = rb_dbl2big(RFLOAT(vmax)->value);
 	/* fall through */
       case T_BIGNUM:
       bignum:
         {
-	    long len = RBIGNUM(vmax)->len;
-	    double *buf = ALLOCA_N(double, len);
-
-	    while (len--) {
-		buf[len] = genrand_real();
-	    }
-	    return rb_big_rand(vmax, buf);
+            struct RBignum *limit = (struct RBignum *)vmax;
+            if (!limit->sign) {
+                limit = (struct RBignum *)rb_big_clone(vmax);
+                limit->sign = 1;
+            }
+            limit = (struct RBignum *)rb_big_minus((VALUE)limit, INT2FIX(1));
+            if (FIXNUM_P((VALUE)limit)) {
+                if (FIX2LONG((VALUE)limit) == -1)
+                    return rb_float_new(genrand_real());
+                return LONG2NUM(limited_rand(FIX2LONG((VALUE)limit)));
+            }
+            return limited_big_rand(limit);
 	}
       case T_NIL:
 	max = 0;
@@ -304,8 +483,7 @@ rb_f_rand(argc, argv, obj)
 	return rb_float_new(genrand_real());
     }
     if (max < 0) max = -max;
-    val = max*genrand_real();
-
+    val = limited_rand(max-1);
     return LONG2NUM(val);
 }
 
@@ -314,4 +492,5 @@ Init_Random()
 {
     rb_define_global_function("srand", rb_f_srand, -1);
     rb_define_global_function("rand", rb_f_rand, -1);
+    rb_global_variable(&saved_seed);
 }
-- 
[田中 哲][たなか あきら][Tanaka Akira]