On Wednesday 28 May 2003 06:20 pm, you wrote:
> No one seems to be interested in this issue so I'll have to reply to
> myself... ;)

Actually, I am. I am packing 64-bit words for SNMP and my solution is
clumsy at best.


> No one has pointed out that a clean solution for this is currently
> available so I went ahead and implemented it. Below are the unit test I
> used, one patch for bignum.c and one for pack.c. The patches are taken
> against latest nightly snapshot:
>
> $ ruby -v
> ruby 1.8.0 (2003-05-27) [i386-mingw32]
>
> It should work for both big- and little-endian architectures, but I've only
> tried it on little-endian. It would be great if someone could try it on a
> big-endian machine.

I'll try on my Mac (it should be big-endian, shouldn't it?). It may take a few
days before I get around to it though.

Guillaume.


> This patch adds a 'W' template character to pack and unpack for
> packing/unpacking an unsigned integer (Fixnum *OR* Bignum). The packing
> is from MSB to LSB so that
>
>  [0xff00].pack("W") == "\377\000"
>
> regardless of the endianness of the machine. Leading zeroes are trimmed
> from the string (except for negative numbers; see below). If you pack a
> negative number you lose information about the sign, i.e.
>
>  [-1].pack("W").unpack("W").first == 1
>
> which is the same as for template 'I' but in contrast to template 'Q'.
>
> I chose W as in "raW binary representation of number", but it's hard to
> find a good template char since most are taken.
>
> I didn't implement 'w' for dumping negative numbers since I don't see
> the need. However, the implementation hints at one possible way for how
> to do 'w' (by only allowing negative numbers to have leading zeroes).
>
> If someone finds this worthy/useful, it's in the public domain, so use it in
> any way you want. I tried to stay close to the style of the Ruby source, but
> I'm sure the code can be even cleaner/nicer/faster.
>
> Regards,
>
> Robert Feldt
>
> Ps. This post is probably too long; I'm sorry... Maybe ruby-core list is
> better for these things? Or just to matz? I'm not fully up-to-date with
> community procedures.
>
> ----------utest_bignum_pack_unpack.rb------------------------------------
> require 'test/unit'
>
> class TestBignumPackAndUnpack < Test::Unit::TestCase
>   def test_01_pack_W_one_byte
>     (0..255).each do |i|
>       assert_equal(i.chr, [i].pack("W"))
>     end
>   end
>
>   def num_with_bytes(bytes)
>     low_limit = 2**(8 * (bytes-1))
>     low_limit + rand(-low_limit + 2**(8*bytes))
>   end
>
>   def assert_pack_W_sampled(numBytes, numSamples = 100)
>     numSamples.times do
>       num = num_with_bytes(numBytes)
>       packed = [num].pack("W")
>       assert_equal(numBytes, packed.length, "num = #{num}")
>       lsb_first = packed.reverse
>       numBytes.times do |i|
> 	assert_equal(lsb_first[i], num & 0xff)
> 	num >>= 8
>       end
>     end
>   end
>
>   def test_02_pack_W_sampled_positive_multi_bytes
>     (2..10).each do |num_bytes|
>       assert_pack_W_sampled(num_bytes, 25)
>     end
>   end
>
>   def test_03_pack_W_large
>     p1024 = [2**1024].pack("W")
>     assert_equal(1.chr + (0.chr*(1024/8)), p1024)
>     p1024_ones = [2**1024-1].pack("W")
>     assert_equal(0xff.chr * (1024/8), p1024_ones)
>     p2048 = [2**2048].pack("W")
>     assert_equal(1.chr + (0.chr*(2048/8)), p2048)
>     p2048_ones = [2**2048-1].pack("W")
>     assert_equal(0xff.chr * (2048/8), p2048_ones)
>   end
>
>   # This might not be what one wants but I think main use is in
>   # converting positive nums so lets leave it as is...
>   # To do 'w' we could make sure that negative numbers always
>   # start with leading 0. This way we could later unpack them without
>   # losing the sign.
>   def test_04_pack_W_negative_numbers
>     assert_equal("\000\000\000\001", [-1].pack("W"))
>     assert_equal("\000\000\000\002", [-2].pack("W"))
>     assert_equal("\000\000\000\377", [-255].pack("W"))
>     assert_equal("\000\000\377\377", [-2**16+1].pack("W"))
>     assert_equal("\000\377\377\377", [-2**24+1].pack("W"))
>     assert_equal("\377\377\377\377", [-2**32+1].pack("W"))
>     assert_equal("\000\000\000\001\000\000\000\000", [-2**32].pack("W"))
>   end
>
>   def test_05_unpack_W_one_byte
>     (0..255).each do |i|
>       assert_equal(i, i.chr.unpack("W").first)
>     end
>   end
>
>   def str_with_bytes(bytes)
>     s = ""
>     bytes.times {s << rand(256).chr}
>     s
>   end
>
>   def test_06_unpack_W_sampled_positive_multi_bytes
>     (2..10).each do |num_bytes|
>       25.times do
> 	s = str_with_bytes(num_bytes)
> 	num = s.unpack("W").first
> 	lsb_first = s.reverse
> 	num_bytes.times do |i|
> 	  assert_equal(lsb_first[i], num & 0xff,
> 		       "s = #{s.unpack('H*')}, num = #{num}")
> 	  num >>= 8
> 	end
>       end
>     end
>   end
>
>   def test_07_unpack_W_large
>     u1024 = (1.chr + (0.chr*(1024/8))).unpack("W").first
>     assert_equal(2**1024, u1024)
>     u1024_ones = (0xff.chr * (1024/8)).unpack("W").first
>     assert_equal(2**1024-1, u1024_ones)
>     u2048 = (1.chr + (0.chr*(2048/8))).unpack("W").first
>     assert_equal(2**2048, u2048)
>     u2048_ones = (0xff.chr * (2048/8)).unpack("W").first
>     assert_equal(2**2048-1, u2048_ones)
>   end
>
>   def test_08_unpack_W_packed_negative_numbers
>     assert_equal(1, [-1].pack("W").unpack("W").first)
>     assert_equal(2, [-2].pack("W").unpack("W").first)
>     assert_equal(255, [-255].pack("W").unpack("W").first)
>     assert_equal(2**16-1, [-2**16+1].pack("W").unpack("W").first)
>     assert_equal(2**24-1, [-2**24+1].pack("W").unpack("W").first)
>     assert_equal(2**32-1, [-2**32+1].pack("W").unpack("W").first)
>     assert_equal(2**32, [-2**32].pack("W").unpack("W").first)
>   end
>
>   def test_09_cycle_pack_then_unpack
>     1000.times do
>       num = rand(2**200)
>       assert_equal(num, [num].pack("W").unpack("W").first)
>     end
>   end
> end
>
> ---------upatch_bignum_c---------------------------------------------
> --- bignum.c	2003-05-28 23:50:04.000000000 +0200
> +++ bignum.c.old	2003-05-28 11:09:30.000000000 +0200
> @@ -306,110 +306,6 @@
>
>  #endif
>
> -/* We should probably use endian in pack.c instead but I had problems
> - * when linking so...
> - */
> -static int
> -big_endian()
> -{
> -    static int init = 0;
> -    static int big_endian_value;
> -    char *p;
> -
> -    if (init) return big_endian_value;
> -    init = 1;
> -    p = (char*)&init;
> -    return big_endian_value = (p[0]==1)?0:1;
> -}
> -
> -/* Pack a nonnegative bignum as raw binary data/bitstring starting from
> - * MSB to LSB.
> - * Returned data will be multiple of SIZEOF_BDIGITS so there can be up to
> - * SIZEOF_BDIGITS-1 leading zeroes.
> - * Assumes that val is really a bignum ie. fixnums
> - * needs to be converted prior to calling this.
> - */
> -void
> -rb_nonneg_bignum_pack(buf, val)
> -    char *buf;
> -    VALUE val;
> -{
> -    long len, i, j, chars;
> -    char *next_digit;
> -
> -    len = RBIGNUM(val)->len;
> -    next_digit = RBIGNUM(val)->digits + (len * SIZEOF_BDIGITS);
> -    if (big_endian()) {
> -        for(i=0; i<len; i++) {
> -            next_digit -= SIZEOF_BDIGITS;
> -            for(j=0; j<SIZEOF_BDIGITS; j++) {
> -	        *buf++ = *(next_digit+j);
> -	    }
> -	}
> -    } else {
> -        for(i=0; i<len; i++) {
> -            next_digit -= SIZEOF_BDIGITS;
> -            for(j=SIZEOF_BDIGITS-1; j>=0; j--) {
> -	        *buf++ = *(next_digit+j);
> -	    }
> -	}
> -    }
> -}
> -
> -VALUE
> -rb_bignum_unpack(buf, sign, len)
> -    const char *buf;
> -    int sign;
> -    long len;
> -{
> -    VALUE big;
> -    long num_digits, i, j;
> -    char *next_digit;
> -    char *extra_digit;
> -    long num_full_digits = len / SIZEOF_BDIGITS;
> -    int extra_bytes = len % SIZEOF_BDIGITS;
> -
> -    num_digits = num_full_digits + (extra_bytes>0 ? 1 : 0);
> -    big = bignew(num_digits, 1);
> -    extra_digit = next_digit =
> -      (char*)RBIGNUM(big)->digits + num_full_digits * SIZEOF_BDIGITS;
> -
> -    if (big_endian()) {
> -        if (extra_bytes > 0) {
> -	    for(i = 0; i < SIZEOF_BDIGITS - extra_bytes; i++) {
> -	        *extra_digit++ = 0;
> -	    }
> -	    for(i = 0; i < extra_bytes; i++) {
> -	        *extra_digit++ = *buf++;
> -	    }
> -        }
> -        for(i = 0; i < num_full_digits; i++) {
> -	    next_digit -= SIZEOF_BDIGITS;
> -	    for(j = 0; j < SIZEOF_BDIGITS; j++) {
> -	        *next_digit++ = *buf++;
> -	    }
> -        }
> -    } else {
> -        if (extra_bytes > 0) {
> -	    for(i = extra_bytes - 1; i >= 0 ; i--) {
> -	        *(extra_digit+i) = *buf++;
> -	    }
> -	    extra_digit += extra_bytes;
> -	    for(i = 0; i < SIZEOF_BDIGITS - extra_bytes; i++) {
> -	        *extra_digit++ = 0;
> -	    }
> -        }
> -        for(i = 0; i < num_full_digits; i++) {
> -	    next_digit -= SIZEOF_BDIGITS;
> -	    for(j = SIZEOF_BDIGITS - 1; j >= 0; j--) {
> -	        *(next_digit+j) = *buf++;
> -	    }
> -        }
> -    }
> -
> -    return bignorm(big);
> -}
> -
>  VALUE
>  rb_cstr_to_inum(str, base, badcheck)
>      const char *str;
>
>
> --------upatch_pack_c------------------------------------------------------
> --- pack.c	2003-05-28 23:55:32.000000000 +0200
> +++ pack.c.old	2003-05-28 11:19:21.000000000 +0200
> @@ -376,21 +376,6 @@
>  static int uv_to_utf8 _((char*,unsigned long));
>  static unsigned long utf8_to_uv _((char*,long*));
>
> -VALUE
> -ensure_bignum(val)
> -    VALUE val;
> -{
> -    if (NIL_P(val)) {
> -        val = INT2FIX(0);
> -    } else {
> -        val = rb_to_int(val);
> -    }
> -    if (FIXNUM_P(val)) {
> -        val = rb_int2big(FIX2LONG(val));
> -    }
> -    return val;
> -}
> -
>  static VALUE
>  pack_pack(ary, fmt)
>      VALUE ary, fmt;
> @@ -683,33 +668,6 @@
>  	    }
>  	    break;
>
> -	  case 'W':
> -            while (len-- > 0) {
> -	        VALUE from;
> -                long len;
> -		long num_bytes_to_skip = 0;
> -
> -		from = ensure_bignum(NEXTFROM);
> -                len = RBIGNUM(from)->len * SIZEOF_BDIGITS;
> -                {
> -		    char tmp[len];
> -
> -                    rb_nonneg_bignum_pack(tmp, from);
> -		    // Skip leading zeroes if positive bignum. Extend
> -		    // this "strategy" for 'w' so that only negative
> -		    // bignums (and 0) can have leading zero?
> -		    if (RBIGNUM(from)->sign) {
> -		        while (num_bytes_to_skip < (len-1) &&
> -			       tmp[num_bytes_to_skip] == 0x00) {
> -		          num_bytes_to_skip++;
> -		        }
> -		    }
> -		    rb_str_buf_cat(res, ((char*)&tmp) + num_bytes_to_skip,
> -				   len - num_bytes_to_skip);
> -		}
> -	    }
> -	    break;
> -
>  	  case 'n':
>  	    while (len-- > 0) {
>  		unsigned short s;
> @@ -1456,11 +1414,6 @@
>  	    }
>  	    break;
>
> -	case 'W':
> -	  rb_ary_push(ary, rb_bignum_unpack(s, 1, send - s));
> -	  s = send;
> -	  break;
> -
>  	  case 'n':
>  	    PACK_LENGTH_ADJUST(unsigned short,2);
>  	    while (len-- > 0) {
>
> --------------------------------------------------------------------