On Wednesday 28 May 2003 06:20 pm, you wrote: > No one seems to be interested in this issue so I'll have to reply to > myself... ;) Actually, I am. I am packing 64-bit words for SNMP and my solution is clumsy at best. > No one has pointed out that a clean solution for this is currently > available so I went ahead and implemented it. Below is the unit test I used > and one patch for bignum.c and one for pack.c. The patches are taken > against latest nightly snapshot: > > $ ruby -v > ruby 1.8.0 (2003-05-27) [i386-mingw32] > > It should work for both big and little endian architectures but I've only > tried on little endian. Would be great if someone can try on big-endian > machine. I'll try on my Mac (should be big-endian, isn't it?). It may take a few days before I get around to it, though. Guillaume. > This patch adds a 'W' template character to pack and unpack for > packing/unpacking an unsigned integer (Fixnum *OR* Bignum). The packing > is from MSB to LSB so that > > [0xff00].pack("W") == "\377\000" > > regardless of the endianness of the machine. Leading zeroes are trimmed > from the string (except for negative numbers see below). If you pack a > negative number you lose information about the sign, ie > > [-1].pack("W").unpack("W").first == 1 > > which is the same as for template 'I' but in contrast to template 'Q'. > > I chose W as in "raW binary representation of number" but it's hard to > find a good template char since most are taken. > > I didn't implement 'w' for dumping negative numbers since I don't see > the need. However, the implementation hints at one possible way for how > to do 'w' (by only allowing negative numbers to have leading zeroes). > > If someone finds this worthy/useful it's in the public domain so use it in > any way you want. I tried to stay close to the style in Ruby source but I'm > sure the code can be even cleaner/nicer/faster. > > Regards, > > Robert Feldt > > Ps. This post is probably too long; I'm sorry... 
Maybe ruby-core list is > better for these things? Or just to matz? I'm not fully up-to-date with > community procedures. > > ----------utest_bignum_pack_unpack.rb------------------------------------ > require 'test/unit' > > class TestBignumPackAndUnpack < Test::Unit::TestCase > def test_01_pack_W_one_byte > (0..255).each do |i| > assert_equal(i.chr, [i].pack("W")) > end > end > > def num_with_bytes(bytes) > low_limit = 2**(8 * (bytes-1)) > low_limit + rand(-low_limit + 2**(8*bytes)) > end > > def assert_pack_W_sampled(numBytes, numSamples = 100) > numSamples.times do > num = num_with_bytes(numBytes) > packed = [num].pack("W") > assert_equal(numBytes, packed.length, "num = #{num}") > lsb_first = packed.reverse > numBytes.times do |i| > assert_equal(lsb_first[i], num & 0xff) > num >>= 8 > end > end > end > > def test_02_pack_W_sampled_positive_multi_bytes > (2..10).each do |num_bytes| > assert_pack_W_sampled(num_bytes, 25) > end > end > > def test_03_pack_W_large > p1024 = [2**1024].pack("W") > assert_equal(1.chr + (0.chr*(1024/8)), p1024) > p1024_ones = [2**1024-1].pack("W") > assert_equal(0xff.chr * (1024/8), p1024_ones) > p2048 = [2**2048].pack("W") > assert_equal(1.chr + (0.chr*(2048/8)), p2048) > p2048_ones = [2**2048-1].pack("W") > assert_equal(0xff.chr * (2048/8), p2048_ones) > end > > # This might not be what one wants but I think main use is in > # converting positive nums so lets leave it as is... > # To do 'w' we could make sure that negative numbers always > # start with leading 0. This way we could later unpack them without > # losing the sign. 
> def test_04_pack_W_negative_numbers > assert_equal("\000\000\000\001", [-1].pack("W")) > assert_equal("\000\000\000\002", [-2].pack("W")) > assert_equal("\000\000\000\377", [-255].pack("W")) > assert_equal("\000\000\377\377", [-2**16+1].pack("W")) > assert_equal("\000\377\377\377", [-2**24+1].pack("W")) > assert_equal("\377\377\377\377", [-2**32+1].pack("W")) > assert_equal("\000\000\000\001\000\000\000\000", [-2**32].pack("W")) > end > > def test_05_unpack_W_one_byte > (0..255).each do |i| > assert_equal(i, i.chr.unpack("W").first) > end > end > > def str_with_bytes(bytes) > s = "" > bytes.times {s << rand(256).chr} > s > end > > def test_06_unpack_W_sampled_positive_multi_bytes > (2..10).each do |num_bytes| > 25.times do > s = str_with_bytes(num_bytes) > num = s.unpack("W").first > lsb_first = s.reverse > num_bytes.times do |i| > assert_equal(lsb_first[i], num & 0xff, > "s = #{s.unpack('H*')}, num = #{num}") > num >>= 8 > end > end > end > end > > def test_07_unpack_W_large > u1024 = (1.chr + (0.chr*(1024/8))).unpack("W").first > assert_equal(2**1024, u1024) > u1024_ones = (0xff.chr * (1024/8)).unpack("W").first > assert_equal(2**1024-1, u1024_ones) > u2048 = (1.chr + (0.chr*(2048/8))).unpack("W").first > assert_equal(2**2048, u2048) > u2048_ones = (0xff.chr * (2048/8)).unpack("W").first > assert_equal(2**2048-1, u2048_ones) > end > > def test_08_unpack_W_packed_negative_numbers > assert_equal(1, [-1].pack("W").unpack("W").first) > assert_equal(2, [-2].pack("W").unpack("W").first) > assert_equal(255, [-255].pack("W").unpack("W").first) > assert_equal(2**16-1, [-2**16+1].pack("W").unpack("W").first) > assert_equal(2**24-1, [-2**24+1].pack("W").unpack("W").first) > assert_equal(2**32-1, [-2**32+1].pack("W").unpack("W").first) > assert_equal(2**32, [-2**32].pack("W").unpack("W").first) > end > > def test_09_cycle_pack_then_unpack > 1000.times do > num = rand(2**200) > assert_equal(num, [num].pack("W").unpack("W").first) > end > end > end > > 
---------upatch_bignum_c--------------------------------------------- > --- bignum.c 2003-05-28 23:50:04.000000000 +0200 > +++ bignum.c.old 2003-05-28 11:09:30.000000000 +0200 > @@ -306,110 +306,6 @@ > > #endif > > -/* We should probably use endian in pack.c instead but I had problems > - * when linking so... > - */ > -static int > -big_endian() > -{ > - static int init = 0; > - static int big_endian_value; > - char *p; > - > - if (init) return big_endian_value; > - init = 1; > - p = (char*)&init; > - return big_endian_value = (p[0]==1)?0:1; > -} > - > -/* Pack a nonnegative bignum as raw binary data/bitstring starting from > - * MSB to LSB. > - * Returned data will be multiple of SIZEOF_BDIGITS so there can be up to > - * SIZEOF_BDIGITS-1 leading zeroes. > - * Assumes that val is really a bignum ie. fixnums > - * needs to be converted prior to calling this. > - */ > -void > -rb_nonneg_bignum_pack(buf, val) > - char *buf; > - VALUE val; > -{ > - long len, i, j, chars; > - char *next_digit; > - > - len = RBIGNUM(val)->len; > - next_digit = RBIGNUM(val)->digits + (len * SIZEOF_BDIGITS); > - if (big_endian()) { > - for(i=0; i<len; i++) { > - next_digit -= SIZEOF_BDIGITS; > - for(j=0; j<SIZEOF_BDIGITS; j++) { > - *buf++ = *(next_digit+j); > - } > - } > - } else { > - for(i=0; i<len; i++) { > - next_digit -= SIZEOF_BDIGITS; > - for(j=SIZEOF_BDIGITS-1; j>=0; j--) { > - *buf++ = *(next_digit+j); > - } > - } > - } > -} > - > -VALUE > -rb_bignum_unpack(buf, sign, len) > - const char *buf; > - int sign; > - long len; > -{ > - VALUE big; > - long num_digits, i, j; > - char *next_digit; > - char *extra_digit; > - long num_full_digits = len / SIZEOF_BDIGITS; > - int extra_bytes = len % SIZEOF_BDIGITS; > - > - num_digits = num_full_digits + (extra_bytes>0 ? 
1 : 0); > - big = bignew(num_digits, 1); > - extra_digit = next_digit = > - (char*)RBIGNUM(big)->digits + num_full_digits * SIZEOF_BDIGITS; > - > - if (big_endian()) { > - if (extra_bytes > 0) { > - for(i = 0; i < SIZEOF_BDIGITS - extra_bytes; i++) { > - *extra_digit++ = 0; > - } > - for(i = 0; i < extra_bytes; i++) { > - *extra_digit++ = *buf++; > - } > - } > - for(i = 0; i < num_full_digits; i++) { > - next_digit -= SIZEOF_BDIGITS; > - for(j = 0; j < SIZEOF_BDIGITS; j++) { > - *next_digit++ = *buf++; > - } > - } > - } else { > - if (extra_bytes > 0) { > - for(i = extra_bytes - 1; i >= 0 ; i--) { > - *(extra_digit+i) = *buf++; > - } > - extra_digit += extra_bytes; > - for(i = 0; i < SIZEOF_BDIGITS - extra_bytes; i++) { > - *extra_digit++ = 0; > - } > - } > - for(i = 0; i < num_full_digits; i++) { > - next_digit -= SIZEOF_BDIGITS; > - for(j = SIZEOF_BDIGITS - 1; j >= 0; j--) { > - *(next_digit+j) = *buf++; > - } > - } > - } > - > - return bignorm(big); > -} > - > VALUE > rb_cstr_to_inum(str, base, badcheck) > const char *str; > > > --------upatch_pack_c------------------------------------------------------ >- --- pack.c 2003-05-28 23:55:32.000000000 +0200 > +++ pack.c.old 2003-05-28 11:19:21.000000000 +0200 > @@ -376,21 +376,6 @@ > static int uv_to_utf8 _((char*,unsigned long)); > static unsigned long utf8_to_uv _((char*,long*)); > > -VALUE > -ensure_bignum(val) > - VALUE val; > -{ > - if (NIL_P(val)) { > - val = INT2FIX(0); > - } else { > - val = rb_to_int(val); > - } > - if (FIXNUM_P(val)) { > - val = rb_int2big(FIX2LONG(val)); > - } > - return val; > -} > - > static VALUE > pack_pack(ary, fmt) > VALUE ary, fmt; > @@ -683,33 +668,6 @@ > } > break; > > - case 'W': > - while (len-- > 0) { > - VALUE from; > - long len; > - long num_bytes_to_skip = 0; > - > - from = ensure_bignum(NEXTFROM); > - len = RBIGNUM(from)->len * SIZEOF_BDIGITS; > - { > - char tmp[len]; > - > - rb_nonneg_bignum_pack(tmp, from); > - // Skip leading zeroes if positive bignum. 
Extend > - // this "strategy" for 'w' so that only negative > - // bignums (and 0) can have leading zero? > - if (RBIGNUM(from)->sign) { > - while (num_bytes_to_skip < (len-1) && > - tmp[num_bytes_to_skip] == 0x00) { > - num_bytes_to_skip++; > - } > - } > - rb_str_buf_cat(res, ((char*)&tmp) + num_bytes_to_skip, > - len - num_bytes_to_skip); > - } > - } > - break; > - > case 'n': > while (len-- > 0) { > unsigned short s; > @@ -1456,11 +1414,6 @@ > } > break; > > - case 'W': > - rb_ary_push(ary, rb_bignum_unpack(s, 1, send - s)); > - s = send; > - break; > - > case 'n': > PACK_LENGTH_ADJUST(unsigned short,2); > while (len-- > 0) { > > --------------------------------------------------------------------