No one seems to be interested in this issue so I'll have to reply to
myself... ;)
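No one has pointed out that a clean solution for this is currently
available, so I went ahead and implemented it. Below are the unit test I
used, one patch for bignum.c and one for pack.c. The patches are taken
against the latest nightly snapshot. (Note that the diffs below were
generated in the reverse direction, i.e. from the new file to the old
one, so the added code appears on '-' lines; apply them with "patch -R".)
The snapshot I used: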
$ ruby -v
ruby 1.8.0 (2003-05-27) [i386-mingw32]
It should work on both big- and little-endian architectures, but I've only
tried it on little-endian. It would be great if someone could try it on a
big-endian machine.
This patch adds a 'W' template character to pack and unpack for
packing/unpacking an unsigned integer (Fixnum *OR* Bignum). The packing
is from MSB to LSB so that
[0xff00].pack("W") == "\377\000"
regardless of the endianness of the machine. Leading zeroes are trimmed
from the string (except for negative numbers; see below). If you pack a
negative number you lose information about the sign, i.e.
[-1].pack("W").unpack("W").first == 1
which is the same as for template 'I' but in contrast to template 'Q'.
I chose W as in "raW binary representation of number", but it's hard to
find a good template char since most are taken.
I didn't implement 'w' for dumping negative numbers since I don't see
the need. However, the implementation hints at one possible way for how
to do 'w' (by only allowing negative numbers to have leading zeroes).
If someone finds this worthy/useful, it's in the public domain, so use it in
any way you want. I tried to stay close to the style of the Ruby source, but
I'm sure the code can be even cleaner/nicer/faster.
Regards,
Robert Feldt
Ps. This post is probably too long; I'm sorry... Maybe ruby-core list is
better for these things? Or just to matz? I'm not fully up-to-date with
community procedures.
----------utest_bignum_pack_unpack.rb------------------------------------
require 'test/unit'
# Tests for the 'W' template character of Array#pack / String#unpack.
# As exercised below, 'W' packs a non-negative integer (Fixnum or Bignum)
# as raw bytes, most significant byte first, and unpacks such a byte string
# back into an integer.
class TestBignumPackAndUnpack < Test::Unit::TestCase
  # Every value that fits in a single byte must pack to exactly that byte.
  def test_01_pack_W_one_byte
    (0..255).each do |i|
      assert_equal(i.chr, [i].pack("W"))
    end
  end

  # Returns a random integer n with 2**(8*(bytes-1)) <= n < 2**(8*bytes).
  def num_with_bytes(bytes)
    low_limit = 2**(8 * (bytes - 1))
    low_limit + rand(2**(8 * bytes) - low_limit)
  end

  # Packs numSamples random numbers from num_with_bytes(numBytes) and checks
  # that each packed string is numBytes long and holds the number's bytes,
  # most significant first.
  def assert_pack_W_sampled(numBytes, numSamples = 100)
    numSamples.times do
      num = num_with_bytes(numBytes)
      packed = [num].pack("W")
      assert_equal(numBytes, packed.length, "num = #{num}")

      # Walk the packed bytes from least to most significant. unpack("C*")
      # always yields integers, whereas String#[] with an integer index
      # only does so on Ruby 1.8.
      remaining = num
      packed.unpack("C*").reverse.each do |byte|
        # Expected value first: the reference byte computed from the number.
        assert_equal(remaining & 0xff, byte, "num = #{num}")
        remaining >>= 8
      end
    end
  end

  def test_02_pack_W_sampled_positive_multi_bytes
    (2..10).each do |num_bytes|
      assert_pack_W_sampled(num_bytes, 25)
    end
  end

  # Powers of two and all-ones values well beyond the Fixnum range.
  def test_03_pack_W_large
    p1024 = [2**1024].pack("W")
    assert_equal(1.chr + (0.chr * (1024 / 8)), p1024)

    p1024_ones = [2**1024 - 1].pack("W")
    assert_equal(0xff.chr * (1024 / 8), p1024_ones)

    p2048 = [2**2048].pack("W")
    assert_equal(1.chr + (0.chr * (2048 / 8)), p2048)

    p2048_ones = [2**2048 - 1].pack("W")
    assert_equal(0xff.chr * (2048 / 8), p2048_ones)
  end

  # This might not be what one wants but I think main use is in
  # converting positive nums so lets leave it as is...
  # To do 'w' we could make sure that negative numbers always
  # start with leading 0. This way we could later unpack them without
  # losing the sign.
  #
  # NOTE(review): the expected strings are padded to multiples of four
  # bytes, presumably the bignum digit size on the platform these tests
  # were written on -- confirm before relying on them elsewhere.
  def test_04_pack_W_negative_numbers
    assert_equal("\000\000\000\001", [-1].pack("W"))
    assert_equal("\000\000\000\002", [-2].pack("W"))
    assert_equal("\000\000\000\377", [-255].pack("W"))
    assert_equal("\000\000\377\377", [-2**16+1].pack("W"))
    assert_equal("\000\377\377\377", [-2**24+1].pack("W"))
    assert_equal("\377\377\377\377", [-2**32+1].pack("W"))
    assert_equal("\000\000\000\001\000\000\000\000", [-2**32].pack("W"))
  end

  # Every single byte must unpack to its own numeric value.
  def test_05_unpack_W_one_byte
    (0..255).each do |i|
      assert_equal(i, i.chr.unpack("W").first)
    end
  end

  # Returns a string of +bytes+ random bytes.
  def str_with_bytes(bytes)
    s = ""
    bytes.times { s << rand(256).chr }
    s
  end

  # Unpacks random byte strings and checks that the resulting number holds
  # the same bytes, compared from least to most significant.
  def test_06_unpack_W_sampled_positive_multi_bytes
    (2..10).each do |num_bytes|
      25.times do
        s = str_with_bytes(num_bytes)
        num = s.unpack("W").first

        # unpack("C*") gives integer byte values on every Ruby version.
        remaining = num
        s.unpack("C*").reverse.each do |byte|
          assert_equal(byte, remaining & 0xff,
                       "s = #{s.unpack('H*')}, num = #{num}")
          remaining >>= 8
        end
      end
    end
  end

  # Mirror image of test_03_pack_W_large for the unpacking direction.
  def test_07_unpack_W_large
    u1024 = (1.chr + (0.chr * (1024 / 8))).unpack("W").first
    assert_equal(2**1024, u1024)

    u1024_ones = (0xff.chr * (1024 / 8)).unpack("W").first
    assert_equal(2**1024 - 1, u1024_ones)

    u2048 = (1.chr + (0.chr * (2048 / 8))).unpack("W").first
    assert_equal(2**2048, u2048)

    u2048_ones = (0xff.chr * (2048 / 8)).unpack("W").first
    assert_equal(2**2048 - 1, u2048_ones)
  end

  # Packing a negative number drops the sign, so a pack/unpack round trip
  # yields the absolute value.
  def test_08_unpack_W_packed_negative_numbers
    assert_equal(1, [-1].pack("W").unpack("W").first)
    assert_equal(2, [-2].pack("W").unpack("W").first)
    assert_equal(255, [-255].pack("W").unpack("W").first)
    assert_equal(2**16-1, [-2**16+1].pack("W").unpack("W").first)
    assert_equal(2**24-1, [-2**24+1].pack("W").unpack("W").first)
    assert_equal(2**32-1, [-2**32+1].pack("W").unpack("W").first)
    assert_equal(2**32, [-2**32].pack("W").unpack("W").first)
  end

  # Non-negative numbers must survive a pack/unpack round trip unchanged.
  def test_09_cycle_pack_then_unpack
    1000.times do
      num = rand(2**200)
      assert_equal(num, [num].pack("W").unpack("W").first)
    end
  end
end
---------upatch_bignum_c---------------------------------------------
--- bignum.c 2003-05-28 23:50:04.000000000 +0200
+++ bignum.c.old 2003-05-28 11:09:30.000000000 +0200
@@ -306,110 +306,6 @@
#endif
-/* We should probably use endian in pack.c instead but I had problems
- * when linking so...
- */
-static int
-big_endian()
-{
- static int init = 0;
- static int big_endian_value;
- char *p;
-
- if (init) return big_endian_value;
- init = 1;
- p = (char*)&init;
- return big_endian_value = (p[0]==1)?0:1;
-}
-
-/* Pack a nonnegative bignum as raw binary data/bitstring starting from
- * MSB to LSB.
- * Returned data will be multiple of SIZEOF_BDIGITS so there can be up to
- * SIZEOF_BDIGITS-1 leading zeroes.
- * Assumes that val is really a bignum ie. fixnums
- * needs to be converted prior to calling this.
- */
-void
-rb_nonneg_bignum_pack(buf, val)
- char *buf;
- VALUE val;
-{
- long len, i, j, chars;
- char *next_digit;
-
- len = RBIGNUM(val)->len;
- next_digit = RBIGNUM(val)->digits + (len * SIZEOF_BDIGITS);
- if (big_endian()) {
- for(i=0; i<len; i++) {
- next_digit -= SIZEOF_BDIGITS;
- for(j=0; j<SIZEOF_BDIGITS; j++) {
- *buf++ = *(next_digit+j);
- }
- }
- } else {
- for(i=0; i<len; i++) {
- next_digit -= SIZEOF_BDIGITS;
- for(j=SIZEOF_BDIGITS-1; j>=0; j--) {
- *buf++ = *(next_digit+j);
- }
- }
- }
-}
-
-VALUE
-rb_bignum_unpack(buf, sign, len)
- const char *buf;
- int sign;
- long len;
-{
- VALUE big;
- long num_digits, i, j;
- char *next_digit;
- char *extra_digit;
- long num_full_digits = len / SIZEOF_BDIGITS;
- int extra_bytes = len % SIZEOF_BDIGITS;
-
- num_digits = num_full_digits + (extra_bytes>0 ? 1 : 0);
- big = bignew(num_digits, 1);
- extra_digit = next_digit =
- (char*)RBIGNUM(big)->digits + num_full_digits * SIZEOF_BDIGITS;
-
- if (big_endian()) {
- if (extra_bytes > 0) {
- for(i = 0; i < SIZEOF_BDIGITS - extra_bytes; i++) {
- *extra_digit++ = 0;
- }
- for(i = 0; i < extra_bytes; i++) {
- *extra_digit++ = *buf++;
- }
- }
- for(i = 0; i < num_full_digits; i++) {
- next_digit -= SIZEOF_BDIGITS;
- for(j = 0; j < SIZEOF_BDIGITS; j++) {
- *next_digit++ = *buf++;
- }
- }
- } else {
- if (extra_bytes > 0) {
- for(i = extra_bytes - 1; i >= 0 ; i--) {
- *(extra_digit+i) = *buf++;
- }
- extra_digit += extra_bytes;
- for(i = 0; i < SIZEOF_BDIGITS - extra_bytes; i++) {
- *extra_digit++ = 0;
- }
- }
- for(i = 0; i < num_full_digits; i++) {
- next_digit -= SIZEOF_BDIGITS;
- for(j = SIZEOF_BDIGITS - 1; j >= 0; j--) {
- *(next_digit+j) = *buf++;
- }
- }
- }
-
- return bignorm(big);
-}
-
VALUE
rb_cstr_to_inum(str, base, badcheck)
const char *str;
--------upatch_pack_c-------------------------------------------------------
--- pack.c 2003-05-28 23:55:32.000000000 +0200
+++ pack.c.old 2003-05-28 11:19:21.000000000 +0200
@@ -376,21 +376,6 @@
static int uv_to_utf8 _((char*,unsigned long));
static unsigned long utf8_to_uv _((char*,long*));
-VALUE
-ensure_bignum(val)
- VALUE val;
-{
- if (NIL_P(val)) {
- val = INT2FIX(0);
- } else {
- val = rb_to_int(val);
- }
- if (FIXNUM_P(val)) {
- val = rb_int2big(FIX2LONG(val));
- }
- return val;
-}
-
static VALUE
pack_pack(ary, fmt)
VALUE ary, fmt;
@@ -683,33 +668,6 @@
}
break;
- case 'W':
- while (len-- > 0) {
- VALUE from;
- long len;
- long num_bytes_to_skip = 0;
-
- from = ensure_bignum(NEXTFROM);
- len = RBIGNUM(from)->len * SIZEOF_BDIGITS;
- {
- char tmp[len];
-
- rb_nonneg_bignum_pack(tmp, from);
- // Skip leading zeroes if positive bignum. Extend
- // this "strategy" for 'w' so that only negative
- // bignums (and 0) can have leading zero?
- if (RBIGNUM(from)->sign) {
- while (num_bytes_to_skip < (len-1) &&
- tmp[num_bytes_to_skip] == 0x00) {
- num_bytes_to_skip++;
- }
- }
- rb_str_buf_cat(res, ((char*)&tmp) + num_bytes_to_skip,
- len - num_bytes_to_skip);
- }
- }
- break;
-
case 'n':
while (len-- > 0) {
unsigned short s;
@@ -1456,11 +1414,6 @@
}
break;
- case 'W':
- rb_ary_push(ary, rb_bignum_unpack(s, 1, send - s));
- s = send;
- break;
-
case 'n':
PACK_LENGTH_ADJUST(unsigned short,2);
while (len-- > 0) {
--------------------------------------------------------------------