遠藤です。

2008/09/24 7:13 Tanaka Akira <akr / fsij.org>:
> In article <e0b1e5700809231144n376fd4eencfe06c49ed66665e / mail.gmail.com>,
>  "Yusuke ENDOH" <mame / tsg.ne.jp> writes:
>
>> うーん。base64.rb には decode_b とか b64encode とか禍々しい
>> メソッドがあるので、個人的にはあまり復活させたくないところ
>> です。
>
> そのへんは消しちゃっていいと思います。


そのへんと deprecated なコードを消しちゃったらだいぶすっきりしました。

module Base64 は以下の 6 つの module_function を持ちます。

- Base64.encode64         : RFC 2045 準拠なエンコード (改行を入れる)
- Base64.decode64         : RFC 2045 準拠なデコード (改行などを無視する)
- Base64.strict_encode64  : RFC 4648 準拠なエンコード (改行を入れない)
- Base64.strict_decode64  : RFC 4648 準拠なデコード (改行や = の不足は例外)
- Base64.urlsafe_encode64 : URL セーフなバリアントのエンコード
- Base64.urlsafe_decode64 : URL セーフなバリアントのデコード

standard_encode64 は RFC を意識しすぎな名前だなぁと思ったので、
一般ユーザにわかりやすそうな strict にしてみました。
standard の方が良ければ直します。

これでよければコミットしたいと思いますが、いかがでしょうか。


Index: pack.c
===================================================================
--- pack.c	(revision 19526)
+++ pack.c	(working copy)
@@ -362,7 +362,7 @@
 #endif
 static const char toofew[] = "too few arguments";

-static void encodes(VALUE,const char*,long,int);
+static void encodes(VALUE,const char*,long,int,int);
 static void qpencode(VALUE,VALUE,long);

 static unsigned long utf8_to_uv(const char*,long*);
@@ -887,6 +887,11 @@
 	    ptr = RSTRING_PTR(from);
 	    plen = RSTRING_LEN(from);

+	    if (len == 0) {
+		encodes(res, ptr, plen, type, 0);
+		ptr += plen;
+		break;
+	    }
 	    if (len <= 2)
 		len = 45;
 	    else
@@ -898,7 +903,7 @@
 		    todo = len;
 		else
 		    todo = plen;
-		encodes(res, ptr, todo, type);
+		encodes(res, ptr, todo, type, 1);
 		plen -= todo;
 		ptr += todo;
 	    }
@@ -1007,7 +1012,7 @@
 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

 static void
-encodes(VALUE str, const char *s, long len, int type)
+encodes(VALUE str, const char *s, long len, int type, int tail_lf)
 {
     char buff[4096];
     long i = 0;
@@ -1048,7 +1053,7 @@
 	buff[i++] = padding;
 	buff[i++] = padding;
     }
-    buff[i++] = '\n';
+    if (tail_lf) buff[i++] = '\n';
     rb_str_buf_cat(str, buff, i);
 }

@@ -1793,7 +1798,7 @@
 	    {
 		VALUE buf = infected_str_new(0, (send - s)*3/4, str);
 		char *ptr = RSTRING_PTR(buf);
-		int a = -1,b = -1,c = 0,d;
+		int a = -1,b = -1,c = 0,d = 0;
 		static signed char b64_xtable[256];

 		if (b64_xtable['/'] <= 0) {
@@ -1806,32 +1811,64 @@
 			b64_xtable[(unsigned char)b64_table[i]] = i;
 		    }
 		}
-		while (s < send) {
-		    a = b = c = d = -1;
-		    while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
-		    if (s >= send) break;
-		    s++;
-		    while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
-		    if (s >= send) break;
-		    s++;
-		    while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
-		    if (*s == '=' || s >= send) break;
-		    s++;
-		    while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
-		    if (*s == '=' || s >= send) break;
-		    s++;
-		    *ptr++ = a << 2 | b >> 4;
-		    *ptr++ = b << 4 | c >> 2;
-		    *ptr++ = c << 6 | d;
-		}
-		if (a != -1 && b != -1) {
-		    if (c == -1 && *s == '=')
+		if (len == 0) {
+		    while (s < send) {
+			a = b = c = d = -1;
+			a = b64_xtable[(unsigned char)*s++];
+			if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
+			b = b64_xtable[(unsigned char)*s++];
+			if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
+			if (*s == '=') {
+			    if (s + 2 == send && *(s + 1) == '=') break;
+			    rb_raise(rb_eArgError, "invalid base64");
+			}
+			c = b64_xtable[(unsigned char)*s++];
+			if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
+			if (s + 1 == send && *s == '=') break;
+			d = b64_xtable[(unsigned char)*s++];
+			if (d == -1) rb_raise(rb_eArgError, "invalid base64");
 			*ptr++ = a << 2 | b >> 4;
-		    else if (c != -1 && *s == '=') {
+			*ptr++ = b << 4 | c >> 2;
+			*ptr++ = c << 6 | d;
+		    }
+		    if (c == -1) {
 			*ptr++ = a << 2 | b >> 4;
+			if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
+		    }
+		    else if (d == -1) {
+			*ptr++ = a << 2 | b >> 4;
 			*ptr++ = b << 4 | c >> 2;
+			if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
 		    }
 		}
+		else {
+		    while (s < send) {
+			a = b = c = d = -1;
+			while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
+			if (s >= send) break;
+			s++;
+			while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
+			if (s >= send) break;
+			s++;
+			while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
+			if (*s == '=' || s >= send) break;
+			s++;
+			while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
+			if (*s == '=' || s >= send) break;
+			s++;
+			*ptr++ = a << 2 | b >> 4;
+			*ptr++ = b << 4 | c >> 2;
+			*ptr++ = c << 6 | d;
+		    }
+		    if (a != -1 && b != -1) {
+			if (c == -1 && *s == '=')
+			    *ptr++ = a << 2 | b >> 4;
+			else if (c != -1 && *s == '=') {
+			    *ptr++ = a << 2 | b >> 4;
+			    *ptr++ = b << 4 | c >> 2;
+			}
+		    }
+		}
 		rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
 		UNPACK_PUSH(buf);
 	    }
Index: lib/base64.rb
===================================================================
--- lib/base64.rb	(revision 0)
+++ lib/base64.rb	(revision 0)
@@ -0,0 +1,91 @@
+#
+# = base64.rb: methods for base64-encoding and -decoding strings
+#
+
+# The Base64 module provides for the encoding (#encode64, #strict_encode64,
+# #urlsafe_encode64) and decoding (#decode64, #strict_decode64,
+# #urlsafe_decode64) of binary data using a Base64 representation.
+#
+# == Example
+#
+# A simple encoding and decoding.
+#
+#     require "base64"
+#
+#     enc   = Base64.encode64('Send reinforcements')
+#                         # -> "U2VuZCByZWluZm9yY2VtZW50cw==\n"
+#     plain = Base64.decode64(enc)
+#                         # -> "Send reinforcements"
+#
+# The purpose of using base64 to encode data is that it translates any
+# binary data into purely printable characters.
+
+module Base64
+  module_function
+
+  # Returns the Base64-encoded version of +bin+.
+  # This method complies with RFC 2045.
+  # Line feeds are added to every 60 encoded characters.
+  #
+  #    require 'base64'
+  #    Base64.encode64("Now is the time for all good coders\nto learn Ruby")
+  #
+  # <i>Generates:</i>
+  #
+  #    Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g
+  #    UnVieQ==
+  def encode64(bin)
+    [bin].pack("m")
+  end
+
+  # Returns the Base64-decoded version of +str+.
+  # This method complies with RFC 2045.
+  # Characters outside the base alphabet are ignored.
+  #
+  #   require 'base64'
+  #   str = 'VGhpcyBpcyBsaW5lIG9uZQpUaGlzIG' +
+  #         'lzIGxpbmUgdHdvClRoaXMgaXMgbGlu' +
+  #         'ZSB0aHJlZQpBbmQgc28gb24uLi4K'
+  #   puts Base64.decode64(str)
+  #
+  # <i>Generates:</i>
+  #
+  #    This is line one
+  #    This is line two
+  #    This is line three
+  #    And so on...
+  def decode64(str)
+    str.unpack("m").first
+  end
+
+  # Returns the Base64-encoded version of +bin+.
+  # This method complies with RFC 4648.
+  # No line feeds are added.
+  def strict_encode64(bin)
+    [bin].pack("m0")
+  end
+
+  # Returns the Base64-decoded version of +str+.
+  # This method complies with RFC 4648.
+  # ArgumentError is raised if +str+ is incorrectly padded or contains
+  # non-alphabet characters.  Note that CR or LF are also rejected.
+  def strict_decode64(str)
+    str.unpack("m0").first
+  end
+
+  # Returns the Base64-encoded version of +bin+.
+  # This method complies with ``Base 64 Encoding with URL and Filename Safe
+  # Alphabet'' in RFC 4648.
+  # The alphabet uses '-' instead of '+' and '_' instead of '/'.
+  def urlsafe_encode64(bin)
+    strict_encode64(bin).tr("+/", "-_")
+  end
+
+  # Returns the Base64-decoded version of +str+.
+  # This method complies with ``Base 64 Encoding with URL and Filename Safe
+  # Alphabet'' in RFC 4648.
+  # The alphabet uses '-' instead of '+' and '_' instead of '/'.
+  def urlsafe_decode64(str)
+    strict_decode64(str.tr("-_", "+/"))
+  end
+end
Index: test/ruby/test_pack.rb
===================================================================
--- test/ruby/test_pack.rb	(revision 19526)
+++ test/ruby/test_pack.rb	(working copy)
@@ -379,6 +379,36 @@
     assert_equal(["\377\377\377"], "////\n".unpack("m"))
   end

+  def test_pack_unpack_m0
+    assert_equal("", [""].pack("m0"))
+    assert_equal("AA==", ["\0"].pack("m0"))
+    assert_equal("AAA=", ["\0\0"].pack("m0"))
+    assert_equal("AAAA", ["\0\0\0"].pack("m0"))
+    assert_equal("/w==", ["\377"].pack("m0"))
+    assert_equal("//8=", ["\377\377"].pack("m0"))
+    assert_equal("////", ["\377\377\377"].pack("m0"))
+
+    assert_equal([""], "".unpack("m0"))
+    assert_equal(["\0"], "AA==".unpack("m0"))
+    assert_equal(["\0\0"], "AAA=".unpack("m0"))
+    assert_equal(["\0\0\0"], "AAAA".unpack("m0"))
+    assert_equal(["\377"], "/w==".unpack("m0"))
+    assert_equal(["\377\377"], "//8=".unpack("m0"))
+    assert_equal(["\377\377\377"], "////".unpack("m0"))
+
+    assert_raise(ArgumentError) { "^".unpack("m0") }
+    assert_raise(ArgumentError) { "A".unpack("m0") }
+    assert_raise(ArgumentError) { "A^".unpack("m0") }
+    assert_raise(ArgumentError) { "AA".unpack("m0") }
+    assert_raise(ArgumentError) { "AA=".unpack("m0") }
+    assert_raise(ArgumentError) { "AA===".unpack("m0") }
+    assert_raise(ArgumentError) { "AA=x".unpack("m0") }
+    assert_raise(ArgumentError) { "AAA".unpack("m0") }
+    assert_raise(ArgumentError) { "AAA^".unpack("m0") }
+    assert_raise(ArgumentError) { "AB==".unpack("m0") }
+    assert_raise(ArgumentError) { "AAB=".unpack("m0") }
+  end
+
   def test_pack_unpack_M
     assert_equal("a b c\td =\n\ne=\n", ["a b c\td \ne"].pack("M"))
     assert_equal(["a b c\td \ne"], "a b c\td =\n\ne=\n".unpack("M"))
Index: test/base64/test_base64.rb
===================================================================
--- test/base64/test_base64.rb	(revision 0)
+++ test/base64/test_base64.rb	(revision 0)
@@ -0,0 +1,99 @@
+require "test/unit"
+require "base64"
+
+class TestBase64 < Test::Unit::TestCase
+  def test_sample
+    assert_equal("U2VuZCByZWluZm9yY2VtZW50cw==\n", Base64.encode64('Send reinforcements'))
+    assert_equal('Send reinforcements', Base64.decode64("U2VuZCByZWluZm9yY2VtZW50cw==\n"))
+    assert_equal(
+      "Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g\nUnVieQ==\n",
+      Base64.encode64("Now is the time for all good coders\nto learn Ruby"))
+    assert_equal(
+      "Now is the time for all good coders\nto learn Ruby",
+      Base64.decode64("Tm93IGlzIHRoZSB0aW1lIGZvciBhbGwgZ29vZCBjb2RlcnMKdG8gbGVhcm4g\nUnVieQ==\n"))
+    assert_equal(
+      "VGhpcyBpcyBsaW5lIG9uZQpUaGlzIGlzIGxpbmUgdHdvClRoaXMgaXMgbGlu\nZSB0aHJlZQpBbmQgc28gb24uLi4K\n",
+      Base64.encode64("This is line one\nThis is line two\nThis is line three\nAnd so on...\n"))
+    assert_equal(
+      "This is line one\nThis is line two\nThis is line three\nAnd so on...\n",
+      Base64.decode64("VGhpcyBpcyBsaW5lIG9uZQpUaGlzIGlzIGxpbmUgdHdvClRoaXMgaXMgbGluZSB0aHJlZQpBbmQgc28gb24uLi4K"))
+  end
+
+  def test_encode64
+    assert_equal("", Base64.encode64(""))
+    assert_equal("AA==\n", Base64.encode64("\0"))
+    assert_equal("AAA=\n", Base64.encode64("\0\0"))
+    assert_equal("AAAA\n", Base64.encode64("\0\0\0"))
+    assert_equal("/w==\n", Base64.encode64("\377"))
+    assert_equal("//8=\n", Base64.encode64("\377\377"))
+    assert_equal("////\n", Base64.encode64("\377\377\377"))
+    assert_equal("/+8=\n", Base64.encode64("\xff\xef"))
+  end
+
+  def test_decode64
+    assert_equal("", Base64.decode64(""))
+    assert_equal("\0", Base64.decode64("AA==\n"))
+    assert_equal("\0\0", Base64.decode64("AAA=\n"))
+    assert_equal("\0\0\0", Base64.decode64("AAAA\n"))
+    assert_equal("\377", Base64.decode64("/w==\n"))
+    assert_equal("\377\377", Base64.decode64("//8=\n"))
+    assert_equal("\377\377\377", Base64.decode64("////\n"))
+    assert_equal("\xff\xef", Base64.decode64("/+8=\n"))
+  end
+
+  def test_strict_encode64
+    assert_equal("", Base64.strict_encode64(""))
+    assert_equal("AA==", Base64.strict_encode64("\0"))
+    assert_equal("AAA=", Base64.strict_encode64("\0\0"))
+    assert_equal("AAAA", Base64.strict_encode64("\0\0\0"))
+    assert_equal("/w==", Base64.strict_encode64("\377"))
+    assert_equal("//8=", Base64.strict_encode64("\377\377"))
+    assert_equal("////", Base64.strict_encode64("\377\377\377"))
+    assert_equal("/+8=", Base64.strict_encode64("\xff\xef"))
+  end
+
+  def test_strict_decode64
+    assert_equal("", Base64.strict_decode64(""))
+    assert_equal("\0", Base64.strict_decode64("AA=="))
+    assert_equal("\0\0", Base64.strict_decode64("AAA="))
+    assert_equal("\0\0\0", Base64.strict_decode64("AAAA"))
+    assert_equal("\377", Base64.strict_decode64("/w=="))
+    assert_equal("\377\377", Base64.strict_decode64("//8="))
+    assert_equal("\377\377\377", Base64.strict_decode64("////"))
+    assert_equal("\xff\xef", Base64.strict_decode64("/+8="))
+
+    assert_raise(ArgumentError) { Base64.strict_decode64("^") }
+    assert_raise(ArgumentError) { Base64.strict_decode64("A") }
+    assert_raise(ArgumentError) { Base64.strict_decode64("A^") }
+    assert_raise(ArgumentError) { Base64.strict_decode64("AA") }
+    assert_raise(ArgumentError) { Base64.strict_decode64("AA=") }
+    assert_raise(ArgumentError) { Base64.strict_decode64("AA===") }
+    assert_raise(ArgumentError) { Base64.strict_decode64("AA=x") }
+    assert_raise(ArgumentError) { Base64.strict_decode64("AAA") }
+    assert_raise(ArgumentError) { Base64.strict_decode64("AAA^") }
+    assert_raise(ArgumentError) { Base64.strict_decode64("AB==") }
+    assert_raise(ArgumentError) { Base64.strict_decode64("AAB=") }
+  end
+
+  def test_urlsafe_encode64
+    assert_equal("", Base64.urlsafe_encode64(""))
+    assert_equal("AA==", Base64.urlsafe_encode64("\0"))
+    assert_equal("AAA=", Base64.urlsafe_encode64("\0\0"))
+    assert_equal("AAAA", Base64.urlsafe_encode64("\0\0\0"))
+    assert_equal("_w==", Base64.urlsafe_encode64("\377"))
+    assert_equal("__8=", Base64.urlsafe_encode64("\377\377"))
+    assert_equal("____", Base64.urlsafe_encode64("\377\377\377"))
+    assert_equal("_-8=", Base64.urlsafe_encode64("\xff\xef"))
+  end
+
+  def test_urlsafe_decode64
+    assert_equal("", Base64.urlsafe_decode64(""))
+    assert_equal("\0", Base64.urlsafe_decode64("AA=="))
+    assert_equal("\0\0", Base64.urlsafe_decode64("AAA="))
+    assert_equal("\0\0\0", Base64.urlsafe_decode64("AAAA"))
+    assert_equal("\377", Base64.urlsafe_decode64("_w=="))
+    assert_equal("\377\377", Base64.urlsafe_decode64("__8="))
+    assert_equal("\377\377\377", Base64.urlsafe_decode64("____"))
+    assert_equal("\xff\xef", Base64.urlsafe_decode64("_+8="))
+  end
+end

-- 
Yusuke ENDOH <mame / tsg.ne.jp>