Hi,

The current REXML::Encoding mechanism seems to have several problems:

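* it is thread-unsafe, because the method definitions are passed as a string through a class variable (@@__REXML_encoding_methods),
* it is inefficient, because the encoding file is loaded and its string is eval'ed each time, and
* the methods defined in SHIFT[-_]JIS.rb do not have the required names (to_shift_jis/from_shift_jis instead of encode/decode).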


Index: lib/rexml/encoding.rb =================================================================== RCS file: /cvs/ruby/src/ruby/lib/rexml/encoding.rb,v retrieving revision 1.5 diff -u -2 -p -d -r1.5 encoding.rb --- lib/rexml/encoding.rb 9 Dec 2003 02:41:33 -0000 1.5 +++ lib/rexml/encoding.rb 14 Dec 2003 13:53:00 -0000 @@ -1,5 +1,15 @@ +# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2 module REXML module Encoding - @@uconv_available = false + @encoding_methods = {} + def self.register(enc, &block) + @encoding_methods[enc] = block + end + def self.apply(obj, enc) + @encoding_methods[enc][obj] + end + def self.encoding_method(enc) + @encoding_methods[enc] + end # Native, default format is UTF-8, so it is declared here rather than in @@ -19,24 +29,22 @@ module REXML @encoding = enc.upcase begin - load 'rexml/encodings/ICONV.rb' - instance_eval @@__REXML_encoding_methods - Iconv::iconv( UTF_8, @encoding, "" ) + require 'rexml/encodings/ICONV.rb' + Encoding.apply(self, "ICONV") rescue LoadError, Exception => err - raise "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/ + raise ArgumentError, "Bad encoding name #@encoding" unless @encoding =~ /^[\w-]+$/ @encoding.untaint enc_file = File.join( "rexml", "encodings", "#@encoding.rb" ) begin - load enc_file - instance_eval @@__REXML_encoding_methods + require enc_file + Encoding.apply(self, @encoding) rescue LoadError - puts $!.message - raise Exception.new( "No decoder found for encoding #@encoding. Please install iconv." ) + puts $!.message + raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv." 
end end else - enc = UTF_8 - @encoding = enc.upcase - load 'rexml/encodings/UTF-8.rb' - instance_eval @@__REXML_encoding_methods + @encoding = UTF_8 + require 'rexml/encodings/UTF-8.rb' + Encoding.apply(self, @encoding) end ensure Index: lib/rexml/encodings/EUC-JP.rb =================================================================== RCS file: /cvs/ruby/src/ruby/lib/rexml/encodings/EUC-JP.rb,v retrieving revision 1.6 diff -u -2 -p -d -r1.6 EUC-JP.rb --- lib/rexml/encodings/EUC-JP.rb 9 Dec 2003 02:41:33 -0000 1.6 +++ lib/rexml/encodings/EUC-JP.rb 14 Dec 2003 13:35:36 -0000 @@ -1,37 +1,20 @@ -begin - require 'iconv' - - module REXML - module Encoding - @@__REXML_encoding_methods =<<-EOL - def decode(str) - return Iconv::iconv("utf-8", "euc-jp", str)[0] - end +require 'uconv' - def encode content - return Iconv::iconv("euc-jp", "utf-8", content)[0] - end - EOL +module REXML + module Encoding + def decode_eucjp(str) + Uconv::euctou8(str) end - end -rescue LoadError - begin - require 'uconv' - module REXML - module Encoding - @@__REXML_encoding_methods =<<-EOL - def decode(str) - return Uconv::euctou8(str) - end + def encode_eucjp content + Uconv::u8toeuc(content) + end - def encode content - return Uconv::u8toeuc(content) - end - EOL + register("EUC-JP") do |obj| + class << obj + alias decode decode_eucjp + alias encode encode_eucjp end end - rescue LoadError - raise "uconv or iconv is required for Japanese encoding support." end end Index: lib/rexml/encodings/ICONV.rb =================================================================== RCS file: /cvs/ruby/src/ruby/lib/rexml/encodings/ICONV.rb,v retrieving revision 1.2 diff -u -2 -p -d -r1.2 ICONV.rb --- lib/rexml/encodings/ICONV.rb 9 Dec 2003 02:41:33 -0000 1.2 +++ lib/rexml/encodings/ICONV.rb 14 Dec 2003 15:56:43 -0000 @@ -4,13 +4,19 @@ raise LoadError unless defined? 
Iconv module REXML module Encoding - @@__REXML_encoding_methods =<<-EOL - def decode( str ) - return Iconv::iconv("utf-8", @encoding, str)[0] + def decode_iconv(str) + Iconv.conv(UTF_8, @encoding, str) end - def encode( content ) - return Iconv::iconv(@encoding, "utf-8", content)[0] + def encode_iconv(content) + Iconv.conv(@encoding, UTF_8, content) + end + + register("ICONV") do |obj| + Iconv.conv(UTF_8, obj.encoding, nil) + class << obj + alias decode decode_iconv + alias encode encode_iconv + end end - EOL end end Index: lib/rexml/encodings/ISO-8859-1.rb =================================================================== RCS file: /cvs/ruby/src/ruby/lib/rexml/encodings/ISO-8859-1.rb,v retrieving revision 1.3 diff -u -2 -p -d -r1.3 ISO-8859-1.rb --- lib/rexml/encodings/ISO-8859-1.rb 9 Dec 2003 02:41:33 -0000 1.3 +++ lib/rexml/encodings/ISO-8859-1.rb 14 Dec 2003 13:45:40 -0000 @@ -1,25 +1,7 @@ +require 'rexml/encodings/US-ASCII' + module REXML module Encoding - @@__REXML_encoding_methods =<<-EOL - # Convert from UTF-8 - def encode content - array_utf8 = content.unpack('U*') - array_enc = [] - array_utf8.each do |num| - if num <= 0xFF - array_enc << num - else - # Numeric entity (&#nnnn;); shard by Stefan Scholl - array_enc.concat "&\##{num};".unpack('C*') - end - end - array_enc.pack('C*') - end - - # Convert to UTF-8 - def decode(str) - str.unpack('C*').pack('U*') - end - EOL + register("ISO-8859-1", &encoding_method("US-ASCII")) end end Index: lib/rexml/encodings/SHIFT-JIS.rb =================================================================== RCS file: /cvs/ruby/src/ruby/lib/rexml/encodings/SHIFT-JIS.rb,v retrieving revision 1.2 diff -u -2 -p -d -r1.2 SHIFT-JIS.rb --- lib/rexml/encodings/SHIFT-JIS.rb 12 Dec 2003 21:17:41 -0000 1.2 +++ lib/rexml/encodings/SHIFT-JIS.rb 14 Dec 2003 13:38:00 -0000 @@ -1,37 +1,22 @@ -begin - require 'iconv' - - module REXML - module Encoding - @@__REXML_encoding_methods =<<-EOL - def decode(str) - return Iconv::iconv("utf-8", 
"shift-jis", str)[0] - end +require 'uconv' - def encode content - return Iconv::iconv("shift-jis", "utf-8", content)[0] - end - EOL +module REXML + module Encoding + def decode_sjis content + Uconv::u8tosjis(content) end - end -rescue LoadError - begin - require 'uconv' - module REXML - module Encoding - @@__REXML_encoding_methods =<<-EOL - def to_shift_jis content - Uconv::u8tosjis(content) - end + def encode_sjis(str) + Uconv::sjistou8(str) + end - def from_shift_jis(str) - Uconv::sjistou8(str) - end - EOL + b = proc do |obj| + class << obj + alias decode decode_sjis + alias encode encode_sjis end end - rescue LoadError - raise "uconv or iconv is required for Japanese encoding support." + register("SHIFT-JIS", &b) + register("SHIFT_JIS", &b) end end Index: lib/rexml/encodings/UNILE.rb =================================================================== RCS file: /cvs/ruby/src/ruby/lib/rexml/encodings/UNILE.rb,v retrieving revision 1.4 diff -u -2 -p -d -r1.4 UNILE.rb --- lib/rexml/encodings/UNILE.rb 12 Dec 2003 21:17:41 -0000 1.4 +++ lib/rexml/encodings/UNILE.rb 14 Dec 2003 13:32:01 -0000 @@ -1,6 +1,5 @@ module REXML module Encoding - @@__REXML_encoding_methods =<<-EOL - def encode content + def encode_unile content array_utf8 = content.unpack("U*") array_enc = [] @@ -17,5 +16,5 @@ module REXML end - def decode(str) + def decode_unile(str) array_enc=str.unpack('C*') array_utf8 = [] @@ -25,5 +24,11 @@ module REXML array_utf8.pack('U*') end - EOL + + register(UNILE) do |obj| + class << obj + alias decode decode_unile + alias encode encode_unile + end + end end end Index: lib/rexml/encodings/US-ASCII.rb =================================================================== RCS file: /cvs/ruby/src/ruby/lib/rexml/encodings/US-ASCII.rb,v retrieving revision 1.4 diff -u -2 -p -d -r1.4 US-ASCII.rb --- lib/rexml/encodings/US-ASCII.rb 12 Dec 2003 21:17:41 -0000 1.4 +++ lib/rexml/encodings/US-ASCII.rb 14 Dec 2003 13:41:42 -0000 @@ -1,7 +1,6 @@ module REXML module Encoding - 
@@__REXML_encoding_methods =<<-EOL # Convert from UTF-8 - def encode content + def encode_ascii content array_utf8 = content.unpack('U*') array_enc = [] @@ -18,8 +17,14 @@ module REXML # Convert to UTF-8 - def decode(str) + def decode_ascii(str) str.unpack('C*').pack('U*') end - EOL + + register("US-ASCII") do |obj| + class << obj + alias decode decode_ascii + alias encode encode_ascii + end + end end end Index: lib/rexml/encodings/UTF-16.rb =================================================================== RCS file: /cvs/ruby/src/ruby/lib/rexml/encodings/UTF-16.rb,v retrieving revision 1.4 diff -u -2 -p -d -r1.4 UTF-16.rb --- lib/rexml/encodings/UTF-16.rb 12 Dec 2003 21:17:41 -0000 1.4 +++ lib/rexml/encodings/UTF-16.rb 14 Dec 2003 13:30:36 -0000 @@ -1,6 +1,5 @@ module REXML module Encoding - @@__REXML_encoding_methods =<<-EOL - def encode content + def encode_utf16 content array_utf8 = content.unpack("U*") array_enc = [] @@ -17,5 +16,5 @@ module REXML end - def decode(str) + def decode_utf16(str) array_enc=str.unpack('C*') array_utf8 = [] @@ -25,5 +24,11 @@ module REXML array_utf8.pack('U*') end - EOL + + register(UTF_16) do |obj| + class << obj + alias decode decode_utf16 + alias encode encode_utf16 + end + end end end Index: lib/rexml/encodings/UTF-8.rb =================================================================== RCS file: /cvs/ruby/src/ruby/lib/rexml/encodings/UTF-8.rb,v retrieving revision 1.2 diff -u -2 -p -d -r1.2 UTF-8.rb --- lib/rexml/encodings/UTF-8.rb 9 Dec 2003 02:41:33 -0000 1.2 +++ lib/rexml/encodings/UTF-8.rb 14 Dec 2003 13:28:30 -0000 @@ -1,13 +1,18 @@ module REXML module Encoding - @@__REXML_encoding_methods =<<-EOL - def encode content + def encode_utf8 content content end - def decode(str) + def decode_utf8(str) str end - EOL + + register(UTF_8) do |obj| + class << obj + alias decode decode_utf8 + alias encode encode_utf8 + end + end end end
-- Nobu Nakada