This is a multi-part message in MIME format. --------------050404000305020305030704 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit This is another version of the patch, with a couple of riskier (because I don't understand what I'm doing as well) changes to binmode handling. They fix the behavior of getc for me. (getc was returning unicode characters even when a file was opened "rb" or binmode() was called on it. I think that was incorrect.) David --------------050404000305020305030704 Content-Type: text/plain; name="iopatch2" Content-Transfer-Encoding: 7bit Content-Disposition: inline; filename="iopatch2" Index: include/ruby/io.h --- include/ruby/io.h (revision 14446) +++ include/ruby/io.h (working copy) @@ -47,6 +47,7 @@ int rbuf_capa; VALUE tied_io_for_writing; rb_encoding *enc; + rb_encoding *enc2; } rb_io_t; #define HAVE_RB_IO_T 1 @@ -91,6 +92,7 @@ fp->rbuf_capa = 0;\ fp->tied_io_for_writing = 0;\ fp->enc = 0;\ + fp->enc2 = 0;\ } while (0) FILE *rb_io_stdio_file(rb_io_t *fptr); Index: io.c --- io.c (revision 14446) +++ io.c (working copy) @@ -124,7 +124,7 @@ static VALUE argf; -static ID id_write, id_read, id_getc, id_flush; +static ID id_write, id_read, id_getc, id_flush, id_encode; extern char *ruby_inplace_mode; @@ -622,6 +622,26 @@ { long len, n, r, l, offset = 0; + /* + * If an external encoding was specified and it differs from + * the strings encoding then we must transcode before writing. + * We must also transcode if two encodings were specified + */ + if (fptr->enc && (fptr->enc2 || fptr->enc != rb_enc_get(str))) { + /* transcode str before output */ + /* the methods in transcode.c are static, so call indirectly */ + /* Can't use encode!
because puts writes a frozen newline */ + if (fptr->enc2) { + str = rb_funcall(str, id_encode, 2, + rb_enc_from_encoding(fptr->enc), + rb_enc_from_encoding(fptr->enc2)); + } + else { + str = rb_funcall(str, id_encode, 1, + rb_enc_from_encoding(fptr->enc)); + } + } + len = RSTRING_LEN(str); if ((n = len) <= 0) return n; if (fptr->wbuf == NULL && !(fptr->mode & FMODE_SYNC)) { @@ -1279,7 +1299,17 @@ { OBJ_TAINT(str); if (fptr->enc) { - rb_enc_associate(str, fptr->enc); + if (fptr->enc2) { + /* two encodings, so transcode from enc2 to enc */ + /* the methods in transcode.c are static, so call indirectly */ + str = rb_funcall(str, id_encode, 2, + rb_enc_from_encoding(fptr->enc2), + rb_enc_from_encoding(fptr->enc)); + } + else { + /* just one encoding, so associate it with the string */ + rb_enc_associate(str, fptr->enc); + } } return str; } @@ -2878,11 +2908,11 @@ static VALUE rb_io_binmode_m(VALUE io) { + rb_io_binmode(io); + #if defined(_WIN32) || defined(DJGPP) || defined(__CYGWIN__) || defined(__human68k__) || defined(__EMX__) VALUE write_io; - rb_io_binmode(io); - write_io = GetWriteIO(io); if (write_io !=
io) rb_io_binmode(write_io); @@ -3063,7 +3093,8 @@ rb_io_mode_enc(rb_io_t *fptr, const char *mode) { const char *p0, *p1; - int idx; + char *enc2name; + int idx, idx2; p0 = strrchr(mode, ':'); if (p0) { @@ -3071,13 +3102,31 @@ if (idx >= 0) { fptr->enc = rb_enc_from_index(idx); } -#if 0 + else { + rb_warn("Unsupported encoding %s ignored", p0+1); + } p1 = strchr(mode, ':'); if (p1 < p0) { + enc2name = ALLOCA_N(char, p0-p1); + strncpy(enc2name, p1+1, p0-p1-1); + enc2name[p0-p1-1] = '\0'; + idx2 = rb_enc_find_index(enc2name); + if (idx2 == idx) { + rb_warn("Ignoring internal encoding %s: it is identical to external encoding %s", + enc2name, p0+1); + } + else if (idx2 >= 0) { + fptr->enc2 = rb_enc_from_index(idx2); + } + else { + rb_warn("Unsupported encoding %s ignored", enc2name); + } } -#endif } - else if (!(fptr->mode & FMODE_BINMODE)) { + else if (fptr->mode & FMODE_BINMODE) { + fptr->enc = rb_ascii_encoding(); + } + else { fptr->enc = rb_default_external_encoding(); } } @@ -5703,6 +5752,11 @@ return rb_enc_from_encoding(fptr->enc); } +static VALUE +argf_external_encoding(void) +{ + return rb_enc_default_external(); +} static VALUE argf_tell(void) @@ -6155,6 +6209,7 @@ id_read = rb_intern("read"); id_getc = rb_intern("getc"); id_flush = rb_intern("flush"); + id_encode = rb_intern("encode"); rb_define_global_function("syscall", rb_f_syscall, -1); @@ -6339,6 +6394,8 @@ rb_define_singleton_method(argf, "lineno", argf_lineno, 0); rb_define_singleton_method(argf, "lineno=", argf_set_lineno, 1); + rb_define_singleton_method(argf, "external_encoding", argf_external_encoding, 0); + rb_global_variable(&current_file); rb_define_readonly_variable("$FILENAME", &filename); filename = rb_str_new2("-"); --------------050404000305020305030704--