Issue #4044 has been updated by Heesob Park.


I confirmed this on ruby 1.9.3dev (2010-11-10) [i386-mswin32_90]
irb(main):001:0> /[^\W]/iu =~ 'k'
=> nil
irb(main):002:0> /[^\W]/iu =~ 's'
=> nil

This bug is due to multiple Case Unfold definitions in unicode.c:

static const CaseUnfold_11_Type CaseUnfold_11[] = {
 { 0x0061, {1, {0x0041 }}},
 { 0x0062, {1, {0x0042 }}},
 { 0x0063, {1, {0x0043 }}},
 { 0x0064, {1, {0x0044 }}},
 { 0x0065, {1, {0x0045 }}},
 { 0x0066, {1, {0x0046 }}},
 { 0x0067, {1, {0x0047 }}},
 { 0x0068, {1, {0x0048 }}},
 { 0x006a, {1, {0x004a }}},
 { 0x006b, {2, {0x212a, 0x004b }}},   //----- 'k'
 { 0x006c, {1, {0x004c }}},
 { 0x006d, {1, {0x004d }}},
 { 0x006e, {1, {0x004e }}},
 { 0x006f, {1, {0x004f }}},
 { 0x0070, {1, {0x0050 }}},
 { 0x0071, {1, {0x0051 }}},
 { 0x0072, {1, {0x0052 }}},
 { 0x0073, {2, {0x0053, 0x017f }}},   //---- 's'

A possible patch is:

--- regparse.c  2010-11-12 15:10:07.000000000 +0900
+++ regparse.c.new      2010-11-12 15:29:34.000000000 +0900
@@ -5075,7 +5075,7 @@
     int is_in = onig_is_code_in_cc(env->enc, from, cc);
 #ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
     if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
-       (is_in == 0 &&  IS_NCCLASS_NOT(cc))) {
+       (is_in == 0 &&  IS_NCCLASS_NOT(cc) && from < SINGLE_BYTE_SIZE)) {
       if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
        add_code_range0(&(cc->mbuf), env, *to, *to, 0);
       }



----------------------------------------
http://redmine.ruby-lang.org/issues/show/4044

----------------------------------------
http://redmine.ruby-lang.org