Hi, In message "[ruby-talk:16164] Re: (?i:) in regexp" on 01/06/02, ts <decoux / moulon.inra.fr> writes: | Well actually ruby do this | |pigeon% ruby -rjj -e '/(?im:a|b)?x/.dump' |Regexp `(?im:a|b)?x' | 0 casefold_on | 1 option_set `ims' | 2 on_failure_jump ==> 12 | 3 start_paren | 4 on_failure_jump ==> 7 | 5 exactn "a" (1) | 6 jump ==> 11 | 7 exactn "b" (1) | 8 casefold_off | 9 option_set `' | 10 push_dummy_failure | 11 stop_paren | 12 exactn "x" (1) | 13 end |subexpressions : 0 |pigeon% Thank you. casefold_on etc. were flushed too early. I think I've fixed it. --- regex.c 2001/05/30 09:12:34 1.41 +++ regex.c 2001/06/02 15:19:45 @@ -545,3 +545,3 @@ results in it being larger than 1 << 16, then flag memory exhausted. */ -#define EXTEND_BUFFER \ +#define EXTEND_BUFFER \ do { char *old_buffer = bufp->buffer; \ @@ -1262,3 +1262,2 @@ int options = bufp->options; - int old_options = 0; @@ -1685,85 +1684,135 @@ case '(': - old_options = options; - PATFETCH(c); - if (c == '?') { - int negative = 0; + { + int old_options = options; int push_option = 0; - PATFETCH_RAW(c); - switch (c) { - case 'x': case 'p': case 'm': case 'i': case '-': - for (;;) { - switch (c) { - case '-': - negative = 1; - break; - - case ':': - case ')': - break; + int casefold = 0; - case 'x': - if (negative) - options &= ~RE_OPTION_EXTENDED; - else - options |= RE_OPTION_EXTENDED; - break; + PATFETCH(c); + if (c == '?') { + int negative = 0; - case 'p': - if (negative) { - if ((options&RE_OPTION_POSIXLINE) == RE_OPTION_POSIXLINE) { - options &= ~RE_OPTION_POSIXLINE; + PATFETCH_RAW(c); + switch (c) { + case 'x': case 'p': case 'm': case 'i': case '-': + for (;;) { + switch (c) { + case '-': + negative = 1; + break; + + case ':': + case ')': + break; + + case 'x': + if (negative) + options &= ~RE_OPTION_EXTENDED; + else + options |= RE_OPTION_EXTENDED; + break; + + case 'p': + if (negative) { + if ((options&RE_OPTION_POSIXLINE) == RE_OPTION_POSIXLINE) { + options &= ~RE_OPTION_POSIXLINE; + } } - } - else 
if ((options&RE_OPTION_POSIXLINE) != RE_OPTION_POSIXLINE) { - options |= RE_OPTION_POSIXLINE; - } - push_option = 1; - break; + else if ((options&RE_OPTION_POSIXLINE) != RE_OPTION_POSIXLINE) { + options |= RE_OPTION_POSIXLINE; + } + push_option = 1; + break; - case 'm': - if (negative) { - if (options&RE_OPTION_MULTILINE) { - options &= ~RE_OPTION_MULTILINE; + case 'm': + if (negative) { + if (options&RE_OPTION_MULTILINE) { + options &= ~RE_OPTION_MULTILINE; + } } - } - else if (!(options&RE_OPTION_MULTILINE)) { - options |= RE_OPTION_MULTILINE; - } - push_option = 1; - break; + else if (!(options&RE_OPTION_MULTILINE)) { + options |= RE_OPTION_MULTILINE; + } + push_option = 1; + break; - case 'i': - if (negative) { - if (options&RE_OPTION_IGNORECASE) { - options &= ~RE_OPTION_IGNORECASE; - BUFPUSH(casefold_off); + case 'i': + if (negative) { + if (options&RE_OPTION_IGNORECASE) { + options &= ~RE_OPTION_IGNORECASE; + } + } + else if (!(options&RE_OPTION_IGNORECASE)) { + options |= RE_OPTION_IGNORECASE; } + casefold = 1; + break; + + default: + FREE_AND_RETURN(stackb, "undefined (?...) inline option"); } - else if (!(options&RE_OPTION_IGNORECASE)) { - options |= RE_OPTION_IGNORECASE; - BUFPUSH(casefold_on); + if (c == ')') { + c = '#'; /* read whole in-line options */ + break; } - break; - - default: - FREE_AND_RETURN(stackb, "undefined (?...) inline option"); + if (c == ':') break; + PATFETCH_RAW(c); } - if (c == ')') { - c = '#'; /* read whole in-line options */ - break; + break; + + case '#': + for (;;) { + PATFETCH(c); + if (c == ')') break; } - if (c == ':') break; - PATFETCH_RAW(c); + c = '#'; + break; + + case ':': + case '=': + case '!': + case '>': + break; + + default: + FREE_AND_RETURN(stackb, "undefined (?...) 
sequence"); } + } + else { + PATUNFETCH; + c = '('; + } + if (c == '#') { + if (push_option) { + BUFPUSH(option_set); + BUFPUSH(options); + } + if (casefold) { + if (options & RE_OPTION_IGNORECASE) + BUFPUSH(casefold_on); + else + BUFPUSH(casefold_off); + } break; + } + if (stackp+8 >= stacke) { + DOUBLE_STACK(int); + } - case '#': - for (;;) { - PATFETCH(c); - if (c == ')') break; - } - c = '#'; + /* Laststart should point to the start_memory that we are about + to push (unless the pattern has RE_NREGS or more ('s). */ + /* obsolete: now RE_NREGS is just a default register size. */ + *stackp++ = b - bufp->buffer; + *stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; + *stackp++ = begalt - bufp->buffer; + switch (c) { + case '(': + BUFPUSH(start_memory); + BUFPUSH(regnum); + *stackp++ = regnum++; + *stackp++ = b - bufp->buffer; + BUFPUSH(0); + /* too many ()'s to fit in a byte. (max 254) */ + if (regnum >= RE_REG_MAX) goto too_big; break; - case ':': case '=': @@ -1771,6 +1820,19 @@ case '>': + BUFPUSH(start_nowidth); + *stackp++ = b - bufp->buffer; + BUFPUSH(0); /* temporary value */ + BUFPUSH(0); + if (c != '!') break; + + BUFPUSH(on_failure_jump); + *stackp++ = b - bufp->buffer; + BUFPUSH(0); /* temporary value */ + BUFPUSH(0); break; + case ':': + BUFPUSH(start_paren); + pending_exact = 0; default: - FREE_AND_RETURN(stackb, "undefined (?...) sequence"); + break; } @@ -1780,55 +1842,14 @@ } + if (casefold) { + if (options & RE_OPTION_IGNORECASE) + BUFPUSH(casefold_on); + else + BUFPUSH(casefold_off); + } + *stackp++ = c; + *stackp++ = old_options; + fixup_alt_jump = 0; + laststart = 0; + begalt = b; } - else { - PATUNFETCH; - c = '('; - } - if (c == '#') break; - if (stackp+8 >= stacke) { - DOUBLE_STACK(int); - } - - /* Laststart should point to the start_memory that we are about - to push (unless the pattern has RE_NREGS or more ('s). */ - /* obsolete: now RE_NREGS is just a default register size. 
*/ - *stackp++ = b - bufp->buffer; - *stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; - *stackp++ = begalt - bufp->buffer; - switch (c) { - case '(': - BUFPUSH(start_memory); - BUFPUSH(regnum); - *stackp++ = regnum++; - *stackp++ = b - bufp->buffer; - BUFPUSH(0); - /* too many ()'s to fit in a byte. (max 254) */ - if (regnum >= RE_REG_MAX) goto too_big; - break; - - case '=': - case '!': - case '>': - BUFPUSH(start_nowidth); - *stackp++ = b - bufp->buffer; - BUFPUSH(0); /* temporary value */ - BUFPUSH(0); - if (c != '!') break; - - BUFPUSH(on_failure_jump); - *stackp++ = b - bufp->buffer; - BUFPUSH(0); /* temporary value */ - BUFPUSH(0); - break; - - case ':': - BUFPUSH(start_paren); - pending_exact = 0; - default: - break; - } - *stackp++ = c; - *stackp++ = old_options; - fixup_alt_jump = 0; - laststart = 0; - begalt = b; break; @@ -1839,9 +1860,2 @@ - if (options != stackp[-1]) { - if ((options ^ stackp[-1]) & RE_OPTION_IGNORECASE) { - BUFPUSH((options&RE_OPTION_IGNORECASE)?casefold_off:casefold_on); - } - BUFPUSH(option_set); - BUFPUSH(stackp[-1]); - } pending_exact = 0; @@ -1858,2 +1872,9 @@ } + if (options != stackp[-1]) { + if ((options ^ stackp[-1]) & RE_OPTION_IGNORECASE) { + BUFPUSH((options&RE_OPTION_IGNORECASE)?casefold_off:casefold_on); + } + BUFPUSH(option_set); + BUFPUSH(stackp[-1]); + } p0 = b; @@ -3357,3 +3378,3 @@ /* Individual items aside from the registers. 
*/ -#define NUM_NONREG_ITEMS 3 +#define NUM_NONREG_ITEMS 4 @@ -3407,2 +3428,3 @@ *stackp++ = string_place; \ + *stackp++ = (unsigned char*)options; /* current option status */ \ *stackp++ = (unsigned char*)0; /* non-greedy flag */ \ @@ -3786,3 +3808,3 @@ stackp = stackb + mcnt; - d = stackp[-2]; + d = stackp[-3]; POP_FAILURE_POINT(); @@ -4010,4 +4032,4 @@ case finalize_jump: - if (stackp > stackb && stackp[-2] == d) { - p = stackp[-3]; + if (stackp > stackb && stackp[-3] == d) { + p = stackp[-4]; POP_FAILURE_POINT(); @@ -4027,3 +4049,3 @@ EXTRACT_NUMBER_AND_INCR(mcnt, p); - if (mcnt < 0 && stackp > stackb && stackp[-2] == d) /* avoid infinite loop */ + if (mcnt < 0 && stackp > stackb && stackp[-3] == d) /* avoid infinite loop */ goto fail; @@ -4118,3 +4140,3 @@ EXTRACT_NUMBER_AND_INCR(mcnt, p); - if (mcnt < 0 && stackp > stackb && stackp[-2] == d) /* avoid infinite loop */ + if (mcnt < 0 && stackp > stackb && stackp[-3] == d) /* avoid infinite loop */ goto fail; @@ -4283,3 +4305,3 @@ skip it. */ - if (stackp[-3] == 0 || (best_regs_set && stackp[-1] == NON_GREEDY)) { + if (stackp[-4] == 0 || (best_regs_set && stackp[-1] == NON_GREEDY)) { POP_FAILURE_POINT(); @@ -4287,3 +4309,4 @@ } - stackp--; /* discard flag */ + stackp--; /* discard greedy flag */ + options = (int)*--stackp; d = *--stackp;