Hi,
In message "[ruby-talk:16164] Re: (?i:) in regexp"
on 01/06/02, ts <decoux / moulon.inra.fr> writes:
| Well actually ruby do this
|
|pigeon% ruby -rjj -e '/(?im:a|b)?x/.dump'
|Regexp `(?im:a|b)?x'
| 0 casefold_on
| 1 option_set `ims'
| 2 on_failure_jump ==> 12
| 3 start_paren
| 4 on_failure_jump ==> 7
| 5 exactn "a" (1)
| 6 jump ==> 11
| 7 exactn "b" (1)
| 8 casefold_off
| 9 option_set `'
| 10 push_dummy_failure
| 11 stop_paren
| 12 exactn "x" (1)
| 13 end
|subexpressions : 0
|pigeon%
Thank you. casefold_on etc. were flushed too early.
I think I've fixed it.
--- regex.c 2001/05/30 09:12:34 1.41
+++ regex.c 2001/06/02 15:19:45
@@ -545,3 +545,3 @@
results in it being larger than 1 << 16, then flag memory exhausted. */
-#define EXTEND_BUFFER \
+#define EXTEND_BUFFER \
do { char *old_buffer = bufp->buffer; \
@@ -1262,3 +1262,2 @@
int options = bufp->options;
- int old_options = 0;
@@ -1685,85 +1684,135 @@
case '(':
- old_options = options;
- PATFETCH(c);
- if (c == '?') {
- int negative = 0;
+ {
+ int old_options = options;
int push_option = 0;
- PATFETCH_RAW(c);
- switch (c) {
- case 'x': case 'p': case 'm': case 'i': case '-':
- for (;;) {
- switch (c) {
- case '-':
- negative = 1;
- break;
-
- case ':':
- case ')':
- break;
+ int casefold = 0;
- case 'x':
- if (negative)
- options &= ~RE_OPTION_EXTENDED;
- else
- options |= RE_OPTION_EXTENDED;
- break;
+ PATFETCH(c);
+ if (c == '?') {
+ int negative = 0;
- case 'p':
- if (negative) {
- if ((options&RE_OPTION_POSIXLINE) == RE_OPTION_POSIXLINE) {
- options &= ~RE_OPTION_POSIXLINE;
+ PATFETCH_RAW(c);
+ switch (c) {
+ case 'x': case 'p': case 'm': case 'i': case '-':
+ for (;;) {
+ switch (c) {
+ case '-':
+ negative = 1;
+ break;
+
+ case ':':
+ case ')':
+ break;
+
+ case 'x':
+ if (negative)
+ options &= ~RE_OPTION_EXTENDED;
+ else
+ options |= RE_OPTION_EXTENDED;
+ break;
+
+ case 'p':
+ if (negative) {
+ if ((options&RE_OPTION_POSIXLINE) == RE_OPTION_POSIXLINE) {
+ options &= ~RE_OPTION_POSIXLINE;
+ }
}
- }
- else if ((options&RE_OPTION_POSIXLINE) != RE_OPTION_POSIXLINE) {
- options |= RE_OPTION_POSIXLINE;
- }
- push_option = 1;
- break;
+ else if ((options&RE_OPTION_POSIXLINE) != RE_OPTION_POSIXLINE) {
+ options |= RE_OPTION_POSIXLINE;
+ }
+ push_option = 1;
+ break;
- case 'm':
- if (negative) {
- if (options&RE_OPTION_MULTILINE) {
- options &= ~RE_OPTION_MULTILINE;
+ case 'm':
+ if (negative) {
+ if (options&RE_OPTION_MULTILINE) {
+ options &= ~RE_OPTION_MULTILINE;
+ }
}
- }
- else if (!(options&RE_OPTION_MULTILINE)) {
- options |= RE_OPTION_MULTILINE;
- }
- push_option = 1;
- break;
+ else if (!(options&RE_OPTION_MULTILINE)) {
+ options |= RE_OPTION_MULTILINE;
+ }
+ push_option = 1;
+ break;
- case 'i':
- if (negative) {
- if (options&RE_OPTION_IGNORECASE) {
- options &= ~RE_OPTION_IGNORECASE;
- BUFPUSH(casefold_off);
+ case 'i':
+ if (negative) {
+ if (options&RE_OPTION_IGNORECASE) {
+ options &= ~RE_OPTION_IGNORECASE;
+ }
+ }
+ else if (!(options&RE_OPTION_IGNORECASE)) {
+ options |= RE_OPTION_IGNORECASE;
}
+ casefold = 1;
+ break;
+
+ default:
+ FREE_AND_RETURN(stackb, "undefined (?...) inline option");
}
- else if (!(options&RE_OPTION_IGNORECASE)) {
- options |= RE_OPTION_IGNORECASE;
- BUFPUSH(casefold_on);
+ if (c == ')') {
+ c = '#'; /* read whole in-line options */
+ break;
}
- break;
-
- default:
- FREE_AND_RETURN(stackb, "undefined (?...) inline option");
+ if (c == ':') break;
+ PATFETCH_RAW(c);
}
- if (c == ')') {
- c = '#'; /* read whole in-line options */
- break;
+ break;
+
+ case '#':
+ for (;;) {
+ PATFETCH(c);
+ if (c == ')') break;
}
- if (c == ':') break;
- PATFETCH_RAW(c);
+ c = '#';
+ break;
+
+ case ':':
+ case '=':
+ case '!':
+ case '>':
+ break;
+
+ default:
+ FREE_AND_RETURN(stackb, "undefined (?...) sequence");
}
+ }
+ else {
+ PATUNFETCH;
+ c = '(';
+ }
+ if (c == '#') {
+ if (push_option) {
+ BUFPUSH(option_set);
+ BUFPUSH(options);
+ }
+ if (casefold) {
+ if (options & RE_OPTION_IGNORECASE)
+ BUFPUSH(casefold_on);
+ else
+ BUFPUSH(casefold_off);
+ }
break;
+ }
+ if (stackp+8 >= stacke) {
+ DOUBLE_STACK(int);
+ }
- case '#':
- for (;;) {
- PATFETCH(c);
- if (c == ')') break;
- }
- c = '#';
+ /* Laststart should point to the start_memory that we are about
+ to push (unless the pattern has RE_NREGS or more ('s). */
+ /* obsolete: now RE_NREGS is just a default register size. */
+ *stackp++ = b - bufp->buffer;
+ *stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
+ *stackp++ = begalt - bufp->buffer;
+ switch (c) {
+ case '(':
+ BUFPUSH(start_memory);
+ BUFPUSH(regnum);
+ *stackp++ = regnum++;
+ *stackp++ = b - bufp->buffer;
+ BUFPUSH(0);
+ /* too many ()'s to fit in a byte. (max 254) */
+ if (regnum >= RE_REG_MAX) goto too_big;
break;
- case ':':
case '=':
@@ -1771,6 +1820,19 @@
case '>':
+ BUFPUSH(start_nowidth);
+ *stackp++ = b - bufp->buffer;
+ BUFPUSH(0); /* temporary value */
+ BUFPUSH(0);
+ if (c != '!') break;
+
+ BUFPUSH(on_failure_jump);
+ *stackp++ = b - bufp->buffer;
+ BUFPUSH(0); /* temporary value */
+ BUFPUSH(0);
break;
+ case ':':
+ BUFPUSH(start_paren);
+ pending_exact = 0;
default:
- FREE_AND_RETURN(stackb, "undefined (?...) sequence");
+ break;
}
@@ -1780,55 +1842,14 @@
}
+ if (casefold) {
+ if (options & RE_OPTION_IGNORECASE)
+ BUFPUSH(casefold_on);
+ else
+ BUFPUSH(casefold_off);
+ }
+ *stackp++ = c;
+ *stackp++ = old_options;
+ fixup_alt_jump = 0;
+ laststart = 0;
+ begalt = b;
}
- else {
- PATUNFETCH;
- c = '(';
- }
- if (c == '#') break;
- if (stackp+8 >= stacke) {
- DOUBLE_STACK(int);
- }
-
- /* Laststart should point to the start_memory that we are about
- to push (unless the pattern has RE_NREGS or more ('s). */
- /* obsolete: now RE_NREGS is just a default register size. */
- *stackp++ = b - bufp->buffer;
- *stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
- *stackp++ = begalt - bufp->buffer;
- switch (c) {
- case '(':
- BUFPUSH(start_memory);
- BUFPUSH(regnum);
- *stackp++ = regnum++;
- *stackp++ = b - bufp->buffer;
- BUFPUSH(0);
- /* too many ()'s to fit in a byte. (max 254) */
- if (regnum >= RE_REG_MAX) goto too_big;
- break;
-
- case '=':
- case '!':
- case '>':
- BUFPUSH(start_nowidth);
- *stackp++ = b - bufp->buffer;
- BUFPUSH(0); /* temporary value */
- BUFPUSH(0);
- if (c != '!') break;
-
- BUFPUSH(on_failure_jump);
- *stackp++ = b - bufp->buffer;
- BUFPUSH(0); /* temporary value */
- BUFPUSH(0);
- break;
-
- case ':':
- BUFPUSH(start_paren);
- pending_exact = 0;
- default:
- break;
- }
- *stackp++ = c;
- *stackp++ = old_options;
- fixup_alt_jump = 0;
- laststart = 0;
- begalt = b;
break;
@@ -1839,9 +1860,2 @@
- if (options != stackp[-1]) {
- if ((options ^ stackp[-1]) & RE_OPTION_IGNORECASE) {
- BUFPUSH((options&RE_OPTION_IGNORECASE)?casefold_off:casefold_on);
- }
- BUFPUSH(option_set);
- BUFPUSH(stackp[-1]);
- }
pending_exact = 0;
@@ -1858,2 +1872,9 @@
}
+ if (options != stackp[-1]) {
+ if ((options ^ stackp[-1]) & RE_OPTION_IGNORECASE) {
+ BUFPUSH((options&RE_OPTION_IGNORECASE)?casefold_off:casefold_on);
+ }
+ BUFPUSH(option_set);
+ BUFPUSH(stackp[-1]);
+ }
p0 = b;
@@ -3357,3 +3378,3 @@
/* Individual items aside from the registers. */
-#define NUM_NONREG_ITEMS 3
+#define NUM_NONREG_ITEMS 4
@@ -3407,2 +3428,3 @@
*stackp++ = string_place; \
+ *stackp++ = (unsigned char*)options; /* current option status */ \
*stackp++ = (unsigned char*)0; /* non-greedy flag */ \
@@ -3786,3 +3808,3 @@
stackp = stackb + mcnt;
- d = stackp[-2];
+ d = stackp[-3];
POP_FAILURE_POINT();
@@ -4010,4 +4032,4 @@
case finalize_jump:
- if (stackp > stackb && stackp[-2] == d) {
- p = stackp[-3];
+ if (stackp > stackb && stackp[-3] == d) {
+ p = stackp[-4];
POP_FAILURE_POINT();
@@ -4027,3 +4049,3 @@
EXTRACT_NUMBER_AND_INCR(mcnt, p);
- if (mcnt < 0 && stackp > stackb && stackp[-2] == d) /* avoid infinite loop */
+ if (mcnt < 0 && stackp > stackb && stackp[-3] == d) /* avoid infinite loop */
goto fail;
@@ -4118,3 +4140,3 @@
EXTRACT_NUMBER_AND_INCR(mcnt, p);
- if (mcnt < 0 && stackp > stackb && stackp[-2] == d) /* avoid infinite loop */
+ if (mcnt < 0 && stackp > stackb && stackp[-3] == d) /* avoid infinite loop */
goto fail;
@@ -4283,3 +4305,3 @@
skip it. */
- if (stackp[-3] == 0 || (best_regs_set && stackp[-1] == NON_GREEDY)) {
+ if (stackp[-4] == 0 || (best_regs_set && stackp[-1] == NON_GREEDY)) {
POP_FAILURE_POINT();
@@ -4287,3 +4309,4 @@
}
- stackp--; /* discard flag */
+ stackp--; /* discard greedy flag */
+ options = (int)*--stackp;
d = *--stackp;