まつもと ゆきひろです

In message "[ruby-dev:8637] Re: slow gsub"
    on 99/12/13, Yukihiro Matsumoto <matz / netlab.co.jp> writes:

||mbclen から mbclen2 に変わって gsub がとても遅くなりました.
||単なるマクロから関数になってるわけで, しかたないのかなあ?
|
|mbclenで十分なときにはmbclenを使う方向で考えてみましょう。

真の原因はstartposを文字単位で調整する処理にありました。
これを別関数に分離してみたら、それほど遅くないみたいです。

Index: regex.h
===================================================================
RCS file: /home/cvs/ruby/regex.h,v
retrieving revision 1.4
diff -u -1 -r1.4 regex.h
--- regex.h	1999/12/06 09:03:58	1.4
+++ regex.h	1999/12/13 09:47:22
@@ -30,2 +30,3 @@
 #ifdef RUBY
+# define re_adjust_startpos ruby_re_adjust_startpos
 # define re_compile_fastmap ruby_re_compile_fastmap
@@ -188,2 +189,3 @@
 /* Is this really advertised?  */
+extern int re_adjust_startpos (struct re_pattern_buffer *, const char*, int, int, int);
 extern void re_compile_fastmap (struct re_pattern_buffer *);
@@ -208,2 +210,3 @@
 /* Is this really advertised? */
+extern int re_adjust_startpos ();
 extern void re_compile_fastmap ();
Index: regex.c
===================================================================
RCS file: /home/cvs/ruby/regex.c,v
retrieving revision 1.10
diff -u -1 -r1.10 regex.c
--- regex.c	1999/12/06 09:03:56	1.10
+++ regex.c	1999/12/13 09:47:31
@@ -2924,17 +2924,5 @@
 
-
-/* Using the compiled pattern in BUFP->buffer, first tries to match
-   STRING, starting first at index STARTPOS, then at STARTPOS + 1, and
-   so on. RANGE is the number of places to try before giving up. If
-   RANGE is negative, it searches backwards, i.e., the starting
-   positions tried are STARTPOS, STARTPOS - 1, etc. STRING is of SIZE.
-   In REGS, return the indices of STRING that matched the entire
-   BUFP->buffer and its contained subexpressions.
-
-   The value returned is the position in the strings at which the match
-   was found, or -1 if no match was found, or -2 if error (such as
-   failure stack overflow). */
-
+/* adjust startpos value to the position between characters. */
 int
-re_search(bufp, string, size, startpos, range, regs)
+re_adjust_startpos(bufp, string, size, startpos, range)
      struct re_pattern_buffer *bufp;
@@ -2942,13 +2930,5 @@
      int size, startpos, range;
-     struct re_registers *regs;
 {
-  register char *fastmap = bufp->fastmap;
-  int val, anchor = 0;
-
-  /* Check for out-of-range starting position. */
-  if (startpos < 0 || startpos > size)
-    return -1;
-
   /* Update the fastmap now if not correct already. */
-  if (fastmap && !bufp->fastmap_accurate) {
+  if (!bufp->fastmap_accurate) {
     re_compile_fastmap(bufp);
@@ -2982,2 +2962,37 @@
   }
+  return startpos;
+}
+
+
+/* Using the compiled pattern in BUFP->buffer, first tries to match
+   STRING, starting first at index STARTPOS, then at STARTPOS + 1, and
+   so on. RANGE is the number of places to try before giving up. If
+   RANGE is negative, it searches backwards, i.e., the starting
+   positions tried are STARTPOS, STARTPOS - 1, etc. STRING is of SIZE.
+   In REGS, return the indices of STRING that matched the entire
+   BUFP->buffer and its contained subexpressions.
+
+   The value returned is the position in the strings at which the match
+   was found, or -1 if no match was found, or -2 if error (such as
+   failure stack overflow). */
+
+int
+re_search(bufp, string, size, startpos, range, regs)
+     struct re_pattern_buffer *bufp;
+     const char *string;
+     int size, startpos, range;
+     struct re_registers *regs;
+{
+  register char *fastmap = bufp->fastmap;
+  int val, anchor = 0;
+
+  /* Check for out-of-range starting position. */
+  if (startpos < 0 || startpos > size)
+    return -1;
+
+  /* Update the fastmap now if not correct already. */
+  if (fastmap && !bufp->fastmap_accurate) {
+    re_compile_fastmap(bufp);
+  }
+
 
Index: re.h
===================================================================
RCS file: /home/cvs/ruby/re.h,v
retrieving revision 1.3
diff -u -1 -r1.3 re.h
--- re.h	1999/11/04 08:39:37	1.3
+++ re.h	1999/12/13 09:47:31
@@ -34,2 +34,3 @@
 VALUE rb_reg_regsub _((VALUE, VALUE, struct re_registers *));
+int rb_reg_adjust_startpos _((VALUE, VALUE, int, int));
 
Index: re.c
===================================================================
RCS file: /home/cvs/ruby/re.c,v
retrieving revision 1.7
diff -u -1 -r1.7 re.c
--- re.c	1999/12/01 09:24:13	1.7
+++ re.c	1999/12/13 09:47:34
@@ -510,2 +510,28 @@
 int
+rb_reg_adjust_startpos(reg, str, pos, reverse)
+    VALUE reg, str;
+    int pos, reverse;
+{
+    int range;
+
+    if (may_need_recompile)
+        rb_reg_prepare_re(reg);
+
+    if (FL_TEST(reg, KCODE_FIXED))
+        kcode_set_option(reg);
+    else if (reg_kcode != curr_kcode)
+        kcode_reset_option();
+
+    if (reverse) {
+        range = -pos;
+    }
+    else {
+        range = RSTRING(str)->len - pos;
+    }
+    return re_adjust_startpos(RREGEXP(reg)->ptr,
+                              RSTRING(str)->ptr, RSTRING(str)->len,
+                              pos, range);
+}
+
+int
 rb_reg_search(reg, str, pos, reverse)
Index: string.c
===================================================================
RCS file: /home/cvs/ruby/string.c,v
retrieving revision 1.11
diff -u -1 -r1.11 string.c
--- string.c	1999/12/07 09:23:27	1.11
+++ string.c	1999/12/13 09:47:37
@@ -587,2 +590,3 @@
       case T_REGEXP:
+        pos = rb_reg_adjust_startpos(sub, str, pos, 0);
         pos = rb_reg_search(sub, str, pos, 0);
@@ -637,2 +641,3 @@
         if (RREGEXP(sub)->len) {
+            pos = rb_reg_adjust_startpos(sub, str, pos, 1);
             pos = rb_reg_search(sub, str, pos, 1);