山本です。

これから最適化に入ります。nobuさんにご指摘いただいた「*p == '\0' の場合は
単に ++p でよいのではないか」という点は、最適化の最終段階で対応するつもりです。

Next() の呼び出しをできるだけ減らすため、内部使用のstatic関数の
仕様を少し変えるつもりです。たとえば glob_helper での

	m = find_char(p, '/');
	if (has_magic(p, m, flags)) {

は、has_magic が '/' で処理を終えるようにすれば、find_char の分の
Next() 呼び出しを省略できるはずです。

その前に現段階でのパッチを送ります。
Win32用のCompareString()を使った比較関数が消えていますが、これは

puts File.fnmatch('[~-。]', 'あ')

の実行結果が ruby の正規表現などと異なるものになり、互換性を確保でき
なかったからです。


--- dir.c	Sat Nov 22 12:59:18 2003
+++ dir.c	Thu Dec  4 18:08:58 2003
@@ -78,10 +78,91 @@ char *strchr _((char*,char));
 #define downcase(c) (nocase && ISUPPER(c) ? tolower(c) : (c))
+#define compare(c1, c2) (((unsigned char)(c1)) - ((unsigned char)(c2)))
 
-#ifndef CharNext		/* defined as CharNext[AW] on Windows. */
-# if defined(DJGPP)
-#   define CharNext(p) ((p) + mblen(p, MB_CUR_MAX))
-# else
-#   define CharNext(p) ((p) + 1)
-# endif
-#endif
+static char *
+greater(p1, p2) /* return whichever of p1 and p2 compares greater (p2 when they are equal) */
+    const char *p1;
+    const char *p2;
+{
+    return p1 > p2 ? (char *)p1 : (char *)p2; /* the result is handed back as a non-const char * */
+}
+
+#if defined(_WIN32)
+# define Next(p) (greater((p) + 1, CharNext(p))) /* faster; greater() ensures an advance of at least one byte */
+#elif defined(DJGPP) || defined(__EMX__)
+# define Next(p) (greater((p) + 1, (p) + mblen(p, INT_MAX))) /* greater() picks (p) + 1 whenever mblen() yields less than 1 */
+# endif
+
+#ifndef Next /* single byte environment */
+# define Next(p) ((p) + 1) /* every character is exactly one byte */
+# define Inc(p) (++(p)) /* advance p to the next character; yields the new p */
+# define IncLater(p) ((p)++) /* advance p to the next character; yields the old p */
+# define CopyAndInc(dst, src) (*(dst)++ = *(src)++) /* copy one character and advance both pointers */
+# define Compare(p1, p2) (compare(downcase(*(p1)), downcase(*(p2)))) /* downcase() reads a variable named nocase at the point of use */
+# define find_char(s, c) strchr(s, c)
+#else /* multi byte environment */
+# define Inc(p) ((p) = Next(p)) /* advance p to the next character; yields the new p */
+# define IncLater(p) (IncLaterImpl(&(p))) /* advance p to the next character; yields the old p */
+# define CopyAndInc(dst, src) (CopyAndIncImpl(&(dst), &(src))) /* copy one character and advance both pointers */
+# define Compare(p1, p2) (CompareImpl(p1, p2, nocase)) /* expects a variable named nocase in scope at the point of use */
+static char *
+IncLaterImpl(pp) /* post-increment for multibyte text: advance *pp by one character and return the old *pp */
+    char **pp;
+{
+    char *t = *pp; /* remember the position before advancing */
+    Inc(*pp);
+    return t;
+}
+
+static void
+CopyAndIncImpl(pdst, psrc) /* copy the character at *psrc to *pdst and advance both pointers past it */
+    char **pdst;
+    const char **psrc;
+{
+    const int len = Next(*psrc) - *psrc; /* byte length of the character at *psrc */
+    memcpy(*pdst, *psrc, len);
+    (*pdst) += len;
+    (*psrc) += len;
+}
+
+static int
+CompareImpl(p1, p2, nocase) /* order the characters at p1 and p2: negative, zero or positive */
+    const char *p1;
+    const char *p2;
+    int nocase; /* read by downcase(): non-zero folds upper case to lower case */
+{
+    const int len1 = Next(p1) - p1; /* byte length of the character at p1 */
+    const int len2 = Next(p2) - p2; /* byte length of the character at p2 */
+
+    if (len1 == 1)
+	if (len2 == 1) { /* both single-byte: byte comparison after optional case folding */
+	    return compare(downcase(*p1), downcase(*p2));
+	}
+	else { /* single-byte vs multibyte: compare first bytes; on a tie the single-byte one sorts first */
+	    const int ret = compare(downcase(*p1), *p2);
+	    return ret ? ret : -1;
+	}
+    else
+	if (len2 == 1) { /* multibyte vs single-byte: mirror image of the case above */
+	    const int ret = compare(*p1, downcase(*p2));
+	    return ret ? ret : 1;
+	}
+	else { /* both multibyte: bytewise over the common length, then the shorter one sorts first */
+	    const int ret = memcmp(p1, p2, len1 < len2 ? len1 : len2);
+	    return ret ? ret : len1 - len2;
+	}
+}
+
+static char *
+find_char(s, c) /* multibyte-aware search for byte c in s, stepping one whole character at a time */
+    const char *s;
+    char c;
+{
+    while (*s) {
+	if (*s == c)
+	    return (char *)s;
+	Inc(s); /* only the first byte of each character is ever compared against c */
+    }
+    return 0; /* not found; the terminating NUL itself is never matched */
+}
+#endif /* environment */
 
@@ -89,3 +170,3 @@ char *strchr _((char*,char));
 #define isdirsep(c) ((c) == '/' || (c) == '\\')
-static const char *
+static char *
 find_dirsep(s)
@@ -95,4 +176,4 @@ find_dirsep(s)
 	if (isdirsep(*s))
-	    return s;
-	s = CharNext(s);
+	    return (char *)s;
+	Inc(s);
     }
@@ -108,3 +189,3 @@ range(pat, test, flags)
     char *pat;
-    char test;
+    char *test;
     int flags;
@@ -117,22 +198,20 @@ range(pat, test, flags)
     if (not)
-	pat++;
-
-    test = downcase(test);
+	Inc(pat);
 
     while (*pat) {
-	int cstart, cend;
-	cstart = cend = *pat++;
-	if (cstart == ']')
+	char *pstart, *pend;
+	pstart = pend = IncLater(pat);
+	if (*pstart == ']')
 	    return ok == not ? 0 : pat;
-        else if (escape && cstart == '\\')
-	    cstart = cend = *pat++;
-	if (*pat == '-' && pat[1] != ']') {
-	    if (escape && pat[1] == '\\')
-		pat++;
-	    cend = pat[1];
-	    if (!cend)
+	else if (escape && *pstart == '\\')
+	    pstart = pend = IncLater(pat);
+	if (*pat == '-' && *Next(pat) != ']') {
+	    if (escape && *Next(pat) == '\\')
+		Inc(pat);
+	    pend = Next(pat);
+	    if (!*pend)
 		return 0;
-	    pat += 2;
+	    Inc(pat); Inc(pat);
 	}
-	if (downcase(cstart) <= test && test <= downcase(cend))
+	if (Compare(pstart, test) <= 0 && Compare(test, pend) <= 0)
 	    ok = 1;
@@ -143,4 +222,5 @@ range(pat, test, flags)
 #define ISDIRSEP(c) (pathname && isdirsep(c))
-#define PERIOD(s) (period && *(s) == '.' && \
-		  ((s) == string || ISDIRSEP((s)[-1])))
+#define PERIOD_S() (period && *s == '.' && \
+    (s == string || ISDIRSEP(*s_prev)))
+#define INC_S() (s = Next(s_prev = s))
 static int
@@ -151,5 +231,5 @@ fnmatch(pat, string, flags)
 {
-    int c;
-    int test;
-    const char *s = string;
+    const char *p;
+    const char *test;
+    const char *s = string, *s_prev;
     int escape = !(flags & FNM_NOESCAPE);
@@ -159,17 +239,17 @@ fnmatch(pat, string, flags)
 
-    while (c = *pat++) {
-	switch (c) {
+    while (*(p = IncLater(pat))) {
+	switch (*p) {
 	case '?':
-	    if (!*s || ISDIRSEP(*s) || PERIOD(s))
+	    if (!*s || ISDIRSEP(*s) || PERIOD_S())
 		return FNM_NOMATCH;
-	    s++;
+	    INC_S();
 	    break;
 	case '*':
-	    while ((c = *pat++) == '*')
+	    while (*(p = IncLater(pat)) == '*')
 		;
 
-	    if (PERIOD(s))
+	    if (PERIOD_S())
 		return FNM_NOMATCH;
 
-	    if (!c) {
+	    if (!*p) {
 		if (pathname && find_dirsep(s))
@@ -179,6 +259,6 @@ fnmatch(pat, string, flags)
 	    }
-	    else if (ISDIRSEP(c)) {
+	    else if (ISDIRSEP(*p)) {
 		s = find_dirsep(s);
 		if (s) {
-                    s++;
+		    INC_S();
 		    break;
@@ -188,7 +268,6 @@ fnmatch(pat, string, flags)
 
-	    test = escape && c == '\\' ? *pat : c;
-	    test = downcase(test);
-	    pat--;
+	    test = escape && *p == '\\' ? pat : p;
+	    pat = p;
 	    while (*s) {
-		if ((c == '[' || downcase(*s) == test) &&
+		if ((*p == '[' || Compare(s, test) == 0) &&
 		    !fnmatch(pat, s, flags | FNM_DOTMATCH))
@@ -197,3 +276,3 @@ fnmatch(pat, string, flags)
 		    break;
-		s++;
+		INC_S();
 	    }
@@ -202,8 +281,8 @@ fnmatch(pat, string, flags)
 	case '[':
-	    if (!*s || ISDIRSEP(*s) || PERIOD(s))
+	    if (!*s || ISDIRSEP(*s) || PERIOD_S())
 		return FNM_NOMATCH;
-	    pat = range(pat, *s, flags);
+	    pat = range(pat, s, flags);
 	    if (!pat)
 		return FNM_NOMATCH;
-	    s++;
+	    INC_S();
 	    break;
@@ -216,7 +295,7 @@ fnmatch(pat, string, flags)
 		) {
-		c = *pat;
-		if (!c)
-		    c = '\\';
+		p = pat;
+		if (!*p)
+		    p = "\\"; /* point to embeded string */
 		else
-		    pat++;
+		    Inc(pat);
 	    }
@@ -226,3 +305,3 @@ fnmatch(pat, string, flags)
 #if defined DOSISH
-	    if (ISDIRSEP(c) && isdirsep(*s))
+	    if (ISDIRSEP(*p) && isdirsep(*s))
 		;
@@ -230,5 +309,5 @@ fnmatch(pat, string, flags)
 #endif
-	    if(downcase(c) != downcase(*s))
+	    if(Compare(p, s) != 0)
 		return FNM_NOMATCH;
-	    s++;
+	    INC_S();
 	    break;
@@ -576,4 +655,3 @@ has_magic(s, send, flags)
 {
-    register char *p = s;
-    register char c;
+    char *p;
     int open = 0;
@@ -581,4 +659,4 @@ has_magic(s, send, flags)
 
-    while ((c = *p++) != '\0') {
-	switch (c) {
+    while (*(p = IncLater(s)) != '\0') {
+	switch (*p) {
 	  case '?':
@@ -596,3 +674,3 @@ has_magic(s, send, flags)
 	  case '\\':
-	    if (escape && *p++ == '\0')
+	    if (escape && *IncLater(s) == '\0')
 		return Qfalse;
@@ -600,3 +678,3 @@ has_magic(s, send, flags)
 
-	if (send && p >= send) break;
+	if (send && s >= send) break;
     }
@@ -612,2 +690,6 @@ extract_path(p, pend)
 
+    char *prev2 = 0;
+    char *prev1 = 0;
+    char *cur;
+
     len = pend - p;
@@ -615,11 +697,17 @@ extract_path(p, pend)
     memcpy(alloc, p, len);
-    if (len > 1 && pend[-1] == '/'
+
+    for (cur = alloc; cur < alloc + len; Inc(cur)) {
+	prev2 = prev1;
+	prev1 = cur;
+    }
+
+    if (prev2 && *prev1 == '/'
 #if defined DOSISH_DRIVE_LETTER
-    && pend[-2] != ':'
+	&& *prev2 != ':'
 #endif
     ) {
-	alloc[len-1] = 0;
+	*prev1 = 0;
     }
     else {
-	alloc[len] = 0;
+	*cur = 0;
     }
@@ -635,3 +723,3 @@ extract_elem(path)
 
-    pend = strchr(path, '/');
+    pend = find_char(path, '/');
     if (!pend) pend = path + strlen(path);
@@ -650,5 +738,5 @@ remove_backslashes(p)
 	if (*p == '\\') {
-	    if (++p == pend) break;
+	    if (Inc(p) == pend) break;
 	}
-	*t++ = *p++;
+	CopyAndInc(t, p);
     }
@@ -657,2 +745,3 @@ remove_backslashes(p)
 
+
 #ifndef S_ISDIR
@@ -727,4 +816,4 @@ glob_helper(path, sub, flags, func, arg)
     while (p && !status) {
-	if (*p == '/') p++;
-	m = strchr(p, '/');
+	if (*p == '/') Inc(p);
+	m = find_char(p, '/');
 	if (has_magic(p, m, flags)) {
@@ -776,3 +865,3 @@ glob_helper(path, sub, flags, func, arg)
 #if defined DOSISH_DRIVE_LETTER
-#define BASE (*base && !((isdirsep(*base) && !base[1]) || (base[1] == ':' && isdirsep(base[2]) && !base[3])))
+#define BASE (*base && !((isdirsep(*base) && !base[1]) || (ISALPHA(*base) && base[1] == ':' && isdirsep(base[2]) && !base[3])))
 #else
@@ -926,3 +1015,3 @@ push_braces(ary, s, flags)
 	}
-	p++;
+	Inc(p);
     }
@@ -934,3 +1023,3 @@ push_braces(ary, s, flags)
 	}
-	p++;
+	Inc(p);
     }
@@ -944,9 +1033,9 @@ push_braces(ary, s, flags)
 	while (*p != '}') {
-	    t = p + 1;
-	    for (p = t; *p!='}' && *p!=','; p++) {
+	    t = Next(p);
+	    for (p = t; *p!='}' && *p!=','; Inc(p)) {
 		/* skip inner braces */
-		if (*p == '{') while (*p!='}') p++;
+		if (*p == '{') while (*p!='}') Inc(p);
 	    }
 	    memcpy(b, t, p-t);
-	    strcpy(b+(p-t), rbrace+1);
+	    strcpy(b+(p-t), Next(rbrace));
 	    push_braces(ary, buf, flags);
@@ -988,3 +1077,3 @@ rb_push_glob(str, flags)
 	nest = maxnest = 0;
-	while (p < pend && isdelim(*p)) p++;
+	while (p < pend && isdelim(*p)) Inc(p);
 	while (p < pend && !isdelim(*p)) {
@@ -993,6 +1082,6 @@ rb_push_glob(str, flags)
 	    if (!noescape && *p == '\\') {
-		*t++ = *p++;
+		CopyAndInc(t, p);
 		if (p == pend) break;
 	    }
-	    *t++ = *p++;
+	    CopyAndInc(t, p);
 	}