山本です。

[ruby-dev:22147]で

>これだと、readdir(a)を?magicの時と(child)の列挙のとき2度
>行っているので、これを1度にできないか考えています。

と書きましたが、recursiveなほうはlstat()で、そうでないほうはstat()なので、
単純にまとめるわけにはいきませんでした。

また、C++で

struct element {
    std::string name; /* 要素の中身。magicでなければ、remove_backslashes()をかけておく */
    bool magical;     /* magicを含むかどうか */
    bool separator;   /* この要素が '/' で終わる。ただしルートは例外 */
};

として e:/abc/*/\?/b なら

e:/	false	false
abc	false	true
*	true	true
?	false	true
b	false	false

みたいに分解して std::list<element> に格納して処理するようなコードを書いてみたのですが、
やはり速度はあまり変わりませんでした。もう十分速い気もしてきました。
glob()は遅いことがあると聞きますが、**/の後にヘビーなパターンがこない限り、十分なのかもしれません。

ただ、a.cpp がファイルのときに
Dir.glob("g:/a.cpp/") にマッチすべきでないと思ったので、それだけ修正しました。
(コメント中の "removal '/'" というのは、ルートの '/' 以外の、stat()に渡すとき削除すべき '/' のことです)

file.c、dir.c を最新のものに置き換えただけではやはり無理のようなので、
dir.c (in stable-snapshot.tar.gz) に対してパッチしています。
[ruby-dev:22284]の時点のものがベースです。

本当は CSV をインストールして開発すべきなんですが、
再起動するとき CPU を入れ替えないといけないマシンなので、WinCSVのインストールにも二の足を踏んでいます。
次にWin2000のセキュリティパッチが出たときに再起動するので、そのとき試してみたいと思います。


--- dir.c	Sat Nov 22 12:59:18 2003
+++ dir.c	Fri Dec 19 16:19:54 2003
@@ -78,14 +78,62 @@ char *strchr _((char*,char));
 #define downcase(c) (nocase && ISUPPER(c) ? tolower(c) : (c))
+#define compare(c1, c2) (((unsigned char)(c1)) - ((unsigned char)(c2)))
 
-#ifndef CharNext		/* defined as CharNext[AW] on Windows. */
-# if defined(DJGPP)
-#   define CharNext(p) ((p) + mblen(p, MB_CUR_MAX))
-# else
-#   define CharNext(p) ((p) + 1)
-# endif
+/* CAUTION: in case *p == '\0'
+   Next(p) == p + 1 in single byte environment
+   Next(p) == p     in multi byte environment
+*/
+#if defined(CharNext)
+# define Next(p) CharNext(p)
+#elif defined(DJGPP)
+# define Next(p) ((p) + mblen(p, MB_CUR_MAX))
+#elif defined(__EMX__)
+# define Next(p) ((p) + emx_mblen(p))
+static inline int
+emx_mblen(p)
+    const char *p;
+{
+    int n = mblen(p, INT_MAX);
+    return (n < 0) ? 1 : n;
+}
 #endif
 
+#ifndef Next /* single byte environment */
+# define Next(p) ((p) + 1)
+# define Inc(p) (++(p))
+# define Compare(p1, p2) (compare(downcase(*(p1)), downcase(*(p2))))
+#else /* multi byte environment */
+# define Inc(p) ((p) = Next(p))
+# define Compare(p1, p2) (CompareImpl(p1, p2, nocase))
+static int
+CompareImpl(p1, p2, nocase)
+    const char *p1;
+    const char *p2;
+    int nocase;
+{
+    const int len1 = Next(p1) - p1;
+    const int len2 = Next(p2) - p2;
+
+    if (len1 <= 1)
+	if (len2 <= 1) {
+	    return compare(downcase(*p1), downcase(*p2));
+	}
+	else {
+	    const int ret = compare(downcase(*p1), *p2);
+	    return ret ? ret : -1;
+	}
+    else
+	if (len2 <= 1) {
+	    const int ret = compare(*p1, downcase(*p2));
+	    return ret ? ret : 1;
+	}
+	else {
+	    const int ret = memcmp(p1, p2, len1 < len2 ? len1 : len2);
+	    return ret ? ret : len1 - len2;
+	}
+}
+#endif /* environment */
+
 #if defined DOSISH
 #define isdirsep(c) ((c) == '/' || (c) == '\\')
-static const char *
+static char *
 find_dirsep(s)
@@ -95,4 +143,4 @@ find_dirsep(s)
 	if (isdirsep(*s))
-	    return s;
-	s = CharNext(s);
+	    return (char *)s;
+	Inc(s);
     }
@@ -108,3 +156,3 @@ range(pat, test, flags)
     char *pat;
-    char test;
+    char *test;
     int flags;
@@ -119,11 +167,10 @@ range(pat, test, flags)
 
-    test = downcase(test);
-
     while (*pat) {
-	int cstart, cend;
-	cstart = cend = *pat++;
-	if (cstart == ']')
-	    return ok == not ? 0 : pat;
-        else if (escape && cstart == '\\')
-	    cstart = cend = *pat++;
+	char *pstart, *pend;
+	pstart = pend = pat;
+	if (*pstart == ']')
+	    return ok == not ? 0 : ++pat;
+	else if (escape && *pstart == '\\')
+	    pstart = pend = ++pat;
+		Inc(pat);
 	if (*pat == '-' && pat[1] != ']') {
@@ -131,8 +178,8 @@ range(pat, test, flags)
 		pat++;
-	    cend = pat[1];
-	    if (!cend)
+	    pend = pat+1;
+	    if (!*pend)
 		return 0;
-	    pat += 2;
+	    pat = Next(pend);
 	}
-	if (downcase(cstart) <= test && test <= downcase(cend))
+	if (Compare(pstart, test) <= 0 && Compare(test, pend) <= 0)
 	    ok = 1;
@@ -143,4 +190,5 @@ range(pat, test, flags)
 #define ISDIRSEP(c) (pathname && isdirsep(c))
-#define PERIOD(s) (period && *(s) == '.' && \
-		  ((s) == string || ISDIRSEP((s)[-1])))
+#define PERIOD_S() (period && *s == '.' && \
+    (s == string || ISDIRSEP(*s_prev)))
+#define INC_S() (s = Next(s_prev = s))
 static int
@@ -152,4 +200,4 @@ fnmatch(pat, string, flags)
     int c;
-    int test;
-    const char *s = string;
+    const char *test;
+    const char *s = string, *s_prev;
     int escape = !(flags & FNM_NOESCAPE);
@@ -159,14 +207,15 @@ fnmatch(pat, string, flags)
 
-    while (c = *pat++) {
+    while (c = *pat) {
 	switch (c) {
 	case '?':
-	    if (!*s || ISDIRSEP(*s) || PERIOD(s))
+	    if (!*s || ISDIRSEP(*s) || PERIOD_S())
 		return FNM_NOMATCH;
-	    s++;
+	    INC_S();
+	    ++pat;
 	    break;
 	case '*':
-	    while ((c = *pat++) == '*')
+	    while ((c = *++pat) == '*')
 		;
 
-	    if (PERIOD(s))
+	    if (PERIOD_S())
 		return FNM_NOMATCH;
@@ -182,3 +231,3 @@ fnmatch(pat, string, flags)
 		if (s) {
-                    s++;
+		    INC_S();
 		    break;
@@ -188,7 +237,5 @@ fnmatch(pat, string, flags)
 
-	    test = escape && c == '\\' ? *pat : c;
-	    test = downcase(test);
-	    pat--;
+	    test = escape && c == '\\' ? pat+1 : pat;
 	    while (*s) {
-		if ((c == '[' || downcase(*s) == test) &&
+		if ((c == '[' || Compare(s, test) == 0) &&
 		    !fnmatch(pat, s, flags | FNM_DOTMATCH))
@@ -197,3 +244,3 @@ fnmatch(pat, string, flags)
 		    break;
-		s++;
+		INC_S();
 	    }
@@ -202,8 +249,8 @@ fnmatch(pat, string, flags)
 	case '[':
-	    if (!*s || ISDIRSEP(*s) || PERIOD(s))
+	    if (!*s || ISDIRSEP(*s) || PERIOD_S())
 		return FNM_NOMATCH;
-	    pat = range(pat, *s, flags);
+	    pat = range(pat+1, s, flags);
 	    if (!pat)
 		return FNM_NOMATCH;
-	    s++;
+	    INC_S();
 	    break;
@@ -211,12 +258,8 @@ fnmatch(pat, string, flags)
 	case '\\':
-	    if (escape
+	    if (escape && pat[1]
 #if defined DOSISH
-		&& *pat && strchr("*?[\\", *pat)
+		&& strchr("*?[\\", pat[1])
 #endif
 		) {
-		c = *pat;
-		if (!c)
-		    c = '\\';
-		else
-		    pat++;
+		c = *++pat;
 	    }
@@ -230,5 +273,6 @@ fnmatch(pat, string, flags)
 #endif
-	    if(downcase(c) != downcase(*s))
+	    if(Compare(pat, s) != 0)
 		return FNM_NOMATCH;
-	    s++;
+	    INC_S();
+	    Inc(pat);
 	    break;
@@ -570,9 +614,8 @@ dir_s_rmdir(obj, dir)
 
-/* Return nonzero if S has any special globbing chars in it.  */
 static int
-has_magic(s, send, flags)
-     char *s, *send;
+has_magic(p, m, flags)
+     register char *p;
+     char **m;
      int flags;
 {
-    register char *p = s;
     register char c;
@@ -581,3 +624,3 @@ has_magic(s, send, flags)
 
-    while ((c = *p++) != '\0') {
+    while (c = *p++, (c && c != '/')) {
 	switch (c) {
@@ -585,3 +628,3 @@ has_magic(s, send, flags)
 	  case '*':
-	    return Qtrue;
+	    goto found;
 
@@ -592,3 +635,3 @@ has_magic(s, send, flags)
 	    if (open)
-		return Qtrue;
+		goto found;
 	    continue;
@@ -596,61 +639,68 @@ has_magic(s, send, flags)
 	  case '\\':
-	    if (escape && *p++ == '\0')
-		return Qfalse;
+	    if (escape && (c = *p++, !(c && c != '/')))
+		goto miss;
+	    continue;
 	}
 
-	if (send && p >= send) break;
-    }
-    return Qfalse;
+	p = Next(p - 1);
 }
 
-static char*
-extract_path(p, pend)
-    char *p, *pend;
-{
-    char *alloc;
-    int len;
-
-    len = pend - p;
-    alloc = ALLOC_N(char, len+1);
-    memcpy(alloc, p, len);
-    if (len > 1 && pend[-1] == '/'
-#if defined DOSISH_DRIVE_LETTER
-    && pend[-2] != ':'
-#endif
-    ) {
-	alloc[len-1] = 0;
-    }
-    else {
-	alloc[len] = 0;
-    }
+  miss:
+    *m = p-1;
+    return 0;
 
-    return alloc;
+  found:
+    while (*p && *p != '/')
+	Inc(p);
+    *m = p;
+    return 1;
 }
 
-static char*
-extract_elem(path)
-    char *path;
-{
+static int
+do_fnmatch(p, pend, string, flags)
+    char *p;
     char *pend;
+    const char *string;
+    int flags;
+{
+    int ret;
+    char c;
 
-    pend = strchr(path, '/');
-    if (!pend) pend = path + strlen(path);
-
-    return extract_path(path, pend);
+    c = *pend;
+    *pend = '\0'; /* should I allocate new string? */
+    ret = fnmatch(p, string, flags);
+    *pend = c;
+    return ret;
 }
 
-static void
-remove_backslashes(p)
+static int
+remove_backslashes(p, pend)
     char *p;
+    char *pend;
 {
-    char *pend = p + strlen(p);
     char *t = p;
+    char *s = p;
+    int n = 0;
 
-    while (p < pend) {
+    while (*p && p < pend) {
 	if (*p == '\\') {
-	    if (++p == pend) break;
+	    if (t != s) {
+		memmove(t, s, p - s);
+		n++;
+	    }
+	    t += (p - s);
+	    s = ++p;
+	    if (!(*p && p < pend)) break;
+	}
+	Inc(p);
 	}
-	*t++ = *p++;
+
+    while (*p++);
+
+    if (t != s) {
+	memmove(t, s, p - s); /* move '\0' too */
+	n++;
     }
-    *t = '\0';
+
+    return n;
 }
@@ -696,5 +746,6 @@ glob_call_func(func, path, arg)
 static int
-glob_helper(path, sub, flags, func, arg)
+glob_helper(path, sub, separator, flags, func, arg) /* if separator p[-1] is removable '/' */
     char *path;
     char *sub;
+    int separator;
     int flags;
@@ -704,29 +755,4 @@ glob_helper(path, sub, flags, func, arg)
     struct stat st;
-    char *p, *m;
     int status = 0;
-
-    p = sub ? sub : path;
-    if (!has_magic(p, 0, flags)) {
-#if defined DOSISH
-	remove_backslashes(path);
-#else
-	if (!(flags & FNM_NOESCAPE)) remove_backslashes(p);
-#endif
-	if (lstat(path, &st) == 0) {
-	    status = glob_call_func(func, path, arg);
-	    if (status) return status;
-	}
-	else if (errno != ENOENT) {
-	    /* In case stat error is other than ENOENT and
-	       we may want to know what is wrong. */
-	    rb_sys_warning(path);
-	}
-	return 0;
-    }
-
-    while (p && !status) {
-	if (*p == '/') p++;
-	m = strchr(p, '/');
-	if (has_magic(p, m, flags)) {
-	    char *dir, *base, *magic, *buf;
+    char *p = sub, *m, *buf;
 	    DIR *dirp;
@@ -740,22 +766,80 @@ glob_helper(path, sub, flags, func, arg)
 
-	    base = extract_path(path, p);
-	    if (path == p) dir = ".";
-	    else dir = base;
+    while (*p && !has_magic(p, &m, flags)) {
+	if (*m == '/') {
+	    separator = 1;
+	    p = m + 1;
+	}
+	else {
+	    separator = 0;
+	    p = m;
+	}
+    }
+
+    if (!(flags & FNM_NOESCAPE)) {
+	int n = remove_backslashes(sub, p);
+	p -= n;
+	m -= n;
+    }
+
+    /* In case stat error is other than ENOENT and
+       we may want to know what is wrong. */
+
+    if (*p == '\0') { /* magic not found */
+        if (!separator) {
+	    if (lstat(path, &st) < 0) {
+		if (errno != ENOENT) rb_sys_warning(path);
+	    }
+	    else {
+		status = glob_call_func(func, path, arg);
+	    }
+	}
+	else {
+	    const char c = p[-1];
+	    p[-1] = '\0';
+	    if (lstat(path, &st) < 0) {
+		if (errno != ENOENT) rb_sys_warning(path);
+		p[-1] = c;
+	    }
+	    else if (S_ISDIR(st.st_mode)) {
+		p[-1] = c;
+		status = glob_call_func(func, path, arg);
+	    }
+	}
+	return status;
+    }
 
-	    magic = extract_elem(p);
+    {
+#if defined DOSISH_DRIVE_LETTER
+#define NEED_DOT ((p-path==0) || (p-path==2 && ISALPHA(*path) && path[1] == ':'))
+#else
+#define NEED_DOT ((p-path==0))
+#endif
+	const int n = separator ? (p - path) - 1 : (p - path);
+	char *dir = ALLOC_N(char, n+1+1);
+	memcpy(dir, path, n);
+	if (NEED_DOT) {
+	    dir[n] = '.';
+	    dir[n+1] = '\0';
+	}
+	else {
+	    dir[n] = '\0';
+	}
 	    if (stat(dir, &st) < 0) {
 	        if (errno != ENOENT) rb_sys_warning(dir);
-	        free(base);
-	        free(magic);
-	        break;
+	    free(dir);
+	    return 0;
 	    }
 	    if (S_ISDIR(st.st_mode)) {
-		if (m && strcmp(magic, "**") == 0) {
-		    int n = strlen(base);
+	    if (p[0] == '*' && p[1] == '*' && p[2] == '/') {
+		const int n = p - path;
 		    recursive = 1;
-		    buf = ALLOC_N(char, n+strlen(m)+3);
-		    sprintf(buf, "%s%s", base, *base ? m : m+1);
-		    status = glob_helper(buf, buf+n, flags, func, arg);
+		buf = ALLOC_N(char, n+strlen(p+3)+1);
+		memcpy(buf, path, n);
+		strcpy(buf+n, p+3);
+		status = glob_helper(buf, buf+n, separator, flags, func, arg);
 		    free(buf);
-		    if (status) goto finalize;
+		if (status) {
+		    free(dir);
+		    return status;
+		}
 		}
@@ -764,5 +848,4 @@ glob_helper(path, sub, flags, func, arg)
 		    rb_sys_warning(dir);
-		    free(base);
-		    free(magic);
-		    break;
+		free(dir);
+		return 0;
 		}
@@ -770,14 +853,10 @@ glob_helper(path, sub, flags, func, arg)
 	    else {
-		free(base);
-		free(magic);
-		break;
+	    free(dir);
+	    return 0;
+	}
+	free(dir);
 	    }
-
-#if defined DOSISH_DRIVE_LETTER
-#define BASE (*base && !((isdirsep(*base) && !base[1]) || (base[1] == ':' && isdirsep(base[2]) && !base[3])))
-#else
-#define BASE (*base && !(isdirsep(*base) && !base[1]))
-#endif
 
 	    for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) {
+	const int n = p - path;
 		if (recursive) {
@@ -785,4 +864,5 @@ glob_helper(path, sub, flags, func, arg)
 			continue;
-		    buf = ALLOC_N(char, strlen(base)+NAMLEN(dp)+strlen(m)+6);
-		    sprintf(buf, "%s%s%s", base, (BASE) ? "/" : "", dp->d_name);
+	    buf = ALLOC_N(char, n+NAMLEN(dp)+strlen(m)+3+1);
+	    memcpy(buf, path, n);
+	    strcpy(buf+n, dp->d_name);
 		    if (lstat(buf, &st) < 0) {
@@ -794,5 +874,5 @@ glob_helper(path, sub, flags, func, arg)
 			char *t = buf+strlen(buf);
-		        strcpy(t, "/**");
+		memcpy(t, "/**", 3);
 			strcpy(t+3, m);
-			status = glob_helper(buf, t, flags, func, arg);
+		status = glob_helper(buf, t+1, 1, flags, func, arg);
 			free(buf);
@@ -804,6 +884,7 @@ glob_helper(path, sub, flags, func, arg)
 		}
-		if (fnmatch(magic, dp->d_name, flags) == 0) {
-		    buf = ALLOC_N(char, strlen(base)+NAMLEN(dp)+2);
-		    sprintf(buf, "%s%s%s", base, (BASE) ? "/" : "", dp->d_name);
-		    if (!m) {
+	if (do_fnmatch(p, m, dp->d_name, flags) == 0) {
+	    buf = ALLOC_N(char, n+NAMLEN(dp)+1);
+	    memcpy(buf, path, n);
+	    strcpy(buf+n, dp->d_name);
+	    if (*m == '\0') {
 			status = glob_call_func(func, buf, arg);
@@ -820,7 +901,3 @@ glob_helper(path, sub, flags, func, arg)
 	    closedir(dirp);
-	  finalize:
 	    *tail = 0;
-	    free(base);
-	    free(magic);
-	    if (link) {
 		while (link) {
@@ -829,9 +906,8 @@ glob_helper(path, sub, flags, func, arg)
 			    if (S_ISDIR(st.st_mode)) {
-				int len = strlen(link->path);
-				int mlen = strlen(m);
-				char *t = ALLOC_N(char, len+mlen+1);
-
-				sprintf(t, "%s%s", link->path, m);
-				status = glob_helper(t, t+len, flags, func, arg);
-				free(t);
+		    const int len = strlen(link->path);
+		    buf = ALLOC_N(char, len+strlen(m)+1);
+		    memcpy(buf, link->path, len);
+		    strcpy(buf+len, m);
+		    status = glob_helper(buf, buf+len+1, 1, flags, func, arg);
+		    free(buf);
 			    }
@@ -847,7 +923,2 @@ glob_helper(path, sub, flags, func, arg)
 		}
-		break;
-	    }
-	}
-	p = m;
-    }
     return status;
@@ -862,3 +933,17 @@ rb_glob2(path, flags, func, arg)
 {
-    int status = glob_helper(path, 0, flags, func, arg);
+    int status;
+    char *root = path;
+
+#if defined DOSISH
+    flags |= FNM_CASEFOLD;
+
+    if (ISALPHA(*root) && root[1] == ':') {
+	root += 2;
+    }
+#endif
+    if (*root == '/') {
+	root += 1;
+    }
+
+    status = glob_helper(path, root, 0, flags, func, arg);
     if (status) rb_jump_tag(status);
@@ -926,3 +1011,3 @@ push_braces(ary, s, flags)
 	}
-	p++;
+	Inc(p);
     }
@@ -934,3 +1019,3 @@ push_braces(ary, s, flags)
 	}
-	p++;
+	Inc(p);
     }
@@ -944,9 +1029,9 @@ push_braces(ary, s, flags)
 	while (*p != '}') {
-	    t = p + 1;
-	    for (p = t; *p!='}' && *p!=','; p++) {
+	    t = Next(p);
+	    for (p = t; *p!='}' && *p!=','; Inc(p)) {
 		/* skip inner braces */
-		if (*p == '{') while (*p!='}') p++;
+		if (*p == '{') while (*p!='}') Inc(p);
 	    }
 	    memcpy(b, t, p-t);
-	    strcpy(b+(p-t), rbrace+1);
+	    strcpy(b+(p-t), Next(rbrace));
 	    push_braces(ary, buf, flags);
@@ -986,5 +1071,5 @@ rb_push_glob(str, flags)
     while (p < pend) {
-	t = buf;
 	nest = maxnest = 0;
 	while (p < pend && isdelim(*p)) p++;
+	t = p;
 	while (p < pend && !isdelim(*p)) {
@@ -993,8 +1078,9 @@ rb_push_glob(str, flags)
 	    if (!noescape && *p == '\\') {
-		*t++ = *p++;
-		if (p == pend) break;
+		p++;
+		if (p == pend || isdelim(*p)) break;
 	    }
-	    *t++ = *p++;
+	    p = Next(p);
 	}
-	*t = '\0';
+	memcpy(buf, t, p - t);
+	buf[p - t] = '\0';
 	if (maxnest == 0) {