こんにちは、山本です。

ruby1.8.1のリリースおめでとうございます。
最新のdir.cに合わせて、パッチを作ってみました。

ruby1.8.1との差異は、

1. マルチバイト対応

2. a.cppがディレクトリではなく通常のファイルである場合の puts Dir.glob('a.cpp/')
   1.8.1 #=> 'a.cpp/' (ファイルなのに、/で終わるパターンにマッチする)
   patch #=> '' (マッチしない)

3. glob_helper()でstat()に渡す前に、末尾の'/'(ルート以外)が除去されない場合があった点を修正した。

また、glob_helper()を単純にするためいくつかの仮定をしました。問題があったら戻します。

1. stat("e:")が成功すると仮定し、マクロNEED_DOT()を除去した。
      ただし、bcb付属のstat()ではstat("e:.")とする必要がある。

2. ディレクトリ以外のエントリに対してopendir()を呼ぶと、errnoにENOTDIRが設定されて失敗すると仮定し、stat + opendir の部分を opendir 単独に変更した。(やりすぎ?)
      ただし、bcb付属のopendir()はerrnoにEINVALを設定する。

bcbの場合はruby組み込みのstat() & opendir()を使うので、結局問題ないのですが・・・


--- dir.c-1.8.1	Sat Dec 20 02:20:58 2003
+++ dir.c	Fri Dec 26 17:53:32 2003
@@ -78,11 +78,61 @@ char *strchr _((char*,char));
 #define downcase(c) (nocase && ISUPPER(c) ? tolower(c) : (c))
+#define compare(c1, c2) (((unsigned char)(c1)) - ((unsigned char)(c2)))
 
-#ifndef CharNext		/* defined as CharNext[AW] on Windows. */
-# if defined(DJGPP)
-#   define CharNext(p) ((p) + mblen(p, MB_CUR_MAX))
-# else
-#   define CharNext(p) ((p) + 1)
-# endif
+/* Caution: when *p == '\0',
+   Next(p) == p + 1 in a single-byte environment, but
+   Next(p) == p     in a multibyte environment.
+*/
+#if defined(CharNext)
+# define Next(p) CharNext(p)
+#elif defined(DJGPP)
+# define Next(p) ((p) + mblen(p, MB_CUR_MAX))
+#elif defined(__EMX__)
+# define Next(p) ((p) + emx_mblen(p))
+static inline int
+emx_mblen(p)
+    const char *p;
+{
+    int n = mblen(p, INT_MAX);
+    return (n < 0) ? 1 : n;
+}
 #endif
 
+#ifndef Next /* single byte environment */
+# define Next(p) ((p) + 1)
+# define Inc(p) (++(p))
+# define Compare(p1, p2) (compare(downcase(*(p1)), downcase(*(p2))))
+#else /* multi byte environment */
+# define Inc(p) ((p) = Next(p))
+# define Compare(p1, p2) (CompareImpl(p1, p2, nocase))
+static int
+CompareImpl(p1, p2, nocase)
+    const char *p1;
+    const char *p2;
+    int nocase;
+{
+    const int len1 = Next(p1) - p1;
+    const int len2 = Next(p2) - p2;
+
+    if (len1 == 0) return  len2;
+    if (len2 == 0) return -len1;
+
+    if (len1 == 1)
+	if (len2 == 1)
+	    return compare(downcase(*p1), downcase(*p2));
+	else {
+	    const int ret = compare(downcase(*p1), *p2);
+	    return ret ? ret : -1;
+	}
+    else
+	if (len2 == 1) {
+	    const int ret = compare(*p1, downcase(*p2));
+	    return ret ? ret : 1;
+	}
+	else {
+	    const int ret = memcmp(p1, p2, len1 < len2 ? len1 : len2);
+	    return ret ? ret : len1 - len2;
+	}
+}
+#endif /* environment */
+
 #if defined DOSISH
@@ -96,3 +146,3 @@ range(pat, test, flags)
     char *pat;
-    char test;
+    char *test;
     int flags;
@@ -107,11 +157,10 @@ range(pat, test, flags)
 
-    test = downcase(test);
-
     while (*pat) {
-	int cstart, cend;
-	cstart = cend = *pat++;
-	if (cstart == ']')
-	    return ok == not ? 0 : pat;
-        else if (escape && cstart == '\\')
-	    cstart = cend = *pat++;
+	char *pstart, *pend;
+	pstart = pend = pat;
+	if (*pstart == ']')
+	    return ok == not ? 0 : ++pat;
+	else if (escape && *pstart == '\\')
+	    pstart = pend = ++pat;
+		Inc(pat);
 	if (*pat == '-' && pat[1] != ']') {
@@ -119,8 +168,8 @@ range(pat, test, flags)
 		pat++;
-	    cend = pat[1];
-	    if (!cend)
+	    pend = pat+1;
+	    if (!*pend)
 		return 0;
-	    pat += 2;
+	    pat = Next(pend);
 	}
-	if (downcase(cstart) <= test && test <= downcase(cend))
+	if (Compare(pstart, test) <= 0 && Compare(test, pend) <= 0)
 	    ok = 1;
@@ -131,4 +180,5 @@ range(pat, test, flags)
 #define ISDIRSEP(c) (pathname && isdirsep(c))
-#define PERIOD(s) (period && *(s) == '.' && \
-		  ((s) == string || ISDIRSEP((s)[-1])))
+#define PERIOD_S() (period && *s == '.' && \
+    (!s_prev || ISDIRSEP(*s_prev)))
+#define INC_S() (s = Next(s_prev = s))
 static int
@@ -140,4 +190,4 @@ fnmatch(pat, string, flags)
     int c;
-    int test;
-    const char *s = string;
+    const char *test;
+    const char *s = string, *s_prev = 0;
     int escape = !(flags & FNM_NOESCAPE);
@@ -147,14 +197,15 @@ fnmatch(pat, string, flags)
 
-    while (c = *pat++) {
+    while (c = *pat) {
 	switch (c) {
 	case '?':
-	    if (!*s || ISDIRSEP(*s) || PERIOD(s))
+	    if (!*s || ISDIRSEP(*s) || PERIOD_S())
 		return FNM_NOMATCH;
-	    s++;
+	    INC_S();
+	    ++pat;
 	    break;
 	case '*':
-	    while ((c = *pat++) == '*')
+	    while ((c = *++pat) == '*')
 		;
 
-	    if (PERIOD(s))
+	    if (PERIOD_S())
 		return FNM_NOMATCH;
@@ -170,3 +221,3 @@ fnmatch(pat, string, flags)
 		if (*s) {
-                    s++;
+		    INC_S();
 		    break;
@@ -176,7 +227,5 @@ fnmatch(pat, string, flags)
 
-	    test = escape && c == '\\' ? *pat : c;
-	    test = downcase(test);
-	    pat--;
+	    test = escape && c == '\\' ? pat+1 : pat;
 	    while (*s) {
-		if ((c == '[' || downcase(*s) == test) &&
+		if ((c == '[' || Compare(s, test) == 0) &&
 		    !fnmatch(pat, s, flags | FNM_DOTMATCH))
@@ -185,3 +234,3 @@ fnmatch(pat, string, flags)
 		    break;
-		s++;
+		INC_S();
 	    }
@@ -190,8 +239,8 @@ fnmatch(pat, string, flags)
 	case '[':
-	    if (!*s || ISDIRSEP(*s) || PERIOD(s))
+	    if (!*s || ISDIRSEP(*s) || PERIOD_S())
 		return FNM_NOMATCH;
-	    pat = range(pat, *s, flags);
+	    pat = range(pat+1, s, flags);
 	    if (!pat)
 		return FNM_NOMATCH;
-	    s++;
+	    INC_S();
 	    break;
@@ -199,12 +248,8 @@ fnmatch(pat, string, flags)
 	case '\\':
-	    if (escape
+	    if (escape && pat[1]
 #if defined DOSISH
-		&& *pat && strchr("*?[\\", *pat)
+		&& strchr("*?[\\", pat[1])
 #endif
 		) {
-		c = *pat;
-		if (!c)
-		    c = '\\';
-		else
-		    pat++;
+		c = *++pat;
 	    }
@@ -218,5 +263,6 @@ fnmatch(pat, string, flags)
 #endif
-	    if(downcase(c) != downcase(*s))
+	    if(Compare(pat, s) != 0)
 		return FNM_NOMATCH;
-	    s++;
+	    INC_S();
+	    Inc(pat);
 	    break;
@@ -770,7 +816,7 @@ dir_s_rmdir(obj, dir)
 static int
-has_magic(s, send, flags)
-     char *s, *send;
+has_magic(p, m, flags)
+     register char *p;
+     char **m;
      int flags;
 {
-    register char *p = s;
     register char c;
@@ -779,3 +825,3 @@ has_magic(s, send, flags)
 
-    while ((c = *p++) != '\0') {
+    while (c = *p++, c != '\0' && c != '/') {
 	switch (c) {
@@ -783,3 +829,3 @@ has_magic(s, send, flags)
 	  case '*':
-	    return Qtrue;
+	    goto found;
 
@@ -790,3 +836,3 @@ has_magic(s, send, flags)
 	    if (open)
-		return Qtrue;
+		goto found;
 	    continue;
@@ -794,61 +840,68 @@ has_magic(s, send, flags)
 	  case '\\':
-	    if (escape && *p++ == '\0')
-		return Qfalse;
+	    if (escape && (c = *p++, c == '\0' || c == '/'))
+		goto miss;
+	    continue;
 	}
 
-	if (send && p >= send) break;
+	p = Next(p-1);
     }
-    return Qfalse;
+
+  miss:
+    *m = p-1;
+    return 0;
+
+  found:
+    while (*p != '\0' && *p != '/')
+	Inc(p);
+    *m = p;
+    return 1;
 }
 
-static char*
-extract_path(p, pend)
-    char *p, *pend;
+static int
+remove_backslashes(p, pend)
+    char *p;
+    char *pend;
 {
-    char *alloc;
-    int len;
+    char *t = p;
+    char *s = p;
+    int n = 0;
 
-    len = pend - p;
-    alloc = ALLOC_N(char, len+1);
-    memcpy(alloc, p, len);
-    if (len > 1 && pend[-1] == '/'
-#if defined DOSISH_DRIVE_LETTER
-    && pend[-2] != ':'
-#endif
-    ) {
-	alloc[len-1] = 0;
+    while (*p && p < pend) {
+	if (*p == '\\') {
+	    if (t != s) {
+		memmove(t, s, p - s);
+		n++;
     }
-    else {
-	alloc[len] = 0;
+	    t += p - s;
+	    s = ++p;
+	    if (!(*p && p < pend)) break;
     }
-
-    return alloc;
+	Inc(p);
 }
 
-static char*
-extract_elem(path)
-    char *path;
-{
-    char *pend;
+    while (*p++);
 
-    pend = strchr(path, '/');
-    if (!pend) pend = path + strlen(path);
+    if (t != s) {
+	memmove(t, s, p - s); /* move '\0' too */
+	n++;
+    }
 
-    return extract_path(path, pend);
+    return n;
 }
 
-static void
-remove_backslashes(p)
+static int
+fnmatch_for_substr(p, pend, string, flags)
     char *p;
+    char *pend;
+    const char *string;
+    int flags;
 {
-    char *pend = p + strlen(p);
-    char *t = p;
+    int ret;
+    char c;
 
-    while (p < pend) {
-	if (*p == '\\') {
-	    if (++p == pend) break;
-	}
-	*t++ = *p++;
-    }
-    *t = '\0';
+    c = *pend;
+    *pend = '\0'; /* should I allocate new string? */
+    ret = fnmatch(p, string, flags);
+    *pend = c;
+    return ret;
 }
@@ -894,5 +947,6 @@ glob_call_func(func, path, arg)
 static int
-glob_helper(path, sub, flags, func, arg)
+glob_helper(path, sub, separator, flags, func, arg) /* if separator p[-1] is removable '/' */
     char *path;
     char *sub;
+    int separator;
     int flags;
@@ -902,29 +956,4 @@ glob_helper(path, sub, flags, func, arg)
     struct stat st;
-    char *p, *m;
     int status = 0;
-
-    p = sub ? sub : path;
-    if (!has_magic(p, 0, flags)) {
-#if defined DOSISH
-	remove_backslashes(path);
-#else
-	if (!(flags & FNM_NOESCAPE)) remove_backslashes(p);
-#endif
-	if (lstat(path, &st) == 0) {
-	    status = glob_call_func(func, path, arg);
-	    if (status) return status;
-	}
-	else if (errno != ENOENT) {
-	    /* In case stat error is other than ENOENT and
-	       we may want to know what is wrong. */
-	    rb_sys_warning(path);
-	}
-	return 0;
-    }
-
-    while (p && !status) {
-	if (*p == '/') p++;
-	m = strchr(p, '/');
-	if (has_magic(p, m, flags)) {
-	    char *dir, *base, *magic, *buf;
+    char *p = sub, *m, *buf;
 	    DIR *dirp;
@@ -938,22 +967,68 @@ glob_helper(path, sub, flags, func, arg)
 
-	    base = extract_path(path, p);
-	    if (path == p) dir = ".";
-	    else dir = base;
-
-	    magic = extract_elem(p);
-	    if (stat(dir, &st) < 0) {
-	        if (errno != ENOENT) rb_sys_warning(dir);
-	        free(base);
-	        free(magic);
-	        break;
+    while (*p && !has_magic(p, &m, flags)) {
+	if (*m == '/') {
+	    separator = 1;
+	    p = m + 1;
 	    }
-	    if (S_ISDIR(st.st_mode)) {
-		if (m && strcmp(magic, "**") == 0) {
-		    int n = strlen(base);
+	else {
+	    separator = 0;
+	    p = m;
+	}
+    }
+
+    if (!(flags & FNM_NOESCAPE)) {
+	int n = remove_backslashes(sub, p);
+	p -= n;
+	m -= n;
+    }
+
+    if (*p == '\0') { /* magic not found */
+        if (!separator) {
+	    if (lstat(path, &st) < 0) {
+		/* In case stat error is other than ENOENT and
+		   we may want to know what is wrong. */
+		if (errno != ENOENT) rb_sys_warning(path);
+	    }
+	    else {
+		status = glob_call_func(func, path, arg);
+	    }
+	}
+	else {
+	    const char c = p[-1];
+	    p[-1] = '\0';
+	    if (lstat(path, &st) < 0) {
+		if (errno != ENOENT) rb_sys_warning(path);
+		p[-1] = c;
+	    }
+	    else if (S_ISDIR(st.st_mode)) {
+		p[-1] = c;
+		status = glob_call_func(func, path, arg);
+	    }
+	}
+	return status;
+    }
+
+    if (p[0] == '*' && p[1] == '*' && p[2] == '/') {
+	const int n = p - path;
 		    recursive = 1;
-		    buf = ALLOC_N(char, n+strlen(m)+3);
-		    sprintf(buf, "%s%s", base, *base ? m : m+1);
-		    status = glob_helper(buf, buf+n, flags, func, arg);
+	buf = ALLOC_N(char, n+strlen(p+3)+1);
+	memcpy(buf, path, n);
+	strcpy(buf+n, p+3);
+	status = glob_helper(buf, buf+n, separator, flags, func, arg);
 		    free(buf);
-		    if (status) goto finalize;
+	if (status) return status;
+    }
+
+    {
+	char *dir;
+	if (path == p) {
+	    dir = ALLOC_N(char, 2);
+	    dir[0] = '.';
+	    dir[1] = '\0';
+	}
+	else {
+	    const int n = separator ? (p - path) - 1 : (p - path);
+	    dir = ALLOC_N(char, n+1);
+	    memcpy(dir, path, n);
+	    dir[n] = '\0';
 		}
@@ -961,21 +1036,11 @@ glob_helper(path, sub, flags, func, arg)
 		if (dirp == NULL) {
-		    rb_sys_warning(dir);
-		    free(base);
-		    free(magic);
-		    break;
-		}
+	    if (errno != ENOENT && errno != ENOTDIR) rb_sys_warning(dir);
+	    free(dir);
+	    return 0;
 	    }
-	    else {
-		free(base);
-		free(magic);
-		break;
+	free(dir);
 	    }
 
-#if defined DOSISH_DRIVE_LETTER
-#define BASE (*base && !((isdirsep(*base) && !base[1]) || (base[1] == ':' && isdirsep(base[2]) && !base[3])))
-#else
-#define BASE (*base && !(isdirsep(*base) && !base[1]))
-#endif
-
 	    for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) {
+	const int n = p - path;
 		if (recursive) {
@@ -983,4 +1048,5 @@ glob_helper(path, sub, flags, func, arg)
 			continue;
-		    buf = ALLOC_N(char, strlen(base)+NAMLEN(dp)+strlen(m)+6);
-		    sprintf(buf, "%s%s%s", base, (BASE) ? "/" : "", dp->d_name);
+	    buf = ALLOC_N(char, n+NAMLEN(dp)+strlen(m)+3+1);
+	    memcpy(buf, path, n);
+	    strcpy(buf+n, dp->d_name);
 		    if (lstat(buf, &st) < 0) {
@@ -992,5 +1058,5 @@ glob_helper(path, sub, flags, func, arg)
 			char *t = buf+strlen(buf);
-		        strcpy(t, "/**");
+		memcpy(t, "/**", 3);
 			strcpy(t+3, m);
-			status = glob_helper(buf, t, flags, func, arg);
+		status = glob_helper(buf, t+1, 1, flags, func, arg);
 			free(buf);
@@ -1002,6 +1068,7 @@ glob_helper(path, sub, flags, func, arg)
 		}
-		if (fnmatch(magic, dp->d_name, flags) == 0) {
-		    buf = ALLOC_N(char, strlen(base)+NAMLEN(dp)+2);
-		    sprintf(buf, "%s%s%s", base, (BASE) ? "/" : "", dp->d_name);
-		    if (!m) {
+	if (fnmatch_for_substr(p, m, dp->d_name, flags) == 0) {
+	    buf = ALLOC_N(char, n+NAMLEN(dp)+1);
+	    memcpy(buf, path, n);
+	    strcpy(buf+n, dp->d_name);
+	    if (*m == '\0') {
 			status = glob_call_func(func, buf, arg);
@@ -1018,7 +1085,3 @@ glob_helper(path, sub, flags, func, arg)
 	    closedir(dirp);
-	  finalize:
 	    *tail = 0;
-	    free(base);
-	    free(magic);
-	    if (link) {
 		while (link) {
@@ -1027,9 +1090,8 @@ glob_helper(path, sub, flags, func, arg)
 			    if (S_ISDIR(st.st_mode)) {
-				int len = strlen(link->path);
-				int mlen = strlen(m);
-				char *t = ALLOC_N(char, len+mlen+1);
-
-				sprintf(t, "%s%s", link->path, m);
-				status = glob_helper(t, t+len, flags, func, arg);
-				free(t);
+		    const int len = strlen(link->path);
+		    buf = ALLOC_N(char, len+strlen(m)+1);
+		    memcpy(buf, link->path, len);
+		    strcpy(buf+len, m);
+		    status = glob_helper(buf, buf+len+1, 1, flags, func, arg);
+		    free(buf);
 			    }
@@ -1045,7 +1107,2 @@ glob_helper(path, sub, flags, func, arg)
 		}
-		break;
-	    }
-	}
-	p = m;
-    }
     return status;
@@ -1060,3 +1117,13 @@ rb_glob2(path, flags, func, arg)
 {
-    int status = glob_helper(path, 0, flags, func, arg);
+    char *root = path;
+    int status;
+
+#if defined DOSISH
+    flags |= FNM_CASEFOLD;
+    root = rb_path_skip_prefix(root);
+#endif
+
+    if (*root == '/') root++;
+
+    status = glob_helper(path, root, 0, flags, func, arg);
     if (status) rb_jump_tag(status);
@@ -1124,3 +1191,3 @@ push_braces(ary, s, flags)
 	}
-	p++;
+	Inc(p);
     }
@@ -1132,3 +1199,3 @@ push_braces(ary, s, flags)
 	}
-	p++;
+	Inc(p);
     }
@@ -1142,9 +1209,9 @@ push_braces(ary, s, flags)
 	while (*p != '}') {
-	    t = p + 1;
-	    for (p = t; *p!='}' && *p!=','; p++) {
+	    t = Next(p);
+	    for (p = t; *p!='}' && *p!=','; Inc(p)) {
 		/* skip inner braces */
-		if (*p == '{') while (*p!='}') p++;
+		if (*p == '{') while (*p!='}') Inc(p);
 	    }
 	    memcpy(b, t, p-t);
-	    strcpy(b+(p-t), rbrace+1);
+	    strcpy(b+(p-t), Next(rbrace));
 	    push_braces(ary, buf, flags);
@@ -1159,3 +1226,2 @@ push_braces(ary, s, flags)
 #define isdelim(c) ((c)=='\0')
-
 static VALUE
@@ -1184,5 +1250,5 @@ rb_push_glob(str, flags)
     while (p < pend) {
-	t = buf;
 	nest = maxnest = 0;
 	while (p < pend && isdelim(*p)) p++;
+	t = p;
 	while (p < pend && !isdelim(*p)) {
@@ -1191,8 +1257,9 @@ rb_push_glob(str, flags)
 	    if (!noescape && *p == '\\') {
-		*t++ = *p++;
-		if (p == pend) break;
+		p++;
+		if (p == pend || isdelim(*p)) break;
 	    }
-	    *t++ = *p++;
+	    p = Next(p);
 	}
-	*t = '\0';
+	memcpy(buf, t, p - t);
+	buf[p - t] = '\0';
 	if (maxnest == 0) {