山本です。パッチを送ります。 #追記 WindowsのFindFirstFileだと、エントリの名前と一緒にディレクトリかどうかの情報も渡ってきますが、 readdir()だと名前だけですよね。他のOSにもそんなAPIってありますか? いっそのこと、opendir_ex, readdir_ex を定義して、 そんなAPIがある環境ではそれを使い、 ない環境ではopendir, readdirを使うような実装が可能なら 爆速になるような気がします。 --- dir.c-1.8.1 Sat Dec 20 02:20:58 2003 +++ dir.c Sun Dec 28 11:28:56 2003 @@ -78,11 +78,61 @@ char *strchr _((char*,char)); #define downcase(c) (nocase && ISUPPER(c) ? tolower(c) : (c)) +#define compare(c1, c2) (((unsigned char)(c1)) - ((unsigned char)(c2))) -#ifndef CharNext /* defined as CharNext[AW] on Windows. */ -# if defined(DJGPP) -# define CharNext(p) ((p) + mblen(p, MB_CUR_MAX)) -# else -# define CharNext(p) ((p) + 1) -# endif +/* caution: in case *p == '\0' + Next(p) == p + 1 in single byte environment + Next(p) == p in multi byte environment +*/ +#if defined(CharNext) +# define Next(p) CharNext(p) +#elif defined(DJGPP) +# define Next(p) ((p) + mblen(p, MB_CUR_MAX)) +#elif defined(__EMX__) +# define Next(p) ((p) + emx_mblen(p)) +static inline int +emx_mblen(p) + const char *p; +{ + int n = mblen(p, INT_MAX); + return (n < 0) ? 1 : n; +} #endif +#ifndef Next /* single byte environment */ +# define Next(p) ((p) + 1) +# define Inc(p) (++(p)) +# define Compare(p1, p2) (compare(downcase(*(p1)), downcase(*(p2)))) +#else /* multi byte environment */ +# define Inc(p) ((p) = Next(p)) +# define Compare(p1, p2) (CompareImpl(p1, p2, nocase)) +static int +CompareImpl(p1, p2, nocase) + const char *p1; + const char *p2; + int nocase; +{ + const int len1 = Next(p1) - p1; + const int len2 = Next(p2) - p2; + + if (len1 == 0) return len2; + if (len2 == 0) return -len1; + + if (len1 == 1) + if (len2 == 1) + return compare(downcase(*p1), downcase(*p2)); + else { + const int ret = compare(downcase(*p1), *p2); + return ret ? ret : -1; + } + else + if (len2 == 1) { + const int ret = compare(*p1, downcase(*p2)); + return ret ? ret : 1; + } + else { + const int ret = memcmp(p1, p2, len1 < len2 ? len1 : len2); + return ret ? ret : len1 - len2; + } +} +#endif /* environment */ + #if defined DOSISH @@ -96,3 +146,3 @@ range(pat, test, flags) char *pat; - char test; + char *test; int flags; @@ -107,11 +157,10 @@ range(pat, test, flags) - test = downcase(test); - while (*pat) { - int cstart, cend; - cstart = cend = *pat++; - if (cstart == ']') - return ok == not ? 0 : pat; - else if (escape && cstart == '\\') - cstart = cend = *pat++; + char *pstart, *pend; + pstart = pend = pat; + if (*pstart == ']') + return ok == not ? 0 : ++pat; + else if (escape && *pstart == '\\') + pstart = pend = ++pat; + Inc(pat); if (*pat == '-' && pat[1] != ']') { @@ -119,8 +168,8 @@ range(pat, test, flags) pat++; - cend = pat[1]; - if (!cend) + pend = pat+1; + if (!*pend) return 0; - pat += 2; + pat = Next(pend); } - if (downcase(cstart) <= test && test <= downcase(cend)) + if (Compare(pstart, test) <= 0 && Compare(test, pend) <= 0) ok = 1; @@ -131,4 +180,5 @@ range(pat, test, flags) #define ISDIRSEP(c) (pathname && isdirsep(c)) -#define PERIOD(s) (period && *(s) == '.' && \ - ((s) == string || ISDIRSEP((s)[-1]))) +#define PERIOD_S() (period && *s == '.' && \ + (!s_prev || ISDIRSEP(*s_prev))) +#define INC_S() (s = Next(s_prev = s)) static int @@ -140,4 +190,4 @@ fnmatch(pat, string, flags) int c; - int test; - const char *s = string; + const char *test; + const char *s = string, *s_prev = 0; int escape = !(flags & FNM_NOESCAPE); @@ -147,14 +197,15 @@ fnmatch(pat, string, flags) - while (c = *pat++) { + while (c = *pat) { switch (c) { case '?': - if (!*s || ISDIRSEP(*s) || PERIOD(s)) + if (!*s || ISDIRSEP(*s) || PERIOD_S()) return FNM_NOMATCH; - s++; + INC_S(); + ++pat; break; case '*': - while ((c = *pat++) == '*') + while ((c = *++pat) == '*') ; - if (PERIOD(s)) + if (PERIOD_S()) return FNM_NOMATCH; @@ -170,3 +221,3 @@ fnmatch(pat, string, flags) if (*s) { - s++; + INC_S(); break; @@ -176,7 +227,5 @@ fnmatch(pat, string, flags) - test = escape && c == '\\' ? *pat : c; - test = downcase(test); - pat--; + test = escape && c == '\\' ? pat+1 : pat; while (*s) { - if ((c == '[' || downcase(*s) == test) && + if ((c == '[' || Compare(s, test) == 0) && !fnmatch(pat, s, flags | FNM_DOTMATCH)) @@ -185,3 +234,3 @@ fnmatch(pat, string, flags) break; - s++; + INC_S(); } @@ -190,8 +239,8 @@ fnmatch(pat, string, flags) case '[': - if (!*s || ISDIRSEP(*s) || PERIOD(s)) + if (!*s || ISDIRSEP(*s) || PERIOD_S()) return FNM_NOMATCH; - pat = range(pat, *s, flags); + pat = range(pat+1, s, flags); if (!pat) return FNM_NOMATCH; - s++; + INC_S(); break; @@ -199,12 +248,8 @@ fnmatch(pat, string, flags) case '\\': - if (escape + if (escape && pat[1] #if defined DOSISH - && *pat && strchr("*?[\\", *pat) + && strchr("*?[\\", pat[1]) #endif ) { - c = *pat; - if (!c) - c = '\\'; - else - pat++; + c = *++pat; } @@ -218,5 +263,6 @@ fnmatch(pat, string, flags) #endif - if(downcase(c) != downcase(*s)) + if(Compare(pat, s) != 0) return FNM_NOMATCH; - s++; + INC_S(); + Inc(pat); break; @@ -770,7 +816,7 @@ dir_s_rmdir(obj, dir) static int -has_magic(s, send, flags) - char *s, *send; +has_magic(p, m, flags) + register char *p; + char **m; int flags; { - register char *p = s; register char c; @@ -779,3 +825,3 @@ has_magic(s, send, flags) - while ((c = *p++) != '\0') { + while (c = *p++, c != '\0' && c != '/') { switch (c) { @@ -783,3 +829,3 @@ has_magic(s, send, flags) case '*': - return Qtrue; + goto found; @@ -790,3 +836,3 @@ has_magic(s, send, flags) if (open) - return Qtrue; + goto found; continue; @@ -794,61 +840,100 @@ has_magic(s, send, flags) case '\\': - if (escape && *p++ == '\0') - return Qfalse; + if (escape && (c = *p++, c == '\0' || c == '/')) + goto miss; + continue; } - if (send && p >= send) break; + p = Next(p-1); } - return Qfalse; + + miss: + *m = p-1; + return 0; + + found: + while (*p != '\0' && *p != '/') + Inc(p); + *m = p; + return 1; } -static char* -extract_path(p, pend) - char *p, *pend; +static int +remove_backslashes(p, pend) + char *p; + char *pend; { - char *alloc; - int len; + char *t = p; + char *s = p; + int n = 0; - len = pend - p; - alloc = ALLOC_N(char, len+1); - memcpy(alloc, p, len); - if (len > 1 && pend[-1] == '/' -#if defined DOSISH_DRIVE_LETTER - && pend[-2] != ':' -#endif - ) { - alloc[len-1] = 0; + while (*p && p < pend) { + if (*p == '\\') { + if (t != s) { + memmove(t, s, p - s); + n++; } - else { - alloc[len] = 0; + t += p - s; + s = ++p; + if (!(*p && p < pend)) break; } - - return alloc; + Inc(p); } -static char* -extract_elem(path) - char *path; -{ - char *pend; + while (*p++); - pend = strchr(path, '/'); - if (!pend) pend = path + strlen(path); + if (t != s) { + memmove(t, s, p - s); /* move '\0' too */ + n++; + } - return extract_path(path, pend); + return n; } -static void -remove_backslashes(p) +static int +do_fnmatch(p, pend, string, flags) char *p; + char *pend; + const char *string; + int flags; { - char *pend = p + strlen(p); - char *t = p; + int ret; + char c; - while (p < pend) { - if (*p == '\\') { - if (++p == pend) break; + c = *pend; + *pend = '\0'; /* should I allocate new string? */ + ret = fnmatch(p, string, flags); + *pend = c; + return ret; } - *t++ = *p++; + +static int +do_stat(path, pst) + const char *path; + struct stat *pst; +{ + int ret = stat(path, pst); + if (ret < 0 && errno != ENOENT) + rb_sys_warning(path); + return ret; } - *t = '\0'; + +static int +do_lstat(path, pst) + const char *path; + struct stat *pst; +{ + int ret = lstat(path, pst); + if (ret < 0 && errno != ENOENT) + rb_sys_warning(path); + return ret; +} + +static DIR * +do_opendir(path) + const char *path; +{ + DIR *dirp = opendir(path); + if (dirp == NULL && errno != ENOENT && errno != ENOTDIR) + rb_sys_warning(path); + return dirp; } @@ -859,2 +944,10 @@ remove_backslashes(p) +#ifndef S_ISLNK +# ifndef S_IFLNK +# define S_ISLNK(m) (0) +# else +# define S_ISLNK(m) ((m & S_IFMT) == S_IFLNK) +# endif +#endif + struct glob_args { @@ -894,5 +987,6 @@ glob_call_func(func, path, arg) static int -glob_helper(path, sub, flags, func, arg) +glob_helper(path, sub, separator, flags, func, arg) /* if separator p[-1] is removable '/' */ char *path; char *sub; + int separator; int flags; @@ -902,29 +996,4 @@ glob_helper(path, sub, flags, func, arg) struct stat st; - char *p, *m; int status = 0; - - p = sub ? sub : path; - if (!has_magic(p, 0, flags)) { -#if defined DOSISH - remove_backslashes(path); -#else - if (!(flags & FNM_NOESCAPE)) remove_backslashes(p); -#endif - if (lstat(path, &st) == 0) { - status = glob_call_func(func, path, arg); - if (status) return status; - } - else if (errno != ENOENT) { - /* In case stat error is other than ENOENT and - we may want to know what is wrong. */ - rb_sys_warning(path); - } - return 0; - } - - while (p && !status) { - if (*p == '/') p++; - m = strchr(p, '/'); - if (has_magic(p, m, flags)) { - char *dir, *base, *magic, *buf; + char *p = sub, *m, *buf; DIR *dirp; @@ -932,5 +1001,6 @@ glob_helper(path, sub, flags, func, arg) int recursive = 0; + int magical = 1; struct d_link { - char *path; + char *name; struct d_link *next; @@ -938,79 +1008,105 @@ glob_helper(path, sub, flags, func, arg) - base = extract_path(path, p); - if (path == p) dir = "."; - else dir = base; - - magic = extract_elem(p); - if (stat(dir, &st) < 0) { - if (errno != ENOENT) rb_sys_warning(dir); - free(base); - free(magic); - break; + while (*p && !has_magic(p, &m, flags)) { + if (*m == '/') { + separator = 1; + p = m + 1; } - if (S_ISDIR(st.st_mode)) { - if (m && strcmp(magic, "**") == 0) { - int n = strlen(base); - recursive = 1; - buf = ALLOC_N(char, n+strlen(m)+3); - sprintf(buf, "%s%s", base, *base ? m : m+1); - status = glob_helper(buf, buf+n, flags, func, arg); - free(buf); - if (status) goto finalize; + else { + separator = 0; + p = m; } - dirp = opendir(dir); - if (dirp == NULL) { - rb_sys_warning(dir); - free(base); - free(magic); - break; } + + if (!(flags & FNM_NOESCAPE)) { + int n = remove_backslashes(sub, p); + p -= n; + m -= n; + } + + if (*p == '\0') { /* magic not found */ + if (separator) { + char c = p[-1]; p[-1] = '\0'; + if (do_lstat(path, &st) == 0 && S_ISDIR(st.st_mode)) { + p[-1] = c; + return glob_call_func(func, path, arg); } else { - free(base); - free(magic); - break; + p[-1] = c; + } + } + else { + if (do_lstat(path, &st) == 0) + return glob_call_func(func, path, arg); + } + return 0; } -#if defined DOSISH_DRIVE_LETTER -#define BASE (*base && !((isdirsep(*base) && !base[1]) || (base[1] == ':' && isdirsep(base[2]) && !base[3]))) -#else -#define BASE (*base && !(isdirsep(*base) && !base[1])) -#endif + if (p[0] == '*' && p[1] == '*' && p[2] == '/') { + recursive = 1; + memmove(p, p+3, strlen(p+3)+1); /* move '\0' too */ + magical = has_magic(p, &m, flags); /* go to next element */ + if (!magical) { + status = glob_helper(path, p, separator, flags, func, arg); + if (status) return status; + } + } - for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) { - if (recursive) { - if (strcmp(".", dp->d_name) == 0 || strcmp("..", dp->d_name) == 0) - continue; - buf = ALLOC_N(char, strlen(base)+NAMLEN(dp)+strlen(m)+6); - sprintf(buf, "%s%s%s", base, (BASE) ? "/" : "", dp->d_name); - if (lstat(buf, &st) < 0) { - if (errno != ENOENT) rb_sys_warning(buf); - free(buf); - continue; + if (path == p) { + dirp = do_opendir("."); + if (dirp == NULL) return 0; } + else { + int n = separator ? -1 : 0; + char c = p[n]; p[n] = '\0'; + dirp = do_opendir(path); + p[n] = c; + if (dirp == NULL) return 0; + } + + for (dp = readdir(dirp); dp != NULL && status == 0; dp = readdir(dirp)) { + const int uncheck = 0; + const int dir = 1; + const int notdir = 2; + int type = uncheck; + if (recursive && strcmp(".", dp->d_name) != 0 && strcmp("..", dp->d_name) != 0) { + const int n = p - path; + buf = ALLOC_N(char, n+NAMLEN(dp)+1); + memcpy(buf, path, n); + strcpy(buf+n, dp->d_name); + type = notdir; + if (do_lstat(buf, &st) == 0) { if (S_ISDIR(st.st_mode)) { - char *t = buf+strlen(buf); - strcpy(t, "/**"); - strcpy(t+3, m); - status = glob_helper(buf, t, flags, func, arg); - free(buf); - if (status) break; - continue; + tmp = ALLOC(struct d_link); + tmp->name = ALLOC_N(char, NAMLEN(dp)+1); + strcpy(tmp->name, dp->d_name); + *tail = tmp; + tail = &tmp->next; + type = dir; + } + else if (S_ISLNK(st.st_mode) && do_stat(buf, &st) == 0 && S_ISDIR(st.st_mode)) { + type = dir; + } } free(buf); + } + if (type == notdir && *m == '/') continue; + if (magical && do_fnmatch(p, m, dp->d_name, flags) == 0) { + const int n1 = p - path; + const int n2 = NAMLEN(dp); + buf = ALLOC_N(char, n1+n2+strlen(m)+1); + memcpy(buf, path, n1); + strcpy(buf+n1, dp->d_name); + if (*m == '\0') { + status = glob_call_func(func, buf, arg); } - if (fnmatch(magic, dp->d_name, flags) == 0) { - buf = ALLOC_N(char, strlen(base)+NAMLEN(dp)+2); - sprintf(buf, "%s%s%s", base, (BASE) ? "/" : "", dp->d_name); - if (!m) { + else if (m[1] == '\0' && type == dir) { + strcpy(buf+n1+n2, m); status = glob_call_func(func, buf, arg); - free(buf); - if (status) break; - continue; } - tmp = ALLOC(struct d_link); - tmp->path = buf; - *tail = tmp; - tail = &tmp->next; + else { + strcpy(buf+n1+n2, m); + status = glob_helper(buf, buf+n1+n2+1, 1, flags, func, arg); + } + free(buf); } @@ -1018,23 +1114,14 @@ glob_helper(path, sub, flags, func, arg) closedir(dirp); - finalize: *tail = 0; - free(base); - free(magic); - if (link) { while (link) { if (status == 0) { - if (stat(link->path, &st) == 0) { - if (S_ISDIR(st.st_mode)) { - int len = strlen(link->path); - int mlen = strlen(m); - char *t = ALLOC_N(char, len+mlen+1); - - sprintf(t, "%s%s", link->path, m); - status = glob_helper(t, t+len, flags, func, arg); - free(t); - } - } - else { - rb_sys_warning(link->path); - } + const int n1 = p - path; + const int n2 = strlen(link->name); + buf = ALLOC_N(char, n1+n2+4+strlen(p)+1); + memcpy(buf, path, n1); + strcpy(buf+n1, link->name); + strcpy(buf+n1+n2, "/**/"); + strcpy(buf+n1+n2+4, p); + status = glob_helper(buf, buf+n1+n2+1, 1, flags, func, arg); + free(buf); } @@ -1042,10 +1129,5 @@ glob_helper(path, sub, flags, func, arg) link = link->next; - free(tmp->path); + free(tmp->name); free(tmp); } - break; - } - } - p = m; - } return status; @@ -1060,3 +1142,13 @@ rb_glob2(path, flags, func, arg) { - int status = glob_helper(path, 0, flags, func, arg); + char *root = path; + int status; + +#if defined DOSISH + flags |= FNM_CASEFOLD; + root = rb_path_skip_prefix(root); +#endif + + if (*root == '/') root++; + + status = glob_helper(path, root, 0, flags, func, arg); if (status) rb_jump_tag(status); @@ -1124,3 +1216,3 @@ push_braces(ary, s, flags) } - p++; + Inc(p); } @@ -1132,3 +1224,3 @@ push_braces(ary, s, flags) } - p++; + Inc(p); } @@ -1142,9 +1234,9 @@ push_braces(ary, s, flags) while (*p != '}') { - t = p + 1; - for (p = t; *p!='}' && *p!=','; p++) { + t = Next(p); + for (p = t; *p!='}' && *p!=','; Inc(p)) { /* skip inner braces */ - if (*p == '{') while (*p!='}') p++; + if (*p == '{') while (*p!='}') Inc(p); } memcpy(b, t, p-t); - strcpy(b+(p-t), rbrace+1); + strcpy(b+(p-t), Next(rbrace)); push_braces(ary, buf, flags); @@ -1159,3 +1251,2 @@ push_braces(ary, s, flags) #define isdelim(c) ((c)=='\0') - static VALUE @@ -1184,5 +1275,5 @@ rb_push_glob(str, flags) while (p < pend) { - t = buf; nest = maxnest = 0; while (p < pend && isdelim(*p)) p++; + t = p; while (p < pend && !isdelim(*p)) { @@ -1191,8 +1282,9 @@ rb_push_glob(str, flags) if (!noescape && *p == '\\') { - *t++ = *p++; - if (p == pend) break; + p++; + if (p == pend || isdelim(*p)) break; } - *t++ = *p++; + p = Next(p); } - *t = '\0'; + memcpy(buf, t, p - t); + buf[p - t] = '\0'; if (maxnest == 0) {