山本です。glob_helperをリファクタリングしてみました。 次の点以外は、仕様は変化していないはずです。 # DOSISHな環境で、Dir.glob("e:*") が動作するようになった。(eドライブのカレントを列挙) # 以前は、Dir.glob("e:./*") とする必要があった。 依存個所は、glob_helperの中の char *dir の初期化部分と、 rb_glob2の中のmagic探索開始ポインタ設定部分です。 今までと違って大きくいじったので、ちょっと怖いです。 いちおう自分のスクリプトは動作していますが・・・ --- dir.c Sat Nov 22 12:59:18 2003 +++ dir.c Sat Dec 13 12:02:10 2003 @@ -78,10 +78,66 @@ char *strchr _((char*,char)); #define downcase(c) (nocase && ISUPPER(c) ? tolower(c) : (c)) +#define compare(c1, c2) (((unsigned char)(c1)) - ((unsigned char)(c2))) -#ifndef CharNext /* defined as CharNext[AW] on Windows. */ -# if defined(DJGPP) -# define CharNext(p) ((p) + mblen(p, MB_CUR_MAX)) -# else -# define CharNext(p) ((p) + 1) -# endif -#endif +static char * +greater(p1, p2) + const char *p1; + const char *p2; +{ + return p1 > p2 ? (char *)p1 : (char *)p2; +} + +#if defined(_WIN32) +# define Next(p) (greater((p) + 1, CharNext(p))) /* faster */ +#elif defined(DJGPP) || defined(__EMX__) +# define Next(p) (greater((p) + 1, (p) + mblen(p, INT_MAX))) +# endif + +#ifndef Next /* single byte environment */ +# define Next(p) ((p) + 1) +# define Inc(p) (++(p)) +# define CopyAndInc(dst, src) (*(dst)++ = *(src)++) +# define Compare(p1, p2) (compare(downcase(*(p1)), downcase(*(p2)))) +#else /* multi byte environment */ +# define Inc(p) ((p) = Next(p)) +# define CopyAndInc(dst, src) (CopyAndIncImpl(&(dst), &(src))) +# define Compare(p1, p2) (CompareImpl(p1, p2, nocase)) +static void +CopyAndIncImpl(pdst, psrc) + char **pdst; + const char **psrc; +{ + const int len = Next(*psrc) - *psrc; + memcpy(*pdst, *psrc, len); + (*pdst) += len; + (*psrc) += len; +} + +static int +CompareImpl(p1, p2, nocase) + const char *p1; + const char *p2; + int nocase; +{ + const int len1 = Next(p1) - p1; + const int len2 = Next(p2) - p2; + + if (len1 == 1) + if (len2 == 1) { + return compare(downcase(*p1), downcase(*p2)); + } + else { + const int ret = compare(downcase(*p1), *p2); + return ret ? 
ret : -1; + } + else + if (len2 == 1) { + const int ret = compare(*p1, downcase(*p2)); + return ret ? ret : 1; + } + else { + const int ret = memcmp(p1, p2, len1 < len2 ? len1 : len2); + return ret ? ret : len1 - len2; + } +} +#endif /* environment */ @@ -89,3 +145,3 @@ char *strchr _((char*,char)); #define isdirsep(c) ((c) == '/' || (c) == '\\') -static const char * +static char * find_dirsep(s) @@ -95,4 +151,4 @@ find_dirsep(s) if (isdirsep(*s)) - return s; - s = CharNext(s); + return (char *)s; + Inc(s); } @@ -108,3 +164,3 @@ range(pat, test, flags) char *pat; - char test; + char *test; int flags; @@ -117,22 +173,20 @@ range(pat, test, flags) if (not) - pat++; - - test = downcase(test); + Inc(pat); while (*pat) { - int cstart, cend; - cstart = cend = *pat++; - if (cstart == ']') + char *pstart, *pend; + pstart = pend = pat; Inc(pat); + if (*pstart == ']') return ok == not ? 0 : pat; - else if (escape && cstart == '\\') - cstart = cend = *pat++; - if (*pat == '-' && pat[1] != ']') { - if (escape && pat[1] == '\\') - pat++; - cend = pat[1]; - if (!cend) + else if (escape && *pstart == '\\') + pstart = pend = pat; Inc(pat); + if (*pat == '-' && *Next(pat) != ']') { + if (escape && *Next(pat) == '\\') + Inc(pat); + pend = Next(pat); + if (!*pend) return 0; - pat += 2; + Inc(pat); Inc(pat); } - if (downcase(cstart) <= test && test <= downcase(cend)) + if (Compare(pstart, test) <= 0 && Compare(test, pend) <= 0) ok = 1; @@ -143,4 +197,5 @@ range(pat, test, flags) #define ISDIRSEP(c) (pathname && isdirsep(c)) -#define PERIOD(s) (period && *(s) == '.' && \ - ((s) == string || ISDIRSEP((s)[-1]))) +#define PERIOD_S() (period && *s == '.' 
&& \ + (s == string || ISDIRSEP(*s_prev))) +#define INC_S() (s = Next(s_prev = s)) static int @@ -151,5 +206,5 @@ fnmatch(pat, string, flags) { - int c; - int test; - const char *s = string; + const char *p; + const char *test; + const char *s = string, *s_prev; int escape = !(flags & FNM_NOESCAPE); @@ -159,17 +214,17 @@ fnmatch(pat, string, flags) - while (c = *pat++) { - switch (c) { + while (p = pat, Inc(pat), *p) { + switch (*p) { case '?': - if (!*s || ISDIRSEP(*s) || PERIOD(s)) + if (!*s || ISDIRSEP(*s) || PERIOD_S()) return FNM_NOMATCH; - s++; + INC_S(); break; case '*': - while ((c = *pat++) == '*') + while (p = pat, Inc(pat), *p == '*') ; - if (PERIOD(s)) + if (PERIOD_S()) return FNM_NOMATCH; - if (!c) { + if (!*p) { if (pathname && find_dirsep(s)) @@ -179,6 +234,6 @@ fnmatch(pat, string, flags) } - else if (ISDIRSEP(c)) { + else if (ISDIRSEP(*p)) { s = find_dirsep(s); if (s) { - s++; + INC_S(); break; @@ -188,7 +243,6 @@ fnmatch(pat, string, flags) - test = escape && c == '\\' ? *pat : c; - test = downcase(test); - pat--; + test = escape && *p == '\\' ? 
pat : p; + pat = p; while (*s) { - if ((c == '[' || downcase(*s) == test) && + if ((*p == '[' || Compare(s, test) == 0) && !fnmatch(pat, s, flags | FNM_DOTMATCH)) @@ -197,3 +251,3 @@ fnmatch(pat, string, flags) break; - s++; + INC_S(); } @@ -202,8 +256,8 @@ fnmatch(pat, string, flags) case '[': - if (!*s || ISDIRSEP(*s) || PERIOD(s)) + if (!*s || ISDIRSEP(*s) || PERIOD_S()) return FNM_NOMATCH; - pat = range(pat, *s, flags); + pat = range(pat, s, flags); if (!pat) return FNM_NOMATCH; - s++; + INC_S(); break; @@ -216,7 +270,7 @@ fnmatch(pat, string, flags) ) { - c = *pat; - if (!c) - c = '\\'; + p = pat; + if (!*p) + p = "\\"; /* point to embedded string */ else - pat++; + Inc(pat); } @@ -226,3 +280,3 @@ fnmatch(pat, string, flags) #if defined DOSISH - if (ISDIRSEP(c) && isdirsep(*s)) + if (ISDIRSEP(*p) && isdirsep(*s)) ; @@ -230,5 +284,5 @@ fnmatch(pat, string, flags) #endif - if(downcase(c) != downcase(*s)) + if(Compare(p, s) != 0) return FNM_NOMATCH; - s++; + INC_S(); break; @@ -570,10 +624,11 @@ dir_s_rmdir(obj, dir) -/* Return nonzero if S has any special globbing chars in it. 
*/ -static int -has_magic(s, send, flags) - char *s, *send; +/* difference from find_dirsep: if not found, return pointer to '\0' */ +static char * +find_magic(s, flags, pbeg, pend) + char *s; int flags; + char **pbeg; + char **pend; { - register char *p = s; - register char c; + char *p; int open = 0; @@ -581,7 +636,13 @@ has_magic(s, send, flags) - while ((c = *p++) != '\0') { - switch (c) { + *pbeg = s; + + while (p = s, Inc(s), *p != '\0') { + switch (*p) { + case '/': + *pbeg = s; + continue; + case '?': case '*': - return Qtrue; + goto found; @@ -592,3 +653,3 @@ has_magic(s, send, flags) if (open) - return Qtrue; + goto found; continue; @@ -596,45 +657,35 @@ has_magic(s, send, flags) case '\\': - if (escape && *p++ == '\0') - return Qfalse; + if (escape && (p = s, Inc(s), *p == '\0')) + goto miss; } - - if (send && p >= send) break; - } - return Qfalse; } -static char* -extract_path(p, pend) - char *p, *pend; -{ - char *alloc; - int len; + miss: + *pbeg = *pend = p; + return; - len = pend - p; - alloc = ALLOC_N(char, len+1); - memcpy(alloc, p, len); - if (len > 1 && pend[-1] == '/' -#if defined DOSISH_DRIVE_LETTER - && pend[-2] != ':' -#endif - ) { - alloc[len-1] = 0; - } - else { - alloc[len] = 0; + found: + while (*s) { + if (*s == '/') + break; + Inc(s); } - - return alloc; + *pend = s; } -static char* -extract_elem(path) - char *path; +static int +do_fnmatch(beg, end, string, flags) + char *beg; + char *end; + const char *string; + int flags; { - char *pend; + int ret; + char c; - pend = strchr(path, '/'); - if (!pend) pend = path + strlen(path); - - return extract_path(path, pend); + c = *end; + *end = '\0'; /* should I allocate new string? 
*/ + ret = fnmatch(beg, string, flags); + *end = c; + return ret; } @@ -652,3 +703,3 @@ remove_backslashes(p) } - *t++ = *p++; + CopyAndInc(t, p); } @@ -704,12 +755,16 @@ glob_helper(path, sub, flags, func, arg) struct stat st; - char *p, *m; int status = 0; + char *beg, *end, *buf; + DIR *dirp; + struct dirent *dp; + int recursive = 0; - p = sub ? sub : path; - if (!has_magic(p, 0, flags)) { -#if defined DOSISH - remove_backslashes(path); -#else - if (!(flags & FNM_NOESCAPE)) remove_backslashes(p); -#endif + struct d_link { + char *path; + struct d_link *next; + } *tmp, *link, **tail = &link; + + find_magic(sub, flags, &beg, &end); + + if (*beg == '\0') { /* magic not found */ if (lstat(path, &st) == 0) { @@ -726,36 +781,38 @@ glob_helper(path, sub, flags, func, arg) - while (p && !status) { - if (*p == '/') p++; - m = strchr(p, '/'); - if (has_magic(p, m, flags)) { - char *dir, *base, *magic, *buf; - DIR *dirp; - struct dirent *dp; - int recursive = 0; - - struct d_link { - char *path; - struct d_link *next; - } *tmp, *link, **tail = &link; - - base = extract_path(path, p); - if (path == p) dir = "."; - else dir = base; - - magic = extract_elem(p); + { +#if defined DOSISH_DRIVE_LETTER +#define NEED_DOT ((beg-path==0) || (beg-path==2 && ISALPHA(*path) && path[1] == ':')) +#define NEED_LAST ((NEED_DOT) || (beg-path==1 && *path=='/') || (beg-path==3 && ISALPHA(*path) && path[1]==':' && path[2]=='/')) +#else +#define NEED_DOT ((beg-path==0)) +#define NEED_LAST ((beg-path==0) || (beg-path==1 && *path=='/')) +#endif + int n = (NEED_LAST) ? 
(beg - path) : (beg - path) - 1; + char *dir = ALLOC_N(char, n+1+1); + memcpy(dir, path, n); + if (NEED_DOT) { + dir[n] = '.'; + dir[n+1] = '\0'; + } + else { + dir[n] = '\0'; + } if (stat(dir, &st) < 0) { if (errno != ENOENT) rb_sys_warning(dir); - free(base); - free(magic); - break; + free(dir); + return 0; } if (S_ISDIR(st.st_mode)) { - if (m && strcmp(magic, "**") == 0) { - int n = strlen(base); + if (beg[0] == '*' && beg[1] == '*' && beg[2] == '/') { + int n = beg - path; recursive = 1; - buf = ALLOC_N(char, n+strlen(m)+3); - sprintf(buf, "%s%s", base, *base ? m : m+1); + buf = ALLOC_N(char, n+strlen(beg+3)+1); + memcpy(buf, path, n); + strcpy(buf+n, beg+3); status = glob_helper(buf, buf+n, flags, func, arg); free(buf); - if (status) goto finalize; + if (status) { + free(dir); + return status; + } } @@ -764,5 +821,4 @@ glob_helper(path, sub, flags, func, arg) rb_sys_warning(dir); - free(base); - free(magic); - break; + free(dir); + return 0; } @@ -770,14 +826,10 @@ glob_helper(path, sub, flags, func, arg) else { - free(base); - free(magic); - break; + free(dir); + return 0; + } + free(dir); } - -#if defined DOSISH_DRIVE_LETTER -#define BASE (*base && !((isdirsep(*base) && !base[1]) || (base[1] == ':' && isdirsep(base[2]) && !base[3]))) -#else -#define BASE (*base && !(isdirsep(*base) && !base[1])) -#endif for (dp = readdir(dirp); dp != NULL; dp = readdir(dirp)) { + int n = beg - path; if (recursive) { @@ -785,4 +837,5 @@ glob_helper(path, sub, flags, func, arg) continue; - buf = ALLOC_N(char, strlen(base)+NAMLEN(dp)+strlen(m)+6); - sprintf(buf, "%s%s%s", base, (BASE) ? 
"/" : "", dp->d_name); + buf = ALLOC_N(char, n+NAMLEN(dp)+strlen(end)+3+1); + memcpy(buf, path, n); + strcpy(buf+n, dp->d_name); if (lstat(buf, &st) < 0) { @@ -794,4 +847,4 @@ glob_helper(path, sub, flags, func, arg) char *t = buf+strlen(buf); - strcpy(t, "/**"); - strcpy(t+3, m); + memcpy(t, "/**", 3); + strcpy(t+3, end); status = glob_helper(buf, t, flags, func, arg); @@ -804,6 +857,7 @@ glob_helper(path, sub, flags, func, arg) } - if (fnmatch(magic, dp->d_name, flags) == 0) { - buf = ALLOC_N(char, strlen(base)+NAMLEN(dp)+2); - sprintf(buf, "%s%s%s", base, (BASE) ? "/" : "", dp->d_name); - if (!m) { + if (do_fnmatch(beg, end, dp->d_name, flags) == 0) { + buf = ALLOC_N(char, n+NAMLEN(dp)+1); + memcpy(buf, path, n); + strcpy(buf+n, dp->d_name); + if (*end == '\0') { status = glob_call_func(func, buf, arg); @@ -822,4 +876,2 @@ glob_helper(path, sub, flags, func, arg) *tail = 0; - free(base); - free(magic); if (link) { @@ -829,9 +881,8 @@ glob_helper(path, sub, flags, func, arg) if (S_ISDIR(st.st_mode)) { - int len = strlen(link->path); - int mlen = strlen(m); - char *t = ALLOC_N(char, len+mlen+1); - - sprintf(t, "%s%s", link->path, m); - status = glob_helper(t, t+len, flags, func, arg); - free(t); + int n = strlen(link->path); + buf = ALLOC_N(char, n+strlen(end)+1); + memcpy(buf, link->path, n); + strcpy(buf+n, end); + status = glob_helper(buf, buf+n, flags, func, arg); + free(buf); } @@ -847,6 +898,2 @@ glob_helper(path, sub, flags, func, arg) } - break; - } - } - p = m; } @@ -862,3 +909,10 @@ rb_glob2(path, flags, func, arg) { - int status = glob_helper(path, 0, flags, func, arg); + int status; +#if defined DOSISH + remove_backslashes(path); + status = glob_helper(path, (ISALPHA(path[0]) && path[1] == ':' ? 
path + 2 : path), flags, func, arg); +#else + if (!(flags & FNM_NOESCAPE)) remove_backslashes(path); + status = glob_helper(path, path, flags, func, arg); +#endif if (status) rb_jump_tag(status); @@ -926,3 +980,3 @@ push_braces(ary, s, flags) } - p++; + Inc(p); } @@ -934,3 +988,3 @@ push_braces(ary, s, flags) } - p++; + Inc(p); } @@ -944,9 +998,9 @@ push_braces(ary, s, flags) while (*p != '}') { - t = p + 1; - for (p = t; *p!='}' && *p!=','; p++) { + t = Next(p); + for (p = t; *p!='}' && *p!=','; Inc(p)) { /* skip inner braces */ - if (*p == '{') while (*p!='}') p++; + if (*p == '{') while (*p!='}') Inc(p); } memcpy(b, t, p-t); - strcpy(b+(p-t), rbrace+1); + strcpy(b+(p-t), Next(rbrace)); push_braces(ary, buf, flags); @@ -988,3 +1042,3 @@ rb_push_glob(str, flags) nest = maxnest = 0; - while (p < pend && isdelim(*p)) p++; + while (p < pend && isdelim(*p)) Inc(p); while (p < pend && !isdelim(*p)) { @@ -993,6 +1047,6 @@ rb_push_glob(str, flags) if (!noescape && *p == '\\') { - *t++ = *p++; + CopyAndInc(t, p); if (p == pend) break; } - *t++ = *p++; + CopyAndInc(t, p); }