/* fnmatch.c -- ksh-like extended pattern matching for the shell and filename globbing. */ /* Copyright (C) 1991, 1997 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. Bash is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. Bash is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Bash; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */ #include #include /* for debugging */ #include "fnmatch.h" #include "collsyms.h" #include #if defined (HAVE_STRING_H) # include #else # include #endif /* HAVE_STRING_H */ static int gmatch (); static char *brackmatch (); #ifdef EXTENDED_GLOB static int extmatch (); static char *patscan (); #endif #if !defined (isascii) # define isascii(c) ((unsigned int)(c) <= 0177) #endif /* Note that these evaluate C many times. */ #ifndef isblank # define isblank(c) ((c) == ' ' || (c) == '\t') #endif #ifndef isgraph # define isgraph(c) ((c) != ' ' && isprint((c))) #endif #ifndef isxdigit # define isxdigit(c) (((c) >= '0' && (c) <= '9') || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F')) #endif /* The result of FOLD is an `unsigned char' */ # define FOLD(c) ((flags & FNM_CASEFOLD) && isupper ((unsigned char)c) \ ? tolower ((unsigned char)c) \ : ((unsigned char)c)) #ifndef STREQ #define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0) #define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0) #endif /* We don't use strcoll(3) for range comparisons in bracket expressions, even if we have it, since it can have unwanted side effects in locales other than POSIX or US. For instance, in the de locale, [A-Z] matches all characters. So, for ranges we use ASCII collation, and for collating symbol equivalence we use strcoll(). The casts to int are to handle tests that use unsigned chars. */ #define rangecmp(c1, c2) ((int)(c1) - (int)(c2)) #if defined (HAVE_STRCOLL) /* Helper function for collating symbol equivalence. */ static int rangecmp2 (c1, c2) int c1, c2; { static char s1[2] = { ' ', '\0' }; static char s2[2] = { ' ', '\0' }; int ret; /* Eight bits only. Period. */ c1 &= 0xFF; c2 &= 0xFF; if (c1 == c2) return (0); s1[0] = c1; s2[0] = c2; if ((ret = strcoll (s1, s2)) != 0) return ret; return (c1 - c2); } #else /* !HAVE_STRCOLL */ # define rangecmp2(c1, c2) ((int)(c1) - (int)(c2)) #endif /* !HAVE_STRCOLL */ #if defined (HAVE_STRCOLL) static int collequiv (c1, c2) int c1, c2; { return (rangecmp2 (c1, c2) == 0); } #else # define collequiv(c1, c2) ((c1) == (c2)) #endif static int collsym (s, len) char *s; int len; { register struct _collsym *csp; for (csp = posix_collsyms; csp->name; csp++) { if (STREQN(csp->name, s, len) && csp->name[len] == '\0') return (csp->code); } if (len == 1) return s[0]; return -1; } int fnmatch (pattern, string, flags) char *pattern; char *string; int flags; { char *se, *pe; if (string == 0 || pattern == 0) return FNM_NOMATCH; se = string + strlen (string); pe = pattern + strlen (pattern); return (gmatch (string, se, pattern, pe, flags)); } /* Match STRING against the filename pattern PATTERN, returning zero if it matches, FNM_NOMATCH if not. */ static int gmatch (string, se, pattern, pe, flags) char *string, *se; char *pattern, *pe; int flags; { register char *p, *n; /* pattern, string */ register char c; /* current pattern character */ register char sc; /* current string character */ p = pattern; n = string; if (string == 0 || pattern == 0) return FNM_NOMATCH; while (p < pe) { c = *p++; c = FOLD (c); sc = n < se ? *n : '\0'; #ifdef EXTENDED_GLOB /* extmatch () will handle recursively calling gmatch, so we can just return what extmatch() returns. */ if ((flags & FNM_EXTMATCH) && *p == '(' && (c == '+' || c == '*' || c == '?' || c == '@' || c == '!')) /* ) */ { int lflags; /* If we're not matching the start of the string, we're not concerned about the special cases for matching `.' */ lflags = (n == string) ? flags : (flags & ~FNM_PERIOD); return (extmatch (c, n, se, p, pe, lflags)); } #endif switch (c) { case '?': /* Match single character */ if (sc == '\0') return FNM_NOMATCH; else if ((flags & FNM_PATHNAME) && sc == '/') /* If we are matching a pathname, `?' can never match a `/'. */ return FNM_NOMATCH; else if ((flags & FNM_PERIOD) && sc == '.' && (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/'))) /* `?' cannot match a `.' if it is the first character of the string or if it is the first character following a slash and we are matching a pathname. */ return FNM_NOMATCH; break; case '\\': /* backslash escape removes special meaning */ if (p == pe) return FNM_NOMATCH; if ((flags & FNM_NOESCAPE) == 0) { c = *p++; /* A trailing `\' cannot match. */ if (p > pe) return FNM_NOMATCH; c = FOLD (c); } if (FOLD (sc) != (unsigned char)c) return FNM_NOMATCH; break; case '*': /* Match zero or more characters */ if (p == pe) return 0; if ((flags & FNM_PERIOD) && sc == '.' && (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/'))) /* `*' cannot match a `.' if it is the first character of the string or if it is the first character following a slash and we are matching a pathname. */ return FNM_NOMATCH; /* Collapse multiple consecutive, `*' and `?', but make sure that one character of the string is consumed for each `?'. */ for (c = *p++; (c == '?' || c == '*'); c = *p++) { if ((flags & FNM_PATHNAME) && sc == '/') /* A slash does not match a wildcard under FNM_PATHNAME. */ return FNM_NOMATCH; else if (c == '?') { if (sc == '\0') return FNM_NOMATCH; /* One character of the string is consumed in matching this ? wildcard, so *??? won't match if there are fewer than three characters. */ n++; sc = n < se ? *n : '\0'; } #ifdef EXTENDED_GLOB /* Handle ******(patlist) */ if ((flags & FNM_EXTMATCH) && c == '*' && *p == '(') /*)*/ { char *newn; /* We need to check whether or not the extended glob pattern matches the remainder of the string. If it does, we match the entire pattern. */ for (newn = n; newn < se; ++newn) { if (extmatch (c, newn, se, p, pe, flags) == 0) return (0); } /* We didn't match the extended glob pattern, but that's OK, since we can match 0 or more occurrences. We need to skip the glob pattern and see if we match the rest of the string. */ newn = patscan (p + 1, pe, 0); p = newn; } #endif if (p == pe) break; } /* If we've hit the end of the pattern and the last character of the pattern was handled by the loop above, we've succeeded. Otherwise, we need to match that last character. */ if (p == pe && (c == '?' || c == '*')) return (0); /* General case, use recursion. */ { unsigned char c1; c1 = (unsigned char)((flags & FNM_NOESCAPE) == 0 && c == '\\') ? *p : c; c1 = FOLD (c1); for (--p; n < se; ++n) { /* Only call fnmatch if the first character indicates a possible match. We can check the first character if we're not doing an extended glob match. */ if ((flags & FNM_EXTMATCH) == 0 && c != '[' && FOLD (*n) != c1) continue; /* If we're doing an extended glob match and the pattern is not one of the extended glob patterns, we can check the first character. */ if ((flags & FNM_EXTMATCH) && p[1] != '(' && /*)*/ strchr ("?*+@!", *p) == 0 && c != '[' && FOLD (*n) != c1) continue; /* Otherwise, we just recurse. */ if (gmatch (n, se, p, pe, flags & ~FNM_PERIOD) == 0) return (0); } return FNM_NOMATCH; } case '[': { if (sc == '\0' || n == se) return FNM_NOMATCH; /* A character class cannot match a `.' if it is the first character of the string or if it is the first character following a slash and we are matching a pathname. */ if ((flags & FNM_PERIOD) && sc == '.' && (n == string || ((flags & FNM_PATHNAME) && n[-1] == '/'))) return (FNM_NOMATCH); p = brackmatch (p, sc, flags); if (p == 0) return FNM_NOMATCH; } break; default: if ((unsigned char)c != FOLD (sc)) return (FNM_NOMATCH); } ++n; } if (n == se) return (0); if ((flags & FNM_LEADING_DIR) && *n == '/') /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ return 0; return (FNM_NOMATCH); } /* Parse a bracket expression collating symbol ([.sym.]) starting at P, find the value of the symbol, and move P past the collating symbol expression. The value is returned in *VP, if VP is not null. */ static char * parse_collsym (p, vp) char *p; int *vp; { register int pc; int val; p++; /* move past the `.' */ for (pc = 0; p[pc]; pc++) if (p[pc] == '.' && p[pc+1] == ']') break; val = collsym (p, pc); if (vp) *vp = val; return (p + pc + 2); } static char * brackmatch (p, test, flags) char *p; unsigned char test; int flags; { register char cstart, cend, c; register int not; /* Nonzero if the sense of the character class is inverted. */ int pc, brcnt; char *savep; test = FOLD (test); savep = p; /* POSIX.2 3.13.1 says that an exclamation mark (`!') shall replace the circumflex (`^') in its role in a `nonmatching list'. A bracket expression starging with an unquoted circumflex character produces unspecified results. This implementation treats the two identically. */ if (not = (*p == '!' || *p == '^')) ++p; c = *p++; for (;;) { /* Initialize cstart and cend in case `-' is the last character of the pattern. */ cstart = cend = c; /* POSIX.2 equivalence class: [=c=]. See POSIX.2 2.8.3.2. Find the end of the equivalence class, move the pattern pointer past it, and check for equivalence. XXX - this handles only single-character equivalence classes, which is wrong, or at least incomplete. */ if (c == '[' && *p == '=' && p[2] == '=' && p[3] == ']') { pc = FOLD (p[1]); p += 4; if (collequiv (test, pc)) goto matched; else { c = *p++; if (c == '\0') return ((test == '[') ? savep : (char *)0); c = FOLD (c); continue; } } /* POSIX.2 character class expression. See POSIX.2 2.8.3.2. */ if (c == '[' && *p == ':') { pc = 0; /* make sure invalid char classes don't match. */ if (STREQN (p+1, "alnum:]", 7)) { pc = isalnum (test); p += 8; } else if (STREQN (p+1, "alpha:]", 7)) { pc = isalpha (test); p += 8; } else if (STREQN (p+1, "blank:]", 7)) { pc = isblank (test); p += 8; } else if (STREQN (p+1, "cntrl:]", 7)) { pc = iscntrl (test); p += 8; } else if (STREQN (p+1, "digit:]", 7)) { pc = isdigit (test); p += 8; } else if (STREQN (p+1, "graph:]", 7)) { pc = isgraph (test); p += 8; } else if (STREQN (p+1, "lower:]", 7)) { pc = islower (test); p += 8; } else if (STREQN (p+1, "print:]", 7)) { pc = isprint (test); p += 8; } else if (STREQN (p+1, "punct:]", 7)) { pc = ispunct (test); p += 8; } else if (STREQN (p+1, "space:]", 7)) { pc = isspace (test); p += 8; } else if (STREQN (p+1, "upper:]", 7)) { pc = isupper (test); p += 8; } else if (STREQN (p+1, "xdigit:]", 8)) { pc = isxdigit (test); p += 9; } else if (STREQN (p+1, "ascii:]", 7)) { pc = isascii (test); p += 8; } if (pc) goto matched; else { /* continue the loop here, since this expression can't be the first part of a range expression. */ c = *p++; if (c == '\0') return ((test == '[') ? savep : (char *)0); else if (c == ']') break; c = FOLD (c); continue; } } /* POSIX.2 collating symbols. See POSIX.2 2.8.3.2. Find the end of the symbol name, make sure it is terminated by `.]', translate the name to a character using the external table, and do the comparison. */ if (c == '[' && *p == '.') { p = parse_collsym (p, &pc); /* An invalid collating symbol cannot be the first point of a range. If it is, we set cstart to one greater than `test', so any comparisons later will fail. */ cstart = (pc == -1) ? test + 1 : pc; } if (!(flags & FNM_NOESCAPE) && c == '\\') { if (*p == '\0') return (char *)0; cstart = cend = *p++; } cstart = cend = FOLD (cstart); /* POSIX.2 2.8.3.1.2 says: `An expression containing a `[' that is not preceded by a backslash and is not part of a bracket expression produces undefined results.' This implementation treats the `[' as just a character to be matched if there is not a closing `]'. */ if (c == '\0') return ((test == '[') ? savep : (char *)0); c = *p++; c = FOLD (c); if ((flags & FNM_PATHNAME) && c == '/') /* [/] can never match when matching a pathname. */ return (char *)0; /* This introduces a range, unless the `-' is the last character of the class. Find the end of the range and move past it. */ if (c == '-' && *p != ']') { cend = *p++; if (!(flags & FNM_NOESCAPE) && cend == '\\') cend = *p++; if (cend == '\0') return (char *)0; if (cend == '[' && *p == '.') { p = parse_collsym (p, &pc); /* An invalid collating symbol cannot be the second part of a range expression. If we get one, we set cend to one fewer than the test character to make sure the range test fails. */ cend = (pc == -1) ? test - 1 : pc; } cend = FOLD (cend); c = *p++; /* POSIX.2 2.8.3.2: ``The ending range point shall collate equal to or higher than the starting range point; otherwise the expression shall be treated as invalid.'' Note that this applies to only the range expression; the rest of the bracket expression is still checked for matches. */ if (rangecmp (cstart, cend) > 0) { if (c == ']') break; c = FOLD (c); continue; } } if (rangecmp (test, cstart) >= 0 && rangecmp (test, cend) <= 0) goto matched; if (c == ']') break; } /* No match. */ return (!not ? (char *)0 : p); matched: /* Skip the rest of the [...] that already matched. */ brcnt = (c != ']') + (c == '[' && (*p == '=' || *p == ':' || *p == '.')); while (brcnt > 0) { /* A `[' without a matching `]' is just another character to match. */ if (c == '\0') return ((test == '[') ? savep : (char *)0); c = *p++; if (c == '[' && (*p == '=' || *p == ':' || *p == '.')) brcnt++; else if (c == ']') brcnt--; else if (!(flags & FNM_NOESCAPE) && c == '\\') { if (*p == '\0') return (char *)0; /* XXX 1003.2d11 is unclear if this is right. */ ++p; } } return (not ? (char *)0 : p); } #if defined (EXTENDED_GLOB) /* ksh-like extended pattern matching: [?*+@!](pat-list) where pat-list is a list of one or patterns separated by `|'. Operation is as follows: ?(patlist) match zero or one of the given patterns *(patlist) match zero or more of the given patterns +(patlist) match one or more of the given patterns @(patlist) match exactly one of the given patterns !(patlist) match anything except one of the given patterns */ /* Scan a pattern starting at STRING and ending at END, keeping track of embedded () and []. If DELIM is 0, we scan until a matching `)' because we're scanning a `patlist'. Otherwise, we scan until we see DELIM. In all cases, we never scan past END. The return value is the first character after the matching DELIM. */ static char * patscan (string, end, delim) char *string, *end; int delim; { int pnest, bnest; char *s, c; pnest = bnest = 0; for (s = string; c = *s; s++) { if (s >= end) return (s); switch (c) { case '\0': return ((char *)0); case '[': bnest++; break; case ']': if (bnest) bnest--; break; case '(': if (bnest == 0) pnest++; break; case ')': #if 0 if (bnest == 0) pnest--; if (pnest <= 0) return ++s; #else if (bnest == 0 && pnest-- <= 0) return ++s; #endif break; case '|': if (bnest == 0 && pnest == 0 && delim == '|') return ++s; break; } } return (char *)0; } /* Return 0 if dequoted pattern matches S in the current locale. */ static int strcompare (p, pe, s, se) char *p, *pe, *s, *se; { int ret; char c1, c2; c1 = *pe; c2 = *se; *pe = *se = '\0'; #if defined (HAVE_STRCOLL) ret = strcoll (p, s); #else ret = strcmp (p, s); #endif *pe = c1; *se = c2; return (ret == 0 ? ret : FNM_NOMATCH); } /* Match a ksh extended pattern specifier. Return FNM_NOMATCH on failure or 0 on success. This is handed the entire rest of the pattern and string the first time an extended pattern specifier is encountered, so it calls gmatch recursively. */ static int extmatch (xc, s, se, p, pe, flags) int xc; /* select which operation */ char *s, *se; char *p, *pe; int flags; { char *prest; /* pointer to rest of pattern */ char *psub; /* pointer to sub-pattern */ char *pnext; /* pointer to next sub-pattern */ char *srest; /* pointer to rest of string */ int m1, m2; #if 0 fprintf(stderr, "extmatch: xc = %c\n", xc); fprintf(stderr, "extmatch: s = %s; se = %s\n", s, se); fprintf(stderr, "extmatch: p = %s; pe = %s\n", p, pe); #endif prest = patscan (p + (*p == '('), pe, 0); /* ) */ if (prest == 0) /* If PREST is 0, we failed to scan a valid pattern. In this case, we just want to compare the two as strings. */ return (strcompare (p - 1, pe, s, se)); switch (xc) { case '+': /* match one or more occurrences */ case '*': /* match zero or more occurrences */ /* If we can get away with no matches, don't even bother. Just call gmatch on the rest of the pattern and return success if it succeeds. */ if (xc == '*' && (gmatch (s, se, prest, pe, flags) == 0)) return 0; /* OK, we have to do this the hard way. First, we make sure one of the subpatterns matches, then we try to match the rest of the string. */ for (psub = p + 1; ; psub = pnext) { pnext = patscan (psub, pe, '|'); for (srest = s; srest <= se; srest++) { /* Match this substring (S -> SREST) against this subpattern (psub -> pnext - 1) */ m1 = gmatch (s, srest, psub, pnext - 1, flags) == 0; /* OK, we matched a subpattern, so make sure the rest of the string matches the rest of the pattern. Also handle multiple matches of the pattern. */ if (m1) m2 = (gmatch (srest, se, prest, pe, flags) == 0) || (s != srest && gmatch (srest, se, p - 1, pe, flags) == 0); if (m1 && m2) return (0); } if (pnext == prest) break; } return (FNM_NOMATCH); case '?': /* match zero or one of the patterns */ case '@': /* match exactly one of the patterns */ /* If we can get away with no matches, don't even bother. Just call gmatch on the rest of the pattern and return success if it succeeds. */ if (xc == '?' && (gmatch (s, se, prest, pe, flags) == 0)) return 0; /* OK, we have to do this the hard way. First, we see if one of the subpatterns matches, then, if it does, we try to match the rest of the string. */ for (psub = p + 1; ; psub = pnext) { pnext = patscan (psub, pe, '|'); srest = (prest == pe) ? se : s; for ( ; srest <= se; srest++) { if (gmatch (s, srest, psub, pnext - 1, flags) == 0 && gmatch (srest, se, prest, pe, flags) == 0) return (0); } if (pnext == prest) break; } return (FNM_NOMATCH); case '!': /* match anything *except* one of the patterns */ for (srest = s; srest <= se; srest++) { m1 = 0; for (psub = p + 1; ; psub = pnext) { pnext = patscan (psub, pe, '|'); /* If one of the patterns matches, just bail immediately. */ if (m1 = (gmatch (s, srest, psub, pnext - 1, flags) == 0)) break; if (pnext == prest) break; } if (m1 == 0 && gmatch (srest, se, prest, pe, flags) == 0) return (0); } return (FNM_NOMATCH); } return (FNM_NOMATCH); } #endif /* EXTENDED_GLOB */ #ifdef TEST main (c, v) int c; char **v; { char *string, *pat; string = v[1]; pat = v[2]; if (fnmatch (pat, string, 0) == 0) { printf ("%s matches %s\n", string, pat); exit (0); } else { printf ("%s does not match %s\n", string, pat); exit (1); } } #endif