/* gmisc.c -- miscellaneous pattern matching utility functions for Bash. Copyright (C) 2010 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne-Again SHell. Bash is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. Bash is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Bash. If not, see . */ #include #include "bashtypes.h" #if defined (HAVE_UNISTD_H) # include #endif #include "bashansi.h" #include "shmbutil.h" #include "stdc.h" #ifndef LPAREN # define LPAREN '(' #endif #ifndef RPAREN # define RPAREN ')' #endif #if defined (HANDLE_MULTIBYTE) #define WLPAREN L'(' #define WRPAREN L')' extern char *glob_patscan __P((char *, char *, int)); /* Return 1 of the first character of WSTRING could match the first character of pattern WPAT. Wide character version. */ int match_pattern_wchar (wpat, wstring) wchar_t *wpat, *wstring; { wchar_t wc; if (*wstring == 0) return (0); switch (wc = *wpat++) { default: return (*wstring == wc); case L'\\': return (*wstring == *wpat); case L'?': return (*wpat == WLPAREN ? 1 : (*wstring != L'\0')); case L'*': return (1); case L'+': case L'!': case L'@': return (*wpat == WLPAREN ? 1 : (*wstring == wc)); case L'[': return (*wstring != L'\0'); } } int wmatchlen (wpat, wmax) wchar_t *wpat; size_t wmax; { wchar_t wc; int matlen, bracklen, t, in_cclass, in_collsym, in_equiv; if (*wpat == 0) return (0); matlen = in_cclass = in_collsym = in_equiv = 0; while (wc = *wpat++) { switch (wc) { default: matlen++; break; case L'\\': if (*wpat == 0) return ++matlen; else { matlen++; wpat++; } break; case L'?': if (*wpat == WLPAREN) return (matlen = -1); /* XXX for now */ else matlen++; break; case L'*': return (matlen = -1); case L'+': case L'!': case L'@': if (*wpat == WLPAREN) return (matlen = -1); /* XXX for now */ else matlen++; break; case L'[': /* scan for ending `]', skipping over embedded [:...:] */ bracklen = 1; wc = *wpat++; do { if (wc == 0) { wpat--; /* back up to NUL */ matlen += bracklen; goto bad_bracket; } else if (wc == L'\\') { /* *wpat == backslash-escaped character */ bracklen++; /* If the backslash or backslash-escape ends the string, bail. The ++wpat skips over the backslash escape */ if (*wpat == 0 || *++wpat == 0) { matlen += bracklen; goto bad_bracket; } } else if (wc == L'[' && *wpat == L':') /* character class */ { wpat++; bracklen++; in_cclass = 1; } else if (in_cclass && wc == L':' && *wpat == L']') { wpat++; bracklen++; in_cclass = 0; } else if (wc == L'[' && *wpat == L'.') /* collating symbol */ { wpat++; bracklen++; if (*wpat == L']') /* right bracket can appear as collating symbol */ { wpat++; bracklen++; } in_collsym = 1; } else if (in_collsym && wc == L'.' && *wpat == L']') { wpat++; bracklen++; in_collsym = 0; } else if (wc == L'[' && *wpat == L'=') /* equivalence class */ { wpat++; bracklen++; if (*wpat == L']') /* right bracket can appear as equivalence class */ { wpat++; bracklen++; } in_equiv = 1; } else if (in_equiv && wc == L'=' && *wpat == L']') { wpat++; bracklen++; in_equiv = 0; } else bracklen++; } while ((wc = *wpat++) != L']'); matlen++; /* bracket expression can only match one char */ bad_bracket: break; } } return matlen; } #endif int extglob_pattern_p (pat) char *pat; { switch (pat[0]) { case '*': case '+': case '!': case '@': case '?': return (pat[1] == LPAREN); default: return 0; } return 0; } /* Return 1 of the first character of STRING could match the first character of pattern PAT. Used to avoid n2 calls to strmatch(). */ int match_pattern_char (pat, string) char *pat, *string; { char c; if (*string == 0) return (0); switch (c = *pat++) { default: return (*string == c); case '\\': return (*string == *pat); case '?': return (*pat == LPAREN ? 1 : (*string != '\0')); case '*': return (1); case '+': case '!': case '@': return (*pat == LPAREN ? 1 : (*string == c)); case '[': return (*string != '\0'); } } int umatchlen (pat, max) char *pat; size_t max; { char c; int matlen, bracklen, t, in_cclass, in_collsym, in_equiv; if (*pat == 0) return (0); matlen = in_cclass = in_collsym = in_equiv = 0; while (c = *pat++) { switch (c) { default: matlen++; break; case '\\': if (*pat == 0) return ++matlen; else { matlen++; pat++; } break; case '?': if (*pat == LPAREN) return (matlen = -1); /* XXX for now */ else matlen++; break; case '*': return (matlen = -1); case '+': case '!': case '@': if (*pat == LPAREN) return (matlen = -1); /* XXX for now */ else matlen++; break; case '[': /* scan for ending `]', skipping over embedded [:...:] */ bracklen = 1; c = *pat++; do { if (c == 0) { pat--; /* back up to NUL */ matlen += bracklen; goto bad_bracket; } else if (c == '\\') { /* *pat == backslash-escaped character */ bracklen++; /* If the backslash or backslash-escape ends the string, bail. The ++pat skips over the backslash escape */ if (*pat == 0 || *++pat == 0) { matlen += bracklen; goto bad_bracket; } } else if (c == '[' && *pat == ':') /* character class */ { pat++; bracklen++; in_cclass = 1; } else if (in_cclass && c == ':' && *pat == ']') { pat++; bracklen++; in_cclass = 0; } else if (c == '[' && *pat == '.') /* collating symbol */ { pat++; bracklen++; if (*pat == ']') /* right bracket can appear as collating symbol */ { pat++; bracklen++; } in_collsym = 1; } else if (in_collsym && c == '.' && *pat == ']') { pat++; bracklen++; in_collsym = 0; } else if (c == '[' && *pat == '=') /* equivalence class */ { pat++; bracklen++; if (*pat == ']') /* right bracket can appear as equivalence class */ { pat++; bracklen++; } in_equiv = 1; } else if (in_equiv && c == '=' && *pat == ']') { pat++; bracklen++; in_equiv = 0; } else bracklen++; } while ((c = *pat++) != ']'); matlen++; /* bracket expression can only match one char */ bad_bracket: break; } } return matlen; } /* Skip characters in PAT and return the final occurrence of DIRSEP. This is only called when extended_glob is set, so we have to skip over extglob patterns x(...) */ char * glob_dirscan (pat, dirsep) char *pat; int dirsep; { char *p, *d, *pe, *se; d = pe = se = 0; for (p = pat; p && *p; p++) { if (extglob_pattern_p (p)) { if (se == 0) se = p + strlen (p) - 1; pe = glob_patscan (p + 2, se, 0); if (pe == 0) continue; else if (*pe == 0) break; p = pe - 1; /* will do increment above */ continue; } if (*p == dirsep) d = p; } return d; }