410 lines
7.9 KiB
C
410 lines
7.9 KiB
C
/* gmisc.c -- miscellaneous pattern matching utility functions for Bash.
|
|
|
|
Copyright (C) 2010 Free Software Foundation, Inc.
|
|
|
|
This file is part of GNU Bash, the Bourne-Again SHell.
|
|
|
|
Bash is free software: you can redistribute it and/or modify
|
|
it under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation, either version 3 of the License, or
|
|
(at your option) any later version.
|
|
|
|
Bash is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with Bash. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <config.h>
|
|
|
|
#include "bashtypes.h"
|
|
|
|
#if defined (HAVE_UNISTD_H)
|
|
# include <unistd.h>
|
|
#endif
|
|
|
|
#include "bashansi.h"
|
|
#include "shmbutil.h"
|
|
|
|
#include "stdc.h"
|
|
|
|
#ifndef LPAREN
|
|
# define LPAREN '('
|
|
#endif
|
|
#ifndef RPAREN
|
|
# define RPAREN ')'
|
|
#endif
|
|
|
|
#if defined (HANDLE_MULTIBYTE)
|
|
#define WLPAREN L'('
|
|
#define WRPAREN L')'
|
|
|
|
extern char *glob_patscan __P((char *, char *, int));
|
|
|
|
/* Return 1 of the first character of WSTRING could match the first
|
|
character of pattern WPAT. Wide character version. */
|
|
int
|
|
match_pattern_wchar (wpat, wstring)
|
|
wchar_t *wpat, *wstring;
|
|
{
|
|
wchar_t wc;
|
|
|
|
if (*wstring == 0)
|
|
return (0);
|
|
|
|
switch (wc = *wpat++)
|
|
{
|
|
default:
|
|
return (*wstring == wc);
|
|
case L'\\':
|
|
return (*wstring == *wpat);
|
|
case L'?':
|
|
return (*wpat == WLPAREN ? 1 : (*wstring != L'\0'));
|
|
case L'*':
|
|
return (1);
|
|
case L'+':
|
|
case L'!':
|
|
case L'@':
|
|
return (*wpat == WLPAREN ? 1 : (*wstring == wc));
|
|
case L'[':
|
|
return (*wstring != L'\0');
|
|
}
|
|
}
|
|
|
|
int
|
|
wmatchlen (wpat, wmax)
|
|
wchar_t *wpat;
|
|
size_t wmax;
|
|
{
|
|
wchar_t wc;
|
|
int matlen, bracklen, t, in_cclass, in_collsym, in_equiv;
|
|
|
|
if (*wpat == 0)
|
|
return (0);
|
|
|
|
matlen = in_cclass = in_collsym = in_equiv = 0;
|
|
while (wc = *wpat++)
|
|
{
|
|
switch (wc)
|
|
{
|
|
default:
|
|
matlen++;
|
|
break;
|
|
case L'\\':
|
|
if (*wpat == 0)
|
|
return ++matlen;
|
|
else
|
|
{
|
|
matlen++;
|
|
wpat++;
|
|
}
|
|
break;
|
|
case L'?':
|
|
if (*wpat == WLPAREN)
|
|
return (matlen = -1); /* XXX for now */
|
|
else
|
|
matlen++;
|
|
break;
|
|
case L'*':
|
|
return (matlen = -1);
|
|
case L'+':
|
|
case L'!':
|
|
case L'@':
|
|
if (*wpat == WLPAREN)
|
|
return (matlen = -1); /* XXX for now */
|
|
else
|
|
matlen++;
|
|
break;
|
|
case L'[':
|
|
/* scan for ending `]', skipping over embedded [:...:] */
|
|
bracklen = 1;
|
|
wc = *wpat++;
|
|
do
|
|
{
|
|
if (wc == 0)
|
|
{
|
|
wpat--; /* back up to NUL */
|
|
matlen += bracklen;
|
|
goto bad_bracket;
|
|
}
|
|
else if (wc == L'\\')
|
|
{
|
|
/* *wpat == backslash-escaped character */
|
|
bracklen++;
|
|
/* If the backslash or backslash-escape ends the string,
|
|
bail. The ++wpat skips over the backslash escape */
|
|
if (*wpat == 0 || *++wpat == 0)
|
|
{
|
|
matlen += bracklen;
|
|
goto bad_bracket;
|
|
}
|
|
}
|
|
else if (wc == L'[' && *wpat == L':') /* character class */
|
|
{
|
|
wpat++;
|
|
bracklen++;
|
|
in_cclass = 1;
|
|
}
|
|
else if (in_cclass && wc == L':' && *wpat == L']')
|
|
{
|
|
wpat++;
|
|
bracklen++;
|
|
in_cclass = 0;
|
|
}
|
|
else if (wc == L'[' && *wpat == L'.') /* collating symbol */
|
|
{
|
|
wpat++;
|
|
bracklen++;
|
|
if (*wpat == L']') /* right bracket can appear as collating symbol */
|
|
{
|
|
wpat++;
|
|
bracklen++;
|
|
}
|
|
in_collsym = 1;
|
|
}
|
|
else if (in_collsym && wc == L'.' && *wpat == L']')
|
|
{
|
|
wpat++;
|
|
bracklen++;
|
|
in_collsym = 0;
|
|
}
|
|
else if (wc == L'[' && *wpat == L'=') /* equivalence class */
|
|
{
|
|
wpat++;
|
|
bracklen++;
|
|
if (*wpat == L']') /* right bracket can appear as equivalence class */
|
|
{
|
|
wpat++;
|
|
bracklen++;
|
|
}
|
|
in_equiv = 1;
|
|
}
|
|
else if (in_equiv && wc == L'=' && *wpat == L']')
|
|
{
|
|
wpat++;
|
|
bracklen++;
|
|
in_equiv = 0;
|
|
}
|
|
else
|
|
bracklen++;
|
|
}
|
|
while ((wc = *wpat++) != L']');
|
|
matlen++; /* bracket expression can only match one char */
|
|
bad_bracket:
|
|
break;
|
|
}
|
|
}
|
|
|
|
return matlen;
|
|
}
|
|
#endif
|
|
|
|
int
|
|
extglob_pattern_p (pat)
|
|
char *pat;
|
|
{
|
|
switch (pat[0])
|
|
{
|
|
case '*':
|
|
case '+':
|
|
case '!':
|
|
case '@':
|
|
case '?':
|
|
return (pat[1] == LPAREN);
|
|
default:
|
|
return 0;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/* Return 1 of the first character of STRING could match the first
|
|
character of pattern PAT. Used to avoid n2 calls to strmatch(). */
|
|
int
|
|
match_pattern_char (pat, string)
|
|
char *pat, *string;
|
|
{
|
|
char c;
|
|
|
|
if (*string == 0)
|
|
return (0);
|
|
|
|
switch (c = *pat++)
|
|
{
|
|
default:
|
|
return (*string == c);
|
|
case '\\':
|
|
return (*string == *pat);
|
|
case '?':
|
|
return (*pat == LPAREN ? 1 : (*string != '\0'));
|
|
case '*':
|
|
return (1);
|
|
case '+':
|
|
case '!':
|
|
case '@':
|
|
return (*pat == LPAREN ? 1 : (*string == c));
|
|
case '[':
|
|
return (*string != '\0');
|
|
}
|
|
}
|
|
|
|
int
|
|
umatchlen (pat, max)
|
|
char *pat;
|
|
size_t max;
|
|
{
|
|
char c;
|
|
int matlen, bracklen, t, in_cclass, in_collsym, in_equiv;
|
|
|
|
if (*pat == 0)
|
|
return (0);
|
|
|
|
matlen = in_cclass = in_collsym = in_equiv = 0;
|
|
while (c = *pat++)
|
|
{
|
|
switch (c)
|
|
{
|
|
default:
|
|
matlen++;
|
|
break;
|
|
case '\\':
|
|
if (*pat == 0)
|
|
return ++matlen;
|
|
else
|
|
{
|
|
matlen++;
|
|
pat++;
|
|
}
|
|
break;
|
|
case '?':
|
|
if (*pat == LPAREN)
|
|
return (matlen = -1); /* XXX for now */
|
|
else
|
|
matlen++;
|
|
break;
|
|
case '*':
|
|
return (matlen = -1);
|
|
case '+':
|
|
case '!':
|
|
case '@':
|
|
if (*pat == LPAREN)
|
|
return (matlen = -1); /* XXX for now */
|
|
else
|
|
matlen++;
|
|
break;
|
|
case '[':
|
|
/* scan for ending `]', skipping over embedded [:...:] */
|
|
bracklen = 1;
|
|
c = *pat++;
|
|
do
|
|
{
|
|
if (c == 0)
|
|
{
|
|
pat--; /* back up to NUL */
|
|
matlen += bracklen;
|
|
goto bad_bracket;
|
|
}
|
|
else if (c == '\\')
|
|
{
|
|
/* *pat == backslash-escaped character */
|
|
bracklen++;
|
|
/* If the backslash or backslash-escape ends the string,
|
|
bail. The ++pat skips over the backslash escape */
|
|
if (*pat == 0 || *++pat == 0)
|
|
{
|
|
matlen += bracklen;
|
|
goto bad_bracket;
|
|
}
|
|
}
|
|
else if (c == '[' && *pat == ':') /* character class */
|
|
{
|
|
pat++;
|
|
bracklen++;
|
|
in_cclass = 1;
|
|
}
|
|
else if (in_cclass && c == ':' && *pat == ']')
|
|
{
|
|
pat++;
|
|
bracklen++;
|
|
in_cclass = 0;
|
|
}
|
|
else if (c == '[' && *pat == '.') /* collating symbol */
|
|
{
|
|
pat++;
|
|
bracklen++;
|
|
if (*pat == ']') /* right bracket can appear as collating symbol */
|
|
{
|
|
pat++;
|
|
bracklen++;
|
|
}
|
|
in_collsym = 1;
|
|
}
|
|
else if (in_collsym && c == '.' && *pat == ']')
|
|
{
|
|
pat++;
|
|
bracklen++;
|
|
in_collsym = 0;
|
|
}
|
|
else if (c == '[' && *pat == '=') /* equivalence class */
|
|
{
|
|
pat++;
|
|
bracklen++;
|
|
if (*pat == ']') /* right bracket can appear as equivalence class */
|
|
{
|
|
pat++;
|
|
bracklen++;
|
|
}
|
|
in_equiv = 1;
|
|
}
|
|
else if (in_equiv && c == '=' && *pat == ']')
|
|
{
|
|
pat++;
|
|
bracklen++;
|
|
in_equiv = 0;
|
|
}
|
|
else
|
|
bracklen++;
|
|
}
|
|
while ((c = *pat++) != ']');
|
|
matlen++; /* bracket expression can only match one char */
|
|
bad_bracket:
|
|
break;
|
|
}
|
|
}
|
|
|
|
return matlen;
|
|
}
|
|
|
|
/* Scan PAT and return a pointer to the final occurrence of DIRSEP found
   outside any extended-glob group x(...), or 0 if there is none (a null
   PAT also yields 0).  This is only called when extended_glob is set, so
   extglob patterns have to be skipped rather than searched. */
char *
glob_dirscan (pat, dirsep)
     char *pat;
     int dirsep;
{
  /* Declared locally as well: the file-scope declaration of glob_patscan
     sits inside `#if defined (HANDLE_MULTIBYTE)', while this function is
     compiled unconditionally.  Without a visible declaration the call
     would fall back to an implicit `int' return type, which truncates
     the returned pointer on LP64 targets and is rejected outright by
     C99 and later compilers. */
  extern char *glob_patscan __P((char *, char *, int));
  char *p, *d, *pe, *se;

  d = se = 0;
  for (p = pat; p && *p; p++)
    {
      if (extglob_pattern_p (p))
        {
          /* SE points at the last character of the pattern; compute it
             once, the first time an extglob opener is seen. */
          if (se == 0)
            se = p + strlen (p) - 1;

          /* Hand glob_patscan the text following the `x(' opener.
             Presumably it returns the position at which scanning should
             resume after the group -- verify against its definition. */
          pe = glob_patscan (p + 2, se, 0);
          if (pe == 0)
            continue;           /* no result: just step past this char */
          else if (*pe == 0)
            break;              /* group runs to the end of the pattern */

          p = pe - 1;           /* the loop increment moves P onto PE */
          continue;
        }

      if (*p == dirsep)
        d = p;
    }

  return d;
}
|