i-bash/lib/glob/smatch.c
2014-02-26 09:36:43 -05:00

415 lines
9.5 KiB
C

/* strmatch.c -- ksh-like extended pattern matching for the shell and filename
globbing. */
/* Copyright (C) 1991-2011 Free Software Foundation, Inc.
This file is part of GNU Bash, the Bourne Again SHell.
Bash is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Bash is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Bash. If not, see <http://www.gnu.org/licenses/>.
*/
#include <config.h>
#include <stdio.h> /* for debugging */
#include "strmatch.h"
#include <chartypes.h>
#include "bashansi.h"
#include "shmbutil.h"
#include "xmalloc.h"
/* First, compile `sm_loop.c' for single-byte characters. */
#define CHAR unsigned char
#define U_CHAR unsigned char
#define XCHAR char
#define INT int
#define L(CS) CS
#define INVALID -1
#undef STREQ
#undef STREQN
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
#ifndef GLOBASCII_DEFAULT
# define GLOBASCII_DEFAULT 0
#endif
int glob_asciirange = GLOBASCII_DEFAULT;
/* We use strcoll(3) for range comparisons in bracket expressions,
even though it can have unwanted side effects in locales
other than POSIX or US. For instance, in the de locale, [A-Z] matches
all characters. If GLOB_ASCIIRANGE is non-zero, and we're not forcing
the use of strcoll (e.g., for explicit collating symbols), we use
straight ordering as if in the C locale. */
#if defined (HAVE_STRCOLL)
/* Helper function for collating symbol equivalence. */
static int
rangecmp (c1, c2, forcecoll)
int c1, c2;
int forcecoll;
{
static char s1[2] = { ' ', '\0' };
static char s2[2] = { ' ', '\0' };
int ret;
/* Eight bits only. Period. */
c1 &= 0xFF;
c2 &= 0xFF;
if (c1 == c2)
return (0);
if (forcecoll == 0 && glob_asciirange)
return (c1 - c2);
s1[0] = c1;
s2[0] = c2;
if ((ret = strcoll (s1, s2)) != 0)
return ret;
return (c1 - c2);
}
#else /* !HAVE_STRCOLL */
# define rangecmp(c1, c2, f) ((int)(c1) - (int)(c2))
#endif /* !HAVE_STRCOLL */
#if defined (HAVE_STRCOLL)
static int
collequiv (c1, c2)
int c1, c2;
{
return (rangecmp (c1, c2, 1) == 0);
}
#else
# define collequiv(c1, c2) ((c1) == (c2))
#endif
#define _COLLSYM _collsym
#define __COLLSYM __collsym
#define POSIXCOLL posix_collsyms
#include "collsyms.h"
static int
collsym (s, len)
CHAR *s;
int len;
{
register struct _collsym *csp;
char *x;
x = (char *)s;
for (csp = posix_collsyms; csp->name; csp++)
{
if (STREQN(csp->name, x, len) && csp->name[len] == '\0')
return (csp->code);
}
if (len == 1)
return s[0];
return INVALID;
}
/* unibyte character classification */
#if !defined (isascii) && !defined (HAVE_ISASCII)
# define isascii(c) ((unsigned int)(c) <= 0177)
#endif
enum char_class
{
CC_NO_CLASS = 0,
CC_ASCII, CC_ALNUM, CC_ALPHA, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_WORD, CC_XDIGIT
};
static char const *const cclass_name[] =
{
"",
"ascii", "alnum", "alpha", "blank", "cntrl", "digit", "graph",
"lower", "print", "punct", "space", "upper", "word", "xdigit"
};
#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
static int
is_cclass (c, name)
int c;
const char *name;
{
enum char_class char_class = CC_NO_CLASS;
int i, result;
for (i = 1; i < N_CHAR_CLASS; i++)
{
if (STREQ (name, cclass_name[i]))
{
char_class = (enum char_class)i;
break;
}
}
if (char_class == 0)
return -1;
switch (char_class)
{
case CC_ASCII:
result = isascii (c);
break;
case CC_ALNUM:
result = ISALNUM (c);
break;
case CC_ALPHA:
result = ISALPHA (c);
break;
case CC_BLANK:
result = ISBLANK (c);
break;
case CC_CNTRL:
result = ISCNTRL (c);
break;
case CC_DIGIT:
result = ISDIGIT (c);
break;
case CC_GRAPH:
result = ISGRAPH (c);
break;
case CC_LOWER:
result = ISLOWER (c);
break;
case CC_PRINT:
result = ISPRINT (c);
break;
case CC_PUNCT:
result = ISPUNCT (c);
break;
case CC_SPACE:
result = ISSPACE (c);
break;
case CC_UPPER:
result = ISUPPER (c);
break;
case CC_WORD:
result = (ISALNUM (c) || c == '_');
break;
case CC_XDIGIT:
result = ISXDIGIT (c);
break;
default:
result = -1;
break;
}
return result;
}
/* Now include `sm_loop.c' for single-byte characters. */
/* The result of FOLD is an `unsigned char' */
# define FOLD(c) ((flags & FNM_CASEFOLD) \
? TOLOWER ((unsigned char)c) \
: ((unsigned char)c))
#define FCT internal_strmatch
#define GMATCH gmatch
#define COLLSYM collsym
#define PARSE_COLLSYM parse_collsym
#define BRACKMATCH brackmatch
#define PATSCAN glob_patscan
#define STRCOMPARE strcompare
#define EXTMATCH extmatch
#define STRCHR(S, C) strchr((S), (C))
#define STRCOLL(S1, S2) strcoll((S1), (S2))
#define STRLEN(S) strlen(S)
#define STRCMP(S1, S2) strcmp((S1), (S2))
#define RANGECMP(C1, C2, F) rangecmp((C1), (C2), (F))
#define COLLEQUIV(C1, C2) collequiv((C1), (C2))
#define CTYPE_T enum char_class
#define IS_CCLASS(C, S) is_cclass((C), (S))
#include "sm_loop.c"
#if HANDLE_MULTIBYTE
# define CHAR wchar_t
# define U_CHAR wint_t
# define XCHAR wchar_t
# define INT wint_t
# define L(CS) L##CS
# define INVALID WEOF
# undef STREQ
# undef STREQN
# define STREQ(s1, s2) ((wcscmp (s1, s2) == 0))
# define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
extern char *mbsmbchar __P((const char *));
static int
rangecmp_wc (c1, c2, forcecoll)
wint_t c1, c2;
int forcecoll;
{
static wchar_t s1[2] = { L' ', L'\0' };
static wchar_t s2[2] = { L' ', L'\0' };
if (c1 == c2)
return 0;
if (forcecoll == 0 && glob_asciirange && c1 <= UCHAR_MAX && c2 <= UCHAR_MAX)
return ((int)(c1 - c2));
s1[0] = c1;
s2[0] = c2;
return (wcscoll (s1, s2));
}
static int
collequiv_wc (c, equiv)
wint_t c, equiv;
{
return (c == equiv);
}
/* Helper function for collating symbol. */
# define _COLLSYM _collwcsym
# define __COLLSYM __collwcsym
# define POSIXCOLL posix_collwcsyms
# include "collsyms.h"
static wint_t
collwcsym (s, len)
wchar_t *s;
int len;
{
register struct _collwcsym *csp;
for (csp = posix_collwcsyms; csp->name; csp++)
{
if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
return (csp->code);
}
if (len == 1)
return s[0];
return INVALID;
}
static int
is_wcclass (wc, name)
wint_t wc;
wchar_t *name;
{
char *mbs;
mbstate_t state;
size_t mbslength;
wctype_t desc;
int want_word;
if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
{
int c;
if ((c = wctob (wc)) == EOF)
return 0;
else
return (c <= 0x7F);
}
want_word = (wcscmp (name, L"word") == 0);
if (want_word)
name = L"alnum";
memset (&state, '\0', sizeof (mbstate_t));
mbs = (char *) malloc (wcslen(name) * MB_CUR_MAX + 1);
mbslength = wcsrtombs (mbs, (const wchar_t **)&name, (wcslen(name) * MB_CUR_MAX + 1), &state);
if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
{
free (mbs);
return -1;
}
desc = wctype (mbs);
free (mbs);
if (desc == (wctype_t)0)
return -1;
if (want_word)
return (iswctype (wc, desc) || wc == L'_');
else
return (iswctype (wc, desc));
}
/* Now include `sm_loop.c' for multibyte characters. */
#define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
#define FCT internal_wstrmatch
#define GMATCH gmatch_wc
#define COLLSYM collwcsym
#define PARSE_COLLSYM parse_collwcsym
#define BRACKMATCH brackmatch_wc
#define PATSCAN glob_patscan_wc
#define STRCOMPARE wscompare
#define EXTMATCH extmatch_wc
#define STRCHR(S, C) wcschr((S), (C))
#define STRCOLL(S1, S2) wcscoll((S1), (S2))
#define STRLEN(S) wcslen(S)
#define STRCMP(S1, S2) wcscmp((S1), (S2))
#define RANGECMP(C1, C2, F) rangecmp_wc((C1), (C2), (F))
#define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2))
#define CTYPE_T enum char_class
#define IS_CCLASS(C, S) is_wcclass((C), (S))
#include "sm_loop.c"
#endif /* HAVE_MULTIBYTE */
int
xstrmatch (pattern, string, flags)
char *pattern;
char *string;
int flags;
{
#if HANDLE_MULTIBYTE
int ret;
size_t n;
wchar_t *wpattern, *wstring;
size_t plen, slen, mplen, mslen;
if (mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0)
return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
if (MB_CUR_MAX == 1)
return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
n = xdupmbstowcs (&wpattern, NULL, pattern);
if (n == (size_t)-1 || n == (size_t)-2)
return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
n = xdupmbstowcs (&wstring, NULL, string);
if (n == (size_t)-1 || n == (size_t)-2)
{
free (wpattern);
return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
}
ret = internal_wstrmatch (wpattern, wstring, flags);
free (wpattern);
free (wstring);
return ret;
#else
return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
#endif /* !HANDLE_MULTIBYTE */
}