415 lines
		
	
	
	
		
			9.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			415 lines
		
	
	
	
		
			9.5 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /* strmatch.c -- ksh-like extended pattern matching for the shell and filename
 | |
| 		globbing. */
 | |
| 
 | |
| /* Copyright (C) 1991-2011 Free Software Foundation, Inc.
 | |
| 
 | |
|    This file is part of GNU Bash, the Bourne Again SHell.
 | |
|    
 | |
|    Bash is free software: you can redistribute it and/or modify
 | |
|    it under the terms of the GNU General Public License as published by
 | |
|    the Free Software Foundation, either version 3 of the License, or
 | |
|    (at your option) any later version.
 | |
| 
 | |
|    Bash is distributed in the hope that it will be useful,
 | |
|    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|    GNU General Public License for more details.
 | |
| 
 | |
|    You should have received a copy of the GNU General Public License
 | |
|    along with Bash.  If not, see <http://www.gnu.org/licenses/>.
 | |
| */
 | |
| 
 | |
| #include <config.h>
 | |
| 
 | |
| #include <stdio.h>	/* for debugging */
 | |
| 				
 | |
| #include "strmatch.h"
 | |
| #include <chartypes.h>
 | |
| 
 | |
| #include "bashansi.h"
 | |
| #include "shmbutil.h"
 | |
| #include "xmalloc.h"
 | |
| 
 | |
| /* First, compile `sm_loop.c' for single-byte characters. */
 | |
/* Template parameters read by `sm_loop.c' when it is compiled for
   single-byte characters (the first of its two inclusions in this file). */
#define CHAR	unsigned char
#define U_CHAR	unsigned char
#define XCHAR	char
#define INT	int
#define L(CS)	CS
/* Parenthesized so the negative constant remains a single operand no matter
   what expression the macro is expanded into. */
#define INVALID	(-1)

/* String equality tests for the single-byte build.  The first characters
   are compared inline so that most mismatches never reach the library
   call.  STREQN compares at most N characters. */
#undef STREQ
#undef STREQN
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp((a), (b)) == 0)
#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp((a), (b), (n)) == 0)

#ifndef GLOBASCII_DEFAULT
#  define GLOBASCII_DEFAULT 0
#endif

/* When non-zero, the range comparison helpers in this file order characters
   by their numeric value instead of by locale collation, unless the caller
   explicitly forces collation.  Not static, so other files can set it. */
int glob_asciirange = GLOBASCII_DEFAULT;
 | |
| 
 | |
| /* We use strcoll(3) for range comparisons in bracket expressions,
 | |
|    even though it can have unwanted side effects in locales
 | |
|    other than POSIX or US.  For instance, in the de locale, [A-Z] matches
 | |
|    all characters.  If GLOB_ASCIIRANGE is non-zero, and we're not forcing
 | |
|    the use of strcoll (e.g., for explicit collating symbols), we use
 | |
|    straight ordering as if in the C locale. */
 | |
| 
 | |
#if defined (HAVE_STRCOLL)
/* Order the single-byte characters C1 and C2 for range comparisons in
   bracket expressions.  Only the low eight bits of each argument are used.

   The result is zero only when both characters are the same byte.  If
   FORCECOLL is zero and glob_asciirange is set, the characters are ordered
   by byte value.  Otherwise strcoll(3) decides, and a tie between two
   different bytes is broken by byte value so the ordering is total. */
static int
rangecmp (c1, c2, forcecoll)
     int c1, c2;
     int forcecoll;
{
  char lhs[2], rhs[2];
  int order;

  /* Eight bits only.  Period. */
  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 != c2 && (forcecoll != 0 || glob_asciirange == 0))
    {
      /* strcoll wants strings, so wrap each byte in a one-character one. */
      lhs[0] = c1;
      lhs[1] = '\0';
      rhs[0] = c2;
      rhs[1] = '\0';

      order = strcoll (lhs, rhs);
      if (order != 0)
	return order;
    }

  /* Identical bytes, plain byte ordering requested, or a collation tie. */
  return (c1 - c2);
}
#else /* !HAVE_STRCOLL */
#  define rangecmp(c1, c2, f)	((int)(c1) - (int)(c2))
#endif /* !HAVE_STRCOLL */
 | |
| 
 | |
#if defined (HAVE_STRCOLL)
/* Return non-zero if the single-byte characters C1 and C2 belong to the
   same equivalence class, i.e. they are the same byte or strcoll(3) reports
   that they collate equally in the current locale.  Only the low eight bits
   of each argument are used.

   This asks strcoll directly instead of going through rangecmp: rangecmp
   breaks collation ties by byte value to obtain a total ordering, so
   `rangecmp (c1, c2, 1) == 0' can only be true for identical bytes and
   would make every equivalence class match nothing but the character
   itself. */
static int
collequiv (c1, c2)
     int c1, c2;
{
  char s1[2], s2[2];

  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 == c2)
    return 1;

  s1[0] = c1;
  s1[1] = '\0';
  s2[0] = c2;
  s2[1] = '\0';

  return (strcoll (s1, s2) == 0);
}
#else
#  define collequiv(c1, c2)	((c1) == (c2))
#endif
 | |
| 
 | |
| #define _COLLSYM	_collsym
 | |
| #define __COLLSYM	__collsym
 | |
| #define POSIXCOLL	posix_collsyms
 | |
| #include "collsyms.h"
 | |
| 
 | |
| static int
 | |
| collsym (s, len)
 | |
|      CHAR *s;
 | |
|      int len;
 | |
| {
 | |
|   register struct _collsym *csp;
 | |
|   char *x;
 | |
| 
 | |
|   x = (char *)s;
 | |
|   for (csp = posix_collsyms; csp->name; csp++)
 | |
|     {
 | |
|       if (STREQN(csp->name, x, len) && csp->name[len] == '\0')
 | |
| 	return (csp->code);
 | |
|     }
 | |
|   if (len == 1)
 | |
|     return s[0];
 | |
|   return INVALID;
 | |
| }
 | |
| 
 | |
| /* unibyte character classification */
 | |
/* Single-byte character classification.  Supply isascii() when neither a
   macro of that name nor HAVE_ISASCII is defined. */
#if !defined (isascii) && !defined (HAVE_ISASCII)
#  define isascii(c)	((unsigned int)(c) <= 0x7F)
#endif

/* Character classes recognized inside `[: :]'.  Each enumerator's value is
   the index of its name in cclass_name, so the two lists must be kept in
   the same order. */
enum char_class
  {
    CC_NO_CLASS = 0,
    CC_ASCII,
    CC_ALNUM,
    CC_ALPHA,
    CC_BLANK,
    CC_CNTRL,
    CC_DIGIT,
    CC_GRAPH,
    CC_LOWER,
    CC_PRINT,
    CC_PUNCT,
    CC_SPACE,
    CC_UPPER,
    CC_WORD,
    CC_XDIGIT
  };

/* Class names, indexed by enum char_class.  Slot 0 (CC_NO_CLASS) is an
   empty placeholder. */
static char const *const cclass_name[] =
  {
    "",
    "ascii",
    "alnum",
    "alpha",
    "blank",
    "cntrl",
    "digit",
    "graph",
    "lower",
    "print",
    "punct",
    "space",
    "upper",
    "word",
    "xdigit"
  };

/* Number of entries in cclass_name, placeholder included. */
#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
 | |
| 
 | |
| static int
 | |
| is_cclass (c, name)
 | |
|      int c;
 | |
|      const char *name;
 | |
| {
 | |
|   enum char_class char_class = CC_NO_CLASS;
 | |
|   int i, result;
 | |
| 
 | |
|   for (i = 1; i < N_CHAR_CLASS; i++)
 | |
|     {
 | |
|       if (STREQ (name, cclass_name[i]))
 | |
| 	{
 | |
| 	  char_class = (enum char_class)i;
 | |
| 	  break;
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|   if (char_class == 0)
 | |
|     return -1;
 | |
| 
 | |
|   switch (char_class)
 | |
|     {
 | |
|       case CC_ASCII:
 | |
| 	result = isascii (c);
 | |
| 	break;
 | |
|       case CC_ALNUM:
 | |
| 	result = ISALNUM (c);
 | |
| 	break;
 | |
|       case CC_ALPHA:
 | |
| 	result = ISALPHA (c);
 | |
| 	break;
 | |
|       case CC_BLANK:  
 | |
| 	result = ISBLANK (c);
 | |
| 	break;
 | |
|       case CC_CNTRL:
 | |
| 	result = ISCNTRL (c);
 | |
| 	break;
 | |
|       case CC_DIGIT:
 | |
| 	result = ISDIGIT (c);
 | |
| 	break;
 | |
|       case CC_GRAPH:
 | |
| 	result = ISGRAPH (c);
 | |
| 	break;
 | |
|       case CC_LOWER:
 | |
| 	result = ISLOWER (c);
 | |
| 	break;
 | |
|       case CC_PRINT: 
 | |
| 	result = ISPRINT (c);
 | |
| 	break;
 | |
|       case CC_PUNCT:
 | |
| 	result = ISPUNCT (c);
 | |
| 	break;
 | |
|       case CC_SPACE:
 | |
| 	result = ISSPACE (c);
 | |
| 	break;
 | |
|       case CC_UPPER:
 | |
| 	result = ISUPPER (c);
 | |
| 	break;
 | |
|       case CC_WORD:
 | |
|         result = (ISALNUM (c) || c == '_');
 | |
| 	break;
 | |
|       case CC_XDIGIT:
 | |
| 	result = ISXDIGIT (c);
 | |
| 	break;
 | |
|       default:
 | |
| 	result = -1;
 | |
| 	break;
 | |
|     }
 | |
| 
 | |
|   return result;  
 | |
| }
 | |
| 
 | |
| /* Now include `sm_loop.c' for single-byte characters. */
 | |
| /* The result of FOLD is an `unsigned char' */
 | |
| # define FOLD(c) ((flags & FNM_CASEFOLD) \
 | |
| 	? TOLOWER ((unsigned char)c) \
 | |
| 	: ((unsigned char)c))
 | |
| 
 | |
| #define FCT			internal_strmatch
 | |
| #define GMATCH			gmatch
 | |
| #define COLLSYM			collsym
 | |
| #define PARSE_COLLSYM		parse_collsym
 | |
| #define BRACKMATCH		brackmatch
 | |
| #define PATSCAN			glob_patscan
 | |
| #define STRCOMPARE		strcompare
 | |
| #define EXTMATCH		extmatch
 | |
| #define STRCHR(S, C)		strchr((S), (C))
 | |
| #define STRCOLL(S1, S2)		strcoll((S1), (S2))
 | |
| #define STRLEN(S)		strlen(S)
 | |
| #define STRCMP(S1, S2)		strcmp((S1), (S2))
 | |
| #define RANGECMP(C1, C2, F)	rangecmp((C1), (C2), (F))
 | |
| #define COLLEQUIV(C1, C2)	collequiv((C1), (C2))
 | |
| #define CTYPE_T			enum char_class
 | |
| #define IS_CCLASS(C, S)		is_cclass((C), (S))
 | |
| #include "sm_loop.c"
 | |
| 
 | |
| #if HANDLE_MULTIBYTE
 | |
| 
 | |
/* Template parameters for the second, wide-character inclusion of
   `sm_loop.c'. */
#  define CHAR		wchar_t
#  define U_CHAR	wint_t
#  define XCHAR		wchar_t
#  define INT		wint_t
#  define L(CS)		L##CS
#  define INVALID	WEOF

/* Wide-character replacements for the string equality tests. */
#  undef STREQ
#  undef STREQN
#  define STREQ(s1, s2) (wcscmp (s1, s2) == 0)
#  define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)

/* Defined outside this file; xstrmatch tests its result against 0 to
   decide whether the wide-character matcher is needed. */
extern char *mbsmbchar __P((const char *));
 | |
| 
 | |
| static int
 | |
| rangecmp_wc (c1, c2, forcecoll)
 | |
|      wint_t c1, c2;
 | |
|      int forcecoll;
 | |
| {
 | |
|   static wchar_t s1[2] = { L' ', L'\0' };
 | |
|   static wchar_t s2[2] = { L' ', L'\0' };
 | |
| 
 | |
|   if (c1 == c2)
 | |
|     return 0;
 | |
| 
 | |
|   if (forcecoll == 0 && glob_asciirange && c1 <= UCHAR_MAX && c2 <= UCHAR_MAX)
 | |
|     return ((int)(c1 - c2));
 | |
| 
 | |
|   s1[0] = c1;
 | |
|   s2[0] = c2;
 | |
| 
 | |
|   return (wcscoll (s1, s2));
 | |
| }
 | |
| 
 | |
/* Return non-zero if the wide characters C and EQUIV belong to the same
   equivalence class: they are identical, or they collate equally in the
   current locale.  Passing a non-zero FORCECOLL to rangecmp_wc makes it
   skip its plain numeric comparison and always consult wcscoll(3), so a
   zero result means "same character or same collation position".  A bare
   `c == equiv' test would let an equivalence class match only the
   character itself. */
static int
collequiv_wc (c, equiv)
     wint_t c, equiv;
{
  return (rangecmp_wc (c, equiv, 1) == 0);
}
 | |
| 
 | |
| /* Helper function for collating symbol. */
 | |
| #  define _COLLSYM	_collwcsym
 | |
| #  define __COLLSYM	__collwcsym
 | |
| #  define POSIXCOLL	posix_collwcsyms
 | |
| #  include "collsyms.h"
 | |
| 
 | |
| static wint_t
 | |
| collwcsym (s, len)
 | |
|      wchar_t *s;
 | |
|      int len;
 | |
| {
 | |
|   register struct _collwcsym *csp;
 | |
| 
 | |
|   for (csp = posix_collwcsyms; csp->name; csp++)
 | |
|     {
 | |
|       if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
 | |
| 	return (csp->code);
 | |
|     }
 | |
|   if (len == 1)
 | |
|     return s[0];
 | |
|   return INVALID;
 | |
| }
 | |
| 
 | |
/* Test whether the wide character WC belongs to the character class called
   NAME.

   Returns non-zero if WC is in the class and 0 if it is not.  Returns -1
   if NAME cannot be converted to a multibyte string, if wctype(3) does not
   know the class, or if memory for the conversion cannot be allocated.

   Two names are handled specially: `ascii' is evaluated by hand when the C
   library has no class of that name, and `word' is treated as `alnum'
   plus the underscore. */
static int
is_wcclass (wc, name)
     wint_t wc;
     wchar_t *name;
{
  char *mbs;
  mbstate_t state;
  size_t mbslength, mbssize;
  wctype_t desc;
  int want_word;

  if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
    {
      int c;

      /* wctob returns EOF when WC has no single-byte representation, in
	 which case it cannot be an ASCII character. */
      if ((c = wctob (wc)) == EOF)
	return 0;
      else
        return (c <= 0x7F);
    }

  want_word = (wcscmp (name, L"word") == 0);
  if (want_word)
    name = L"alnum";

  /* Size the buffer once, before wcsrtombs gets the chance to update NAME
     through the pointer we hand it. */
  mbssize = wcslen (name) * MB_CUR_MAX + 1;
  mbs = (char *) malloc (mbssize);
  if (mbs == 0)
    return -1;		/* don't hand a null pointer to wctype below */

  memset (&state, '\0', sizeof (mbstate_t));
  mbslength = wcsrtombs (mbs, (const wchar_t **)&name, mbssize, &state);

  if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
    {
      free (mbs);
      return -1;
    }
  desc = wctype (mbs);
  free (mbs);

  if (desc == (wctype_t)0)
    return -1;

  if (want_word)
    return (iswctype (wc, desc) || wc == L'_');
  else
    return (iswctype (wc, desc));
}
 | |
| 
 | |
| /* Now include `sm_loop.c' for multibyte characters. */
 | |
| #define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
 | |
| #define FCT			internal_wstrmatch
 | |
| #define GMATCH			gmatch_wc
 | |
| #define COLLSYM			collwcsym
 | |
| #define PARSE_COLLSYM		parse_collwcsym
 | |
| #define BRACKMATCH		brackmatch_wc
 | |
| #define PATSCAN			glob_patscan_wc
 | |
| #define STRCOMPARE		wscompare
 | |
| #define EXTMATCH		extmatch_wc
 | |
| #define STRCHR(S, C)		wcschr((S), (C))
 | |
| #define STRCOLL(S1, S2)		wcscoll((S1), (S2))
 | |
| #define STRLEN(S)		wcslen(S)
 | |
| #define STRCMP(S1, S2)		wcscmp((S1), (S2))
 | |
| #define RANGECMP(C1, C2, F)	rangecmp_wc((C1), (C2), (F))
 | |
| #define COLLEQUIV(C1, C2)	collequiv_wc((C1), (C2))
 | |
| #define CTYPE_T			enum char_class
 | |
| #define IS_CCLASS(C, S)		is_wcclass((C), (S))
 | |
| #include "sm_loop.c"
 | |
| 
 | |
| #endif /* HANDLE_MULTIBYTE */
 | |
| 
 | |
/* Match STRING against PATTERN under FLAGS and return the result of the
   underlying matcher.

   Without HANDLE_MULTIBYTE this simply calls the single-byte matcher.
   With it, the single-byte matcher is still used when the locale's
   characters are all one byte wide, when mbsmbchar returns 0 for both
   arguments, or when either argument cannot be converted to a wide string
   (xdupmbstowcs returning (size_t)-1 or (size_t)-2).  Otherwise both
   arguments are converted, matched with the wide-character matcher, and
   the converted copies are freed before returning. */
int
xstrmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
#if HANDLE_MULTIBYTE
  int ret;
  size_t n;
  wchar_t *wpattern, *wstring;

  /* Cheapest test first: in a single-byte locale there is no reason to
     scan either string looking for multibyte characters. */
  if (MB_CUR_MAX == 1)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  if (mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  n = xdupmbstowcs (&wpattern, NULL, pattern);
  if (n == (size_t)-1 || n == (size_t)-2)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  n = xdupmbstowcs (&wstring, NULL, string);
  if (n == (size_t)-1 || n == (size_t)-2)
    {
      /* The pattern was already converted; don't leak it. */
      free (wpattern);
      return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
    }

  ret = internal_wstrmatch (wpattern, wstring, flags);

  free (wpattern);
  free (wstring);

  return ret;
#else
  return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
#endif /* !HANDLE_MULTIBYTE */
}
 | 
