404 lines
		
	
	
	
		
			9.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			404 lines
		
	
	
	
		
			9.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* strmatch.c -- ksh-like extended pattern matching for the shell and filename
		globbing. */

/* Copyright (C) 1991-2011 Free Software Foundation, Inc.

   This file is part of GNU Bash, the Bourne Again SHell.
   
   Bash is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   Bash is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bash.  If not, see <http://www.gnu.org/licenses/>.
*/
 | 
						|
 | 
						|
#include <config.h>
 | 
						|
 | 
						|
#include <stdio.h>	/* for debugging */
 | 
						|
				
 | 
						|
#include "strmatch.h"
 | 
						|
#include <chartypes.h>
 | 
						|
 | 
						|
#include "bashansi.h"
 | 
						|
#include "shmbutil.h"
 | 
						|
#include "xmalloc.h"
 | 
						|
 | 
						|
/* First, compile `sm_loop.c' for single-byte characters.  The macros below
   pick the character types and literal form for this pass (presumably
   consumed by sm_loop.c, which is not visible here -- confirm). */
#define CHAR	unsigned char
#define U_CHAR	unsigned char
#define XCHAR	char
#define INT	int
#define L(CS)	CS
/* Parenthesized so the negative constant stays one operand wherever it is
   expanded. */
#define INVALID	(-1)

/* Discard any earlier definitions so the single-byte versions below apply. */
#undef STREQ
#undef STREQN
/* String equality tests.  The first-character comparison is a cheap reject
   before calling strcmp/strncmp; note that A and B are evaluated twice. */
#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp((a), (b)) == 0)
#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp((a), (b), (n)) == 0)
 | 
						|
 | 
						|
/* We use strcoll(3) for range comparisons in bracket expressions,
   even though it can have unwanted side effects in locales
   other than POSIX or US.  For instance, in the de locale, [A-Z] matches
   all characters. */
 | 
						|
 | 
						|
#if defined (HAVE_STRCOLL)
 | 
						|
/* Compare two single-byte characters for bracket-expression ranges.
   Only the low eight bits of C1 and C2 are considered.  Returns 0 when
   they are the same character; otherwise returns the strcoll(3) ordering
   of the two one-character strings, falling back to the difference of the
   character values when strcoll reports them as equal.  The result is
   therefore 0 only for identical (masked) characters. */
static int
rangecmp (c1, c2)
     int c1, c2;
{
  /* Plain locals: nothing needs to persist between calls. */
  char s1[2], s2[2];
  int ret;

  /* Eight bits only.  Period. */
  c1 &= 0xFF;
  c2 &= 0xFF;

  if (c1 == c2)
    return (0);

  s1[0] = c1;
  s1[1] = '\0';
  s2[0] = c2;
  s2[1] = '\0';

  ret = strcoll (s1, s2);
  if (ret != 0)
    return ret;

  /* Distinct characters that collate equal: order by character value. */
  return (c1 - c2);
}
 | 
						|
#else /* !HAVE_STRCOLL */
 | 
						|
#  define rangecmp(c1, c2)	((int)(c1) - (int)(c2))
 | 
						|
#endif /* !HAVE_STRCOLL */
 | 
						|
 | 
						|
#if defined (HAVE_STRCOLL)
 | 
						|
/* Return 1 if C1 and C2 compare equal according to rangecmp, 0 otherwise.
   Used for equivalence-class tests through the COLLEQUIV macro. */
static int
collequiv (c1, c2)
     int c1, c2;
{
  return (rangecmp (c1, c2) == 0);
}
 | 
						|
#else
 | 
						|
#  define collequiv(c1, c2)	((c1) == (c2))
 | 
						|
#endif
 | 
						|
 | 
						|
/* Names handed to collsyms.h for the single-byte pass; the function below
   uses `struct _collsym' and `posix_collsyms', so the header presumably
   declares them from these macros (header not visible here -- confirm). */
#define _COLLSYM	_collsym
#define __COLLSYM	__collsym
#define POSIXCOLL	posix_collsyms
#include "collsyms.h"

/* Look up the collating symbol spelled by the first LEN characters of S.
   Returns the `code' of the table entry whose name is exactly those LEN
   characters; if there is none, returns S[0] when LEN is 1 and INVALID
   otherwise. */
static int
collsym (s, len)
     CHAR *s;
     int len;
{
  struct _collsym *csp;
  char *x;

  x = (char *)s;
  /* The table ends at the first entry with a null name. */
  for (csp = posix_collsyms; csp->name; csp++)
    {
      /* STREQN only matches a prefix; checking for the terminator at
	 name[len] makes the match exact. */
      if (STREQN (csp->name, x, len) && csp->name[len] == '\0')
	return (csp->code);
    }

  /* A single character stands for itself. */
  if (len == 1)
    return s[0];
  return INVALID;
}
 | 
						|
 | 
						|
/* unibyte character classification */

/* Fallback when the system supplies no isascii: true for values in the
   range 0..0177 (negative values convert to large unsigned numbers and so
   are rejected). */
#if !defined (isascii) && !defined (HAVE_ISASCII)
#  define isascii(c)	((unsigned int)(c) <= 0177)
#endif
 | 
						|
 | 
						|
/* Character classes recognized in bracket expressions.  The enumerators
   and the entries of cclass_name below are parallel: cclass_name[CC_x] is
   the spelling of class CC_x, so the two lists must be kept in the same
   order.  CC_NO_CLASS (0) means "not a known class". */
enum char_class
  {
    CC_NO_CLASS = 0,
    CC_ASCII, CC_ALNUM, CC_ALPHA, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
    CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_WORD, CC_XDIGIT
  };

/* Class names indexed by enum char_class; slot 0 is an empty placeholder
   for CC_NO_CLASS. */
static char const *const cclass_name[] =
  {
    "",
    "ascii", "alnum", "alpha", "blank", "cntrl", "digit", "graph",
    "lower", "print", "punct", "space", "upper", "word", "xdigit"
  };

/* Number of entries in cclass_name, including the placeholder. */
#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
 | 
						|
 | 
						|
static int
 | 
						|
is_cclass (c, name)
 | 
						|
     int c;
 | 
						|
     const char *name;
 | 
						|
{
 | 
						|
  enum char_class char_class = CC_NO_CLASS;
 | 
						|
  int i, result;
 | 
						|
 | 
						|
  for (i = 1; i < N_CHAR_CLASS; i++)
 | 
						|
    {
 | 
						|
      if (STREQ (name, cclass_name[i]))
 | 
						|
	{
 | 
						|
	  char_class = (enum char_class)i;
 | 
						|
	  break;
 | 
						|
	}
 | 
						|
    }
 | 
						|
 | 
						|
  if (char_class == 0)
 | 
						|
    return -1;
 | 
						|
 | 
						|
  switch (char_class)
 | 
						|
    {
 | 
						|
      case CC_ASCII:
 | 
						|
	result = isascii (c);
 | 
						|
	break;
 | 
						|
      case CC_ALNUM:
 | 
						|
	result = ISALNUM (c);
 | 
						|
	break;
 | 
						|
      case CC_ALPHA:
 | 
						|
	result = ISALPHA (c);
 | 
						|
	break;
 | 
						|
      case CC_BLANK:  
 | 
						|
	result = ISBLANK (c);
 | 
						|
	break;
 | 
						|
      case CC_CNTRL:
 | 
						|
	result = ISCNTRL (c);
 | 
						|
	break;
 | 
						|
      case CC_DIGIT:
 | 
						|
	result = ISDIGIT (c);
 | 
						|
	break;
 | 
						|
      case CC_GRAPH:
 | 
						|
	result = ISGRAPH (c);
 | 
						|
	break;
 | 
						|
      case CC_LOWER:
 | 
						|
	result = ISLOWER (c);
 | 
						|
	break;
 | 
						|
      case CC_PRINT: 
 | 
						|
	result = ISPRINT (c);
 | 
						|
	break;
 | 
						|
      case CC_PUNCT:
 | 
						|
	result = ISPUNCT (c);
 | 
						|
	break;
 | 
						|
      case CC_SPACE:
 | 
						|
	result = ISSPACE (c);
 | 
						|
	break;
 | 
						|
      case CC_UPPER:
 | 
						|
	result = ISUPPER (c);
 | 
						|
	break;
 | 
						|
      case CC_WORD:
 | 
						|
        result = (ISALNUM (c) || c == '_');
 | 
						|
	break;
 | 
						|
      case CC_XDIGIT:
 | 
						|
	result = ISXDIGIT (c);
 | 
						|
	break;
 | 
						|
      default:
 | 
						|
	result = -1;
 | 
						|
	break;
 | 
						|
    }
 | 
						|
 | 
						|
  return result;  
 | 
						|
}
 | 
						|
 | 
						|
/* Now include `sm_loop.c' for single-byte characters. */

/* The result of FOLD is an `unsigned char'.  It expands a reference to a
   variable named `flags', which must be in scope wherever FOLD is used.
   The argument is parenthesized so the cast applies to the whole
   expression passed in. */
# define FOLD(c) ((flags & FNM_CASEFOLD) \
	? TOLOWER ((unsigned char)(c)) \
	: ((unsigned char)(c)))

/* Names and primitives for the single-byte pass over sm_loop.c: the
   first group gives function names, the rest map the generic string and
   character operations onto the char-based helpers defined above. */
#define FCT			internal_strmatch
#define GMATCH			gmatch
#define COLLSYM			collsym
#define PARSE_COLLSYM		parse_collsym
#define BRACKMATCH		brackmatch
#define PATSCAN			patscan
#define STRCOMPARE		strcompare
#define EXTMATCH		extmatch
#define STRCHR(S, C)		strchr((S), (C))
#define STRCOLL(S1, S2)		strcoll((S1), (S2))
#define STRLEN(S)		strlen(S)
#define STRCMP(S1, S2)		strcmp((S1), (S2))
#define RANGECMP(C1, C2)	rangecmp((C1), (C2))
#define COLLEQUIV(C1, C2)	collequiv((C1), (C2))
#define CTYPE_T			enum char_class
#define IS_CCLASS(C, S)		is_cclass((C), (S))
#include "sm_loop.c"
 | 
						|
 | 
						|
#if HANDLE_MULTIBYTE
 | 
						|
 | 
						|
/* Character types and literal form for the wide-character pass. */
#  define CHAR		wchar_t
#  define U_CHAR	wint_t
#  define XCHAR		wchar_t
#  define INT		wint_t
#  define L(CS)		L##CS
#  define INVALID	WEOF

/* Replace the single-byte string tests with wide-character ones.  STREQN
   evaluates A and B twice. */
#  undef STREQ
#  undef STREQN
#  define STREQ(s1, s2) (wcscmp ((s1), (s2)) == 0)
#  define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp((a), (b), (n)) == 0)
 | 
						|
 | 
						|
extern char *mbsmbchar __P((const char *));
 | 
						|
 | 
						|
/* Compare two wide characters for bracket-expression ranges.  Returns 0
   when C1 and C2 are the same character; otherwise returns the wcscoll(3)
   ordering of the two one-character wide strings. */
static int
rangecmp_wc (c1, c2)
     wint_t c1, c2;
{
  /* Plain locals: nothing needs to persist between calls. */
  wchar_t s1[2], s2[2];

  if (c1 == c2)
    return 0;

  s1[0] = c1;
  s1[1] = L'\0';
  s2[0] = c2;
  s2[1] = L'\0';

  return (wcscoll (s1, s2));
}
 | 
						|
 | 
						|
/* Return 1 if wide character C is the same as EQUIV, 0 otherwise.  Unlike
   the single-byte collequiv, this is a plain value comparison and does not
   consult the locale's collation. */
static int
collequiv_wc (c, equiv)
     wint_t c, equiv;
{
  return (c == equiv);
}
 | 
						|
 | 
						|
/* Names handed to collsyms.h for the wide-character pass; the function
   below uses `struct _collwcsym' and `posix_collwcsyms', so the header
   presumably declares them from these macros (header not visible here --
   confirm). */
#  define _COLLSYM	_collwcsym
#  define __COLLSYM	__collwcsym
#  define POSIXCOLL	posix_collwcsyms
#  include "collsyms.h"

/* Helper function for collating symbol.  Look up the symbol spelled by
   the first LEN wide characters of S.  Returns the `code' of the table
   entry whose name is exactly those LEN characters; if there is none,
   returns S[0] when LEN is 1 and INVALID otherwise. */
static wint_t
collwcsym (s, len)
     wchar_t *s;
     int len;
{
  struct _collwcsym *csp;

  /* The table ends at the first entry with a null name. */
  for (csp = posix_collwcsyms; csp->name; csp++)
    {
      /* STREQN only matches a prefix; checking for the terminator at
	 name[len] makes the match exact. */
      if (STREQN (csp->name, s, len) && csp->name[len] == L'\0')
	return (csp->code);
    }

  /* A single character stands for itself. */
  if (len == 1)
    return s[0];
  return INVALID;
}
 | 
						|
 | 
						|
/* Test whether wide character WC belongs to the character class called
   NAME.  Returns non-zero if it does and 0 if it does not.  Returns -1 if
   NAME cannot be converted to a multibyte string, if wctype(3) does not
   recognize it, or if the conversion buffer cannot be allocated.

   Two names get special treatment: `ascii' is emulated with wctob when
   wctype does not know it, and `word' is looked up as `alnum' with the
   underscore added. */
static int
is_wcclass (wc, name)
     wint_t wc;
     wchar_t *name;
{
  char *mbs;
  const wchar_t *src;
  mbstate_t state;
  size_t mbslength, mbsize;
  wctype_t desc;
  int want_word;

  if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
    {
      int c;

      /* WC is ASCII only if it has a single-byte form in the 7-bit range. */
      if ((c = wctob (wc)) == EOF)
	return 0;
      else
	return (c <= 0x7F);
    }

  want_word = (wcscmp (name, L"word") == 0);
  if (want_word)
    name = L"alnum";

  /* wctype takes a multibyte name, so convert NAME.  Each wide character
     needs at most MB_CUR_MAX bytes, plus one for the terminator. */
  mbsize = wcslen (name) * MB_CUR_MAX + 1;
  mbs = (char *) malloc (mbsize);
  if (mbs == 0)
    return -1;		/* treat allocation failure like an unknown class */

  memset (&state, '\0', sizeof (mbstate_t));
  /* wcsrtombs updates its source pointer; use a scratch copy. */
  src = name;
  mbslength = wcsrtombs (mbs, &src, mbsize, &state);

  if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
    {
      free (mbs);
      return -1;
    }
  desc = wctype (mbs);
  free (mbs);

  if (desc == (wctype_t)0)
    return -1;

  if (want_word)
    return (iswctype (wc, desc) || wc == L'_');
  else
    return (iswctype (wc, desc));
}
 | 
						|
 | 
						|
/* Now include `sm_loop.c' for multibyte characters. */

/* Case-fold C to lower case when FNM_CASEFOLD is set in `flags', a
   variable that must be in scope wherever FOLD is used.  C can be
   evaluated more than once, so it must not have side effects. */
#define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))

/* Names and primitives for the wide-character pass over sm_loop.c: the
   first group gives function names, the rest map the generic string and
   character operations onto the wchar_t-based helpers defined above. */
#define FCT			internal_wstrmatch
#define GMATCH			gmatch_wc
#define COLLSYM			collwcsym
#define PARSE_COLLSYM		parse_collwcsym
#define BRACKMATCH		brackmatch_wc
#define PATSCAN			patscan_wc
#define STRCOMPARE		wscompare
#define EXTMATCH		extmatch_wc
#define STRCHR(S, C)		wcschr((S), (C))
#define STRCOLL(S1, S2)		wcscoll((S1), (S2))
#define STRLEN(S)		wcslen(S)
#define STRCMP(S1, S2)		wcscmp((S1), (S2))
#define RANGECMP(C1, C2)	rangecmp_wc((C1), (C2))
#define COLLEQUIV(C1, C2)	collequiv_wc((C1), (C2))
#define CTYPE_T			enum char_class
#define IS_CCLASS(C, S)		is_wcclass((C), (S))
#include "sm_loop.c"
 | 
						|
 | 
						|
#endif /* HANDLE_MULTIBYTE */
 | 
						|
 | 
						|
/* Match STRING against PATTERN, passing FLAGS through to the matcher, and
   return the matcher's result.

   When HANDLE_MULTIBYTE is enabled, both arguments are converted to wide
   characters and matched with internal_wstrmatch.  The single-byte
   internal_strmatch is used instead when mbsmbchar returns 0 for both
   strings, when MB_CUR_MAX is 1, or when either conversion fails.
   Without HANDLE_MULTIBYTE the single-byte matcher is always used. */
int
xstrmatch (pattern, string, flags)
     char *pattern;
     char *string;
     int flags;
{
#if HANDLE_MULTIBYTE
  int ret;
  size_t n;
  wchar_t *wpattern, *wstring;

  /* Nothing to gain from the wide-character matcher in these cases. */
  if ((mbsmbchar (string) == 0 && mbsmbchar (pattern) == 0) || MB_CUR_MAX == 1)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  /* NOTE(review): nothing is freed when this first conversion fails;
     presumably xdupmbstowcs leaves no allocation behind on error --
     confirm against its definition. */
  n = xdupmbstowcs (&wpattern, NULL, pattern);
  if (n == (size_t)-1 || n == (size_t)-2)
    return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));

  n = xdupmbstowcs (&wstring, NULL, string);
  if (n == (size_t)-1 || n == (size_t)-2)
    {
      free (wpattern);
      return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
    }

  ret = internal_wstrmatch (wpattern, wstring, flags);

  free (wpattern);
  free (wstring);

  return ret;
#else
  return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
#endif /* !HANDLE_MULTIBYTE */
}
 |