/* gmisc.c -- miscellaneous pattern matching utility functions for Bash.
   Copyright (C) 2010 Free Software Foundation, Inc.
   This file is part of GNU Bash, the Bourne-Again SHell.
   
   Bash is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.
   Bash is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.
   You should have received a copy of the GNU General Public License
   along with Bash.  If not, see <http://www.gnu.org/licenses/>.
*/
#include 
#include "bashtypes.h"
#if defined (HAVE_UNISTD_H)
#  include 
#endif
#include "bashansi.h"
#include "shmbutil.h"
#include "stdc.h"
#ifndef LPAREN
#  define LPAREN '('
#endif
#ifndef RPAREN
#  define RPAREN ')'
#endif
#if defined (HANDLE_MULTIBYTE)
#define WLPAREN         L'('
#define WRPAREN         L')'
/* Return 1 of the first character of WSTRING could match the first
   character of pattern WPAT.  Wide character version. */
int
match_pattern_wchar (wpat, wstring)
     wchar_t *wpat, *wstring;
{
  wchar_t wc;
  if (*wstring == 0)
    return (0);
  switch (wc = *wpat++)
    {
    default:
      return (*wstring == wc);
    case L'\\':
      return (*wstring == *wpat);
    case L'?':
      return (*wpat == WLPAREN ? 1 : (*wstring != L'\0'));
    case L'*':
      return (1);
    case L'+':
    case L'!':
    case L'@':
      return (*wpat == WLPAREN ? 1 : (*wstring == wc));
    case L'[':
      return (*wstring != L'\0');
    }
}
/* Wide character version.  Scan the pattern WPAT and return a count of
   the characters it describes, computed as follows: each ordinary
   character, each backslash-escaped character, each `?', `+', `!' or `@'
   that is not immediately followed by a left paren, and each complete
   bracket expression counts as one.  Return -1 if WPAT contains an
   unquoted `*', or a `?', `+', `!' or `@' immediately followed by a left
   paren; no length is computed for those.  An empty pattern yields 0.
   A bracket expression that is not closed by `]' before the end of WPAT
   contributes the number of elements scanned in it instead of one.
   WMAX is not referenced by this function. */
int
wmatchlen (wpat, wmax)
     wchar_t *wpat;
     size_t wmax;
{
  wchar_t wc;
  int matlen, bracklen, in_cclass, in_collsym, in_equiv;

  if (*wpat == 0)
    return (0);

  /* NOTE(review): the three in_* flags are cleared only here, not at the
     start of each bracket expression, so a bracket expression that ends
     while one of them is set leaves it set for the next one -- confirm
     that this is intended. */
  matlen = in_cclass = in_collsym = in_equiv = 0;

  while ((wc = *wpat++) != 0)
    {
      switch (wc)
	{
	default:
	  matlen++;
	  break;

	case L'\\':
	  /* The backslash and the character it quotes count as one; a
	     trailing backslash counts as one by itself and ends the scan. */
	  matlen++;
	  if (*wpat == 0)
	    return (matlen);
	  wpat++;
	  break;

	case L'*':
	  return (-1);

	case L'?':
	case L'+':
	case L'!':
	case L'@':
	  if (*wpat == WLPAREN)
	    return (-1);		/* XXX for now */
	  matlen++;
	  break;

	case L'[':
	  /* scan for ending `]', skipping over embedded [:...:], [....]
	     and [=...=] */
	  bracklen = 1;
	  wc = *wpat++;
	  do
	    {
	      if (wc == 0)
		{
		  wpat--;			/* back up to NUL */
		  matlen += bracklen;
		  goto bad_bracket;
		}
	      else if (wc == L'\\')
		{
		  /* *wpat == backslash-escaped character */
		  bracklen++;
		  /* If the backslash or backslash-escape ends the string,
		     bail.  The ++wpat skips over the backslash escape */
		  if (*wpat == 0 || *++wpat == 0)
		    {
		      matlen += bracklen;
		      goto bad_bracket;
		    }
		}
	      else if (wc == L'[' && *wpat == L':')	/* character class */
		{
		  wpat++;
		  bracklen++;
		  in_cclass = 1;
		}
	      else if (in_cclass && wc == L':' && *wpat == L']')
		{
		  wpat++;
		  bracklen++;
		  in_cclass = 0;
		}
	      else if (wc == L'[' && *wpat == L'.')	/* collating symbol */
		{
		  wpat++;
		  bracklen++;
		  if (*wpat == L']')	/* right bracket can appear as collating symbol */
		    {
		      wpat++;
		      bracklen++;
		    }
		  in_collsym = 1;
		}
	      else if (in_collsym && wc == L'.' && *wpat == L']')
		{
		  wpat++;
		  bracklen++;
		  in_collsym = 0;
		}
	      else if (wc == L'[' && *wpat == L'=')	/* equivalence class */
		{
		  wpat++;
		  bracklen++;
		  if (*wpat == L']')	/* right bracket can appear as equivalence class */
		    {
		      wpat++;
		      bracklen++;
		    }
		  in_equiv = 1;
		}
	      else if (in_equiv && wc == L'=' && *wpat == L']')
		{
		  wpat++;
		  bracklen++;
		  in_equiv = 0;
		}
	      else
		bracklen++;
	    }
	  while ((wc = *wpat++) != L']');
	  matlen++;		/* bracket expression can only match one char */
bad_bracket:
	  /* Reached with WPAT pointing at the terminating NUL when the
	     bracket expression was not closed; the outer loop then stops. */
	  break;
	}
    }

  return (matlen);
}
#endif
/* Quick first-character test, used to avoid n^2 calls to strmatch().
   Return 1 if the first character of STRING could match the first element
   of the pattern PAT, and 0 if it cannot.  An empty STRING always
   yields 0. */
int
match_pattern_char (pat, string)
     char *pat, *string;
{
  char lead, target;

  target = string[0];
  if (target == '\0')
    return (0);

  lead = pat[0];
  switch (lead)
    {
    case '*':
    case '?':
    case '[':
      /* TARGET is known to be non-null here, so `*', `?' (whether or not
	 it introduces a parenthesized group) and a bracket expression are
	 all accepted as possible matches. */
      return (1);

    case '\\':
      /* A backslash quotes the next pattern character; compare that one. */
      return (pat[1] == target);

    case '+':
    case '!':
    case '@':
      /* Followed by a left paren these are always accepted; otherwise
	 they are ordinary characters. */
      if (pat[1] == LPAREN)
	return (1);
      return (lead == target);

    default:
      /* Ordinary character (including the terminating null of an empty
	 pattern, which can never equal the non-null TARGET). */
      return (lead == target);
    }
}
/* Scan the pattern PAT and return a count of the characters it describes,
   computed as follows: each ordinary character, each backslash-escaped
   character, each `?', `+', `!' or `@' that is not immediately followed by
   a left paren, and each complete bracket expression counts as one.
   Return -1 if PAT contains an unquoted `*', or a `?', `+', `!' or `@'
   immediately followed by a left paren; no length is computed for those.
   An empty pattern yields 0.  A bracket expression that is not closed by
   `]' before the end of PAT contributes the number of elements scanned in
   it instead of one.  MAX is not referenced by this function. */
int
umatchlen (pat, max)
     char *pat;
     size_t max;
{
  char c;
  int matlen, bracklen, in_cclass, in_collsym, in_equiv;

  if (*pat == 0)
    return (0);

  /* NOTE(review): the three in_* flags are cleared only here, not at the
     start of each bracket expression, so a bracket expression that ends
     while one of them is set leaves it set for the next one -- confirm
     that this is intended. */
  matlen = in_cclass = in_collsym = in_equiv = 0;

  while ((c = *pat++) != 0)
    {
      switch (c)
	{
	default:
	  matlen++;
	  break;

	case '\\':
	  /* The backslash and the character it quotes count as one; a
	     trailing backslash counts as one by itself and ends the scan. */
	  matlen++;
	  if (*pat == 0)
	    return (matlen);
	  pat++;
	  break;

	case '*':
	  return (-1);

	case '?':
	case '+':
	case '!':
	case '@':
	  if (*pat == LPAREN)
	    return (-1);		/* XXX for now */
	  matlen++;
	  break;

	case '[':
	  /* scan for ending `]', skipping over embedded [:...:], [....]
	     and [=...=] */
	  bracklen = 1;
	  c = *pat++;
	  do
	    {
	      if (c == 0)
		{
		  pat--;			/* back up to NUL */
		  matlen += bracklen;
		  goto bad_bracket;
		}
	      else if (c == '\\')
		{
		  /* *pat == backslash-escaped character */
		  bracklen++;
		  /* If the backslash or backslash-escape ends the string,
		     bail.  The ++pat skips over the backslash escape */
		  if (*pat == 0 || *++pat == 0)
		    {
		      matlen += bracklen;
		      goto bad_bracket;
		    }
		}
	      else if (c == '[' && *pat == ':')	/* character class */
		{
		  pat++;
		  bracklen++;
		  in_cclass = 1;
		}
	      else if (in_cclass && c == ':' && *pat == ']')
		{
		  pat++;
		  bracklen++;
		  in_cclass = 0;
		}
	      else if (c == '[' && *pat == '.')	/* collating symbol */
		{
		  pat++;
		  bracklen++;
		  if (*pat == ']')	/* right bracket can appear as collating symbol */
		    {
		      pat++;
		      bracklen++;
		    }
		  in_collsym = 1;
		}
	      else if (in_collsym && c == '.' && *pat == ']')
		{
		  pat++;
		  bracklen++;
		  in_collsym = 0;
		}
	      else if (c == '[' && *pat == '=')	/* equivalence class */
		{
		  pat++;
		  bracklen++;
		  if (*pat == ']')	/* right bracket can appear as equivalence class */
		    {
		      pat++;
		      bracklen++;
		    }
		  in_equiv = 1;
		}
	      else if (in_equiv && c == '=' && *pat == ']')
		{
		  pat++;
		  bracklen++;
		  in_equiv = 0;
		}
	      else
		bracklen++;
	    }
	  while ((c = *pat++) != ']');
	  matlen++;		/* bracket expression can only match one char */
bad_bracket:
	  /* Reached with PAT pointing at the terminating NUL when the
	     bracket expression was not closed; the outer loop then stops. */
	  break;
	}
    }

  return (matlen);
}