Use GNU libunistring and Gnulib modules needed by R6RS bytevectors and ports.

* m4/gnulib-cache.m4 (gl_MODULES): Add `byteswap', `iconv_open-utf',
  `libunistring', `striconveh', and `string'.
This commit is contained in:
Ludovic Courtès 2009-05-27 16:50:40 +02:00
commit 24d56127bb
42 changed files with 7947 additions and 12 deletions

View file

@ -9,9 +9,9 @@
# the same distribution terms as the rest of that program.
#
# Generated by gnulib-tool.
# Reproduce by: gnulib-tool --import --dir=. --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl --libtool --macro-prefix=gl --no-vc-files alloca-opt autobuild count-one-bits environ extensions flock fpieee full-read full-write lib-symbol-visibility putenv stdlib strcase strftime
# Reproduce by: gnulib-tool --import --dir=. --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl --libtool --macro-prefix=gl --no-vc-files alloca-opt autobuild byteswap count-one-bits environ extensions flock fpieee full-read full-write iconv_open-utf lib-symbol-visibility libunistring putenv stdlib strcase strftime striconveh string
AUTOMAKE_OPTIONS = 1.5 gnits
AUTOMAKE_OPTIONS = 1.5 gnits subdir-objects
SUBDIRS =
noinst_HEADERS =
@ -54,6 +54,42 @@ EXTRA_DIST += alloca.in.h
## end gnulib module alloca-opt
## begin gnulib module byteswap
# Whatever $(BYTESWAP_H) expands to is built ahead of the regular sources.
# NOTE(review): presumably configure leaves it empty when the system already
# provides a usable <byteswap.h> -- confirm against the byteswap m4 macro.
BUILT_SOURCES += $(BYTESWAP_H)
# We need the following in order to create <byteswap.h> when the system
# doesn't have one.
# The output is the template prefixed with a DO NOT EDIT banner.  It is
# assembled in $@-t and only then renamed to $@.
byteswap.h: byteswap.in.h
{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */'; \
cat $(srcdir)/byteswap.in.h; \
} > $@-t
mv -f $@-t $@
# The generated header and its temporary are cleaned by "mostlyclean";
# the template itself is distributed.
MOSTLYCLEANFILES += byteswap.h byteswap.h-t
EXTRA_DIST += byteswap.in.h
## end gnulib module byteswap
## begin gnulib module c-ctype
# C-locale character classification: header and implementation go into libgnu.
libgnu_la_SOURCES += c-ctype.h c-ctype.c
## end gnulib module c-ctype
## begin gnulib module c-strcase
# C-locale case-insensitive comparison: one header, one source file per function.
libgnu_la_SOURCES += c-strcase.h c-strcasecmp.c c-strncasecmp.c
## end gnulib module c-strcase
## begin gnulib module c-strcaseeq
# c-strcaseeq.h is only distributed; it is not added to libgnu_la_SOURCES.
EXTRA_DIST += c-strcaseeq.h
## end gnulib module c-strcaseeq
## begin gnulib module configmake
# Retrieve values of the variables through 'configure' followed by
@ -143,6 +179,72 @@ libgnu_la_SOURCES += full-write.h full-write.c
## end gnulib module full-write
## begin gnulib module gperf
# Program invoked as $(GPERF) by the iconv_open-*.h rules of the iconv_open module.
GPERF = gperf
## end gnulib module gperf
## begin gnulib module havelib
# Ship config.rpath from the top-level build-aux directory.
EXTRA_DIST += $(top_srcdir)/build-aux/config.rpath
## end gnulib module havelib
## begin gnulib module iconv_open
# Whatever $(ICONV_H) expands to is built ahead of the regular sources.
BUILT_SOURCES += $(ICONV_H)
# We need the following in order to create <iconv.h> when the system
# doesn't have one that works with the given compiler.
# sed replaces each @NAME@ placeholder of iconv.in.h with the corresponding
# make variable.  The patterns are spelled @''NAME''@ (an empty pair of shell
# quotes that the shell removes again).
# NOTE(review): presumably this keeps config.status from substituting the
# pattern side itself -- confirm.
# The result is written to $@-t and renamed to $@ at the end.
iconv.h: iconv.in.h
rm -f $@-t $@
{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \
sed -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \
-e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \
-e 's|@''NEXT_ICONV_H''@|$(NEXT_ICONV_H)|g' \
-e 's|@''ICONV_CONST''@|$(ICONV_CONST)|g' \
-e 's|@''REPLACE_ICONV''@|$(REPLACE_ICONV)|g' \
-e 's|@''REPLACE_ICONV_OPEN''@|$(REPLACE_ICONV_OPEN)|g' \
-e 's|@''REPLACE_ICONV_UTF''@|$(REPLACE_ICONV_UTF)|g' \
< $(srcdir)/iconv.in.h; \
} > $@-t
mv $@-t $@
MOSTLYCLEANFILES += iconv.h iconv.h-t
# The four lookup headers below are generated from their .gperf inputs with
# "$(GPERF) -m 10".  Their recipes write into $(srcdir), and the generated
# headers are listed in EXTRA_DIST and MAINTAINERCLEANFILES further down.
# NOTE(review): presumably so that building from a tarball does not require
# gperf -- confirm.
iconv_open-aix.h: iconv_open-aix.gperf
$(GPERF) -m 10 $(srcdir)/iconv_open-aix.gperf > $(srcdir)/iconv_open-aix.h-t
mv $(srcdir)/iconv_open-aix.h-t $(srcdir)/iconv_open-aix.h
iconv_open-hpux.h: iconv_open-hpux.gperf
$(GPERF) -m 10 $(srcdir)/iconv_open-hpux.gperf > $(srcdir)/iconv_open-hpux.h-t
mv $(srcdir)/iconv_open-hpux.h-t $(srcdir)/iconv_open-hpux.h
iconv_open-irix.h: iconv_open-irix.gperf
$(GPERF) -m 10 $(srcdir)/iconv_open-irix.gperf > $(srcdir)/iconv_open-irix.h-t
mv $(srcdir)/iconv_open-irix.h-t $(srcdir)/iconv_open-irix.h
iconv_open-osf.h: iconv_open-osf.gperf
$(GPERF) -m 10 $(srcdir)/iconv_open-osf.gperf > $(srcdir)/iconv_open-osf.h-t
mv $(srcdir)/iconv_open-osf.h-t $(srcdir)/iconv_open-osf.h
BUILT_SOURCES += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h
MOSTLYCLEANFILES += iconv_open-aix.h-t iconv_open-hpux.h-t iconv_open-irix.h-t iconv_open-osf.h-t
MAINTAINERCLEANFILES += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h
EXTRA_DIST += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h
EXTRA_DIST += iconv.in.h iconv_open-aix.gperf iconv_open-hpux.gperf iconv_open-irix.gperf iconv_open-osf.gperf iconv_open.c
# iconv_open.c is listed in EXTRA_libgnu_la_SOURCES, not libgnu_la_SOURCES.
# NOTE(review): presumably it is compiled only when configure selects the
# replacement -- confirm.
EXTRA_libgnu_la_SOURCES += iconv_open.c
## end gnulib module iconv_open
## begin gnulib module iconv_open-utf
# iconv.c and iconv_close.c are distributed and listed in
# EXTRA_libgnu_la_SOURCES rather than libgnu_la_SOURCES.
# NOTE(review): presumably they are compiled only when configure requests the
# replacement functions -- confirm.
EXTRA_DIST += iconv.c iconv_close.c
EXTRA_libgnu_la_SOURCES += iconv.c iconv_close.c
## end gnulib module iconv_open-utf
## begin gnulib module lib-symbol-visibility
# The value of $(CFLAG_VISIBILITY) needs to be added to the CFLAGS for the
@ -442,6 +544,95 @@ EXTRA_libgnu_la_SOURCES += strftime.c
## end gnulib module strftime
## begin gnulib module striconveh
libgnu_la_SOURCES += striconveh.h striconveh.c
# Under the GL_COND_LIBTOOL conditional, $(LTLIBICONV) is appended to the
# link flags of libgnu.
if GL_COND_LIBTOOL
libgnu_la_LDFLAGS += $(LTLIBICONV)
endif
# iconveh.h is only distributed; it is not added to libgnu_la_SOURCES.
EXTRA_DIST += iconveh.h
## end gnulib module striconveh
## begin gnulib module string
# string.h is always generated (it is added to BUILT_SOURCES unconditionally).
BUILT_SOURCES += string.h
# We need the following in order to create <string.h> when the system
# doesn't have one that works with the given compiler.
# sed replaces each @NAME@ placeholder of string.in.h with the corresponding
# make variable (GNULIB_*, HAVE_*, REPLACE_* and the include_next settings).
# The patterns are spelled @''NAME''@, i.e. with an empty pair of shell quotes
# that the shell removes again.
# The last sed expression uses the "r" command to insert the contents of
# $(LINK_WARNING_H) after the line matching "definition of GL_LINK_WARNING".
# The result is written to $@-t and renamed to $@ at the end.
string.h: string.in.h
rm -f $@-t $@
{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \
sed -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \
-e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \
-e 's|@''NEXT_STRING_H''@|$(NEXT_STRING_H)|g' \
-e 's|@''GNULIB_MBSLEN''@|$(GNULIB_MBSLEN)|g' \
-e 's|@''GNULIB_MBSNLEN''@|$(GNULIB_MBSNLEN)|g' \
-e 's|@''GNULIB_MBSCHR''@|$(GNULIB_MBSCHR)|g' \
-e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \
-e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \
-e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \
-e 's|@''GNULIB_MBSNCASECMP''@|$(GNULIB_MBSNCASECMP)|g' \
-e 's|@''GNULIB_MBSPCASECMP''@|$(GNULIB_MBSPCASECMP)|g' \
-e 's|@''GNULIB_MBSCASESTR''@|$(GNULIB_MBSCASESTR)|g' \
-e 's|@''GNULIB_MBSCSPN''@|$(GNULIB_MBSCSPN)|g' \
-e 's|@''GNULIB_MBSPBRK''@|$(GNULIB_MBSPBRK)|g' \
-e 's|@''GNULIB_MBSSPN''@|$(GNULIB_MBSSPN)|g' \
-e 's|@''GNULIB_MBSSEP''@|$(GNULIB_MBSSEP)|g' \
-e 's|@''GNULIB_MBSTOK_R''@|$(GNULIB_MBSTOK_R)|g' \
-e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
-e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \
-e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \
-e 's|@''GNULIB_RAWMEMCHR''@|$(GNULIB_RAWMEMCHR)|g' \
-e 's|@''GNULIB_STPCPY''@|$(GNULIB_STPCPY)|g' \
-e 's|@''GNULIB_STPNCPY''@|$(GNULIB_STPNCPY)|g' \
-e 's|@''GNULIB_STRCHRNUL''@|$(GNULIB_STRCHRNUL)|g' \
-e 's|@''GNULIB_STRDUP''@|$(GNULIB_STRDUP)|g' \
-e 's|@''GNULIB_STRNDUP''@|$(GNULIB_STRNDUP)|g' \
-e 's|@''GNULIB_STRNLEN''@|$(GNULIB_STRNLEN)|g' \
-e 's|@''GNULIB_STRPBRK''@|$(GNULIB_STRPBRK)|g' \
-e 's|@''GNULIB_STRSEP''@|$(GNULIB_STRSEP)|g' \
-e 's|@''GNULIB_STRSTR''@|$(GNULIB_STRSTR)|g' \
-e 's|@''GNULIB_STRCASESTR''@|$(GNULIB_STRCASESTR)|g' \
-e 's|@''GNULIB_STRTOK_R''@|$(GNULIB_STRTOK_R)|g' \
-e 's|@''GNULIB_STRERROR''@|$(GNULIB_STRERROR)|g' \
-e 's|@''GNULIB_STRSIGNAL''@|$(GNULIB_STRSIGNAL)|g' \
-e 's|@''GNULIB_STRVERSCMP''@|$(GNULIB_STRVERSCMP)|g' \
-e 's|@''HAVE_DECL_MEMMEM''@|$(HAVE_DECL_MEMMEM)|g' \
-e 's|@''HAVE_MEMPCPY''@|$(HAVE_MEMPCPY)|g' \
-e 's|@''HAVE_DECL_MEMRCHR''@|$(HAVE_DECL_MEMRCHR)|g' \
-e 's|@''HAVE_RAWMEMCHR''@|$(HAVE_RAWMEMCHR)|g' \
-e 's|@''HAVE_STPCPY''@|$(HAVE_STPCPY)|g' \
-e 's|@''HAVE_STPNCPY''@|$(HAVE_STPNCPY)|g' \
-e 's|@''HAVE_STRCHRNUL''@|$(HAVE_STRCHRNUL)|g' \
-e 's|@''HAVE_DECL_STRDUP''@|$(HAVE_DECL_STRDUP)|g' \
-e 's|@''HAVE_STRNDUP''@|$(HAVE_STRNDUP)|g' \
-e 's|@''HAVE_DECL_STRNDUP''@|$(HAVE_DECL_STRNDUP)|g' \
-e 's|@''HAVE_DECL_STRNLEN''@|$(HAVE_DECL_STRNLEN)|g' \
-e 's|@''HAVE_STRPBRK''@|$(HAVE_STRPBRK)|g' \
-e 's|@''HAVE_STRSEP''@|$(HAVE_STRSEP)|g' \
-e 's|@''HAVE_STRCASESTR''@|$(HAVE_STRCASESTR)|g' \
-e 's|@''HAVE_DECL_STRTOK_R''@|$(HAVE_DECL_STRTOK_R)|g' \
-e 's|@''HAVE_DECL_STRERROR''@|$(HAVE_DECL_STRERROR)|g' \
-e 's|@''HAVE_DECL_STRSIGNAL''@|$(HAVE_DECL_STRSIGNAL)|g' \
-e 's|@''HAVE_STRVERSCMP''@|$(HAVE_STRVERSCMP)|g' \
-e 's|@''REPLACE_MEMMEM''@|$(REPLACE_MEMMEM)|g' \
-e 's|@''REPLACE_STRCASESTR''@|$(REPLACE_STRCASESTR)|g' \
-e 's|@''REPLACE_STRDUP''@|$(REPLACE_STRDUP)|g' \
-e 's|@''REPLACE_STRSTR''@|$(REPLACE_STRSTR)|g' \
-e 's|@''REPLACE_STRERROR''@|$(REPLACE_STRERROR)|g' \
-e 's|@''REPLACE_STRSIGNAL''@|$(REPLACE_STRSIGNAL)|g' \
-e '/definition of GL_LINK_WARNING/r $(LINK_WARNING_H)' \
< $(srcdir)/string.in.h; \
} > $@-t
mv $@-t $@
# The generated header and its temporary are cleaned by "mostlyclean";
# the template itself is distributed.
MOSTLYCLEANFILES += string.h string.h-t
EXTRA_DIST += string.in.h
## end gnulib module string
## begin gnulib module strings
BUILT_SOURCES += strings.h
@ -598,6 +789,50 @@ EXTRA_DIST += unistd.in.h
## end gnulib module unistd
## begin gnulib module unistr/base
# unistr.h is only distributed; the modules below add their implementation
# files, which live in the unistr/ subdirectory, to libgnu.
# NOTE(review): sources in a subdirectory are presumably why AUTOMAKE_OPTIONS
# gained "subdir-objects" -- confirm.
EXTRA_DIST += unistr.h
## end gnulib module unistr/base
## begin gnulib module unistr/u8-mbtouc
libgnu_la_SOURCES += unistr/u8-mbtouc.c unistr/u8-mbtouc-aux.c
## end gnulib module unistr/u8-mbtouc
## begin gnulib module unistr/u8-mbtouc-unsafe
libgnu_la_SOURCES += unistr/u8-mbtouc-unsafe.c unistr/u8-mbtouc-unsafe-aux.c
## end gnulib module unistr/u8-mbtouc-unsafe
## begin gnulib module unistr/u8-mbtoucr
libgnu_la_SOURCES += unistr/u8-mbtoucr.c
## end gnulib module unistr/u8-mbtoucr
## begin gnulib module unistr/u8-prev
libgnu_la_SOURCES += unistr/u8-prev.c
## end gnulib module unistr/u8-prev
## begin gnulib module unistr/u8-uctomb
libgnu_la_SOURCES += unistr/u8-uctomb.c unistr/u8-uctomb-aux.c
## end gnulib module unistr/u8-uctomb
## begin gnulib module unitypes
# unitypes.h is only distributed.
EXTRA_DIST += unitypes.h
## end gnulib module unitypes
## begin gnulib module verify
libgnu_la_SOURCES += verify.h

44
lib/byteswap.in.h Normal file
View file

@ -0,0 +1,44 @@
/* byteswap.h - Byte swapping
Copyright (C) 2005, 2007 Free Software Foundation, Inc.
Written by Oskar Liljeblad <oskar@osk.mine.nu>, 2005.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#ifndef _GL_BYTESWAP_H
#define _GL_BYTESWAP_H
/* Given an unsigned 16-bit argument X, return the value corresponding to
X with reversed byte order.
X is expanded twice, so it must not have side effects.  The low byte is
moved up by 8 bits and the second byte down by 8 bits; any bits of X above
bit 15 are masked away and do not appear in the result. */
#define bswap_16(x) ((((x) & 0x00FF) << 8) | \
(((x) & 0xFF00) >> 8))
/* Given an unsigned 32-bit argument X, return the value corresponding to
   X with reversed byte order.

   X is expanded four times, so it must not have side effects.

   All masks are unsigned constants.  This keeps the arithmetic unsigned
   even when X has a signed type such as 'int': with a plain 0x000000FF
   mask, '((x) & 0x000000FF) << 24' would shift a signed int into its sign
   bit whenever the low byte of X is 0x80 or larger, which is undefined
   behaviour.  The type of the result is unaffected, because the
   0xFF000000 mask never fits in 'int' and was therefore unsigned
   already.  */
#define bswap_32(x) ((((x) & 0x000000FFU) << 24) | \
                     (((x) & 0x0000FF00U) << 8) | \
                     (((x) & 0x00FF0000U) >> 8) | \
                     (((x) & 0xFF000000U) >> 24))
/* Given an unsigned 64-bit argument X, return the value corresponding to
X with reversed byte order.
X is expanded eight times, so it must not have side effects.  Every mask
carries the ULL suffix, so each term is computed as unsigned long long. */
#define bswap_64(x) ((((x) & 0x00000000000000FFULL) << 56) | \
(((x) & 0x000000000000FF00ULL) << 40) | \
(((x) & 0x0000000000FF0000ULL) << 24) | \
(((x) & 0x00000000FF000000ULL) << 8) | \
(((x) & 0x000000FF00000000ULL) >> 8) | \
(((x) & 0x0000FF0000000000ULL) >> 24) | \
(((x) & 0x00FF000000000000ULL) >> 40) | \
(((x) & 0xFF00000000000000ULL) >> 56))
#endif /* _GL_BYTESWAP_H */

396
lib/c-ctype.c Normal file
View file

@ -0,0 +1,396 @@
/* Character handling in C locale.
Copyright 2000-2003, 2006 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#include <config.h>
/* Specification. */
#define NO_C_CTYPE_MACROS
#include "c-ctype.h"
/* The function isascii is not locale dependent. Its use in EBCDIC is
questionable. */
/* Return true if C lies in the 7-bit range 0x00..0x7f.  */
bool
c_isascii (int c)
{
  return !(c < 0x00 || c > 0x7f);
}
/* Return true if C is one of the letters 'A'..'Z', 'a'..'z' or one of the
   digits '0'..'9'.  */
bool
c_isalnum (int c)
{
#if C_CTYPE_CONSECUTIVE_DIGITS && C_CTYPE_CONSECUTIVE_UPPERCASE \
    && C_CTYPE_CONSECUTIVE_LOWERCASE && C_CTYPE_ASCII
  /* In ASCII the two cases differ only in bit 0x20, so clearing that bit
     lets one range test cover both.  */
  const int folded = c & ~0x20;

  return ('0' <= c && c <= '9') || ('A' <= folded && folded <= 'Z');
#elif C_CTYPE_CONSECUTIVE_DIGITS && C_CTYPE_CONSECUTIVE_UPPERCASE \
      && C_CTYPE_CONSECUTIVE_LOWERCASE
  if ('0' <= c && c <= '9')
    return true;
  if ('A' <= c && c <= 'Z')
    return true;
  return 'a' <= c && c <= 'z';
#else
  /* Nothing is known about the encoding: enumerate every member.  */
  switch (c)
    {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;
    default:
      return false;
    }
#endif
}
/* Return true if C is one of the letters 'A'..'Z' or 'a'..'z'.  */
bool
c_isalpha (int c)
{
#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE \
    && C_CTYPE_ASCII
  /* Clearing bit 0x20 maps an ASCII lowercase letter onto its uppercase
     counterpart.  */
  const int folded = c & ~0x20;

  return 'A' <= folded && folded <= 'Z';
#elif C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
  if ('A' <= c && c <= 'Z')
    return true;
  return 'a' <= c && c <= 'z';
#else
  /* Nothing is known about the encoding: enumerate every member.  */
  switch (c)
    {
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;
    default:
      return false;
    }
#endif
}
/* Return true if C is a space or a horizontal tab.  */
bool
c_isblank (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
      return true;
    default:
      return false;
    }
}
/* Return true if C is a control character.  With an ASCII character set
   these are the codes 0x00..0x1f and 0x7f; otherwise every value that is
   not one of the printable characters listed below counts as control.  */
bool
c_iscntrl (int c)
{
#if C_CTYPE_ASCII
  if (c == 0x7f)
    return true;
  /* True exactly when no bit above the low five is set.  */
  return (c & ~0x1f) == 0;
#else
  switch (c)
    {
    case ' ':
    case '!': case '"': case '#': case '$': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-': case '.':
    case '/': case ':': case ';': case '<': case '=': case '>': case '?':
    case '@': case '[': case '\\': case ']': case '^': case '_': case '`':
    case '{': case '|': case '}': case '~':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      /* Printable, hence not a control character.  */
      return false;
    default:
      return true;
    }
#endif
}
/* Return true if C is one of the decimal digits '0'..'9'.  */
bool
c_isdigit (int c)
{
#if C_CTYPE_CONSECUTIVE_DIGITS
  return '0' <= c && c <= '9';
#else
  switch (c)
    {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      return true;
    default:
      return false;
    }
#endif
}
/* Return true if C is one of the lowercase letters 'a'..'z'.  */
bool
c_islower (int c)
{
#if C_CTYPE_CONSECUTIVE_LOWERCASE
  return 'a' <= c && c <= 'z';
#else
  switch (c)
    {
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;
    default:
      return false;
    }
#endif
}
/* Return true if C is a printable character other than the space:
   a punctuation character, a digit or a letter.  */
bool
c_isgraph (int c)
{
#if C_CTYPE_ASCII
  return !(c < '!' || '~' < c);
#else
  switch (c)
    {
    case '!': case '"': case '#': case '$': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-': case '.':
    case '/': case ':': case ';': case '<': case '=': case '>': case '?':
    case '@': case '[': case '\\': case ']': case '^': case '_': case '`':
    case '{': case '|': case '}': case '~':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;
    default:
      return false;
    }
#endif
}
/* Return true if C is a printable character: the space, a punctuation
   character, a digit or a letter.  */
bool
c_isprint (int c)
{
#if C_CTYPE_ASCII
  return ' ' <= c && c <= '~';
#else
  switch (c)
    {
    case ' ':
    case '!': case '"': case '#': case '$': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-': case '.':
    case '/': case ':': case ';': case '<': case '=': case '>': case '?':
    case '@': case '[': case '\\': case ']': case '^': case '_': case '`':
    case '{': case '|': case '}': case '~':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;
    default:
      return false;
    }
#endif
}
/* Return true if C is a punctuation character, i.e. a printable character
   that is neither the space, a digit nor a letter.  */
bool
c_ispunct (int c)
{
#if C_CTYPE_ASCII
  /* Clearing bit 0x20 maps an ASCII lowercase letter onto its uppercase
     counterpart.  */
  const int folded = c & ~0x20;

  if (c < '!' || c > '~')
    return false;
  if ('0' <= c && c <= '9')
    return false;
  return !('A' <= folded && folded <= 'Z');
#else
  switch (c)
    {
    case '!': case '"': case '#': case '$': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-': case '.':
    case '/': case ':': case ';': case '<': case '=': case '>': case '?':
    case '@': case '[': case '\\': case ']': case '^': case '_': case '`':
    case '{': case '|': case '}': case '~':
      return true;
    default:
      return false;
    }
#endif
}
/* Return true if C is one of the six white-space characters: space,
   horizontal tab, newline, vertical tab, form feed or carriage return.  */
bool
c_isspace (int c)
{
  switch (c)
    {
    case ' ':
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
      return true;
    default:
      return false;
    }
}
/* Return true if C is one of the uppercase letters 'A'..'Z'.  */
bool
c_isupper (int c)
{
#if C_CTYPE_CONSECUTIVE_UPPERCASE
  return 'A' <= c && c <= 'Z';
#else
  switch (c)
    {
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
      return true;
    default:
      return false;
    }
#endif
}
/* Return true if C is a hexadecimal digit: '0'..'9', 'A'..'F' or
   'a'..'f'.  */
bool
c_isxdigit (int c)
{
#if C_CTYPE_CONSECUTIVE_DIGITS && C_CTYPE_CONSECUTIVE_UPPERCASE \
    && C_CTYPE_CONSECUTIVE_LOWERCASE && C_CTYPE_ASCII
  /* Clearing bit 0x20 maps an ASCII lowercase letter onto its uppercase
     counterpart.  */
  const int folded = c & ~0x20;

  return ('0' <= c && c <= '9') || ('A' <= folded && folded <= 'F');
#elif C_CTYPE_CONSECUTIVE_DIGITS && C_CTYPE_CONSECUTIVE_UPPERCASE \
      && C_CTYPE_CONSECUTIVE_LOWERCASE
  if ('0' <= c && c <= '9')
    return true;
  if ('A' <= c && c <= 'F')
    return true;
  return 'a' <= c && c <= 'f';
#else
  switch (c)
    {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
      return true;
    default:
      return false;
    }
#endif
}
/* Return the lowercase counterpart of C if C is one of 'A'..'Z';
   return every other value unchanged.  */
int
c_tolower (int c)
{
#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
  if ('A' <= c && c <= 'Z')
    return c + ('a' - 'A');
  return c;
#else
  /* Nothing is known about the encoding: look C up among the uppercase
     letters and return the letter at the same position of the lowercase
     alphabet.  Only the 26 letters are examined, never the terminating
     NUL of the tables.  */
  static const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  static const char lower[] = "abcdefghijklmnopqrstuvwxyz";
  int i;

  for (i = 0; i < 26; i++)
    if (c == upper[i])
      return lower[i];
  return c;
#endif
}
/* Return the uppercase counterpart of C if C is one of 'a'..'z';
   return every other value unchanged.  */
int
c_toupper (int c)
{
#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
  if ('a' <= c && c <= 'z')
    return c - ('a' - 'A');
  return c;
#else
  /* Nothing is known about the encoding: look C up among the lowercase
     letters and return the letter at the same position of the uppercase
     alphabet.  Only the 26 letters are examined, never the terminating
     NUL of the tables.  */
  static const char lower[] = "abcdefghijklmnopqrstuvwxyz";
  static const char upper[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  int i;

  for (i = 0; i < 26; i++)
    if (c == lower[i])
      return upper[i];
  return c;
#endif
}

295
lib/c-ctype.h Normal file
View file

@ -0,0 +1,295 @@
/* Character handling in C locale.
These functions work like the corresponding functions in <ctype.h>,
except that they have the C (POSIX) locale hardwired, whereas the
<ctype.h> functions' behaviour depends on the current locale set via
setlocale.
Copyright (C) 2000-2003, 2006, 2008 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#ifndef C_CTYPE_H
#define C_CTYPE_H
#include <stdbool.h>
#ifdef __cplusplus
extern "C" {
#endif
/* The functions defined in this file assume the "C" locale and a character
set without diacritics (ASCII-US or EBCDIC-US or something like that).
Even if the "C" locale on a particular system is an extension of the ASCII
character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it
is ISO-8859-1), the functions in this file recognize only the ASCII
characters. */
/* Check whether the ASCII optimizations apply. */
/* ANSI C89 (and ISO C99 5.2.1.3 too) already guarantees that
'0', '1', ..., '9' have consecutive integer values. */
#define C_CTYPE_CONSECUTIVE_DIGITS 1
/* C_CTYPE_CONSECUTIVE_UPPERCASE is defined when the preprocessor sees the
letters 'A'..'Z' as 26 consecutive, ascending values; every adjacent pair
is checked individually.  When it stays undefined, a test like
'#if C_CTYPE_CONSECUTIVE_UPPERCASE' evaluates to 0. */
#if ('A' <= 'Z') \
&& ('A' + 1 == 'B') && ('B' + 1 == 'C') && ('C' + 1 == 'D') \
&& ('D' + 1 == 'E') && ('E' + 1 == 'F') && ('F' + 1 == 'G') \
&& ('G' + 1 == 'H') && ('H' + 1 == 'I') && ('I' + 1 == 'J') \
&& ('J' + 1 == 'K') && ('K' + 1 == 'L') && ('L' + 1 == 'M') \
&& ('M' + 1 == 'N') && ('N' + 1 == 'O') && ('O' + 1 == 'P') \
&& ('P' + 1 == 'Q') && ('Q' + 1 == 'R') && ('R' + 1 == 'S') \
&& ('S' + 1 == 'T') && ('T' + 1 == 'U') && ('U' + 1 == 'V') \
&& ('V' + 1 == 'W') && ('W' + 1 == 'X') && ('X' + 1 == 'Y') \
&& ('Y' + 1 == 'Z')
#define C_CTYPE_CONSECUTIVE_UPPERCASE 1
#endif
/* Same check for the lowercase letters 'a'..'z'. */
#if ('a' <= 'z') \
&& ('a' + 1 == 'b') && ('b' + 1 == 'c') && ('c' + 1 == 'd') \
&& ('d' + 1 == 'e') && ('e' + 1 == 'f') && ('f' + 1 == 'g') \
&& ('g' + 1 == 'h') && ('h' + 1 == 'i') && ('i' + 1 == 'j') \
&& ('j' + 1 == 'k') && ('k' + 1 == 'l') && ('l' + 1 == 'm') \
&& ('m' + 1 == 'n') && ('n' + 1 == 'o') && ('o' + 1 == 'p') \
&& ('p' + 1 == 'q') && ('q' + 1 == 'r') && ('r' + 1 == 's') \
&& ('s' + 1 == 't') && ('t' + 1 == 'u') && ('u' + 1 == 'v') \
&& ('v' + 1 == 'w') && ('w' + 1 == 'x') && ('x' + 1 == 'y') \
&& ('y' + 1 == 'z')
#define C_CTYPE_CONSECUTIVE_LOWERCASE 1
#endif
/* C_CTYPE_ASCII is defined when every character tested below has its ASCII
code.  The codes 36 ('$'), 64 ('@') and 96 ('`') are not tested.
NOTE(review): presumably because these three characters are not part of
C's basic character set -- confirm. */
#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)
/* The character set is ASCII or one of its variants or extensions, not EBCDIC.
Testing the value of '\n' and '\r' is not relevant. */
#define C_CTYPE_ASCII 1
#endif
/* Function declarations. */
/* Unlike the functions in <ctype.h>, which require an argument in the range
of the 'unsigned char' type, the functions here operate on values that are
in the 'unsigned char' range or in the 'char' range. In other words,
when you have a 'char' value, you need to cast it before using it as
argument to a <ctype.h> function:
const char *s = ...;
if (isalpha ((unsigned char) *s)) ...
but you don't need to cast it for the functions defined in this file:
const char *s = ...;
if (c_isalpha (*s)) ...
*/
extern bool c_isascii (int c); /* not locale dependent */
extern bool c_isalnum (int c); /* letter or decimal digit */
extern bool c_isalpha (int c); /* 'A'..'Z' or 'a'..'z' */
extern bool c_isblank (int c); /* space or horizontal tab */
extern bool c_iscntrl (int c); /* control character; 0x00..0x1f or 0x7f in ASCII */
extern bool c_isdigit (int c); /* '0'..'9' */
extern bool c_islower (int c); /* 'a'..'z' */
extern bool c_isgraph (int c); /* printable, excluding the space */
extern bool c_isprint (int c); /* printable, including the space */
extern bool c_ispunct (int c); /* printable, but neither space, digit nor letter */
extern bool c_isspace (int c); /* ' ', '\t', '\n', '\v', '\f' or '\r' */
extern bool c_isupper (int c); /* 'A'..'Z' */
extern bool c_isxdigit (int c); /* '0'..'9', 'A'..'F' or 'a'..'f' */
extern int c_tolower (int c); /* maps 'A'..'Z' to 'a'..'z', others unchanged */
extern int c_toupper (int c); /* maps 'a'..'z' to 'A'..'Z', others unchanged */
/* Inline macro versions of the functions declared above.  They are only
enabled when compiling with GCC, with optimization turned on, but not when
optimizing for size, and only if the includer has not defined
NO_C_CTYPE_MACROS.
Each macro is a GNU statement expression "({ ... })" that first copies its
argument into a local variable, so the argument is evaluated exactly once.
A macro is only provided when the C_CTYPE_* condition that makes its
shortcut valid holds; otherwise the name keeps referring to the function. */
#if defined __GNUC__ && defined __OPTIMIZE__ && !defined __OPTIMIZE_SIZE__ && !defined NO_C_CTYPE_MACROS
/* ASCII optimizations. */
#undef c_isascii
#define c_isascii(c) \
({ int __c = (c); \
(__c >= 0x00 && __c <= 0x7f); \
})
#if C_CTYPE_CONSECUTIVE_DIGITS \
&& C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
#if C_CTYPE_ASCII
/* "__c & ~0x20" clears the bit in which ASCII upper- and lowercase letters
differ, so one range test covers both cases. */
#undef c_isalnum
#define c_isalnum(c) \
({ int __c = (c); \
((__c >= '0' && __c <= '9') \
|| ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'Z')); \
})
#else
#undef c_isalnum
#define c_isalnum(c) \
({ int __c = (c); \
((__c >= '0' && __c <= '9') \
|| (__c >= 'A' && __c <= 'Z') \
|| (__c >= 'a' && __c <= 'z')); \
})
#endif
#endif
#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
#if C_CTYPE_ASCII
#undef c_isalpha
#define c_isalpha(c) \
({ int __c = (c); \
((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'Z'); \
})
#else
#undef c_isalpha
#define c_isalpha(c) \
({ int __c = (c); \
((__c >= 'A' && __c <= 'Z') || (__c >= 'a' && __c <= 'z')); \
})
#endif
#endif
#undef c_isblank
#define c_isblank(c) \
({ int __c = (c); \
(__c == ' ' || __c == '\t'); \
})
#if C_CTYPE_ASCII
/* True for 0x00..0x1f (no bit above the low five set) and for 0x7f. */
#undef c_iscntrl
#define c_iscntrl(c) \
({ int __c = (c); \
((__c & ~0x1f) == 0 || __c == 0x7f); \
})
#endif
#if C_CTYPE_CONSECUTIVE_DIGITS
#undef c_isdigit
#define c_isdigit(c) \
({ int __c = (c); \
(__c >= '0' && __c <= '9'); \
})
#endif
#if C_CTYPE_CONSECUTIVE_LOWERCASE
#undef c_islower
#define c_islower(c) \
({ int __c = (c); \
(__c >= 'a' && __c <= 'z'); \
})
#endif
#if C_CTYPE_ASCII
#undef c_isgraph
#define c_isgraph(c) \
({ int __c = (c); \
(__c >= '!' && __c <= '~'); \
})
#endif
#if C_CTYPE_ASCII
#undef c_isprint
#define c_isprint(c) \
({ int __c = (c); \
(__c >= ' ' && __c <= '~'); \
})
#endif
#if C_CTYPE_ASCII
/* The local variable is named _c here, not __c like in the other macros:
this macro passes it on to c_isgraph and c_isalnum, whose expansions declare
"int __c = (argument)".  With an argument that was itself called __c, that
declaration would initialize the new variable from itself. */
#undef c_ispunct
#define c_ispunct(c) \
({ int _c = (c); \
(c_isgraph (_c) && ! c_isalnum (_c)); \
})
#endif
#undef c_isspace
#define c_isspace(c) \
({ int __c = (c); \
(__c == ' ' || __c == '\t' \
|| __c == '\n' || __c == '\v' || __c == '\f' || __c == '\r'); \
})
#if C_CTYPE_CONSECUTIVE_UPPERCASE
#undef c_isupper
#define c_isupper(c) \
({ int __c = (c); \
(__c >= 'A' && __c <= 'Z'); \
})
#endif
#if C_CTYPE_CONSECUTIVE_DIGITS \
&& C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
#if C_CTYPE_ASCII
#undef c_isxdigit
#define c_isxdigit(c) \
({ int __c = (c); \
((__c >= '0' && __c <= '9') \
|| ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'F')); \
})
#else
#undef c_isxdigit
#define c_isxdigit(c) \
({ int __c = (c); \
((__c >= '0' && __c <= '9') \
|| (__c >= 'A' && __c <= 'F') \
|| (__c >= 'a' && __c <= 'f')); \
})
#endif
#endif
#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
/* Case conversion by offset; valid because both alphabets are consecutive. */
#undef c_tolower
#define c_tolower(c) \
({ int __c = (c); \
(__c >= 'A' && __c <= 'Z' ? __c - 'A' + 'a' : __c); \
})
#undef c_toupper
#define c_toupper(c) \
({ int __c = (c); \
(__c >= 'a' && __c <= 'z' ? __c - 'a' + 'A' : __c); \
})
#endif
#endif /* optimizing for speed */
#ifdef __cplusplus
}
#endif
#endif /* C_CTYPE_H */

55
lib/c-strcase.h Normal file
View file

@ -0,0 +1,55 @@
/* Case-insensitive string comparison functions in C locale.
Copyright (C) 1995-1996, 2001, 2003, 2005 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#ifndef C_STRCASE_H
#define C_STRCASE_H
#include <stddef.h>
/* The functions defined in this file assume the "C" locale and a character
set without diacritics (ASCII-US or EBCDIC-US or something like that).
Even if the "C" locale on a particular system is an extension of the ASCII
character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it
is ISO-8859-1), the functions in this file recognize only the ASCII
characters. More precisely, one of the string arguments must be an ASCII
string; the other one can also contain non-ASCII characters (but then
the comparison result will be nonzero). */
#ifdef __cplusplus
extern "C" {
#endif
/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
greater than zero if S1 is lexicographically less than, equal to or greater
than S2.
The implementation compares the bytes as 'unsigned char' values after
folding each one with c_tolower, and returns 0 right away when S1 and S2
are the same pointer. */
extern int c_strcasecmp (const char *s1, const char *s2);
/* Compare no more than N characters of strings S1 and S2, ignoring case,
returning less than, equal to or greater than zero if S1 is
lexicographically less than, equal to or greater than S2. */
extern int c_strncasecmp (const char *s1, const char *s2, size_t n);
#ifdef __cplusplus
}
#endif
#endif /* C_STRCASE_H */

57
lib/c-strcasecmp.c Normal file
View file

@ -0,0 +1,57 @@
/* c-strcasecmp.c -- case insensitive string comparator in C locale
Copyright (C) 1998-1999, 2005-2006 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#include <config.h>
/* Specification. */
#include "c-strcase.h"
#include <limits.h>
#include "c-ctype.h"
/* Compare the NUL-terminated strings S1 and S2 without regard to the case
   of ASCII letters (as folded by c_tolower).  The result is negative, zero
   or positive according to whether S1 sorts before, equal to or after S2.  */
int
c_strcasecmp (const char *s1, const char *s2)
{
  const unsigned char *lhs = (const unsigned char *) s1;
  const unsigned char *rhs = (const unsigned char *) s2;
  unsigned char a, b;

  /* A string always equals itself; no need to scan it.  */
  if (lhs == rhs)
    return 0;

  /* Advance in lock step until the folded characters differ or the end of
     S1 is reached.  */
  for (;; lhs++, rhs++)
    {
      a = c_tolower (*lhs);
      b = c_tolower (*rhs);
      if (a == '\0' || a != b)
        break;
    }

  if (UCHAR_MAX <= INT_MAX)
    return a - b;

  /* On machines where 'char' and 'int' are types of the same size, the
     difference of two 'unsigned char' values - including the sign bit -
     doesn't fit in an 'int', so only report the sign.  */
  return (a > b) - (a < b);
}

184
lib/c-strcaseeq.h Normal file
View file

@ -0,0 +1,184 @@
/* Optimized case-insensitive string comparison in C locale.
Copyright (C) 2001-2002, 2007 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Written by Bruno Haible <bruno@clisp.org>. */
#include "c-strcase.h"
#include "c-ctype.h"
/* STRCASEEQ allows optimizing string comparison with a small literal string.
STRCASEEQ (s, "UTF-8", 'U','T','F','-','8',0,0,0,0)
is semantically equivalent to
c_strcasecmp (s, "UTF-8") == 0
just faster. */
/* Help GCC to generate good code for string comparisons with
immediate strings. */
#if defined (__GNUC__) && defined (__OPTIMIZE__)
/* Case insensitive comparison of ASCII characters. */
/* CASEEQ (OTHER, UPPER) is nonzero when the character OTHER matches UPPER
   ignoring case.  NOTE(review): UPPER appears to be expected in upper case
   (or to be a non-letter), as in the STRCASEEQ usage example above; the last
   variant below can only match under that assumption - confirm with callers.  */
# if C_CTYPE_ASCII
/* ASCII: upper and lower case letters differ only in bit 0x20, so clearing
   that bit in OTHER folds a lower case letter onto its upper case form.  */
# define CASEEQ(other,upper) \
(c_isupper (upper) ? ((other) & ~0x20) == (upper) : (other) == (upper))
# elif C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
/* Letters are contiguous in both cases: the lower case counterpart of UPPER
   is found at the same offset from 'a' as UPPER is from 'A'.  */
# define CASEEQ(other,upper) \
(c_isupper (upper) ? (other) == (upper) || (other) == (upper) - 'A' + 'a' : (other) == (upper))
# else
/* Generic fallback: convert OTHER with c_toupper and compare.  */
# define CASEEQ(other,upper) \
(c_toupper (other) == (upper))
# endif
/* Last step of the STRCASEEQ chain: the first nine characters have already
   been matched, so compare whatever follows them with c_strcasecmp.
   Returns 1 if the remainders are equal ignoring case, 0 otherwise.  */
static inline int
strcaseeq9 (const char *s1, const char *s2)
{
  const char *rest1 = s1 + 9;
  const char *rest2 = s2 + 9;

  return c_strcasecmp (rest1, rest2) == 0;
}
/* Step 8 of the STRCASEEQ chain: match S1[8] against S28, the character at
   index 8 of the literal S2.  Returns 1 on a full match, 0 on a mismatch.  */
static inline int
strcaseeq8 (const char *s1, const char *s2, char s28)
{
  /* Mismatch at this position: the strings differ.  */
  if (!CASEEQ (s1[8], s28))
    return 0;

  /* Both strings end here: they are equal.  */
  if (s28 == 0)
    return 1;

  /* Otherwise hand the remaining characters to the general comparison.  */
  return strcaseeq9 (s1, s2);
}
/* Step 7 of the STRCASEEQ chain: match S1[7] against S27, the character at
   index 7 of the literal S2; S28 is the literal's following character.
   Returns 1 on a full match, 0 on a mismatch.  */
static inline int
strcaseeq7 (const char *s1, const char *s2, char s27, char s28)
{
  /* Mismatch at this position: the strings differ.  */
  if (!CASEEQ (s1[7], s27))
    return 0;

  /* Both strings end here: they are equal.  */
  if (s27 == 0)
    return 1;

  /* Otherwise continue with the next character.  */
  return strcaseeq8 (s1, s2, s28);
}
/* Step 6 of the STRCASEEQ chain: match S1[6] against S26, the character at
   index 6 of the literal S2; S27 and S28 are the literal's following
   characters.  Returns 1 on a full match, 0 on a mismatch.  */
static inline int
strcaseeq6 (const char *s1, const char *s2, char s26, char s27, char s28)
{
  /* Mismatch at this position: the strings differ.  */
  if (!CASEEQ (s1[6], s26))
    return 0;

  /* Both strings end here: they are equal.  */
  if (s26 == 0)
    return 1;

  /* Otherwise continue with the next character.  */
  return strcaseeq7 (s1, s2, s27, s28);
}
/* Step 5 of the STRCASEEQ chain: match S1[5] against S25, the character at
   index 5 of the literal S2; S26..S28 are the literal's following
   characters.  Returns 1 on a full match, 0 on a mismatch.  */
static inline int
strcaseeq5 (const char *s1, const char *s2, char s25, char s26, char s27, char s28)
{
  /* Mismatch at this position: the strings differ.  */
  if (!CASEEQ (s1[5], s25))
    return 0;

  /* Both strings end here: they are equal.  */
  if (s25 == 0)
    return 1;

  /* Otherwise continue with the next character.  */
  return strcaseeq6 (s1, s2, s26, s27, s28);
}
/* Step 4 of the STRCASEEQ chain: match S1[4] against S24, the character at
   index 4 of the literal S2; S25..S28 are the literal's following
   characters.  Returns 1 on a full match, 0 on a mismatch.  */
static inline int
strcaseeq4 (const char *s1, const char *s2, char s24, char s25, char s26, char s27, char s28)
{
  /* Mismatch at this position: the strings differ.  */
  if (!CASEEQ (s1[4], s24))
    return 0;

  /* Both strings end here: they are equal.  */
  if (s24 == 0)
    return 1;

  /* Otherwise continue with the next character.  */
  return strcaseeq5 (s1, s2, s25, s26, s27, s28);
}
/* Step 3 of the STRCASEEQ chain: match S1[3] against S23, the character at
   index 3 of the literal S2; S24..S28 are the literal's following
   characters.  Returns 1 on a full match, 0 on a mismatch.  */
static inline int
strcaseeq3 (const char *s1, const char *s2, char s23, char s24, char s25, char s26, char s27, char s28)
{
  /* Mismatch at this position: the strings differ.  */
  if (!CASEEQ (s1[3], s23))
    return 0;

  /* Both strings end here: they are equal.  */
  if (s23 == 0)
    return 1;

  /* Otherwise continue with the next character.  */
  return strcaseeq4 (s1, s2, s24, s25, s26, s27, s28);
}
/* Step 2 of the STRCASEEQ chain: match S1[2] against S22, the character at
   index 2 of the literal S2; S23..S28 are the literal's following
   characters.  Returns 1 on a full match, 0 on a mismatch.  */
static inline int
strcaseeq2 (const char *s1, const char *s2, char s22, char s23, char s24, char s25, char s26, char s27, char s28)
{
  /* Mismatch at this position: the strings differ.  */
  if (!CASEEQ (s1[2], s22))
    return 0;

  /* Both strings end here: they are equal.  */
  if (s22 == 0)
    return 1;

  /* Otherwise continue with the next character.  */
  return strcaseeq3 (s1, s2, s23, s24, s25, s26, s27, s28);
}
/* Step 1 of the STRCASEEQ chain: match S1[1] against S21, the character at
   index 1 of the literal S2; S22..S28 are the literal's following
   characters.  Returns 1 on a full match, 0 on a mismatch.  */
static inline int
strcaseeq1 (const char *s1, const char *s2, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28)
{
  /* Mismatch at this position: the strings differ.  */
  if (!CASEEQ (s1[1], s21))
    return 0;

  /* Both strings end here: they are equal.  */
  if (s21 == 0)
    return 1;

  /* Otherwise continue with the next character.  */
  return strcaseeq2 (s1, s2, s22, s23, s24, s25, s26, s27, s28);
}
/* Entry point of the STRCASEEQ chain: match S1[0] against S20, the first
   character of the literal S2; S21..S28 are the literal's following
   characters.  Returns 1 if S1 equals S2 ignoring case, 0 otherwise.  */
static inline int
strcaseeq0 (const char *s1, const char *s2, char s20, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28)
{
  /* Mismatch at this position: the strings differ.  */
  if (!CASEEQ (s1[0], s20))
    return 0;

  /* Both strings are empty: they are equal.  */
  if (s20 == 0)
    return 1;

  /* Otherwise continue with the next character.  */
  return strcaseeq1 (s1, s2, s21, s22, s23, s24, s25, s26, s27, s28);
}
#define STRCASEEQ(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \
strcaseeq0 (s1, s2, s20, s21, s22, s23, s24, s25, s26, s27, s28)
#else
#define STRCASEEQ(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \
(c_strcasecmp (s1, s2) == 0)
#endif

57
lib/c-strncasecmp.c Normal file
View file

@ -0,0 +1,57 @@
/* c-strncasecmp.c -- case insensitive string comparator in C locale
Copyright (C) 1998-1999, 2005-2006 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#include <config.h>
/* Specification. */
#include "c-strcase.h"
#include <limits.h>
#include "c-ctype.h"
/* Compare at most N characters of the strings S1 and S2 without regard to
   the case of ASCII letters (as folded by c_tolower).  The result is
   negative, zero or positive according to whether S1 sorts before, equal to
   or after S2 within that prefix.  */
int
c_strncasecmp (const char *s1, const char *s2, size_t n)
{
  const unsigned char *lhs = (const unsigned char *) s1;
  const unsigned char *rhs = (const unsigned char *) s2;
  unsigned char a, b;

  /* Identical pointers or an empty prefix always compare equal.  */
  if (lhs == rhs || n == 0)
    return 0;

  /* Advance in lock step until the character budget is used up, the end of
     S1 is reached, or the folded characters differ.  */
  for (;; lhs++, rhs++)
    {
      a = c_tolower (*lhs);
      b = c_tolower (*rhs);
      if (--n == 0 || a == '\0' || a != b)
        break;
    }

  if (UCHAR_MAX <= INT_MAX)
    return a - b;

  /* On machines where 'char' and 'int' are types of the same size, the
     difference of two 'unsigned char' values - including the sign bit -
     doesn't fit in an 'int', so only report the sign.  */
  return (a > b) - (a < b);
}

450
lib/iconv.c Normal file
View file

@ -0,0 +1,450 @@
/* Character set conversion.
Copyright (C) 1999-2001, 2007 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#include <config.h>
/* Specification. */
#include <iconv.h>
#include <stddef.h>
#if REPLACE_ICONV_UTF
# include <errno.h>
# include <stdint.h>
# include <stdlib.h>
# include "unistr.h"
# ifndef uintptr_t
# define uintptr_t unsigned long
# endif
#endif
#if REPLACE_ICONV_UTF
/* UTF-{16,32}{BE,LE} converters taken from GNU libiconv 1.11. */
/* Return code if invalid. (xxx_mbtowc) */
# define RET_ILSEQ -1
/* Return code if no bytes were read. (xxx_mbtowc) */
# define RET_TOOFEW -2
/* Return code if invalid. (xxx_wctomb) */
# define RET_ILUNI -1
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
# define RET_TOOSMALL -2
/*
* UTF-16BE
*/
/* Specification: RFC 2781 */
/* Decode one character from the N bytes of big-endian UTF-16 at S.
   On success store the code point in *PWC and return the number of bytes
   consumed (2 or 4).  Return RET_ILSEQ for an invalid surrogate sequence and
   RET_TOOFEW when the input ends before the character is complete.  */
static int
utf16be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
{
  ucs4_t lead;
  ucs4_t trail;

  if (n < 2)
    return RET_TOOFEW;

  lead = (s[0] << 8) + s[1];

  /* A low surrogate may not start a character.  */
  if (lead >= 0xdc00 && lead < 0xe000)
    return RET_ILSEQ;

  /* Anything that is not a high surrogate is a complete character.  */
  if (!(lead >= 0xd800 && lead < 0xdc00))
    {
      *pwc = lead;
      return 2;
    }

  /* High surrogate: a second 16-bit unit is required.  */
  if (n < 4)
    return RET_TOOFEW;

  trail = (s[2] << 8) + s[3];
  if (!(trail >= 0xdc00 && trail < 0xe000))
    return RET_ILSEQ;

  *pwc = 0x10000 + ((lead - 0xd800) << 10) + (trail - 0xdc00);
  return 4;
}
/* Encode the code point WC as big-endian UTF-16 into the N-byte buffer R.
   Return the number of bytes written (2 or 4), RET_TOOSMALL when R cannot
   hold the result, or RET_ILUNI when WC is a surrogate or lies beyond
   0x10FFFF.  */
static int
utf16be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
{
  /* Surrogates and values past the Unicode range cannot be encoded.  */
  if ((wc >= 0xd800 && wc < 0xe000) || wc >= 0x110000)
    return RET_ILUNI;

  if (wc < 0x10000)
    {
      /* Basic Multilingual Plane: a single 16-bit unit.  */
      if (n < 2)
        return RET_TOOSMALL;
      r[0] = (unsigned char) (wc >> 8);
      r[1] = (unsigned char) wc;
      return 2;
    }

  /* Supplementary planes: a surrogate pair.  */
  if (n < 4)
    return RET_TOOSMALL;
  {
    ucs4_t high = 0xd800 + ((wc - 0x10000) >> 10);
    ucs4_t low = 0xdc00 + ((wc - 0x10000) & 0x3ff);

    r[0] = (unsigned char) (high >> 8);
    r[1] = (unsigned char) high;
    r[2] = (unsigned char) (low >> 8);
    r[3] = (unsigned char) low;
  }
  return 4;
}
/*
* UTF-16LE
*/
/* Specification: RFC 2781 */
/* Decode one character from the N bytes of little-endian UTF-16 at S.
   On success store the code point in *PWC and return the number of bytes
   consumed (2 or 4).  Return RET_ILSEQ for an invalid surrogate sequence and
   RET_TOOFEW when the input ends before the character is complete.  */
static int
utf16le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
{
  ucs4_t lead;
  ucs4_t trail;

  if (n < 2)
    return RET_TOOFEW;

  lead = s[0] + (s[1] << 8);

  /* A low surrogate may not start a character.  */
  if (lead >= 0xdc00 && lead < 0xe000)
    return RET_ILSEQ;

  /* Anything that is not a high surrogate is a complete character.  */
  if (!(lead >= 0xd800 && lead < 0xdc00))
    {
      *pwc = lead;
      return 2;
    }

  /* High surrogate: a second 16-bit unit is required.  */
  if (n < 4)
    return RET_TOOFEW;

  trail = s[2] + (s[3] << 8);
  if (!(trail >= 0xdc00 && trail < 0xe000))
    return RET_ILSEQ;

  *pwc = 0x10000 + ((lead - 0xd800) << 10) + (trail - 0xdc00);
  return 4;
}
/* Encode the code point WC as little-endian UTF-16 into the N-byte buffer R.
   Return the number of bytes written (2 or 4), RET_TOOSMALL when R cannot
   hold the result, or RET_ILUNI when WC is a surrogate or lies beyond
   0x10FFFF.  */
static int
utf16le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
{
  /* Surrogates and values past the Unicode range cannot be encoded.  */
  if ((wc >= 0xd800 && wc < 0xe000) || wc >= 0x110000)
    return RET_ILUNI;

  if (wc < 0x10000)
    {
      /* Basic Multilingual Plane: a single 16-bit unit.  */
      if (n < 2)
        return RET_TOOSMALL;
      r[0] = (unsigned char) wc;
      r[1] = (unsigned char) (wc >> 8);
      return 2;
    }

  /* Supplementary planes: a surrogate pair.  */
  if (n < 4)
    return RET_TOOSMALL;
  {
    ucs4_t high = 0xd800 + ((wc - 0x10000) >> 10);
    ucs4_t low = 0xdc00 + ((wc - 0x10000) & 0x3ff);

    r[0] = (unsigned char) high;
    r[1] = (unsigned char) (high >> 8);
    r[2] = (unsigned char) low;
    r[3] = (unsigned char) (low >> 8);
  }
  return 4;
}
/*
* UTF-32BE
*/
/* Specification: Unicode 3.1 Standard Annex #19 */
/* Decode one character from the N bytes of big-endian UTF-32 at S.
   On success store the code point in *PWC and return 4.  Return RET_ILSEQ
   when the value is a surrogate or lies beyond 0x10FFFF, and RET_TOOFEW when
   fewer than 4 bytes are available.  */
static int
utf32be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
{
  if (n >= 4)
    {
      /* Widen each byte to ucs4_t before shifting.  An 'unsigned char' is
         promoted to (signed) 'int', so 's[0] << 24' with a lead byte >= 0x80
         would overflow 'int', which is undefined behavior; such input must
         instead be assembled reliably and then rejected by the range check
         below.  */
      ucs4_t wc = ((ucs4_t) s[0] << 24) + ((ucs4_t) s[1] << 16)
                  + ((ucs4_t) s[2] << 8) + (ucs4_t) s[3];
      if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
        {
          *pwc = wc;
          return 4;
        }
      else
        return RET_ILSEQ;
    }
  return RET_TOOFEW;
}
/* Encode the code point WC as big-endian UTF-32 into the N-byte buffer R.
   Return 4 on success, RET_TOOSMALL when R holds fewer than 4 bytes, or
   RET_ILUNI when WC is a surrogate or lies beyond 0x10FFFF.  */
static int
utf32be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
{
  /* Surrogates and values past the Unicode range cannot be encoded.  */
  if (wc >= 0x110000 || (wc >= 0xd800 && wc < 0xe000))
    return RET_ILUNI;

  if (n < 4)
    return RET_TOOSMALL;

  /* The most significant byte is always zero for a valid code point.  */
  r[0] = 0;
  r[1] = (unsigned char) (wc >> 16);
  r[2] = (unsigned char) (wc >> 8);
  r[3] = (unsigned char) wc;
  return 4;
}
/*
* UTF-32LE
*/
/* Specification: Unicode 3.1 Standard Annex #19 */
/* Decode one character from the N bytes of little-endian UTF-32 at S.
   On success store the code point in *PWC and return 4.  Return RET_ILSEQ
   when the value is a surrogate or lies beyond 0x10FFFF, and RET_TOOFEW when
   fewer than 4 bytes are available.  */
static int
utf32le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
{
  if (n >= 4)
    {
      /* Widen each byte to ucs4_t before shifting.  An 'unsigned char' is
         promoted to (signed) 'int', so 's[3] << 24' with a top byte >= 0x80
         would overflow 'int', which is undefined behavior; such input must
         instead be assembled reliably and then rejected by the range check
         below.  */
      ucs4_t wc = (ucs4_t) s[0] + ((ucs4_t) s[1] << 8)
                  + ((ucs4_t) s[2] << 16) + ((ucs4_t) s[3] << 24);
      if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
        {
          *pwc = wc;
          return 4;
        }
      else
        return RET_ILSEQ;
    }
  return RET_TOOFEW;
}
/* Encode the code point WC as little-endian UTF-32 into the N-byte buffer R.
   Return 4 on success, RET_TOOSMALL when R holds fewer than 4 bytes, or
   RET_ILUNI when WC is a surrogate or lies beyond 0x10FFFF.  */
static int
utf32le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
{
  /* Surrogates and values past the Unicode range cannot be encoded.  */
  if (wc >= 0x110000 || (wc >= 0xd800 && wc < 0xe000))
    return RET_ILUNI;

  if (n < 4)
    return RET_TOOSMALL;

  /* The most significant byte is always zero for a valid code point.  */
  r[0] = (unsigned char) wc;
  r[1] = (unsigned char) (wc >> 8);
  r[2] = (unsigned char) (wc >> 16);
  r[3] = 0;
  return 4;
}
#endif
/* Replacement for iconv().
   When REPLACE_ICONV_UTF is set and CD is one of the special _ICONV_*
   descriptor constants, the conversion between UTF-8 and
   UTF-{16,32}{BE,LE} is performed here with the converters defined above;
   any other descriptor is passed on to the system's iconv().
   For the built-in conversions: returns 0 when the whole input was
   converted, or (size_t)(-1) with errno set to EILSEQ, EINVAL or E2BIG.
   In both cases *INBUF, *INBYTESLEFT, *OUTBUF and *OUTBYTESLEFT are updated
   to reflect the characters converted so far.  */
size_t
rpl_iconv (iconv_t cd,
ICONV_CONST char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft)
/* Drop the 'iconv' macro from here on, so that the call at the end of this
   function reaches the real iconv() rather than this replacement.  */
#undef iconv
{
#if REPLACE_ICONV_UTF
/* The special descriptors are plain constants; dispatch on their integer
   value.  */
switch ((uintptr_t) cd)
{
{
/* UTF-8 -> UTF-16/32: select the encoder, then share one loop.  The case
   labels live inside this block so that they can see xxx_wctomb.  */
int (*xxx_wctomb) (unsigned char *, ucs4_t, size_t);
case (uintptr_t) _ICONV_UTF8_UTF16BE:
xxx_wctomb = utf16be_wctomb;
goto loop_from_utf8;
case (uintptr_t) _ICONV_UTF8_UTF16LE:
xxx_wctomb = utf16le_wctomb;
goto loop_from_utf8;
case (uintptr_t) _ICONV_UTF8_UTF32BE:
xxx_wctomb = utf32be_wctomb;
goto loop_from_utf8;
case (uintptr_t) _ICONV_UTF8_UTF32LE:
xxx_wctomb = utf32le_wctomb;
goto loop_from_utf8;
loop_from_utf8:
/* A call without input has nothing to do for these conversions.  */
if (inbuf == NULL || *inbuf == NULL)
return 0;
{
/* Work on local copies; they are written back after the loop, on success
   and on error alike.  */
ICONV_CONST char *inptr = *inbuf;
size_t inleft = *inbytesleft;
char *outptr = *outbuf;
size_t outleft = *outbytesleft;
size_t res = 0;
while (inleft > 0)
{
ucs4_t uc;
/* Decode one UTF-8 character; M is the number of bytes it occupies.  */
int m = u8_mbtoucr (&uc, (const uint8_t *) inptr, inleft);
if (m <= 0)
{
if (m == -1)
{
errno = EILSEQ;
res = (size_t)(-1);
break;
}
/* -2 is reported as EINVAL (presumably an incomplete character at the end
   of the input - see the u8_mbtoucr documentation).  */
if (m == -2)
{
errno = EINVAL;
res = (size_t)(-1);
break;
}
/* Any other non-positive result is unexpected.  */
abort ();
}
else
{
/* Encode the character; N is the number of bytes produced.  */
int n = xxx_wctomb ((uint8_t *) outptr, uc, outleft);
if (n < 0)
{
if (n == RET_ILUNI)
{
errno = EILSEQ;
res = (size_t)(-1);
break;
}
if (n == RET_TOOSMALL)
{
errno = E2BIG;
res = (size_t)(-1);
break;
}
abort ();
}
else
{
/* Commit the character only once both steps succeeded.  */
inptr += m;
inleft -= m;
outptr += n;
outleft -= n;
}
}
}
*inbuf = inptr;
*inbytesleft = inleft;
*outbuf = outptr;
*outbytesleft = outleft;
return res;
}
}
{
/* UTF-16/32 -> UTF-8: select the decoder, then share one loop.  */
int (*xxx_mbtowc) (ucs4_t *, const unsigned char *, size_t);
case (uintptr_t) _ICONV_UTF16BE_UTF8:
xxx_mbtowc = utf16be_mbtowc;
goto loop_to_utf8;
case (uintptr_t) _ICONV_UTF16LE_UTF8:
xxx_mbtowc = utf16le_mbtowc;
goto loop_to_utf8;
case (uintptr_t) _ICONV_UTF32BE_UTF8:
xxx_mbtowc = utf32be_mbtowc;
goto loop_to_utf8;
case (uintptr_t) _ICONV_UTF32LE_UTF8:
xxx_mbtowc = utf32le_mbtowc;
goto loop_to_utf8;
loop_to_utf8:
/* A call without input has nothing to do for these conversions.  */
if (inbuf == NULL || *inbuf == NULL)
return 0;
{
/* Work on local copies; they are written back after the loop, on success
   and on error alike.  */
ICONV_CONST char *inptr = *inbuf;
size_t inleft = *inbytesleft;
char *outptr = *outbuf;
size_t outleft = *outbytesleft;
size_t res = 0;
while (inleft > 0)
{
ucs4_t uc;
/* Decode one UTF-16/32 character; M is the number of bytes it occupies.  */
int m = xxx_mbtowc (&uc, (const uint8_t *) inptr, inleft);
if (m <= 0)
{
if (m == RET_ILSEQ)
{
errno = EILSEQ;
res = (size_t)(-1);
break;
}
/* The input ends in the middle of a character.  */
if (m == RET_TOOFEW)
{
errno = EINVAL;
res = (size_t)(-1);
break;
}
abort ();
}
else
{
/* Encode the character as UTF-8; N is the number of bytes produced.  */
int n = u8_uctomb ((uint8_t *) outptr, uc, outleft);
if (n < 0)
{
if (n == -1)
{
errno = EILSEQ;
res = (size_t)(-1);
break;
}
/* -2 is reported as E2BIG (presumably the output buffer is too small - see
   the u8_uctomb documentation).  */
if (n == -2)
{
errno = E2BIG;
res = (size_t)(-1);
break;
}
abort ();
}
else
{
/* Commit the character only once both steps succeeded.  */
inptr += m;
inleft -= m;
outptr += n;
outleft -= n;
}
}
}
*inbuf = inptr;
*inbytesleft = inleft;
*outbuf = outptr;
*outbytesleft = outleft;
return res;
}
}
}
#endif
/* Not one of the special descriptors: use the system's iconv().  */
return iconv (cd, inbuf, inbytesleft, outbuf, outbytesleft);
}

71
lib/iconv.in.h Normal file
View file

@ -0,0 +1,71 @@
/* A GNU-like <iconv.h>.
Copyright (C) 2007-2008 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#ifndef _GL_ICONV_H
#if __GNUC__ >= 3
@PRAGMA_SYSTEM_HEADER@
#endif
/* The include_next requires a split double-inclusion guard. */
#@INCLUDE_NEXT@ @NEXT_ICONV_H@
#ifndef _GL_ICONV_H
#define _GL_ICONV_H
#ifdef __cplusplus
extern "C" {
#endif
#if @REPLACE_ICONV_OPEN@
/* An iconv_open wrapper that supports the IANA standardized encoding names
("ISO-8859-1" etc.) as far as possible. */
# define iconv_open rpl_iconv_open
extern iconv_t iconv_open (const char *tocode, const char *fromcode);
#endif
#if @REPLACE_ICONV_UTF@
/* Special constants for supporting UTF-{16,32}{BE,LE} encodings.
Not public. */
# define _ICONV_UTF8_UTF16BE (iconv_t)(-161)
# define _ICONV_UTF8_UTF16LE (iconv_t)(-162)
# define _ICONV_UTF8_UTF32BE (iconv_t)(-163)
# define _ICONV_UTF8_UTF32LE (iconv_t)(-164)
# define _ICONV_UTF16BE_UTF8 (iconv_t)(-165)
# define _ICONV_UTF16LE_UTF8 (iconv_t)(-166)
# define _ICONV_UTF32BE_UTF8 (iconv_t)(-167)
# define _ICONV_UTF32LE_UTF8 (iconv_t)(-168)
#endif
/* NOTE(review): @REPLACE_ICONV@ and @ICONV_CONST@ are placeholders in this
   header template, presumably substituted when <iconv.h> is generated -
   confirm against the build rules.  */
#if @REPLACE_ICONV@
/* Redirect iconv to the replacement; because of the macro, the declaration
   below actually declares rpl_iconv.  */
# define iconv rpl_iconv
extern size_t iconv (iconv_t cd,
@ICONV_CONST@ char **inbuf, size_t *inbytesleft,
char **outbuf, size_t *outbytesleft);
/* Likewise redirect iconv_close; the declaration below declares
   rpl_iconv_close.  */
# define iconv_close rpl_iconv_close
extern int iconv_close (iconv_t cd);
#endif
#ifdef __cplusplus
}
#endif
#endif /* _GL_ICONV_H */
#endif /* _GL_ICONV_H */

47
lib/iconv_close.c Normal file
View file

@ -0,0 +1,47 @@
/* Character set conversion.
Copyright (C) 2007 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#include <config.h>
/* Specification. */
#include <iconv.h>
#include <stdint.h>
#ifndef uintptr_t
# define uintptr_t unsigned long
#endif
/* Replacement for iconv_close().
   The special _ICONV_* descriptors used for the built-in UTF conversions are
   plain constants, so closing one of them simply succeeds; every other
   descriptor is handed to the system's iconv_close().  */
int
rpl_iconv_close (iconv_t cd)
#undef iconv_close
{
#if REPLACE_ICONV_UTF
  const uintptr_t handle = (uintptr_t) cd;

  /* Nothing to release for the built-in converters.  */
  if (handle == (uintptr_t) _ICONV_UTF8_UTF16BE
      || handle == (uintptr_t) _ICONV_UTF8_UTF16LE
      || handle == (uintptr_t) _ICONV_UTF8_UTF32BE
      || handle == (uintptr_t) _ICONV_UTF8_UTF32LE
      || handle == (uintptr_t) _ICONV_UTF16BE_UTF8
      || handle == (uintptr_t) _ICONV_UTF16LE_UTF8
      || handle == (uintptr_t) _ICONV_UTF32BE_UTF8
      || handle == (uintptr_t) _ICONV_UTF32LE_UTF8)
    return 0;
#endif
  return iconv_close (cd);
}

44
lib/iconv_open-aix.gperf Normal file
View file

@ -0,0 +1,44 @@
struct mapping { int standard_name; const char vendor_name[10 + 1]; };
%struct-type
%language=ANSI-C
%define slot-name standard_name
%define hash-function-name mapping_hash
%define lookup-function-name mapping_lookup
%readonly-tables
%global-table
%define word-array-name mappings
%pic
%%
# On AIX 5.1, look in /usr/lib/nls/loc/uconvTable.
ISO-8859-1, "ISO8859-1"
ISO-8859-2, "ISO8859-2"
ISO-8859-3, "ISO8859-3"
ISO-8859-4, "ISO8859-4"
ISO-8859-5, "ISO8859-5"
ISO-8859-6, "ISO8859-6"
ISO-8859-7, "ISO8859-7"
ISO-8859-8, "ISO8859-8"
ISO-8859-9, "ISO8859-9"
ISO-8859-15, "ISO8859-15"
CP437, "IBM-437"
CP850, "IBM-850"
CP852, "IBM-852"
CP856, "IBM-856"
CP857, "IBM-857"
CP861, "IBM-861"
CP865, "IBM-865"
CP869, "IBM-869"
ISO-8859-13, "IBM-921"
CP922, "IBM-922"
CP932, "IBM-932"
CP943, "IBM-943"
CP1046, "IBM-1046"
CP1124, "IBM-1124"
CP1125, "IBM-1125"
CP1129, "IBM-1129"
CP1252, "IBM-1252"
GB2312, "IBM-eucCN"
EUC-JP, "IBM-eucJP"
EUC-KR, "IBM-eucKR"
EUC-TW, "IBM-eucTW"
BIG5, "big5"

56
lib/iconv_open-hpux.gperf Normal file
View file

@ -0,0 +1,56 @@
struct mapping { int standard_name; const char vendor_name[9 + 1]; };
%struct-type
%language=ANSI-C
%define slot-name standard_name
%define hash-function-name mapping_hash
%define lookup-function-name mapping_lookup
%readonly-tables
%global-table
%define word-array-name mappings
%pic
%%
# On HP-UX 11.11, look in /usr/lib/nls/iconv.
ISO-8859-1, "iso88591"
ISO-8859-2, "iso88592"
ISO-8859-5, "iso88595"
ISO-8859-6, "iso88596"
ISO-8859-7, "iso88597"
ISO-8859-8, "iso88598"
ISO-8859-9, "iso88599"
ISO-8859-15, "iso885915"
CP437, "cp437"
CP775, "cp775"
CP850, "cp850"
CP852, "cp852"
CP855, "cp855"
CP857, "cp857"
CP861, "cp861"
CP862, "cp862"
CP864, "cp864"
CP865, "cp865"
CP866, "cp866"
CP869, "cp869"
CP874, "cp874"
CP1250, "cp1250"
CP1251, "cp1251"
CP1252, "cp1252"
CP1253, "cp1253"
CP1254, "cp1254"
CP1255, "cp1255"
CP1256, "cp1256"
CP1257, "cp1257"
CP1258, "cp1258"
HP-ROMAN8, "roman8"
HP-ARABIC8, "arabic8"
HP-GREEK8, "greek8"
HP-HEBREW8, "hebrew8"
HP-TURKISH8, "turkish8"
HP-KANA8, "kana8"
TIS-620, "tis620"
GB2312, "hp15CN"
EUC-JP, "eucJP"
EUC-KR, "eucKR"
EUC-TW, "eucTW"
BIG5, "big5"
SHIFT_JIS, "sjis"
UTF-8, "utf8"

31
lib/iconv_open-irix.gperf Normal file
View file

@ -0,0 +1,31 @@
struct mapping { int standard_name; const char vendor_name[10 + 1]; };
%struct-type
%language=ANSI-C
%define slot-name standard_name
%define hash-function-name mapping_hash
%define lookup-function-name mapping_lookup
%readonly-tables
%global-table
%define word-array-name mappings
%pic
%%
# On IRIX 6.5, look in /usr/lib/iconv and /usr/lib/international/encodings.
ISO-8859-1, "ISO8859-1"
ISO-8859-2, "ISO8859-2"
ISO-8859-3, "ISO8859-3"
ISO-8859-4, "ISO8859-4"
ISO-8859-5, "ISO8859-5"
ISO-8859-6, "ISO8859-6"
ISO-8859-7, "ISO8859-7"
ISO-8859-8, "ISO8859-8"
ISO-8859-9, "ISO8859-9"
ISO-8859-15, "ISO8859-15"
KOI8-R, "KOI8"
CP855, "DOS855"
CP1251, "WIN1251"
GB2312, "eucCN"
EUC-JP, "eucJP"
EUC-KR, "eucKR"
EUC-TW, "eucTW"
SHIFT_JIS, "sjis"
TIS-620, "TIS620"

50
lib/iconv_open-osf.gperf Normal file
View file

@ -0,0 +1,50 @@
struct mapping { int standard_name; const char vendor_name[10 + 1]; };
%struct-type
%language=ANSI-C
%define slot-name standard_name
%define hash-function-name mapping_hash
%define lookup-function-name mapping_lookup
%readonly-tables
%global-table
%define word-array-name mappings
%pic
%%
# On OSF/1 5.1, look in /usr/lib/nls/loc/iconv.
ISO-8859-1, "ISO8859-1"
ISO-8859-2, "ISO8859-2"
ISO-8859-3, "ISO8859-3"
ISO-8859-4, "ISO8859-4"
ISO-8859-5, "ISO8859-5"
ISO-8859-6, "ISO8859-6"
ISO-8859-7, "ISO8859-7"
ISO-8859-8, "ISO8859-8"
ISO-8859-9, "ISO8859-9"
ISO-8859-15, "ISO8859-15"
CP437, "cp437"
CP775, "cp775"
CP850, "cp850"
CP852, "cp852"
CP855, "cp855"
CP857, "cp857"
CP861, "cp861"
CP862, "cp862"
CP865, "cp865"
CP866, "cp866"
CP869, "cp869"
CP874, "cp874"
CP949, "KSC5601"
CP1250, "cp1250"
CP1251, "cp1251"
CP1252, "cp1252"
CP1253, "cp1253"
CP1254, "cp1254"
CP1255, "cp1255"
CP1256, "cp1256"
CP1257, "cp1257"
CP1258, "cp1258"
EUC-JP, "eucJP"
EUC-KR, "eucKR"
EUC-TW, "eucTW"
BIG5, "big5"
SHIFT_JIS, "SJIS"
TIS-620, "TACTIS"

172
lib/iconv_open.c Normal file
View file

@ -0,0 +1,172 @@
/* Character set conversion.
Copyright (C) 2007 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along
with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#include <config.h>
/* Specification. */
#include <iconv.h>
#include <errno.h>
#include <string.h>
#include "c-ctype.h"
#include "c-strcase.h"
#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
/* Namespace cleanliness. */
#define mapping_lookup rpl_iconv_open_mapping_lookup
/* The macro ICONV_FLAVOR is defined to one of these or undefined. */
#define ICONV_FLAVOR_AIX "iconv_open-aix.h"
#define ICONV_FLAVOR_HPUX "iconv_open-hpux.h"
#define ICONV_FLAVOR_IRIX "iconv_open-irix.h"
#define ICONV_FLAVOR_OSF "iconv_open-osf.h"
#ifdef ICONV_FLAVOR
# include ICONV_FLAVOR
#endif
/* Replacement for iconv_open().
   Returns a conversion descriptor from FROMCODE to TOCODE, or (iconv_t)(-1)
   on failure.  In order, it
     1. returns a special _ICONV_* constant for conversions between UTF-8
        and UTF-{16,32}{BE,LE} (only if REPLACE_ICONV_UTF),
     2. tries the system's iconv_open() with the names as given,
     3. retries with the names converted to upper case and, if ICONV_FLAVOR
        is defined, mapped to the vendor's names.
   Fails with errno = EINVAL when an encoding name does not fit into the
   32-byte buffers used for the upper case copies.  */
iconv_t
rpl_iconv_open (const char *tocode, const char *fromcode)
/* Drop the 'iconv_open' macro from here on, so that the calls below reach
   the real iconv_open() rather than this replacement.  */
#undef iconv_open
{
char fromcode_upper[32];
char tocode_upper[32];
char *fromcode_upper_end;
char *tocode_upper_end;
#if REPLACE_ICONV_UTF
/* Special handling of conversion between UTF-8 and UTF-{16,32}{BE,LE}.
Do this here, before calling the real iconv_open(), because OSF/1 5.1
iconv() to these encodings inserts a BOM, which is wrong.
We do not need to handle conversion between arbitrary encodings and
UTF-{16,32}{BE,LE}, because the 'striconveh' module implements two-step
conversion through UTF-8.
The _ICONV_* constants are chosen to be disjoint from any iconv_t
returned by the system's iconv_open() functions. Recall that iconv_t
is a scalar type. */
/* The && chains stop at the first mismatch, so no character beyond a
   terminating NUL is examined.  */
if (c_toupper (fromcode[0]) == 'U'
&& c_toupper (fromcode[1]) == 'T'
&& c_toupper (fromcode[2]) == 'F'
&& fromcode[3] == '-')
{
if (c_toupper (tocode[0]) == 'U'
&& c_toupper (tocode[1]) == 'T'
&& c_toupper (tocode[2]) == 'F'
&& tocode[3] == '-')
{
/* Both names start with "UTF-"; look at what follows.  */
if (strcmp (fromcode + 4, "8") == 0)
{
if (c_strcasecmp (tocode + 4, "16BE") == 0)
return _ICONV_UTF8_UTF16BE;
if (c_strcasecmp (tocode + 4, "16LE") == 0)
return _ICONV_UTF8_UTF16LE;
if (c_strcasecmp (tocode + 4, "32BE") == 0)
return _ICONV_UTF8_UTF32BE;
if (c_strcasecmp (tocode + 4, "32LE") == 0)
return _ICONV_UTF8_UTF32LE;
}
else if (strcmp (tocode + 4, "8") == 0)
{
if (c_strcasecmp (fromcode + 4, "16BE") == 0)
return _ICONV_UTF16BE_UTF8;
if (c_strcasecmp (fromcode + 4, "16LE") == 0)
return _ICONV_UTF16LE_UTF8;
if (c_strcasecmp (fromcode + 4, "32BE") == 0)
return _ICONV_UTF32BE_UTF8;
if (c_strcasecmp (fromcode + 4, "32LE") == 0)
return _ICONV_UTF32LE_UTF8;
}
}
}
#endif
/* Do *not* add special support for 8-bit encodings like ASCII or ISO-8859-1
here. This would lead to programs that work in some locales (such as the
"C" or "en_US" locales) but do not work in East Asian locales. It is
better if programmers make their programs depend on GNU libiconv (except
on glibc systems), e.g. by using the AM_ICONV macro and documenting the
dependency in an INSTALL or DEPENDENCIES file. */
/* Try with the original names first.
This covers the case when fromcode or tocode is a lowercase encoding name
that is understood by the system's iconv_open but not listed in our
mappings table. */
{
iconv_t cd = iconv_open (tocode, fromcode);
if (cd != (iconv_t)(-1))
return cd;
}
/* Convert the encodings to upper case, because
1. in the arguments of iconv_open() on AIX, HP-UX, and OSF/1 the case
matters,
2. it makes searching in the table faster. */
{
const char *p = fromcode;
char *q = fromcode_upper;
while ((*q = c_toupper (*p)) != '\0')
{
p++;
q++;
/* Q has reached the end of the buffer: the name, including its
   terminating NUL, does not fit.  */
if (q == &fromcode_upper[SIZEOF (fromcode_upper)])
{
errno = EINVAL;
return (iconv_t)(-1);
}
}
fromcode_upper_end = q;
}
{
const char *p = tocode;
char *q = tocode_upper;
while ((*q = c_toupper (*p)) != '\0')
{
p++;
q++;
/* Same overflow check as for FROMCODE above.  */
if (q == &tocode_upper[SIZEOF (tocode_upper)])
{
errno = EINVAL;
return (iconv_t)(-1);
}
}
tocode_upper_end = q;
}
#ifdef ICONV_FLAVOR
/* Apply the mappings. */
/* A name without a table entry is used in its upper case form.  */
{
const struct mapping *m =
mapping_lookup (fromcode_upper, fromcode_upper_end - fromcode_upper);
fromcode = (m != NULL ? m->vendor_name : fromcode_upper);
}
{
const struct mapping *m =
mapping_lookup (tocode_upper, tocode_upper_end - tocode_upper);
tocode = (m != NULL ? m->vendor_name : tocode_upper);
}
#else
/* No vendor table: retry with the upper case names.  */
fromcode = fromcode_upper;
tocode = tocode_upper;
#endif
return iconv_open (tocode, fromcode);
}

41
lib/iconveh.h Normal file
View file

@ -0,0 +1,41 @@
/* Character set conversion handler type.
Copyright (C) 2001-2007, 2009 Free Software Foundation, Inc.
Written by Bruno Haible.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#ifndef _ICONVEH_H
#define _ICONVEH_H
#ifdef __cplusplus
extern "C" {
#endif
/* Handling of unconvertible characters.
   A value of this type is passed as the HANDLER argument of the conversion
   functions and selects what happens when a character cannot be
   converted.  */
enum iconv_ilseq_handler
{
iconveh_error, /* return and set errno = EILSEQ */
iconveh_question_mark, /* use one '?' per unconvertible character */
iconveh_escape_sequence /* use escape sequence \uxxxx or \Uxxxxxxxx */
};
#ifdef __cplusplus
}
#endif
#endif /* _ICONVEH_H */

1251
lib/striconveh.c Normal file

File diff suppressed because it is too large Load diff

120
lib/striconveh.h Normal file
View file

@ -0,0 +1,120 @@
/* Character set conversion with error handling.
Copyright (C) 2001-2007, 2009 Free Software Foundation, Inc.
Written by Bruno Haible and Simon Josefsson.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#ifndef _STRICONVEH_H
#define _STRICONVEH_H
#include <stddef.h>
#if HAVE_ICONV
#include <iconv.h>
#endif
#include "iconveh.h"
#ifdef __cplusplus
extern "C" {
#endif
#if HAVE_ICONV
/* Convert an entire string from one encoding to another, using iconv.
The original string is at [SRC,...,SRC+SRCLEN-1].
CD is the conversion descriptor from FROMCODE to TOCODE, or (iconv_t)(-1) if
the system does not support a direct conversion from FROMCODE to TOCODE.
CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or
(iconv_t)(-1) if FROM_CODESET is UTF-8).
CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1)
if TO_CODESET is UTF-8).
If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
array is filled with offsets into the result, i.e. the character starting
at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
and other offsets are set to (size_t)(-1).
*RESULTP and *LENGTHP should initially be a scratch buffer and its size,
or *RESULTP can initially be NULL.
May erase the contents of the memory at *RESULTP.
Return value: 0 if successful, otherwise -1 and errno set.
If successful: The resulting string is stored in *RESULTP and its length
in *LENGTHP. *RESULTP is set to a freshly allocated memory block, or is
unchanged if no dynamic memory allocation was necessary. */
extern int
mem_cd_iconveh (const char *src, size_t srclen,
iconv_t cd, iconv_t cd1, iconv_t cd2,
enum iconv_ilseq_handler handler,
size_t *offsets,
char **resultp, size_t *lengthp);
/* Convert an entire string from one encoding to another, using iconv.
The original string is the NUL-terminated string starting at SRC.
CD is the conversion descriptor from FROMCODE to TOCODE, or (iconv_t)(-1) if
the system does not support a direct conversion from FROMCODE to TOCODE.
Both the "from" and the "to" encoding must use a single NUL byte at the end
of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or
(iconv_t)(-1) if FROM_CODESET is UTF-8).
CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1)
if TO_CODESET is UTF-8).
Allocate a malloced memory block for the result.
Return value: the freshly allocated resulting NUL-terminated string if
successful, otherwise NULL and errno set. */
extern char *
str_cd_iconveh (const char *src,
iconv_t cd, iconv_t cd1, iconv_t cd2,
enum iconv_ilseq_handler handler);
#endif
/* Convert an entire string from one encoding to another, using iconv.
The original string is at [SRC,...,SRC+SRCLEN-1].
If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
array is filled with offsets into the result, i.e. the character starting
at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
and other offsets are set to (size_t)(-1).
*RESULTP and *LENGTHP should initially be a scratch buffer and its size,
or *RESULTP can initially be NULL.
May erase the contents of the memory at *RESULTP.
Return value: 0 if successful, otherwise -1 and errno set.
If successful: The resulting string is stored in *RESULTP and its length
in *LENGTHP. *RESULTP is set to a freshly allocated memory block, or is
unchanged if no dynamic memory allocation was necessary. */
extern int
mem_iconveh (const char *src, size_t srclen,
const char *from_codeset, const char *to_codeset,
enum iconv_ilseq_handler handler,
size_t *offsets,
char **resultp, size_t *lengthp);
/* Convert an entire string from one encoding to another, using iconv.
The original string is the NUL-terminated string starting at SRC.
Both the "from" and the "to" encoding must use a single NUL byte at the
end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
Allocate a malloced memory block for the result.
Return value: the freshly allocated resulting NUL-terminated string if
successful, otherwise NULL and errno set. */
extern char *
str_iconveh (const char *src,
const char *from_codeset, const char *to_codeset,
enum iconv_ilseq_handler handler);
#ifdef __cplusplus
}
#endif
#endif /* _STRICONVEH_H */

605
lib/string.in.h Normal file
View file

@ -0,0 +1,605 @@
/* A GNU-like <string.h>.
Copyright (C) 1995-1996, 2001-2008 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
#ifndef _GL_STRING_H
#if __GNUC__ >= 3
@PRAGMA_SYSTEM_HEADER@
#endif
/* The include_next requires a split double-inclusion guard. */
#@INCLUDE_NEXT@ @NEXT_STRING_H@
#ifndef _GL_STRING_H
#define _GL_STRING_H
#ifndef __attribute__
/* This feature is available in gcc versions 2.5 and later. */
# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5)
# define __attribute__(Spec) /* empty */
# endif
/* The attribute __pure__ was added in gcc 2.96. */
# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
# define __pure__ /* empty */
# endif
#endif
/* The definition of GL_LINK_WARNING is copied here. */
#ifdef __cplusplus
extern "C" {
#endif
/* Return the first occurrence of NEEDLE in HAYSTACK. */
#if @GNULIB_MEMMEM@
# if @REPLACE_MEMMEM@
# define memmem rpl_memmem
# endif
# if ! @HAVE_DECL_MEMMEM@ || @REPLACE_MEMMEM@
extern void *memmem (void const *__haystack, size_t __haystack_len,
void const *__needle, size_t __needle_len)
__attribute__ ((__pure__));
# endif
#elif defined GNULIB_POSIXCHECK
# undef memmem
# define memmem(a,al,b,bl) \
(GL_LINK_WARNING ("memmem is unportable and often quadratic - " \
"use gnulib module memmem-simple for portability, " \
"and module memmem for speed" ), \
memmem (a, al, b, bl))
#endif
/* Copy N bytes of SRC to DEST, return pointer to bytes after the
last written byte. */
#if @GNULIB_MEMPCPY@
# if ! @HAVE_MEMPCPY@
extern void *mempcpy (void *restrict __dest, void const *restrict __src,
size_t __n);
# endif
#elif defined GNULIB_POSIXCHECK
# undef mempcpy
# define mempcpy(a,b,n) \
(GL_LINK_WARNING ("mempcpy is unportable - " \
"use gnulib module mempcpy for portability"), \
mempcpy (a, b, n))
#endif
/* Search backwards through a block for a byte (specified as an int). */
#if @GNULIB_MEMRCHR@
# if ! @HAVE_DECL_MEMRCHR@
extern void *memrchr (void const *, int, size_t)
__attribute__ ((__pure__));
# endif
#elif defined GNULIB_POSIXCHECK
# undef memrchr
# define memrchr(a,b,c) \
(GL_LINK_WARNING ("memrchr is unportable - " \
"use gnulib module memrchr for portability"), \
memrchr (a, b, c))
#endif
/* Find the first occurrence of C in S. More efficient than
memchr(S,C,N), at the expense of undefined behavior if C does not
occur within N bytes. */
#if @GNULIB_RAWMEMCHR@
# if ! @HAVE_RAWMEMCHR@
extern void *rawmemchr (void const *__s, int __c_in)
__attribute__ ((__pure__));
# endif
#elif defined GNULIB_POSIXCHECK
# undef rawmemchr
# define rawmemchr(a,b) \
(GL_LINK_WARNING ("rawmemchr is unportable - " \
"use gnulib module rawmemchr for portability"), \
rawmemchr (a, b))
#endif
/* Copy SRC to DST, returning the address of the terminating '\0' in DST. */
#if @GNULIB_STPCPY@
# if ! @HAVE_STPCPY@
extern char *stpcpy (char *restrict __dst, char const *restrict __src);
# endif
#elif defined GNULIB_POSIXCHECK
# undef stpcpy
# define stpcpy(a,b) \
(GL_LINK_WARNING ("stpcpy is unportable - " \
"use gnulib module stpcpy for portability"), \
stpcpy (a, b))
#endif
/* Copy no more than N bytes of SRC to DST, returning a pointer past the
last non-NUL byte written into DST. */
#if @GNULIB_STPNCPY@
# if ! @HAVE_STPNCPY@
# define stpncpy gnu_stpncpy
extern char *stpncpy (char *restrict __dst, char const *restrict __src,
size_t __n);
# endif
#elif defined GNULIB_POSIXCHECK
# undef stpncpy
# define stpncpy(a,b,n) \
(GL_LINK_WARNING ("stpncpy is unportable - " \
"use gnulib module stpncpy for portability"), \
stpncpy (a, b, n))
#endif
#if defined GNULIB_POSIXCHECK
/* strchr() does not work with multibyte strings if the locale encoding is
GB18030 and the character to be searched is a digit. */
# undef strchr
# define strchr(s,c) \
(GL_LINK_WARNING ("strchr cannot work correctly on character strings " \
"in some multibyte locales - " \
"use mbschr if you care about internationalization"), \
strchr (s, c))
#endif
/* Find the first occurrence of C in S or the final NUL byte. */
#if @GNULIB_STRCHRNUL@
# if ! @HAVE_STRCHRNUL@
extern char *strchrnul (char const *__s, int __c_in)
__attribute__ ((__pure__));
# endif
#elif defined GNULIB_POSIXCHECK
# undef strchrnul
# define strchrnul(a,b) \
(GL_LINK_WARNING ("strchrnul is unportable - " \
"use gnulib module strchrnul for portability"), \
strchrnul (a, b))
#endif
/* Duplicate S, returning an identical malloc'd string. */
#if @GNULIB_STRDUP@
# if @REPLACE_STRDUP@
# undef strdup
# define strdup rpl_strdup
# endif
# if !(@HAVE_DECL_STRDUP@ || defined strdup) || @REPLACE_STRDUP@
extern char *strdup (char const *__s);
# endif
#elif defined GNULIB_POSIXCHECK
# undef strdup
# define strdup(a) \
(GL_LINK_WARNING ("strdup is unportable - " \
"use gnulib module strdup for portability"), \
strdup (a))
#endif
/* Return a newly allocated copy of at most N bytes of STRING. */
#if @GNULIB_STRNDUP@
# if ! @HAVE_STRNDUP@
# undef strndup
# define strndup rpl_strndup
# endif
# if ! @HAVE_STRNDUP@ || ! @HAVE_DECL_STRNDUP@
extern char *strndup (char const *__string, size_t __n);
# endif
#elif defined GNULIB_POSIXCHECK
# undef strndup
# define strndup(a,n) \
(GL_LINK_WARNING ("strndup is unportable - " \
"use gnulib module strndup for portability"), \
strndup (a, n))
#endif
/* Find the length (number of bytes) of STRING, but scan at most
MAXLEN bytes. If no '\0' terminator is found in that many bytes,
return MAXLEN. */
#if @GNULIB_STRNLEN@
# if ! @HAVE_DECL_STRNLEN@
extern size_t strnlen (char const *__string, size_t __maxlen)
__attribute__ ((__pure__));
# endif
#elif defined GNULIB_POSIXCHECK
# undef strnlen
# define strnlen(a,n) \
(GL_LINK_WARNING ("strnlen is unportable - " \
"use gnulib module strnlen for portability"), \
strnlen (a, n))
#endif
#if defined GNULIB_POSIXCHECK
/* strcspn() assumes the second argument is a list of single-byte characters.
Even in this simple case, it does not work with multibyte strings if the
locale encoding is GB18030 and one of the characters to be searched is a
digit. */
# undef strcspn
# define strcspn(s,a) \
(GL_LINK_WARNING ("strcspn cannot work correctly on character strings " \
"in multibyte locales - " \
"use mbscspn if you care about internationalization"), \
strcspn (s, a))
#endif
/* Find the first occurrence in S of any character in ACCEPT. */
#if @GNULIB_STRPBRK@
# if ! @HAVE_STRPBRK@
extern char *strpbrk (char const *__s, char const *__accept)
__attribute__ ((__pure__));
# endif
# if defined GNULIB_POSIXCHECK
/* strpbrk() assumes the second argument is a list of single-byte characters.
Even in this simple case, it does not work with multibyte strings if the
locale encoding is GB18030 and one of the characters to be searched is a
digit. */
# undef strpbrk
# define strpbrk(s,a) \
(GL_LINK_WARNING ("strpbrk cannot work correctly on character strings " \
"in multibyte locales - " \
"use mbspbrk if you care about internationalization"), \
strpbrk (s, a))
# endif
#elif defined GNULIB_POSIXCHECK
# undef strpbrk
# define strpbrk(s,a) \
(GL_LINK_WARNING ("strpbrk is unportable - " \
"use gnulib module strpbrk for portability"), \
strpbrk (s, a))
#endif
#if defined GNULIB_POSIXCHECK
/* strspn() assumes the second argument is a list of single-byte characters.
Even in this simple case, it cannot work with multibyte strings. */
# undef strspn
# define strspn(s,a) \
(GL_LINK_WARNING ("strspn cannot work correctly on character strings " \
"in multibyte locales - " \
"use mbsspn if you care about internationalization"), \
strspn (s, a))
#endif
#if defined GNULIB_POSIXCHECK
/* strrchr() does not work with multibyte strings if the locale encoding is
GB18030 and the character to be searched is a digit. */
# undef strrchr
# define strrchr(s,c) \
(GL_LINK_WARNING ("strrchr cannot work correctly on character strings " \
"in some multibyte locales - " \
"use mbsrchr if you care about internationalization"), \
strrchr (s, c))
#endif
/* Search the next delimiter (char listed in DELIM) starting at *STRINGP.
If one is found, overwrite it with a NUL, and advance *STRINGP
to point to the next char after it. Otherwise, set *STRINGP to NULL.
If *STRINGP was already NULL, nothing happens.
Return the old value of *STRINGP.
This is a variant of strtok() that is multithread-safe and supports
empty fields.
Caveat: It modifies the original string.
Caveat: These functions cannot be used on constant strings.
Caveat: The identity of the delimiting character is lost.
Caveat: It doesn't work with multibyte strings unless all of the delimiter
characters are ASCII characters < 0x30.
See also strtok_r(). */
#if @GNULIB_STRSEP@
# if ! @HAVE_STRSEP@
extern char *strsep (char **restrict __stringp, char const *restrict __delim);
# endif
# if defined GNULIB_POSIXCHECK
# undef strsep
# define strsep(s,d) \
(GL_LINK_WARNING ("strsep cannot work correctly on character strings " \
"in multibyte locales - " \
"use mbssep if you care about internationalization"), \
strsep (s, d))
# endif
#elif defined GNULIB_POSIXCHECK
# undef strsep
# define strsep(s,d) \
(GL_LINK_WARNING ("strsep is unportable - " \
"use gnulib module strsep for portability"), \
strsep (s, d))
#endif
/* Find the first occurrence of NEEDLE in HAYSTACK.
When the gnulib strstr module is in use and @REPLACE_STRSTR@ is set, the
name strstr is redirected to rpl_strstr and declared here as a pure
function. No declaration is emitted in the non-replacement case.
Without the module, and only when GNULIB_POSIXCHECK is defined, strstr
becomes a function-like macro that passes a diagnostic message to
GL_LINK_WARNING and then calls strstr with the same arguments. */
#if @GNULIB_STRSTR@
# if @REPLACE_STRSTR@
# define strstr rpl_strstr
char *strstr (const char *haystack, const char *needle)
__attribute__ ((__pure__));
# endif
#elif defined GNULIB_POSIXCHECK
/* strstr() does not work with multibyte strings if the locale encoding is
different from UTF-8:
POSIX says that it operates on "strings", and "string" in POSIX is defined
as a sequence of bytes, not of characters. */
# undef strstr
# define strstr(a,b) \
(GL_LINK_WARNING ("strstr is quadratic on many systems, and cannot " \
"work correctly on character strings in most " \
"multibyte locales - " \
"use mbsstr if you care about internationalization, " \
"or use strstr if you care about speed"), \
strstr (a, b))
#endif
/* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
   comparison.
   When the gnulib strcasestr module is in use (@GNULIB_STRCASESTR@):
     - if @REPLACE_STRCASESTR@ is set, the name is redirected to
       rpl_strcasestr;
     - a declaration is supplied when the function is missing
       (! @HAVE_STRCASESTR@) or replaced.
   Without the module, and only when GNULIB_POSIXCHECK is defined,
   strcasestr becomes a function-like macro that passes a diagnostic
   message to GL_LINK_WARNING and then calls strcasestr with the same
   arguments.  */
#if @GNULIB_STRCASESTR@
# if @REPLACE_STRCASESTR@
#  define strcasestr rpl_strcasestr
# endif
# if ! @HAVE_STRCASESTR@ || @REPLACE_STRCASESTR@
extern char *strcasestr (const char *haystack, const char *needle)
     __attribute__ ((__pure__));
# endif
#elif defined GNULIB_POSIXCHECK
/* strcasestr() does not work with multibyte strings:
   It is a glibc extension, and glibc implements it only for unibyte
   locales.  */
# undef strcasestr
/* The message must say "does not work": the point of the warning is to
   steer callers towards mbscasestr or c-strcasestr.  */
# define strcasestr(a,b) \
    (GL_LINK_WARNING ("strcasestr does not work correctly on character strings " \
                      "in multibyte locales - " \
                      "use mbscasestr if you care about " \
                      "internationalization, or use c-strcasestr if you want " \
                      "a locale independent function"), \
     strcasestr (a, b))
#endif
/* Parse S into tokens separated by characters in DELIM.
If S is NULL, the saved pointer in SAVE_PTR is used as
the next starting point. For example:
char s[] = "-abc-=-def";
char *sp;
x = strtok_r(s, "-", &sp); // x = "abc", sp = "=-def"
x = strtok_r(NULL, "-=", &sp); // x = "def", sp = NULL
x = strtok_r(NULL, "=", &sp); // x = NULL
// s = "abc\0-def\0"
This is a variant of strtok() that is multithread-safe.
For the POSIX documentation for this function, see:
http://www.opengroup.org/susv3xsh/strtok.html
Caveat: It modifies the original string.
Caveat: These functions cannot be used on constant strings.
Caveat: The identity of the delimiting character is lost.
Caveat: It doesn't work with multibyte strings unless all of the delimiter
characters are ASCII characters < 0x30.
See also strsep(). */
#if @GNULIB_STRTOK_R@
# if ! @HAVE_DECL_STRTOK_R@
extern char *strtok_r (char *restrict s, char const *restrict delim,
char **restrict save_ptr);
# endif
# if defined GNULIB_POSIXCHECK
# undef strtok_r
# define strtok_r(s,d,p) \
(GL_LINK_WARNING ("strtok_r cannot work correctly on character strings " \
"in multibyte locales - " \
"use mbstok_r if you care about internationalization"), \
strtok_r (s, d, p))
# endif
#elif defined GNULIB_POSIXCHECK
# undef strtok_r
# define strtok_r(s,d,p) \
(GL_LINK_WARNING ("strtok_r is unportable - " \
"use gnulib module strtok_r for portability"), \
strtok_r (s, d, p))
#endif
/* The following functions are not specified by POSIX. They are gnulib
extensions. */
#if @GNULIB_MBSLEN@
/* Return the number of multibyte characters in the character string STRING.
This considers multibyte characters, unlike strlen, which counts bytes. */
extern size_t mbslen (const char *string);
#endif
#if @GNULIB_MBSNLEN@
/* Return the number of multibyte characters in the character string starting
at STRING and ending at STRING + LEN. */
extern size_t mbsnlen (const char *string, size_t len);
#endif
#if @GNULIB_MBSCHR@
/* Locate the first single-byte character C in the character string STRING,
and return a pointer to it. Return NULL if C is not found in STRING.
Unlike strchr(), this function works correctly in multibyte locales with
encodings such as GB18030. */
# define mbschr rpl_mbschr /* avoid collision with HP-UX function */
extern char * mbschr (const char *string, int c);
#endif
#if @GNULIB_MBSRCHR@
/* Locate the last single-byte character C in the character string STRING,
and return a pointer to it. Return NULL if C is not found in STRING.
Unlike strrchr(), this function works correctly in multibyte locales with
encodings such as GB18030. */
# define mbsrchr rpl_mbsrchr /* avoid collision with HP-UX function */
extern char * mbsrchr (const char *string, int c);
#endif
#if @GNULIB_MBSSTR@
/* Find the first occurrence of the character string NEEDLE in the character
string HAYSTACK. Return NULL if NEEDLE is not found in HAYSTACK.
Unlike strstr(), this function works correctly in multibyte locales with
encodings different from UTF-8. */
extern char * mbsstr (const char *haystack, const char *needle);
#endif
#if @GNULIB_MBSCASECMP@
/* Compare the character strings S1 and S2, ignoring case, returning less than,
equal to or greater than zero if S1 is lexicographically less than, equal to
or greater than S2.
Note: This function may, in multibyte locales, return 0 for strings of
different lengths!
Unlike strcasecmp(), this function works correctly in multibyte locales. */
extern int mbscasecmp (const char *s1, const char *s2);
#endif
#if @GNULIB_MBSNCASECMP@
/* Compare the initial segment of the character string S1 consisting of at most
N characters with the initial segment of the character string S2 consisting
of at most N characters, ignoring case, returning less than, equal to or
greater than zero if the initial segment of S1 is lexicographically less
than, equal to or greater than the initial segment of S2.
Note: This function may, in multibyte locales, return 0 for initial segments
of different lengths!
Unlike strncasecmp(), this function works correctly in multibyte locales.
But beware that N is not a byte count but a character count! */
extern int mbsncasecmp (const char *s1, const char *s2, size_t n);
#endif
#if @GNULIB_MBSPCASECMP@
/* Compare the initial segment of the character string STRING consisting of
at most mbslen (PREFIX) characters with the character string PREFIX,
ignoring case. If the two match, return a pointer to the first byte
after this prefix in STRING. Otherwise, return NULL.
Note: This function may, in multibyte locales, return non-NULL if STRING
is of smaller length than PREFIX!
Unlike strncasecmp(), this function works correctly in multibyte
locales. */
extern char * mbspcasecmp (const char *string, const char *prefix);
#endif
#if @GNULIB_MBSCASESTR@
/* Find the first occurrence of the character string NEEDLE in the character
string HAYSTACK, using case-insensitive comparison.
Note: This function may, in multibyte locales, return success even if
strlen (haystack) < strlen (needle) !
Unlike strcasestr(), this function works correctly in multibyte locales. */
extern char * mbscasestr (const char *haystack, const char *needle);
#endif
#if @GNULIB_MBSCSPN@
/* Find the first occurrence in the character string STRING of any character
in the character string ACCEPT. Return the number of bytes from the
beginning of the string to this occurrence, or to the end of the string
if none exists.
Unlike strcspn(), this function works correctly in multibyte locales. */
extern size_t mbscspn (const char *string, const char *accept);
#endif
#if @GNULIB_MBSPBRK@
/* Find the first occurrence in the character string STRING of any character
in the character string ACCEPT. Return the pointer to it, or NULL if none
exists.
Unlike strpbrk(), this function works correctly in multibyte locales. */
# define mbspbrk rpl_mbspbrk /* avoid collision with HP-UX function */
extern char * mbspbrk (const char *string, const char *accept);
#endif
#if @GNULIB_MBSSPN@
/* Find the first occurrence in the character string STRING of any character
not in the character string REJECT. Return the number of bytes from the
beginning of the string to this occurrence, or to the end of the string
if none exists.
Unlike strspn(), this function works correctly in multibyte locales. */
extern size_t mbsspn (const char *string, const char *reject);
#endif
#if @GNULIB_MBSSEP@
/* Search the next delimiter (multibyte character listed in the character
string DELIM) starting at the character string *STRINGP.
If one is found, overwrite it with a NUL, and advance *STRINGP to point
to the next multibyte character after it. Otherwise, set *STRINGP to NULL.
If *STRINGP was already NULL, nothing happens.
Return the old value of *STRINGP.
This is a variant of mbstok_r() that supports empty fields.
Caveat: It modifies the original string.
Caveat: These functions cannot be used on constant strings.
Caveat: The identity of the delimiting character is lost.
See also mbstok_r(). */
extern char * mbssep (char **stringp, const char *delim);
#endif
#if @GNULIB_MBSTOK_R@
/* Parse the character string STRING into tokens separated by characters in
the character string DELIM.
If STRING is NULL, the saved pointer in SAVE_PTR is used as
the next starting point. For example:
char s[] = "-abc-=-def";
char *sp;
x = mbstok_r(s, "-", &sp); // x = "abc", sp = "=-def"
x = mbstok_r(NULL, "-=", &sp); // x = "def", sp = NULL
x = mbstok_r(NULL, "=", &sp); // x = NULL
// s = "abc\0-def\0"
Caveat: It modifies the original string.
Caveat: These functions cannot be used on constant strings.
Caveat: The identity of the delimiting character is lost.
See also mbssep(). */
extern char * mbstok_r (char *string, const char *delim, char **save_ptr);
#endif
/* Map any int, typically from errno, into an error message. */
#if @GNULIB_STRERROR@
# if @REPLACE_STRERROR@
# undef strerror
# define strerror rpl_strerror
extern char *strerror (int);
# endif
#elif defined GNULIB_POSIXCHECK
# undef strerror
# define strerror(e) \
(GL_LINK_WARNING ("strerror is unportable - " \
"use gnulib module strerror to guarantee non-NULL result"), \
strerror (e))
#endif
/* strsignal: takes an int (__sig) and returns a char *.
NOTE(review): presumably maps a signal number to a descriptive message,
as the name suggests - confirm against the strsignal module documentation.
When the gnulib strsignal module is in use (@GNULIB_STRSIGNAL@):
- if @REPLACE_STRSIGNAL@ is set, the name is redirected to rpl_strsignal;
- a declaration is supplied when the system does not declare the function
(! @HAVE_DECL_STRSIGNAL@) or when it is replaced.
Without the module, and only when GNULIB_POSIXCHECK is defined, strsignal
becomes a function-like macro that passes a portability message to
GL_LINK_WARNING and then calls strsignal with the same argument. */
#if @GNULIB_STRSIGNAL@
# if @REPLACE_STRSIGNAL@
# define strsignal rpl_strsignal
# endif
# if ! @HAVE_DECL_STRSIGNAL@ || @REPLACE_STRSIGNAL@
extern char *strsignal (int __sig);
# endif
#elif defined GNULIB_POSIXCHECK
# undef strsignal
# define strsignal(a) \
(GL_LINK_WARNING ("strsignal is unportable - " \
"use gnulib module strsignal for portability"), \
strsignal (a))
#endif
/* strverscmp: takes two constant strings and returns an int.
NOTE(review): presumably a version-aware string comparison, as the name
suggests - confirm against the strverscmp module documentation.
When the gnulib strverscmp module is in use (@GNULIB_STRVERSCMP@), a
declaration is supplied only if the system lacks the function
(!@HAVE_STRVERSCMP@); no rpl_ redirection is done here.
Without the module, and only when GNULIB_POSIXCHECK is defined, strverscmp
becomes a function-like macro that passes a portability message to
GL_LINK_WARNING and then calls strverscmp with the same arguments. */
#if @GNULIB_STRVERSCMP@
# if !@HAVE_STRVERSCMP@
extern int strverscmp (const char *, const char *);
# endif
#elif defined GNULIB_POSIXCHECK
# undef strverscmp
# define strverscmp(a, b) \
(GL_LINK_WARNING ("strverscmp is unportable - " \
"use gnulib module strverscmp for portability"), \
strverscmp (a, b))
#endif
#ifdef __cplusplus
}
#endif
#endif /* _GL_STRING_H */
#endif /* _GL_STRING_H */

681
lib/unistr.h Normal file
View file

@ -0,0 +1,681 @@
/* Elementary Unicode string functions.
Copyright (C) 2001-2002, 2005-2009 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#ifndef _UNISTR_H
#define _UNISTR_H
#include "unitypes.h"
/* Get bool. */
#include <stdbool.h>
/* Get size_t. */
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Conventions:
All functions prefixed with u8_ operate on UTF-8 encoded strings.
Their unit is an uint8_t (1 byte).
All functions prefixed with u16_ operate on UTF-16 encoded strings.
Their unit is an uint16_t (a 2-byte word).
All functions prefixed with u32_ operate on UCS-4 encoded strings.
Their unit is an uint32_t (a 4-byte word).
All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
n units.
All arguments starting with "str" and the arguments of functions starting
with u8_str/u16_str/u32_str denote a NUL terminated string, i.e. a string
which terminates at the first NUL unit. This termination unit is
considered part of the string for all memory allocation purposes, but
is not considered part of the string for all other logical purposes.
Functions returning a string result take a (resultbuf, lengthp) argument
pair. If resultbuf is not NULL and the result fits into *lengthp units,
it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly
allocated string is returned. In both cases, *lengthp is set to the
length (number of units) of the returned string. In case of error,
NULL is returned and errno is set. */
/* Elementary string checks. */
/* Check whether an UTF-8 string is well-formed.
Return NULL if valid, or a pointer to the first invalid unit otherwise. */
extern const uint8_t *
u8_check (const uint8_t *s, size_t n);
/* Check whether an UTF-16 string is well-formed.
Return NULL if valid, or a pointer to the first invalid unit otherwise. */
extern const uint16_t *
u16_check (const uint16_t *s, size_t n);
/* Check whether an UCS-4 string is well-formed.
Return NULL if valid, or a pointer to the first invalid unit otherwise. */
extern const uint32_t *
u32_check (const uint32_t *s, size_t n);
/* Elementary string conversions. */
/* Convert an UTF-8 string to an UTF-16 string. */
extern uint16_t *
u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf,
size_t *lengthp);
/* Convert an UTF-8 string to an UCS-4 string. */
extern uint32_t *
u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf,
size_t *lengthp);
/* Convert an UTF-16 string to an UTF-8 string. */
extern uint8_t *
u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf,
size_t *lengthp);
/* Convert an UTF-16 string to an UCS-4 string. */
extern uint32_t *
u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf,
size_t *lengthp);
/* Convert an UCS-4 string to an UTF-8 string. */
extern uint8_t *
u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf,
size_t *lengthp);
/* Convert an UCS-4 string to an UTF-16 string. */
extern uint16_t *
u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf,
size_t *lengthp);
/* Elementary string functions. */
/* Return the length (number of units) of the first character in S, which is
no longer than N. Return 0 if it is the NUL character. Return -1 upon
failure. */
/* Similar to mblen(), except that s must not be NULL. */
extern int
u8_mblen (const uint8_t *s, size_t n);
extern int
u16_mblen (const uint16_t *s, size_t n);
extern int
u32_mblen (const uint32_t *s, size_t n);
/* Return the length (number of units) of the first character in S, putting
its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
and an appropriate number of units is returned.
The number of available units, N, must be > 0. */
/* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0,
and the NUL character is not treated specially. */
/* The variants with _safe suffix are safe, even if the library is compiled
without --enable-safety. */
#ifdef GNULIB_UNISTR_U8_MBTOUC_UNSAFE
# if !HAVE_INLINE
extern int
u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n);
# else
extern int
u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n);
static inline int
u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
{
  /* Inline fast path: a first unit below 0x80 is stored in *PUC as is and
     counts as one unit.  Anything else goes to the out-of-line helper.  */
  const uint8_t first = s[0];

  if (first >= 0x80)
    return u8_mbtouc_unsafe_aux (puc, s, n);

  *puc = first;
  return 1;
}
# endif
#endif
#ifdef GNULIB_UNISTR_U16_MBTOUC_UNSAFE
# if !HAVE_INLINE
extern int
u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n);
# else
extern int
u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n);
static inline int
u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n)
{
  /* Inline fast path: a first unit outside 0xd800..0xdfff is stored in
     *PUC as is and counts as one unit.  Units inside that range are
     handled by the out-of-line helper.  */
  const uint16_t first = s[0];

  if (first >= 0xd800 && first < 0xe000)
    return u16_mbtouc_unsafe_aux (puc, s, n);

  *puc = first;
  return 1;
}
# endif
#endif
#ifdef GNULIB_UNISTR_U32_MBTOUC_UNSAFE
# if !HAVE_INLINE
extern int
u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n);
# else
static inline int
u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n _UNUSED_PARAMETER_)
{
  const uint32_t unit = s[0];

# if CONFIG_UNICODE_SAFETY
  /* Only when CONFIG_UNICODE_SAFETY is enabled: a unit in 0xd800..0xdfff
     or at/above 0x110000 is an invalid character and yields 0xfffd.  */
  if ((unit >= 0xd800 && unit < 0xe000) || unit >= 0x110000)
    *puc = 0xfffd;
  else
# endif
    *puc = unit;

  /* Exactly one unit is consumed in every case.  */
  return 1;
}
# endif
#endif
#ifdef GNULIB_UNISTR_U8_MBTOUC
# if !HAVE_INLINE
extern int
u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n);
# else
extern int
u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n);
static inline int
u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
{
  /* Inline fast path: a first unit below 0x80 is stored in *PUC as is and
     counts as one unit.  Anything else goes to the out-of-line helper.  */
  const uint8_t first = s[0];

  if (first >= 0x80)
    return u8_mbtouc_aux (puc, s, n);

  *puc = first;
  return 1;
}
# endif
#endif
#ifdef GNULIB_UNISTR_U16_MBTOUC
# if !HAVE_INLINE
extern int
u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n);
# else
extern int
u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n);
static inline int
u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n)
{
  /* Inline fast path: a first unit outside 0xd800..0xdfff is stored in
     *PUC as is and counts as one unit.  Units inside that range are
     handled by the out-of-line helper.  */
  const uint16_t first = s[0];

  if (first >= 0xd800 && first < 0xe000)
    return u16_mbtouc_aux (puc, s, n);

  *puc = first;
  return 1;
}
# endif
#endif
#ifdef GNULIB_UNISTR_U32_MBTOUC
# if !HAVE_INLINE
extern int
u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n);
# else
static inline int
u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n _UNUSED_PARAMETER_)
{
  const uint32_t unit = s[0];
  /* Accepted: below 0xd800, or from 0xe000 up to but excluding 0x110000.  */
  const int valid = unit < 0xd800 || (unit >= 0xe000 && unit < 0x110000);

  /* An invalid unit is reported as 0xfffd; one unit is consumed either
     way.  */
  *puc = valid ? unit : 0xfffd;
  return 1;
}
# endif
#endif
/* Return the length (number of units) of the first character in S, putting
its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
and -1 is returned for an invalid sequence of units, -2 is returned for an
incomplete sequence of units.
The number of available units, N, must be > 0. */
/* Similar to u*_mbtouc(), except that the return value gives more details
about the failure, similar to mbrtowc(). */
#ifdef GNULIB_UNISTR_U8_MBTOUCR
extern int
u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n);
#endif
#ifdef GNULIB_UNISTR_U16_MBTOUCR
extern int
u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n);
#endif
#ifdef GNULIB_UNISTR_U32_MBTOUCR
extern int
u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n);
#endif
/* Put the multibyte character represented by UC in S, returning its
length. Return -1 upon failure, -2 if the number of available units, N,
is too small. The latter case cannot occur if N >= 6/2/1, respectively. */
/* Similar to wctomb(), except that s must not be NULL, and the argument n
must be specified. */
#ifdef GNULIB_UNISTR_U8_UCTOMB
/* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr. */
extern int
u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n);
# if !HAVE_INLINE
extern int
u8_uctomb (uint8_t *s, ucs4_t uc, int n);
# else
static inline int
u8_uctomb (uint8_t *s, ucs4_t uc, int n)
{
  /* The inline path only covers a character below 0x80 with at least one
     unit of room; every other case is left to the auxiliary function.  */
  if (n <= 0 || uc >= 0x80)
    return u8_uctomb_aux (s, uc, n);

  s[0] = uc;
  return 1;
}
# endif
#endif
#ifdef GNULIB_UNISTR_U16_UCTOMB
/* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr. */
extern int
u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n);
# if !HAVE_INLINE
extern int
u16_uctomb (uint16_t *s, ucs4_t uc, int n);
# else
static inline int
u16_uctomb (uint16_t *s, ucs4_t uc, int n)
{
  /* The inline path only covers a character below 0xd800 with at least one
     unit of room; every other case is left to the auxiliary function.  */
  if (n <= 0 || uc >= 0xd800)
    return u16_uctomb_aux (s, uc, n);

  s[0] = uc;
  return 1;
}
# endif
#endif
#ifdef GNULIB_UNISTR_U32_UCTOMB
# if !HAVE_INLINE
extern int
u32_uctomb (uint32_t *s, ucs4_t uc, int n);
# else
static inline int
u32_uctomb (uint32_t *s, ucs4_t uc, int n)
{
  /* Reject values in 0xd800..0xdfff and values at or above 0x110000.  */
  if ((uc >= 0xd800 && uc < 0xe000) || uc >= 0x110000)
    return -1;

  /* A valid character needs one unit of room.  */
  if (n <= 0)
    return -2;

  *s = uc;
  return 1;
}
# endif
#endif
/* Copy N units from SRC to DEST. */
/* Similar to memcpy(). */
extern uint8_t *
u8_cpy (uint8_t *dest, const uint8_t *src, size_t n);
extern uint16_t *
u16_cpy (uint16_t *dest, const uint16_t *src, size_t n);
extern uint32_t *
u32_cpy (uint32_t *dest, const uint32_t *src, size_t n);
/* Copy N units from SRC to DEST, guaranteeing correct behavior for
overlapping memory areas. */
/* Similar to memmove(). */
extern uint8_t *
u8_move (uint8_t *dest, const uint8_t *src, size_t n);
extern uint16_t *
u16_move (uint16_t *dest, const uint16_t *src, size_t n);
extern uint32_t *
u32_move (uint32_t *dest, const uint32_t *src, size_t n);
/* Set the first N characters of S to UC. UC should be a character that
occupies only 1 unit. */
/* Similar to memset(). */
extern uint8_t *
u8_set (uint8_t *s, ucs4_t uc, size_t n);
extern uint16_t *
u16_set (uint16_t *s, ucs4_t uc, size_t n);
extern uint32_t *
u32_set (uint32_t *s, ucs4_t uc, size_t n);
/* Compare S1 and S2, each of length N. */
/* Similar to memcmp(). */
extern int
u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n);
extern int
u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n);
extern int
u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n);
/* Compare S1 and S2. */
/* Similar to the gnulib function memcmp2(). */
extern int
u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2);
extern int
u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2);
extern int
u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2);
/* Search the string at S for UC. */
/* Similar to memchr(). */
extern uint8_t *
u8_chr (const uint8_t *s, size_t n, ucs4_t uc);
extern uint16_t *
u16_chr (const uint16_t *s, size_t n, ucs4_t uc);
extern uint32_t *
u32_chr (const uint32_t *s, size_t n, ucs4_t uc);
/* Count the number of Unicode characters in the N units from S. */
/* Similar to mbsnlen(). */
extern size_t
u8_mbsnlen (const uint8_t *s, size_t n);
extern size_t
u16_mbsnlen (const uint16_t *s, size_t n);
extern size_t
u32_mbsnlen (const uint32_t *s, size_t n);
/* Elementary string functions with memory allocation. */
/* Make a freshly allocated copy of S, of length N. */
extern uint8_t *
u8_cpy_alloc (const uint8_t *s, size_t n);
extern uint16_t *
u16_cpy_alloc (const uint16_t *s, size_t n);
extern uint32_t *
u32_cpy_alloc (const uint32_t *s, size_t n);
/* Elementary string functions on NUL terminated strings. */
/* Return the length (number of units) of the first character in S.
Return 0 if it is the NUL character. Return -1 upon failure. */
extern int
u8_strmblen (const uint8_t *s);
extern int
u16_strmblen (const uint16_t *s);
extern int
u32_strmblen (const uint32_t *s);
/* Return the length (number of units) of the first character in S, putting
its 'ucs4_t' representation in *PUC. Return 0 if it is the NUL
character. Return -1 upon failure. */
extern int
u8_strmbtouc (ucs4_t *puc, const uint8_t *s);
extern int
u16_strmbtouc (ucs4_t *puc, const uint16_t *s);
extern int
u32_strmbtouc (ucs4_t *puc, const uint32_t *s);
/* Forward iteration step. Advances the pointer past the next character,
or returns NULL if the end of the string has been reached. Puts the
character's 'ucs4_t' representation in *PUC. */
extern const uint8_t *
u8_next (ucs4_t *puc, const uint8_t *s);
extern const uint16_t *
u16_next (ucs4_t *puc, const uint16_t *s);
extern const uint32_t *
u32_next (ucs4_t *puc, const uint32_t *s);
/* Backward iteration step. Advances the pointer to point to the previous
character, or returns NULL if the beginning of the string had been reached.
Puts the character's 'ucs4_t' representation in *PUC. */
extern const uint8_t *
u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start);
extern const uint16_t *
u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start);
extern const uint32_t *
u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start);
/* Return the number of units in S. */
/* Similar to strlen(), wcslen(). */
extern size_t
u8_strlen (const uint8_t *s);
extern size_t
u16_strlen (const uint16_t *s);
extern size_t
u32_strlen (const uint32_t *s);
/* Return the number of units in S, but at most MAXLEN. */
/* Similar to strnlen(), wcsnlen(). */
extern size_t
u8_strnlen (const uint8_t *s, size_t maxlen);
extern size_t
u16_strnlen (const uint16_t *s, size_t maxlen);
extern size_t
u32_strnlen (const uint32_t *s, size_t maxlen);
/* Copy SRC to DEST. */
/* Similar to strcpy(), wcscpy(). */
extern uint8_t *
u8_strcpy (uint8_t *dest, const uint8_t *src);
extern uint16_t *
u16_strcpy (uint16_t *dest, const uint16_t *src);
extern uint32_t *
u32_strcpy (uint32_t *dest, const uint32_t *src);
/* Copy SRC to DEST, returning the address of the terminating NUL in DEST. */
/* Similar to stpcpy(). */
extern uint8_t *
u8_stpcpy (uint8_t *dest, const uint8_t *src);
extern uint16_t *
u16_stpcpy (uint16_t *dest, const uint16_t *src);
extern uint32_t *
u32_stpcpy (uint32_t *dest, const uint32_t *src);
/* Copy no more than N units of SRC to DEST. */
/* Similar to strncpy(), wcsncpy(). */
extern uint8_t *
u8_strncpy (uint8_t *dest, const uint8_t *src, size_t n);
extern uint16_t *
u16_strncpy (uint16_t *dest, const uint16_t *src, size_t n);
extern uint32_t *
u32_strncpy (uint32_t *dest, const uint32_t *src, size_t n);
/* Copy no more than N units of SRC to DEST, returning the address of
the last unit written into DEST. */
/* Similar to stpncpy(). */
extern uint8_t *
u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n);
extern uint16_t *
u16_stpncpy (uint16_t *dest, const uint16_t *src, size_t n);
extern uint32_t *
u32_stpncpy (uint32_t *dest, const uint32_t *src, size_t n);
/* Append SRC onto DEST. */
/* Similar to strcat(), wcscat(). */
extern uint8_t *
u8_strcat (uint8_t *dest, const uint8_t *src);
extern uint16_t *
u16_strcat (uint16_t *dest, const uint16_t *src);
extern uint32_t *
u32_strcat (uint32_t *dest, const uint32_t *src);
/* Append no more than N units of SRC onto DEST. */
/* Similar to strncat(), wcsncat(). */
extern uint8_t *
u8_strncat (uint8_t *dest, const uint8_t *src, size_t n);
extern uint16_t *
u16_strncat (uint16_t *dest, const uint16_t *src, size_t n);
extern uint32_t *
u32_strncat (uint32_t *dest, const uint32_t *src, size_t n);
/* Compare S1 and S2. */
/* Similar to strcmp(), wcscmp(). */
extern int
u8_strcmp (const uint8_t *s1, const uint8_t *s2);
extern int
u16_strcmp (const uint16_t *s1, const uint16_t *s2);
extern int
u32_strcmp (const uint32_t *s1, const uint32_t *s2);
/* Compare S1 and S2 using the collation rules of the current locale.
Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2.
Upon failure, set errno and return any value. */
/* Similar to strcoll(), wcscoll(). */
extern int
u8_strcoll (const uint8_t *s1, const uint8_t *s2);
extern int
u16_strcoll (const uint16_t *s1, const uint16_t *s2);
extern int
u32_strcoll (const uint32_t *s1, const uint32_t *s2);
/* Compare no more than N units of S1 and S2. */
/* Similar to strncmp(), wcsncmp(). */
extern int
u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n);
extern int
u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n);
extern int
u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n);
/* Duplicate S, returning an identical malloc'd string. */
/* Similar to strdup(), wcsdup(). */
extern uint8_t *
u8_strdup (const uint8_t *s);
extern uint16_t *
u16_strdup (const uint16_t *s);
extern uint32_t *
u32_strdup (const uint32_t *s);
/* Find the first occurrence of UC in STR. */
/* Similar to strchr(), wcschr(). */
extern uint8_t *
u8_strchr (const uint8_t *str, ucs4_t uc);
extern uint16_t *
u16_strchr (const uint16_t *str, ucs4_t uc);
extern uint32_t *
u32_strchr (const uint32_t *str, ucs4_t uc);
/* Find the last occurrence of UC in STR. */
/* Similar to strrchr(), wcsrchr(). */
extern uint8_t *
u8_strrchr (const uint8_t *str, ucs4_t uc);
extern uint16_t *
u16_strrchr (const uint16_t *str, ucs4_t uc);
extern uint32_t *
u32_strrchr (const uint32_t *str, ucs4_t uc);
/* Return the length of the initial segment of STR which consists entirely
of Unicode characters not in REJECT. */
/* Similar to strcspn(), wcscspn(). */
extern size_t
u8_strcspn (const uint8_t *str, const uint8_t *reject);
extern size_t
u16_strcspn (const uint16_t *str, const uint16_t *reject);
extern size_t
u32_strcspn (const uint32_t *str, const uint32_t *reject);
/* Return the length of the initial segment of STR which consists entirely
of Unicode characters in ACCEPT. */
/* Similar to strspn(), wcsspn(). */
extern size_t
u8_strspn (const uint8_t *str, const uint8_t *accept);
extern size_t
u16_strspn (const uint16_t *str, const uint16_t *accept);
extern size_t
u32_strspn (const uint32_t *str, const uint32_t *accept);
/* Find the first occurrence in STR of any character in ACCEPT. */
/* Similar to strpbrk(), wcspbrk(). */
extern uint8_t *
u8_strpbrk (const uint8_t *str, const uint8_t *accept);
extern uint16_t *
u16_strpbrk (const uint16_t *str, const uint16_t *accept);
extern uint32_t *
u32_strpbrk (const uint32_t *str, const uint32_t *accept);
/* Find the first occurrence of NEEDLE in HAYSTACK. */
/* Similar to strstr(), wcsstr(). */
extern uint8_t *
u8_strstr (const uint8_t *haystack, const uint8_t *needle);
extern uint16_t *
u16_strstr (const uint16_t *haystack, const uint16_t *needle);
extern uint32_t *
u32_strstr (const uint32_t *haystack, const uint32_t *needle);
/* Test whether STR starts with PREFIX. */
extern bool
u8_startswith (const uint8_t *str, const uint8_t *prefix);
extern bool
u16_startswith (const uint16_t *str, const uint16_t *prefix);
extern bool
u32_startswith (const uint32_t *str, const uint32_t *prefix);
/* Test whether STR ends with SUFFIX. */
extern bool
u8_endswith (const uint8_t *str, const uint8_t *suffix);
extern bool
u16_endswith (const uint16_t *str, const uint16_t *suffix);
extern bool
u32_endswith (const uint32_t *str, const uint32_t *suffix);
/* Divide STR into tokens separated by characters in DELIM.
This interface is actually more similar to wcstok than to strtok. */
/* Similar to strtok_r(), wcstok(). */
extern uint8_t *
u8_strtok (uint8_t *str, const uint8_t *delim, uint8_t **ptr);
extern uint16_t *
u16_strtok (uint16_t *str, const uint16_t *delim, uint16_t **ptr);
extern uint32_t *
u32_strtok (uint32_t *str, const uint32_t *delim, uint32_t **ptr);
#ifdef __cplusplus
}
#endif
#endif /* _UNISTR_H */

158
lib/unistr/u8-mbtouc-aux.c Normal file
View file

@ -0,0 +1,158 @@
/* Conversion UTF-8 to UCS-4.
Copyright (C) 2001-2002, 2006-2007, 2009 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
/* Specification. */
#include "unistr.h"
#if defined IN_LIBUNISTRING || HAVE_INLINE

/* Decode the multi-unit UTF-8 character that starts at S, of which N
   units are available, and store the result in *PUC.

   Return value:
     2, 3 or 4  a well-formed sequence of that many units was decoded;
     N          the lead unit announces more units than are available
                (*PUC is set to U+FFFD);
     1          the sequence is invalid (*PUC is set to U+FFFD).

   Any lead unit below 0xc2 -- ASCII included -- is reported as invalid
   by this function.  S[0] is read unconditionally.  */
int
u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
{
  const uint8_t lead = s[0];

  if (lead < 0xc2)
    goto invalid;

  if (lead < 0xe0)
    {
      /* Two units.  */
      unsigned int t1;

      if (n < 2)
        goto incomplete;
      t1 = s[1] ^ 0x80;
      if (t1 >= 0x40)
        goto invalid;

      *puc = ((unsigned int) (lead & 0x1f) << 6) | t1;
      return 2;
    }

  if (lead < 0xf0)
    {
      /* Three units.  */
      unsigned int t1, t2;

      if (n < 3)
        goto incomplete;
      t1 = s[1] ^ 0x80;
      t2 = s[2] ^ 0x80;
      if (t1 >= 0x40 || t2 >= 0x40
          /* Would decode to a value below 0x800.  */
          || (lead < 0xe1 && s[1] < 0xa0)
          /* Would decode to a value in 0xd800..0xdfff.  */
          || (lead == 0xed && s[1] >= 0xa0))
        goto invalid;

      *puc = ((unsigned int) (lead & 0x0f) << 12) | (t1 << 6) | t2;
      return 3;
    }

  if (lead < 0xf8)
    {
      /* Four units.  */
      unsigned int t1, t2, t3;

      if (n < 4)
        goto incomplete;
      t1 = s[1] ^ 0x80;
      t2 = s[2] ^ 0x80;
      t3 = s[3] ^ 0x80;
      if (t1 >= 0x40 || t2 >= 0x40 || t3 >= 0x40
          /* Would decode to a value below 0x10000.  */
          || (lead < 0xf1 && s[1] < 0x90)
#if 1
          /* Would decode to a value of 0x110000 or more.  */
          || (lead >= 0xf4 && (lead != 0xf4 || s[1] >= 0x90))
#endif
         )
        goto invalid;

      *puc = ((unsigned int) (lead & 0x07) << 18)
             | (t1 << 12) | (t2 << 6) | t3;
      return 4;
    }

#if 0
  if (lead < 0xfc)
    {
      /* Five units (disabled).  */
      unsigned int t1, t2, t3, t4;

      if (n < 5)
        goto incomplete;
      t1 = s[1] ^ 0x80;
      t2 = s[2] ^ 0x80;
      t3 = s[3] ^ 0x80;
      t4 = s[4] ^ 0x80;
      if (t1 >= 0x40 || t2 >= 0x40 || t3 >= 0x40 || t4 >= 0x40
          || (lead < 0xf9 && s[1] < 0x88))
        goto invalid;

      *puc = ((unsigned int) (lead & 0x03) << 24)
             | (t1 << 18) | (t2 << 12) | (t3 << 6) | t4;
      return 5;
    }

  if (lead < 0xfe)
    {
      /* Six units (disabled).  */
      unsigned int t1, t2, t3, t4, t5;

      if (n < 6)
        goto incomplete;
      t1 = s[1] ^ 0x80;
      t2 = s[2] ^ 0x80;
      t3 = s[3] ^ 0x80;
      t4 = s[4] ^ 0x80;
      t5 = s[5] ^ 0x80;
      if (t1 >= 0x40 || t2 >= 0x40 || t3 >= 0x40 || t4 >= 0x40
          || t5 >= 0x40
          || (lead < 0xfd && s[1] < 0x84))
        goto invalid;

      *puc = ((unsigned int) (lead & 0x01) << 30)
             | (t1 << 24) | (t2 << 18) | (t3 << 12) | (t4 << 6) | t5;
      return 6;
    }
#endif

 invalid:
  /* invalid multibyte character */
  *puc = 0xfffd;
  return 1;

 incomplete:
  /* incomplete multibyte character */
  *puc = 0xfffd;
  return n;
}

#endif

View file

@ -0,0 +1,168 @@
/* Conversion UTF-8 to UCS-4.
Copyright (C) 2001-2002, 2006-2007, 2009 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
/* Specification. */
#include "unistr.h"
#if defined IN_LIBUNISTRING || HAVE_INLINE
/* Decode the multi-unit UTF-8 character that starts at S, of which N
   units are available, and store the result in *PUC.

   Return value:
     2, 3 or 4  that many units were decoded into *PUC;
     N          the lead unit announces more units than are available
                (*PUC is set to U+FFFD);
     1          the sequence is rejected (*PUC is set to U+FFFD).

   The checks on the trailing units are compiled in only when
   CONFIG_UNICODE_SAFETY is nonzero.  When it is zero or undefined, each
   guarded 'if' disappears and the brace block after it runs
   unconditionally, so any lead unit in 0xc2..0xf7 with enough units
   available is decoded without validation.

   Any lead unit below 0xc2 -- ASCII included -- is rejected by this
   function.  S[0] is read unconditionally.  */
int
u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n)
{
uint8_t c = *s;
if (c >= 0xc2)
{
if (c < 0xe0)
{
/* Lead unit 0xc2..0xdf: two-unit sequence.  */
if (n >= 2)
{
#if CONFIG_UNICODE_SAFETY
if ((s[1] ^ 0x80) < 0x40)
#endif
{
*puc = ((unsigned int) (c & 0x1f) << 6)
| (unsigned int) (s[1] ^ 0x80);
return 2;
}
/* invalid multibyte character */
/* (Reached only when the check above is compiled in and fails;
   control then falls through to the common exit at the bottom.)  */
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
return n;
}
}
else if (c < 0xf0)
{
/* Lead unit 0xe0..0xef: three-unit sequence.  */
if (n >= 3)
{
#if CONFIG_UNICODE_SAFETY
/* Besides the two trailing units, reject 0xe0 followed by a unit below
   0xa0 (result would be below 0x800) and 0xed followed by a unit of
   0xa0 or more (result would lie in 0xd800..0xdfff).  */
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
&& (c >= 0xe1 || s[1] >= 0xa0)
&& (c != 0xed || s[1] < 0xa0))
#endif
{
*puc = ((unsigned int) (c & 0x0f) << 12)
| ((unsigned int) (s[1] ^ 0x80) << 6)
| (unsigned int) (s[2] ^ 0x80);
return 3;
}
/* invalid multibyte character */
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
return n;
}
}
else if (c < 0xf8)
{
/* Lead unit 0xf0..0xf7: four-unit sequence.  */
if (n >= 4)
{
#if CONFIG_UNICODE_SAFETY
/* Besides the three trailing units, reject 0xf0 followed by a unit
   below 0x90 (result would be below 0x10000) and anything that would
   decode to 0x110000 or more.  */
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
&& (s[3] ^ 0x80) < 0x40
&& (c >= 0xf1 || s[1] >= 0x90)
#if 1
&& (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
#endif
)
#endif
{
*puc = ((unsigned int) (c & 0x07) << 18)
| ((unsigned int) (s[1] ^ 0x80) << 12)
| ((unsigned int) (s[2] ^ 0x80) << 6)
| (unsigned int) (s[3] ^ 0x80);
return 4;
}
/* invalid multibyte character */
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
return n;
}
}
/* The five- and six-unit forms below are disabled.  */
#if 0
else if (c < 0xfc)
{
if (n >= 5)
{
#if CONFIG_UNICODE_SAFETY
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
&& (c >= 0xf9 || s[1] >= 0x88))
#endif
{
*puc = ((unsigned int) (c & 0x03) << 24)
| ((unsigned int) (s[1] ^ 0x80) << 18)
| ((unsigned int) (s[2] ^ 0x80) << 12)
| ((unsigned int) (s[3] ^ 0x80) << 6)
| (unsigned int) (s[4] ^ 0x80);
return 5;
}
/* invalid multibyte character */
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
return n;
}
}
else if (c < 0xfe)
{
if (n >= 6)
{
#if CONFIG_UNICODE_SAFETY
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
&& (s[5] ^ 0x80) < 0x40
&& (c >= 0xfd || s[1] >= 0x84))
#endif
{
*puc = ((unsigned int) (c & 0x01) << 30)
| ((unsigned int) (s[1] ^ 0x80) << 24)
| ((unsigned int) (s[2] ^ 0x80) << 18)
| ((unsigned int) (s[3] ^ 0x80) << 12)
| ((unsigned int) (s[4] ^ 0x80) << 6)
| (unsigned int) (s[5] ^ 0x80);
return 6;
}
/* invalid multibyte character */
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
return n;
}
}
#endif
}
/* invalid multibyte character */
/* Common exit: lead unit below 0xc2 or at/above 0xf8, or a compiled-in
   check above failed.  Exactly one unit is reported as consumed.  */
*puc = 0xfffd;
return 1;
}
#endif

View file

@ -0,0 +1,179 @@
/* Look at first character in UTF-8 string.
Copyright (C) 1999-2002, 2006-2007, 2009 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#if defined IN_LIBUNISTRING
/* Tell unistr.h to declare u8_mbtouc_unsafe as 'extern', not
'static inline'. */
# include "unistring-notinline.h"
#endif
/* Specification. */
#include "unistr.h"
#if !HAVE_INLINE
/* Out-of-line definition of u8_mbtouc_unsafe, compiled only when
   HAVE_INLINE is zero or undefined.

   Decode the UTF-8 character that starts at S, of which N units are
   available, and store the result in *PUC.

   Return value:
     1          S[0] is below 0x80 and was stored as is, or the sequence
                was rejected (*PUC is then U+FFFD);
     2, 3 or 4  that many units were decoded into *PUC;
     N          the lead unit announces more units than are available
                (*PUC is set to U+FFFD).

   The checks on the trailing units are compiled in only when
   CONFIG_UNICODE_SAFETY is nonzero.  When it is zero or undefined, each
   guarded 'if' disappears and the brace block after it runs
   unconditionally.  S[0] is read unconditionally.  */
int
u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
{
uint8_t c = *s;
/* Single-unit (ASCII) fast path.  */
if (c < 0x80)
{
*puc = c;
return 1;
}
/* Units 0x80..0xc1 skip this branch and reach the common exit below.  */
else if (c >= 0xc2)
{
if (c < 0xe0)
{
/* Lead unit 0xc2..0xdf: two-unit sequence.  */
if (n >= 2)
{
#if CONFIG_UNICODE_SAFETY
if ((s[1] ^ 0x80) < 0x40)
#endif
{
*puc = ((unsigned int) (c & 0x1f) << 6)
| (unsigned int) (s[1] ^ 0x80);
return 2;
}
/* invalid multibyte character */
/* (Reached only when the check above is compiled in and fails;
   control then falls through to the common exit at the bottom.)  */
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
return n;
}
}
else if (c < 0xf0)
{
/* Lead unit 0xe0..0xef: three-unit sequence.  */
if (n >= 3)
{
#if CONFIG_UNICODE_SAFETY
/* Besides the two trailing units, reject 0xe0 followed by a unit below
   0xa0 (result would be below 0x800) and 0xed followed by a unit of
   0xa0 or more (result would lie in 0xd800..0xdfff).  */
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
&& (c >= 0xe1 || s[1] >= 0xa0)
&& (c != 0xed || s[1] < 0xa0))
#endif
{
*puc = ((unsigned int) (c & 0x0f) << 12)
| ((unsigned int) (s[1] ^ 0x80) << 6)
| (unsigned int) (s[2] ^ 0x80);
return 3;
}
/* invalid multibyte character */
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
return n;
}
}
else if (c < 0xf8)
{
/* Lead unit 0xf0..0xf7: four-unit sequence.  */
if (n >= 4)
{
#if CONFIG_UNICODE_SAFETY
/* Besides the three trailing units, reject 0xf0 followed by a unit
   below 0x90 (result would be below 0x10000) and anything that would
   decode to 0x110000 or more.  */
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
&& (s[3] ^ 0x80) < 0x40
&& (c >= 0xf1 || s[1] >= 0x90)
#if 1
&& (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
#endif
)
#endif
{
*puc = ((unsigned int) (c & 0x07) << 18)
| ((unsigned int) (s[1] ^ 0x80) << 12)
| ((unsigned int) (s[2] ^ 0x80) << 6)
| (unsigned int) (s[3] ^ 0x80);
return 4;
}
/* invalid multibyte character */
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
return n;
}
}
/* The five- and six-unit forms below are disabled.  */
#if 0
else if (c < 0xfc)
{
if (n >= 5)
{
#if CONFIG_UNICODE_SAFETY
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
&& (c >= 0xf9 || s[1] >= 0x88))
#endif
{
*puc = ((unsigned int) (c & 0x03) << 24)
| ((unsigned int) (s[1] ^ 0x80) << 18)
| ((unsigned int) (s[2] ^ 0x80) << 12)
| ((unsigned int) (s[3] ^ 0x80) << 6)
| (unsigned int) (s[4] ^ 0x80);
return 5;
}
/* invalid multibyte character */
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
return n;
}
}
else if (c < 0xfe)
{
if (n >= 6)
{
#if CONFIG_UNICODE_SAFETY
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
&& (s[5] ^ 0x80) < 0x40
&& (c >= 0xfd || s[1] >= 0x84))
#endif
{
*puc = ((unsigned int) (c & 0x01) << 30)
| ((unsigned int) (s[1] ^ 0x80) << 24)
| ((unsigned int) (s[2] ^ 0x80) << 18)
| ((unsigned int) (s[3] ^ 0x80) << 12)
| ((unsigned int) (s[4] ^ 0x80) << 6)
| (unsigned int) (s[5] ^ 0x80);
return 6;
}
/* invalid multibyte character */
}
else
{
/* incomplete multibyte character */
*puc = 0xfffd;
return n;
}
}
#endif
}
/* invalid multibyte character */
/* Common exit: lead unit in 0x80..0xc1 or at/above 0xf8, or a
   compiled-in check above failed.  Exactly one unit is reported as
   consumed.  */
*puc = 0xfffd;
return 1;
}
#endif

168
lib/unistr/u8-mbtouc.c Normal file
View file

@ -0,0 +1,168 @@
/* Look at first character in UTF-8 string.
Copyright (C) 1999-2002, 2006-2007, 2009 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#if defined IN_LIBUNISTRING
/* Tell unistr.h to declare u8_mbtouc as 'extern', not 'static inline'. */
# include "unistring-notinline.h"
#endif
/* Specification. */
#include "unistr.h"
#if !HAVE_INLINE

/* Out-of-line definition of u8_mbtouc, compiled only when HAVE_INLINE is
   zero or undefined.

   Decode the UTF-8 character that starts at S, of which N units are
   available, and store the result in *PUC.

   Return value:
     1          S[0] is below 0x80 and was stored as is, or the sequence
                is invalid (*PUC is then U+FFFD);
     2, 3 or 4  a well-formed sequence of that many units was decoded;
     N          the lead unit announces more units than are available
                (*PUC is set to U+FFFD).

   S[0] is read unconditionally.  */
int
u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
{
  const uint8_t lead = s[0];

  if (lead < 0x80)
    {
      /* Single unit.  */
      *puc = lead;
      return 1;
    }

  if (lead < 0xc2)
    goto invalid;

  if (lead < 0xe0)
    {
      /* Two units.  */
      unsigned int t1;

      if (n < 2)
        goto incomplete;
      t1 = s[1] ^ 0x80;
      if (t1 >= 0x40)
        goto invalid;

      *puc = ((unsigned int) (lead & 0x1f) << 6) | t1;
      return 2;
    }

  if (lead < 0xf0)
    {
      /* Three units.  */
      unsigned int t1, t2;

      if (n < 3)
        goto incomplete;
      t1 = s[1] ^ 0x80;
      t2 = s[2] ^ 0x80;
      if (t1 >= 0x40 || t2 >= 0x40
          /* Would decode to a value below 0x800.  */
          || (lead < 0xe1 && s[1] < 0xa0)
          /* Would decode to a value in 0xd800..0xdfff.  */
          || (lead == 0xed && s[1] >= 0xa0))
        goto invalid;

      *puc = ((unsigned int) (lead & 0x0f) << 12) | (t1 << 6) | t2;
      return 3;
    }

  if (lead < 0xf8)
    {
      /* Four units.  */
      unsigned int t1, t2, t3;

      if (n < 4)
        goto incomplete;
      t1 = s[1] ^ 0x80;
      t2 = s[2] ^ 0x80;
      t3 = s[3] ^ 0x80;
      if (t1 >= 0x40 || t2 >= 0x40 || t3 >= 0x40
          /* Would decode to a value below 0x10000.  */
          || (lead < 0xf1 && s[1] < 0x90)
#if 1
          /* Would decode to a value of 0x110000 or more.  */
          || (lead >= 0xf4 && (lead != 0xf4 || s[1] >= 0x90))
#endif
         )
        goto invalid;

      *puc = ((unsigned int) (lead & 0x07) << 18)
             | (t1 << 12) | (t2 << 6) | t3;
      return 4;
    }

#if 0
  if (lead < 0xfc)
    {
      /* Five units (disabled).  */
      unsigned int t1, t2, t3, t4;

      if (n < 5)
        goto incomplete;
      t1 = s[1] ^ 0x80;
      t2 = s[2] ^ 0x80;
      t3 = s[3] ^ 0x80;
      t4 = s[4] ^ 0x80;
      if (t1 >= 0x40 || t2 >= 0x40 || t3 >= 0x40 || t4 >= 0x40
          || (lead < 0xf9 && s[1] < 0x88))
        goto invalid;

      *puc = ((unsigned int) (lead & 0x03) << 24)
             | (t1 << 18) | (t2 << 12) | (t3 << 6) | t4;
      return 5;
    }

  if (lead < 0xfe)
    {
      /* Six units (disabled).  */
      unsigned int t1, t2, t3, t4, t5;

      if (n < 6)
        goto incomplete;
      t1 = s[1] ^ 0x80;
      t2 = s[2] ^ 0x80;
      t3 = s[3] ^ 0x80;
      t4 = s[4] ^ 0x80;
      t5 = s[5] ^ 0x80;
      if (t1 >= 0x40 || t2 >= 0x40 || t3 >= 0x40 || t4 >= 0x40
          || t5 >= 0x40
          || (lead < 0xfd && s[1] < 0x84))
        goto invalid;

      *puc = ((unsigned int) (lead & 0x01) << 30)
             | (t1 << 24) | (t2 << 18) | (t3 << 12) | (t4 << 6) | t5;
      return 6;
    }
#endif

 invalid:
  /* invalid multibyte character */
  *puc = 0xfffd;
  return 1;

 incomplete:
  /* incomplete multibyte character */
  *puc = 0xfffd;
  return n;
}

#endif

285
lib/unistr/u8-mbtoucr.c Normal file
View file

@ -0,0 +1,285 @@
/* Look at first character in UTF-8 string, returning an error code.
Copyright (C) 1999-2002, 2006-2007 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2001.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
/* Specification. */
#include "unistr.h"
/* Decode the UTF-8 character that starts at S, of which N units are
   available, and store the result in *PUC.

   Return value:
     1 .. 4  a well-formed sequence of that many units was decoded;
     -1      the units seen so far cannot start a well-formed sequence
             (*PUC is set to U+FFFD);
     -2      the units seen so far are acceptable but more are needed
             (*PUC is set to U+FFFD).

   The units are examined strictly left to right: each one is validated
   as soon as it is known to be available, so an invalid unit is reported
   as -1 even when the sequence is also too short.  S[0] is read
   unconditionally.  */
int
u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
{
  const uint8_t lead = s[0];

  if (lead < 0x80)
    {
      /* Single unit.  */
      *puc = lead;
      return 1;
    }

  if (lead < 0xc2)
    goto invalid;

  if (lead < 0xe0)
    {
      /* Two units.  */
      if (n < 2)
        goto incomplete;
      if ((s[1] ^ 0x80) >= 0x40)
        goto invalid;

      *puc = ((unsigned int) (lead & 0x1f) << 6)
             | (unsigned int) (s[1] ^ 0x80);
      return 2;
    }

  if (lead < 0xf0)
    {
      /* Three units.  */
      if (n < 2)
        goto incomplete;
      if ((s[1] ^ 0x80) >= 0x40
          /* Would decode to a value below 0x800.  */
          || (lead < 0xe1 && s[1] < 0xa0)
          /* Would decode to a value in 0xd800..0xdfff.  */
          || (lead == 0xed && s[1] >= 0xa0))
        goto invalid;

      if (n < 3)
        goto incomplete;
      if ((s[2] ^ 0x80) >= 0x40)
        goto invalid;

      *puc = ((unsigned int) (lead & 0x0f) << 12)
             | ((unsigned int) (s[1] ^ 0x80) << 6)
             | (unsigned int) (s[2] ^ 0x80);
      return 3;
    }

  if (lead < 0xf8)
    {
      /* Four units.  */
      if (n < 2)
        goto incomplete;
      if ((s[1] ^ 0x80) >= 0x40
          /* Would decode to a value below 0x10000.  */
          || (lead < 0xf1 && s[1] < 0x90)
#if 1
          /* Would decode to a value of 0x110000 or more.  */
          || (lead >= 0xf4 && (lead != 0xf4 || s[1] >= 0x90))
#endif
         )
        goto invalid;

      if (n < 3)
        goto incomplete;
      if ((s[2] ^ 0x80) >= 0x40)
        goto invalid;

      if (n < 4)
        goto incomplete;
      if ((s[3] ^ 0x80) >= 0x40)
        goto invalid;

      *puc = ((unsigned int) (lead & 0x07) << 18)
             | ((unsigned int) (s[1] ^ 0x80) << 12)
             | ((unsigned int) (s[2] ^ 0x80) << 6)
             | (unsigned int) (s[3] ^ 0x80);
      return 4;
    }

#if 0
  if (lead < 0xfc)
    {
      /* Five units (disabled).  */
      if (n < 2)
        goto incomplete;
      if ((s[1] ^ 0x80) >= 0x40
          || (lead < 0xf9 && s[1] < 0x88))
        goto invalid;

      if (n < 3)
        goto incomplete;
      if ((s[2] ^ 0x80) >= 0x40)
        goto invalid;

      if (n < 4)
        goto incomplete;
      if ((s[3] ^ 0x80) >= 0x40)
        goto invalid;

      if (n < 5)
        goto incomplete;
      if ((s[4] ^ 0x80) >= 0x40)
        goto invalid;

      *puc = ((unsigned int) (lead & 0x03) << 24)
             | ((unsigned int) (s[1] ^ 0x80) << 18)
             | ((unsigned int) (s[2] ^ 0x80) << 12)
             | ((unsigned int) (s[3] ^ 0x80) << 6)
             | (unsigned int) (s[4] ^ 0x80);
      return 5;
    }

  if (lead < 0xfe)
    {
      /* Six units (disabled).  */
      if (n < 2)
        goto incomplete;
      if ((s[1] ^ 0x80) >= 0x40
          || (lead < 0xfd && s[1] < 0x84))
        goto invalid;

      if (n < 3)
        goto incomplete;
      if ((s[2] ^ 0x80) >= 0x40)
        goto invalid;

      if (n < 4)
        goto incomplete;
      if ((s[3] ^ 0x80) >= 0x40)
        goto invalid;

      if (n < 5)
        goto incomplete;
      if ((s[4] ^ 0x80) >= 0x40)
        goto invalid;

      if (n < 6)
        goto incomplete;
      if ((s[5] ^ 0x80) >= 0x40)
        goto invalid;

      *puc = ((unsigned int) (lead & 0x01) << 30)
             | ((unsigned int) (s[1] ^ 0x80) << 24)
             | ((unsigned int) (s[2] ^ 0x80) << 18)
             | ((unsigned int) (s[3] ^ 0x80) << 12)
             | ((unsigned int) (s[4] ^ 0x80) << 6)
             | (unsigned int) (s[5] ^ 0x80);
      return 6;
    }
#endif

 invalid:
  /* invalid multibyte character */
  *puc = 0xfffd;
  return -1;

 incomplete:
  /* incomplete multibyte character */
  *puc = 0xfffd;
  return -2;
}

93
lib/unistr/u8-prev.c Normal file
View file

@ -0,0 +1,93 @@
/* Iterate over previous character in UTF-8 string.
Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
/* Specification. */
#include "unistr.h"
/* Backward iteration step over a UTF-8 string.

   S points just past the character of interest; START is the beginning
   of the string.  The units S[-1], S[-2], ... are inspected, never going
   below START, until a lead unit matching the number of units walked
   over is found.  On success the decoded character is stored in *PUC and
   a pointer to its first unit (S - 1 .. S - 4) is returned.

   NULL is returned, and *PUC is left untouched, when S == START or when
   no one- to four-unit character ending at S is recognised.

   When CONFIG_UNICODE_SAFETY is zero or undefined, the guards that
   require each walked-over unit to be a trailing unit (0x80..0xbf), and
   the range checks on the three- and four-unit forms, are compiled
   out.  */
const uint8_t *
u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start)
{
/* Keep in sync with unistr.h and utf8-ucs4.c. */
if (s != start)
{
uint8_t c_1 = s[-1];
/* One unit: S[-1] is itself a complete character.  */
if (c_1 < 0x80)
{
*puc = c_1;
return s - 1;
}
/* Otherwise S[-1] must be a trailing unit (checked only in safe mode)
   and there must be at least one more unit before it.  */
#if CONFIG_UNICODE_SAFETY
if ((c_1 ^ 0x80) < 0x40)
#endif
if (s - 1 != start)
{
uint8_t c_2 = s[-2];
/* Two units: lead unit 0xc2..0xdf at S[-2].  */
if (c_2 >= 0xc2 && c_2 < 0xe0)
{
*puc = ((unsigned int) (c_2 & 0x1f) << 6)
| (unsigned int) (c_1 ^ 0x80);
return s - 2;
}
#if CONFIG_UNICODE_SAFETY
if ((c_2 ^ 0x80) < 0x40)
#endif
if (s - 2 != start)
{
uint8_t c_3 = s[-3];
/* Three units: lead unit 0xe0..0xef at S[-3].  In safe mode, 0xe0
   followed by a unit below 0xa0 and 0xed followed by a unit of 0xa0 or
   more are refused.  */
if (c_3 >= 0xe0 && c_3 < 0xf0
#if CONFIG_UNICODE_SAFETY
&& (c_3 >= 0xe1 || c_2 >= 0xa0)
&& (c_3 != 0xed || c_2 < 0xa0)
#endif
)
{
*puc = ((unsigned int) (c_3 & 0x0f) << 12)
| ((unsigned int) (c_2 ^ 0x80) << 6)
| (unsigned int) (c_1 ^ 0x80);
return s - 3;
}
#if CONFIG_UNICODE_SAFETY
if ((c_3 ^ 0x80) < 0x40)
#endif
if (s - 3 != start)
{
uint8_t c_4 = s[-4];
/* Four units: lead unit 0xf0..0xf7 at S[-4].  In safe mode, 0xf0
   followed by a unit below 0x90 and anything that would decode to
   0x110000 or more are refused.  */
if (c_4 >= 0xf0 && c_4 < 0xf8
#if CONFIG_UNICODE_SAFETY
&& (c_4 >= 0xf1 || c_3 >= 0x90)
&& (c_4 < 0xf4 || (c_4 == 0xf4 && c_3 < 0x90))
#endif
)
{
*puc = ((unsigned int) (c_4 & 0x07) << 18)
| ((unsigned int) (c_3 ^ 0x80) << 12)
| ((unsigned int) (c_2 ^ 0x80) << 6)
| (unsigned int) (c_1 ^ 0x80);
return s - 4;
}
}
}
}
}
/* Nothing recognisable ends at S.  */
return NULL;
}

View file

@ -0,0 +1,69 @@
/* Conversion UCS-4 to UTF-8.
Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
/* Specification. */
#include "unistr.h"
/* Slow path of u8_uctomb: store UC as a multi-unit UTF-8 sequence at S,
   where N units are available.

   Return value:
     2, 3 or 4  number of units written;
     -1         UC lies in 0xd800..0xdfff or is 0x110000 or above
                (nothing written);
     -2         the sequence needs more than N units (nothing written),
                or UC is below 0x80 -- the caller is expected to have
                stored such a value itself whenever N >= 1.  */
int
u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n)
{
  /* Marker bits of the first unit, indexed by sequence length.  */
  static const uint8_t lead_mark[7] =
    { 0, 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };
  int count;
  int i;

  if (uc < 0x80)
    /* The case n >= 1 is already handled by the caller. */
    return -2;

  /* Work out how many units UC needs, rejecting what cannot be
     encoded.  */
  if (uc < 0x800)
    count = 2;
  else if (uc >= 0xd800 && uc < 0xe000)
    return -1;
  else if (uc < 0x10000)
    count = 3;
#if 0
  else if (uc < 0x200000)
    count = 4;
  else if (uc < 0x4000000)
    count = 5;
  else if (uc <= 0x7fffffff)
    count = 6;
#else
  else if (uc < 0x110000)
    count = 4;
#endif
  else
    return -1;

  if (count > n)
    return -2;

  /* Emit the trailing units back to front, six payload bits apiece;
     whatever remains of UC then fits next to the lead marker.  */
  for (i = count - 1; i > 0; i--)
    {
      s[i] = 0x80 | (uc & 0x3f);
      uc = uc >> 6;
    }
  s[0] = lead_mark[count] | uc;

  return count;
}

88
lib/unistr/u8-uctomb.c Normal file
View file

@ -0,0 +1,88 @@
/* Store a character in UTF-8 string.
Copyright (C) 2002, 2005-2006, 2009 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2002.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
#if defined IN_LIBUNISTRING
/* Tell unistr.h to declare u8_uctomb as 'extern', not 'static inline'. */
# include "unistring-notinline.h"
#endif
/* Specification. */
#include "unistr.h"
#if !HAVE_INLINE
/* Store the character UC, encoded as UTF-8, into the buffer S, which has
   room for N bytes.

   Return value:
     1..4  the number of bytes stored in S;
     -1    UC is not encodable: it is a surrogate (0xD800..0xDFFF) or is
           0x110000 or larger;
     -2    N is smaller than the number of bytes required.

   Nothing is stored in S when a negative value is returned.  The 5- and
   6-byte forms that would cover values up to 0x7FFFFFFF are intentionally
   not produced.  */
int
u8_uctomb (uint8_t *s, ucs4_t uc, int n)
{
  int len;

  /* ASCII: a single byte, stored unchanged.  */
  if (uc < 0x80)
    {
      if (n <= 0)
        return -2;
      s[0] = uc;
      return 1;
    }

  /* Determine the encoded length; the validity check comes before the
     buffer size check, so an invalid UC yields -1 whatever N is.  */
  if (uc < 0x800)
    len = 2;
  else if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x10000))
    len = 3;
  else if (uc >= 0x10000 && uc < 0x110000)
    len = 4;
  else
    return -1;

  if (n < len)
    return -2;

  switch (len)
    {
    case 2:
      s[0] = 0xc0 | (uc >> 6);
      s[1] = 0x80 | (uc & 0x3f);
      break;
    case 3:
      s[0] = 0xe0 | (uc >> 12);
      s[1] = 0x80 | ((uc >> 6) & 0x3f);
      s[2] = 0x80 | (uc & 0x3f);
      break;
    default: /* len == 4 */
      s[0] = 0xf0 | (uc >> 18);
      s[1] = 0x80 | ((uc >> 12) & 0x3f);
      s[2] = 0x80 | ((uc >> 6) & 0x3f);
      s[3] = 0x80 | (uc & 0x3f);
      break;
    }

  return len;
}
#endif

26
lib/unitypes.h Normal file
View file

@ -0,0 +1,26 @@
/* Elementary types for the GNU UniString library.
Copyright (C) 2002, 2005-2006 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>. */
/* Include guard: the definitions below are processed at most once per
   translation unit. */
#ifndef _UNITYPES_H
#define _UNITYPES_H
/* Get uint8_t, uint16_t, uint32_t. */
#include <stdint.h>
/* Type representing a Unicode character.
   It is an alias for the unsigned 32-bit integer type uint32_t. */
typedef uint32_t ucs4_t;
#endif /* _UNITYPES_H */