Use GNU libunistring and Gnulib modules needed by R6RS bytevectors and ports.
* m4/gnulib-cache.m4 (gl_MODULES): Add `byteswap', `iconv_open-utf', `libunistring', `striconveh', and `string'.
This commit is contained in:
parent
21346c4f5e
commit
24d56127bb
42 changed files with 7947 additions and 12 deletions
239
lib/Makefile.am
239
lib/Makefile.am
|
|
@ -9,9 +9,9 @@
|
|||
# the same distribution terms as the rest of that program.
|
||||
#
|
||||
# Generated by gnulib-tool.
|
||||
# Reproduce by: gnulib-tool --import --dir=. --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl --libtool --macro-prefix=gl --no-vc-files alloca-opt autobuild count-one-bits environ extensions flock fpieee full-read full-write lib-symbol-visibility putenv stdlib strcase strftime
|
||||
# Reproduce by: gnulib-tool --import --dir=. --lib=libgnu --source-base=lib --m4-base=m4 --doc-base=doc --tests-base=tests --aux-dir=build-aux --lgpl --libtool --macro-prefix=gl --no-vc-files alloca-opt autobuild byteswap count-one-bits environ extensions flock fpieee full-read full-write iconv_open-utf lib-symbol-visibility libunistring putenv stdlib strcase strftime striconveh string
|
||||
|
||||
AUTOMAKE_OPTIONS = 1.5 gnits
|
||||
AUTOMAKE_OPTIONS = 1.5 gnits subdir-objects
|
||||
|
||||
SUBDIRS =
|
||||
noinst_HEADERS =
|
||||
|
|
@ -54,6 +54,42 @@ EXTRA_DIST += alloca.in.h
|
|||
|
||||
## end gnulib module alloca-opt
|
||||
|
||||
## begin gnulib module byteswap
|
||||
|
||||
BUILT_SOURCES += $(BYTESWAP_H)
|
||||
|
||||
# Create <byteswap.h> from its template when the system doesn't have one.
# Any stale header and temporary file are removed first; the new contents
# are assembled in byteswap.h-t and renamed into place only after both the
# banner and the template have been written successfully, matching the way
# the other generated headers in this file are built.
byteswap.h: byteswap.in.h
	rm -f $@-t $@
	{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \
	  cat $(srcdir)/byteswap.in.h; \
	} > $@-t
	mv -f $@-t $@
|
||||
MOSTLYCLEANFILES += byteswap.h byteswap.h-t
|
||||
|
||||
EXTRA_DIST += byteswap.in.h
|
||||
|
||||
## end gnulib module byteswap
|
||||
|
||||
## begin gnulib module c-ctype
|
||||
|
||||
libgnu_la_SOURCES += c-ctype.h c-ctype.c
|
||||
|
||||
## end gnulib module c-ctype
|
||||
|
||||
## begin gnulib module c-strcase
|
||||
|
||||
libgnu_la_SOURCES += c-strcase.h c-strcasecmp.c c-strncasecmp.c
|
||||
|
||||
## end gnulib module c-strcase
|
||||
|
||||
## begin gnulib module c-strcaseeq
|
||||
|
||||
|
||||
EXTRA_DIST += c-strcaseeq.h
|
||||
|
||||
## end gnulib module c-strcaseeq
|
||||
|
||||
## begin gnulib module configmake
|
||||
|
||||
# Retrieve values of the variables through 'configure' followed by
|
||||
|
|
@ -143,6 +179,72 @@ libgnu_la_SOURCES += full-write.h full-write.c
|
|||
|
||||
## end gnulib module full-write
|
||||
|
||||
## begin gnulib module gperf
|
||||
|
||||
GPERF = gperf
|
||||
|
||||
## end gnulib module gperf
|
||||
|
||||
## begin gnulib module havelib
|
||||
|
||||
|
||||
EXTRA_DIST += $(top_srcdir)/build-aux/config.rpath
|
||||
|
||||
## end gnulib module havelib
|
||||
|
||||
## begin gnulib module iconv_open
|
||||
|
||||
BUILT_SOURCES += $(ICONV_H)
|
||||
|
||||
# Build <iconv.h> for systems that lack one that works with the given
# compiler.  The template iconv.in.h is filtered through sed, which fills in
# each @NAME@ placeholder; the output goes to iconv.h-t and is renamed to
# iconv.h as the final step.  The '' inside each pattern is an empty shell
# string, so sed still sees the plain @NAME@ text.
iconv.h: iconv.in.h
	rm -f $@-t $@
	{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \
	  sed -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \
	      -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \
	      -e 's|@''NEXT_ICONV_H''@|$(NEXT_ICONV_H)|g' \
	      -e 's|@''ICONV_CONST''@|$(ICONV_CONST)|g' \
	      -e 's|@''REPLACE_ICONV''@|$(REPLACE_ICONV)|g' \
	      -e 's|@''REPLACE_ICONV_OPEN''@|$(REPLACE_ICONV_OPEN)|g' \
	      -e 's|@''REPLACE_ICONV_UTF''@|$(REPLACE_ICONV_UTF)|g' \
	      < $(srcdir)/iconv.in.h; \
	} > $@-t
	mv $@-t $@
|
||||
MOSTLYCLEANFILES += iconv.h iconv.h-t
|
||||
|
||||
# Regenerate the iconv_open-*.h headers from their .gperf inputs with
# $(GPERF).  Both the temporary -t file and the final header are written
# into $(srcdir), not into the build directory.

iconv_open-aix.h: iconv_open-aix.gperf
	$(GPERF) -m 10 $(srcdir)/iconv_open-aix.gperf > $(srcdir)/iconv_open-aix.h-t
	mv $(srcdir)/iconv_open-aix.h-t $(srcdir)/iconv_open-aix.h

iconv_open-hpux.h: iconv_open-hpux.gperf
	$(GPERF) -m 10 $(srcdir)/iconv_open-hpux.gperf > $(srcdir)/iconv_open-hpux.h-t
	mv $(srcdir)/iconv_open-hpux.h-t $(srcdir)/iconv_open-hpux.h

iconv_open-irix.h: iconv_open-irix.gperf
	$(GPERF) -m 10 $(srcdir)/iconv_open-irix.gperf > $(srcdir)/iconv_open-irix.h-t
	mv $(srcdir)/iconv_open-irix.h-t $(srcdir)/iconv_open-irix.h

iconv_open-osf.h: iconv_open-osf.gperf
	$(GPERF) -m 10 $(srcdir)/iconv_open-osf.gperf > $(srcdir)/iconv_open-osf.h-t
	mv $(srcdir)/iconv_open-osf.h-t $(srcdir)/iconv_open-osf.h
|
||||
BUILT_SOURCES += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h
|
||||
MOSTLYCLEANFILES += iconv_open-aix.h-t iconv_open-hpux.h-t iconv_open-irix.h-t iconv_open-osf.h-t
|
||||
MAINTAINERCLEANFILES += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h
|
||||
EXTRA_DIST += iconv_open-aix.h iconv_open-hpux.h iconv_open-irix.h iconv_open-osf.h
|
||||
|
||||
EXTRA_DIST += iconv.in.h iconv_open-aix.gperf iconv_open-hpux.gperf iconv_open-irix.gperf iconv_open-osf.gperf iconv_open.c
|
||||
|
||||
EXTRA_libgnu_la_SOURCES += iconv_open.c
|
||||
|
||||
## end gnulib module iconv_open
|
||||
|
||||
## begin gnulib module iconv_open-utf
|
||||
|
||||
|
||||
EXTRA_DIST += iconv.c iconv_close.c
|
||||
|
||||
EXTRA_libgnu_la_SOURCES += iconv.c iconv_close.c
|
||||
|
||||
## end gnulib module iconv_open-utf
|
||||
|
||||
## begin gnulib module lib-symbol-visibility
|
||||
|
||||
# The value of $(CFLAG_VISIBILITY) needs to be added to the CFLAGS for the
|
||||
|
|
@ -442,6 +544,95 @@ EXTRA_libgnu_la_SOURCES += strftime.c
|
|||
|
||||
## end gnulib module strftime
|
||||
|
||||
## begin gnulib module striconveh
|
||||
|
||||
libgnu_la_SOURCES += striconveh.h striconveh.c
|
||||
if GL_COND_LIBTOOL
|
||||
libgnu_la_LDFLAGS += $(LTLIBICONV)
|
||||
endif
|
||||
|
||||
EXTRA_DIST += iconveh.h
|
||||
|
||||
## end gnulib module striconveh
|
||||
|
||||
## begin gnulib module string
|
||||
|
||||
BUILT_SOURCES += string.h
|
||||
|
||||
# Build <string.h> for systems that lack one that works with the given
# compiler.  The template string.in.h is filtered through sed, which fills
# in each @NAME@ placeholder and reads the contents of $(LINK_WARNING_H) in
# after the line mentioning "definition of GL_LINK_WARNING".  The output
# goes to string.h-t and is renamed to string.h as the final step.
string.h: string.in.h
	rm -f $@-t $@
	{ echo '/* DO NOT EDIT! GENERATED AUTOMATICALLY! */' && \
	  sed -e 's|@''INCLUDE_NEXT''@|$(INCLUDE_NEXT)|g' \
	      -e 's|@''PRAGMA_SYSTEM_HEADER''@|@PRAGMA_SYSTEM_HEADER@|g' \
	      -e 's|@''NEXT_STRING_H''@|$(NEXT_STRING_H)|g' \
	      -e 's|@''GNULIB_MBSLEN''@|$(GNULIB_MBSLEN)|g' \
	      -e 's|@''GNULIB_MBSNLEN''@|$(GNULIB_MBSNLEN)|g' \
	      -e 's|@''GNULIB_MBSCHR''@|$(GNULIB_MBSCHR)|g' \
	      -e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \
	      -e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \
	      -e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \
	      -e 's|@''GNULIB_MBSNCASECMP''@|$(GNULIB_MBSNCASECMP)|g' \
	      -e 's|@''GNULIB_MBSPCASECMP''@|$(GNULIB_MBSPCASECMP)|g' \
	      -e 's|@''GNULIB_MBSCASESTR''@|$(GNULIB_MBSCASESTR)|g' \
	      -e 's|@''GNULIB_MBSCSPN''@|$(GNULIB_MBSCSPN)|g' \
	      -e 's|@''GNULIB_MBSPBRK''@|$(GNULIB_MBSPBRK)|g' \
	      -e 's|@''GNULIB_MBSSPN''@|$(GNULIB_MBSSPN)|g' \
	      -e 's|@''GNULIB_MBSSEP''@|$(GNULIB_MBSSEP)|g' \
	      -e 's|@''GNULIB_MBSTOK_R''@|$(GNULIB_MBSTOK_R)|g' \
	      -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
	      -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \
	      -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \
	      -e 's|@''GNULIB_RAWMEMCHR''@|$(GNULIB_RAWMEMCHR)|g' \
	      -e 's|@''GNULIB_STPCPY''@|$(GNULIB_STPCPY)|g' \
	      -e 's|@''GNULIB_STPNCPY''@|$(GNULIB_STPNCPY)|g' \
	      -e 's|@''GNULIB_STRCHRNUL''@|$(GNULIB_STRCHRNUL)|g' \
	      -e 's|@''GNULIB_STRDUP''@|$(GNULIB_STRDUP)|g' \
	      -e 's|@''GNULIB_STRNDUP''@|$(GNULIB_STRNDUP)|g' \
	      -e 's|@''GNULIB_STRNLEN''@|$(GNULIB_STRNLEN)|g' \
	      -e 's|@''GNULIB_STRPBRK''@|$(GNULIB_STRPBRK)|g' \
	      -e 's|@''GNULIB_STRSEP''@|$(GNULIB_STRSEP)|g' \
	      -e 's|@''GNULIB_STRSTR''@|$(GNULIB_STRSTR)|g' \
	      -e 's|@''GNULIB_STRCASESTR''@|$(GNULIB_STRCASESTR)|g' \
	      -e 's|@''GNULIB_STRTOK_R''@|$(GNULIB_STRTOK_R)|g' \
	      -e 's|@''GNULIB_STRERROR''@|$(GNULIB_STRERROR)|g' \
	      -e 's|@''GNULIB_STRSIGNAL''@|$(GNULIB_STRSIGNAL)|g' \
	      -e 's|@''GNULIB_STRVERSCMP''@|$(GNULIB_STRVERSCMP)|g' \
	      -e 's|@''HAVE_DECL_MEMMEM''@|$(HAVE_DECL_MEMMEM)|g' \
	      -e 's|@''HAVE_MEMPCPY''@|$(HAVE_MEMPCPY)|g' \
	      -e 's|@''HAVE_DECL_MEMRCHR''@|$(HAVE_DECL_MEMRCHR)|g' \
	      -e 's|@''HAVE_RAWMEMCHR''@|$(HAVE_RAWMEMCHR)|g' \
	      -e 's|@''HAVE_STPCPY''@|$(HAVE_STPCPY)|g' \
	      -e 's|@''HAVE_STPNCPY''@|$(HAVE_STPNCPY)|g' \
	      -e 's|@''HAVE_STRCHRNUL''@|$(HAVE_STRCHRNUL)|g' \
	      -e 's|@''HAVE_DECL_STRDUP''@|$(HAVE_DECL_STRDUP)|g' \
	      -e 's|@''HAVE_STRNDUP''@|$(HAVE_STRNDUP)|g' \
	      -e 's|@''HAVE_DECL_STRNDUP''@|$(HAVE_DECL_STRNDUP)|g' \
	      -e 's|@''HAVE_DECL_STRNLEN''@|$(HAVE_DECL_STRNLEN)|g' \
	      -e 's|@''HAVE_STRPBRK''@|$(HAVE_STRPBRK)|g' \
	      -e 's|@''HAVE_STRSEP''@|$(HAVE_STRSEP)|g' \
	      -e 's|@''HAVE_STRCASESTR''@|$(HAVE_STRCASESTR)|g' \
	      -e 's|@''HAVE_DECL_STRTOK_R''@|$(HAVE_DECL_STRTOK_R)|g' \
	      -e 's|@''HAVE_DECL_STRERROR''@|$(HAVE_DECL_STRERROR)|g' \
	      -e 's|@''HAVE_DECL_STRSIGNAL''@|$(HAVE_DECL_STRSIGNAL)|g' \
	      -e 's|@''HAVE_STRVERSCMP''@|$(HAVE_STRVERSCMP)|g' \
	      -e 's|@''REPLACE_MEMMEM''@|$(REPLACE_MEMMEM)|g' \
	      -e 's|@''REPLACE_STRCASESTR''@|$(REPLACE_STRCASESTR)|g' \
	      -e 's|@''REPLACE_STRDUP''@|$(REPLACE_STRDUP)|g' \
	      -e 's|@''REPLACE_STRSTR''@|$(REPLACE_STRSTR)|g' \
	      -e 's|@''REPLACE_STRERROR''@|$(REPLACE_STRERROR)|g' \
	      -e 's|@''REPLACE_STRSIGNAL''@|$(REPLACE_STRSIGNAL)|g' \
	      -e '/definition of GL_LINK_WARNING/r $(LINK_WARNING_H)' \
	      < $(srcdir)/string.in.h; \
	} > $@-t
	mv $@-t $@
|
||||
MOSTLYCLEANFILES += string.h string.h-t
|
||||
|
||||
EXTRA_DIST += string.in.h
|
||||
|
||||
## end gnulib module string
|
||||
|
||||
## begin gnulib module strings
|
||||
|
||||
BUILT_SOURCES += strings.h
|
||||
|
|
@ -598,6 +789,50 @@ EXTRA_DIST += unistd.in.h
|
|||
|
||||
## end gnulib module unistd
|
||||
|
||||
## begin gnulib module unistr/base
|
||||
|
||||
|
||||
EXTRA_DIST += unistr.h
|
||||
|
||||
## end gnulib module unistr/base
|
||||
|
||||
## begin gnulib module unistr/u8-mbtouc
|
||||
|
||||
libgnu_la_SOURCES += unistr/u8-mbtouc.c unistr/u8-mbtouc-aux.c
|
||||
|
||||
## end gnulib module unistr/u8-mbtouc
|
||||
|
||||
## begin gnulib module unistr/u8-mbtouc-unsafe
|
||||
|
||||
libgnu_la_SOURCES += unistr/u8-mbtouc-unsafe.c unistr/u8-mbtouc-unsafe-aux.c
|
||||
|
||||
## end gnulib module unistr/u8-mbtouc-unsafe
|
||||
|
||||
## begin gnulib module unistr/u8-mbtoucr
|
||||
|
||||
libgnu_la_SOURCES += unistr/u8-mbtoucr.c
|
||||
|
||||
## end gnulib module unistr/u8-mbtoucr
|
||||
|
||||
## begin gnulib module unistr/u8-prev
|
||||
|
||||
libgnu_la_SOURCES += unistr/u8-prev.c
|
||||
|
||||
## end gnulib module unistr/u8-prev
|
||||
|
||||
## begin gnulib module unistr/u8-uctomb
|
||||
|
||||
libgnu_la_SOURCES += unistr/u8-uctomb.c unistr/u8-uctomb-aux.c
|
||||
|
||||
## end gnulib module unistr/u8-uctomb
|
||||
|
||||
## begin gnulib module unitypes
|
||||
|
||||
|
||||
EXTRA_DIST += unitypes.h
|
||||
|
||||
## end gnulib module unitypes
|
||||
|
||||
## begin gnulib module verify
|
||||
|
||||
libgnu_la_SOURCES += verify.h
|
||||
|
|
|
|||
44
lib/byteswap.in.h
Normal file
44
lib/byteswap.in.h
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
/* byteswap.h - Byte swapping
|
||||
Copyright (C) 2005, 2007 Free Software Foundation, Inc.
|
||||
Written by Oskar Liljeblad <oskar@osk.mine.nu>, 2005.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _GL_BYTESWAP_H
|
||||
#define _GL_BYTESWAP_H
|
||||
|
||||
/* Return the unsigned 16-bit value X with its two bytes exchanged.
   The argument is expanded twice, so it must be free of side effects.  */
#define bswap_16(x) ((((x) & 0xFF00) >> 8) | \
                     (((x) & 0x00FF) << 8))
|
||||
|
||||
/* Given an unsigned 32-bit argument X, return the value corresponding to
   X with reversed byte order.
   The masks are unsigned so that every shift is carried out in an unsigned
   type: with a plain 'int' argument, shifting the low byte left by 24 bits
   would otherwise overflow a signed int whenever that byte is >= 0x80.
   The argument is expanded four times, so it must be free of side
   effects.  */
#define bswap_32(x) ((((x) & 0x000000FFU) << 24) | \
                     (((x) & 0x0000FF00U) << 8) | \
                     (((x) & 0x00FF0000U) >> 8) | \
                     (((x) & 0xFF000000U) >> 24))
|
||||
|
||||
/* Return the unsigned 64-bit value X with the order of its eight bytes
   reversed.  The terms are listed from the most significant source byte
   down to the least significant one.  The argument is expanded eight
   times, so it must be free of side effects.  */
#define bswap_64(x) ((((x) & 0xFF00000000000000ULL) >> 56) | \
                     (((x) & 0x00FF000000000000ULL) >> 40) | \
                     (((x) & 0x0000FF0000000000ULL) >> 24) | \
                     (((x) & 0x000000FF00000000ULL) >> 8) | \
                     (((x) & 0x00000000FF000000ULL) << 8) | \
                     (((x) & 0x0000000000FF0000ULL) << 24) | \
                     (((x) & 0x000000000000FF00ULL) << 40) | \
                     (((x) & 0x00000000000000FFULL) << 56))
|
||||
|
||||
#endif /* _GL_BYTESWAP_H */
|
||||
396
lib/c-ctype.c
Normal file
396
lib/c-ctype.c
Normal file
|
|
@ -0,0 +1,396 @@
|
|||
/* Character handling in C locale.
|
||||
|
||||
Copyright 2000-2003, 2006 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#define NO_C_CTYPE_MACROS
|
||||
#include "c-ctype.h"
|
||||
|
||||
/* Return true if C lies in the range 0x00..0x7f.  The answer does not
   depend on the locale; whether the test is meaningful on an EBCDIC
   system is questionable.  */
bool
c_isascii (int c)
{
  if (c < 0x00)
    return false;
  return c <= 0x7f;
}
|
||||
|
||||
/* Return true if C is one of the letters 'A'..'Z', 'a'..'z' or one of the
   decimal digits '0'..'9'.  */
bool
c_isalnum (int c)
{
#if C_CTYPE_CONSECUTIVE_DIGITS \
    && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
# if C_CTYPE_ASCII
  /* With bit 0x20 cleared, one range test covers both letter cases.  */
  int folded = c & ~0x20;

  if ('0' <= c && c <= '9')
    return true;
  return 'A' <= folded && folded <= 'Z';
# else
  if ('0' <= c && c <= '9')
    return true;
  if ('A' <= c && c <= 'Z')
    return true;
  return 'a' <= c && c <= 'z';
# endif
#else
  /* The ranges are not known to be contiguous: list every member.  */
  switch (c)
    {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;
    default:
      return false;
    }
#endif
}
|
||||
|
||||
/* Return true if C is one of the letters 'A'..'Z' or 'a'..'z'.  */
bool
c_isalpha (int c)
{
#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
# if C_CTYPE_ASCII
  /* With bit 0x20 cleared, one range test covers both letter cases.  */
  int folded = c & ~0x20;

  return 'A' <= folded && folded <= 'Z';
# else
  if ('A' <= c && c <= 'Z')
    return true;
  return 'a' <= c && c <= 'z';
# endif
#else
  /* The ranges are not known to be contiguous: list every member.  */
  switch (c)
    {
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;
    default:
      return false;
    }
#endif
}
|
||||
|
||||
/* Return true if C is a space or a horizontal tab.  */
bool
c_isblank (int c)
{
  if (c == ' ')
    return true;
  return c == '\t';
}
|
||||
|
||||
/* Return true if C is a control character.  With an ASCII character set
   these are the values 0x00..0x1f and 0x7f; otherwise every value that is
   not one of the printable characters listed below is reported as a
   control character.  */
bool
c_iscntrl (int c)
{
#if C_CTYPE_ASCII
  if (c == 0x7f)
    return true;
  return (c & ~0x1f) == 0;
#else
  switch (c)
    {
    case ' ': case '!': case '"': case '#': case '$': case '%': case '&':
    case '\'': case '(': case ')': case '*': case '+': case ',': case '-':
    case '.': case '/':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case ':': case ';': case '<': case '=': case '>': case '?': case '@':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case '[': case '\\': case ']': case '^': case '_': case '`':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
    case '{': case '|': case '}': case '~':
      return false;
    default:
      return true;
    }
#endif
}
|
||||
|
||||
/* Return true if C is one of the decimal digits '0'..'9'.  */
bool
c_isdigit (int c)
{
#if C_CTYPE_CONSECUTIVE_DIGITS
  if (c < '0')
    return false;
  return c <= '9';
#else
  switch (c)
    {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      return true;
    default:
      return false;
    }
#endif
}
|
||||
|
||||
/* Return true if C is one of the lowercase letters 'a'..'z'.  */
bool
c_islower (int c)
{
#if C_CTYPE_CONSECUTIVE_LOWERCASE
  if (c < 'a')
    return false;
  return c <= 'z';
#else
  switch (c)
    {
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
      return true;
    default:
      return false;
    }
#endif
}
|
||||
|
||||
/* Return true if C is a printable character other than the space, that is
   one of the characters '!' through '~' of the list below.  */
bool
c_isgraph (int c)
{
#if C_CTYPE_ASCII
  if (c < '!')
    return false;
  return c <= '~';
#else
  switch (c)
    {
    case '!': case '"': case '#': case '$': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-': case '.':
    case '/':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case ':': case ';': case '<': case '=': case '>': case '?': case '@':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case '[': case '\\': case ']': case '^': case '_': case '`':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
    case '{': case '|': case '}': case '~':
      return true;
    default:
      return false;
    }
#endif
}
|
||||
|
||||
/* Return true if C is a printable character, the space included: one of
   the characters ' ' through '~' of the list below.  */
bool
c_isprint (int c)
{
#if C_CTYPE_ASCII
  if (c < ' ')
    return false;
  return c <= '~';
#else
  switch (c)
    {
    case ' ': case '!': case '"': case '#': case '$': case '%': case '&':
    case '\'': case '(': case ')': case '*': case '+': case ',': case '-':
    case '.': case '/':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case ':': case ';': case '<': case '=': case '>': case '?': case '@':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case '[': case '\\': case ']': case '^': case '_': case '`':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
    case '{': case '|': case '}': case '~':
      return true;
    default:
      return false;
    }
#endif
}
|
||||
|
||||
/* Return true if C is a punctuation character: a printable character that
   is neither the space, nor a letter, nor a decimal digit.  */
bool
c_ispunct (int c)
{
#if C_CTYPE_ASCII
  /* With bit 0x20 cleared, one range test covers both letter cases.  */
  int folded = c & ~0x20;

  if (c < '!' || c > '~')
    return false;
  if ('0' <= c && c <= '9')
    return false;
  return !('A' <= folded && folded <= 'Z');
#else
  switch (c)
    {
    case '!': case '"': case '#': case '$': case '%': case '&': case '\'':
    case '(': case ')': case '*': case '+': case ',': case '-': case '.':
    case '/':
    case ':': case ';': case '<': case '=': case '>': case '?': case '@':
    case '[': case '\\': case ']': case '^': case '_': case '`':
    case '{': case '|': case '}': case '~':
      return true;
    default:
      return false;
    }
#endif
}
|
||||
|
||||
/* Return true if C is a white-space character: space, horizontal tab,
   newline, vertical tab, form feed or carriage return.  */
bool
c_isspace (int c)
{
  switch (c)
    {
    case ' ': case '\t':
    case '\n': case '\v': case '\f': case '\r':
      return true;
    default:
      return false;
    }
}
|
||||
|
||||
/* Return true if C is one of the uppercase letters 'A'..'Z'.  */
bool
c_isupper (int c)
{
#if C_CTYPE_CONSECUTIVE_UPPERCASE
  if (c < 'A')
    return false;
  return c <= 'Z';
#else
  switch (c)
    {
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
      return true;
    default:
      return false;
    }
#endif
}
|
||||
|
||||
/* Return true if C is a hexadecimal digit: '0'..'9', 'A'..'F' or
   'a'..'f'.  */
bool
c_isxdigit (int c)
{
#if C_CTYPE_CONSECUTIVE_DIGITS \
    && C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
# if C_CTYPE_ASCII
  /* With bit 0x20 cleared, one range test covers both letter cases.  */
  int folded = c & ~0x20;

  if ('0' <= c && c <= '9')
    return true;
  return 'A' <= folded && folded <= 'F';
# else
  if ('0' <= c && c <= '9')
    return true;
  if ('A' <= c && c <= 'F')
    return true;
  return 'a' <= c && c <= 'f';
# endif
#else
  /* The ranges are not known to be contiguous: list every member.  */
  switch (c)
    {
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
      return true;
    default:
      return false;
    }
#endif
}
|
||||
|
||||
/* Convert an uppercase letter 'A'..'Z' to the corresponding lowercase
   letter; return every other value of C unchanged.  */
int
c_tolower (int c)
{
#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
  if ('A' <= c && c <= 'Z')
    return c - 'A' + 'a';
  return c;
#else
  /* The letters are not known to be contiguous: map each one by name.  */
  switch (c)
    {
    case 'A': return 'a';  case 'B': return 'b';  case 'C': return 'c';
    case 'D': return 'd';  case 'E': return 'e';  case 'F': return 'f';
    case 'G': return 'g';  case 'H': return 'h';  case 'I': return 'i';
    case 'J': return 'j';  case 'K': return 'k';  case 'L': return 'l';
    case 'M': return 'm';  case 'N': return 'n';  case 'O': return 'o';
    case 'P': return 'p';  case 'Q': return 'q';  case 'R': return 'r';
    case 'S': return 's';  case 'T': return 't';  case 'U': return 'u';
    case 'V': return 'v';  case 'W': return 'w';  case 'X': return 'x';
    case 'Y': return 'y';  case 'Z': return 'z';
    default:
      return c;
    }
#endif
}
|
||||
|
||||
/* Convert a lowercase letter 'a'..'z' to the corresponding uppercase
   letter; return every other value of C unchanged.  */
int
c_toupper (int c)
{
#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
  if ('a' <= c && c <= 'z')
    return c - 'a' + 'A';
  return c;
#else
  /* The letters are not known to be contiguous: map each one by name.  */
  switch (c)
    {
    case 'a': return 'A';  case 'b': return 'B';  case 'c': return 'C';
    case 'd': return 'D';  case 'e': return 'E';  case 'f': return 'F';
    case 'g': return 'G';  case 'h': return 'H';  case 'i': return 'I';
    case 'j': return 'J';  case 'k': return 'K';  case 'l': return 'L';
    case 'm': return 'M';  case 'n': return 'N';  case 'o': return 'O';
    case 'p': return 'P';  case 'q': return 'Q';  case 'r': return 'R';
    case 's': return 'S';  case 't': return 'T';  case 'u': return 'U';
    case 'v': return 'V';  case 'w': return 'W';  case 'x': return 'X';
    case 'y': return 'Y';  case 'z': return 'Z';
    default:
      return c;
    }
#endif
}
|
||||
295
lib/c-ctype.h
Normal file
295
lib/c-ctype.h
Normal file
|
|
@ -0,0 +1,295 @@
|
|||
/* Character handling in C locale.
|
||||
|
||||
These functions work like the corresponding functions in <ctype.h>,
|
||||
except that they have the C (POSIX) locale hardwired, whereas the
|
||||
<ctype.h> functions' behaviour depends on the current locale set via
|
||||
setlocale.
|
||||
|
||||
Copyright (C) 2000-2003, 2006, 2008 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#ifndef C_CTYPE_H
|
||||
#define C_CTYPE_H
|
||||
|
||||
#include <stdbool.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* The functions defined in this file assume the "C" locale and a character
|
||||
set without diacritics (ASCII-US or EBCDIC-US or something like that).
|
||||
Even if the "C" locale on a particular system is an extension of the ASCII
|
||||
character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it
|
||||
is ISO-8859-1), the functions in this file recognize only the ASCII
|
||||
characters. */
|
||||
|
||||
|
||||
/* Check whether the ASCII optimizations apply. */
|
||||
|
||||
/* ANSI C89 (and ISO C99 5.2.1.3 too) already guarantees that
|
||||
'0', '1', ..., '9' have consecutive integer values. */
|
||||
#define C_CTYPE_CONSECUTIVE_DIGITS 1
|
||||
|
||||
#if ('A' <= 'Z') \
|
||||
&& ('A' + 1 == 'B') && ('B' + 1 == 'C') && ('C' + 1 == 'D') \
|
||||
&& ('D' + 1 == 'E') && ('E' + 1 == 'F') && ('F' + 1 == 'G') \
|
||||
&& ('G' + 1 == 'H') && ('H' + 1 == 'I') && ('I' + 1 == 'J') \
|
||||
&& ('J' + 1 == 'K') && ('K' + 1 == 'L') && ('L' + 1 == 'M') \
|
||||
&& ('M' + 1 == 'N') && ('N' + 1 == 'O') && ('O' + 1 == 'P') \
|
||||
&& ('P' + 1 == 'Q') && ('Q' + 1 == 'R') && ('R' + 1 == 'S') \
|
||||
&& ('S' + 1 == 'T') && ('T' + 1 == 'U') && ('U' + 1 == 'V') \
|
||||
&& ('V' + 1 == 'W') && ('W' + 1 == 'X') && ('X' + 1 == 'Y') \
|
||||
&& ('Y' + 1 == 'Z')
|
||||
#define C_CTYPE_CONSECUTIVE_UPPERCASE 1
|
||||
#endif
|
||||
|
||||
#if ('a' <= 'z') \
|
||||
&& ('a' + 1 == 'b') && ('b' + 1 == 'c') && ('c' + 1 == 'd') \
|
||||
&& ('d' + 1 == 'e') && ('e' + 1 == 'f') && ('f' + 1 == 'g') \
|
||||
&& ('g' + 1 == 'h') && ('h' + 1 == 'i') && ('i' + 1 == 'j') \
|
||||
&& ('j' + 1 == 'k') && ('k' + 1 == 'l') && ('l' + 1 == 'm') \
|
||||
&& ('m' + 1 == 'n') && ('n' + 1 == 'o') && ('o' + 1 == 'p') \
|
||||
&& ('p' + 1 == 'q') && ('q' + 1 == 'r') && ('r' + 1 == 's') \
|
||||
&& ('s' + 1 == 't') && ('t' + 1 == 'u') && ('u' + 1 == 'v') \
|
||||
&& ('v' + 1 == 'w') && ('w' + 1 == 'x') && ('x' + 1 == 'y') \
|
||||
&& ('y' + 1 == 'z')
|
||||
#define C_CTYPE_CONSECUTIVE_LOWERCASE 1
|
||||
#endif
|
||||
|
||||
#if (' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
|
||||
&& ('%' == 37) && ('&' == 38) && ('\'' == 39) && ('(' == 40) \
|
||||
&& (')' == 41) && ('*' == 42) && ('+' == 43) && (',' == 44) \
|
||||
&& ('-' == 45) && ('.' == 46) && ('/' == 47) && ('0' == 48) \
|
||||
&& ('1' == 49) && ('2' == 50) && ('3' == 51) && ('4' == 52) \
|
||||
&& ('5' == 53) && ('6' == 54) && ('7' == 55) && ('8' == 56) \
|
||||
&& ('9' == 57) && (':' == 58) && (';' == 59) && ('<' == 60) \
|
||||
&& ('=' == 61) && ('>' == 62) && ('?' == 63) && ('A' == 65) \
|
||||
&& ('B' == 66) && ('C' == 67) && ('D' == 68) && ('E' == 69) \
|
||||
&& ('F' == 70) && ('G' == 71) && ('H' == 72) && ('I' == 73) \
|
||||
&& ('J' == 74) && ('K' == 75) && ('L' == 76) && ('M' == 77) \
|
||||
&& ('N' == 78) && ('O' == 79) && ('P' == 80) && ('Q' == 81) \
|
||||
&& ('R' == 82) && ('S' == 83) && ('T' == 84) && ('U' == 85) \
|
||||
&& ('V' == 86) && ('W' == 87) && ('X' == 88) && ('Y' == 89) \
|
||||
&& ('Z' == 90) && ('[' == 91) && ('\\' == 92) && (']' == 93) \
|
||||
&& ('^' == 94) && ('_' == 95) && ('a' == 97) && ('b' == 98) \
|
||||
&& ('c' == 99) && ('d' == 100) && ('e' == 101) && ('f' == 102) \
|
||||
&& ('g' == 103) && ('h' == 104) && ('i' == 105) && ('j' == 106) \
|
||||
&& ('k' == 107) && ('l' == 108) && ('m' == 109) && ('n' == 110) \
|
||||
&& ('o' == 111) && ('p' == 112) && ('q' == 113) && ('r' == 114) \
|
||||
&& ('s' == 115) && ('t' == 116) && ('u' == 117) && ('v' == 118) \
|
||||
&& ('w' == 119) && ('x' == 120) && ('y' == 121) && ('z' == 122) \
|
||||
&& ('{' == 123) && ('|' == 124) && ('}' == 125) && ('~' == 126)
|
||||
/* The character set is ASCII or one of its variants or extensions, not EBCDIC.
|
||||
Testing the value of '\n' and '\r' is not relevant. */
|
||||
#define C_CTYPE_ASCII 1
|
||||
#endif
|
||||
|
||||
|
||||
/* Function declarations. */
|
||||
|
||||
/* Unlike the functions in <ctype.h>, which require an argument in the range
|
||||
of the 'unsigned char' type, the functions here operate on values that are
|
||||
in the 'unsigned char' range or in the 'char' range. In other words,
|
||||
when you have a 'char' value, you need to cast it before using it as
|
||||
argument to a <ctype.h> function:
|
||||
|
||||
const char *s = ...;
|
||||
if (isalpha ((unsigned char) *s)) ...
|
||||
|
||||
but you don't need to cast it for the functions defined in this file:
|
||||
|
||||
const char *s = ...;
|
||||
if (c_isalpha (*s)) ...
|
||||
*/
|
||||
|
||||
extern bool c_isascii (int c); /* not locale dependent */
|
||||
|
||||
extern bool c_isalnum (int c);
|
||||
extern bool c_isalpha (int c);
|
||||
extern bool c_isblank (int c);
|
||||
extern bool c_iscntrl (int c);
|
||||
extern bool c_isdigit (int c);
|
||||
extern bool c_islower (int c);
|
||||
extern bool c_isgraph (int c);
|
||||
extern bool c_isprint (int c);
|
||||
extern bool c_ispunct (int c);
|
||||
extern bool c_isspace (int c);
|
||||
extern bool c_isupper (int c);
|
||||
extern bool c_isxdigit (int c);
|
||||
|
||||
extern int c_tolower (int c);
|
||||
extern int c_toupper (int c);
|
||||
|
||||
|
||||
#if defined __GNUC__ && defined __OPTIMIZE__ && !defined __OPTIMIZE_SIZE__ && !defined NO_C_CTYPE_MACROS
|
||||
|
||||
/* ASCII optimizations. */
|
||||
|
||||
#undef c_isascii
|
||||
#define c_isascii(c) \
|
||||
({ int __c = (c); \
|
||||
(__c >= 0x00 && __c <= 0x7f); \
|
||||
})
|
||||
|
||||
#if C_CTYPE_CONSECUTIVE_DIGITS \
|
||||
&& C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
|
||||
#if C_CTYPE_ASCII
|
||||
#undef c_isalnum
|
||||
#define c_isalnum(c) \
|
||||
({ int __c = (c); \
|
||||
((__c >= '0' && __c <= '9') \
|
||||
|| ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'Z')); \
|
||||
})
|
||||
#else
|
||||
#undef c_isalnum
|
||||
#define c_isalnum(c) \
|
||||
({ int __c = (c); \
|
||||
((__c >= '0' && __c <= '9') \
|
||||
|| (__c >= 'A' && __c <= 'Z') \
|
||||
|| (__c >= 'a' && __c <= 'z')); \
|
||||
})
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
|
||||
#if C_CTYPE_ASCII
|
||||
#undef c_isalpha
|
||||
#define c_isalpha(c) \
|
||||
({ int __c = (c); \
|
||||
((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'Z'); \
|
||||
})
|
||||
#else
|
||||
#undef c_isalpha
|
||||
#define c_isalpha(c) \
|
||||
({ int __c = (c); \
|
||||
((__c >= 'A' && __c <= 'Z') || (__c >= 'a' && __c <= 'z')); \
|
||||
})
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#undef c_isblank
|
||||
#define c_isblank(c) \
|
||||
({ int __c = (c); \
|
||||
(__c == ' ' || __c == '\t'); \
|
||||
})
|
||||
|
||||
#if C_CTYPE_ASCII
|
||||
#undef c_iscntrl
|
||||
#define c_iscntrl(c) \
|
||||
({ int __c = (c); \
|
||||
((__c & ~0x1f) == 0 || __c == 0x7f); \
|
||||
})
|
||||
#endif
|
||||
|
||||
#if C_CTYPE_CONSECUTIVE_DIGITS
|
||||
#undef c_isdigit
|
||||
#define c_isdigit(c) \
|
||||
({ int __c = (c); \
|
||||
(__c >= '0' && __c <= '9'); \
|
||||
})
|
||||
#endif
|
||||
|
||||
#if C_CTYPE_CONSECUTIVE_LOWERCASE
|
||||
#undef c_islower
|
||||
#define c_islower(c) \
|
||||
({ int __c = (c); \
|
||||
(__c >= 'a' && __c <= 'z'); \
|
||||
})
|
||||
#endif
|
||||
|
||||
#if C_CTYPE_ASCII
|
||||
#undef c_isgraph
|
||||
#define c_isgraph(c) \
|
||||
({ int __c = (c); \
|
||||
(__c >= '!' && __c <= '~'); \
|
||||
})
|
||||
#endif
|
||||
|
||||
#if C_CTYPE_ASCII
|
||||
#undef c_isprint
|
||||
#define c_isprint(c) \
|
||||
({ int __c = (c); \
|
||||
(__c >= ' ' && __c <= '~'); \
|
||||
})
|
||||
#endif
|
||||
|
||||
#if C_CTYPE_ASCII
|
||||
#undef c_ispunct
|
||||
#define c_ispunct(c) \
|
||||
({ int _c = (c); \
|
||||
(c_isgraph (_c) && ! c_isalnum (_c)); \
|
||||
})
|
||||
#endif
|
||||
|
||||
#undef c_isspace
|
||||
#define c_isspace(c) \
|
||||
({ int __c = (c); \
|
||||
(__c == ' ' || __c == '\t' \
|
||||
|| __c == '\n' || __c == '\v' || __c == '\f' || __c == '\r'); \
|
||||
})
|
||||
|
||||
#if C_CTYPE_CONSECUTIVE_UPPERCASE
|
||||
#undef c_isupper
|
||||
#define c_isupper(c) \
|
||||
({ int __c = (c); \
|
||||
(__c >= 'A' && __c <= 'Z'); \
|
||||
})
|
||||
#endif
|
||||
|
||||
#if C_CTYPE_CONSECUTIVE_DIGITS \
|
||||
&& C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
|
||||
#if C_CTYPE_ASCII
|
||||
#undef c_isxdigit
|
||||
#define c_isxdigit(c) \
|
||||
({ int __c = (c); \
|
||||
((__c >= '0' && __c <= '9') \
|
||||
|| ((__c & ~0x20) >= 'A' && (__c & ~0x20) <= 'F')); \
|
||||
})
|
||||
#else
|
||||
#undef c_isxdigit
|
||||
#define c_isxdigit(c) \
|
||||
({ int __c = (c); \
|
||||
((__c >= '0' && __c <= '9') \
|
||||
|| (__c >= 'A' && __c <= 'F') \
|
||||
|| (__c >= 'a' && __c <= 'f')); \
|
||||
})
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
|
||||
#undef c_tolower
|
||||
#define c_tolower(c) \
|
||||
({ int __c = (c); \
|
||||
(__c >= 'A' && __c <= 'Z' ? __c - 'A' + 'a' : __c); \
|
||||
})
|
||||
#undef c_toupper
|
||||
#define c_toupper(c) \
|
||||
({ int __c = (c); \
|
||||
(__c >= 'a' && __c <= 'z' ? __c - 'a' + 'A' : __c); \
|
||||
})
|
||||
#endif
|
||||
|
||||
#endif /* optimizing for speed */
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* C_CTYPE_H */
|
||||
55
lib/c-strcase.h
Normal file
55
lib/c-strcase.h
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
/* Case-insensitive string comparison functions in C locale.
|
||||
Copyright (C) 1995-1996, 2001, 2003, 2005 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#ifndef C_STRCASE_H
|
||||
#define C_STRCASE_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
|
||||
/* The functions defined in this file assume the "C" locale and a character
|
||||
set without diacritics (ASCII-US or EBCDIC-US or something like that).
|
||||
Even if the "C" locale on a particular system is an extension of the ASCII
|
||||
character set (like on BeOS, where it is UTF-8, or on AmigaOS, where it
|
||||
is ISO-8859-1), the functions in this file recognize only the ASCII
|
||||
characters. More precisely, one of the string arguments must be an ASCII
|
||||
string; the other one can also contain non-ASCII characters (but then
|
||||
the comparison result will be nonzero). */
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Compare strings S1 and S2, ignoring case, returning less than, equal to or
|
||||
greater than zero if S1 is lexicographically less than, equal to or greater
|
||||
than S2. */
|
||||
extern int c_strcasecmp (const char *s1, const char *s2);
|
||||
|
||||
/* Compare no more than N characters of strings S1 and S2, ignoring case,
|
||||
returning less than, equal to or greater than zero if S1 is
|
||||
lexicographically less than, equal to or greater than S2. */
|
||||
extern int c_strncasecmp (const char *s1, const char *s2, size_t n);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* C_STRCASE_H */
|
||||
57
lib/c-strcasecmp.c
Normal file
57
lib/c-strcasecmp.c
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
/* c-strcasecmp.c -- case insensitive string comparator in C locale
|
||||
Copyright (C) 1998-1999, 2005-2006 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include "c-strcase.h"
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include "c-ctype.h"
|
||||
|
||||
/* Compare S1 and S2 byte by byte after folding each byte with c_tolower.
   Return a negative, zero or positive value according to whether S1 is
   less than, equal to or greater than S2.  */
int
c_strcasecmp (const char *s1, const char *s2)
{
  const unsigned char *a = (const unsigned char *) s1;
  const unsigned char *b = (const unsigned char *) s2;
  unsigned char ca, cb;

  /* The same pointer trivially compares equal.  */
  if (a == b)
    return 0;

  /* Advance in lock step until the folded bytes differ or S1 ends.  */
  for (;; a++, b++)
    {
      ca = c_tolower (*a);
      cb = c_tolower (*b);

      if (ca == '\0' || ca != cb)
        break;
    }

  /* On machines where 'char' and 'int' are types of the same size, the
     difference of two 'unsigned char' values - including the sign bit -
     doesn't fit in an 'int', so produce the sign explicitly there.  */
  if (UCHAR_MAX > INT_MAX)
    return (ca > cb) - (ca < cb);

  return ca - cb;
}
|
||||
184
lib/c-strcaseeq.h
Normal file
184
lib/c-strcaseeq.h
Normal file
|
|
@ -0,0 +1,184 @@
|
|||
/* Optimized case-insensitive string comparison in C locale.
|
||||
Copyright (C) 2001-2002, 2007 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written by Bruno Haible <bruno@clisp.org>. */
|
||||
|
||||
#include "c-strcase.h"
|
||||
#include "c-ctype.h"
|
||||
|
||||
/* STRCASEEQ allows optimizing string comparison against a small literal string.
|
||||
STRCASEEQ (s, "UTF-8", 'U','T','F','-','8',0,0,0,0)
|
||||
is semantically equivalent to
|
||||
c_strcasecmp (s, "UTF-8") == 0
|
||||
just faster. */
|
||||
|
||||
/* Help GCC to generate good code for string comparisons with
|
||||
immediate strings. */
|
||||
#if defined (__GNUC__) && defined (__OPTIMIZE__)
|
||||
|
||||
/* Case insensitive comparison of ASCII characters. */
|
||||
# if C_CTYPE_ASCII
|
||||
# define CASEEQ(other,upper) \
|
||||
(c_isupper (upper) ? ((other) & ~0x20) == (upper) : (other) == (upper))
|
||||
# elif C_CTYPE_CONSECUTIVE_UPPERCASE && C_CTYPE_CONSECUTIVE_LOWERCASE
|
||||
# define CASEEQ(other,upper) \
|
||||
(c_isupper (upper) ? (other) == (upper) || (other) == (upper) - 'A' + 'a' : (other) == (upper))
|
||||
# else
|
||||
# define CASEEQ(other,upper) \
|
||||
(c_toupper (other) == (upper))
|
||||
# endif
|
||||
|
||||
static inline int
|
||||
strcaseeq9 (const char *s1, const char *s2)
|
||||
{
|
||||
return c_strcasecmp (s1 + 9, s2 + 9) == 0;
|
||||
}
|
||||
|
||||
static inline int
|
||||
strcaseeq8 (const char *s1, const char *s2, char s28)
|
||||
{
|
||||
if (CASEEQ (s1[8], s28))
|
||||
{
|
||||
if (s28 == 0)
|
||||
return 1;
|
||||
else
|
||||
return strcaseeq9 (s1, s2);
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int
|
||||
strcaseeq7 (const char *s1, const char *s2, char s27, char s28)
|
||||
{
|
||||
if (CASEEQ (s1[7], s27))
|
||||
{
|
||||
if (s27 == 0)
|
||||
return 1;
|
||||
else
|
||||
return strcaseeq8 (s1, s2, s28);
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int
|
||||
strcaseeq6 (const char *s1, const char *s2, char s26, char s27, char s28)
|
||||
{
|
||||
if (CASEEQ (s1[6], s26))
|
||||
{
|
||||
if (s26 == 0)
|
||||
return 1;
|
||||
else
|
||||
return strcaseeq7 (s1, s2, s27, s28);
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int
|
||||
strcaseeq5 (const char *s1, const char *s2, char s25, char s26, char s27, char s28)
|
||||
{
|
||||
if (CASEEQ (s1[5], s25))
|
||||
{
|
||||
if (s25 == 0)
|
||||
return 1;
|
||||
else
|
||||
return strcaseeq6 (s1, s2, s26, s27, s28);
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int
|
||||
strcaseeq4 (const char *s1, const char *s2, char s24, char s25, char s26, char s27, char s28)
|
||||
{
|
||||
if (CASEEQ (s1[4], s24))
|
||||
{
|
||||
if (s24 == 0)
|
||||
return 1;
|
||||
else
|
||||
return strcaseeq5 (s1, s2, s25, s26, s27, s28);
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int
|
||||
strcaseeq3 (const char *s1, const char *s2, char s23, char s24, char s25, char s26, char s27, char s28)
|
||||
{
|
||||
if (CASEEQ (s1[3], s23))
|
||||
{
|
||||
if (s23 == 0)
|
||||
return 1;
|
||||
else
|
||||
return strcaseeq4 (s1, s2, s24, s25, s26, s27, s28);
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int
|
||||
strcaseeq2 (const char *s1, const char *s2, char s22, char s23, char s24, char s25, char s26, char s27, char s28)
|
||||
{
|
||||
if (CASEEQ (s1[2], s22))
|
||||
{
|
||||
if (s22 == 0)
|
||||
return 1;
|
||||
else
|
||||
return strcaseeq3 (s1, s2, s23, s24, s25, s26, s27, s28);
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int
|
||||
strcaseeq1 (const char *s1, const char *s2, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28)
|
||||
{
|
||||
if (CASEEQ (s1[1], s21))
|
||||
{
|
||||
if (s21 == 0)
|
||||
return 1;
|
||||
else
|
||||
return strcaseeq2 (s1, s2, s22, s23, s24, s25, s26, s27, s28);
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int
|
||||
strcaseeq0 (const char *s1, const char *s2, char s20, char s21, char s22, char s23, char s24, char s25, char s26, char s27, char s28)
|
||||
{
|
||||
if (CASEEQ (s1[0], s20))
|
||||
{
|
||||
if (s20 == 0)
|
||||
return 1;
|
||||
else
|
||||
return strcaseeq1 (s1, s2, s21, s22, s23, s24, s25, s26, s27, s28);
|
||||
}
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define STRCASEEQ(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \
|
||||
strcaseeq0 (s1, s2, s20, s21, s22, s23, s24, s25, s26, s27, s28)
|
||||
|
||||
#else
|
||||
|
||||
#define STRCASEEQ(s1,s2,s20,s21,s22,s23,s24,s25,s26,s27,s28) \
|
||||
(c_strcasecmp (s1, s2) == 0)
|
||||
|
||||
#endif
|
||||
57
lib/c-strncasecmp.c
Normal file
57
lib/c-strncasecmp.c
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
/* c-strncasecmp.c -- case insensitive string comparator in C locale
|
||||
Copyright (C) 1998-1999, 2005-2006 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include "c-strcase.h"
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
#include "c-ctype.h"
|
||||
|
||||
/* Compare at most N bytes of S1 and S2 after folding each byte with
   c_tolower.  Return a negative, zero or positive value according to
   whether S1 is less than, equal to or greater than S2.  */
int
c_strncasecmp (const char *s1, const char *s2, size_t n)
{
  const unsigned char *a = (const unsigned char *) s1;
  const unsigned char *b = (const unsigned char *) s2;
  unsigned char ca, cb;

  /* Identical pointers or an empty comparison window compare equal.  */
  if (n == 0 || a == b)
    return 0;

  /* Advance in lock step until the budget of N bytes is used up, S1 ends,
     or the folded bytes differ.  */
  for (;; a++, b++)
    {
      ca = c_tolower (*a);
      cb = c_tolower (*b);

      if (--n == 0 || ca == '\0' || ca != cb)
        break;
    }

  /* On machines where 'char' and 'int' are types of the same size, the
     difference of two 'unsigned char' values - including the sign bit -
     doesn't fit in an 'int', so produce the sign explicitly there.  */
  if (UCHAR_MAX > INT_MAX)
    return (ca > cb) - (ca < cb);

  return ca - cb;
}
|
||||
450
lib/iconv.c
Normal file
450
lib/iconv.c
Normal file
|
|
@ -0,0 +1,450 @@
|
|||
/* Character set conversion.
|
||||
Copyright (C) 1999-2001, 2007 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License along
|
||||
with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include <iconv.h>
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#if REPLACE_ICONV_UTF
|
||||
# include <errno.h>
|
||||
# include <stdint.h>
|
||||
# include <stdlib.h>
|
||||
# include "unistr.h"
|
||||
# ifndef uintptr_t
|
||||
# define uintptr_t unsigned long
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if REPLACE_ICONV_UTF
|
||||
|
||||
/* UTF-{16,32}{BE,LE} converters taken from GNU libiconv 1.11. */
|
||||
|
||||
/* Return code if invalid. (xxx_mbtowc) */
|
||||
# define RET_ILSEQ -1
|
||||
/* Return code if no bytes were read. (xxx_mbtowc) */
|
||||
# define RET_TOOFEW -2
|
||||
|
||||
/* Return code if invalid. (xxx_wctomb) */
|
||||
# define RET_ILUNI -1
|
||||
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
|
||||
# define RET_TOOSMALL -2
|
||||
|
||||
/*
|
||||
* UTF-16BE
|
||||
*/
|
||||
|
||||
/* Specification: RFC 2781 */
|
||||
|
||||
static int
|
||||
utf16be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
|
||||
{
|
||||
if (n >= 2)
|
||||
{
|
||||
ucs4_t wc = (s[0] << 8) + s[1];
|
||||
if (wc >= 0xd800 && wc < 0xdc00)
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
ucs4_t wc2 = (s[2] << 8) + s[3];
|
||||
if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
|
||||
return RET_ILSEQ;
|
||||
*pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
else if (wc >= 0xdc00 && wc < 0xe000)
|
||||
{
|
||||
return RET_ILSEQ;
|
||||
}
|
||||
else
|
||||
{
|
||||
*pwc = wc;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
return RET_TOOFEW;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
|
||||
{
|
||||
if (!(wc >= 0xd800 && wc < 0xe000))
|
||||
{
|
||||
if (wc < 0x10000)
|
||||
{
|
||||
if (n >= 2)
|
||||
{
|
||||
r[0] = (unsigned char) (wc >> 8);
|
||||
r[1] = (unsigned char) wc;
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return RET_TOOSMALL;
|
||||
}
|
||||
else if (wc < 0x110000)
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
|
||||
ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
|
||||
r[0] = (unsigned char) (wc1 >> 8);
|
||||
r[1] = (unsigned char) wc1;
|
||||
r[2] = (unsigned char) (wc2 >> 8);
|
||||
r[3] = (unsigned char) wc2;
|
||||
return 4;
|
||||
}
|
||||
else
|
||||
return RET_TOOSMALL;
|
||||
}
|
||||
}
|
||||
return RET_ILUNI;
|
||||
}
|
||||
|
||||
/*
|
||||
* UTF-16LE
|
||||
*/
|
||||
|
||||
/* Specification: RFC 2781 */
|
||||
|
||||
static int
|
||||
utf16le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
|
||||
{
|
||||
if (n >= 2)
|
||||
{
|
||||
ucs4_t wc = s[0] + (s[1] << 8);
|
||||
if (wc >= 0xd800 && wc < 0xdc00)
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
ucs4_t wc2 = s[2] + (s[3] << 8);
|
||||
if (!(wc2 >= 0xdc00 && wc2 < 0xe000))
|
||||
return RET_ILSEQ;
|
||||
*pwc = 0x10000 + ((wc - 0xd800) << 10) + (wc2 - 0xdc00);
|
||||
return 4;
|
||||
}
|
||||
}
|
||||
else if (wc >= 0xdc00 && wc < 0xe000)
|
||||
{
|
||||
return RET_ILSEQ;
|
||||
}
|
||||
else
|
||||
{
|
||||
*pwc = wc;
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
return RET_TOOFEW;
|
||||
}
|
||||
|
||||
static int
|
||||
utf16le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
|
||||
{
|
||||
if (!(wc >= 0xd800 && wc < 0xe000))
|
||||
{
|
||||
if (wc < 0x10000)
|
||||
{
|
||||
if (n >= 2)
|
||||
{
|
||||
r[0] = (unsigned char) wc;
|
||||
r[1] = (unsigned char) (wc >> 8);
|
||||
return 2;
|
||||
}
|
||||
else
|
||||
return RET_TOOSMALL;
|
||||
}
|
||||
else if (wc < 0x110000)
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
ucs4_t wc1 = 0xd800 + ((wc - 0x10000) >> 10);
|
||||
ucs4_t wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff);
|
||||
r[0] = (unsigned char) wc1;
|
||||
r[1] = (unsigned char) (wc1 >> 8);
|
||||
r[2] = (unsigned char) wc2;
|
||||
r[3] = (unsigned char) (wc2 >> 8);
|
||||
return 4;
|
||||
}
|
||||
else
|
||||
return RET_TOOSMALL;
|
||||
}
|
||||
}
|
||||
return RET_ILUNI;
|
||||
}
|
||||
|
||||
/*
|
||||
* UTF-32BE
|
||||
*/
|
||||
|
||||
/* Specification: Unicode 3.1 Standard Annex #19 */
|
||||
|
||||
static int
|
||||
utf32be_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
ucs4_t wc = (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3];
|
||||
if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
|
||||
{
|
||||
*pwc = wc;
|
||||
return 4;
|
||||
}
|
||||
else
|
||||
return RET_ILSEQ;
|
||||
}
|
||||
return RET_TOOFEW;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32be_wctomb (unsigned char *r, ucs4_t wc, size_t n)
|
||||
{
|
||||
if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
r[0] = 0;
|
||||
r[1] = (unsigned char) (wc >> 16);
|
||||
r[2] = (unsigned char) (wc >> 8);
|
||||
r[3] = (unsigned char) wc;
|
||||
return 4;
|
||||
}
|
||||
else
|
||||
return RET_TOOSMALL;
|
||||
}
|
||||
return RET_ILUNI;
|
||||
}
|
||||
|
||||
/*
|
||||
* UTF-32LE
|
||||
*/
|
||||
|
||||
/* Specification: Unicode 3.1 Standard Annex #19 */
|
||||
|
||||
static int
|
||||
utf32le_mbtowc (ucs4_t *pwc, const unsigned char *s, size_t n)
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
ucs4_t wc = s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24);
|
||||
if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
|
||||
{
|
||||
*pwc = wc;
|
||||
return 4;
|
||||
}
|
||||
else
|
||||
return RET_ILSEQ;
|
||||
}
|
||||
return RET_TOOFEW;
|
||||
}
|
||||
|
||||
static int
|
||||
utf32le_wctomb (unsigned char *r, ucs4_t wc, size_t n)
|
||||
{
|
||||
if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000))
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
r[0] = (unsigned char) wc;
|
||||
r[1] = (unsigned char) (wc >> 8);
|
||||
r[2] = (unsigned char) (wc >> 16);
|
||||
r[3] = 0;
|
||||
return 4;
|
||||
}
|
||||
else
|
||||
return RET_TOOSMALL;
|
||||
}
|
||||
return RET_ILUNI;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
size_t
|
||||
rpl_iconv (iconv_t cd,
|
||||
ICONV_CONST char **inbuf, size_t *inbytesleft,
|
||||
char **outbuf, size_t *outbytesleft)
|
||||
#undef iconv
|
||||
{
|
||||
#if REPLACE_ICONV_UTF
|
||||
switch ((uintptr_t) cd)
|
||||
{
|
||||
{
|
||||
int (*xxx_wctomb) (unsigned char *, ucs4_t, size_t);
|
||||
|
||||
case (uintptr_t) _ICONV_UTF8_UTF16BE:
|
||||
xxx_wctomb = utf16be_wctomb;
|
||||
goto loop_from_utf8;
|
||||
case (uintptr_t) _ICONV_UTF8_UTF16LE:
|
||||
xxx_wctomb = utf16le_wctomb;
|
||||
goto loop_from_utf8;
|
||||
case (uintptr_t) _ICONV_UTF8_UTF32BE:
|
||||
xxx_wctomb = utf32be_wctomb;
|
||||
goto loop_from_utf8;
|
||||
case (uintptr_t) _ICONV_UTF8_UTF32LE:
|
||||
xxx_wctomb = utf32le_wctomb;
|
||||
goto loop_from_utf8;
|
||||
|
||||
loop_from_utf8:
|
||||
if (inbuf == NULL || *inbuf == NULL)
|
||||
return 0;
|
||||
{
|
||||
ICONV_CONST char *inptr = *inbuf;
|
||||
size_t inleft = *inbytesleft;
|
||||
char *outptr = *outbuf;
|
||||
size_t outleft = *outbytesleft;
|
||||
size_t res = 0;
|
||||
while (inleft > 0)
|
||||
{
|
||||
ucs4_t uc;
|
||||
int m = u8_mbtoucr (&uc, (const uint8_t *) inptr, inleft);
|
||||
if (m <= 0)
|
||||
{
|
||||
if (m == -1)
|
||||
{
|
||||
errno = EILSEQ;
|
||||
res = (size_t)(-1);
|
||||
break;
|
||||
}
|
||||
if (m == -2)
|
||||
{
|
||||
errno = EINVAL;
|
||||
res = (size_t)(-1);
|
||||
break;
|
||||
}
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
{
|
||||
int n = xxx_wctomb ((uint8_t *) outptr, uc, outleft);
|
||||
if (n < 0)
|
||||
{
|
||||
if (n == RET_ILUNI)
|
||||
{
|
||||
errno = EILSEQ;
|
||||
res = (size_t)(-1);
|
||||
break;
|
||||
}
|
||||
if (n == RET_TOOSMALL)
|
||||
{
|
||||
errno = E2BIG;
|
||||
res = (size_t)(-1);
|
||||
break;
|
||||
}
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
{
|
||||
inptr += m;
|
||||
inleft -= m;
|
||||
outptr += n;
|
||||
outleft -= n;
|
||||
}
|
||||
}
|
||||
}
|
||||
*inbuf = inptr;
|
||||
*inbytesleft = inleft;
|
||||
*outbuf = outptr;
|
||||
*outbytesleft = outleft;
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
int (*xxx_mbtowc) (ucs4_t *, const unsigned char *, size_t);
|
||||
|
||||
case (uintptr_t) _ICONV_UTF16BE_UTF8:
|
||||
xxx_mbtowc = utf16be_mbtowc;
|
||||
goto loop_to_utf8;
|
||||
case (uintptr_t) _ICONV_UTF16LE_UTF8:
|
||||
xxx_mbtowc = utf16le_mbtowc;
|
||||
goto loop_to_utf8;
|
||||
case (uintptr_t) _ICONV_UTF32BE_UTF8:
|
||||
xxx_mbtowc = utf32be_mbtowc;
|
||||
goto loop_to_utf8;
|
||||
case (uintptr_t) _ICONV_UTF32LE_UTF8:
|
||||
xxx_mbtowc = utf32le_mbtowc;
|
||||
goto loop_to_utf8;
|
||||
|
||||
loop_to_utf8:
|
||||
if (inbuf == NULL || *inbuf == NULL)
|
||||
return 0;
|
||||
{
|
||||
ICONV_CONST char *inptr = *inbuf;
|
||||
size_t inleft = *inbytesleft;
|
||||
char *outptr = *outbuf;
|
||||
size_t outleft = *outbytesleft;
|
||||
size_t res = 0;
|
||||
while (inleft > 0)
|
||||
{
|
||||
ucs4_t uc;
|
||||
int m = xxx_mbtowc (&uc, (const uint8_t *) inptr, inleft);
|
||||
if (m <= 0)
|
||||
{
|
||||
if (m == RET_ILSEQ)
|
||||
{
|
||||
errno = EILSEQ;
|
||||
res = (size_t)(-1);
|
||||
break;
|
||||
}
|
||||
if (m == RET_TOOFEW)
|
||||
{
|
||||
errno = EINVAL;
|
||||
res = (size_t)(-1);
|
||||
break;
|
||||
}
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
{
|
||||
int n = u8_uctomb ((uint8_t *) outptr, uc, outleft);
|
||||
if (n < 0)
|
||||
{
|
||||
if (n == -1)
|
||||
{
|
||||
errno = EILSEQ;
|
||||
res = (size_t)(-1);
|
||||
break;
|
||||
}
|
||||
if (n == -2)
|
||||
{
|
||||
errno = E2BIG;
|
||||
res = (size_t)(-1);
|
||||
break;
|
||||
}
|
||||
abort ();
|
||||
}
|
||||
else
|
||||
{
|
||||
inptr += m;
|
||||
inleft -= m;
|
||||
outptr += n;
|
||||
outleft -= n;
|
||||
}
|
||||
}
|
||||
}
|
||||
*inbuf = inptr;
|
||||
*inbytesleft = inleft;
|
||||
*outbuf = outptr;
|
||||
*outbytesleft = outleft;
|
||||
return res;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return iconv (cd, inbuf, inbytesleft, outbuf, outbytesleft);
|
||||
}
|
||||
71
lib/iconv.in.h
Normal file
71
lib/iconv.in.h
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
/* A GNU-like <iconv.h>.
|
||||
|
||||
Copyright (C) 2007-2008 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#ifndef _GL_ICONV_H
|
||||
|
||||
#if __GNUC__ >= 3
|
||||
@PRAGMA_SYSTEM_HEADER@
|
||||
#endif
|
||||
|
||||
/* The include_next requires a split double-inclusion guard. */
|
||||
#@INCLUDE_NEXT@ @NEXT_ICONV_H@
|
||||
|
||||
#ifndef _GL_ICONV_H
|
||||
#define _GL_ICONV_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#if @REPLACE_ICONV_OPEN@
|
||||
/* An iconv_open wrapper that supports the IANA standardized encoding names
|
||||
("ISO-8859-1" etc.) as far as possible. */
|
||||
# define iconv_open rpl_iconv_open
|
||||
extern iconv_t iconv_open (const char *tocode, const char *fromcode);
|
||||
#endif
|
||||
|
||||
#if @REPLACE_ICONV_UTF@
|
||||
/* Special constants for supporting UTF-{16,32}{BE,LE} encodings.
|
||||
Not public. */
|
||||
# define _ICONV_UTF8_UTF16BE (iconv_t)(-161)
|
||||
# define _ICONV_UTF8_UTF16LE (iconv_t)(-162)
|
||||
# define _ICONV_UTF8_UTF32BE (iconv_t)(-163)
|
||||
# define _ICONV_UTF8_UTF32LE (iconv_t)(-164)
|
||||
# define _ICONV_UTF16BE_UTF8 (iconv_t)(-165)
|
||||
# define _ICONV_UTF16LE_UTF8 (iconv_t)(-166)
|
||||
# define _ICONV_UTF32BE_UTF8 (iconv_t)(-167)
|
||||
# define _ICONV_UTF32LE_UTF8 (iconv_t)(-168)
|
||||
#endif
|
||||
|
||||
#if @REPLACE_ICONV@
|
||||
# define iconv rpl_iconv
|
||||
extern size_t iconv (iconv_t cd,
|
||||
@ICONV_CONST@ char **inbuf, size_t *inbytesleft,
|
||||
char **outbuf, size_t *outbytesleft);
|
||||
# define iconv_close rpl_iconv_close
|
||||
extern int iconv_close (iconv_t cd);
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _GL_ICONV_H */
|
||||
#endif /* _GL_ICONV_H */
|
||||
47
lib/iconv_close.c
Normal file
47
lib/iconv_close.c
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
/* Character set conversion.
|
||||
Copyright (C) 2007 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License along
|
||||
with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include <iconv.h>
|
||||
|
||||
#include <stdint.h>
|
||||
#ifndef uintptr_t
|
||||
# define uintptr_t unsigned long
|
||||
#endif
|
||||
|
||||
/* Replacement for iconv_close.  The special UTF-{16,32}{BE,LE} pseudo
   descriptors are plain constants, so closing one of them just reports
   success; every other descriptor is passed to the underlying
   iconv_close.  */
int
rpl_iconv_close (iconv_t cd)
/* From here on 'iconv_close' names the underlying function again, not
   this replacement.  */
#undef iconv_close
{
#if REPLACE_ICONV_UTF
  {
    uintptr_t id = (uintptr_t) cd;

    if (id == (uintptr_t) _ICONV_UTF8_UTF16BE
        || id == (uintptr_t) _ICONV_UTF8_UTF16LE
        || id == (uintptr_t) _ICONV_UTF8_UTF32BE
        || id == (uintptr_t) _ICONV_UTF8_UTF32LE
        || id == (uintptr_t) _ICONV_UTF16BE_UTF8
        || id == (uintptr_t) _ICONV_UTF16LE_UTF8
        || id == (uintptr_t) _ICONV_UTF32BE_UTF8
        || id == (uintptr_t) _ICONV_UTF32LE_UTF8)
      return 0;
  }
#endif
  return iconv_close (cd);
}
|
||||
44
lib/iconv_open-aix.gperf
Normal file
44
lib/iconv_open-aix.gperf
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
struct mapping { int standard_name; const char vendor_name[10 + 1]; };
|
||||
%struct-type
|
||||
%language=ANSI-C
|
||||
%define slot-name standard_name
|
||||
%define hash-function-name mapping_hash
|
||||
%define lookup-function-name mapping_lookup
|
||||
%readonly-tables
|
||||
%global-table
|
||||
%define word-array-name mappings
|
||||
%pic
|
||||
%%
|
||||
# On AIX 5.1, look in /usr/lib/nls/loc/uconvTable.
|
||||
ISO-8859-1, "ISO8859-1"
|
||||
ISO-8859-2, "ISO8859-2"
|
||||
ISO-8859-3, "ISO8859-3"
|
||||
ISO-8859-4, "ISO8859-4"
|
||||
ISO-8859-5, "ISO8859-5"
|
||||
ISO-8859-6, "ISO8859-6"
|
||||
ISO-8859-7, "ISO8859-7"
|
||||
ISO-8859-8, "ISO8859-8"
|
||||
ISO-8859-9, "ISO8859-9"
|
||||
ISO-8859-15, "ISO8859-15"
|
||||
CP437, "IBM-437"
|
||||
CP850, "IBM-850"
|
||||
CP852, "IBM-852"
|
||||
CP856, "IBM-856"
|
||||
CP857, "IBM-857"
|
||||
CP861, "IBM-861"
|
||||
CP865, "IBM-865"
|
||||
CP869, "IBM-869"
|
||||
ISO-8859-13, "IBM-921"
|
||||
CP922, "IBM-922"
|
||||
CP932, "IBM-932"
|
||||
CP943, "IBM-943"
|
||||
CP1046, "IBM-1046"
|
||||
CP1124, "IBM-1124"
|
||||
CP1125, "IBM-1125"
|
||||
CP1129, "IBM-1129"
|
||||
CP1252, "IBM-1252"
|
||||
GB2312, "IBM-eucCN"
|
||||
EUC-JP, "IBM-eucJP"
|
||||
EUC-KR, "IBM-eucKR"
|
||||
EUC-TW, "IBM-eucTW"
|
||||
BIG5, "big5"
|
||||
56
lib/iconv_open-hpux.gperf
Normal file
56
lib/iconv_open-hpux.gperf
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
struct mapping { int standard_name; const char vendor_name[9 + 1]; };
|
||||
%struct-type
|
||||
%language=ANSI-C
|
||||
%define slot-name standard_name
|
||||
%define hash-function-name mapping_hash
|
||||
%define lookup-function-name mapping_lookup
|
||||
%readonly-tables
|
||||
%global-table
|
||||
%define word-array-name mappings
|
||||
%pic
|
||||
%%
|
||||
# On HP-UX 11.11, look in /usr/lib/nls/iconv.
|
||||
ISO-8859-1, "iso88591"
|
||||
ISO-8859-2, "iso88592"
|
||||
ISO-8859-5, "iso88595"
|
||||
ISO-8859-6, "iso88596"
|
||||
ISO-8859-7, "iso88597"
|
||||
ISO-8859-8, "iso88598"
|
||||
ISO-8859-9, "iso88599"
|
||||
ISO-8859-15, "iso885915"
|
||||
CP437, "cp437"
|
||||
CP775, "cp775"
|
||||
CP850, "cp850"
|
||||
CP852, "cp852"
|
||||
CP855, "cp855"
|
||||
CP857, "cp857"
|
||||
CP861, "cp861"
|
||||
CP862, "cp862"
|
||||
CP864, "cp864"
|
||||
CP865, "cp865"
|
||||
CP866, "cp866"
|
||||
CP869, "cp869"
|
||||
CP874, "cp874"
|
||||
CP1250, "cp1250"
|
||||
CP1251, "cp1251"
|
||||
CP1252, "cp1252"
|
||||
CP1253, "cp1253"
|
||||
CP1254, "cp1254"
|
||||
CP1255, "cp1255"
|
||||
CP1256, "cp1256"
|
||||
CP1257, "cp1257"
|
||||
CP1258, "cp1258"
|
||||
HP-ROMAN8, "roman8"
|
||||
HP-ARABIC8, "arabic8"
|
||||
HP-GREEK8, "greek8"
|
||||
HP-HEBREW8, "hebrew8"
|
||||
HP-TURKISH8, "turkish8"
|
||||
HP-KANA8, "kana8"
|
||||
TIS-620, "tis620"
|
||||
GB2312, "hp15CN"
|
||||
EUC-JP, "eucJP"
|
||||
EUC-KR, "eucKR"
|
||||
EUC-TW, "eucTW"
|
||||
BIG5, "big5"
|
||||
SHIFT_JIS, "sjis"
|
||||
UTF-8, "utf8"
|
||||
31
lib/iconv_open-irix.gperf
Normal file
31
lib/iconv_open-irix.gperf
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
struct mapping { int standard_name; const char vendor_name[10 + 1]; };
|
||||
%struct-type
|
||||
%language=ANSI-C
|
||||
%define slot-name standard_name
|
||||
%define hash-function-name mapping_hash
|
||||
%define lookup-function-name mapping_lookup
|
||||
%readonly-tables
|
||||
%global-table
|
||||
%define word-array-name mappings
|
||||
%pic
|
||||
%%
|
||||
# On IRIX 6.5, look in /usr/lib/iconv and /usr/lib/international/encodings.
|
||||
ISO-8859-1, "ISO8859-1"
|
||||
ISO-8859-2, "ISO8859-2"
|
||||
ISO-8859-3, "ISO8859-3"
|
||||
ISO-8859-4, "ISO8859-4"
|
||||
ISO-8859-5, "ISO8859-5"
|
||||
ISO-8859-6, "ISO8859-6"
|
||||
ISO-8859-7, "ISO8859-7"
|
||||
ISO-8859-8, "ISO8859-8"
|
||||
ISO-8859-9, "ISO8859-9"
|
||||
ISO-8859-15, "ISO8859-15"
|
||||
KOI8-R, "KOI8"
|
||||
CP855, "DOS855"
|
||||
CP1251, "WIN1251"
|
||||
GB2312, "eucCN"
|
||||
EUC-JP, "eucJP"
|
||||
EUC-KR, "eucKR"
|
||||
EUC-TW, "eucTW"
|
||||
SHIFT_JIS, "sjis"
|
||||
TIS-620, "TIS620"
|
||||
50
lib/iconv_open-osf.gperf
Normal file
50
lib/iconv_open-osf.gperf
Normal file
|
|
@ -0,0 +1,50 @@
|
|||
struct mapping { int standard_name; const char vendor_name[10 + 1]; };
|
||||
%struct-type
|
||||
%language=ANSI-C
|
||||
%define slot-name standard_name
|
||||
%define hash-function-name mapping_hash
|
||||
%define lookup-function-name mapping_lookup
|
||||
%readonly-tables
|
||||
%global-table
|
||||
%define word-array-name mappings
|
||||
%pic
|
||||
%%
|
||||
# On OSF/1 5.1, look in /usr/lib/nls/loc/iconv.
|
||||
ISO-8859-1, "ISO8859-1"
|
||||
ISO-8859-2, "ISO8859-2"
|
||||
ISO-8859-3, "ISO8859-3"
|
||||
ISO-8859-4, "ISO8859-4"
|
||||
ISO-8859-5, "ISO8859-5"
|
||||
ISO-8859-6, "ISO8859-6"
|
||||
ISO-8859-7, "ISO8859-7"
|
||||
ISO-8859-8, "ISO8859-8"
|
||||
ISO-8859-9, "ISO8859-9"
|
||||
ISO-8859-15, "ISO8859-15"
|
||||
CP437, "cp437"
|
||||
CP775, "cp775"
|
||||
CP850, "cp850"
|
||||
CP852, "cp852"
|
||||
CP855, "cp855"
|
||||
CP857, "cp857"
|
||||
CP861, "cp861"
|
||||
CP862, "cp862"
|
||||
CP865, "cp865"
|
||||
CP866, "cp866"
|
||||
CP869, "cp869"
|
||||
CP874, "cp874"
|
||||
CP949, "KSC5601"
|
||||
CP1250, "cp1250"
|
||||
CP1251, "cp1251"
|
||||
CP1252, "cp1252"
|
||||
CP1253, "cp1253"
|
||||
CP1254, "cp1254"
|
||||
CP1255, "cp1255"
|
||||
CP1256, "cp1256"
|
||||
CP1257, "cp1257"
|
||||
CP1258, "cp1258"
|
||||
EUC-JP, "eucJP"
|
||||
EUC-KR, "eucKR"
|
||||
EUC-TW, "eucTW"
|
||||
BIG5, "big5"
|
||||
SHIFT_JIS, "SJIS"
|
||||
TIS-620, "TACTIS"
|
||||
172
lib/iconv_open.c
Normal file
172
lib/iconv_open.c
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
/* Character set conversion.
|
||||
Copyright (C) 2007 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License along
|
||||
with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include <iconv.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include "c-ctype.h"
|
||||
#include "c-strcase.h"
|
||||
|
||||
#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
|
||||
|
||||
/* Namespace cleanliness. */
|
||||
#define mapping_lookup rpl_iconv_open_mapping_lookup
|
||||
|
||||
/* The macro ICONV_FLAVOR is defined to one of these or undefined. */
|
||||
|
||||
#define ICONV_FLAVOR_AIX "iconv_open-aix.h"
|
||||
#define ICONV_FLAVOR_HPUX "iconv_open-hpux.h"
|
||||
#define ICONV_FLAVOR_IRIX "iconv_open-irix.h"
|
||||
#define ICONV_FLAVOR_OSF "iconv_open-osf.h"
|
||||
|
||||
#ifdef ICONV_FLAVOR
|
||||
# include ICONV_FLAVOR
|
||||
#endif
|
||||
|
||||
/* Replacement for iconv_open().

   Tries, in order:
     1. (only if REPLACE_ICONV_UTF) the built-in descriptors for conversions
        between UTF-8 and UTF-{16,32}{BE,LE};
     2. the system's iconv_open() with the names exactly as given;
     3. the system's iconv_open() with the names converted to upper case
        and, if ICONV_FLAVOR is defined, translated to the vendor's names
        through the mappings table.
   Returns (iconv_t)(-1) with errno set to EINVAL if an encoding name does
   not fit in the internal 32-byte buffers.  */
iconv_t
rpl_iconv_open (const char *tocode, const char *fromcode)
#undef iconv_open
{
  char from_buf[32];            /* upper-cased copy of FROMCODE */
  char to_buf[32];              /* upper-cased copy of TOCODE */
  size_t from_len;              /* length of from_buf, without the NUL */
  size_t to_len;                /* length of to_buf, without the NUL */
  iconv_t cd;

#if REPLACE_ICONV_UTF
  /* Special handling of conversion between UTF-8 and UTF-{16,32}{BE,LE}.
     Do this here, before calling the real iconv_open(), because OSF/1 5.1
     iconv() to these encodings inserts a BOM, which is wrong.
     We do not need to handle conversion between arbitrary encodings and
     UTF-{16,32}{BE,LE}, because the 'striconveh' module implements two-step
     conversion through UTF-8.
     The _ICONV_* constants are chosen to be disjoint from any iconv_t
     returned by the system's iconv_open() functions.  Recall that iconv_t
     is a scalar type.  */
  if (c_toupper (fromcode[0]) == 'U'
      && c_toupper (fromcode[1]) == 'T'
      && c_toupper (fromcode[2]) == 'F'
      && fromcode[3] == '-'
      && c_toupper (tocode[0]) == 'U'
      && c_toupper (tocode[1]) == 'T'
      && c_toupper (tocode[2]) == 'F'
      && tocode[3] == '-')
    {
      /* Both names start with "UTF-"; look at what follows.  */
      const char *from_variant = fromcode + 4;
      const char *to_variant = tocode + 4;

      if (strcmp (from_variant, "8") == 0)
        {
          if (c_strcasecmp (to_variant, "16BE") == 0)
            return _ICONV_UTF8_UTF16BE;
          if (c_strcasecmp (to_variant, "16LE") == 0)
            return _ICONV_UTF8_UTF16LE;
          if (c_strcasecmp (to_variant, "32BE") == 0)
            return _ICONV_UTF8_UTF32BE;
          if (c_strcasecmp (to_variant, "32LE") == 0)
            return _ICONV_UTF8_UTF32LE;
        }
      else if (strcmp (to_variant, "8") == 0)
        {
          if (c_strcasecmp (from_variant, "16BE") == 0)
            return _ICONV_UTF16BE_UTF8;
          if (c_strcasecmp (from_variant, "16LE") == 0)
            return _ICONV_UTF16LE_UTF8;
          if (c_strcasecmp (from_variant, "32BE") == 0)
            return _ICONV_UTF32BE_UTF8;
          if (c_strcasecmp (from_variant, "32LE") == 0)
            return _ICONV_UTF32LE_UTF8;
        }
    }
#endif

  /* Do *not* add special support for 8-bit encodings like ASCII or ISO-8859-1
     here.  This would lead to programs that work in some locales (such as the
     "C" or "en_US" locales) but do not work in East Asian locales.  It is
     better if programmers make their programs depend on GNU libiconv (except
     on glibc systems), e.g. by using the AM_ICONV macro and documenting the
     dependency in an INSTALL or DEPENDENCIES file.  */

  /* Try with the original names first.
     This covers the case when fromcode or tocode is a lowercase encoding name
     that is understood by the system's iconv_open but not listed in our
     mappings table.  */
  cd = iconv_open (tocode, fromcode);
  if (cd != (iconv_t)(-1))
    return cd;

  /* Convert the encodings to upper case, because
       1. in the arguments of iconv_open() on AIX, HP-UX, and OSF/1 the case
          matters,
       2. it makes searching in the table faster.
     A name that does not fit in the buffer (including its terminating NUL)
     is rejected with EINVAL.  */
  for (from_len = 0; ; )
    {
      char ch = c_toupper (fromcode[from_len]);

      from_buf[from_len] = ch;
      if (ch == '\0')
        break;
      if (++from_len == SIZEOF (from_buf))
        {
          errno = EINVAL;
          return (iconv_t)(-1);
        }
    }

  for (to_len = 0; ; )
    {
      char ch = c_toupper (tocode[to_len]);

      to_buf[to_len] = ch;
      if (ch == '\0')
        break;
      if (++to_len == SIZEOF (to_buf))
        {
          errno = EINVAL;
          return (iconv_t)(-1);
        }
    }

#ifdef ICONV_FLAVOR
  /* Apply the mappings: use the vendor's name where the table has one,
     otherwise the upper-cased name.  */
  {
    const struct mapping *from_map = mapping_lookup (from_buf, from_len);

    if (from_map != NULL)
      fromcode = from_map->vendor_name;
    else
      fromcode = from_buf;
  }
  {
    const struct mapping *to_map = mapping_lookup (to_buf, to_len);

    if (to_map != NULL)
      tocode = to_map->vendor_name;
    else
      tocode = to_buf;
  }
#else
  fromcode = from_buf;
  tocode = to_buf;
#endif

  return iconv_open (tocode, fromcode);
}
|
||||
41
lib/iconveh.h
Normal file
41
lib/iconveh.h
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
/* Character set conversion handler type.
|
||||
Copyright (C) 2001-2007, 2009 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _ICONVEH_H
|
||||
#define _ICONVEH_H
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Handling of unconvertible characters. */
|
||||
enum iconv_ilseq_handler
|
||||
{
|
||||
iconveh_error, /* return and set errno = EILSEQ */
|
||||
iconveh_question_mark, /* use one '?' per unconvertible character */
|
||||
iconveh_escape_sequence /* use escape sequence \uxxxx or \Uxxxxxxxx */
|
||||
};
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* _ICONVEH_H */
|
||||
1251
lib/striconveh.c
Normal file
1251
lib/striconveh.c
Normal file
File diff suppressed because it is too large
Load diff
120
lib/striconveh.h
Normal file
120
lib/striconveh.h
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
/* Character set conversion with error handling.
|
||||
Copyright (C) 2001-2007, 2009 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible and Simon Josefsson.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _STRICONVEH_H
|
||||
#define _STRICONVEH_H
|
||||
|
||||
#include <stddef.h>
|
||||
#if HAVE_ICONV
|
||||
#include <iconv.h>
|
||||
#endif
|
||||
|
||||
#include "iconveh.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
#if HAVE_ICONV
|
||||
|
||||
/* Convert an entire string from one encoding to another, using iconv.
|
||||
The original string is at [SRC,...,SRC+SRCLEN-1].
|
||||
CD is the conversion descriptor from FROMCODE to TOCODE, or (iconv_t)(-1) if
|
||||
the system does not support a direct conversion from FROMCODE to TOCODE.
|
||||
CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or
|
||||
(iconv_t)(-1) if FROM_CODESET is UTF-8).
|
||||
CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1)
|
||||
if TO_CODESET is UTF-8).
|
||||
If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
|
||||
array is filled with offsets into the result, i.e. the character starting
|
||||
at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
|
||||
and other offsets are set to (size_t)(-1).
|
||||
*RESULTP and *LENGTHP should initially be a scratch buffer and its size,
|
||||
or *RESULTP can initially be NULL.
|
||||
May erase the contents of the memory at *RESULTP.
|
||||
Return value: 0 if successful, otherwise -1 and errno set.
|
||||
If successful: The resulting string is stored in *RESULTP and its length
|
||||
in *LENGTHP. *RESULTP is set to a freshly allocated memory block, or is
|
||||
unchanged if no dynamic memory allocation was necessary. */
|
||||
extern int
|
||||
mem_cd_iconveh (const char *src, size_t srclen,
|
||||
iconv_t cd, iconv_t cd1, iconv_t cd2,
|
||||
enum iconv_ilseq_handler handler,
|
||||
size_t *offsets,
|
||||
char **resultp, size_t *lengthp);
|
||||
|
||||
/* Convert an entire string from one encoding to another, using iconv.
|
||||
The original string is the NUL-terminated string starting at SRC.
|
||||
CD is the conversion descriptor from FROMCODE to TOCODE, or (iconv_t)(-1) if
|
||||
the system does not support a direct conversion from FROMCODE to TOCODE.
|
||||
Both the "from" and the "to" encoding must use a single NUL byte at the end
|
||||
of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
|
||||
CD1 is the conversion descriptor from FROM_CODESET to UTF-8 (or
|
||||
(iconv_t)(-1) if FROM_CODESET is UTF-8).
|
||||
CD2 is the conversion descriptor from UTF-8 to TO_CODESET (or (iconv_t)(-1)
|
||||
if TO_CODESET is UTF-8).
|
||||
Allocate a malloced memory block for the result.
|
||||
Return value: the freshly allocated resulting NUL-terminated string if
|
||||
successful, otherwise NULL and errno set. */
|
||||
extern char *
|
||||
str_cd_iconveh (const char *src,
|
||||
iconv_t cd, iconv_t cd1, iconv_t cd2,
|
||||
enum iconv_ilseq_handler handler);
|
||||
|
||||
#endif
|
||||
|
||||
/* Convert an entire string from one encoding to another, using iconv.
|
||||
The original string is at [SRC,...,SRC+SRCLEN-1].
|
||||
If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
|
||||
array is filled with offsets into the result, i.e. the character starting
|
||||
at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
|
||||
and other offsets are set to (size_t)(-1).
|
||||
*RESULTP and *LENGTHP should initially be a scratch buffer and its size,
|
||||
or *RESULTP can initially be NULL.
|
||||
May erase the contents of the memory at *RESULTP.
|
||||
Return value: 0 if successful, otherwise -1 and errno set.
|
||||
If successful: The resulting string is stored in *RESULTP and its length
|
||||
in *LENGTHP. *RESULTP is set to a freshly allocated memory block, or is
|
||||
unchanged if no dynamic memory allocation was necessary. */
|
||||
extern int
|
||||
mem_iconveh (const char *src, size_t srclen,
|
||||
const char *from_codeset, const char *to_codeset,
|
||||
enum iconv_ilseq_handler handler,
|
||||
size_t *offsets,
|
||||
char **resultp, size_t *lengthp);
|
||||
|
||||
/* Convert an entire string from one encoding to another, using iconv.
|
||||
The original string is the NUL-terminated string starting at SRC.
|
||||
Both the "from" and the "to" encoding must use a single NUL byte at the
|
||||
end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
|
||||
Allocate a malloced memory block for the result.
|
||||
Return value: the freshly allocated resulting NUL-terminated string if
|
||||
successful, otherwise NULL and errno set. */
|
||||
extern char *
|
||||
str_iconveh (const char *src,
|
||||
const char *from_codeset, const char *to_codeset,
|
||||
enum iconv_ilseq_handler handler);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* _STRICONVEH_H */
|
||||
605
lib/string.in.h
Normal file
605
lib/string.in.h
Normal file
|
|
@ -0,0 +1,605 @@
|
|||
/* A GNU-like <string.h>.
|
||||
|
||||
Copyright (C) 1995-1996, 2001-2008 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program; if not, write to the Free Software Foundation,
|
||||
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
|
||||
|
||||
#ifndef _GL_STRING_H
|
||||
|
||||
#if __GNUC__ >= 3
|
||||
@PRAGMA_SYSTEM_HEADER@
|
||||
#endif
|
||||
|
||||
/* The include_next requires a split double-inclusion guard. */
|
||||
#@INCLUDE_NEXT@ @NEXT_STRING_H@
|
||||
|
||||
#ifndef _GL_STRING_H
|
||||
#define _GL_STRING_H
|
||||
|
||||
|
||||
#ifndef __attribute__
|
||||
/* This feature is available in gcc versions 2.5 and later. */
|
||||
# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 5)
|
||||
# define __attribute__(Spec) /* empty */
|
||||
# endif
|
||||
/* The attribute __pure__ was added in gcc 2.96. */
|
||||
# if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 96)
|
||||
# define __pure__ /* empty */
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
/* The definition of GL_LINK_WARNING is copied here. */
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Return the first occurrence of NEEDLE in HAYSTACK. */
|
||||
#if @GNULIB_MEMMEM@
|
||||
# if @REPLACE_MEMMEM@
|
||||
# define memmem rpl_memmem
|
||||
# endif
|
||||
# if ! @HAVE_DECL_MEMMEM@ || @REPLACE_MEMMEM@
|
||||
extern void *memmem (void const *__haystack, size_t __haystack_len,
|
||||
void const *__needle, size_t __needle_len)
|
||||
__attribute__ ((__pure__));
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef memmem
|
||||
# define memmem(a,al,b,bl) \
|
||||
(GL_LINK_WARNING ("memmem is unportable and often quadratic - " \
|
||||
"use gnulib module memmem-simple for portability, " \
|
||||
"and module memmem for speed" ), \
|
||||
memmem (a, al, b, bl))
|
||||
#endif
|
||||
|
||||
/* Copy N bytes of SRC to DEST, return pointer to bytes after the
|
||||
last written byte. */
|
||||
#if @GNULIB_MEMPCPY@
|
||||
# if ! @HAVE_MEMPCPY@
|
||||
extern void *mempcpy (void *restrict __dest, void const *restrict __src,
|
||||
size_t __n);
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef mempcpy
|
||||
# define mempcpy(a,b,n) \
|
||||
(GL_LINK_WARNING ("mempcpy is unportable - " \
|
||||
"use gnulib module mempcpy for portability"), \
|
||||
mempcpy (a, b, n))
|
||||
#endif
|
||||
|
||||
/* Search backwards through a block for a byte (specified as an int). */
|
||||
#if @GNULIB_MEMRCHR@
|
||||
# if ! @HAVE_DECL_MEMRCHR@
|
||||
extern void *memrchr (void const *, int, size_t)
|
||||
__attribute__ ((__pure__));
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef memrchr
|
||||
# define memrchr(a,b,c) \
|
||||
(GL_LINK_WARNING ("memrchr is unportable - " \
|
||||
"use gnulib module memrchr for portability"), \
|
||||
memrchr (a, b, c))
|
||||
#endif
|
||||
|
||||
/* Find the first occurrence of C in S. More efficient than
|
||||
memchr(S,C,N), at the expense of undefined behavior if C does not
|
||||
occur within N bytes. */
|
||||
#if @GNULIB_RAWMEMCHR@
|
||||
# if ! @HAVE_RAWMEMCHR@
|
||||
extern void *rawmemchr (void const *__s, int __c_in)
|
||||
__attribute__ ((__pure__));
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef rawmemchr
|
||||
# define rawmemchr(a,b) \
|
||||
(GL_LINK_WARNING ("rawmemchr is unportable - " \
|
||||
"use gnulib module rawmemchr for portability"), \
|
||||
rawmemchr (a, b))
|
||||
#endif
|
||||
|
||||
/* Copy SRC to DST, returning the address of the terminating '\0' in DST. */
|
||||
#if @GNULIB_STPCPY@
|
||||
# if ! @HAVE_STPCPY@
|
||||
extern char *stpcpy (char *restrict __dst, char const *restrict __src);
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef stpcpy
|
||||
# define stpcpy(a,b) \
|
||||
(GL_LINK_WARNING ("stpcpy is unportable - " \
|
||||
"use gnulib module stpcpy for portability"), \
|
||||
stpcpy (a, b))
|
||||
#endif
|
||||
|
||||
/* Copy no more than N bytes of SRC to DST, returning a pointer past the
|
||||
last non-NUL byte written into DST. */
|
||||
#if @GNULIB_STPNCPY@
|
||||
# if ! @HAVE_STPNCPY@
|
||||
# define stpncpy gnu_stpncpy
|
||||
extern char *stpncpy (char *restrict __dst, char const *restrict __src,
|
||||
size_t __n);
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef stpncpy
|
||||
# define stpncpy(a,b,n) \
|
||||
(GL_LINK_WARNING ("stpncpy is unportable - " \
|
||||
"use gnulib module stpncpy for portability"), \
|
||||
stpncpy (a, b, n))
|
||||
#endif
|
||||
|
||||
#if defined GNULIB_POSIXCHECK
|
||||
/* strchr() does not work with multibyte strings if the locale encoding is
|
||||
GB18030 and the character to be searched is a digit. */
|
||||
# undef strchr
|
||||
# define strchr(s,c) \
|
||||
(GL_LINK_WARNING ("strchr cannot work correctly on character strings " \
|
||||
"in some multibyte locales - " \
|
||||
"use mbschr if you care about internationalization"), \
|
||||
strchr (s, c))
|
||||
#endif
|
||||
|
||||
/* Find the first occurrence of C in S or the final NUL byte. */
|
||||
#if @GNULIB_STRCHRNUL@
|
||||
# if ! @HAVE_STRCHRNUL@
|
||||
extern char *strchrnul (char const *__s, int __c_in)
|
||||
__attribute__ ((__pure__));
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef strchrnul
|
||||
# define strchrnul(a,b) \
|
||||
(GL_LINK_WARNING ("strchrnul is unportable - " \
|
||||
"use gnulib module strchrnul for portability"), \
|
||||
strchrnul (a, b))
|
||||
#endif
|
||||
|
||||
/* Duplicate S, returning an identical malloc'd string. */
|
||||
#if @GNULIB_STRDUP@
|
||||
# if @REPLACE_STRDUP@
|
||||
# undef strdup
|
||||
# define strdup rpl_strdup
|
||||
# endif
|
||||
# if !(@HAVE_DECL_STRDUP@ || defined strdup) || @REPLACE_STRDUP@
|
||||
extern char *strdup (char const *__s);
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef strdup
|
||||
# define strdup(a) \
|
||||
(GL_LINK_WARNING ("strdup is unportable - " \
|
||||
"use gnulib module strdup for portability"), \
|
||||
strdup (a))
|
||||
#endif
|
||||
|
||||
/* Return a newly allocated copy of at most N bytes of STRING. */
|
||||
#if @GNULIB_STRNDUP@
|
||||
# if ! @HAVE_STRNDUP@
|
||||
# undef strndup
|
||||
# define strndup rpl_strndup
|
||||
# endif
|
||||
# if ! @HAVE_STRNDUP@ || ! @HAVE_DECL_STRNDUP@
|
||||
extern char *strndup (char const *__string, size_t __n);
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef strndup
|
||||
# define strndup(a,n) \
|
||||
(GL_LINK_WARNING ("strndup is unportable - " \
|
||||
"use gnulib module strndup for portability"), \
|
||||
strndup (a, n))
|
||||
#endif
|
||||
|
||||
/* Find the length (number of bytes) of STRING, but scan at most
|
||||
MAXLEN bytes. If no '\0' terminator is found in that many bytes,
|
||||
return MAXLEN. */
|
||||
#if @GNULIB_STRNLEN@
|
||||
# if ! @HAVE_DECL_STRNLEN@
|
||||
extern size_t strnlen (char const *__string, size_t __maxlen)
|
||||
__attribute__ ((__pure__));
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef strnlen
|
||||
# define strnlen(a,n) \
|
||||
(GL_LINK_WARNING ("strnlen is unportable - " \
|
||||
"use gnulib module strnlen for portability"), \
|
||||
strnlen (a, n))
|
||||
#endif
|
||||
|
||||
#if defined GNULIB_POSIXCHECK
|
||||
/* strcspn() assumes the second argument is a list of single-byte characters.
|
||||
Even in this simple case, it does not work with multibyte strings if the
|
||||
locale encoding is GB18030 and one of the characters to be searched is a
|
||||
digit. */
|
||||
# undef strcspn
|
||||
# define strcspn(s,a) \
|
||||
(GL_LINK_WARNING ("strcspn cannot work correctly on character strings " \
|
||||
"in multibyte locales - " \
|
||||
"use mbscspn if you care about internationalization"), \
|
||||
strcspn (s, a))
|
||||
#endif
|
||||
|
||||
/* Find the first occurrence in S of any character in ACCEPT. */
|
||||
#if @GNULIB_STRPBRK@
|
||||
# if ! @HAVE_STRPBRK@
|
||||
extern char *strpbrk (char const *__s, char const *__accept)
|
||||
__attribute__ ((__pure__));
|
||||
# endif
|
||||
# if defined GNULIB_POSIXCHECK
|
||||
/* strpbrk() assumes the second argument is a list of single-byte characters.
|
||||
Even in this simple case, it does not work with multibyte strings if the
|
||||
locale encoding is GB18030 and one of the characters to be searched is a
|
||||
digit. */
|
||||
# undef strpbrk
|
||||
# define strpbrk(s,a) \
|
||||
(GL_LINK_WARNING ("strpbrk cannot work correctly on character strings " \
|
||||
"in multibyte locales - " \
|
||||
"use mbspbrk if you care about internationalization"), \
|
||||
strpbrk (s, a))
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef strpbrk
|
||||
# define strpbrk(s,a) \
|
||||
(GL_LINK_WARNING ("strpbrk is unportable - " \
|
||||
"use gnulib module strpbrk for portability"), \
|
||||
strpbrk (s, a))
|
||||
#endif
|
||||
|
||||
#if defined GNULIB_POSIXCHECK
|
||||
/* strspn() assumes the second argument is a list of single-byte characters.
|
||||
Even in this simple case, it cannot work with multibyte strings. */
|
||||
# undef strspn
|
||||
# define strspn(s,a) \
|
||||
(GL_LINK_WARNING ("strspn cannot work correctly on character strings " \
|
||||
"in multibyte locales - " \
|
||||
"use mbsspn if you care about internationalization"), \
|
||||
strspn (s, a))
|
||||
#endif
|
||||
|
||||
#if defined GNULIB_POSIXCHECK
|
||||
/* strrchr() does not work with multibyte strings if the locale encoding is
|
||||
GB18030 and the character to be searched is a digit. */
|
||||
# undef strrchr
|
||||
# define strrchr(s,c) \
|
||||
(GL_LINK_WARNING ("strrchr cannot work correctly on character strings " \
|
||||
"in some multibyte locales - " \
|
||||
"use mbsrchr if you care about internationalization"), \
|
||||
strrchr (s, c))
|
||||
#endif
|
||||
|
||||
/* Search the next delimiter (char listed in DELIM) starting at *STRINGP.
|
||||
If one is found, overwrite it with a NUL, and advance *STRINGP
|
||||
to point to the next char after it. Otherwise, set *STRINGP to NULL.
|
||||
If *STRINGP was already NULL, nothing happens.
|
||||
Return the old value of *STRINGP.
|
||||
|
||||
This is a variant of strtok() that is multithread-safe and supports
|
||||
empty fields.
|
||||
|
||||
Caveat: It modifies the original string.
|
||||
Caveat: These functions cannot be used on constant strings.
|
||||
Caveat: The identity of the delimiting character is lost.
|
||||
Caveat: It doesn't work with multibyte strings unless all of the delimiter
|
||||
characters are ASCII characters < 0x30.
|
||||
|
||||
See also strtok_r(). */
|
||||
#if @GNULIB_STRSEP@
|
||||
# if ! @HAVE_STRSEP@
|
||||
extern char *strsep (char **restrict __stringp, char const *restrict __delim);
|
||||
# endif
|
||||
# if defined GNULIB_POSIXCHECK
|
||||
# undef strsep
|
||||
# define strsep(s,d) \
|
||||
(GL_LINK_WARNING ("strsep cannot work correctly on character strings " \
|
||||
"in multibyte locales - " \
|
||||
"use mbssep if you care about internationalization"), \
|
||||
strsep (s, d))
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef strsep
|
||||
# define strsep(s,d) \
|
||||
(GL_LINK_WARNING ("strsep is unportable - " \
|
||||
"use gnulib module strsep for portability"), \
|
||||
strsep (s, d))
|
||||
#endif
|
||||
|
||||
#if @GNULIB_STRSTR@
|
||||
# if @REPLACE_STRSTR@
|
||||
# define strstr rpl_strstr
|
||||
char *strstr (const char *haystack, const char *needle)
|
||||
__attribute__ ((__pure__));
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
/* strstr() does not work with multibyte strings if the locale encoding is
|
||||
different from UTF-8:
|
||||
POSIX says that it operates on "strings", and "string" in POSIX is defined
|
||||
as a sequence of bytes, not of characters. */
|
||||
# undef strstr
|
||||
# define strstr(a,b) \
|
||||
(GL_LINK_WARNING ("strstr is quadratic on many systems, and cannot " \
|
||||
"work correctly on character strings in most " \
|
||||
"multibyte locales - " \
|
||||
"use mbsstr if you care about internationalization, " \
|
||||
"or use strstr if you care about speed"), \
|
||||
strstr (a, b))
|
||||
#endif
|
||||
|
||||
/* Find the first occurrence of NEEDLE in HAYSTACK, using case-insensitive
|
||||
comparison. */
|
||||
#if @GNULIB_STRCASESTR@
|
||||
# if @REPLACE_STRCASESTR@
|
||||
# define strcasestr rpl_strcasestr
|
||||
# endif
|
||||
# if ! @HAVE_STRCASESTR@ || @REPLACE_STRCASESTR@
|
||||
extern char *strcasestr (const char *haystack, const char *needle)
|
||||
__attribute__ ((__pure__));
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
/* strcasestr() does not work with multibyte strings:
|
||||
It is a glibc extension, and glibc implements it only for unibyte
|
||||
locales. */
|
||||
# undef strcasestr
|
||||
/* Replacement macro for the GNULIB_POSIXCHECK case.  It expands to a comma
   expression: GL_LINK_WARNING with the diagnostic text below, followed by
   the call to the real strcasestr with the same two arguments.  The text
   must say that strcasestr does NOT work in multibyte locales, in agreement
   with the explanatory comment that precedes this definition.  */
# define strcasestr(a,b) \
    (GL_LINK_WARNING ("strcasestr does not work correctly on character strings " \
                      "in multibyte locales - " \
                      "use mbscasestr if you care about " \
                      "internationalization, or use c-strcasestr if you want " \
                      "a locale independent function"), \
     strcasestr (a, b))
|
||||
#endif
|
||||
|
||||
/* Parse S into tokens separated by characters in DELIM.
|
||||
If S is NULL, the saved pointer in SAVE_PTR is used as
|
||||
the next starting point. For example:
|
||||
char s[] = "-abc-=-def";
|
||||
char *sp;
|
||||
x = strtok_r(s, "-", &sp); // x = "abc", sp = "=-def"
|
||||
x = strtok_r(NULL, "-=", &sp); // x = "def", sp = NULL
|
||||
x = strtok_r(NULL, "=", &sp); // x = NULL
|
||||
// s = "abc\0-def\0"
|
||||
|
||||
This is a variant of strtok() that is multithread-safe.
|
||||
|
||||
For the POSIX documentation for this function, see:
|
||||
http://www.opengroup.org/susv3xsh/strtok.html
|
||||
|
||||
Caveat: It modifies the original string.
|
||||
Caveat: These functions cannot be used on constant strings.
|
||||
Caveat: The identity of the delimiting character is lost.
|
||||
Caveat: It doesn't work with multibyte strings unless all of the delimiter
|
||||
characters are ASCII characters < 0x30.
|
||||
|
||||
See also strsep(). */
|
||||
#if @GNULIB_STRTOK_R@
|
||||
# if ! @HAVE_DECL_STRTOK_R@
|
||||
extern char *strtok_r (char *restrict s, char const *restrict delim,
|
||||
char **restrict save_ptr);
|
||||
# endif
|
||||
# if defined GNULIB_POSIXCHECK
|
||||
# undef strtok_r
|
||||
# define strtok_r(s,d,p) \
|
||||
(GL_LINK_WARNING ("strtok_r cannot work correctly on character strings " \
|
||||
"in multibyte locales - " \
|
||||
"use mbstok_r if you care about internationalization"), \
|
||||
strtok_r (s, d, p))
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef strtok_r
|
||||
# define strtok_r(s,d,p) \
|
||||
(GL_LINK_WARNING ("strtok_r is unportable - " \
|
||||
"use gnulib module strtok_r for portability"), \
|
||||
strtok_r (s, d, p))
|
||||
#endif
|
||||
|
||||
|
||||
/* The following functions are not specified by POSIX. They are gnulib
|
||||
extensions. */
|
||||
|
||||
#if @GNULIB_MBSLEN@
|
||||
/* Return the number of multibyte characters in the character string STRING.
|
||||
This considers multibyte characters, unlike strlen, which counts bytes. */
|
||||
extern size_t mbslen (const char *string);
|
||||
#endif
|
||||
|
||||
#if @GNULIB_MBSNLEN@
|
||||
/* Return the number of multibyte characters in the character string starting
|
||||
at STRING and ending at STRING + LEN. */
|
||||
extern size_t mbsnlen (const char *string, size_t len);
|
||||
#endif
|
||||
|
||||
#if @GNULIB_MBSCHR@
|
||||
/* Locate the first single-byte character C in the character string STRING,
|
||||
and return a pointer to it. Return NULL if C is not found in STRING.
|
||||
Unlike strchr(), this function works correctly in multibyte locales with
|
||||
encodings such as GB18030. */
|
||||
# define mbschr rpl_mbschr /* avoid collision with HP-UX function */
|
||||
extern char * mbschr (const char *string, int c);
|
||||
#endif
|
||||
|
||||
#if @GNULIB_MBSRCHR@
|
||||
/* Locate the last single-byte character C in the character string STRING,
|
||||
and return a pointer to it. Return NULL if C is not found in STRING.
|
||||
Unlike strrchr(), this function works correctly in multibyte locales with
|
||||
encodings such as GB18030. */
|
||||
# define mbsrchr rpl_mbsrchr /* avoid collision with HP-UX function */
|
||||
extern char * mbsrchr (const char *string, int c);
|
||||
#endif
|
||||
|
||||
#if @GNULIB_MBSSTR@
|
||||
/* Find the first occurrence of the character string NEEDLE in the character
|
||||
string HAYSTACK. Return NULL if NEEDLE is not found in HAYSTACK.
|
||||
Unlike strstr(), this function works correctly in multibyte locales with
|
||||
encodings different from UTF-8. */
|
||||
extern char * mbsstr (const char *haystack, const char *needle);
|
||||
#endif
|
||||
|
||||
#if @GNULIB_MBSCASECMP@
|
||||
/* Compare the character strings S1 and S2, ignoring case, returning less than,
|
||||
equal to or greater than zero if S1 is lexicographically less than, equal to
|
||||
or greater than S2.
|
||||
Note: This function may, in multibyte locales, return 0 for strings of
|
||||
different lengths!
|
||||
Unlike strcasecmp(), this function works correctly in multibyte locales. */
|
||||
extern int mbscasecmp (const char *s1, const char *s2);
|
||||
#endif
|
||||
|
||||
#if @GNULIB_MBSNCASECMP@
|
||||
/* Compare the initial segment of the character string S1 consisting of at most
|
||||
N characters with the initial segment of the character string S2 consisting
|
||||
of at most N characters, ignoring case, returning less than, equal to or
|
||||
greater than zero if the initial segment of S1 is lexicographically less
|
||||
than, equal to or greater than the initial segment of S2.
|
||||
Note: This function may, in multibyte locales, return 0 for initial segments
|
||||
of different lengths!
|
||||
Unlike strncasecmp(), this function works correctly in multibyte locales.
|
||||
But beware that N is not a byte count but a character count! */
|
||||
extern int mbsncasecmp (const char *s1, const char *s2, size_t n);
|
||||
#endif
|
||||
|
||||
#if @GNULIB_MBSPCASECMP@
|
||||
/* Compare the initial segment of the character string STRING consisting of
|
||||
at most mbslen (PREFIX) characters with the character string PREFIX,
|
||||
ignoring case, returning less than, equal to or greater than zero if this
|
||||
initial segment is lexicographically less than, equal to or greater than
|
||||
PREFIX.
|
||||
Note: This function may, in multibyte locales, return 0 if STRING is of
|
||||
smaller length than PREFIX!
|
||||
Unlike strncasecmp(), this function works correctly in multibyte
|
||||
locales. */
|
||||
extern char * mbspcasecmp (const char *string, const char *prefix);
|
||||
#endif
|
||||
|
||||
#if @GNULIB_MBSCASESTR@
|
||||
/* Find the first occurrence of the character string NEEDLE in the character
|
||||
string HAYSTACK, using case-insensitive comparison.
|
||||
Note: This function may, in multibyte locales, return success even if
|
||||
strlen (haystack) < strlen (needle) !
|
||||
Unlike strcasestr(), this function works correctly in multibyte locales. */
|
||||
extern char * mbscasestr (const char *haystack, const char *needle);
|
||||
#endif
|
||||
|
||||
#if @GNULIB_MBSCSPN@
|
||||
/* Find the first occurrence in the character string STRING of any character
|
||||
in the character string ACCEPT. Return the number of bytes from the
|
||||
beginning of the string to this occurrence, or to the end of the string
|
||||
if none exists.
|
||||
Unlike strcspn(), this function works correctly in multibyte locales. */
|
||||
extern size_t mbscspn (const char *string, const char *accept);
|
||||
#endif
|
||||
|
||||
#if @GNULIB_MBSPBRK@
|
||||
/* Find the first occurrence in the character string STRING of any character
|
||||
in the character string ACCEPT. Return the pointer to it, or NULL if none
|
||||
exists.
|
||||
Unlike strpbrk(), this function works correctly in multibyte locales. */
|
||||
# define mbspbrk rpl_mbspbrk /* avoid collision with HP-UX function */
|
||||
extern char * mbspbrk (const char *string, const char *accept);
|
||||
#endif
|
||||
|
||||
#if @GNULIB_MBSSPN@
|
||||
/* Find the first occurrence in the character string STRING of any character
|
||||
not in the character string REJECT. Return the number of bytes from the
|
||||
beginning of the string to this occurrence, or to the end of the string
|
||||
if none exists.
|
||||
Unlike strspn(), this function works correctly in multibyte locales. */
|
||||
extern size_t mbsspn (const char *string, const char *reject);
|
||||
#endif
|
||||
|
||||
#if @GNULIB_MBSSEP@
|
||||
/* Search the next delimiter (multibyte character listed in the character
|
||||
string DELIM) starting at the character string *STRINGP.
|
||||
If one is found, overwrite it with a NUL, and advance *STRINGP to point
|
||||
to the next multibyte character after it. Otherwise, set *STRINGP to NULL.
|
||||
If *STRINGP was already NULL, nothing happens.
|
||||
Return the old value of *STRINGP.
|
||||
|
||||
This is a variant of mbstok_r() that supports empty fields.
|
||||
|
||||
Caveat: It modifies the original string.
|
||||
Caveat: These functions cannot be used on constant strings.
|
||||
Caveat: The identity of the delimiting character is lost.
|
||||
|
||||
See also mbstok_r(). */
|
||||
extern char * mbssep (char **stringp, const char *delim);
|
||||
#endif
|
||||
|
||||
#if @GNULIB_MBSTOK_R@
|
||||
/* Parse the character string STRING into tokens separated by characters in
|
||||
the character string DELIM.
|
||||
If STRING is NULL, the saved pointer in SAVE_PTR is used as
|
||||
the next starting point. For example:
|
||||
char s[] = "-abc-=-def";
|
||||
char *sp;
|
||||
x = mbstok_r(s, "-", &sp); // x = "abc", sp = "=-def"
|
||||
x = mbstok_r(NULL, "-=", &sp); // x = "def", sp = NULL
|
||||
x = mbstok_r(NULL, "=", &sp); // x = NULL
|
||||
// s = "abc\0-def\0"
|
||||
|
||||
Caveat: It modifies the original string.
|
||||
Caveat: These functions cannot be used on constant strings.
|
||||
Caveat: The identity of the delimiting character is lost.
|
||||
|
||||
See also mbssep(). */
|
||||
extern char * mbstok_r (char *string, const char *delim, char **save_ptr);
|
||||
#endif
|
||||
|
||||
/* Map any int, typically from errno, into an error message. */
|
||||
#if @GNULIB_STRERROR@
|
||||
# if @REPLACE_STRERROR@
|
||||
# undef strerror
|
||||
# define strerror rpl_strerror
|
||||
extern char *strerror (int);
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef strerror
|
||||
# define strerror(e) \
|
||||
(GL_LINK_WARNING ("strerror is unportable - " \
|
||||
"use gnulib module strerror to guarantee non-NULL result"), \
|
||||
strerror (e))
|
||||
#endif
|
||||
|
||||
#if @GNULIB_STRSIGNAL@
|
||||
# if @REPLACE_STRSIGNAL@
|
||||
# define strsignal rpl_strsignal
|
||||
# endif
|
||||
# if ! @HAVE_DECL_STRSIGNAL@ || @REPLACE_STRSIGNAL@
|
||||
extern char *strsignal (int __sig);
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef strsignal
|
||||
# define strsignal(a) \
|
||||
(GL_LINK_WARNING ("strsignal is unportable - " \
|
||||
"use gnulib module strsignal for portability"), \
|
||||
strsignal (a))
|
||||
#endif
|
||||
|
||||
#if @GNULIB_STRVERSCMP@
|
||||
# if !@HAVE_STRVERSCMP@
|
||||
extern int strverscmp (const char *, const char *);
|
||||
# endif
|
||||
#elif defined GNULIB_POSIXCHECK
|
||||
# undef strverscmp
|
||||
# define strverscmp(a, b) \
|
||||
(GL_LINK_WARNING ("strverscmp is unportable - " \
|
||||
"use gnulib module strverscmp for portability"), \
|
||||
strverscmp (a, b))
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _GL_STRING_H */
|
||||
#endif /* _GL_STRING_H */
|
||||
681
lib/unistr.h
Normal file
681
lib/unistr.h
Normal file
|
|
@ -0,0 +1,681 @@
|
|||
/* Elementary Unicode string functions.
|
||||
Copyright (C) 2001-2002, 2005-2009 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef _UNISTR_H
|
||||
#define _UNISTR_H
|
||||
|
||||
#include "unitypes.h"
|
||||
|
||||
/* Get bool. */
|
||||
#include <stdbool.h>
|
||||
|
||||
/* Get size_t. */
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/* Conventions:
|
||||
|
||||
All functions prefixed with u8_ operate on UTF-8 encoded strings.
|
||||
Their unit is a uint8_t (1 byte).
|
||||
|
||||
All functions prefixed with u16_ operate on UTF-16 encoded strings.
|
||||
Their unit is a uint16_t (a 2-byte word).
|
||||
|
||||
All functions prefixed with u32_ operate on UCS-4 encoded strings.
|
||||
Their unit is a uint32_t (a 4-byte word).
|
||||
|
||||
All argument pairs (s, n) denote a Unicode string s[0..n-1] with exactly
|
||||
n units.
|
||||
|
||||
All arguments starting with "str" and the arguments of functions starting
|
||||
with u8_str/u16_str/u32_str denote a NUL terminated string, i.e. a string
|
||||
which terminates at the first NUL unit. This termination unit is
|
||||
considered part of the string for all memory allocation purposes, but
|
||||
is not considered part of the string for all other logical purposes.
|
||||
|
||||
Functions returning a string result take a (resultbuf, lengthp) argument
|
||||
pair. If resultbuf is not NULL and the result fits into *lengthp units,
|
||||
it is put in resultbuf, and resultbuf is returned. Otherwise, a freshly
|
||||
allocated string is returned. In both cases, *lengthp is set to the
|
||||
length (number of units) of the returned string. In case of error,
|
||||
NULL is returned and errno is set. */
|
||||
|
||||
|
||||
/* Elementary string checks. */
|
||||
|
||||
/* Check whether a UTF-8 string is well-formed.
|
||||
Return NULL if valid, or a pointer to the first invalid unit otherwise. */
|
||||
extern const uint8_t *
|
||||
u8_check (const uint8_t *s, size_t n);
|
||||
|
||||
/* Check whether a UTF-16 string is well-formed.
|
||||
Return NULL if valid, or a pointer to the first invalid unit otherwise. */
|
||||
extern const uint16_t *
|
||||
u16_check (const uint16_t *s, size_t n);
|
||||
|
||||
/* Check whether a UCS-4 string is well-formed.
|
||||
Return NULL if valid, or a pointer to the first invalid unit otherwise. */
|
||||
extern const uint32_t *
|
||||
u32_check (const uint32_t *s, size_t n);
|
||||
|
||||
|
||||
/* Elementary string conversions. */
|
||||
|
||||
/* Convert a UTF-8 string to a UTF-16 string. */
|
||||
extern uint16_t *
|
||||
u8_to_u16 (const uint8_t *s, size_t n, uint16_t *resultbuf,
|
||||
size_t *lengthp);
|
||||
|
||||
/* Convert a UTF-8 string to a UCS-4 string. */
|
||||
extern uint32_t *
|
||||
u8_to_u32 (const uint8_t *s, size_t n, uint32_t *resultbuf,
|
||||
size_t *lengthp);
|
||||
|
||||
/* Convert a UTF-16 string to a UTF-8 string. */
|
||||
extern uint8_t *
|
||||
u16_to_u8 (const uint16_t *s, size_t n, uint8_t *resultbuf,
|
||||
size_t *lengthp);
|
||||
|
||||
/* Convert a UTF-16 string to a UCS-4 string. */
|
||||
extern uint32_t *
|
||||
u16_to_u32 (const uint16_t *s, size_t n, uint32_t *resultbuf,
|
||||
size_t *lengthp);
|
||||
|
||||
/* Convert a UCS-4 string to a UTF-8 string. */
|
||||
extern uint8_t *
|
||||
u32_to_u8 (const uint32_t *s, size_t n, uint8_t *resultbuf,
|
||||
size_t *lengthp);
|
||||
|
||||
/* Convert a UCS-4 string to a UTF-16 string. */
|
||||
extern uint16_t *
|
||||
u32_to_u16 (const uint32_t *s, size_t n, uint16_t *resultbuf,
|
||||
size_t *lengthp);
|
||||
|
||||
|
||||
/* Elementary string functions. */
|
||||
|
||||
/* Return the length (number of units) of the first character in S, which is
|
||||
no longer than N. Return 0 if it is the NUL character. Return -1 upon
|
||||
failure. */
|
||||
/* Similar to mblen(), except that s must not be NULL. */
|
||||
extern int
|
||||
u8_mblen (const uint8_t *s, size_t n);
|
||||
extern int
|
||||
u16_mblen (const uint16_t *s, size_t n);
|
||||
extern int
|
||||
u32_mblen (const uint32_t *s, size_t n);
|
||||
|
||||
/* Return the length (number of units) of the first character in S, putting
|
||||
its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
|
||||
and an appropriate number of units is returned.
|
||||
The number of available units, N, must be > 0. */
|
||||
/* Similar to mbtowc(), except that puc and s must not be NULL, n must be > 0,
|
||||
and the NUL character is not treated specially. */
|
||||
/* The variants with _safe suffix are safe, even if the library is compiled
|
||||
without --enable-safety. */
|
||||
|
||||
#ifdef GNULIB_UNISTR_U8_MBTOUC_UNSAFE
|
||||
# if !HAVE_INLINE
|
||||
extern int
|
||||
u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n);
|
||||
# else
|
||||
extern int
|
||||
u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n);
|
||||
static inline int
|
||||
u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
|
||||
{
|
||||
uint8_t c = *s;
|
||||
|
||||
if (c < 0x80)
|
||||
{
|
||||
*puc = c;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return u8_mbtouc_unsafe_aux (puc, s, n);
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef GNULIB_UNISTR_U16_MBTOUC_UNSAFE
|
||||
# if !HAVE_INLINE
|
||||
extern int
|
||||
u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n);
|
||||
# else
|
||||
extern int
|
||||
u16_mbtouc_unsafe_aux (ucs4_t *puc, const uint16_t *s, size_t n);
|
||||
static inline int
|
||||
u16_mbtouc_unsafe (ucs4_t *puc, const uint16_t *s, size_t n)
|
||||
{
|
||||
uint16_t c = *s;
|
||||
|
||||
if (c < 0xd800 || c >= 0xe000)
|
||||
{
|
||||
*puc = c;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return u16_mbtouc_unsafe_aux (puc, s, n);
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef GNULIB_UNISTR_U32_MBTOUC_UNSAFE
|
||||
# if !HAVE_INLINE
|
||||
extern int
|
||||
u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n);
|
||||
# else
|
||||
static inline int
|
||||
u32_mbtouc_unsafe (ucs4_t *puc, const uint32_t *s, size_t n _UNUSED_PARAMETER_)
|
||||
{
|
||||
uint32_t c = *s;
|
||||
|
||||
# if CONFIG_UNICODE_SAFETY
|
||||
if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
|
||||
# endif
|
||||
*puc = c;
|
||||
# if CONFIG_UNICODE_SAFETY
|
||||
else
|
||||
/* invalid multibyte character */
|
||||
*puc = 0xfffd;
|
||||
# endif
|
||||
return 1;
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef GNULIB_UNISTR_U8_MBTOUC
|
||||
# if !HAVE_INLINE
|
||||
extern int
|
||||
u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n);
|
||||
# else
|
||||
extern int
|
||||
u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n);
|
||||
static inline int
|
||||
u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
|
||||
{
|
||||
uint8_t c = *s;
|
||||
|
||||
if (c < 0x80)
|
||||
{
|
||||
*puc = c;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return u8_mbtouc_aux (puc, s, n);
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef GNULIB_UNISTR_U16_MBTOUC
|
||||
# if !HAVE_INLINE
|
||||
extern int
|
||||
u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n);
|
||||
# else
|
||||
extern int
|
||||
u16_mbtouc_aux (ucs4_t *puc, const uint16_t *s, size_t n);
|
||||
static inline int
|
||||
u16_mbtouc (ucs4_t *puc, const uint16_t *s, size_t n)
|
||||
{
|
||||
uint16_t c = *s;
|
||||
|
||||
if (c < 0xd800 || c >= 0xe000)
|
||||
{
|
||||
*puc = c;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return u16_mbtouc_aux (puc, s, n);
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef GNULIB_UNISTR_U32_MBTOUC
|
||||
# if !HAVE_INLINE
|
||||
extern int
|
||||
u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n);
|
||||
# else
|
||||
static inline int
|
||||
u32_mbtouc (ucs4_t *puc, const uint32_t *s, size_t n _UNUSED_PARAMETER_)
|
||||
{
|
||||
uint32_t c = *s;
|
||||
|
||||
if (c < 0xd800 || (c >= 0xe000 && c < 0x110000))
|
||||
*puc = c;
|
||||
else
|
||||
/* invalid multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return 1;
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Return the length (number of units) of the first character in S, putting
|
||||
its 'ucs4_t' representation in *PUC. Upon failure, *PUC is set to 0xfffd,
|
||||
and -1 is returned for an invalid sequence of units, -2 is returned for an
|
||||
incomplete sequence of units.
|
||||
The number of available units, N, must be > 0. */
|
||||
/* Similar to u*_mbtouc(), except that the return value gives more details
|
||||
about the failure, similar to mbrtowc(). */
|
||||
|
||||
#ifdef GNULIB_UNISTR_U8_MBTOUCR
|
||||
extern int
|
||||
u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n);
|
||||
#endif
|
||||
|
||||
#ifdef GNULIB_UNISTR_U16_MBTOUCR
|
||||
extern int
|
||||
u16_mbtoucr (ucs4_t *puc, const uint16_t *s, size_t n);
|
||||
#endif
|
||||
|
||||
#ifdef GNULIB_UNISTR_U32_MBTOUCR
|
||||
extern int
|
||||
u32_mbtoucr (ucs4_t *puc, const uint32_t *s, size_t n);
|
||||
#endif
|
||||
|
||||
/* Put the multibyte character represented by UC in S, returning its
|
||||
length. Return -1 upon failure, -2 if the number of available units, N,
|
||||
is too small. The latter case cannot occur if N >= 6/2/1, respectively. */
|
||||
/* Similar to wctomb(), except that s must not be NULL, and the argument n
|
||||
must be specified. */
|
||||
|
||||
#ifdef GNULIB_UNISTR_U8_UCTOMB
|
||||
/* Auxiliary function, also used by u8_chr, u8_strchr, u8_strrchr. */
|
||||
extern int
|
||||
u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n);
|
||||
# if !HAVE_INLINE
|
||||
extern int
|
||||
u8_uctomb (uint8_t *s, ucs4_t uc, int n);
|
||||
# else
|
||||
static inline int
|
||||
u8_uctomb (uint8_t *s, ucs4_t uc, int n)
|
||||
{
|
||||
if (uc < 0x80 && n > 0)
|
||||
{
|
||||
s[0] = uc;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return u8_uctomb_aux (s, uc, n);
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef GNULIB_UNISTR_U16_UCTOMB
|
||||
/* Auxiliary function, also used by u16_chr, u16_strchr, u16_strrchr. */
|
||||
extern int
|
||||
u16_uctomb_aux (uint16_t *s, ucs4_t uc, int n);
|
||||
# if !HAVE_INLINE
|
||||
extern int
|
||||
u16_uctomb (uint16_t *s, ucs4_t uc, int n);
|
||||
# else
|
||||
static inline int
|
||||
u16_uctomb (uint16_t *s, ucs4_t uc, int n)
|
||||
{
|
||||
if (uc < 0xd800 && n > 0)
|
||||
{
|
||||
s[0] = uc;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return u16_uctomb_aux (s, uc, n);
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef GNULIB_UNISTR_U32_UCTOMB
|
||||
# if !HAVE_INLINE
|
||||
extern int
|
||||
u32_uctomb (uint32_t *s, ucs4_t uc, int n);
|
||||
# else
|
||||
static inline int
|
||||
u32_uctomb (uint32_t *s, ucs4_t uc, int n)
|
||||
{
|
||||
if (uc < 0xd800 || (uc >= 0xe000 && uc < 0x110000))
|
||||
{
|
||||
if (n > 0)
|
||||
{
|
||||
*s = uc;
|
||||
return 1;
|
||||
}
|
||||
else
|
||||
return -2;
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* Copy N units from SRC to DEST. */
|
||||
/* Similar to memcpy(). */
|
||||
extern uint8_t *
|
||||
u8_cpy (uint8_t *dest, const uint8_t *src, size_t n);
|
||||
extern uint16_t *
|
||||
u16_cpy (uint16_t *dest, const uint16_t *src, size_t n);
|
||||
extern uint32_t *
|
||||
u32_cpy (uint32_t *dest, const uint32_t *src, size_t n);
|
||||
|
||||
/* Copy N units from SRC to DEST, guaranteeing correct behavior for
|
||||
overlapping memory areas. */
|
||||
/* Similar to memmove(). */
|
||||
extern uint8_t *
|
||||
u8_move (uint8_t *dest, const uint8_t *src, size_t n);
|
||||
extern uint16_t *
|
||||
u16_move (uint16_t *dest, const uint16_t *src, size_t n);
|
||||
extern uint32_t *
|
||||
u32_move (uint32_t *dest, const uint32_t *src, size_t n);
|
||||
|
||||
/* Set the first N characters of S to UC. UC should be a character that
|
||||
occupies only 1 unit. */
|
||||
/* Similar to memset(). */
|
||||
extern uint8_t *
|
||||
u8_set (uint8_t *s, ucs4_t uc, size_t n);
|
||||
extern uint16_t *
|
||||
u16_set (uint16_t *s, ucs4_t uc, size_t n);
|
||||
extern uint32_t *
|
||||
u32_set (uint32_t *s, ucs4_t uc, size_t n);
|
||||
|
||||
/* Compare S1 and S2, each of length N. */
|
||||
/* Similar to memcmp(). */
|
||||
extern int
|
||||
u8_cmp (const uint8_t *s1, const uint8_t *s2, size_t n);
|
||||
extern int
|
||||
u16_cmp (const uint16_t *s1, const uint16_t *s2, size_t n);
|
||||
extern int
|
||||
u32_cmp (const uint32_t *s1, const uint32_t *s2, size_t n);
|
||||
|
||||
/* Compare S1 and S2. */
|
||||
/* Similar to the gnulib function memcmp2(). */
|
||||
extern int
|
||||
u8_cmp2 (const uint8_t *s1, size_t n1, const uint8_t *s2, size_t n2);
|
||||
extern int
|
||||
u16_cmp2 (const uint16_t *s1, size_t n1, const uint16_t *s2, size_t n2);
|
||||
extern int
|
||||
u32_cmp2 (const uint32_t *s1, size_t n1, const uint32_t *s2, size_t n2);
|
||||
|
||||
/* Search the string at S for UC. */
|
||||
/* Similar to memchr(). */
|
||||
extern uint8_t *
|
||||
u8_chr (const uint8_t *s, size_t n, ucs4_t uc);
|
||||
extern uint16_t *
|
||||
u16_chr (const uint16_t *s, size_t n, ucs4_t uc);
|
||||
extern uint32_t *
|
||||
u32_chr (const uint32_t *s, size_t n, ucs4_t uc);
|
||||
|
||||
/* Count the number of Unicode characters in the N units from S. */
|
||||
/* Similar to mbsnlen(). */
|
||||
extern size_t
|
||||
u8_mbsnlen (const uint8_t *s, size_t n);
|
||||
extern size_t
|
||||
u16_mbsnlen (const uint16_t *s, size_t n);
|
||||
extern size_t
|
||||
u32_mbsnlen (const uint32_t *s, size_t n);
|
||||
|
||||
/* Elementary string functions with memory allocation. */
|
||||
|
||||
/* Make a freshly allocated copy of S, of length N. */
|
||||
extern uint8_t *
|
||||
u8_cpy_alloc (const uint8_t *s, size_t n);
|
||||
extern uint16_t *
|
||||
u16_cpy_alloc (const uint16_t *s, size_t n);
|
||||
extern uint32_t *
|
||||
u32_cpy_alloc (const uint32_t *s, size_t n);
|
||||
|
||||
/* Elementary string functions on NUL terminated strings. */
|
||||
|
||||
/* Return the length (number of units) of the first character in S.
|
||||
Return 0 if it is the NUL character. Return -1 upon failure. */
|
||||
extern int
|
||||
u8_strmblen (const uint8_t *s);
|
||||
extern int
|
||||
u16_strmblen (const uint16_t *s);
|
||||
extern int
|
||||
u32_strmblen (const uint32_t *s);
|
||||
|
||||
/* Return the length (number of units) of the first character in S, putting
|
||||
its 'ucs4_t' representation in *PUC. Return 0 if it is the NUL
|
||||
character. Return -1 upon failure. */
|
||||
extern int
|
||||
u8_strmbtouc (ucs4_t *puc, const uint8_t *s);
|
||||
extern int
|
||||
u16_strmbtouc (ucs4_t *puc, const uint16_t *s);
|
||||
extern int
|
||||
u32_strmbtouc (ucs4_t *puc, const uint32_t *s);
|
||||
|
||||
/* Forward iteration step. Advances the pointer past the next character,
|
||||
or returns NULL if the end of the string has been reached. Puts the
|
||||
character's 'ucs4_t' representation in *PUC. */
|
||||
extern const uint8_t *
|
||||
u8_next (ucs4_t *puc, const uint8_t *s);
|
||||
extern const uint16_t *
|
||||
u16_next (ucs4_t *puc, const uint16_t *s);
|
||||
extern const uint32_t *
|
||||
u32_next (ucs4_t *puc, const uint32_t *s);
|
||||
|
||||
/* Backward iteration step. Advances the pointer to point to the previous
|
||||
character, or returns NULL if the beginning of the string had been reached.
|
||||
Puts the character's 'ucs4_t' representation in *PUC. */
|
||||
extern const uint8_t *
|
||||
u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start);
|
||||
extern const uint16_t *
|
||||
u16_prev (ucs4_t *puc, const uint16_t *s, const uint16_t *start);
|
||||
extern const uint32_t *
|
||||
u32_prev (ucs4_t *puc, const uint32_t *s, const uint32_t *start);
|
||||
|
||||
/* Return the number of units in S. */
|
||||
/* Similar to strlen(), wcslen(). */
|
||||
extern size_t
|
||||
u8_strlen (const uint8_t *s);
|
||||
extern size_t
|
||||
u16_strlen (const uint16_t *s);
|
||||
extern size_t
|
||||
u32_strlen (const uint32_t *s);
|
||||
|
||||
/* Return the number of units in S, but at most MAXLEN. */
|
||||
/* Similar to strnlen(), wcsnlen(). */
|
||||
extern size_t
|
||||
u8_strnlen (const uint8_t *s, size_t maxlen);
|
||||
extern size_t
|
||||
u16_strnlen (const uint16_t *s, size_t maxlen);
|
||||
extern size_t
|
||||
u32_strnlen (const uint32_t *s, size_t maxlen);
|
||||
|
||||
/* Copy SRC to DEST. */
|
||||
/* Similar to strcpy(), wcscpy(). */
|
||||
extern uint8_t *
|
||||
u8_strcpy (uint8_t *dest, const uint8_t *src);
|
||||
extern uint16_t *
|
||||
u16_strcpy (uint16_t *dest, const uint16_t *src);
|
||||
extern uint32_t *
|
||||
u32_strcpy (uint32_t *dest, const uint32_t *src);
|
||||
|
||||
/* Copy SRC to DEST, returning the address of the terminating NUL in DEST. */
|
||||
/* Similar to stpcpy(). */
|
||||
extern uint8_t *
|
||||
u8_stpcpy (uint8_t *dest, const uint8_t *src);
|
||||
extern uint16_t *
|
||||
u16_stpcpy (uint16_t *dest, const uint16_t *src);
|
||||
extern uint32_t *
|
||||
u32_stpcpy (uint32_t *dest, const uint32_t *src);
|
||||
|
||||
/* Copy no more than N units of SRC to DEST. */
|
||||
/* Similar to strncpy(), wcsncpy(). */
|
||||
extern uint8_t *
|
||||
u8_strncpy (uint8_t *dest, const uint8_t *src, size_t n);
|
||||
extern uint16_t *
|
||||
u16_strncpy (uint16_t *dest, const uint16_t *src, size_t n);
|
||||
extern uint32_t *
|
||||
u32_strncpy (uint32_t *dest, const uint32_t *src, size_t n);
|
||||
|
||||
/* Copy no more than N units of SRC to DEST, returning the address of
|
||||
the last unit written into DEST. */
|
||||
/* Similar to stpncpy(). */
|
||||
extern uint8_t *
|
||||
u8_stpncpy (uint8_t *dest, const uint8_t *src, size_t n);
|
||||
extern uint16_t *
|
||||
u16_stpncpy (uint16_t *dest, const uint16_t *src, size_t n);
|
||||
extern uint32_t *
|
||||
u32_stpncpy (uint32_t *dest, const uint32_t *src, size_t n);
|
||||
|
||||
/* Append SRC onto DEST. */
|
||||
/* Similar to strcat(), wcscat(). */
|
||||
extern uint8_t *
|
||||
u8_strcat (uint8_t *dest, const uint8_t *src);
|
||||
extern uint16_t *
|
||||
u16_strcat (uint16_t *dest, const uint16_t *src);
|
||||
extern uint32_t *
|
||||
u32_strcat (uint32_t *dest, const uint32_t *src);
|
||||
|
||||
/* Append no more than N units of SRC onto DEST. */
|
||||
/* Similar to strncat(), wcsncat(). */
|
||||
extern uint8_t *
|
||||
u8_strncat (uint8_t *dest, const uint8_t *src, size_t n);
|
||||
extern uint16_t *
|
||||
u16_strncat (uint16_t *dest, const uint16_t *src, size_t n);
|
||||
extern uint32_t *
|
||||
u32_strncat (uint32_t *dest, const uint32_t *src, size_t n);
|
||||
|
||||
/* Compare S1 and S2. */
|
||||
/* Similar to strcmp(), wcscmp(). */
|
||||
extern int
|
||||
u8_strcmp (const uint8_t *s1, const uint8_t *s2);
|
||||
extern int
|
||||
u16_strcmp (const uint16_t *s1, const uint16_t *s2);
|
||||
extern int
|
||||
u32_strcmp (const uint32_t *s1, const uint32_t *s2);
|
||||
|
||||
/* Compare S1 and S2 using the collation rules of the current locale.
|
||||
Return -1 if S1 < S2, 0 if S1 = S2, 1 if S1 > S2.
|
||||
Upon failure, set errno and return any value. */
|
||||
/* Similar to strcoll(), wcscoll(). */
|
||||
extern int
|
||||
u8_strcoll (const uint8_t *s1, const uint8_t *s2);
|
||||
extern int
|
||||
u16_strcoll (const uint16_t *s1, const uint16_t *s2);
|
||||
extern int
|
||||
u32_strcoll (const uint32_t *s1, const uint32_t *s2);
|
||||
|
||||
/* Compare no more than N units of S1 and S2. */
|
||||
/* Similar to strncmp(), wcsncmp(). */
|
||||
extern int
|
||||
u8_strncmp (const uint8_t *s1, const uint8_t *s2, size_t n);
|
||||
extern int
|
||||
u16_strncmp (const uint16_t *s1, const uint16_t *s2, size_t n);
|
||||
extern int
|
||||
u32_strncmp (const uint32_t *s1, const uint32_t *s2, size_t n);
|
||||
|
||||
/* Duplicate S, returning an identical malloc'd string. */
|
||||
/* Similar to strdup(), wcsdup(). */
|
||||
extern uint8_t *
|
||||
u8_strdup (const uint8_t *s);
|
||||
extern uint16_t *
|
||||
u16_strdup (const uint16_t *s);
|
||||
extern uint32_t *
|
||||
u32_strdup (const uint32_t *s);
|
||||
|
||||
/* Find the first occurrence of UC in STR. */
|
||||
/* Similar to strchr(), wcschr(). */
|
||||
extern uint8_t *
|
||||
u8_strchr (const uint8_t *str, ucs4_t uc);
|
||||
extern uint16_t *
|
||||
u16_strchr (const uint16_t *str, ucs4_t uc);
|
||||
extern uint32_t *
|
||||
u32_strchr (const uint32_t *str, ucs4_t uc);
|
||||
|
||||
/* Find the last occurrence of UC in STR. */
|
||||
/* Similar to strrchr(), wcsrchr(). */
|
||||
extern uint8_t *
|
||||
u8_strrchr (const uint8_t *str, ucs4_t uc);
|
||||
extern uint16_t *
|
||||
u16_strrchr (const uint16_t *str, ucs4_t uc);
|
||||
extern uint32_t *
|
||||
u32_strrchr (const uint32_t *str, ucs4_t uc);
|
||||
|
||||
/* Return the length of the initial segment of STR which consists entirely
|
||||
of Unicode characters not in REJECT. */
|
||||
/* Similar to strcspn(), wcscspn(). */
|
||||
extern size_t
|
||||
u8_strcspn (const uint8_t *str, const uint8_t *reject);
|
||||
extern size_t
|
||||
u16_strcspn (const uint16_t *str, const uint16_t *reject);
|
||||
extern size_t
|
||||
u32_strcspn (const uint32_t *str, const uint32_t *reject);
|
||||
|
||||
/* Return the length of the initial segment of STR which consists entirely
|
||||
of Unicode characters in ACCEPT. */
|
||||
/* Similar to strspn(), wcsspn(). */
|
||||
extern size_t
|
||||
u8_strspn (const uint8_t *str, const uint8_t *accept);
|
||||
extern size_t
|
||||
u16_strspn (const uint16_t *str, const uint16_t *accept);
|
||||
extern size_t
|
||||
u32_strspn (const uint32_t *str, const uint32_t *accept);
|
||||
|
||||
/* Find the first occurrence in STR of any character in ACCEPT. */
|
||||
/* Similar to strpbrk(), wcspbrk(). */
|
||||
extern uint8_t *
|
||||
u8_strpbrk (const uint8_t *str, const uint8_t *accept);
|
||||
extern uint16_t *
|
||||
u16_strpbrk (const uint16_t *str, const uint16_t *accept);
|
||||
extern uint32_t *
|
||||
u32_strpbrk (const uint32_t *str, const uint32_t *accept);
|
||||
|
||||
/* Find the first occurrence of NEEDLE in HAYSTACK. */
|
||||
/* Similar to strstr(), wcsstr(). */
|
||||
extern uint8_t *
|
||||
u8_strstr (const uint8_t *haystack, const uint8_t *needle);
|
||||
extern uint16_t *
|
||||
u16_strstr (const uint16_t *haystack, const uint16_t *needle);
|
||||
extern uint32_t *
|
||||
u32_strstr (const uint32_t *haystack, const uint32_t *needle);
|
||||
|
||||
/* Test whether STR starts with PREFIX. */
|
||||
extern bool
|
||||
u8_startswith (const uint8_t *str, const uint8_t *prefix);
|
||||
extern bool
|
||||
u16_startswith (const uint16_t *str, const uint16_t *prefix);
|
||||
extern bool
|
||||
u32_startswith (const uint32_t *str, const uint32_t *prefix);
|
||||
|
||||
/* Test whether STR ends with SUFFIX. */
|
||||
extern bool
|
||||
u8_endswith (const uint8_t *str, const uint8_t *suffix);
|
||||
extern bool
|
||||
u16_endswith (const uint16_t *str, const uint16_t *suffix);
|
||||
extern bool
|
||||
u32_endswith (const uint32_t *str, const uint32_t *suffix);
|
||||
|
||||
/* Divide STR into tokens separated by characters in DELIM.
|
||||
This interface is actually more similar to wcstok than to strtok. */
|
||||
/* Similar to strtok_r(), wcstok(). */
|
||||
extern uint8_t *
|
||||
u8_strtok (uint8_t *str, const uint8_t *delim, uint8_t **ptr);
|
||||
extern uint16_t *
|
||||
u16_strtok (uint16_t *str, const uint16_t *delim, uint16_t **ptr);
|
||||
extern uint32_t *
|
||||
u32_strtok (uint32_t *str, const uint32_t *delim, uint32_t **ptr);
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _UNISTR_H */
|
||||
158
lib/unistr/u8-mbtouc-aux.c
Normal file
158
lib/unistr/u8-mbtouc-aux.c
Normal file
|
|
@ -0,0 +1,158 @@
|
|||
/* Conversion UTF-8 to UCS-4.
|
||||
Copyright (C) 2001-2002, 2006-2007, 2009 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2001.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include "unistr.h"
|
||||
|
||||
#if defined IN_LIBUNISTRING || HAVE_INLINE
|
||||
|
||||
int
|
||||
u8_mbtouc_aux (ucs4_t *puc, const uint8_t *s, size_t n)
|
||||
{
|
||||
uint8_t c = *s;
|
||||
|
||||
if (c >= 0xc2)
|
||||
{
|
||||
if (c < 0xe0)
|
||||
{
|
||||
if (n >= 2)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40)
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x1f) << 6)
|
||||
| (unsigned int) (s[1] ^ 0x80);
|
||||
return 2;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else if (c < 0xf0)
|
||||
{
|
||||
if (n >= 3)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (c >= 0xe1 || s[1] >= 0xa0)
|
||||
&& (c != 0xed || s[1] < 0xa0))
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x0f) << 12)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[2] ^ 0x80);
|
||||
return 3;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else if (c < 0xf8)
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (s[3] ^ 0x80) < 0x40
|
||||
&& (c >= 0xf1 || s[1] >= 0x90)
|
||||
#if 1
|
||||
&& (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
|
||||
#endif
|
||||
)
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x07) << 18)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[3] ^ 0x80);
|
||||
return 4;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
else if (c < 0xfc)
|
||||
{
|
||||
if (n >= 5)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
|
||||
&& (c >= 0xf9 || s[1] >= 0x88))
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x03) << 24)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 18)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[3] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[4] ^ 0x80);
|
||||
return 5;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else if (c < 0xfe)
|
||||
{
|
||||
if (n >= 6)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
|
||||
&& (s[5] ^ 0x80) < 0x40
|
||||
&& (c >= 0xfd || s[1] >= 0x84))
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x01) << 30)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 24)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 18)
|
||||
| ((unsigned int) (s[3] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[4] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[5] ^ 0x80);
|
||||
return 6;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
168
lib/unistr/u8-mbtouc-unsafe-aux.c
Normal file
168
lib/unistr/u8-mbtouc-unsafe-aux.c
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
/* Conversion UTF-8 to UCS-4.
|
||||
Copyright (C) 2001-2002, 2006-2007, 2009 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2001.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include "unistr.h"
|
||||
|
||||
#if defined IN_LIBUNISTRING || HAVE_INLINE
|
||||
|
||||
int
|
||||
u8_mbtouc_unsafe_aux (ucs4_t *puc, const uint8_t *s, size_t n)
|
||||
{
|
||||
uint8_t c = *s;
|
||||
|
||||
if (c >= 0xc2)
|
||||
{
|
||||
if (c < 0xe0)
|
||||
{
|
||||
if (n >= 2)
|
||||
{
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
if ((s[1] ^ 0x80) < 0x40)
|
||||
#endif
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x1f) << 6)
|
||||
| (unsigned int) (s[1] ^ 0x80);
|
||||
return 2;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else if (c < 0xf0)
|
||||
{
|
||||
if (n >= 3)
|
||||
{
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (c >= 0xe1 || s[1] >= 0xa0)
|
||||
&& (c != 0xed || s[1] < 0xa0))
|
||||
#endif
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x0f) << 12)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[2] ^ 0x80);
|
||||
return 3;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else if (c < 0xf8)
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (s[3] ^ 0x80) < 0x40
|
||||
&& (c >= 0xf1 || s[1] >= 0x90)
|
||||
#if 1
|
||||
&& (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
|
||||
#endif
|
||||
)
|
||||
#endif
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x07) << 18)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[3] ^ 0x80);
|
||||
return 4;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
else if (c < 0xfc)
|
||||
{
|
||||
if (n >= 5)
|
||||
{
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
|
||||
&& (c >= 0xf9 || s[1] >= 0x88))
|
||||
#endif
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x03) << 24)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 18)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[3] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[4] ^ 0x80);
|
||||
return 5;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else if (c < 0xfe)
|
||||
{
|
||||
if (n >= 6)
|
||||
{
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
|
||||
&& (s[5] ^ 0x80) < 0x40
|
||||
&& (c >= 0xfd || s[1] >= 0x84))
|
||||
#endif
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x01) << 30)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 24)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 18)
|
||||
| ((unsigned int) (s[3] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[4] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[5] ^ 0x80);
|
||||
return 6;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
179
lib/unistr/u8-mbtouc-unsafe.c
Normal file
179
lib/unistr/u8-mbtouc-unsafe.c
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
/* Look at first character in UTF-8 string.
|
||||
Copyright (C) 1999-2002, 2006-2007, 2009 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2001.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#if defined IN_LIBUNISTRING
|
||||
/* Tell unistr.h to declare u8_mbtouc_unsafe as 'extern', not
|
||||
'static inline'. */
|
||||
# include "unistring-notinline.h"
|
||||
#endif
|
||||
|
||||
/* Specification. */
|
||||
#include "unistr.h"
|
||||
|
||||
#if !HAVE_INLINE
|
||||
|
||||
int
|
||||
u8_mbtouc_unsafe (ucs4_t *puc, const uint8_t *s, size_t n)
|
||||
{
|
||||
uint8_t c = *s;
|
||||
|
||||
if (c < 0x80)
|
||||
{
|
||||
*puc = c;
|
||||
return 1;
|
||||
}
|
||||
else if (c >= 0xc2)
|
||||
{
|
||||
if (c < 0xe0)
|
||||
{
|
||||
if (n >= 2)
|
||||
{
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
if ((s[1] ^ 0x80) < 0x40)
|
||||
#endif
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x1f) << 6)
|
||||
| (unsigned int) (s[1] ^ 0x80);
|
||||
return 2;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else if (c < 0xf0)
|
||||
{
|
||||
if (n >= 3)
|
||||
{
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (c >= 0xe1 || s[1] >= 0xa0)
|
||||
&& (c != 0xed || s[1] < 0xa0))
|
||||
#endif
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x0f) << 12)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[2] ^ 0x80);
|
||||
return 3;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else if (c < 0xf8)
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (s[3] ^ 0x80) < 0x40
|
||||
&& (c >= 0xf1 || s[1] >= 0x90)
|
||||
#if 1
|
||||
&& (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
|
||||
#endif
|
||||
)
|
||||
#endif
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x07) << 18)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[3] ^ 0x80);
|
||||
return 4;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
else if (c < 0xfc)
|
||||
{
|
||||
if (n >= 5)
|
||||
{
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
|
||||
&& (c >= 0xf9 || s[1] >= 0x88))
|
||||
#endif
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x03) << 24)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 18)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[3] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[4] ^ 0x80);
|
||||
return 5;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else if (c < 0xfe)
|
||||
{
|
||||
if (n >= 6)
|
||||
{
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
|
||||
&& (s[5] ^ 0x80) < 0x40
|
||||
&& (c >= 0xfd || s[1] >= 0x84))
|
||||
#endif
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x01) << 30)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 24)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 18)
|
||||
| ((unsigned int) (s[3] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[4] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[5] ^ 0x80);
|
||||
return 6;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
168
lib/unistr/u8-mbtouc.c
Normal file
168
lib/unistr/u8-mbtouc.c
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
/* Look at first character in UTF-8 string.
|
||||
Copyright (C) 1999-2002, 2006-2007, 2009 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2001.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#if defined IN_LIBUNISTRING
|
||||
/* Tell unistr.h to declare u8_mbtouc as 'extern', not 'static inline'. */
|
||||
# include "unistring-notinline.h"
|
||||
#endif
|
||||
|
||||
/* Specification. */
|
||||
#include "unistr.h"
|
||||
|
||||
#if !HAVE_INLINE
|
||||
|
||||
int
|
||||
u8_mbtouc (ucs4_t *puc, const uint8_t *s, size_t n)
|
||||
{
|
||||
uint8_t c = *s;
|
||||
|
||||
if (c < 0x80)
|
||||
{
|
||||
*puc = c;
|
||||
return 1;
|
||||
}
|
||||
else if (c >= 0xc2)
|
||||
{
|
||||
if (c < 0xe0)
|
||||
{
|
||||
if (n >= 2)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40)
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x1f) << 6)
|
||||
| (unsigned int) (s[1] ^ 0x80);
|
||||
return 2;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else if (c < 0xf0)
|
||||
{
|
||||
if (n >= 3)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (c >= 0xe1 || s[1] >= 0xa0)
|
||||
&& (c != 0xed || s[1] < 0xa0))
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x0f) << 12)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[2] ^ 0x80);
|
||||
return 3;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else if (c < 0xf8)
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (s[3] ^ 0x80) < 0x40
|
||||
&& (c >= 0xf1 || s[1] >= 0x90)
|
||||
#if 1
|
||||
&& (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
|
||||
#endif
|
||||
)
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x07) << 18)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[3] ^ 0x80);
|
||||
return 4;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
else if (c < 0xfc)
|
||||
{
|
||||
if (n >= 5)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
|
||||
&& (c >= 0xf9 || s[1] >= 0x88))
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x03) << 24)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 18)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[3] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[4] ^ 0x80);
|
||||
return 5;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
else if (c < 0xfe)
|
||||
{
|
||||
if (n >= 6)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
|
||||
&& (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
|
||||
&& (s[5] ^ 0x80) < 0x40
|
||||
&& (c >= 0xfd || s[1] >= 0x84))
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x01) << 30)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 24)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 18)
|
||||
| ((unsigned int) (s[3] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[4] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[5] ^ 0x80);
|
||||
return 6;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return n;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return 1;
|
||||
}
|
||||
|
||||
#endif
|
||||
285
lib/unistr/u8-mbtoucr.c
Normal file
285
lib/unistr/u8-mbtoucr.c
Normal file
|
|
@ -0,0 +1,285 @@
|
|||
/* Look at first character in UTF-8 string, returning an error code.
|
||||
Copyright (C) 1999-2002, 2006-2007 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2001.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include "unistr.h"
|
||||
|
||||
int
|
||||
u8_mbtoucr (ucs4_t *puc, const uint8_t *s, size_t n)
|
||||
{
|
||||
uint8_t c = *s;
|
||||
|
||||
if (c < 0x80)
|
||||
{
|
||||
*puc = c;
|
||||
return 1;
|
||||
}
|
||||
else if (c >= 0xc2)
|
||||
{
|
||||
if (c < 0xe0)
|
||||
{
|
||||
if (n >= 2)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40)
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x1f) << 6)
|
||||
| (unsigned int) (s[1] ^ 0x80);
|
||||
return 2;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
else if (c < 0xf0)
|
||||
{
|
||||
if (n >= 2)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40
|
||||
&& (c >= 0xe1 || s[1] >= 0xa0)
|
||||
&& (c != 0xed || s[1] < 0xa0))
|
||||
{
|
||||
if (n >= 3)
|
||||
{
|
||||
if ((s[2] ^ 0x80) < 0x40)
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x0f) << 12)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[2] ^ 0x80);
|
||||
return 3;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
else if (c < 0xf8)
|
||||
{
|
||||
if (n >= 2)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40
|
||||
&& (c >= 0xf1 || s[1] >= 0x90)
|
||||
#if 1
|
||||
&& (c < 0xf4 || (c == 0xf4 && s[1] < 0x90))
|
||||
#endif
|
||||
)
|
||||
{
|
||||
if (n >= 3)
|
||||
{
|
||||
if ((s[2] ^ 0x80) < 0x40)
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
if ((s[3] ^ 0x80) < 0x40)
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x07) << 18)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[3] ^ 0x80);
|
||||
return 4;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
#if 0
|
||||
else if (c < 0xfc)
|
||||
{
|
||||
if (n >= 2)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40
|
||||
&& (c >= 0xf9 || s[1] >= 0x88))
|
||||
{
|
||||
if (n >= 3)
|
||||
{
|
||||
if ((s[2] ^ 0x80) < 0x40)
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
if ((s[3] ^ 0x80) < 0x40)
|
||||
{
|
||||
if (n >= 5)
|
||||
{
|
||||
if ((s[4] ^ 0x80) < 0x40)
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x03) << 24)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 18)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[3] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[4] ^ 0x80);
|
||||
return 5;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
else if (c < 0xfe)
|
||||
{
|
||||
if (n >= 2)
|
||||
{
|
||||
if ((s[1] ^ 0x80) < 0x40
|
||||
&& (c >= 0xfd || s[1] >= 0x84))
|
||||
{
|
||||
if (n >= 3)
|
||||
{
|
||||
if ((s[2] ^ 0x80) < 0x40)
|
||||
{
|
||||
if (n >= 4)
|
||||
{
|
||||
if ((s[3] ^ 0x80) < 0x40)
|
||||
{
|
||||
if (n >= 5)
|
||||
{
|
||||
if ((s[4] ^ 0x80) < 0x40)
|
||||
{
|
||||
if (n >= 6)
|
||||
{
|
||||
if ((s[5] ^ 0x80) < 0x40)
|
||||
{
|
||||
*puc = ((unsigned int) (c & 0x01) << 30)
|
||||
| ((unsigned int) (s[1] ^ 0x80) << 24)
|
||||
| ((unsigned int) (s[2] ^ 0x80) << 18)
|
||||
| ((unsigned int) (s[3] ^ 0x80) << 12)
|
||||
| ((unsigned int) (s[4] ^ 0x80) << 6)
|
||||
| (unsigned int) (s[5] ^ 0x80);
|
||||
return 6;
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* incomplete multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
/* invalid multibyte character */
|
||||
*puc = 0xfffd;
|
||||
return -1;
|
||||
}
|
||||
93
lib/unistr/u8-prev.c
Normal file
93
lib/unistr/u8-prev.c
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
/* Iterate over previous character in UTF-8 string.
|
||||
Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2002.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include "unistr.h"
|
||||
|
||||
const uint8_t *
|
||||
u8_prev (ucs4_t *puc, const uint8_t *s, const uint8_t *start)
|
||||
{
|
||||
/* Keep in sync with unistr.h and utf8-ucs4.c. */
|
||||
if (s != start)
|
||||
{
|
||||
uint8_t c_1 = s[-1];
|
||||
|
||||
if (c_1 < 0x80)
|
||||
{
|
||||
*puc = c_1;
|
||||
return s - 1;
|
||||
}
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
if ((c_1 ^ 0x80) < 0x40)
|
||||
#endif
|
||||
if (s - 1 != start)
|
||||
{
|
||||
uint8_t c_2 = s[-2];
|
||||
|
||||
if (c_2 >= 0xc2 && c_2 < 0xe0)
|
||||
{
|
||||
*puc = ((unsigned int) (c_2 & 0x1f) << 6)
|
||||
| (unsigned int) (c_1 ^ 0x80);
|
||||
return s - 2;
|
||||
}
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
if ((c_2 ^ 0x80) < 0x40)
|
||||
#endif
|
||||
if (s - 2 != start)
|
||||
{
|
||||
uint8_t c_3 = s[-3];
|
||||
|
||||
if (c_3 >= 0xe0 && c_3 < 0xf0
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
&& (c_3 >= 0xe1 || c_2 >= 0xa0)
|
||||
&& (c_3 != 0xed || c_2 < 0xa0)
|
||||
#endif
|
||||
)
|
||||
{
|
||||
*puc = ((unsigned int) (c_3 & 0x0f) << 12)
|
||||
| ((unsigned int) (c_2 ^ 0x80) << 6)
|
||||
| (unsigned int) (c_1 ^ 0x80);
|
||||
return s - 3;
|
||||
}
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
if ((c_3 ^ 0x80) < 0x40)
|
||||
#endif
|
||||
if (s - 3 != start)
|
||||
{
|
||||
uint8_t c_4 = s[-4];
|
||||
|
||||
if (c_4 >= 0xf0 && c_4 < 0xf8
|
||||
#if CONFIG_UNICODE_SAFETY
|
||||
&& (c_4 >= 0xf1 || c_3 >= 0x90)
|
||||
&& (c_4 < 0xf4 || (c_4 == 0xf4 && c_3 < 0x90))
|
||||
#endif
|
||||
)
|
||||
{
|
||||
*puc = ((unsigned int) (c_4 & 0x07) << 18)
|
||||
| ((unsigned int) (c_3 ^ 0x80) << 12)
|
||||
| ((unsigned int) (c_2 ^ 0x80) << 6)
|
||||
| (unsigned int) (c_1 ^ 0x80);
|
||||
return s - 4;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
69
lib/unistr/u8-uctomb-aux.c
Normal file
69
lib/unistr/u8-uctomb-aux.c
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
/* Conversion UCS-4 to UTF-8.
|
||||
Copyright (C) 2002, 2006-2007 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2002.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
/* Specification. */
|
||||
#include "unistr.h"
|
||||
|
||||
int
|
||||
u8_uctomb_aux (uint8_t *s, ucs4_t uc, int n)
|
||||
{
|
||||
int count;
|
||||
|
||||
if (uc < 0x80)
|
||||
/* The case n >= 1 is already handled by the caller. */
|
||||
return -2;
|
||||
else if (uc < 0x800)
|
||||
count = 2;
|
||||
else if (uc < 0x10000)
|
||||
{
|
||||
if (uc < 0xd800 || uc >= 0xe000)
|
||||
count = 3;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
#if 0
|
||||
else if (uc < 0x200000)
|
||||
count = 4;
|
||||
else if (uc < 0x4000000)
|
||||
count = 5;
|
||||
else if (uc <= 0x7fffffff)
|
||||
count = 6;
|
||||
#else
|
||||
else if (uc < 0x110000)
|
||||
count = 4;
|
||||
#endif
|
||||
else
|
||||
return -1;
|
||||
|
||||
if (n < count)
|
||||
return -2;
|
||||
|
||||
switch (count) /* note: code falls through cases! */
|
||||
{
|
||||
#if 0
|
||||
case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000;
|
||||
case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000;
|
||||
#endif
|
||||
case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000;
|
||||
case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800;
|
||||
case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0;
|
||||
/*case 1:*/ s[0] = uc;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
88
lib/unistr/u8-uctomb.c
Normal file
88
lib/unistr/u8-uctomb.c
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
/* Store a character in UTF-8 string.
|
||||
Copyright (C) 2002, 2005-2006, 2009 Free Software Foundation, Inc.
|
||||
Written by Bruno Haible <bruno@clisp.org>, 2002.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#if defined IN_LIBUNISTRING
|
||||
/* Tell unistr.h to declare u8_uctomb as 'extern', not 'static inline'. */
|
||||
# include "unistring-notinline.h"
|
||||
#endif
|
||||
|
||||
/* Specification. */
|
||||
#include "unistr.h"
|
||||
|
||||
#if !HAVE_INLINE
|
||||
|
||||
/* Encode the character UC as UTF-8 into the buffer S.
   N is compared against the number of bytes the encoding needs, i.e. it is
   treated as the number of bytes available at S.
   Return value:
     1..4  the number of bytes stored in S,
     -1    UC cannot be encoded: it lies in 0xd800..0xdfff (the surrogate
           range) or is >= 0x110000,
     -2    N is too small for the encoding; nothing is stored in that case.  */
int
|
||||
u8_uctomb (uint8_t *s, ucs4_t uc, int n)
|
||||
{
|
||||
/* Values below 0x80 are stored unchanged as a single byte.  */
if (uc < 0x80)
|
||||
{
|
||||
if (n > 0)
|
||||
{
|
||||
s[0] = uc;
|
||||
return 1;
|
||||
}
|
||||
/* else return -2, below. */
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Number of bytes the encoding of UC occupies (2, 3 or 4).  */
int count;
|
||||
|
||||
if (uc < 0x800)
|
||||
count = 2;
|
||||
else if (uc < 0x10000)
|
||||
{
|
||||
/* 0xd800..0xdfff is rejected rather than encoded.  */
if (uc < 0xd800 || uc >= 0xe000)
|
||||
count = 3;
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
/* The disabled branch below would accept values up to 0x7fffffff using
   5- and 6-byte sequences; the active #else branch stops at 0x10ffff.  */
#if 0
|
||||
else if (uc < 0x200000)
|
||||
count = 4;
|
||||
else if (uc < 0x4000000)
|
||||
count = 5;
|
||||
else if (uc <= 0x7fffffff)
|
||||
count = 6;
|
||||
#else
|
||||
else if (uc < 0x110000)
|
||||
count = 4;
|
||||
#endif
|
||||
else
|
||||
return -1;
|
||||
|
||||
/* Only write when the whole sequence fits; otherwise fall out to the
   final "return -2" without touching S.  */
if (n >= count)
|
||||
{
|
||||
/* Bytes are produced from last to first.  Each case stores the low
   6 bits of UC with the 0x80 continuation marker, shifts UC right by
   6, and ORs in a constant that, after the remaining shifts, ends up
   as part of the lead-byte prefix stored in s[0]: 0xc0 for 2 bytes,
   0xe0 (0xc0 | 0x800 >> 6) for 3 bytes, and
   0xf0 (0xc0 | 0x800 >> 6 | 0x10000 >> 12) for 4 bytes.  */
switch (count) /* note: code falls through cases! */
|
||||
{
|
||||
#if 0
|
||||
case 6: s[5] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x4000000;
|
||||
case 5: s[4] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x200000;
|
||||
#endif
|
||||
case 4: s[3] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x10000;
|
||||
case 3: s[2] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0x800;
|
||||
case 2: s[1] = 0x80 | (uc & 0x3f); uc = uc >> 6; uc |= 0xc0;
|
||||
/* Lead byte: the remaining high bits of UC plus the accumulated prefix.  */
/*case 1:*/ s[0] = uc;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
}
|
||||
/* Reached when N is too small for the encoding (including N <= 0 for a
   single-byte character).  */
return -2;
|
||||
}
|
||||
|
||||
#endif
|
||||
26
lib/unitypes.h
Normal file
26
lib/unitypes.h
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
/* Elementary types for the GNU UniString library.
|
||||
Copyright (C) 2002, 2005-2006 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published
|
||||
by the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Include guard: the declarations below are processed at most once per
   translation unit.  */
#ifndef _UNITYPES_H
|
||||
#define _UNITYPES_H
|
||||
|
||||
/* Get uint8_t, uint16_t, uint32_t. */
|
||||
#include <stdint.h>
|
||||
|
||||
/* Type representing a Unicode character.
   It is an alias for uint32_t, so it is unsigned and exactly 32 bits wide.  */
|
||||
typedef uint32_t ucs4_t;
|
||||
|
||||
#endif /* _UNITYPES_H */
|
||||
Loading…
Add table
Add a link
Reference in a new issue