Changes from arch/CVS synchronization

This commit is contained in:
Ludovic Courtès 2006-09-20 12:48:45 +00:00
commit a17d26545d
9 changed files with 322 additions and 59 deletions

View file

@ -1,3 +1,10 @@
2006-09-20 Ludovic Courtès <ludovic.courtes@laas.fr>
* configure.in: Check for `isblank ()'.
* NEWS: Mentioned the interaction between `setlocale' and SRFI-14
standard char sets.
2006-08-18 Neil Jerram <neil@ossau.uklinux.net>
* configure.in: Generate Makefile for emacs subdir.

1
NEWS
View file

@ -30,6 +30,7 @@ Changes in 1.8.1 (since 1.8.0):
** A one-dimenisonal array can now be 'equal?' to a vector.
** Structures, records, and SRFI-9 records can now be compared with `equal?'.
** SRFI-14 standard char sets are now recomputed upon successful `setlocale'.
* Changes to the C interface

View file

@ -598,9 +598,10 @@ AC_CHECK_HEADERS([assert.h crt_externs.h])
# readdir_r - recent posix, not on old systems
# stat64 - SuS largefile stuff, not on old systems
# sysconf - not on old systems
# isblank - available as a GNU extension or in C99
# _NSGetEnviron - Darwin specific
#
AC_CHECK_FUNCS([DINFINITY DQNAN ctermid fesetround ftime fchown getcwd geteuid gettimeofday gmtime_r ioctl lstat mkdir mknod nice readdir_r readlink rename rmdir select setegid seteuid setlocale setpgid setsid sigaction siginterrupt stat64 strftime strptime symlink sync sysconf tcgetpgrp tcsetpgrp times uname waitpid strdup system usleep atexit on_exit chown link fcntl ttyname getpwent getgrent kill getppid getpgrp fork setitimer getitimer strchr strcmp index bcopy memcpy rindex unsetenv _NSGetEnviron])
AC_CHECK_FUNCS([DINFINITY DQNAN ctermid fesetround ftime fchown getcwd geteuid gettimeofday gmtime_r ioctl lstat mkdir mknod nice readdir_r readlink rename rmdir select setegid seteuid setlocale setpgid setsid sigaction siginterrupt stat64 strftime strptime symlink sync sysconf tcgetpgrp tcsetpgrp times uname waitpid strdup system usleep atexit on_exit chown link fcntl ttyname getpwent getgrent kill getppid getpgrp fork setitimer getitimer strchr strcmp index bcopy memcpy rindex unsetenv isblank _NSGetEnviron])
# Reasons for testing:
# netdb.h - not in mingw

View file

@ -1,3 +1,25 @@
2006-09-20 Ludovic Courtès <ludovic.courtes@laas.fr>
* srfi-14.c: Include <config.h>. Define `_GNU_SOURCE'.
(make_predset, define_predset, make_strset, define_strset, false,
true): Removed.
(SCM_CHARSET_UNSET, CSET_BLANK_PRED, CSET_SYMBOL_PRED,
CSET_PUNCT_PRED, CSET_LOWER_PRED, CSET_UPPER_PRED,
CSET_LETTER_PRED, CSET_DIGIT_PRED, CSET_WHITESPACE_PRED,
CSET_CONTROL_PRED, CSET_HEX_DIGIT_PRED, CSET_ASCII_PRED,
CSET_LETTER_AND_DIGIT_PRED, CSET_GRAPHIC_PRED, CSET_PRINTING_PRED,
CSET_TRUE_PRED, CSET_FALSE_PRED, UPDATE_CSET): New macros.
(define_charset, scm_srfi_14_compute_char_sets): New functions.
(scm_init_srfi_14): Use `define_charset ()' instead of
`define_predset ()' and `define_strset ()'.
* srfi-14.h (scm_c_init_srfi_14): Removed.
(scm_srfi_14_compute_char_sets): New declaration.
* posix.h: Include "srfi-14.h".
(scm_setlocale): Invoke `scm_srfi_14_compute_char_sets ()' after a
successful `setlocale ()' call.
2006-08-31 Rob Browning <rlb@defaultvalue.org>
* ports.c (scm_c_port_for_each): Add a

View file

@ -34,6 +34,7 @@
#include "libguile/feature.h"
#include "libguile/strings.h"
#include "libguile/srfi-13.h"
#include "libguile/srfi-14.h"
#include "libguile/vectors.h"
#include "libguile/lang.h"
@ -1392,6 +1393,10 @@ SCM_DEFINE (scm_setlocale, "setlocale", 1, 1, 0,
SCM_SYSERROR;
}
/* Recompute the standard SRFI-14 character sets in a locale-dependent
(actually charset-dependent) way. */
scm_srfi_14_compute_char_sets ();
scm_dynwind_end ();
return scm_from_locale_string (rv);
}

View file

@ -17,6 +17,12 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#define _GNU_SOURCE /* Ask for `isblank ()'. */
#include <string.h>
#include <ctype.h>
@ -29,6 +35,10 @@
(((long *) SCM_SMOB_DATA (cs))[(idx) / SCM_BITS_PER_LONG] |= \
(1L << ((idx) % SCM_BITS_PER_LONG)))
#define SCM_CHARSET_UNSET(cs, idx) \
(((long *) SCM_SMOB_DATA (cs))[(idx) / SCM_BITS_PER_LONG] &= \
(~(1L << ((idx) % SCM_BITS_PER_LONG))))
#define BYTES_PER_CHARSET (SCM_CHARSET_SIZE / 8)
#define LONGS_PER_CHARSET (SCM_CHARSET_SIZE / SCM_BITS_PER_LONG)
@ -1393,6 +1403,9 @@ SCM_DEFINE (scm_char_set_diff_plus_intersection_x, "char-set-diff+intersection!"
}
#undef FUNC_NAME
/* Standard character sets. */
SCM scm_char_set_lower_case;
SCM scm_char_set_upper_case;
SCM scm_char_set_title_case;
@ -1411,48 +1424,123 @@ SCM scm_char_set_ascii;
SCM scm_char_set_empty;
SCM scm_char_set_full;
static SCM
make_predset (int (*pred) (int))
/* Create an empty character set and return it after binding it to NAME. */
static inline SCM
define_charset (const char *name)
{
int ch;
SCM cs = make_char_set (NULL);
scm_c_define (name, cs);
return scm_permanent_object (cs);
}
/* Membership predicates for the various char sets.
XXX: The `punctuation' and `symbol' char sets have no direct equivalent in
<ctype.h>. Thus, the predicates below yield correct results for ASCII,
but they do not provide the result described by the SRFI for Latin-1. The
correct Latin-1 result could only be obtained by hard-coding the
characters listed by the SRFI, but the problem would remain for other
8-bit charsets.
Similarly, character 0xA0 in Latin-1 (unbreakable space, `#\0240') should
be part of `char-set:blank'. However, glibc's current (2006/09) Latin-1
locales (which use the ISO 14652 "i18n" FDCC-set) do not consider it
`blank' so it ends up in `char-set:punctuation'. */
#ifdef HAVE_ISBLANK
# define CSET_BLANK_PRED(c) (isblank (c))
#else
# define CSET_BLANK_PRED(c) \
(((c) == ' ') || ((c) == '\t'))
#endif
#define CSET_SYMBOL_PRED(c) \
(((c) != '\0') && (strchr ("$+<=>^`|~", (c)) != NULL))
#define CSET_PUNCT_PRED(c) \
((ispunct (c)) && (!CSET_SYMBOL_PRED (c)))
#define CSET_LOWER_PRED(c) (islower (c))
#define CSET_UPPER_PRED(c) (isupper (c))
#define CSET_LETTER_PRED(c) (isalpha (c))
#define CSET_DIGIT_PRED(c) (isdigit (c))
#define CSET_WHITESPACE_PRED(c) (isspace (c))
#define CSET_CONTROL_PRED(c) (iscntrl (c))
#define CSET_HEX_DIGIT_PRED(c) (isxdigit (c))
#define CSET_ASCII_PRED(c) (isascii (c))
/* Some char sets are explicitly defined by the SRFI as a union of other char
sets so we try to follow this closely. */
#define CSET_LETTER_AND_DIGIT_PRED(c) \
(CSET_LETTER_PRED (c) || CSET_DIGIT_PRED (c))
#define CSET_GRAPHIC_PRED(c) \
(CSET_LETTER_PRED (c) || CSET_DIGIT_PRED (c) \
|| CSET_PUNCT_PRED (c) || CSET_SYMBOL_PRED (c))
#define CSET_PRINTING_PRED(c) \
(CSET_GRAPHIC_PRED (c) || CSET_WHITESPACE_PRED (c))
/* False and true predicates. */
#define CSET_TRUE_PRED(c) (1)
#define CSET_FALSE_PRED(c) (0)
/* Compute the contents of all the standard character sets. Computation may
need to be re-done at `setlocale'-time because some char sets (e.g.,
`char-set:letter') need to reflect the character set supported by Guile.
For instance, at startup time, the "C" locale is used, thus Guile supports
only ASCII; therefore, `char-set:letter' only contains English letters.
The user can change this by invoking `setlocale' and specifying a locale
with an 8-bit charset, thereby augmenting some of the SRFI-14 standard
character sets.
This works because some of the predicates used below to construct
character sets (e.g., `isalpha(3)') are locale-dependent (so
charset-dependent, though generally not language-dependent). For details,
please see the `guile-devel' mailing list archive of September 2006. */
void
scm_srfi_14_compute_char_sets (void)
{
#define UPDATE_CSET(c, cset, pred) \
do \
{ \
if (pred (c)) \
SCM_CHARSET_SET ((cset), (c)); \
else \
SCM_CHARSET_UNSET ((cset), (c)); \
} \
while (0)
register int ch;
for (ch = 0; ch < 256; ch++)
if (pred (ch))
SCM_CHARSET_SET (cs, ch);
return cs;
}
static SCM
define_predset (const char *name, int (*pred) (int))
{
SCM cs = make_predset (pred);
scm_c_define (name, cs);
return scm_permanent_object (cs);
UPDATE_CSET (ch, scm_char_set_upper_case, CSET_UPPER_PRED);
UPDATE_CSET (ch, scm_char_set_lower_case, CSET_LOWER_PRED);
UPDATE_CSET (ch, scm_char_set_title_case, CSET_FALSE_PRED);
UPDATE_CSET (ch, scm_char_set_letter, CSET_LETTER_PRED);
UPDATE_CSET (ch, scm_char_set_digit, CSET_DIGIT_PRED);
UPDATE_CSET (ch, scm_char_set_letter_and_digit,
CSET_LETTER_AND_DIGIT_PRED);
UPDATE_CSET (ch, scm_char_set_graphic, CSET_GRAPHIC_PRED);
UPDATE_CSET (ch, scm_char_set_printing, CSET_PRINTING_PRED);
UPDATE_CSET (ch, scm_char_set_whitespace, CSET_WHITESPACE_PRED);
UPDATE_CSET (ch, scm_char_set_iso_control, CSET_CONTROL_PRED);
UPDATE_CSET (ch, scm_char_set_punctuation, CSET_PUNCT_PRED);
UPDATE_CSET (ch, scm_char_set_symbol, CSET_SYMBOL_PRED);
UPDATE_CSET (ch, scm_char_set_hex_digit, CSET_HEX_DIGIT_PRED);
UPDATE_CSET (ch, scm_char_set_blank, CSET_BLANK_PRED);
UPDATE_CSET (ch, scm_char_set_ascii, CSET_ASCII_PRED);
UPDATE_CSET (ch, scm_char_set_empty, CSET_FALSE_PRED);
UPDATE_CSET (ch, scm_char_set_full, CSET_TRUE_PRED);
}
static SCM
make_strset (const char *str)
{
SCM cs = make_char_set (NULL);
while (*str)
{
SCM_CHARSET_SET (cs, *str);
str++;
}
return cs;
#undef UPDATE_CSET
}
static SCM
define_strset (const char *name, const char *str)
{
SCM cs = make_strset (str);
scm_c_define (name, cs);
return scm_permanent_object (cs);
}
static int false (int ch) { return 0; }
static int true (int ch) { return 1; }
void
scm_init_srfi_14 (void)
{
@ -1461,24 +1549,25 @@ scm_init_srfi_14 (void)
scm_set_smob_free (scm_tc16_charset, charset_free);
scm_set_smob_print (scm_tc16_charset, charset_print);
scm_char_set_upper_case = define_predset ("char-set:upper-case", isupper);
scm_char_set_lower_case = define_predset ("char-set:lower-case", islower);
scm_char_set_title_case = define_predset ("char-set:title-case", false);
scm_char_set_letter = define_predset ("char-set:letter", isalpha);
scm_char_set_digit = define_predset ("char-set:digit", isdigit);
scm_char_set_letter_and_digit = define_predset ("char-set:letter+digit",
isalnum);
scm_char_set_graphic = define_predset ("char-set:graphic", isgraph);
scm_char_set_printing = define_predset ("char-set:printing", isprint);
scm_char_set_whitespace = define_predset ("char-set:whitespace", isspace);
scm_char_set_iso_control = define_predset ("char-set:iso-control", iscntrl);
scm_char_set_punctuation = define_predset ("char-set:punctuation", ispunct);
scm_char_set_symbol = define_strset ("char-set:symbol", "$+<=>^`|~");
scm_char_set_hex_digit = define_predset ("char-set:hex-digit", isxdigit);
scm_char_set_blank = define_strset ("char-set:blank", " \t");
scm_char_set_ascii = define_predset ("char-set:ascii", isascii);
scm_char_set_empty = define_predset ("char-set:empty", false);
scm_char_set_full = define_predset ("char-set:full", true);
scm_char_set_upper_case = define_charset ("char-set:upper-case");
scm_char_set_lower_case = define_charset ("char-set:lower-case");
scm_char_set_title_case = define_charset ("char-set:title-case");
scm_char_set_letter = define_charset ("char-set:letter");
scm_char_set_digit = define_charset ("char-set:digit");
scm_char_set_letter_and_digit = define_charset ("char-set:letter+digit");
scm_char_set_graphic = define_charset ("char-set:graphic");
scm_char_set_printing = define_charset ("char-set:printing");
scm_char_set_whitespace = define_charset ("char-set:whitespace");
scm_char_set_iso_control = define_charset ("char-set:iso-control");
scm_char_set_punctuation = define_charset ("char-set:punctuation");
scm_char_set_symbol = define_charset ("char-set:symbol");
scm_char_set_hex_digit = define_charset ("char-set:hex-digit");
scm_char_set_blank = define_charset ("char-set:blank");
scm_char_set_ascii = define_charset ("char-set:ascii");
scm_char_set_empty = define_charset ("char-set:empty");
scm_char_set_full = define_charset ("char-set:full");
scm_srfi_14_compute_char_sets ();
#include "libguile/srfi-14.x"
}

View file

@ -106,7 +106,7 @@ SCM_API SCM scm_char_set_ascii;
SCM_API SCM scm_char_set_empty;
SCM_API SCM scm_char_set_full;
SCM_API void scm_c_init_srfi_14 (void);
SCM_API void scm_srfi_14_compute_char_sets (void);
SCM_API void scm_init_srfi_14 (void);
#endif /* SCM_SRFI_14_H */

View file

@ -1,3 +1,12 @@
2006-09-20 Ludovic Courtès <ludovic.courtes@laas.fr>
* tests/srfi-14.test: Use `define-module'. Use modules `(srfi
srfi-1)' and `(test-suite lib)'.
(string->char-set, standard char sets (ASCII), Latin-1 (8-bit
charset)): New test prefixes.
(every?, find-latin1-locale): New procedures.
(%latin1): New variable.
2006-06-13 Ludovic Courtès <ludovic.courtes@laas.fr>
* Makefile.am (SCM_TESTS): Added `tests/structs.test'.

View file

@ -1,4 +1,4 @@
;;;; srfi-14.test --- Test suite for Guile's SRFI-14 functions. -*- scheme -*-
;;;; srfi-14.test --- Test suite for Guile's SRFI-14 functions.
;;;; Martin Grabmueller, 2001-07-16
;;;;
;;;; Copyright (C) 2001, 2006 Free Software Foundation, Inc.
@ -18,7 +18,11 @@
;;;; the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;;;; Boston, MA 02110-1301 USA
(use-modules (srfi srfi-14))
(define-module (test-suite test-srfi-14)
:use-module (srfi srfi-14)
:use-module (srfi srfi-1) ;; `every'
:use-module (test-suite lib))
(define exception:invalid-char-set-cursor
(cons 'misc-error "^invalid character set cursor"))
@ -186,3 +190,128 @@
(pass-if "upper case char set"
(char-set= (char-set-map char-upcase char-set:lower-case)
char-set:upper-case)))
(with-test-prefix "string->char-set"
(pass-if "some char set"
(let ((chars '(#\g #\u #\i #\l #\e)))
(char-set= (list->char-set chars)
(string->char-set (apply string chars))))))
;; Make sure we get an ASCII charset and character classification.
(if (defined? 'setlocale) (setlocale LC_CTYPE "C"))
(with-test-prefix "standard char sets (ASCII)"
(pass-if "char-set:letter"
(char-set= (string->char-set
(string-append "abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"))
char-set:letter))
(pass-if "char-set:punctuation"
(char-set= (string->char-set "!\"#%&'()*,-./:;?@[\\]_{}")
char-set:punctuation))
(pass-if "char-set:symbol"
(char-set= (string->char-set "$+<=>^`|~")
char-set:symbol))
(pass-if "char-set:letter+digit"
(char-set= char-set:letter+digit
(char-set-union char-set:letter char-set:digit)))
(pass-if "char-set:graphic"
(char-set= char-set:graphic
(char-set-union char-set:letter char-set:digit
char-set:punctuation char-set:symbol)))
(pass-if "char-set:printing"
(char-set= char-set:printing
(char-set-union char-set:whitespace char-set:graphic))))
;;;
;;; 8-bit charsets.
;;;
;;; Here, we only test ISO-8859-1 (Latin-1), notably because behavior of
;;; SRFI-14 for implementations supporting this charset is well-defined.
;;;
(define (every? pred lst)
(not (not (every pred lst))))
(define (find-latin1-locale)
;; Try to find and install an ISO-8859-1 locale. Return `#f' on failure.
(if (defined? 'setlocale)
(let loop ((locales (map (lambda (lang)
(string-append lang ".iso88591"))
'("de_DE" "en_GB" "en_US" "es_ES"
"fr_FR" "it_IT"))))
(if (null? locales)
#f
(if (false-if-exception (setlocale LC_CTYPE (car locales)))
(car locales)
(loop (cdr locales)))))
#f))
(define %latin1 (find-latin1-locale))
(with-test-prefix "Latin-1 (8-bit charset)"
;; Note: the membership tests below are not exhaustive.
(pass-if "char-set:letter (membership)"
(if (not %latin1)
(throw 'unresolved)
(let ((letters (char-set->list char-set:letter)))
(every? (lambda (8-bit-char)
(memq 8-bit-char letters))
(append '(#\a #\b #\c) ;; ASCII
(string->list "çéèâùÉÀÈÊ") ;; French
(string->list "øñÑíßåæðþ"))))))
(pass-if "char-set:letter (size)"
(if (not %latin1)
(throw 'unresolved)
(= (char-set-size char-set:letter) 117)))
(pass-if "char-set:lower-case (size)"
(if (not %latin1)
(throw 'unresolved)
(= (char-set-size char-set:lower-case) (+ 26 33))))
(pass-if "char-set:upper-case (size)"
(if (not %latin1)
(throw 'unresolved)
(= (char-set-size char-set:upper-case) (+ 26 30))))
(pass-if "char-set:punctuation (membership)"
(if (not %latin1)
(thrown 'unresolved)
(let ((punctuation (char-set->list char-set:punctuation)))
(every? (lambda (8-bit-char)
(memq 8-bit-char punctuation))
(append '(#\! #\. #\?) ;; ASCII
(string->list "¡¿") ;; Castellano
(string->list "«»")))))) ;; French
(pass-if "char-set:letter+digit"
(char-set= char-set:letter+digit
(char-set-union char-set:letter char-set:digit)))
(pass-if "char-set:graphic"
(char-set= char-set:graphic
(char-set-union char-set:letter char-set:digit
char-set:punctuation char-set:symbol)))
(pass-if "char-set:printing"
(char-set= char-set:printing
(char-set-union char-set:whitespace char-set:graphic))))
;; Local Variables:
;; mode: scheme
;; coding: latin-1
;; End: