From 61d509194c6ce90e678a0b27d613f3656c8bbafd Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Tue, 14 Jan 2014 02:19:52 -0500 Subject: [PATCH 1/5] Add srfi-16 and srfi-30 to %cond-expand-features. * module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-16 and srfi-30. * doc/ref/srfi-modules.texi (SRFI-0): Add srfi-16 and srfi-30 to the list of core features. * module/srfi/srfi-16.scm: Remove call to 'cond-expand-provide'. --- doc/ref/srfi-modules.texi | 6 ++++-- module/ice-9/boot-9.scm | 4 +++- module/srfi/srfi-16.scm | 6 ++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/doc/ref/srfi-modules.texi b/doc/ref/srfi-modules.texi index b6d524879..7b3d21aeb 100644 --- a/doc/ref/srfi-modules.texi +++ b/doc/ref/srfi-modules.texi @@ -1,7 +1,7 @@ @c -*-texinfo-*- @c This is part of the GNU Guile Reference Manual. -@c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 -@c Free Software Foundation, Inc. +@c Copyright (C) 1996, 1997, 2000, 2001, 2002, 2003, 2004, 2006, 2007, 2008, +@c 2009, 2010, 2011, 2012, 2013, 2014 Free Software Foundation, Inc. @c See the file guile.texi for copying conditions. @node SRFI Support @@ -148,7 +148,9 @@ srfi-0 srfi-4 srfi-13 srfi-14 +srfi-16 srfi-23 +srfi-30 srfi-39 srfi-55 srfi-61 diff --git a/module/ice-9/boot-9.scm b/module/ice-9/boot-9.scm index dbef75f6b..5ee90736d 100644 --- a/module/ice-9/boot-9.scm +++ b/module/ice-9/boot-9.scm @@ -1,7 +1,7 @@ ;;; -*- mode: scheme; coding: utf-8; -*- ;;;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, -;;;; 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013 +;;;; 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 ;;;; Free Software Foundation, Inc. ;;;; ;;;; This library is free software; you can redistribute it and/or @@ -4037,7 +4037,9 @@ when none is available, reading FILE-NAME with READER." ;; of the binary I/O model and may fail to support some characters. srfi-13 ;; string library srfi-14 ;; character sets + srfi-16 ;; case-lambda srfi-23 ;; `error` procedure + srfi-30 ;; nested multi-line comments srfi-39 ;; parameterize srfi-55 ;; require-extension srfi-61 ;; general cond clause diff --git a/module/srfi/srfi-16.scm b/module/srfi/srfi-16.scm index caec784ba..d103ce979 100644 --- a/module/srfi/srfi-16.scm +++ b/module/srfi/srfi-16.scm @@ -1,6 +1,6 @@ ;;; srfi-16.scm --- case-lambda -;; Copyright (C) 2001, 2002, 2006, 2009 Free Software Foundation, Inc. +;; Copyright (C) 2001, 2002, 2006, 2009, 2014 Free Software Foundation, Inc. ;; ;; This library is free software; you can redistribute it and/or ;; modify it under the terms of the GNU Lesser General Public @@ -48,6 +48,4 @@ (define-module (srfi srfi-16) #:re-export (case-lambda)) -;; Case-lambda is now provided by code psyntax. - -(cond-expand-provide (current-module) '(srfi-16)) +;; Case-lambda is now provided by core psyntax. From 7a329029cf898fc0b9b24252c9bb437e1ad0b1d7 Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Sun, 12 Jan 2014 04:36:02 -0500 Subject: [PATCH 2/5] read: Support R7RS '#true' and '#false' syntax for booleans. * libguile/read.c (try_read_ci_chars): New static function. (scm_read_boolean, scm_read_array): Use 'try_read_ci_chars'. * doc/ref/api-data.texi (Booleans): Update docs. * test-suite/tests/reader.test ("reading"): Add tests. --- doc/ref/api-data.texi | 1 + libguile/read.c | 47 +++++++++++++++++++++++++++++++++--- test-suite/tests/reader.test | 11 ++++++++- 3 files changed, 55 insertions(+), 4 deletions(-) diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index fda76f1dc..198854bf1 100644 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -56,6 +56,7 @@ For the documentation of such @dfn{compound} data types, see @tpindex Booleans The two boolean values are @code{#t} for true and @code{#f} for false. +They can also be written as @code{#true} and @code{#false}, as per R7RS. Boolean values are returned by predicate procedures, such as the general equality predicates @code{eq?}, @code{eqv?} and @code{equal?} diff --git a/libguile/read.c b/libguile/read.c index b36ecd437..03a53aadc 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -1,5 +1,5 @@ /* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006, - * 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc. + * 2007, 2008, 2009, 2010, 2011, 2012, 2014 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -947,6 +947,43 @@ scm_read_semicolon_comment (int chr, SCM port) return SCM_UNSPECIFIED; } +/* If the EXPECTED_CHARS are the next ones available from PORT, then + consume them and return 1. Otherwise leave the port position where + it was and return 0. EXPECTED_CHARS should be all lowercase, and + will be matched case-insensitively against the characters read from + PORT. */ +static int +try_read_ci_chars (SCM port, const char *expected_chars) +{ + int num_chars_wanted = strlen (expected_chars); + int num_chars_read = 0; + char *chars_read = alloca (num_chars_wanted); + int c; + + while (num_chars_read < num_chars_wanted) + { + c = scm_getc (port); + if (c == EOF) + break; + else if (tolower (c) != expected_chars[num_chars_read]) + { + scm_ungetc (c, port); + break; + } + else + chars_read[num_chars_read++] = c; + } + + if (num_chars_read == num_chars_wanted) + return 1; + else + { + while (num_chars_read > 0) + scm_ungetc (chars_read[--num_chars_read], port); + return 0; + } +} + /* Sharp readers, i.e. readers called after a `#' sign has been read. */ @@ -957,10 +994,12 @@ scm_read_boolean (int chr, SCM port) { case 't': case 'T': + try_read_ci_chars (port, "rue"); return SCM_BOOL_T; case 'f': case 'F': + try_read_ci_chars (port, "alse"); return SCM_BOOL_F; } @@ -1160,8 +1199,10 @@ scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column) c = scm_getc (port); if (c != '3' && c != '6') { - if (c != EOF) - scm_ungetc (c, port); + if (c == 'a' && try_read_ci_chars (port, "lse")) + return SCM_BOOL_F; + else if (c != EOF) + scm_ungetc (c, port); return SCM_BOOL_F; } rank = 1; diff --git a/test-suite/tests/reader.test b/test-suite/tests/reader.test index 6e02255ad..448ae1bcb 100644 --- a/test-suite/tests/reader.test +++ b/test-suite/tests/reader.test @@ -1,6 +1,7 @@ ;;;; reader.test --- Reader test. -*- coding: iso-8859-1; mode: scheme -*- ;;;; -;;;; Copyright (C) 1999, 2001, 2002, 2003, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. +;;;; Copyright (C) 1999, 2001, 2002, 2003, 2007, 2008, 2009, 2010, 2011, +;;;; 2014 Free Software Foundation, Inc. ;;;; Jim Blandy ;;;; ;;;; This library is free software; you can redistribute it and/or @@ -73,6 +74,14 @@ (not (equal? (imag-part (read-string "-nan.0-1i")) (imag-part (read-string "-nan.0+1i"))))) + (pass-if-equal "#true" + '(a #t b) + (read-string "(a #true b)")) + + (pass-if-equal "#false" + '(a #f b) + (read-string "(a #false b)")) + ;; At one time the arg list for "Unknown # object: ~S" didn't make it out ;; of read.c. Check that `format' can be applied to this error. (pass-if "error message on bad #" From 6579c3308d386ce74627e2cfb734898c9ed83d3a Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Sun, 12 Jan 2014 04:36:29 -0500 Subject: [PATCH 3/5] read: Accept "\|" in string literals. * libguile/read.c (scm_read_string): Accept "\|" in string literals. * doc/ref/api-data.texi (String Syntax): Add "\|" to the list of supported backslash escapes. * test-suite/tests/reader.test ("reading"): Add test. --- doc/ref/api-data.texi | 10 +++++++--- libguile/read.c | 1 + test-suite/tests/reader.test | 4 ++++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index 198854bf1..109b2288a 100644 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -2938,9 +2938,10 @@ The read syntax for strings is an arbitrarily long sequence of characters enclosed in double quotes (@nicode{"}). Backslash is an escape character and can be used to insert the following -special characters. @nicode{\"} and @nicode{\\} are R5RS standard, the -next seven are R6RS standard --- notice they follow C syntax --- and the -remaining four are Guile extensions. +special characters. @nicode{\"} and @nicode{\\} are R5RS standard, +@nicode{\|} is R7RS standard, the next seven are R6RS standard --- +notice they follow C syntax --- and the remaining four are Guile +extensions. @table @asis @item @nicode{\\} @@ -2950,6 +2951,9 @@ Backslash character. Double quote character (an unescaped @nicode{"} is otherwise the end of the string). +@item @nicode{\|} +Vertical bar character. + @item @nicode{\a} Bell character (ASCII 7). diff --git a/libguile/read.c b/libguile/read.c index 03a53aadc..eead36841 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -624,6 +624,7 @@ scm_read_string (int chr, SCM port, scm_t_read_opts *opts) case EOF: goto str_eof; case '"': + case '|': case '\\': break; case '\n': diff --git a/test-suite/tests/reader.test b/test-suite/tests/reader.test index 448ae1bcb..56f6346f6 100644 --- a/test-suite/tests/reader.test +++ b/test-suite/tests/reader.test @@ -74,6 +74,10 @@ (not (equal? (imag-part (read-string "-nan.0-1i")) (imag-part (read-string "-nan.0+1i"))))) + (pass-if-equal "'\|' in string literals" + "a|b" + (read-string "\"a\\|b\"")) + (pass-if-equal "#true" '(a #t b) (read-string "(a #true b)")) From 394449d5d3922cab783d51398b7727ccaf07dd76 Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Sun, 12 Jan 2014 04:36:57 -0500 Subject: [PATCH 4/5] Recognize 'escape' character name, per R7RS. * libguile/chars.c (scm_r7rs_charnames, scm_r7rs_charnums): New static constants. (SCM_N_R7RS_CHARNAMES): New macro. (scm_i_charname, scm_i_charname_to_char): Adapt to new R7RS char names. * doc/ref/api-data.texi (Characters): Document #\escape. * test-suite/tests/reader.test ("reading"): Add test. --- doc/ref/api-data.texi | 3 +++ libguile/chars.c | 28 +++++++++++++++++++++++++--- test-suite/tests/reader.test | 4 ++++ 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index 109b2288a..e711402f4 100644 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -2066,6 +2066,9 @@ name for each character. The short name for the ``delete'' character (code point U+007F) is @code{#\del}. +The R7RS name for the ``escape'' character (code point U+001B) is +@code{#\escape}. + There are also a few alternative names left over for compatibility with previous versions of Guile. diff --git a/libguile/chars.c b/libguile/chars.c index 2e1610566..697a5c401 100644 --- a/libguile/chars.c +++ b/libguile/chars.c @@ -1,4 +1,5 @@ -/* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009, 2010 Free Software Foundation, Inc. +/* Copyright (C) 1995,1996,1998, 2000, 2001, 2004, 2006, 2008, 2009, + * 2010, 2014 Free Software Foundation, Inc. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public License @@ -554,6 +555,16 @@ static const scm_t_uint32 const scm_r6rs_charnums[] = { #define SCM_N_R6RS_CHARNAMES (sizeof (scm_r6rs_charnames) / sizeof (char *)) +static const char *const scm_r7rs_charnames[] = { + "escape" +}; + +static const scm_t_uint32 const scm_r7rs_charnums[] = { + 0x1b +}; + +#define SCM_N_R7RS_CHARNAMES (sizeof (scm_r7rs_charnames) / sizeof (char *)) + /* The abbreviated names for control characters. */ static const char *const scm_C0_control_charnames[] = { /* C0 controls */ @@ -600,6 +611,10 @@ scm_i_charname (SCM chr) if (scm_r6rs_charnums[c] == i) return scm_r6rs_charnames[c]; + for (c = 0; c < SCM_N_R7RS_CHARNAMES; c++) + if (scm_r7rs_charnums[c] == i) + return scm_r7rs_charnames[c]; + for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++) if (scm_C0_control_charnums[c] == i) return scm_C0_control_charnames[c]; @@ -625,13 +640,20 @@ scm_i_charname_to_char (const char *charname, size_t charname_len) && (!strncasecmp (scm_r5rs_charnames[c], charname, charname_len))) return SCM_MAKE_CHAR (scm_r5rs_charnums[c]); - /* The R6RS charnames. R6RS says that these should be case-sensitive. They - are left as case-insensitive to avoid confusion. */ + /* The R6RS charnames. R6RS says that these should be case-sensitive. + They are left as case-insensitive to avoid confusion. */ for (c = 0; c < SCM_N_R6RS_CHARNAMES; c++) if ((strlen (scm_r6rs_charnames[c]) == charname_len) && (!strncasecmp (scm_r6rs_charnames[c], charname, charname_len))) return SCM_MAKE_CHAR (scm_r6rs_charnums[c]); + /* The R7RS charnames. R7RS says that these should be case-sensitive. + They are left as case-insensitive to avoid confusion. */ + for (c = 0; c < SCM_N_R7RS_CHARNAMES; c++) + if ((strlen (scm_r7rs_charnames[c]) == charname_len) + && (!strncasecmp (scm_r7rs_charnames[c], charname, charname_len))) + return SCM_MAKE_CHAR (scm_r7rs_charnums[c]); + /* Then come the controls. By Guile convention, these are not case sensitive. */ for (c = 0; c < SCM_N_C0_CONTROL_CHARNAMES; c++) diff --git a/test-suite/tests/reader.test b/test-suite/tests/reader.test index 56f6346f6..e0126fe40 100644 --- a/test-suite/tests/reader.test +++ b/test-suite/tests/reader.test @@ -78,6 +78,10 @@ "a|b" (read-string "\"a\\|b\"")) + (pass-if-equal "#\\escape" + '(a #\esc b) + (read-string "(a #\\escape b)")) + (pass-if-equal "#true" '(a #t b) (read-string "(a #true b)")) From 0fc548287e154349f3365976e6a5854736b651ed Mon Sep 17 00:00:00 2001 From: Mark H Weaver Date: Tue, 14 Jan 2014 03:13:58 -0500 Subject: [PATCH 5/5] read: use 'c_tolower' instead of 'tolower' in 'try_read_ci_chars'. * libguile/read.c: Include . (try_read_ci_chars): Use 'c_tolower' instead of 'tolower'. --- libguile/read.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libguile/read.c b/libguile/read.c index eead36841..e862c206e 100644 --- a/libguile/read.c +++ b/libguile/read.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "libguile/_scm.h" #include "libguile/bytevectors.h" @@ -966,7 +967,7 @@ try_read_ci_chars (SCM port, const char *expected_chars) c = scm_getc (port); if (c == EOF) break; - else if (tolower (c) != expected_chars[num_chars_read]) + else if (c_tolower (c) != expected_chars[num_chars_read]) { scm_ungetc (c, port); break;