2014-10-01 20:45:41 -04:00
|
|
|
|
/* Copyright (C) 1995-1997, 1999-2001, 2003, 2004, 2006-2012, 2014
|
|
|
|
|
|
* Free Software Foundation, Inc.
|
1996-07-25 22:56:11 +00:00
|
|
|
|
*
|
2003-04-05 19:15:35 +00:00
|
|
|
|
* This library is free software; you can redistribute it and/or
|
2009-06-17 00:22:09 +01:00
|
|
|
|
* modify it under the terms of the GNU Lesser General Public License
|
|
|
|
|
|
* as published by the Free Software Foundation; either version 3 of
|
|
|
|
|
|
* the License, or (at your option) any later version.
|
1996-07-25 22:56:11 +00:00
|
|
|
|
*
|
2009-06-17 00:22:09 +01:00
|
|
|
|
* This library is distributed in the hope that it will be useful, but
|
|
|
|
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
2003-04-05 19:15:35 +00:00
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
|
|
* Lesser General Public License for more details.
|
1996-07-25 22:56:11 +00:00
|
|
|
|
*
|
2003-04-05 19:15:35 +00:00
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
|
|
* License along with this library; if not, write to the Free Software
|
2009-06-17 00:22:09 +01:00
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
|
|
|
|
* 02110-1301 USA
|
2003-04-05 19:15:35 +00:00
|
|
|
|
*/
|
1999-12-12 02:36:16 +00:00
|
|
|
|
|
|
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
|
|
# include <config.h>
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
#include <stdio.h>
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#include <string.h>
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
#include <unicase.h>
|
2011-01-21 08:57:39 +01:00
|
|
|
|
#include <unictype.h>
|
2013-04-05 14:04:53 -04:00
|
|
|
|
#include <c-strcase.h>
|
2014-01-14 03:13:58 -05:00
|
|
|
|
#include <c-ctype.h>
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/_scm.h"
|
2009-06-19 00:47:11 +02:00
|
|
|
|
#include "libguile/bytevectors.h"
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/chars.h"
|
|
|
|
|
|
#include "libguile/eval.h"
|
2009-07-17 01:08:35 +02:00
|
|
|
|
#include "libguile/arrays.h"
|
2009-07-17 00:58:32 +02:00
|
|
|
|
#include "libguile/bitvectors.h"
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/keywords.h"
|
|
|
|
|
|
#include "libguile/alist.h"
|
|
|
|
|
|
#include "libguile/srcprop.h"
|
|
|
|
|
|
#include "libguile/hashtab.h"
|
|
|
|
|
|
#include "libguile/hash.h"
|
|
|
|
|
|
#include "libguile/ports.h"
|
2013-04-14 02:48:33 -04:00
|
|
|
|
#include "libguile/ports-internal.h"
|
2009-11-27 17:00:51 +01:00
|
|
|
|
#include "libguile/fports.h"
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/root.h"
|
|
|
|
|
|
#include "libguile/strings.h"
|
2002-08-05 23:04:44 +00:00
|
|
|
|
#include "libguile/strports.h"
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/vectors.h"
|
|
|
|
|
|
#include "libguile/validate.h"
|
2004-10-26 17:00:13 +00:00
|
|
|
|
#include "libguile/srfi-4.h"
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#include "libguile/srfi-13.h"
|
2002-08-05 23:04:44 +00:00
|
|
|
|
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/read.h"
|
* backtrace.c, debug.c, debug.h, deprecation.c, eq.c, eval.c
eval.h, gsubr.c, init.c, macros.c, print.c, print.h, read.c,
read.h, stacks.c, symbols.c, throw.c: use private-options.h
* private-options.h: new file: contain hardcoded option
definitions.
2007-01-22 15:14:40 +00:00
|
|
|
|
#include "libguile/private-options.h"
|
|
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2003-05-06 20:17:26 +00:00
|
|
|
|
SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
|
1997-03-10 06:49:15 +00:00
|
|
|
|
SCM_SYMBOL (scm_keyword_prefix, "prefix");
|
2008-04-15 19:52:43 +02:00
|
|
|
|
SCM_SYMBOL (scm_keyword_postfix, "postfix");
|
2010-04-09 14:15:16 +02:00
|
|
|
|
SCM_SYMBOL (sym_nil, "nil");
|
1997-03-10 06:49:15 +00:00
|
|
|
|
|
2012-10-26 17:20:16 -04:00
|
|
|
|
/* SRFI-105 curly infix expression support */
|
|
|
|
|
|
SCM_SYMBOL (sym_nfx, "$nfx$");
|
|
|
|
|
|
SCM_SYMBOL (sym_bracket_list, "$bracket-list$");
|
|
|
|
|
|
SCM_SYMBOL (sym_bracket_apply, "$bracket-apply$");
|
|
|
|
|
|
|
|
|
|
|
|
scm_t_option scm_read_opts[] =
|
|
|
|
|
|
{
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "copy", 0,
|
|
|
|
|
|
"Copy source code expressions." },
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "positions", 1,
|
|
|
|
|
|
"Record positions of source code expressions." },
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "case-insensitive", 0,
|
|
|
|
|
|
"Convert symbols to lower case."},
|
|
|
|
|
|
{ SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS,
|
|
|
|
|
|
"Style of keyword recognition: #f, 'prefix or 'postfix."},
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
|
|
|
|
|
|
"Use R6RS variable-length character and string hex escapes."},
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "square-brackets", 1,
|
|
|
|
|
|
"Treat `[' and `]' as parentheses, for R6RS compatibility."},
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0,
|
|
|
|
|
|
"In strings, consume leading whitespace after an escaped end-of-line."},
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "curly-infix", 0,
|
|
|
|
|
|
"Support SRFI-105 curly infix expressions."},
|
2014-01-12 07:55:22 -05:00
|
|
|
|
{ SCM_OPTION_BOOLEAN, "r7rs-symbols", 0,
|
|
|
|
|
|
"Support R7RS |...| symbol notation."},
|
2012-10-26 17:20:16 -04:00
|
|
|
|
{ 0, },
|
|
|
|
|
|
};
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
|
|
|
|
|
|
/* Internal read options structure. This is initialized by 'scm_read'
|
2012-10-23 17:28:43 -04:00
|
|
|
|
from the global and per-port read options, and a pointer is passed
|
|
|
|
|
|
down to all helper functions. */
|
|
|
|
|
|
|
|
|
|
|
|
/* The keyword recognition styles the reader distinguishes.
   NOTE(review): these presumably correspond to the `keywords' read
   option values #f, 'prefix and 'postfix respectively -- confirm in
   the code that initializes the read options.  */
enum t_keyword_style
  {
    KEYWORD_STYLE_HASH_PREFIX = 0,
    KEYWORD_STYLE_PREFIX      = 1,
    KEYWORD_STYLE_POSTFIX     = 2
  };
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
|
2012-10-23 17:28:43 -04:00
|
|
|
|
struct t_read_opts
|
|
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
enum t_keyword_style keyword_style;
|
|
|
|
|
|
unsigned int copy_source_p : 1;
|
|
|
|
|
|
unsigned int record_positions_p : 1;
|
|
|
|
|
|
unsigned int case_insensitive_p : 1;
|
|
|
|
|
|
unsigned int r6rs_escapes_p : 1;
|
|
|
|
|
|
unsigned int square_brackets_p : 1;
|
|
|
|
|
|
unsigned int hungry_eol_escapes_p : 1;
|
2012-10-26 17:20:16 -04:00
|
|
|
|
unsigned int curly_infix_p : 1;
|
|
|
|
|
|
unsigned int neoteric_p : 1;
|
2014-01-12 07:55:22 -05:00
|
|
|
|
unsigned int r7rs_symbols_p : 1;
|
1996-08-20 17:11:25 +00:00
|
|
|
|
};
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
typedef struct t_read_opts scm_t_read_opts;
|
|
|
|
|
|
|
1996-08-20 17:11:25 +00:00
|
|
|
|
|
2002-08-04 23:33:28 +00:00
|
|
|
|
/*
|
|
|
|
|
|
Give meaningful error messages for errors
|
|
|
|
|
|
|
|
|
|
|
|
We use the format
|
|
|
|
|
|
|
2002-08-05 23:04:44 +00:00
|
|
|
|
FILE:LINE:COL: MESSAGE
|
2002-08-04 23:33:28 +00:00
|
|
|
|
This happened in ....
|
|
|
|
|
|
|
|
|
|
|
|
This is not standard GNU format, but the test-suite likes the real
|
|
|
|
|
|
message to be in front.
|
|
|
|
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
2004-10-26 17:00:13 +00:00
|
|
|
|
void
|
|
|
|
|
|
scm_i_input_error (char const *function,
|
|
|
|
|
|
SCM port, const char *message, SCM arg)
|
2002-08-05 23:04:44 +00:00
|
|
|
|
{
|
2004-08-10 13:54:01 +00:00
|
|
|
|
SCM fn = (scm_is_string (SCM_FILENAME(port))
|
|
|
|
|
|
? SCM_FILENAME(port)
|
|
|
|
|
|
: scm_from_locale_string ("#<unknown port>"));
|
2002-08-05 23:04:44 +00:00
|
|
|
|
|
2004-08-10 13:54:01 +00:00
|
|
|
|
SCM string_port = scm_open_output_string ();
|
2002-08-05 23:04:44 +00:00
|
|
|
|
SCM string = SCM_EOL;
|
|
|
|
|
|
scm_simple_format (string_port,
|
(scm_i_casei_streq): New, for counted strings.
* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH. Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string. Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged. Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.
* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.
2004-08-19 17:17:43 +00:00
|
|
|
|
scm_from_locale_string ("~A:~S:~S: ~A"),
|
2004-08-10 13:54:01 +00:00
|
|
|
|
scm_list_4 (fn,
|
2007-03-07 23:35:55 +00:00
|
|
|
|
scm_from_long (SCM_LINUM (port) + 1),
|
* numbers.h, numbers.c, discouraged.h, discouraged.c (scm_short2num,
scm_ushort2num, scm_int2num, scm_uint2num, scm_long2num,
scm_ulong2num, scm_size2num, scm_ptrdiff2num, scm_num2short,
scm_num2ushort, scm_num2int, scm_num2uint, scm_num2long,
scm_num2ulong, scm_num2size, scm_num2ptrdiff, scm_long_long2num,
scm_ulong_long2num, scm_num2long_long, scm_num2ulong_long):
Discouraged by moving to discouraged.h and discouraged.c and
reimplementing in terms of scm_from_* and scm_to_*. Changed all uses
to the new scm_from_* and scm_to_* functions.
2004-08-02 16:14:04 +00:00
|
|
|
|
scm_from_int (SCM_COL (port) + 1),
|
(scm_i_casei_streq): New, for counted strings.
* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH. Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string. Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged. Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.
* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.
2004-08-19 17:17:43 +00:00
|
|
|
|
scm_from_locale_string (message)));
|
2002-08-05 23:04:44 +00:00
|
|
|
|
|
|
|
|
|
|
string = scm_get_output_string (string_port);
|
|
|
|
|
|
scm_close_output_port (string_port);
|
2011-01-07 09:08:58 -08:00
|
|
|
|
scm_error_scm (scm_from_latin1_symbol ("read-error"),
|
2004-10-26 17:00:13 +00:00
|
|
|
|
function? scm_from_locale_string (function) : SCM_BOOL_F,
|
2002-08-05 23:04:44 +00:00
|
|
|
|
string,
|
2003-06-04 16:36:03 +00:00
|
|
|
|
arg,
|
2002-08-05 23:04:44 +00:00
|
|
|
|
SCM_BOOL_F);
|
|
|
|
|
|
}
|
2002-08-04 23:33:28 +00:00
|
|
|
|
|
|
|
|
|
|
|
2000-01-05 19:05:23 +00:00
|
|
|
|
SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
            (SCM setting),
            "Option interface for the read options. Instead of using\n"
            "this procedure directly, use the procedures @code{read-enable},\n"
            "@code{read-disable}, @code{read-set!} and @code{read-options}.")
#define FUNC_NAME s_scm_read_options
{
  SCM result;

  /* Let the generic option machinery apply SETTING to the reader's
     option table.  */
  result = scm_options (setting, scm_read_opts, FUNC_NAME);

  /* Whenever the copy-source option is on, force position recording
     on as well.  */
  if (SCM_COPY_SOURCE_P)
    SCM_RECORD_POSITIONS_P = 1;

  return result;
}
#undef FUNC_NAME
|
1996-08-20 17:11:25 +00:00
|
|
|
|
|
2010-11-03 00:09:57 +01:00
|
|
|
|
/* A fluid referring to an association list mapping extra hash
|
|
|
|
|
|
characters to procedures. */
|
|
|
|
|
|
static SCM *scm_i_read_hash_procedures;
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM
|
2010-11-03 00:09:57 +01:00
|
|
|
|
scm_i_read_hash_procedures_ref (void)
|
|
|
|
|
|
{
|
|
|
|
|
|
return scm_fluid_ref (*scm_i_read_hash_procedures);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static void
|
2010-11-03 00:09:57 +01:00
|
|
|
|
scm_i_read_hash_procedures_set_x (SCM value)
|
|
|
|
|
|
{
|
|
|
|
|
|
scm_fluid_set_x (*scm_i_read_hash_procedures, value);
|
|
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* Token readers. */
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* Size of the C buffer used to read symbols and numbers. */
|
|
|
|
|
|
#define READER_BUFFER_SIZE 128
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-05-07 00:32:01 +02:00
|
|
|
|
/* Number of 32-bit codepoints in the buffer used to read strings. */
|
|
|
|
|
|
#define READER_STRING_BUFFER_SIZE 128
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* The maximum size of Scheme character names. */
|
|
|
|
|
|
#define READER_CHAR_NAME_MAX_SIZE 50
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
2012-10-23 00:29:07 -04:00
|
|
|
|
/* The maximum size of reader directive names. */
|
|
|
|
|
|
#define READER_DIRECTIVE_NAME_MAX_SIZE 50
|
|
|
|
|
|
|
2000-10-06 16:51:08 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* `isblank' is only in C99. */
|
|
|
|
|
|
/* Non-zero when _CHR is a space, tab, newline, form feed or carriage
   return.  _CHR is evaluated several times, so it must not have side
   effects.  */
#define CHAR_IS_BLANK_(_chr)                                    \
  (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n')      \
   || ((_chr) == '\f') || ((_chr) == '\r'))

#ifdef MSDOS
/* On MSDOS, character code 26 (Ctrl-Z) also counts as blank
   (presumably because it serves as an end-of-file marker there).
   The macro argument is `_chr'; the expansion must not refer to a
   caller variable that merely happens to be called `chr'.  */
# define CHAR_IS_BLANK(_chr)                                    \
  ((CHAR_IS_BLANK_ (_chr)) || ((_chr) == 26))
#else
# define CHAR_IS_BLANK CHAR_IS_BLANK_
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
|
|
|
|
|
|
structure''). */
|
|
|
|
|
|
/* Non-zero when C is blank (per CHAR_IS_BLANK) or one of the
   characters `)', `(', `;' or `"'.  C is evaluated several times.  */
#define CHAR_IS_R5RS_DELIMITER(c)                               \
  (CHAR_IS_BLANK (c)                                            \
   || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')

/* Non-zero when C is an R5RS delimiter, or a square bracket while
   either the square-brackets or the curly-infix option is on, or a
   curly brace while the curly-infix option is on.

   This macro expands to references to a variable named `opts', which
   must be in scope at the point of use and point to a structure with
   `square_brackets_p' and `curly_infix_p' members.  */
#define CHAR_IS_DELIMITER(c)                                    \
  (CHAR_IS_R5RS_DELIMITER (c)                                   \
   || ((opts->square_brackets_p || opts->curly_infix_p)         \
       && ((c) == ']' || (c) == '['))                           \
   || (opts->curly_infix_p                                      \
       && ((c) == '}' || (c) == '{')))
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
|
|
|
|
|
|
Structure''. */
|
|
|
|
|
|
/* Non-zero when _CHR is one of the lower-case letters `e', `s', `f',
   `d' or `l'.  _CHR is evaluated several times.  */
#define CHAR_IS_EXPONENT_MARKER(_chr)                   \
  (((_chr) == 'e')                                      \
   || ((_chr) == 's')                                   \
   || ((_chr) == 'f')                                   \
   || ((_chr) == 'd')                                   \
   || ((_chr) == 'l'))
|
|
|
|
|
|
|
2007-09-03 16:58:20 +00:00
|
|
|
|
/* Forward declarations of static reader helpers.  */
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
|
2009-10-19 22:38:34 +02:00
|
|
|
|
static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
static SCM scm_read_commented_expression (scm_t_wchar, SCM, scm_t_read_opts *);
|
|
|
|
|
|
static SCM scm_read_shebang (scm_t_wchar, SCM, scm_t_read_opts *);
|
2009-10-19 22:38:34 +02:00
|
|
|
|
static SCM scm_get_hash_procedure (int);
|
2007-09-03 16:58:20 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
/* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the
|
|
|
|
|
|
result in the pre-allocated buffer BUF. Return zero if the whole token has
|
|
|
|
|
|
fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set to the number of
|
|
|
|
|
|
bytes actually read. */
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static int
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
read_token (SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
char *buf, size_t buf_size, size_t *read)
|
2012-05-04 22:36:27 +02:00
|
|
|
|
{
|
2010-02-02 20:33:41 -08:00
|
|
|
|
*read = 0;
|
2004-09-07 09:18:59 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
while (*read < buf_size)
|
|
|
|
|
|
{
|
|
|
|
|
|
int chr;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2016-04-22 17:12:58 +02:00
|
|
|
|
chr = scm_get_byte_or_eof (port);
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (chr == EOF)
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
else if (CHAR_IS_DELIMITER (chr))
|
|
|
|
|
|
{
|
2011-11-08 00:14:16 +01:00
|
|
|
|
scm_unget_byte_unlocked (chr, port);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
*buf = (char) chr;
|
|
|
|
|
|
buf++, (*read)++;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
return 1;
|
|
|
|
|
|
}
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
2012-05-04 22:36:27 +02:00
|
|
|
|
/* Like `read_token', but return either BUFFER, or a GC-allocated buffer
|
|
|
|
|
|
if the token doesn't fit in BUFFER_SIZE bytes. */
|
|
|
|
|
|
static char *
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
read_complete_token (SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
char *buffer, size_t buffer_size, size_t *read)
|
2010-02-02 20:33:41 -08:00
|
|
|
|
{
|
|
|
|
|
|
int overflow = 0;
|
2012-05-04 22:36:27 +02:00
|
|
|
|
size_t bytes_read, overflow_size = 0;
|
|
|
|
|
|
char *overflow_buffer = NULL;
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
do
|
|
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
overflow = read_token (port, opts, buffer, buffer_size, &bytes_read);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (bytes_read == 0)
|
|
|
|
|
|
break;
|
|
|
|
|
|
if (overflow || overflow_size != 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (overflow_size == 0)
|
|
|
|
|
|
{
|
2012-05-04 22:36:27 +02:00
|
|
|
|
overflow_buffer = scm_gc_malloc_pointerless (bytes_read, "read");
|
|
|
|
|
|
memcpy (overflow_buffer, buffer, bytes_read);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
overflow_size = bytes_read;
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
2012-05-06 22:23:58 +02:00
|
|
|
|
char *new_buf =
|
2012-05-04 22:36:27 +02:00
|
|
|
|
scm_gc_malloc_pointerless (overflow_size + bytes_read, "read");
|
|
|
|
|
|
|
|
|
|
|
|
memcpy (new_buf, overflow_buffer, overflow_size);
|
|
|
|
|
|
memcpy (new_buf + overflow_size, buffer, bytes_read);
|
|
|
|
|
|
|
|
|
|
|
|
overflow_buffer = new_buf;
|
2010-02-02 20:33:41 -08:00
|
|
|
|
overflow_size += bytes_read;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
}
|
2010-02-02 20:33:41 -08:00
|
|
|
|
while (overflow);
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (overflow_size)
|
|
|
|
|
|
*read = overflow_size;
|
2009-12-28 17:35:48 +01:00
|
|
|
|
else
|
2010-02-02 20:33:41 -08:00
|
|
|
|
*read = bytes_read;
|
|
|
|
|
|
|
2012-05-04 22:36:27 +02:00
|
|
|
|
return (overflow_size > 0 ? overflow_buffer : buffer);
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* Skip whitespace from PORT and return the first non-whitespace character
|
|
|
|
|
|
read. Raise an error on end-of-file. */
|
|
|
|
|
|
static int
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
flush_ws (SCM port, scm_t_read_opts *opts, const char *eoferr)
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
2012-02-08 03:00:15 -05:00
|
|
|
|
scm_t_wchar c;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
while (1)
|
2016-04-22 21:45:55 +02:00
|
|
|
|
switch (c = scm_getc (port))
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
|
|
|
|
|
case EOF:
|
|
|
|
|
|
goteof:
|
|
|
|
|
|
if (eoferr)
|
2000-08-06 22:04:11 +00:00
|
|
|
|
{
|
2004-10-26 17:00:13 +00:00
|
|
|
|
scm_i_input_error (eoferr,
|
|
|
|
|
|
port,
|
|
|
|
|
|
"end of file",
|
|
|
|
|
|
SCM_EOL);
|
2000-08-06 22:04:11 +00:00
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
return c;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
case ';':
|
|
|
|
|
|
lp:
|
2016-04-22 21:45:55 +02:00
|
|
|
|
switch (c = scm_getc (port))
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
|
|
|
|
|
case EOF:
|
|
|
|
|
|
goto goteof;
|
|
|
|
|
|
default:
|
|
|
|
|
|
goto lp;
|
|
|
|
|
|
case SCM_LINE_INCREMENTORS:
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
break;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2007-09-03 16:58:20 +00:00
|
|
|
|
case '#':
|
2016-04-22 21:45:55 +02:00
|
|
|
|
switch (c = scm_getc (port))
|
2007-09-03 16:58:20 +00:00
|
|
|
|
{
|
|
|
|
|
|
case EOF:
|
|
|
|
|
|
eoferr = "read_sharp";
|
|
|
|
|
|
goto goteof;
|
|
|
|
|
|
case '!':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_shebang (c, port, opts);
|
2007-09-03 16:58:20 +00:00
|
|
|
|
break;
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
case ';':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_commented_expression (c, port, opts);
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
break;
|
2009-10-19 22:38:34 +02:00
|
|
|
|
case '|':
|
|
|
|
|
|
if (scm_is_false (scm_get_hash_procedure (c)))
|
|
|
|
|
|
{
|
|
|
|
|
|
scm_read_r6rs_block_comment (c, port);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
/* fall through */
|
2007-09-03 16:58:20 +00:00
|
|
|
|
default:
|
2011-11-08 00:14:16 +01:00
|
|
|
|
scm_ungetc_unlocked (c, port);
|
2007-09-03 16:58:20 +00:00
|
|
|
|
return '#';
|
|
|
|
|
|
}
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
case SCM_LINE_INCREMENTORS:
|
|
|
|
|
|
case SCM_SINGLE_SPACES:
|
|
|
|
|
|
case '\t':
|
|
|
|
|
|
break;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
default:
|
|
|
|
|
|
return c;
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
return 0;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* Token readers. */
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
/* Forward declarations of readers that are referenced before their
   definitions.  scm_read_expression reads one datum from PORT using the
   read options in OPTS; scm_read_sexp calls it for every list element.
   NOTE(review): scm_read_sharp is presumably defined later in this file
   and handles `#' syntax, with LINE/COLUMN being the source position of
   the datum -- confirm against the definition.  */
static SCM scm_read_expression (SCM port, scm_t_read_opts *opts);
|
|
|
|
|
|
static SCM scm_read_sharp (int chr, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column);
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
2012-02-08 03:10:11 -05:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column)
|
2012-02-08 03:10:11 -05:00
|
|
|
|
{
|
2014-07-21 21:37:20 +02:00
|
|
|
|
/* This condition can be caused by a user calling
|
|
|
|
|
|
set-port-column!. */
|
|
|
|
|
|
if (line < 0 || column < 0)
|
|
|
|
|
|
return x;
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->record_positions_p)
|
2012-02-08 03:10:11 -05:00
|
|
|
|
scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
|
|
|
|
|
|
return x;
|
|
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
1996-09-18 19:35:48 +00:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_sexp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#define FUNC_NAME "scm_i_lreadparen"
|
1996-09-18 19:35:48 +00:00
|
|
|
|
{
|
2011-05-24 21:25:11 +02:00
|
|
|
|
int c;
|
|
|
|
|
|
SCM tmp, tl, ans = SCM_EOL;
|
2012-10-26 17:20:16 -04:00
|
|
|
|
const int curly_list_p = (chr == '{') && opts->curly_infix_p;
|
|
|
|
|
|
const int terminating_char = ((chr == '{') ? '}'
|
|
|
|
|
|
: ((chr == '[') ? ']'
|
|
|
|
|
|
: ')'));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* Need to capture line and column numbers here. */
|
|
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
1996-10-25 08:30:26 +00:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
c = flush_ws (port, opts, FUNC_NAME);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (terminating_char == c)
|
|
|
|
|
|
return SCM_EOL;
|
1996-10-25 08:30:26 +00:00
|
|
|
|
|
2011-11-08 00:14:16 +01:00
|
|
|
|
scm_ungetc_unlocked (c, port);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
tmp = scm_read_expression (port, opts);
|
2011-07-01 12:20:52 +02:00
|
|
|
|
|
|
|
|
|
|
/* Note that it is possible for scm_read_expression to return
|
|
|
|
|
|
scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So
|
|
|
|
|
|
check that it's a real dot by checking `c'. */
|
|
|
|
|
|
if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
ans = scm_read_expression (port, opts);
|
|
|
|
|
|
if (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
|
2007-07-22 16:30:13 +00:00
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "missing close paren",
|
|
|
|
|
|
SCM_EOL);
|
|
|
|
|
|
return ans;
|
|
|
|
|
|
}
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* Build the head of the list structure. */
|
|
|
|
|
|
ans = tl = scm_cons (tmp, SCM_EOL);
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
while (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
2007-07-22 16:30:13 +00:00
|
|
|
|
SCM new_tail;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-10-26 17:20:16 -04:00
|
|
|
|
if (c == ')' || (c == ']' && opts->square_brackets_p)
|
|
|
|
|
|
|| ((c == '}' || c == ']') && opts->curly_infix_p))
|
2010-07-13 21:53:41 +02:00
|
|
|
|
scm_i_input_error (FUNC_NAME, port,
|
|
|
|
|
|
"in pair: mismatched close paren: ~A",
|
|
|
|
|
|
scm_list_1 (SCM_MAKE_CHAR (c)));
|
|
|
|
|
|
|
2011-11-08 00:14:16 +01:00
|
|
|
|
scm_ungetc_unlocked (c, port);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
tmp = scm_read_expression (port, opts);
|
2010-07-13 21:53:41 +02:00
|
|
|
|
|
2011-07-01 12:20:52 +02:00
|
|
|
|
/* See above note about scm_sym_dot. */
|
|
|
|
|
|
if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
SCM_SETCDR (tl, scm_read_expression (port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
c = flush_ws (port, opts, FUNC_NAME);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (terminating_char != c)
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port,
|
|
|
|
|
|
"in pair: missing close paren", SCM_EOL);
|
2012-10-26 17:20:16 -04:00
|
|
|
|
break;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2001-06-27 13:15:20 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
new_tail = scm_cons (tmp, SCM_EOL);
|
|
|
|
|
|
SCM_SETCDR (tl, new_tail);
|
|
|
|
|
|
tl = new_tail;
|
|
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-10-26 17:20:16 -04:00
|
|
|
|
if (curly_list_p)
|
|
|
|
|
|
{
|
|
|
|
|
|
/* In addition to finding the length, 'scm_ilength' checks for
|
|
|
|
|
|
improper or circular lists, in which case it returns -1. */
|
|
|
|
|
|
int len = scm_ilength (ans);
|
|
|
|
|
|
|
|
|
|
|
|
/* The (len == 0) case is handled above */
|
|
|
|
|
|
if (len == 1)
|
|
|
|
|
|
/* Return directly to avoid re-annotating the element's source
|
|
|
|
|
|
location with the position of the outer brace. Also, it
|
|
|
|
|
|
might not be possible to annotate the element. */
|
|
|
|
|
|
return scm_car (ans); /* {e} => e */
|
|
|
|
|
|
else if (len == 2)
|
|
|
|
|
|
; /* Leave the list unchanged: {e1 e2} => (e1 e2) */
|
|
|
|
|
|
else if (len >= 3 && (len & 1))
|
|
|
|
|
|
{
|
|
|
|
|
|
/* It's a proper list whose length is odd and at least 3. If
|
|
|
|
|
|
the elements at odd indices (the infix operator positions)
|
|
|
|
|
|
are all 'equal?', then it's a simple curly-infix list.
|
|
|
|
|
|
Otherwise it's a mixed curly-infix list. */
|
|
|
|
|
|
SCM op = scm_cadr (ans);
|
|
|
|
|
|
|
|
|
|
|
|
/* Check to see if the elements at odd indices are 'equal?' */
|
|
|
|
|
|
for (tl = scm_cdddr (ans); ; tl = scm_cddr (tl))
|
|
|
|
|
|
{
|
|
|
|
|
|
if (scm_is_null (tl))
|
|
|
|
|
|
{
|
|
|
|
|
|
/* Convert simple curly-infix list to prefix:
|
|
|
|
|
|
{a <op> b <op> ...} => (<op> a b ...) */
|
|
|
|
|
|
tl = ans;
|
|
|
|
|
|
while (scm_is_pair (scm_cdr (tl)))
|
|
|
|
|
|
{
|
|
|
|
|
|
tmp = scm_cddr (tl);
|
|
|
|
|
|
SCM_SETCDR (tl, tmp);
|
|
|
|
|
|
tl = tmp;
|
|
|
|
|
|
}
|
|
|
|
|
|
ans = scm_cons (op, ans);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (scm_is_false (scm_equal_p (op, scm_car (tl))))
|
|
|
|
|
|
{
|
|
|
|
|
|
/* Mixed curly-infix list: {e ...} => ($nfx$ e ...) */
|
|
|
|
|
|
ans = scm_cons (sym_nfx, ans);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
/* Mixed curly-infix (possibly improper) list:
|
|
|
|
|
|
{e . tail} => ($nfx$ e . tail) */
|
|
|
|
|
|
ans = scm_cons (sym_nfx, ans);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return maybe_annotate_source (ans, port, opts, line, column);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
#undef FUNC_NAME
|
2004-10-26 17:00:13 +00:00
|
|
|
|
|
2010-01-10 18:24:23 -08:00
|
|
|
|
|
|
|
|
|
|
/* Read a hexadecimal number of up to NDIGITS digits and put its value
   into the variable C.  If TERMINATOR is non-null, stop early when the
   TERMINATOR character is read after at least one digit.

   The expansion uses names from the scope in which it appears:
     port         - the port characters are read from (via scm_getc);
     c            - receives the value; on a non-hexadecimal character
                    it receives that character instead;
     str_eof      - label jumped to when end of file is reached;
     bad_escaped  - label jumped to on a non-hexadecimal character.

   NDIGITS and TERMINATOR are parenthesized in the expansion so that
   expression arguments (e.g. a conditional expression) are evaluated as
   a unit.  Both are evaluated on every iteration, so they should be
   free of side effects.  */
#define SCM_READ_HEX_ESCAPE(ndigits, terminator)                   \
  do                                                               \
    {                                                              \
      scm_t_wchar a;                                               \
      size_t i = 0;                                                \
      c = 0;                                                       \
      while (i < (ndigits))                                        \
        {                                                          \
          a = scm_getc (port);                                     \
          if (a == EOF)                                            \
            goto str_eof;                                          \
          if ((terminator)                                         \
              && (a == (scm_t_wchar) (terminator))                 \
              && (i > 0))                                          \
            break;                                                 \
          if ('0' <= a && a <= '9')                                \
            a -= '0';                                              \
          else if ('A' <= a && a <= 'F')                           \
            a = a - 'A' + 10;                                      \
          else if ('a' <= a && a <= 'f')                           \
            a = a - 'a' + 10;                                      \
          else                                                     \
            {                                                      \
              c = a;                                               \
              goto bad_escaped;                                    \
            }                                                      \
          c = c * 16 + a;                                          \
          i ++;                                                    \
        }                                                          \
    } while (0)
|
|
|
|
|
|
|
2011-01-21 08:57:39 +01:00
|
|
|
|
static void
|
|
|
|
|
|
skip_intraline_whitespace (SCM port)
|
|
|
|
|
|
{
|
|
|
|
|
|
scm_t_wchar c;
|
|
|
|
|
|
|
|
|
|
|
|
do
|
|
|
|
|
|
{
|
2016-04-22 21:45:55 +02:00
|
|
|
|
c = scm_getc (port);
|
2011-01-21 08:57:39 +01:00
|
|
|
|
if (c == EOF)
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
while (c == '\t' || uc_is_general_category (c, UC_SPACE_SEPARATOR));
|
|
|
|
|
|
|
2011-11-08 00:14:16 +01:00
|
|
|
|
scm_ungetc_unlocked (c, port);
|
2011-01-21 08:57:39 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
2014-01-12 07:55:22 -05:00
|
|
|
|
/* Read either a double-quoted string or an R7RS-style symbol delimited
|
|
|
|
|
|
by vertical lines, depending on the value of 'chr' ('"' or '|').
|
|
|
|
|
|
Regardless, the result is always returned as a string. */
|
2007-07-22 16:30:13 +00:00
|
|
|
|
static SCM
|
2014-01-12 07:55:22 -05:00
|
|
|
|
scm_read_string_like_syntax (int chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#define FUNC_NAME "scm_lreadr"
|
|
|
|
|
|
{
|
|
|
|
|
|
/* For strings smaller than C_STR, this function creates only one Scheme
|
|
|
|
|
|
object (the string returned). */
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-05-07 00:32:01 +02:00
|
|
|
|
SCM str = SCM_EOL;
|
|
|
|
|
|
size_t c_str_len = 0;
|
|
|
|
|
|
scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE];
|
2004-10-29 14:45:19 +00:00
|
|
|
|
|
2012-02-08 15:51:38 -05:00
|
|
|
|
/* Need to capture line and column numbers here. */
|
|
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
|
|
|
|
|
|
2016-04-22 21:45:55 +02:00
|
|
|
|
while (chr != (c = scm_getc (port)))
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
if (c == EOF)
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
{
|
|
|
|
|
|
str_eof:
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port,
|
2014-01-12 07:55:22 -05:00
|
|
|
|
(chr == '|'
|
|
|
|
|
|
? "end of file in symbol"
|
|
|
|
|
|
: "end of file in string constant"),
|
|
|
|
|
|
SCM_EOL);
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-05-07 00:32:01 +02:00
|
|
|
|
if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
|
|
|
|
|
|
{
|
|
|
|
|
|
str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
|
|
|
|
|
|
c_str_len = 0;
|
|
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (c == '\\')
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
{
|
2016-04-22 21:45:55 +02:00
|
|
|
|
switch (c = scm_getc (port))
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
{
|
|
|
|
|
|
case EOF:
|
|
|
|
|
|
goto str_eof;
|
2014-01-12 04:36:29 -05:00
|
|
|
|
case '|':
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
case '\\':
|
2014-10-01 20:45:41 -04:00
|
|
|
|
case '(': /* Accept "\(" for use at the beginning of lines
|
|
|
|
|
|
in multiline strings to avoid confusing emacs
|
|
|
|
|
|
lisp modes. */
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
break;
|
|
|
|
|
|
case '\n':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->hungry_eol_escapes_p)
|
2011-01-21 08:57:39 +01:00
|
|
|
|
skip_intraline_whitespace (port);
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
continue;
|
|
|
|
|
|
case '0':
|
|
|
|
|
|
c = '\0';
|
|
|
|
|
|
break;
|
|
|
|
|
|
case 'f':
|
|
|
|
|
|
c = '\f';
|
|
|
|
|
|
break;
|
|
|
|
|
|
case 'n':
|
|
|
|
|
|
c = '\n';
|
|
|
|
|
|
break;
|
|
|
|
|
|
case 'r':
|
|
|
|
|
|
c = '\r';
|
|
|
|
|
|
break;
|
|
|
|
|
|
case 't':
|
|
|
|
|
|
c = '\t';
|
|
|
|
|
|
break;
|
|
|
|
|
|
case 'a':
|
|
|
|
|
|
c = '\007';
|
|
|
|
|
|
break;
|
|
|
|
|
|
case 'v':
|
|
|
|
|
|
c = '\v';
|
|
|
|
|
|
break;
|
2010-01-10 15:41:37 -08:00
|
|
|
|
case 'b':
|
|
|
|
|
|
c = '\010';
|
|
|
|
|
|
break;
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
case 'x':
|
2014-01-12 07:55:22 -05:00
|
|
|
|
if (opts->r6rs_escapes_p || chr == '|')
|
2010-01-12 21:02:41 -08:00
|
|
|
|
SCM_READ_HEX_ESCAPE (10, ';');
|
|
|
|
|
|
else
|
|
|
|
|
|
SCM_READ_HEX_ESCAPE (2, '\0');
|
2010-01-10 18:24:23 -08:00
|
|
|
|
break;
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
case 'u':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (!opts->r6rs_escapes_p)
|
2010-01-13 07:02:07 -08:00
|
|
|
|
{
|
|
|
|
|
|
SCM_READ_HEX_ESCAPE (4, '\0');
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
case 'U':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (!opts->r6rs_escapes_p)
|
2010-01-13 07:02:07 -08:00
|
|
|
|
{
|
|
|
|
|
|
SCM_READ_HEX_ESCAPE (6, '\0');
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
default:
|
2014-01-12 07:55:22 -05:00
|
|
|
|
if (c == chr)
|
|
|
|
|
|
break;
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
bad_escaped:
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port,
|
|
|
|
|
|
"illegal character in escape sequence: ~S",
|
|
|
|
|
|
scm_list_1 (SCM_MAKE_CHAR (c)));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2012-05-07 00:32:01 +02:00
|
|
|
|
|
|
|
|
|
|
c_str[c_str_len++] = c;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
2012-05-07 00:32:01 +02:00
|
|
|
|
|
|
|
|
|
|
if (scm_is_null (str))
|
|
|
|
|
|
/* Fast path: we got a string that fits in C_STR. */
|
|
|
|
|
|
str = scm_from_utf32_stringn (c_str, c_str_len);
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
if (c_str_len > 0)
|
|
|
|
|
|
str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
|
|
|
|
|
|
|
|
|
|
|
|
str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return maybe_annotate_source (str, port, opts, line, column);
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2001-03-04 17:09:34 +00:00
|
|
|
|
#undef FUNC_NAME
|
|
|
|
|
|
|
2014-01-12 07:55:22 -05:00
|
|
|
|
static SCM
|
|
|
|
|
|
scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
|
|
|
|
|
|
{
|
|
|
|
|
|
return scm_read_string_like_syntax (chr, port, opts);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
|
|
|
|
|
scm_read_r7rs_symbol (int chr, SCM port, scm_t_read_opts *opts)
|
|
|
|
|
|
{
|
|
|
|
|
|
return scm_string_to_symbol (scm_read_string_like_syntax (chr, port, opts));
|
|
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
2010-02-02 20:33:41 -08:00
|
|
|
|
SCM result, str = SCM_EOL;
|
2012-05-04 22:36:27 +02:00
|
|
|
|
char local_buffer[READER_BUFFER_SIZE], *buffer;
|
2010-02-02 20:33:41 -08:00
|
|
|
|
size_t bytes_read;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-02-15 11:47:31 -05:00
|
|
|
|
/* Need to capture line and column numbers here. */
|
|
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
|
|
|
|
|
|
2011-11-08 00:14:16 +01:00
|
|
|
|
scm_ungetc_unlocked (chr, port);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
|
2012-05-04 22:36:27 +02:00
|
|
|
|
&bytes_read);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
|
2013-01-15 14:41:26 +01:00
|
|
|
|
str = scm_from_port_stringn (buffer, bytes_read, port);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
|
|
|
|
|
|
result = scm_string_to_number (str, SCM_UNDEFINED);
|
2012-02-15 11:47:31 -05:00
|
|
|
|
if (scm_is_false (result))
|
2010-02-02 20:33:41 -08:00
|
|
|
|
{
|
|
|
|
|
|
/* Return a symbol instead of a number */
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->case_insensitive_p)
|
2010-02-02 20:33:41 -08:00
|
|
|
|
str = scm_string_downcase_x (str);
|
|
|
|
|
|
result = scm_string_to_symbol (str);
|
|
|
|
|
|
}
|
2012-02-15 11:47:31 -05:00
|
|
|
|
else if (SCM_NIMP (result))
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
result = maybe_annotate_source (result, port, opts, line, column);
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
SCM_COL (port) += scm_i_string_length (str);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
return result;
|
|
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
SCM result;
|
|
|
|
|
|
int ends_with_colon = 0;
|
2010-02-02 20:33:41 -08:00
|
|
|
|
size_t bytes_read;
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
int postfix = (opts->keyword_style == KEYWORD_STYLE_POSTFIX);
|
2012-05-04 22:36:27 +02:00
|
|
|
|
char local_buffer[READER_BUFFER_SIZE], *buffer;
|
2010-02-02 20:33:41 -08:00
|
|
|
|
SCM str;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2011-11-08 00:14:16 +01:00
|
|
|
|
scm_ungetc_unlocked (chr, port);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
|
2012-05-04 22:36:27 +02:00
|
|
|
|
&bytes_read);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (bytes_read > 0)
|
2012-05-04 22:36:27 +02:00
|
|
|
|
ends_with_colon = buffer[bytes_read - 1] == ':';
|
2008-04-15 19:52:43 +02:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (postfix && ends_with_colon && (bytes_read > 1))
|
|
|
|
|
|
{
|
2013-01-15 14:41:26 +01:00
|
|
|
|
str = scm_from_port_stringn (buffer, bytes_read - 1, port);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->case_insensitive_p)
|
2010-02-02 20:33:41 -08:00
|
|
|
|
str = scm_string_downcase_x (str);
|
|
|
|
|
|
result = scm_symbol_to_keyword (scm_string_to_symbol (str));
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
else
|
2010-02-02 20:33:41 -08:00
|
|
|
|
{
|
2013-01-15 14:41:26 +01:00
|
|
|
|
str = scm_from_port_stringn (buffer, bytes_read, port);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->case_insensitive_p)
|
2010-02-02 20:33:41 -08:00
|
|
|
|
str = scm_string_downcase_x (str);
|
|
|
|
|
|
result = scm_string_to_symbol (str);
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
SCM_COL (port) += scm_i_string_length (str);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
return result;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#define FUNC_NAME "scm_lreadr"
|
|
|
|
|
|
{
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
SCM result;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
size_t read;
|
2012-05-04 22:36:27 +02:00
|
|
|
|
char local_buffer[READER_BUFFER_SIZE], *buffer;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
unsigned int radix;
|
2010-02-02 20:33:41 -08:00
|
|
|
|
SCM str;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
switch (chr)
|
|
|
|
|
|
{
|
|
|
|
|
|
case 'B':
|
|
|
|
|
|
case 'b':
|
|
|
|
|
|
radix = 2;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case 'o':
|
|
|
|
|
|
case 'O':
|
|
|
|
|
|
radix = 8;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case 'd':
|
|
|
|
|
|
case 'D':
|
|
|
|
|
|
radix = 10;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case 'x':
|
|
|
|
|
|
case 'X':
|
|
|
|
|
|
radix = 16;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
default:
|
2011-11-08 00:14:16 +01:00
|
|
|
|
scm_ungetc_unlocked (chr, port);
|
|
|
|
|
|
scm_ungetc_unlocked ('#', port);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
radix = 10;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
|
2012-05-04 22:36:27 +02:00
|
|
|
|
&read);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
|
2013-01-15 14:41:26 +01:00
|
|
|
|
str = scm_from_port_stringn (buffer, read, port);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
|
|
|
|
|
|
result = scm_string_to_number (str, scm_from_uint (radix));
|
|
|
|
|
|
|
|
|
|
|
|
SCM_COL (port) += scm_i_string_length (str);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
if (scm_is_true (result))
|
|
|
|
|
|
return result;
|
|
|
|
|
|
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
|
|
|
|
|
|
|
|
|
|
|
|
return SCM_BOOL_F;
|
|
|
|
|
|
}
|
|
|
|
|
|
#undef FUNC_NAME
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_quote (int chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
SCM p;
|
2007-08-23 21:17:24 +00:00
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
switch (chr)
|
|
|
|
|
|
{
|
|
|
|
|
|
case '`':
|
|
|
|
|
|
p = scm_sym_quasiquote;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case '\'':
|
|
|
|
|
|
p = scm_sym_quote;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case ',':
|
|
|
|
|
|
{
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_t_wchar c;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2016-04-22 21:45:55 +02:00
|
|
|
|
c = scm_getc (port);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if ('@' == c)
|
|
|
|
|
|
p = scm_sym_uq_splicing;
|
|
|
|
|
|
else
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
2011-11-08 00:14:16 +01:00
|
|
|
|
scm_ungetc_unlocked (c, port);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
p = scm_sym_unquote;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
break;
|
|
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
default:
|
|
|
|
|
|
fprintf (stderr, "%s: unhandled quote character (%i)\n",
|
2008-02-07 09:54:47 +00:00
|
|
|
|
"scm_read_quote", chr);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
abort ();
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
|
|
|
|
|
|
return maybe_annotate_source (p, port, opts, line, column);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
/* Symbols that scm_read_syntax places at the head of the two-element
   list it builds: `syntax', `quasisyntax', `unsyntax' and
   `unsyntax-splicing'.  (SCM_SYMBOL presumably defines each variable
   as the symbol with the given name -- confirm against its
   definition.)  */
SCM_SYMBOL (sym_syntax, "syntax");
|
|
|
|
|
|
SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
|
|
|
|
|
|
SCM_SYMBOL (sym_unsyntax, "unsyntax");
|
|
|
|
|
|
SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_syntax (int chr, SCM port, scm_t_read_opts *opts)
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
{
|
|
|
|
|
|
SCM p;
|
|
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
|
|
|
|
|
|
|
|
|
|
|
switch (chr)
|
|
|
|
|
|
{
|
|
|
|
|
|
case '`':
|
|
|
|
|
|
p = sym_quasisyntax;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case '\'':
|
|
|
|
|
|
p = sym_syntax;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case ',':
|
|
|
|
|
|
{
|
|
|
|
|
|
int c;
|
|
|
|
|
|
|
2016-04-22 21:45:55 +02:00
|
|
|
|
c = scm_getc (port);
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
if ('@' == c)
|
|
|
|
|
|
p = sym_unsyntax_splicing;
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
2011-11-08 00:14:16 +01:00
|
|
|
|
scm_ungetc_unlocked (c, port);
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
p = sym_unsyntax;
|
|
|
|
|
|
}
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
fprintf (stderr, "%s: unhandled syntax character (%i)\n",
|
|
|
|
|
|
"scm_read_syntax", chr);
|
|
|
|
|
|
abort ();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
|
|
|
|
|
|
return maybe_annotate_source (p, port, opts, line, column);
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_nil (int chr, SCM port, scm_t_read_opts *opts)
|
2010-04-09 14:15:16 +02:00
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
SCM id = scm_read_mixed_case_symbol (chr, port, opts);
|
2010-04-09 14:15:16 +02:00
|
|
|
|
|
|
|
|
|
|
if (!scm_is_eq (id, sym_nil))
|
|
|
|
|
|
scm_i_input_error ("scm_read_nil", port,
|
|
|
|
|
|
"unexpected input while reading #nil: ~a",
|
|
|
|
|
|
scm_list_1 (id));
|
|
|
|
|
|
|
|
|
|
|
|
return SCM_ELISP_NIL;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM
|
2007-07-22 16:30:13 +00:00
|
|
|
|
scm_read_semicolon_comment (int chr, SCM port)
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
|
|
|
|
|
int c;
|
|
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
/* We use the get_byte here because there is no need to get the
|
|
|
|
|
|
locale correct with comment input. This presumes that newline
|
|
|
|
|
|
always represents itself no matter what the encoding is. */
|
2016-04-22 17:12:58 +02:00
|
|
|
|
for (c = scm_get_byte_or_eof (port);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
(c != EOF) && (c != '\n');
|
2016-04-22 17:12:58 +02:00
|
|
|
|
c = scm_get_byte_or_eof (port));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2014-01-12 04:36:02 -05:00
|
|
|
|
/* If the EXPECTED_CHARS are the next ones available from PORT, then
|
|
|
|
|
|
consume them and return 1. Otherwise leave the port position where
|
|
|
|
|
|
it was and return 0. EXPECTED_CHARS should be all lowercase, and
|
|
|
|
|
|
will be matched case-insensitively against the characters read from
|
|
|
|
|
|
PORT. */
|
|
|
|
|
|
static int
|
|
|
|
|
|
try_read_ci_chars (SCM port, const char *expected_chars)
|
|
|
|
|
|
{
|
|
|
|
|
|
int num_chars_wanted = strlen (expected_chars);
|
|
|
|
|
|
int num_chars_read = 0;
|
|
|
|
|
|
char *chars_read = alloca (num_chars_wanted);
|
|
|
|
|
|
int c;
|
|
|
|
|
|
|
|
|
|
|
|
while (num_chars_read < num_chars_wanted)
|
|
|
|
|
|
{
|
2016-04-22 21:45:55 +02:00
|
|
|
|
c = scm_getc (port);
|
2014-01-12 04:36:02 -05:00
|
|
|
|
if (c == EOF)
|
|
|
|
|
|
break;
|
2014-01-14 03:13:58 -05:00
|
|
|
|
else if (c_tolower (c) != expected_chars[num_chars_read])
|
2014-01-12 04:36:02 -05:00
|
|
|
|
{
|
2014-01-14 03:18:34 -05:00
|
|
|
|
scm_ungetc_unlocked (c, port);
|
2014-01-12 04:36:02 -05:00
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
chars_read[num_chars_read++] = c;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (num_chars_read == num_chars_wanted)
|
|
|
|
|
|
return 1;
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
while (num_chars_read > 0)
|
2014-01-14 03:18:34 -05:00
|
|
|
|
scm_ungetc_unlocked (chars_read[--num_chars_read], port);
|
2014-01-12 04:36:02 -05:00
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* Sharp readers, i.e. readers called after a `#' sign has been read. */
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
|
|
|
|
|
scm_read_boolean (int chr, SCM port)
|
|
|
|
|
|
{
|
|
|
|
|
|
switch (chr)
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case 't':
|
|
|
|
|
|
case 'T':
|
2014-01-12 04:36:02 -05:00
|
|
|
|
try_read_ci_chars (port, "rue");
|
2007-07-22 16:30:13 +00:00
|
|
|
|
return SCM_BOOL_T;
|
|
|
|
|
|
|
|
|
|
|
|
case 'f':
|
|
|
|
|
|
case 'F':
|
2014-01-12 04:36:02 -05:00
|
|
|
|
try_read_ci_chars (port, "alse");
|
2007-07-22 16:30:13 +00:00
|
|
|
|
return SCM_BOOL_F;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#define FUNC_NAME "scm_lreadr"
|
|
|
|
|
|
{
|
2010-02-02 20:33:41 -08:00
|
|
|
|
char buffer[READER_CHAR_NAME_MAX_SIZE];
|
|
|
|
|
|
SCM charname;
|
|
|
|
|
|
size_t charname_len, bytes_read;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_t_wchar cp;
|
|
|
|
|
|
int overflow;
|
2013-04-14 02:48:33 -04:00
|
|
|
|
scm_t_port_internal *pti;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE,
|
|
|
|
|
|
&bytes_read);
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
if (overflow)
|
2010-10-18 13:29:58 +02:00
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (bytes_read == 0)
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
2016-04-22 21:45:55 +02:00
|
|
|
|
chr = scm_getc (port);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (chr == EOF)
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
|
|
|
|
|
|
"while reading character", SCM_EOL);
|
|
|
|
|
|
|
|
|
|
|
|
/* CHR must be a token delimiter, like a whitespace. */
|
|
|
|
|
|
return (SCM_MAKE_CHAR (chr));
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2013-04-14 02:48:33 -04:00
|
|
|
|
pti = SCM_PORT_GET_INTERNAL (port);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
/* Simple ASCII characters can be processed immediately. Also, simple
|
|
|
|
|
|
ISO-8859-1 characters can be processed immediately if the encoding for this
|
|
|
|
|
|
port is ISO-8859-1. */
|
2013-01-15 15:05:40 +01:00
|
|
|
|
if (bytes_read == 1 &&
|
|
|
|
|
|
((unsigned char) buffer[0] <= 127
|
2013-04-14 02:48:33 -04:00
|
|
|
|
|| pti->encoding_mode == SCM_PORT_ENCODING_MODE_LATIN1))
|
2010-02-02 20:33:41 -08:00
|
|
|
|
{
|
|
|
|
|
|
SCM_COL (port) += 1;
|
|
|
|
|
|
return SCM_MAKE_CHAR (buffer[0]);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Otherwise, convert the buffer into a proper scheme string for
|
|
|
|
|
|
processing. */
|
2013-01-15 14:41:26 +01:00
|
|
|
|
charname = scm_from_port_stringn (buffer, bytes_read, port);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
charname_len = scm_i_string_length (charname);
|
|
|
|
|
|
SCM_COL (port) += charname_len;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
cp = scm_i_string_ref (charname, 0);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (charname_len == 1)
|
|
|
|
|
|
return SCM_MAKE_CHAR (cp);
|
|
|
|
|
|
|
|
|
|
|
|
/* Ignore dotted circles, which may be used to keep combining characters from
|
|
|
|
|
|
combining with the backslash in #\charname. */
|
2009-09-03 07:47:26 -07:00
|
|
|
|
if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
|
|
|
|
|
|
return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
|
|
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
if (cp >= '0' && cp < '8')
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
/* Dirk:FIXME:: This type of character syntax is not R5RS
|
|
|
|
|
|
* compliant. Further, it should be verified that the constant
|
2009-08-29 07:14:49 -07:00
|
|
|
|
* does only consist of octal digits. */
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
SCM p = scm_string_to_number (charname, scm_from_uint (8));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (SCM_I_INUMP (p))
|
2009-08-29 07:14:49 -07:00
|
|
|
|
{
|
2010-11-19 11:29:26 +01:00
|
|
|
|
scm_t_wchar c = scm_to_uint32 (p);
|
2009-08-29 07:14:49 -07:00
|
|
|
|
if (SCM_IS_UNICODE_CHAR (c))
|
|
|
|
|
|
return SCM_MAKE_CHAR (c);
|
|
|
|
|
|
else
|
2010-07-17 04:16:57 -07:00
|
|
|
|
scm_i_input_error (FUNC_NAME, port,
|
2009-08-29 07:14:49 -07:00
|
|
|
|
"out-of-range octal character escape: ~a",
|
|
|
|
|
|
scm_list_1 (charname));
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
2010-07-17 04:16:57 -07:00
|
|
|
|
if (cp == 'x' && (charname_len > 1))
|
2010-01-12 21:02:41 -08:00
|
|
|
|
{
|
|
|
|
|
|
SCM p;
|
2010-07-17 04:16:57 -07:00
|
|
|
|
|
2010-01-12 21:02:41 -08:00
|
|
|
|
/* Convert from hex, skipping the initial 'x' character in CHARNAME */
|
|
|
|
|
|
p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
|
|
|
|
|
|
scm_from_uint (16));
|
|
|
|
|
|
if (SCM_I_INUMP (p))
|
|
|
|
|
|
{
|
2010-11-19 11:29:26 +01:00
|
|
|
|
scm_t_wchar c = scm_to_uint32 (p);
|
2010-01-12 21:02:41 -08:00
|
|
|
|
if (SCM_IS_UNICODE_CHAR (c))
|
|
|
|
|
|
return SCM_MAKE_CHAR (c);
|
|
|
|
|
|
else
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port,
|
|
|
|
|
|
"out-of-range hex character escape: ~a",
|
|
|
|
|
|
scm_list_1 (charname));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
/* The names of characters should never have non-Latin1
|
|
|
|
|
|
characters. */
|
|
|
|
|
|
if (scm_i_is_narrow_string (charname)
|
|
|
|
|
|
|| scm_i_try_narrow_string (charname))
|
2009-08-26 13:15:07 +02:00
|
|
|
|
{ SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
|
|
|
|
|
|
charname_len);
|
|
|
|
|
|
if (scm_is_true (ch))
|
|
|
|
|
|
return ch;
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_list_1 (charname));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2001-03-04 17:09:34 +00:00
|
|
|
|
#undef FUNC_NAME
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_keyword (int chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
SCM symbol;
|
|
|
|
|
|
|
|
|
|
|
|
/* Read the symbol that comprises the keyword. Doing this instead of
|
|
|
|
|
|
invoking a specific symbol reader function allows `scm_read_keyword ()'
|
|
|
|
|
|
to adapt to the delimiters currently valid of symbols.
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
XXX: This implementation allows sloppy syntaxes like `#: key'. */
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
symbol = scm_read_expression (port, opts);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (!scm_is_symbol (symbol))
|
2008-02-07 09:54:47 +00:00
|
|
|
|
scm_i_input_error ("scm_read_keyword", port,
|
2007-07-22 16:30:13 +00:00
|
|
|
|
"keyword prefix `~a' not followed by a symbol: ~s",
|
|
|
|
|
|
scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
|
|
|
|
|
|
|
|
|
|
|
|
return (scm_symbol_to_keyword (symbol));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_vector (int chr, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column)
|
1996-09-18 19:35:48 +00:00
|
|
|
|
{
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* Note: We call `scm_read_sexp ()' rather than READER here in order to
|
|
|
|
|
|
guarantee that it's going to do what we want. After all, this is an
|
|
|
|
|
|
implementation detail of `scm_read_vector ()', not a desirable
|
|
|
|
|
|
property. */
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port, opts)),
|
|
|
|
|
|
port, opts, line, column);
|
2012-02-08 15:51:38 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
2012-10-22 23:23:45 -04:00
|
|
|
|
/* Helper used by scm_read_array */
|
|
|
|
|
|
static int
|
|
|
|
|
|
read_decimal_integer (SCM port, int c, ssize_t *resp)
|
|
|
|
|
|
{
|
|
|
|
|
|
ssize_t sign = 1;
|
|
|
|
|
|
ssize_t res = 0;
|
|
|
|
|
|
int got_it = 0;
|
|
|
|
|
|
|
|
|
|
|
|
if (c == '-')
|
|
|
|
|
|
{
|
|
|
|
|
|
sign = -1;
|
2016-04-22 21:45:55 +02:00
|
|
|
|
c = scm_getc (port);
|
2012-10-22 23:23:45 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
while ('0' <= c && c <= '9')
|
|
|
|
|
|
{
|
2013-12-24 08:00:51 -05:00
|
|
|
|
if (((SSIZE_MAX - (c-'0')) / 10) <= res)
|
|
|
|
|
|
scm_i_input_error ("read_decimal_integer", port,
|
|
|
|
|
|
"number too large", SCM_EOL);
|
2012-10-22 23:23:45 -04:00
|
|
|
|
res = 10*res + c-'0';
|
|
|
|
|
|
got_it = 1;
|
2016-04-22 21:45:55 +02:00
|
|
|
|
c = scm_getc (port);
|
2012-10-22 23:23:45 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (got_it)
|
|
|
|
|
|
*resp = sign * res;
|
|
|
|
|
|
return c;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
1996-09-18 19:35:48 +00:00
|
|
|
|
|
2012-10-22 23:23:45 -04:00
|
|
|
|
/* Read an array. This function can also read vectors and uniform
|
|
|
|
|
|
vectors. Also, the conflict between '#f' and '#f32' and '#f64' is
|
|
|
|
|
|
handled here.
|
|
|
|
|
|
|
2012-10-30 22:58:19 -04:00
|
|
|
|
C is the first character read after the '#'. */
|
2012-02-08 15:51:38 -05:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
2012-10-22 23:23:45 -04:00
|
|
|
|
ssize_t rank;
|
|
|
|
|
|
scm_t_wchar tag_buf[8];
|
|
|
|
|
|
int tag_len;
|
|
|
|
|
|
|
|
|
|
|
|
SCM tag, shape = SCM_BOOL_F, elements, array;
|
|
|
|
|
|
|
|
|
|
|
|
/* XXX - shortcut for ordinary vectors. Shouldn't be necessary but
|
|
|
|
|
|
the array code can not deal with zero-length dimensions yet, and
|
2012-10-30 22:58:19 -04:00
|
|
|
|
we want to allow zero-length vectors, of course. */
|
2012-10-22 23:23:45 -04:00
|
|
|
|
if (c == '(')
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return scm_read_vector (c, port, opts, line, column);
|
2012-10-22 23:23:45 -04:00
|
|
|
|
|
2012-10-30 22:58:19 -04:00
|
|
|
|
/* Disambiguate between '#f' and uniform floating point vectors. */
|
2012-10-22 23:23:45 -04:00
|
|
|
|
if (c == 'f')
|
|
|
|
|
|
{
|
2016-04-22 21:45:55 +02:00
|
|
|
|
c = scm_getc (port);
|
2012-10-22 23:23:45 -04:00
|
|
|
|
if (c != '3' && c != '6')
|
|
|
|
|
|
{
|
2014-01-12 04:36:02 -05:00
|
|
|
|
if (c == 'a' && try_read_ci_chars (port, "lse"))
|
|
|
|
|
|
return SCM_BOOL_F;
|
|
|
|
|
|
else if (c != EOF)
|
2014-01-14 03:18:34 -05:00
|
|
|
|
scm_ungetc_unlocked (c, port);
|
2012-10-22 23:23:45 -04:00
|
|
|
|
return SCM_BOOL_F;
|
|
|
|
|
|
}
|
|
|
|
|
|
rank = 1;
|
|
|
|
|
|
tag_buf[0] = 'f';
|
|
|
|
|
|
tag_len = 1;
|
|
|
|
|
|
goto continue_reading_tag;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Read rank. */
|
|
|
|
|
|
rank = 1;
|
|
|
|
|
|
c = read_decimal_integer (port, c, &rank);
|
|
|
|
|
|
if (rank < 0)
|
|
|
|
|
|
scm_i_input_error (NULL, port, "array rank must be non-negative",
|
|
|
|
|
|
SCM_EOL);
|
|
|
|
|
|
|
|
|
|
|
|
/* Read tag. */
|
|
|
|
|
|
tag_len = 0;
|
|
|
|
|
|
continue_reading_tag:
|
|
|
|
|
|
while (c != EOF && c != '(' && c != '@' && c != ':'
|
|
|
|
|
|
&& tag_len < sizeof tag_buf / sizeof tag_buf[0])
|
|
|
|
|
|
{
|
|
|
|
|
|
tag_buf[tag_len++] = c;
|
2016-04-22 21:45:55 +02:00
|
|
|
|
c = scm_getc (port);
|
2012-10-22 23:23:45 -04:00
|
|
|
|
}
|
|
|
|
|
|
if (tag_len == 0)
|
|
|
|
|
|
tag = SCM_BOOL_T;
|
2012-02-08 15:51:38 -05:00
|
|
|
|
else
|
2012-10-22 23:23:45 -04:00
|
|
|
|
{
|
|
|
|
|
|
tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len));
|
|
|
|
|
|
if (tag_len == sizeof tag_buf / sizeof tag_buf[0])
|
|
|
|
|
|
scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a",
|
|
|
|
|
|
scm_list_1 (tag));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Read shape. */
|
|
|
|
|
|
if (c == '@' || c == ':')
|
|
|
|
|
|
{
|
|
|
|
|
|
shape = SCM_EOL;
|
|
|
|
|
|
|
|
|
|
|
|
do
|
|
|
|
|
|
{
|
|
|
|
|
|
ssize_t lbnd = 0, len = 0;
|
|
|
|
|
|
SCM s;
|
|
|
|
|
|
|
|
|
|
|
|
if (c == '@')
|
|
|
|
|
|
{
|
2016-04-22 21:45:55 +02:00
|
|
|
|
c = scm_getc (port);
|
2012-10-22 23:23:45 -04:00
|
|
|
|
c = read_decimal_integer (port, c, &lbnd);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
s = scm_from_ssize_t (lbnd);
|
|
|
|
|
|
|
|
|
|
|
|
if (c == ':')
|
|
|
|
|
|
{
|
2016-04-22 21:45:55 +02:00
|
|
|
|
c = scm_getc (port);
|
2012-10-22 23:23:45 -04:00
|
|
|
|
c = read_decimal_integer (port, c, &len);
|
|
|
|
|
|
if (len < 0)
|
|
|
|
|
|
scm_i_input_error (NULL, port,
|
|
|
|
|
|
"array length must be non-negative",
|
|
|
|
|
|
SCM_EOL);
|
|
|
|
|
|
|
|
|
|
|
|
s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
shape = scm_cons (s, shape);
|
|
|
|
|
|
} while (c == '@' || c == ':');
|
|
|
|
|
|
|
|
|
|
|
|
shape = scm_reverse_x (shape, SCM_EOL);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Read nested lists of elements. */
|
|
|
|
|
|
if (c != '(')
|
|
|
|
|
|
scm_i_input_error (NULL, port,
|
|
|
|
|
|
"missing '(' in vector or array literal",
|
|
|
|
|
|
SCM_EOL);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
elements = scm_read_sexp (c, port, opts);
|
2012-10-22 23:23:45 -04:00
|
|
|
|
|
|
|
|
|
|
if (scm_is_false (shape))
|
|
|
|
|
|
shape = scm_from_ssize_t (rank);
|
|
|
|
|
|
else if (scm_ilength (shape) != rank)
|
|
|
|
|
|
scm_i_input_error
|
|
|
|
|
|
(NULL, port,
|
|
|
|
|
|
"the number of shape specifications must match the array rank",
|
|
|
|
|
|
SCM_EOL);
|
|
|
|
|
|
|
|
|
|
|
|
/* Handle special print syntax of rank zero arrays; see
|
|
|
|
|
|
scm_i_print_array for a rationale. */
|
|
|
|
|
|
if (rank == 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (!scm_is_pair (elements))
|
|
|
|
|
|
scm_i_input_error (NULL, port,
|
|
|
|
|
|
"too few elements in array literal, need 1",
|
|
|
|
|
|
SCM_EOL);
|
|
|
|
|
|
if (!scm_is_null (SCM_CDR (elements)))
|
|
|
|
|
|
scm_i_input_error (NULL, port,
|
|
|
|
|
|
"too many elements in array literal, want 1",
|
|
|
|
|
|
SCM_EOL);
|
|
|
|
|
|
elements = SCM_CAR (elements);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Construct array, annotate with source location, and return. */
|
|
|
|
|
|
array = scm_list_to_typed_array (tag, shape, elements);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return maybe_annotate_source (array, port, opts, line, column);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
2009-06-19 00:47:11 +02:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_srfi4_vector (int chr, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return scm_read_array (chr, port, opts, line, column);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
2009-06-19 00:47:11 +02:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column)
|
2009-06-19 00:47:11 +02:00
|
|
|
|
{
|
2016-04-22 21:45:55 +02:00
|
|
|
|
chr = scm_getc (port);
|
2009-06-19 00:47:11 +02:00
|
|
|
|
if (chr != 'u')
|
|
|
|
|
|
goto syntax;
|
|
|
|
|
|
|
2016-04-22 21:45:55 +02:00
|
|
|
|
chr = scm_getc (port);
|
2009-06-19 00:47:11 +02:00
|
|
|
|
if (chr != '8')
|
|
|
|
|
|
goto syntax;
|
|
|
|
|
|
|
2016-04-22 21:45:55 +02:00
|
|
|
|
chr = scm_getc (port);
|
2009-06-19 00:47:11 +02:00
|
|
|
|
if (chr != '(')
|
|
|
|
|
|
goto syntax;
|
|
|
|
|
|
|
2012-02-08 15:51:38 -05:00
|
|
|
|
return maybe_annotate_source
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
(scm_u8_list_to_bytevector (scm_read_sexp (chr, port, opts)),
|
|
|
|
|
|
port, opts, line, column);
|
2009-06-19 00:47:11 +02:00
|
|
|
|
|
|
|
|
|
|
syntax:
|
|
|
|
|
|
scm_i_input_error ("read_bytevector", port,
|
|
|
|
|
|
"invalid bytevector prefix",
|
|
|
|
|
|
SCM_MAKE_CHAR (chr));
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
/* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
|
|
|
|
|
|
terribly inefficient but who cares? */
|
|
|
|
|
|
SCM s_bits = SCM_EOL;
|
|
|
|
|
|
|
2016-04-22 21:45:55 +02:00
|
|
|
|
for (chr = scm_getc (port);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
(chr != EOF) && ((chr == '0') || (chr == '1'));
|
2016-04-22 21:45:55 +02:00
|
|
|
|
chr = scm_getc (port))
|
1996-09-18 19:35:48 +00:00
|
|
|
|
{
|
2007-07-22 16:30:13 +00:00
|
|
|
|
s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
|
1996-09-18 19:35:48 +00:00
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
if (chr != EOF)
|
2011-11-08 00:14:16 +01:00
|
|
|
|
scm_ungetc_unlocked (chr, port);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2012-02-08 15:51:38 -05:00
|
|
|
|
return maybe_annotate_source
|
|
|
|
|
|
(scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
port, opts, line, column);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
int bang_seen = 0;
|
|
|
|
|
|
|
|
|
|
|
|
for (;;)
|
1996-09-18 19:35:48 +00:00
|
|
|
|
{
|
2016-04-22 21:45:55 +02:00
|
|
|
|
int c = scm_getc (port);
|
2000-07-18 16:09:09 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (c == EOF)
|
|
|
|
|
|
scm_i_input_error ("skip_block_comment", port,
|
|
|
|
|
|
"unterminated `#! ... !#' comment", SCM_EOL);
|
|
|
|
|
|
|
|
|
|
|
|
if (c == '!')
|
|
|
|
|
|
bang_seen = 1;
|
|
|
|
|
|
else if (c == '#' && bang_seen)
|
|
|
|
|
|
break;
|
|
|
|
|
|
else
|
|
|
|
|
|
bang_seen = 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-10-24 14:37:36 -04:00
|
|
|
|
/* Forward declarations of the per-port read option setters that
   scm_read_shebang calls below.  */
static void
set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value);

static void
set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, int value);

static void
set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value);
|
2012-10-24 14:37:36 -04:00
|
|
|
|
|
2011-05-08 16:25:01 +02:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_shebang (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
|
2010-05-27 09:20:53 -04:00
|
|
|
|
{
|
2012-10-23 00:29:07 -04:00
|
|
|
|
char name[READER_DIRECTIVE_NAME_MAX_SIZE + 1];
|
|
|
|
|
|
int c;
|
|
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
|
|
|
|
while (i <= READER_DIRECTIVE_NAME_MAX_SIZE)
|
2010-05-27 09:20:53 -04:00
|
|
|
|
{
|
2016-04-22 21:45:55 +02:00
|
|
|
|
c = scm_getc (port);
|
2012-10-23 00:29:07 -04:00
|
|
|
|
if (c == EOF)
|
|
|
|
|
|
scm_i_input_error ("skip_block_comment", port,
|
|
|
|
|
|
"unterminated `#! ... !#' comment", SCM_EOL);
|
|
|
|
|
|
else if (('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || c == '-')
|
|
|
|
|
|
name[i++] = c;
|
|
|
|
|
|
else if (CHAR_IS_DELIMITER (c))
|
|
|
|
|
|
{
|
2012-10-30 23:46:31 -04:00
|
|
|
|
scm_ungetc_unlocked (c, port);
|
2012-10-23 00:29:07 -04:00
|
|
|
|
name[i] = '\0';
|
|
|
|
|
|
if (0 == strcmp ("r6rs", name))
|
|
|
|
|
|
; /* Silently ignore */
|
2012-10-24 14:37:36 -04:00
|
|
|
|
else if (0 == strcmp ("fold-case", name))
|
|
|
|
|
|
set_port_case_insensitive_p (port, opts, 1);
|
|
|
|
|
|
else if (0 == strcmp ("no-fold-case", name))
|
|
|
|
|
|
set_port_case_insensitive_p (port, opts, 0);
|
2012-10-26 17:20:16 -04:00
|
|
|
|
else if (0 == strcmp ("curly-infix", name))
|
|
|
|
|
|
set_port_curly_infix_p (port, opts, 1);
|
|
|
|
|
|
else if (0 == strcmp ("curly-infix-and-bracket-lists", name))
|
|
|
|
|
|
{
|
|
|
|
|
|
set_port_curly_infix_p (port, opts, 1);
|
|
|
|
|
|
set_port_square_brackets_p (port, opts, 0);
|
|
|
|
|
|
}
|
2012-10-23 00:29:07 -04:00
|
|
|
|
else
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
2012-10-30 22:53:22 -04:00
|
|
|
|
else
|
|
|
|
|
|
{
|
2012-10-30 23:46:31 -04:00
|
|
|
|
scm_ungetc_unlocked (c, port);
|
2012-10-30 22:53:22 -04:00
|
|
|
|
break;
|
|
|
|
|
|
}
|
2010-05-27 09:20:53 -04:00
|
|
|
|
}
|
2012-10-23 00:29:07 -04:00
|
|
|
|
while (i > 0)
|
2012-10-30 23:46:31 -04:00
|
|
|
|
scm_ungetc_unlocked (name[--i], port);
|
2012-10-23 00:29:07 -04:00
|
|
|
|
return scm_read_scsh_block_comment (chr, port);
|
2010-05-27 09:20:53 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
2009-10-19 22:38:34 +02:00
|
|
|
|
static SCM
|
|
|
|
|
|
scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port)
|
|
|
|
|
|
{
|
|
|
|
|
|
/* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
|
|
|
|
|
|
nested. So care must be taken. */
|
|
|
|
|
|
int nesting_level = 1;
|
2011-10-05 20:41:11 +02:00
|
|
|
|
|
2016-04-22 21:45:55 +02:00
|
|
|
|
int a = scm_getc (port);
|
2011-10-05 20:41:11 +02:00
|
|
|
|
|
|
|
|
|
|
if (a == EOF)
|
|
|
|
|
|
scm_i_input_error ("scm_read_r6rs_block_comment", port,
|
|
|
|
|
|
"unterminated `#| ... |#' comment", SCM_EOL);
|
2009-10-19 22:38:34 +02:00
|
|
|
|
|
|
|
|
|
|
while (nesting_level > 0)
|
|
|
|
|
|
{
|
2016-04-22 21:45:55 +02:00
|
|
|
|
int b = scm_getc (port);
|
2009-10-19 22:38:34 +02:00
|
|
|
|
|
2011-10-05 20:41:11 +02:00
|
|
|
|
if (b == EOF)
|
2009-12-14 15:21:54 +01:00
|
|
|
|
scm_i_input_error ("scm_read_r6rs_block_comment", port,
|
2009-10-19 22:38:34 +02:00
|
|
|
|
"unterminated `#| ... |#' comment", SCM_EOL);
|
|
|
|
|
|
|
2011-10-05 20:41:11 +02:00
|
|
|
|
if (a == '|' && b == '#')
|
|
|
|
|
|
{
|
|
|
|
|
|
nesting_level--;
|
|
|
|
|
|
b = EOF;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (a == '#' && b == '|')
|
|
|
|
|
|
{
|
|
|
|
|
|
nesting_level++;
|
|
|
|
|
|
b = EOF;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
a = b;
|
2009-10-19 22:38:34 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_commented_expression (scm_t_wchar chr, SCM port,
|
|
|
|
|
|
scm_t_read_opts *opts)
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
{
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_t_wchar c;
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
c = flush_ws (port, opts, (char *) NULL);
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
if (EOF == c)
|
|
|
|
|
|
scm_i_input_error ("read_commented_expression", port,
|
|
|
|
|
|
"no expression after #; comment", SCM_EOL);
|
2011-11-08 00:14:16 +01:00
|
|
|
|
scm_ungetc_unlocked (c, port);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_expression (port, opts);
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
static SCM
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_read_extended_symbol (scm_t_wchar chr, SCM port)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
/* Guile's extended symbol read syntax looks like this:
|
|
|
|
|
|
|
|
|
|
|
|
#{This is all a symbol name}#
|
|
|
|
|
|
|
|
|
|
|
|
So here, CHR is expected to be `{'. */
|
2011-04-11 12:48:06 +02:00
|
|
|
|
int saw_brace = 0;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
size_t len = 0;
|
2011-03-20 23:34:42 +01:00
|
|
|
|
SCM buf = scm_i_make_string (1024, NULL, 0);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
buf = scm_i_string_start_writing (buf);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2016-04-22 21:45:55 +02:00
|
|
|
|
while ((chr = scm_getc (port)) != EOF)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
if (saw_brace)
|
1996-09-18 19:35:48 +00:00
|
|
|
|
{
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (chr == '#')
|
|
|
|
|
|
{
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
saw_brace = 0;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_i_string_set_x (buf, len++, '}');
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
1996-09-18 19:35:48 +00:00
|
|
|
|
}
|
2011-04-11 12:48:06 +02:00
|
|
|
|
|
|
|
|
|
|
if (chr == '}')
|
2007-07-22 16:30:13 +00:00
|
|
|
|
saw_brace = 1;
|
2011-04-11 12:48:06 +02:00
|
|
|
|
else if (chr == '\\')
|
|
|
|
|
|
{
|
|
|
|
|
|
/* It used to be that print.c would print extended-read-syntax
|
|
|
|
|
|
symbols with backslashes before "non-standard" chars, but
|
|
|
|
|
|
this routine wouldn't do anything with those escapes.
|
|
|
|
|
|
Bummer. What we've done is to change print.c to output
|
|
|
|
|
|
R6RS hex escapes for those characters, relying on the fact
|
|
|
|
|
|
that the extended read syntax would never put a `\' before
|
|
|
|
|
|
an `x'. For now, we just ignore other instances of
|
|
|
|
|
|
backslash in the string. */
|
2016-04-22 21:45:55 +02:00
|
|
|
|
switch ((chr = scm_getc (port)))
|
2011-04-11 12:48:06 +02:00
|
|
|
|
{
|
|
|
|
|
|
case EOF:
|
|
|
|
|
|
goto done;
|
|
|
|
|
|
case 'x':
|
|
|
|
|
|
{
|
|
|
|
|
|
scm_t_wchar c;
|
|
|
|
|
|
|
|
|
|
|
|
SCM_READ_HEX_ESCAPE (10, ';');
|
|
|
|
|
|
scm_i_string_set_x (buf, len++, c);
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
str_eof:
|
|
|
|
|
|
chr = EOF;
|
|
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
|
|
|
|
bad_escaped:
|
|
|
|
|
|
scm_i_string_stop_writing ();
|
|
|
|
|
|
scm_i_input_error ("scm_read_extended_symbol", port,
|
|
|
|
|
|
"illegal character in escape sequence: ~S",
|
|
|
|
|
|
scm_list_1 (SCM_MAKE_CHAR (c)));
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
default:
|
|
|
|
|
|
scm_i_string_set_x (buf, len++, chr);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
else
|
2011-04-11 12:48:06 +02:00
|
|
|
|
scm_i_string_set_x (buf, len++, chr);
|
2000-07-18 16:09:09 +00:00
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
if (len >= scm_i_string_length (buf) - 2)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
2009-11-17 01:26:25 +01:00
|
|
|
|
SCM addy;
|
|
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_i_string_stop_writing ();
|
2011-03-20 23:34:42 +01:00
|
|
|
|
addy = scm_i_make_string (1024, NULL, 0);
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
buf = scm_string_append (scm_list_2 (buf, addy));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
len = 0;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
buf = scm_i_string_start_writing (buf);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2011-04-11 12:48:06 +02:00
|
|
|
|
|
|
|
|
|
|
done:
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_i_string_stop_writing ();
|
2011-04-11 12:48:06 +02:00
|
|
|
|
if (chr == EOF)
|
|
|
|
|
|
scm_i_input_error ("scm_read_extended_symbol", port,
|
|
|
|
|
|
"end of file while reading symbol", SCM_EOL);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Top-level token readers, i.e., dispatchers. */
|
|
|
|
|
|
|
|
|
|
|
|
/* Try a reader extension for the `#CHR' syntax.
   The procedure associated with CHR is looked up with
   scm_get_hash_procedure.  If the lookup yields a procedure, it is
   called with the character object for CHR and PORT, and its result is
   returned.  When OPTS->record_positions_p is set and the result passes
   SCM_NIMP and has no source properties yet, the line/column captured
   before the call, together with PORT's file name, are attached to it.
   Returns SCM_UNSPECIFIED when the lookup does not yield a procedure;
   scm_read_sharp compares the result against SCM_UNSPECIFIED and falls
   through to its built-in dispatch in that case.  */
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_sharp_extension (int chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
SCM proc;
|
|
|
|
|
|
|
|
|
|
|
|
proc = scm_get_hash_procedure (chr);
|
|
|
|
|
|
if (scm_is_true (scm_procedure_p (proc)))
|
|
|
|
|
|
{
|
|
|
|
|
|
/* Capture the port position before PROC is called.  */
long line = SCM_LINUM (port);
|
|
|
|
|
|
/* NOTE(review): the -2 presumably backs up over the `#' and CHR
   that were already read -- confirm against the caller.  */
int column = SCM_COL (port) - 2;
|
|
|
|
|
|
SCM got;
|
|
|
|
|
|
|
|
|
|
|
|
got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
|
2011-05-24 21:25:11 +02:00
|
|
|
|
|
2012-10-23 00:21:12 -04:00
|
|
|
|
/* Annotate only when position recording is enabled and GOT does
   not already carry source properties.  */
if (opts->record_positions_p && SCM_NIMP (got)
|
|
|
|
|
|
&& !scm_i_has_source_properties (got))
|
2011-05-24 21:25:11 +02:00
|
|
|
|
scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port));
|
|
|
|
|
|
|
|
|
|
|
|
return got;
|
1996-09-18 19:35:48 +00:00
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* The lookup did not yield a procedure for CHR.  */
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* The reader for the sharp `#' character. It basically dispatches reads
|
|
|
|
|
|
among the above token readers. */
|
|
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#define FUNC_NAME "scm_lreadr"
|
|
|
|
|
|
{
|
|
|
|
|
|
SCM result;
|
|
|
|
|
|
|
2016-04-22 21:45:55 +02:00
|
|
|
|
chr = scm_getc (port);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
result = scm_read_sharp_extension (chr, port, opts);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (!scm_is_eq (result, SCM_UNSPECIFIED))
|
|
|
|
|
|
return result;
|
|
|
|
|
|
|
|
|
|
|
|
switch (chr)
|
|
|
|
|
|
{
|
|
|
|
|
|
case '\\':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_character (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '(':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_vector (chr, port, opts, line, column));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case 's':
|
|
|
|
|
|
case 'u':
|
|
|
|
|
|
case 'f':
|
2011-04-05 19:42:06 -04:00
|
|
|
|
case 'c':
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* This one may return either a boolean or an SRFI-4 vector. */
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_srfi4_vector (chr, port, opts, line, column));
|
2009-06-19 00:47:11 +02:00
|
|
|
|
case 'v':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_bytevector (chr, port, opts, line, column));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '*':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_guile_bit_vector (chr, port, opts, line, column));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case 't':
|
|
|
|
|
|
case 'T':
|
|
|
|
|
|
case 'F':
|
|
|
|
|
|
return (scm_read_boolean (chr, port));
|
|
|
|
|
|
case ':':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_keyword (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
|
|
|
|
case '5': case '6': case '7': case '8': case '9':
|
|
|
|
|
|
case '@':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_array (chr, port, opts, line, column));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
case 'i':
|
|
|
|
|
|
case 'e':
|
|
|
|
|
|
case 'b':
|
|
|
|
|
|
case 'B':
|
|
|
|
|
|
case 'o':
|
|
|
|
|
|
case 'O':
|
|
|
|
|
|
case 'd':
|
|
|
|
|
|
case 'D':
|
|
|
|
|
|
case 'x':
|
|
|
|
|
|
case 'X':
|
|
|
|
|
|
case 'I':
|
|
|
|
|
|
case 'E':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_number_and_radix (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '{':
|
|
|
|
|
|
return (scm_read_extended_symbol (chr, port));
|
|
|
|
|
|
case '!':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_shebang (chr, port, opts));
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
case ';':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_commented_expression (chr, port, opts));
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
case '`':
|
|
|
|
|
|
case '\'':
|
|
|
|
|
|
case ',':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_syntax (chr, port, opts));
|
2010-04-09 14:15:16 +02:00
|
|
|
|
case 'n':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_nil (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
default:
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
result = scm_read_sharp_extension (chr, port, opts);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (scm_is_eq (result, SCM_UNSPECIFIED))
|
2009-10-19 22:38:34 +02:00
|
|
|
|
{
|
|
|
|
|
|
/* To remain compatible with 1.8 and earlier, the following
|
|
|
|
|
|
characters have lower precedence than `read-hash-extend'
|
|
|
|
|
|
characters. */
|
|
|
|
|
|
switch (chr)
|
|
|
|
|
|
{
|
|
|
|
|
|
case '|':
|
|
|
|
|
|
return scm_read_r6rs_block_comment (chr, port);
|
|
|
|
|
|
default:
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
|
|
|
|
|
|
scm_list_1 (SCM_MAKE_CHAR (chr)));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
else
|
|
|
|
|
|
return result;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
#undef FUNC_NAME
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
2012-10-26 17:20:16 -04:00
|
|
|
|
read_inner_expression (SCM port, scm_t_read_opts *opts)
|
|
|
|
|
|
#define FUNC_NAME "read_inner_expression"
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
while (1)
|
|
|
|
|
|
{
|
2012-02-08 03:00:15 -05:00
|
|
|
|
scm_t_wchar chr;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2016-04-22 21:45:55 +02:00
|
|
|
|
chr = scm_getc (port);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
switch (chr)
|
|
|
|
|
|
{
|
|
|
|
|
|
case SCM_WHITE_SPACES:
|
|
|
|
|
|
case SCM_LINE_INCREMENTORS:
|
|
|
|
|
|
break;
|
|
|
|
|
|
case ';':
|
|
|
|
|
|
(void) scm_read_semicolon_comment (chr, port);
|
|
|
|
|
|
break;
|
2012-10-26 17:20:16 -04:00
|
|
|
|
case '{':
|
|
|
|
|
|
if (opts->curly_infix_p)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (opts->neoteric_p)
|
|
|
|
|
|
return scm_read_sexp (chr, port, opts);
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
SCM expr;
|
|
|
|
|
|
|
|
|
|
|
|
/* Enable neoteric expressions within curly braces */
|
|
|
|
|
|
opts->neoteric_p = 1;
|
|
|
|
|
|
expr = scm_read_sexp (chr, port, opts);
|
|
|
|
|
|
opts->neoteric_p = 0;
|
|
|
|
|
|
return expr;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
return scm_read_mixed_case_symbol (chr, port, opts);
|
2010-01-15 22:24:31 +01:00
|
|
|
|
case '[':
|
2012-10-26 17:20:16 -04:00
|
|
|
|
if (opts->square_brackets_p)
|
|
|
|
|
|
return scm_read_sexp (chr, port, opts);
|
|
|
|
|
|
else if (opts->curly_infix_p)
|
|
|
|
|
|
{
|
|
|
|
|
|
/* The syntax of neoteric expressions requires that '[' be
|
|
|
|
|
|
a delimiter when curly-infix is enabled, so it cannot
|
|
|
|
|
|
be part of an unescaped symbol. We might as well do
|
|
|
|
|
|
something useful with it, so we adopt Kawa's convention:
|
|
|
|
|
|
[...] => ($bracket-list$ ...) */
|
|
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
|
|
|
|
|
return maybe_annotate_source
|
|
|
|
|
|
(scm_cons (sym_bracket_list, scm_read_sexp (chr, port, opts)),
|
|
|
|
|
|
port, opts, line, column);
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
return scm_read_mixed_case_symbol (chr, port, opts);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '(':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_sexp (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '"':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_string (chr, port, opts));
|
2014-01-12 07:55:22 -05:00
|
|
|
|
case '|':
|
|
|
|
|
|
if (opts->r7rs_symbols_p)
|
|
|
|
|
|
return scm_read_r7rs_symbol (chr, port, opts);
|
|
|
|
|
|
else
|
|
|
|
|
|
return scm_read_mixed_case_symbol (chr, port, opts);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '\'':
|
|
|
|
|
|
case '`':
|
|
|
|
|
|
case ',':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_quote (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '#':
|
|
|
|
|
|
{
|
2012-02-08 15:51:38 -05:00
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
SCM result = scm_read_sharp (chr, port, opts, line, column);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (scm_is_eq (result, SCM_UNSPECIFIED))
|
|
|
|
|
|
/* We read a comment or some such. */
|
|
|
|
|
|
break;
|
|
|
|
|
|
else
|
|
|
|
|
|
return result;
|
|
|
|
|
|
}
|
|
|
|
|
|
case ')':
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
|
|
|
|
|
|
break;
|
2012-10-26 17:20:16 -04:00
|
|
|
|
case '}':
|
|
|
|
|
|
if (opts->curly_infix_p)
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "unexpected \"}\"", SCM_EOL);
|
|
|
|
|
|
else
|
|
|
|
|
|
return scm_read_mixed_case_symbol (chr, port, opts);
|
2010-11-04 22:07:50 -07:00
|
|
|
|
case ']':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->square_brackets_p)
|
2010-11-04 22:07:50 -07:00
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL);
|
|
|
|
|
|
/* otherwise fall through */
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case EOF:
|
|
|
|
|
|
return SCM_EOF_VAL;
|
|
|
|
|
|
case ':':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->keyword_style == KEYWORD_STYLE_PREFIX)
|
|
|
|
|
|
return scm_symbol_to_keyword (scm_read_expression (port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* Fall through. */
|
|
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
{
|
|
|
|
|
|
if (((chr >= '0') && (chr <= '9'))
|
|
|
|
|
|
|| (strchr ("+-.", chr)))
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_number (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
else
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_mixed_case_symbol (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
#undef FUNC_NAME
|
|
|
|
|
|
|
2012-10-26 17:20:16 -04:00
|
|
|
|
static SCM
|
|
|
|
|
|
scm_read_expression (SCM port, scm_t_read_opts *opts)
|
|
|
|
|
|
#define FUNC_NAME "scm_read_expression"
|
|
|
|
|
|
{
|
|
|
|
|
|
if (!opts->neoteric_p)
|
|
|
|
|
|
return read_inner_expression (port, opts);
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
long line = 0;
|
|
|
|
|
|
int column = 0;
|
|
|
|
|
|
SCM expr;
|
|
|
|
|
|
|
|
|
|
|
|
if (opts->record_positions_p)
|
|
|
|
|
|
{
|
|
|
|
|
|
/* We need to get the position of the first non-whitespace
|
|
|
|
|
|
character in order to correctly annotate neoteric
|
|
|
|
|
|
expressions. For example, for the expression 'f(x)', the
|
|
|
|
|
|
first call to 'read_inner_expression' reads the 'f' (which
|
|
|
|
|
|
cannot be annotated), and then we later read the '(x)' and
|
|
|
|
|
|
use it to construct the new list (f x). */
|
|
|
|
|
|
int c = flush_ws (port, opts, (char *) NULL);
|
|
|
|
|
|
if (c == EOF)
|
|
|
|
|
|
return SCM_EOF_VAL;
|
2012-10-30 23:46:31 -04:00
|
|
|
|
scm_ungetc_unlocked (c, port);
|
2012-10-26 17:20:16 -04:00
|
|
|
|
line = SCM_LINUM (port);
|
|
|
|
|
|
column = SCM_COL (port);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
expr = read_inner_expression (port, opts);
|
|
|
|
|
|
|
|
|
|
|
|
/* 'expr' is the first component of the neoteric expression. Now
|
|
|
|
|
|
we loop, and as long as the next character is '(', '[', or '{',
|
|
|
|
|
|
(without any intervening whitespace), we use it to construct a
|
|
|
|
|
|
new expression. For example, f{n - 1}(x) => ((f (- n 1)) x). */
|
|
|
|
|
|
for (;;)
|
|
|
|
|
|
{
|
2016-04-22 21:45:55 +02:00
|
|
|
|
int chr = scm_getc (port);
|
2012-10-26 17:20:16 -04:00
|
|
|
|
|
|
|
|
|
|
if (chr == '(')
|
|
|
|
|
|
/* e(...) => (e ...) */
|
|
|
|
|
|
expr = scm_cons (expr, scm_read_sexp (chr, port, opts));
|
|
|
|
|
|
else if (chr == '[')
|
|
|
|
|
|
/* e[...] => ($bracket-apply$ e ...) */
|
|
|
|
|
|
expr = scm_cons (sym_bracket_apply,
|
|
|
|
|
|
scm_cons (expr,
|
|
|
|
|
|
scm_read_sexp (chr, port, opts)));
|
|
|
|
|
|
else if (chr == '{')
|
|
|
|
|
|
{
|
|
|
|
|
|
SCM arg = scm_read_sexp (chr, port, opts);
|
|
|
|
|
|
|
|
|
|
|
|
if (scm_is_null (arg))
|
|
|
|
|
|
expr = scm_list_1 (expr); /* e{} => (e) */
|
|
|
|
|
|
else
|
|
|
|
|
|
expr = scm_list_2 (expr, arg); /* e{...} => (e {...}) */
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
if (chr != EOF)
|
2012-10-30 23:46:31 -04:00
|
|
|
|
scm_ungetc_unlocked (chr, port);
|
2012-10-26 17:20:16 -04:00
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
maybe_annotate_source (expr, port, opts, line, column);
|
|
|
|
|
|
}
|
|
|
|
|
|
return expr;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
#undef FUNC_NAME
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* Actual reader. */
|
|
|
|
|
|
|
2012-10-23 17:28:43 -04:00
|
|
|
|
static void init_read_options (SCM port, scm_t_read_opts *opts);
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* Scheme-visible `read'.  PORT is optional; when it is unbound the
   current input port is used.  The read options are initialised for
   the port, leading whitespace is flushed, and one expression is then
   read.  SCM_EOF_VAL is returned if only end of input remains.  */
SCM_DEFINE (scm_read, "read", 0, 1, 0,
            (SCM port),
            "Read an s-expression from the input port @var{port}, or from\n"
            "the current input port if @var{port} is not specified.\n"
            "Any whitespace before the next token is discarded.")
#define FUNC_NAME s_scm_read
{
  scm_t_read_opts opts;
  int lookahead;

  if (SCM_UNBNDP (port))
    port = scm_current_input_port ();
  SCM_VALIDATE_OPINPORT (1, port);

  init_read_options (port, &opts);

  /* Peek at the first character after any whitespace: consume it,
     and push it back unless the input is exhausted.  */
  lookahead = flush_ws (port, &opts, (char *) NULL);
  if (lookahead != EOF)
    {
      scm_ungetc_unlocked (lookahead, port);
      return scm_read_expression (port, &opts);
    }

  return SCM_EOF_VAL;
}
#undef FUNC_NAME
|
1996-09-18 19:35:48 +00:00
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1997-03-11 03:57:04 +00:00
|
|
|
|
/* Manipulate the read-hash-procedures alist. This could be written in
|
|
|
|
|
|
Scheme, but maybe it will also be used by C code during initialisation. */
|
2000-01-05 19:05:23 +00:00
|
|
|
|
/* Scheme-visible `read-hash-extend'.  Installs, replaces or removes
   the entry for CHR in the alist obtained from
   scm_i_read_hash_procedures_ref:

     - CHR absent,  PROC a procedure: a new (CHR . PROC) pair is consed
       onto the front and stored with scm_i_read_hash_procedures_set_x;
     - CHR absent,  PROC #f: nothing is changed;
     - CHR present, PROC a procedure: the entry's cdr is overwritten;
     - CHR present, PROC #f: the entry is unlinked from the alist.

   CHR must be a character and PROC either #f or a procedure; both are
   validated first.  Always returns SCM_UNSPECIFIED.  */
SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
            (SCM chr, SCM proc),
            "Install the procedure @var{proc} for reading expressions\n"
            "starting with the character sequence @code{#} and @var{chr}.\n"
            "@var{proc} will be called with two arguments: the character\n"
            "@var{chr} and the port to read further data from. The object\n"
            "returned will be the return value of @code{read}. \n"
            "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
            )
#define FUNC_NAME s_scm_read_hash_extend
{
  SCM cell;
  SCM pred;

  SCM_VALIDATE_CHAR (1, chr);
  SCM_ASSERT (scm_is_false (proc)
              || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
              proc, SCM_ARG2, FUNC_NAME);

  /* Walk the alist looking for CHR; PRED trails one cell behind CELL
     (it stays #f while CELL is the first cell).  */
  pred = SCM_BOOL_F;
  for (cell = scm_i_read_hash_procedures_ref ();
       !scm_is_null (cell);
       pred = cell, cell = SCM_CDR (cell))
    {
      if (!scm_is_eq (chr, SCM_CAAR (cell)))
        continue;

      /* CHR already has an entry.  */
      if (!scm_is_false (proc))
        {
          /* Replace the procedure in place.  */
          scm_set_cdr_x (SCM_CAR (cell), proc);
        }
      else if (scm_is_false (pred))
        {
          /* Remove the first entry: store the tail of the alist.  */
          SCM tail = SCM_CDR (scm_i_read_hash_procedures_ref ());
          scm_i_read_hash_procedures_set_x (tail);
        }
      else
        {
          /* Remove an inner entry: splice it out.  */
          scm_set_cdr_x (pred, SCM_CDR (cell));
        }

      return SCM_UNSPECIFIED;
    }

  /* CHR was not found, so add it to the beginning (nothing to do when
     asked to remove it).  */
  if (scm_is_true (proc))
    {
      SCM extended = scm_cons (scm_cons (chr, proc),
                               scm_i_read_hash_procedures_ref ());
      scm_i_read_hash_procedures_set_x (extended);
    }

  return SCM_UNSPECIFIED;
}
#undef FUNC_NAME
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
/* Recover the read-hash procedure corresponding to char c. */
|
|
|
|
|
|
static SCM
|
1999-12-12 20:35:02 +00:00
|
|
|
|
scm_get_hash_procedure (int c)
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
{
|
2010-11-03 00:09:57 +01:00
|
|
|
|
SCM rest = scm_i_read_hash_procedures_ref ();
|
1997-03-08 22:52:56 +00:00
|
|
|
|
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
while (1)
|
|
|
|
|
|
{
|
2004-09-22 17:41:37 +00:00
|
|
|
|
if (scm_is_null (rest))
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
return SCM_BOOL_F;
|
|
|
|
|
|
|
2000-03-02 20:54:43 +00:00
|
|
|
|
if (SCM_CHAR (SCM_CAAR (rest)) == c)
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
return SCM_CDAR (rest);
|
|
|
|
|
|
|
|
|
|
|
|
rest = SCM_CDR (rest);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
2013-01-15 14:31:49 +01:00
|
|
|
|
/* Return 1 if C may appear in a character encoding name, 0 otherwise.

   Accepted characters are the ASCII letters and digits plus the
   punctuation characters "_-.:/,+=()".  The NUL character is never
   accepted.  */
static int
is_encoding_char (char c)
{
  if (c >= 'a' && c <= 'z')
    return 1;
  if (c >= 'A' && c <= 'Z')
    return 1;
  if (c >= '0' && c <= '9')
    return 1;

  /* strchr considers the terminating NUL to be part of the string it
     searches, so strchr (s, '\0') is never NULL.  Reject NUL up front
     so that it is not mistaken for a valid encoding character.  */
  if (c == '\0')
    return 0;

  return strchr ("_-.:/,+=()", c) != NULL;
}
|
|
|
|
|
|
|
2014-01-17 18:18:41 +01:00
|
|
|
|
/* Maximum size of an encoding name. This is a bit more than the
|
|
|
|
|
|
longest name listed at
|
|
|
|
|
|
<http://www.iana.org/assignments/character-sets> ("ISO-2022-JP-2", 13
|
|
|
|
|
|
characters.) */
|
|
|
|
|
|
/* Upper bound on the length of an encoding name; also used as the
   padding term in SCM_ENCODING_SEARCH_SIZE.  */
#define ENCODING_NAME_MAX_SIZE 20
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
2014-01-17 18:18:41 +01:00
|
|
|
|
/* Number of bytes at the beginning or end of a file that are scanned
|
|
|
|
|
|
for a "coding:" declaration. */
|
|
|
|
|
|
/* A 500-byte scan window plus ENCODING_NAME_MAX_SIZE.  The `header'
   buffer in scm_i_scan_for_encoding is declared with this size plus
   one byte.  */
#define SCM_ENCODING_SEARCH_SIZE (500 + ENCODING_NAME_MAX_SIZE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Search the SCM_ENCODING_SEARCH_SIZE bytes of a file for an Emacs-like
|
|
|
|
|
|
coding declaration. Returns either NULL or a string whose storage
|
|
|
|
|
|
has been allocated with `scm_gc_malloc'. */
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
char *
|
2009-11-14 16:27:28 +01:00
|
|
|
|
scm_i_scan_for_encoding (SCM port)
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
{
|
2011-03-03 12:46:49 +01:00
|
|
|
|
scm_t_port *pt;
|
Port buffers are Scheme values
* libguile/ports-internal.h (scm_port_buffer_bytevector)
(scm_port_buffer_cur, scm_port_buffer_set_cur)
(scm_port_buffer_end, scm_port_buffer_set_end)
(scm_port_buffer_has_eof_p, scm_port_buffer_set_has_eof_p): New
helpers.
* libguile/ports-internal.h (scm_port_buffer_size)
(scm_port_buffer_reset, scm_port_buffer_reset_end)
(scm_port_buffer_can_take, scm_port_buffer_can_put)
(scm_port_buffer_can_putback, scm_port_buffer_did_take)
(scm_port_buffer_did_put, scm_port_buffer_take_pointer)
(scm_port_buffer_put_pointer, scm_port_buffer_take)
(scm_port_buffer_put, scm_port_buffer_putback): Adapt to treat port
buffers as SCM values and use helpers to access them.
* libguile/ports.c (scm_i_clear_pending_eof, scm_i_set_pending_eof)
(scm_c_make_port_buffer, scm_i_read_unlocked)
(scm_c_read_bytes_unlocked, scm_i_unget_bytes_unlocked)
(scm_setvbuf, scm_fill_input, scm_take_from_input_buffers)
(scm_drain_input, scm_end_input_unlocked, scm_flush_unlocked)
(scm_fill_input_unlocked, scm_i_write_unlocked)
(scm_c_write_bytes_unlocked, scm_c_write_unlocked)
(scm_char_ready_p): Adapt to treat port buffers as SCM values and use
helpers to access them.
(scm_port_read_buffer, scm_port_write_buffer): New functions,
allowing (ice-9 ports) to access port buffers.
* libguile/ports.h: Update comments on port buffers. Replace
scm_t_port_buffer structure with a Scheme vector whose fields are
enumerated by "enum scm_port_buffer_field".
(scm_get_byte_or_eof_unlocked, scm_peek_byte_or_eof_unlocked): Adapt
these implementations to port buffer representation change.
* libguile/r6rs-ports.c (scm_get_bytevector_some):
* libguile/read.c (scm_i_scan_for_encoding):
* libguile/rw.c (scm_write_string_partial): Port buffers are Scheme
objects.
2016-04-20 09:09:15 +02:00
|
|
|
|
SCM buf;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
char header[SCM_ENCODING_SEARCH_SIZE+1];
|
2010-07-16 05:39:52 -07:00
|
|
|
|
size_t bytes_read, encoding_length, i;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
char *encoding = NULL;
|
2010-07-16 05:39:52 -07:00
|
|
|
|
char *pos, *encoding_start;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
int in_comment;
|
|
|
|
|
|
|
2011-03-03 12:46:49 +01:00
|
|
|
|
pt = SCM_PTAB_ENTRY (port);
|
Generic port facility provides buffering uniformly
* libguile/ports.h (struct scm_t_port_buffer): New data type.
(struct scm_t_port): Refactor to use port buffers instead of
implementation-managed read and write pointers. Add "read_buffering"
member.
(SCM_INITIAL_PUTBACK_BUF_SIZE, SCM_READ_BUFFER_EMPTY_P): Remove.
(scm_t_ptob_descriptor): Rename "fill_input" function to "read", and
take a port buffer, returning void. Likewise "write" takes a port
buffer and returns void. Remove "end_input"; instead if there is
buffered input and rw_random is true, then there must be a seek
function, so just seek back if needed. Remove "flush"; instead all
calls to the "write" function implicitly include a "flush", since the
buffering happens in the generic port code now. Remove "setvbuf", but
add "get_natural_buffer_sizes"; instead the generic port code can
buffer any port.
(scm_make_port_type): Adapt to read and write prototype changes.
(scm_set_port_flush, scm_set_port_end_input, scm_set_port_setvbuf):
Remove.
(scm_slow_get_byte_or_eof_unlocked)
(scm_slow_get_peek_or_eof_unlocked): Remove; the slow path is to call
scm_fill_input.
(scm_set_port_get_natural_buffer_sizes): New function.
(scm_c_make_port_buffer): New internal function.
(scm_port_non_buffer): Remove. This was a function for
implementations that is no longer needed. Instead open with BUF0 or
use (setvbuf port 'none).
(scm_fill_input, scm_fill_input_unlocked): Return the filled port
buffer.
(scm_get_byte_or_eof_unlocked, scm_peek_byte_or_eof_unlocked): Adapt
to changes in buffering and EOF management.
* libguile/ports.c: Adapt to port interface changes.
(initialize_port_buffers): New function, using the port mode flags to
set up appropriate initial buffering for all ports.
(scm_c_make_port_with_encoding): Create port buffers here instead of
delegating to implementations.
(scm_close_port): Flush the port if needed instead of delegating to
the implementation.
* libguile/filesys.c (set_element): Adapt to buffering changes.
* libguile/fports.c (fport_get_natural_buffer_sizes): New function,
replacing scm_fport_buffer_add.
(fport_write, fport_read): Update to let the generic ports code do the
buffering.
(fport_flush, fport_end_input): Remove.
(fport_close): Don't flush in a dynwind; that's the core ports' job.
(scm_make_fptob): Adapt.
* libguile/ioext.c (scm_redirect_port): Adapt to buffering changes.
* libguile/poll.c (scm_primitive_poll): Adapt to buffering changes.
* libguile/ports-internal.h (struct scm_port_internal): Remove
pending_eof flag; this is now set on the read buffer.
* libguile/r6rs-ports.c (struct bytevector_input_port): New type. The
new buffering arrangement means that there's now an intermediate
buffer between the bytevector and the user of the port; this could
lead to a perf degradation, but on the other hand there are some other
speedups enabled by the buffering refactor, so probably the memcpy
cost is dwarfed by the cost of the other parts of the ports
machinery.
(make_bytevector_input_port, bytevector_input_port_read):
(bytevector_input_port_seek, initialize_bytevector_input_ports): Adapt
to new buffering arrangement.
(struct custom_binary_port): Remove read buffer, as Guile handles that
now.
(custom_binary_input_port_setvbuf): Remove; now handled by Guile.
(make_custom_binary_input_port, custom_binary_input_port_read)
(initialize_custom_binary_input_ports): Adapt.
(scm_get_bytevector_some): Adapt to new EOF management.
(scm_t_bytevector_output_port_buffer): Hold on to the underlying port,
so we can flush it if it's open.
(make_bytevector_output_port, bytevector_output_port_write):
(bytevector_output_port_seek): Adapt.
(bytevector_output_port_procedure): Flush the port as appropriate, so
that we get all the bytes.
(make_custom_binary_output_port, custom_binary_output_port_write):
Adapt.
(make_transcoded_port): Don't muck with buffering.
(transcoded_port_write): Simply forward the write to the underlying
port.
(transcoded_port_read): Likewise.
(transcoded_port_close): No need to flush.
(initialize_transcoded_ports): Adapt.
* libguile/read.c (scm_i_scan_for_encoding): Adapt to buffering
changes.
* libguile/rw.c (scm_write_string_partial): Adapt to buffering changes.
* libguile/strports.c: Adapt to the fact that we don't manage the
buffer. Probably room for speed improvements here...
* libguile/vports.c (soft_port_get_natural_buffer_sizes): New function.
Adapt the rest of the file for the new buffering regime.
* test-suite/tests/r6rs-ports.test ("8.2.10 Output ports"): Custom
binary output ports need to be flushed before you can rely on the
write! procedure having been called. Add necessary flush-port
invocations.
("8.2.6 Input and output ports"): Transcoded ports now have an
internal buffer by default. This test checks that the characters are
transcoded one at a time, so to do that, call setvbuf on the
transcoded port to remove the buffer.
* test-suite/tests/web-client.test (run-with-http-transcript): Fix for
different flushing regime on soft ports. (The vestigial flush
procedure is now called after each write, which is not what the test
was expecting.)
* test-suite/standalone/test-scm-c-read.c: Update for changes to the C
interface for defining port types.
* doc/ref/api-io.texi (Ports): Update to discuss buffering in a generic
way, and to remove a hand-wavey paragraph describing string ports as
"interesting and powerful".
(Reading, Writing): Remove placeholder comments. Document
`scm_lfwrite'.
(Buffering): New section.
(File Ports): Link to buffering.
(I/O Extensions): Join subnodes into parent and describe new API,
including buffering API.
* doc/ref/posix.texi (Ports and File Descriptors): Link to buffering.
Remove unread-char etc, as they are documented elsewhere.
(Pipes, Network Sockets and Communication): Link to buffering.
2016-04-06 09:21:44 +02:00
|
|
|
|
buf = pt->read_buf;
|
2009-11-27 17:00:51 +01:00
|
|
|
|
|
2011-03-03 12:46:49 +01:00
|
|
|
|
if (pt->rw_random)
|
2016-04-22 21:32:05 +02:00
|
|
|
|
scm_flush (port);
|
2011-03-03 12:46:49 +01:00
|
|
|
|
|
2016-04-19 19:50:21 +02:00
|
|
|
|
if (scm_port_buffer_can_take (buf) == 0)
|
2011-03-03 12:46:49 +01:00
|
|
|
|
{
|
|
|
|
|
|
/* We can use the read buffer, and thus avoid a seek. */
|
Generic port facility provides buffering uniformly
* libguile/ports.h (struct scm_t_port_buffer): New data type.
(struct scm_t_port): Refactor to use port buffers instead of
implementation-managed read and write pointers. Add "read_buffering"
member.
(SCM_INITIAL_PUTBACK_BUF_SIZE, SCM_READ_BUFFER_EMPTY_P): Remove.
(scm_t_ptob_descriptor): Rename "fill_input" function to "read", and
take a port buffer, returning void. Likewise "write" takes a port
buffer and returns void. Remove "end_input"; instead if there is
buffered input and rw_random is true, then there must be a seek
function, so just seek back if needed. Remove "flush"; instead all
calls to the "write" function implicitly include a "flush", since the
buffering happens in the generic port code now. Remove "setvbuf", but
add "get_natural_buffer_sizes"; instead the generic port code can
buffer any port.
(scm_make_port_type): Adapt to read and write prototype changes.
(scm_set_port_flush, scm_set_port_end_input, scm_set_port_setvbuf):
Remove.
(scm_slow_get_byte_or_eof_unlocked)
(scm_slow_get_peek_or_eof_unlocked): Remove; the slow path is to call
scm_fill_input.
(scm_set_port_get_natural_buffer_sizes): New function.
(scm_c_make_port_buffer): New internal function.
(scm_port_non_buffer): Remove. This was a function for
implementations that is no longer needed. Instead open with BUF0 or
use (setvbuf port 'none).
(scm_fill_input, scm_fill_input_unlocked): Return the filled port
buffer.
(scm_get_byte_or_eof_unlocked, scm_peek_byte_or_eof_unlocked): Adapt
to changes in buffering and EOF management.
* libguile/ports.c: Adapt to port interface changes.
(initialize_port_buffers): New function, using the port mode flags to
set up appropriate initial buffering for all ports.
(scm_c_make_port_with_encoding): Create port buffers here instead of
delegating to implementations.
(scm_close_port): Flush the port if needed instead of delegating to
the implementation.
* libguile/filesys.c (set_element): Adapt to buffering changes.
* libguile/fports.c (fport_get_natural_buffer_sizes): New function,
replacing scm_fport_buffer_add.
(fport_write, fport_read): Update to let the generic ports code do the
buffering.
(fport_flush, fport_end_input): Remove.
(fport_close): Don't flush in a dynwind; that's the core ports' job.
(scm_make_fptob): Adapt.
* libguile/ioext.c (scm_redirect_port): Adapt to buffering changes.
* libguile/poll.c (scm_primitive_poll): Adapt to buffering changes.
* libguile/ports-internal.h (struct scm_port_internal): Remove
pending_eof flag; this is now set on the read buffer.
* libguile/r6rs-ports.c (struct bytevector_input_port): New type. The
new buffering arrangement means that there's now an intermediate
buffer between the bytevector and the user of the port; this could
lead to a perf degradation, but on the other hand there are some other
speedups enabled by the buffering refactor, so probably the memcpy
cost is dwarfed by the cost of the other parts of the ports
machinery.
(make_bytevector_input_port, bytevector_input_port_read):
(bytevector_input_port_seek, initialize_bytevector_input_ports): Adapt
to new buffering arrangement.
(struct custom_binary_port): Remove read buffer, as Guile handles that
now.
(custom_binary_input_port_setvbuf): Remove; now handled by Guile.
(make_custom_binary_input_port, custom_binary_input_port_read)
(initialize_custom_binary_input_ports): Adapt.
(scm_get_bytevector_some): Adapt to new EOF management.
(scm_t_bytevector_output_port_buffer): Hold on to the underlying port,
so we can flush it if it's open.
(make_bytevector_output_port, bytevector_output_port_write):
(bytevector_output_port_seek): Adapt.
(bytevector_output_port_procedure): Flush the port as appropriate, so
that we get all the bytes.
(make_custom_binary_output_port, custom_binary_output_port_write):
Adapt.
(make_transcoded_port): Don't muck with buffering.
(transcoded_port_write): Simply forward the write to the underlying
port.
(transcoded_port_read): Likewise.
(transcoded_port_close): No need to flush.
(initialize_transcoded_ports): Adapt.
* libguile/read.c (scm_i_scan_for_encoding): Adapt to buffering
changes.
* libguile/rw.c (scm_write_string_partial): Adapt to buffering changes.
* libguile/strports.c: Adapt to the fact that we don't manage the
buffer. Probably room for speed improvements here...
* libguile/vports.c (soft_port_get_natural_buffer_sizes): New function.
Adapt the rest of the file for the new buffering regime.
* test-suite/tests/r6rs-ports.test ("8.2.10 Output ports"): Custom
binary output ports need to be flushed before you can rely on the
write! procedure having been called. Add necessary flush-port
invocations.
("8.2.6 Input and output ports"): Transcoded ports now have an
internal buffer by default. This test checks that the characters are
transcoded one at a time, so to do that, call setvbuf on the
transcoded port to remove the buffer.
* test-suite/tests/web-client.test (run-with-http-transcript): Fix for
different flushing regime on soft ports. (The vestigial flush
procedure is now called after each write, which is not what the test
was expecting.)
* test-suite/standalone/test-scm-c-read.c: Update for changes to the C
interface for defining port types.
* doc/ref/api-io.texi (Ports): Update to discuss buffering in a generic
way, and to remove a hand-wavey paragraph describing string ports as
"interesting and powerful".
(Reading, Writing): Remove placeholder comments. Document
`scm_lfwrite'.
(Buffering): New section.
(File Ports): Link to buffering.
(I/O Extensions): Join subnodes into parent and describe new API,
including buffering API.
* doc/ref/posix.texi (Ports and File Descriptors): Link to buffering.
Remove unread-char etc, as they are documented elsewhere.
(Pipes, Network Sockets and Communication): Link to buffering.
2016-04-06 09:21:44 +02:00
|
|
|
|
buf = scm_fill_input_unlocked (port);
|
2016-04-19 19:50:21 +02:00
|
|
|
|
bytes_read = scm_port_buffer_can_take (buf);
|
2011-03-03 12:46:49 +01:00
|
|
|
|
if (bytes_read > SCM_ENCODING_SEARCH_SIZE)
|
|
|
|
|
|
bytes_read = SCM_ENCODING_SEARCH_SIZE;
|
|
|
|
|
|
|
|
|
|
|
|
if (bytes_read <= 1)
|
|
|
|
|
|
/* An unbuffered port -- don't scan. */
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
2016-04-18 00:19:24 +02:00
|
|
|
|
memcpy (header, scm_port_buffer_take_pointer (buf), bytes_read);
|
2011-03-03 12:46:49 +01:00
|
|
|
|
header[bytes_read] = '\0';
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
/* Try to read some bytes and then seek back. Not all ports
|
|
|
|
|
|
support seeking back; and indeed some file ports (like
|
|
|
|
|
|
/dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
|
|
|
|
|
|
check performed by SCM_FPORT_FDES---but fail to seek
|
|
|
|
|
|
backwards. Hence this block comes second. We prefer to use
|
|
|
|
|
|
the read buffer in-place. */
|
|
|
|
|
|
if (SCM_FPORTP (port) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port)))
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
2016-04-22 20:42:24 +02:00
|
|
|
|
bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
|
2011-03-03 12:46:49 +01:00
|
|
|
|
header[bytes_read] = '\0';
|
|
|
|
|
|
scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
|
|
|
|
|
|
}
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
|
|
|
|
|
/* search past "coding[:=]" */
|
|
|
|
|
|
pos = header;
|
|
|
|
|
|
while (1)
|
|
|
|
|
|
{
|
|
|
|
|
|
if ((pos = strstr(pos, "coding")) == NULL)
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
2014-01-17 18:18:41 +01:00
|
|
|
|
pos += strlen ("coding");
|
|
|
|
|
|
if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
(*pos == ':' || *pos == '='))
|
|
|
|
|
|
{
|
|
|
|
|
|
pos ++;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* skip spaces */
|
2014-01-17 18:18:41 +01:00
|
|
|
|
while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
(*pos == ' ' || *pos == '\t'))
|
|
|
|
|
|
pos ++;
|
|
|
|
|
|
|
2014-01-17 18:18:41 +01:00
|
|
|
|
if (pos - header >= SCM_ENCODING_SEARCH_SIZE - ENCODING_NAME_MAX_SIZE)
|
|
|
|
|
|
/* We found the "coding:" string, but there is probably not enough
|
|
|
|
|
|
room to store an encoding name in its entirety, so ignore it.
|
|
|
|
|
|
This makes sure we do not end up returning a truncated encoding
|
|
|
|
|
|
name. */
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
/* grab the next token */
|
2010-07-16 05:39:52 -07:00
|
|
|
|
encoding_start = pos;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
i = 0;
|
2010-07-16 05:39:52 -07:00
|
|
|
|
while (encoding_start + i - header <= SCM_ENCODING_SEARCH_SIZE
|
|
|
|
|
|
&& encoding_start + i - header < bytes_read
|
2013-01-15 14:31:49 +01:00
|
|
|
|
&& is_encoding_char (encoding_start[i]))
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
i++;
|
|
|
|
|
|
|
2010-07-16 05:39:52 -07:00
|
|
|
|
encoding_length = i;
|
|
|
|
|
|
if (encoding_length == 0)
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
return NULL;
|
|
|
|
|
|
|
2010-07-16 05:39:52 -07:00
|
|
|
|
encoding = scm_gc_strndup (encoding_start, encoding_length, "encoding");
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
|
|
|
|
|
/* push backwards to make sure we were in a comment */
|
|
|
|
|
|
in_comment = 0;
|
2010-07-16 05:39:52 -07:00
|
|
|
|
pos = encoding_start;
|
|
|
|
|
|
while (pos >= header)
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
{
|
2011-03-31 14:46:21 +02:00
|
|
|
|
if (*pos == ';')
|
|
|
|
|
|
{
|
|
|
|
|
|
in_comment = 1;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (*pos == '\n' || pos == header)
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
{
|
|
|
|
|
|
/* This wasn't in a semicolon comment. Check for a
|
|
|
|
|
|
hash-bang comment. */
|
|
|
|
|
|
char *beg = strstr (header, "#!");
|
|
|
|
|
|
char *end = strstr (header, "!#");
|
2011-03-31 14:46:21 +02:00
|
|
|
|
if (beg < encoding_start && encoding_start + encoding_length <= end)
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
in_comment = 1;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
2011-03-31 14:46:21 +02:00
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
pos --;
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
}
|
|
|
|
|
|
if (!in_comment)
|
2009-11-14 16:27:28 +01:00
|
|
|
|
/* This wasn't in a comment */
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
return encoding;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
|
|
|
|
|
|
(SCM port),
|
2009-11-23 18:51:25 +01:00
|
|
|
|
"Scans the port for an Emacs-like character coding declaration\n"
|
fix typos in the manual bits generated from source comments.
* libguile/bitvectors.c, libguile/chars.c,
libguile/deprecated.c, libguile/numbers.c, libguile/random.c,
libguile/read.c, libguile/root.c, libguile/srfi-1.c,
libguile/srfi-13.c, libguile/srfi-14.c, libguile/uniform.c:
Fix typos, add missing newlines.
2011-02-07 00:29:51 +01:00
|
|
|
|
"near the top of the contents of a port with random-accessible contents.\n"
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
"The coding declaration is of the form\n"
|
|
|
|
|
|
"@code{coding: XXXXX} and must appear in a scheme comment.\n"
|
|
|
|
|
|
"\n"
|
|
|
|
|
|
"Returns a string containing the character encoding of the file\n"
|
|
|
|
|
|
"if a declaration was found, or @code{#f} otherwise.\n")
|
|
|
|
|
|
#define FUNC_NAME s_scm_file_encoding
|
|
|
|
|
|
{
|
|
|
|
|
|
char *enc;
|
|
|
|
|
|
SCM s_enc;
|
2009-11-14 16:27:28 +01:00
|
|
|
|
|
2011-03-03 12:46:49 +01:00
|
|
|
|
SCM_VALIDATE_OPINPORT (SCM_ARG1, port);
|
|
|
|
|
|
|
2009-11-14 16:27:28 +01:00
|
|
|
|
enc = scm_i_scan_for_encoding (port);
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
if (enc == NULL)
|
|
|
|
|
|
return SCM_BOOL_F;
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
2013-04-14 02:48:33 -04:00
|
|
|
|
s_enc = scm_string_upcase (scm_from_locale_string (enc));
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
return s_enc;
|
|
|
|
|
|
}
|
2009-11-14 16:27:28 +01:00
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorporating the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being used in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
return SCM_BOOL_F;
|
|
|
|
|
|
}
|
|
|
|
|
|
#undef FUNC_NAME
|
|
|
|
|
|
|
2012-10-23 17:28:43 -04:00
|
|
|
|
|
|
|
|
|
|
/* Per-port read options.
|
|
|
|
|
|
|
2013-11-17 01:11:57 -05:00
|
|
|
|
We store per-port read options in the 'port-read-options' port
|
|
|
|
|
|
property, which is stored in the internal port structure. The value
|
|
|
|
|
|
stored is a single integer that contains a two-bit field for each
|
|
|
|
|
|
read option.
|
2012-10-23 17:28:43 -04:00
|
|
|
|
|
|
|
|
|
|
If a bit field contains READ_OPTION_INHERIT (3), that indicates that
|
|
|
|
|
|
the applicable value should be inherited from the corresponding
|
|
|
|
|
|
global read option. Otherwise, the bit field contains the value of
|
|
|
|
|
|
the read option. For boolean read options that have been set
|
|
|
|
|
|
per-port, the possible values are 0 or 1. If the 'keyword_style'
|
|
|
|
|
|
read option has been set per-port, its possible values are those in
|
|
|
|
|
|
'enum t_keyword_style'. */
|
|
|
|
|
|
|
2013-11-17 01:11:57 -05:00
|
|
|
|
/* Key to read options in port properties. */
|
2012-10-23 17:28:43 -04:00
|
|
|
|
SCM_SYMBOL (sym_port_read_options, "port-read-options");
|
|
|
|
|
|
|
|
|
|
|
|
/* Bit offset, within the packed per-port integer, of the field that
   overrides each read option.  Fields are two bits wide, so the offsets
   advance in steps of two.  */
#define READ_OPTION_COPY_SOURCE_P          0
#define READ_OPTION_RECORD_POSITIONS_P     2
#define READ_OPTION_CASE_INSENSITIVE_P     4
#define READ_OPTION_KEYWORD_STYLE          6
#define READ_OPTION_R6RS_ESCAPES_P         8
#define READ_OPTION_SQUARE_BRACKETS_P     10
#define READ_OPTION_HUNGRY_EOL_ESCAPES_P  12
#define READ_OPTION_CURLY_INFIX_P         14
#define READ_OPTION_R7RS_SYMBOLS_P        16

/* Combined width, in bits, of all the override fields above.  */
#define READ_OPTIONS_NUM_BITS             18

/* Every bit of every field set, i.e. each field holds
   READ_OPTION_INHERIT.  This is also the largest value the packed
   integer may take.  */
#define READ_OPTIONS_INHERIT_ALL  ((1UL << READ_OPTIONS_NUM_BITS) - 1)
#define READ_OPTIONS_MAX_VALUE    READ_OPTIONS_INHERIT_ALL

/* Mask selecting a single two-bit field, and the field value that
   means "inherit from the global read option".  */
#define READ_OPTION_MASK          3
#define READ_OPTION_INHERIT       3
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
|
set_port_read_option (SCM port, int option, int new_value)
|
|
|
|
|
|
{
|
2013-11-17 01:11:57 -05:00
|
|
|
|
SCM scm_read_options;
|
2012-10-23 17:28:43 -04:00
|
|
|
|
unsigned int read_options;
|
|
|
|
|
|
|
|
|
|
|
|
new_value &= READ_OPTION_MASK;
|
2013-12-01 18:29:33 -05:00
|
|
|
|
|
|
|
|
|
|
scm_dynwind_begin (0);
|
|
|
|
|
|
scm_dynwind_lock_port (port);
|
|
|
|
|
|
|
2013-11-17 01:11:57 -05:00
|
|
|
|
scm_read_options = scm_i_port_property (port, sym_port_read_options);
|
2012-10-23 17:28:43 -04:00
|
|
|
|
if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
|
|
|
|
|
|
read_options = scm_to_uint (scm_read_options);
|
|
|
|
|
|
else
|
|
|
|
|
|
read_options = READ_OPTIONS_INHERIT_ALL;
|
|
|
|
|
|
read_options &= ~(READ_OPTION_MASK << option);
|
|
|
|
|
|
read_options |= new_value << option;
|
|
|
|
|
|
scm_read_options = scm_from_uint (read_options);
|
2013-11-17 01:11:57 -05:00
|
|
|
|
scm_i_set_port_property_x (port, sym_port_read_options, scm_read_options);
|
2013-12-01 18:29:33 -05:00
|
|
|
|
|
|
|
|
|
|
scm_dynwind_end ();
|
2012-10-23 17:28:43 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
2012-10-24 14:37:36 -04:00
|
|
|
|
/* Set OPTS and PORT's case-insensitivity according to VALUE. */
|
|
|
|
|
|
static void
|
|
|
|
|
|
set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value)
|
|
|
|
|
|
{
|
|
|
|
|
|
value = !!value;
|
|
|
|
|
|
opts->case_insensitive_p = value;
|
|
|
|
|
|
set_port_read_option (port, READ_OPTION_CASE_INSENSITIVE_P, value);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-10-26 17:20:16 -04:00
|
|
|
|
/* Set OPTS and PORT's square_brackets_p option according to VALUE. */
|
|
|
|
|
|
static void
|
|
|
|
|
|
set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, int value)
|
|
|
|
|
|
{
|
|
|
|
|
|
value = !!value;
|
|
|
|
|
|
opts->square_brackets_p = value;
|
|
|
|
|
|
set_port_read_option (port, READ_OPTION_SQUARE_BRACKETS_P, value);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Set OPTS and PORT's curly_infix_p option according to VALUE. */
|
|
|
|
|
|
static void
|
|
|
|
|
|
set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value)
|
|
|
|
|
|
{
|
|
|
|
|
|
value = !!value;
|
|
|
|
|
|
opts->curly_infix_p = value;
|
|
|
|
|
|
set_port_read_option (port, READ_OPTION_CURLY_INFIX_P, value);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-10-23 17:28:43 -04:00
|
|
|
|
/* Initialize OPTS based on PORT's read options and the global read
|
|
|
|
|
|
options. */
|
|
|
|
|
|
static void
|
|
|
|
|
|
init_read_options (SCM port, scm_t_read_opts *opts)
|
|
|
|
|
|
{
|
2013-11-17 01:11:57 -05:00
|
|
|
|
SCM val, scm_read_options;
|
2012-10-23 17:28:43 -04:00
|
|
|
|
unsigned int read_options, x;
|
|
|
|
|
|
|
2013-11-17 01:11:57 -05:00
|
|
|
|
scm_read_options = scm_i_port_property (port, sym_port_read_options);
|
2012-10-23 17:28:43 -04:00
|
|
|
|
|
|
|
|
|
|
if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
|
|
|
|
|
|
read_options = scm_to_uint (scm_read_options);
|
|
|
|
|
|
else
|
|
|
|
|
|
read_options = READ_OPTIONS_INHERIT_ALL;
|
|
|
|
|
|
|
|
|
|
|
|
x = READ_OPTION_MASK & (read_options >> READ_OPTION_KEYWORD_STYLE);
|
|
|
|
|
|
if (x == READ_OPTION_INHERIT)
|
|
|
|
|
|
{
|
|
|
|
|
|
val = SCM_PACK (SCM_KEYWORD_STYLE);
|
|
|
|
|
|
if (scm_is_eq (val, scm_keyword_prefix))
|
|
|
|
|
|
x = KEYWORD_STYLE_PREFIX;
|
|
|
|
|
|
else if (scm_is_eq (val, scm_keyword_postfix))
|
|
|
|
|
|
x = KEYWORD_STYLE_POSTFIX;
|
|
|
|
|
|
else
|
|
|
|
|
|
x = KEYWORD_STYLE_HASH_PREFIX;
|
|
|
|
|
|
}
|
|
|
|
|
|
opts->keyword_style = x;
|
|
|
|
|
|
|
|
|
|
|
|
#define RESOLVE_BOOLEAN_OPTION(NAME, name) \
|
|
|
|
|
|
do \
|
|
|
|
|
|
{ \
|
|
|
|
|
|
x = READ_OPTION_MASK & (read_options >> READ_OPTION_ ## NAME); \
|
|
|
|
|
|
if (x == READ_OPTION_INHERIT) \
|
|
|
|
|
|
x = !!SCM_ ## NAME; \
|
|
|
|
|
|
opts->name = x; \
|
|
|
|
|
|
} \
|
|
|
|
|
|
while (0)
|
|
|
|
|
|
|
|
|
|
|
|
RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P, copy_source_p);
|
|
|
|
|
|
RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P, record_positions_p);
|
|
|
|
|
|
RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P, case_insensitive_p);
|
|
|
|
|
|
RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P, r6rs_escapes_p);
|
|
|
|
|
|
RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p);
|
|
|
|
|
|
RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p);
|
2012-10-26 17:20:16 -04:00
|
|
|
|
RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p);
|
2014-01-12 07:55:22 -05:00
|
|
|
|
RESOLVE_BOOLEAN_OPTION (R7RS_SYMBOLS_P, r7rs_symbols_p);
|
2012-10-23 17:28:43 -04:00
|
|
|
|
|
|
|
|
|
|
#undef RESOLVE_BOOLEAN_OPTION
|
2012-10-26 17:20:16 -04:00
|
|
|
|
|
|
|
|
|
|
opts->neoteric_p = 0;
|
2012-10-23 17:28:43 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
void
|
|
|
|
|
|
scm_init_read ()
|
|
|
|
|
|
{
|
2010-11-03 00:09:57 +01:00
|
|
|
|
SCM read_hash_procs;
|
|
|
|
|
|
|
2011-11-23 12:21:22 +01:00
|
|
|
|
read_hash_procs = scm_make_fluid_with_default (SCM_EOL);
|
2010-11-03 00:09:57 +01:00
|
|
|
|
|
|
|
|
|
|
scm_i_read_hash_procedures =
|
|
|
|
|
|
SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs));
|
1997-03-08 22:52:56 +00:00
|
|
|
|
|
2007-01-19 19:26:36 +00:00
|
|
|
|
scm_init_opts (scm_read_options, scm_read_opts);
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/read.x"
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2000-03-19 19:01:16 +00:00
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
Local Variables:
|
|
|
|
|
|
c-file-style: "gnu"
|
|
|
|
|
|
End:
|
|
|
|
|
|
*/
|