2012-02-08 03:14:17 -05:00
|
|
|
|
/* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
|
2013-01-30 15:30:31 +01:00
|
|
|
|
* 2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
|
1996-07-25 22:56:11 +00:00
|
|
|
|
*
|
2003-04-05 19:15:35 +00:00
|
|
|
|
* This library is free software; you can redistribute it and/or
|
2009-06-17 00:22:09 +01:00
|
|
|
|
* modify it under the terms of the GNU Lesser General Public License
|
|
|
|
|
|
* as published by the Free Software Foundation; either version 3 of
|
|
|
|
|
|
* the License, or (at your option) any later version.
|
1996-07-25 22:56:11 +00:00
|
|
|
|
*
|
2009-06-17 00:22:09 +01:00
|
|
|
|
* This library is distributed in the hope that it will be useful, but
|
|
|
|
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
2003-04-05 19:15:35 +00:00
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
|
|
* Lesser General Public License for more details.
|
1996-07-25 22:56:11 +00:00
|
|
|
|
*
|
2003-04-05 19:15:35 +00:00
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
|
|
* License along with this library; if not, write to the Free Software
|
2009-06-17 00:22:09 +01:00
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
|
|
|
|
|
* 02110-1301 USA
|
2003-04-05 19:15:35 +00:00
|
|
|
|
*/
|
1999-12-12 02:36:16 +00:00
|
|
|
|
|
|
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#ifdef HAVE_CONFIG_H
|
|
|
|
|
|
# include <config.h>
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
#include <stdio.h>
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#include <ctype.h>
|
|
|
|
|
|
#include <string.h>
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
#include <unistd.h>
|
|
|
|
|
|
#include <unicase.h>
|
2011-01-21 08:57:39 +01:00
|
|
|
|
#include <unictype.h>
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/_scm.h"
|
2009-06-19 00:47:11 +02:00
|
|
|
|
#include "libguile/bytevectors.h"
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/chars.h"
|
|
|
|
|
|
#include "libguile/eval.h"
|
2009-07-17 01:08:35 +02:00
|
|
|
|
#include "libguile/arrays.h"
|
2009-07-17 00:58:32 +02:00
|
|
|
|
#include "libguile/bitvectors.h"
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/keywords.h"
|
|
|
|
|
|
#include "libguile/alist.h"
|
|
|
|
|
|
#include "libguile/srcprop.h"
|
|
|
|
|
|
#include "libguile/hashtab.h"
|
|
|
|
|
|
#include "libguile/hash.h"
|
|
|
|
|
|
#include "libguile/ports.h"
|
2009-11-27 17:00:51 +01:00
|
|
|
|
#include "libguile/fports.h"
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/root.h"
|
|
|
|
|
|
#include "libguile/strings.h"
|
2002-08-05 23:04:44 +00:00
|
|
|
|
#include "libguile/strports.h"
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/vectors.h"
|
|
|
|
|
|
#include "libguile/validate.h"
|
2004-10-26 17:00:13 +00:00
|
|
|
|
#include "libguile/srfi-4.h"
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#include "libguile/srfi-13.h"
|
2002-08-05 23:04:44 +00:00
|
|
|
|
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/read.h"
|
* backtrace.c, debug.c, debug.h, deprecation.c, eq.c, eval.c
eval.h, gsubr.c, init.c, macros.c, print.c, print.h, read.c,
read.h, stacks.c, symbols.c, throw.c: use private-options.h
* private-options.h: new file: contain hardcoded option
definitions.
2007-01-22 15:14:40 +00:00
|
|
|
|
#include "libguile/private-options.h"
|
|
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2003-05-06 20:17:26 +00:00
|
|
|
|
SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
|
1997-03-10 06:49:15 +00:00
|
|
|
|
SCM_SYMBOL (scm_keyword_prefix, "prefix");
|
2008-04-15 19:52:43 +02:00
|
|
|
|
SCM_SYMBOL (scm_keyword_postfix, "postfix");
|
2010-04-09 14:15:16 +02:00
|
|
|
|
SCM_SYMBOL (sym_nil, "nil");
|
1997-03-10 06:49:15 +00:00
|
|
|
|
|
2012-10-26 17:20:16 -04:00
|
|
|
|
/* SRFI-105 curly infix expression support */
|
|
|
|
|
|
SCM_SYMBOL (sym_nfx, "$nfx$");
|
|
|
|
|
|
SCM_SYMBOL (sym_bracket_list, "$bracket-list$");
|
|
|
|
|
|
SCM_SYMBOL (sym_bracket_apply, "$bracket-apply$");
|
|
|
|
|
|
|
|
|
|
|
|
scm_t_option scm_read_opts[] =
|
|
|
|
|
|
{
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "copy", 0,
|
|
|
|
|
|
"Copy source code expressions." },
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "positions", 1,
|
|
|
|
|
|
"Record positions of source code expressions." },
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "case-insensitive", 0,
|
|
|
|
|
|
"Convert symbols to lower case."},
|
|
|
|
|
|
{ SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS,
|
|
|
|
|
|
"Style of keyword recognition: #f, 'prefix or 'postfix."},
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
|
|
|
|
|
|
"Use R6RS variable-length character and string hex escapes."},
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "square-brackets", 1,
|
|
|
|
|
|
"Treat `[' and `]' as parentheses, for R6RS compatibility."},
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0,
|
|
|
|
|
|
"In strings, consume leading whitespace after an escaped end-of-line."},
|
|
|
|
|
|
{ SCM_OPTION_BOOLEAN, "curly-infix", 0,
|
|
|
|
|
|
"Support SRFI-105 curly infix expressions."},
|
|
|
|
|
|
{ 0, },
|
|
|
|
|
|
};
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
|
|
|
|
|
|
/* Internal read options structure. This is initialized by 'scm_read'
|
2012-10-23 17:28:43 -04:00
|
|
|
|
from the global and per-port read options, and a pointer is passed
|
|
|
|
|
|
down to all helper functions. */
|
|
|
|
|
|
|
|
|
|
|
|
enum t_keyword_style
|
|
|
|
|
|
{
|
|
|
|
|
|
KEYWORD_STYLE_HASH_PREFIX,
|
|
|
|
|
|
KEYWORD_STYLE_PREFIX,
|
|
|
|
|
|
KEYWORD_STYLE_POSTFIX
|
|
|
|
|
|
};
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
|
2012-10-23 17:28:43 -04:00
|
|
|
|
struct t_read_opts
|
|
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
enum t_keyword_style keyword_style;
|
|
|
|
|
|
unsigned int copy_source_p : 1;
|
|
|
|
|
|
unsigned int record_positions_p : 1;
|
|
|
|
|
|
unsigned int case_insensitive_p : 1;
|
|
|
|
|
|
unsigned int r6rs_escapes_p : 1;
|
|
|
|
|
|
unsigned int square_brackets_p : 1;
|
|
|
|
|
|
unsigned int hungry_eol_escapes_p : 1;
|
2012-10-26 17:20:16 -04:00
|
|
|
|
unsigned int curly_infix_p : 1;
|
|
|
|
|
|
unsigned int neoteric_p : 1;
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
typedef struct t_read_opts scm_t_read_opts;
|
|
|
|
|
|
|
1996-08-20 17:11:25 +00:00
|
|
|
|
|
2002-08-04 23:33:28 +00:00
|
|
|
|
/*
|
|
|
|
|
|
Give meaningful error messages for errors
|
|
|
|
|
|
|
|
|
|
|
|
We use the format
|
|
|
|
|
|
|
2002-08-05 23:04:44 +00:00
|
|
|
|
FILE:LINE:COL: MESSAGE
|
2002-08-04 23:33:28 +00:00
|
|
|
|
This happened in ....
|
|
|
|
|
|
|
|
|
|
|
|
This is not standard GNU format, but the test-suite likes the real
|
|
|
|
|
|
message to be in front.
|
|
|
|
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
|
2004-10-26 17:00:13 +00:00
|
|
|
|
void
|
|
|
|
|
|
scm_i_input_error (char const *function,
|
|
|
|
|
|
SCM port, const char *message, SCM arg)
|
2002-08-05 23:04:44 +00:00
|
|
|
|
{
|
2004-08-10 13:54:01 +00:00
|
|
|
|
SCM fn = (scm_is_string (SCM_FILENAME(port))
|
|
|
|
|
|
? SCM_FILENAME(port)
|
|
|
|
|
|
: scm_from_locale_string ("#<unknown port>"));
|
2002-08-05 23:04:44 +00:00
|
|
|
|
|
2004-08-10 13:54:01 +00:00
|
|
|
|
SCM string_port = scm_open_output_string ();
|
2002-08-05 23:04:44 +00:00
|
|
|
|
SCM string = SCM_EOL;
|
|
|
|
|
|
scm_simple_format (string_port,
|
(scm_i_casei_streq): New, for counted strings.
* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH. Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string. Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged. Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.
* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.
2004-08-19 17:17:43 +00:00
|
|
|
|
scm_from_locale_string ("~A:~S:~S: ~A"),
|
2004-08-10 13:54:01 +00:00
|
|
|
|
scm_list_4 (fn,
|
2007-03-07 23:35:55 +00:00
|
|
|
|
scm_from_long (SCM_LINUM (port) + 1),
|
* numbers.h, numbers.c, discouraged.h, discouraged.c (scm_short2num,
scm_ushort2num, scm_int2num, scm_uint2num, scm_long2num,
scm_ulong2num, scm_size2num, scm_ptrdiff2num, scm_num2short,
scm_num2ushort, scm_num2int, scm_num2uint, scm_num2long,
scm_num2ulong, scm_num2size, scm_num2ptrdiff, scm_long_long2num,
scm_ulong_long2num, scm_num2long_long, scm_num2ulong_long):
Discouraged by moving to discouraged.h and discouraged.c and
reimplementing in terms of scm_from_* and scm_to_*. Changed all uses
to the new scm_from_* and scm_to_* functions.
2004-08-02 16:14:04 +00:00
|
|
|
|
scm_from_int (SCM_COL (port) + 1),
|
(scm_i_casei_streq): New, for counted strings.
* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH. Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string. Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged. Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.
* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.
2004-08-19 17:17:43 +00:00
|
|
|
|
scm_from_locale_string (message)));
|
2002-08-05 23:04:44 +00:00
|
|
|
|
|
|
|
|
|
|
string = scm_get_output_string (string_port);
|
|
|
|
|
|
scm_close_output_port (string_port);
|
2011-01-07 09:08:58 -08:00
|
|
|
|
scm_error_scm (scm_from_latin1_symbol ("read-error"),
|
2004-10-26 17:00:13 +00:00
|
|
|
|
function? scm_from_locale_string (function) : SCM_BOOL_F,
|
2002-08-05 23:04:44 +00:00
|
|
|
|
string,
|
2003-06-04 16:36:03 +00:00
|
|
|
|
arg,
|
2002-08-05 23:04:44 +00:00
|
|
|
|
SCM_BOOL_F);
|
|
|
|
|
|
}
|
2002-08-04 23:33:28 +00:00
|
|
|
|
|
|
|
|
|
|
|
2000-01-05 19:05:23 +00:00
|
|
|
|
SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
|
1999-12-12 02:36:16 +00:00
|
|
|
|
(SCM setting),
|
2001-02-16 15:17:20 +00:00
|
|
|
|
"Option interface for the read options. Instead of using\n"
|
|
|
|
|
|
"this procedure directly, use the procedures @code{read-enable},\n"
|
2002-03-15 09:40:57 +00:00
|
|
|
|
"@code{read-disable}, @code{read-set!} and @code{read-options}.")
|
1999-12-12 02:36:16 +00:00
|
|
|
|
#define FUNC_NAME s_scm_read_options
|
1996-08-20 17:11:25 +00:00
|
|
|
|
{
|
1996-08-23 01:20:34 +00:00
|
|
|
|
SCM ans = scm_options (setting,
|
|
|
|
|
|
scm_read_opts,
|
1999-12-12 02:36:16 +00:00
|
|
|
|
FUNC_NAME);
|
1996-08-23 01:20:34 +00:00
|
|
|
|
if (SCM_COPY_SOURCE_P)
|
|
|
|
|
|
SCM_RECORD_POSITIONS_P = 1;
|
1996-08-20 17:11:25 +00:00
|
|
|
|
return ans;
|
|
|
|
|
|
}
|
1999-12-12 02:36:16 +00:00
|
|
|
|
#undef FUNC_NAME
|
1996-08-20 17:11:25 +00:00
|
|
|
|
|
2010-11-03 00:09:57 +01:00
|
|
|
|
/* A fluid referring to an association list mapping extra hash
|
|
|
|
|
|
characters to procedures. */
|
|
|
|
|
|
static SCM *scm_i_read_hash_procedures;
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM
|
2010-11-03 00:09:57 +01:00
|
|
|
|
scm_i_read_hash_procedures_ref (void)
|
|
|
|
|
|
{
|
|
|
|
|
|
return scm_fluid_ref (*scm_i_read_hash_procedures);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static void
|
2010-11-03 00:09:57 +01:00
|
|
|
|
scm_i_read_hash_procedures_set_x (SCM value)
|
|
|
|
|
|
{
|
|
|
|
|
|
scm_fluid_set_x (*scm_i_read_hash_procedures, value);
|
|
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* Token readers. */
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* Size of the C buffer used to read symbols and numbers. */
|
|
|
|
|
|
#define READER_BUFFER_SIZE 128
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-05-07 00:32:01 +02:00
|
|
|
|
/* Number of 32-bit codepoints in the buffer used to read strings. */
|
|
|
|
|
|
#define READER_STRING_BUFFER_SIZE 128
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* The maximum size of Scheme character names. */
|
|
|
|
|
|
#define READER_CHAR_NAME_MAX_SIZE 50
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
2012-10-23 00:29:07 -04:00
|
|
|
|
/* The maximum size of reader directive names. */
|
|
|
|
|
|
#define READER_DIRECTIVE_NAME_MAX_SIZE 50
|
|
|
|
|
|
|
2000-10-06 16:51:08 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* `isblank' is only in C99. */
|
|
|
|
|
|
#define CHAR_IS_BLANK_(_chr) \
|
|
|
|
|
|
(((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n') \
|
2007-10-17 21:56:10 +00:00
|
|
|
|
|| ((_chr) == '\f') || ((_chr) == '\r'))
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
#ifdef MSDOS
|
|
|
|
|
|
# define CHAR_IS_BLANK(_chr) \
|
|
|
|
|
|
((CHAR_IS_BLANK_ (chr)) || ((_chr) == 26))
|
|
|
|
|
|
#else
|
|
|
|
|
|
# define CHAR_IS_BLANK CHAR_IS_BLANK_
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
|
|
|
|
|
|
structure''). */
|
|
|
|
|
|
#define CHAR_IS_R5RS_DELIMITER(c) \
|
|
|
|
|
|
(CHAR_IS_BLANK (c) \
|
2012-10-22 23:28:56 -04:00
|
|
|
|
|| (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2012-10-22 23:28:56 -04:00
|
|
|
|
#define CHAR_IS_DELIMITER(c) \
|
|
|
|
|
|
(CHAR_IS_R5RS_DELIMITER (c) \
|
2012-10-26 17:20:16 -04:00
|
|
|
|
|| (((c) == ']' || (c) == '[') && (opts->square_brackets_p \
|
|
|
|
|
|
|| opts->curly_infix_p)) \
|
|
|
|
|
|
|| (((c) == '}' || (c) == '{') && opts->curly_infix_p))
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
|
|
|
|
|
|
Structure''. */
|
|
|
|
|
|
#define CHAR_IS_EXPONENT_MARKER(_chr) \
|
|
|
|
|
|
(((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f') \
|
|
|
|
|
|
|| ((_chr) == 'd') || ((_chr) == 'l'))
|
|
|
|
|
|
|
2007-09-03 16:58:20 +00:00
|
|
|
|
/* Read an SCSH block comment. */
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
|
2009-10-19 22:38:34 +02:00
|
|
|
|
static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
static SCM scm_read_commented_expression (scm_t_wchar, SCM, scm_t_read_opts *);
|
|
|
|
|
|
static SCM scm_read_shebang (scm_t_wchar, SCM, scm_t_read_opts *);
|
2009-10-19 22:38:34 +02:00
|
|
|
|
static SCM scm_get_hash_procedure (int);
|
2007-09-03 16:58:20 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
/* Read from PORT until a delimiter (e.g., a whitespace) is read. Put the
|
|
|
|
|
|
result in the pre-allocated buffer BUF. Return zero if the whole token has
|
|
|
|
|
|
fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set the number of
|
|
|
|
|
|
bytes actually read. */
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static int
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
read_token (SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
char *buf, size_t buf_size, size_t *read)
|
2012-05-04 22:36:27 +02:00
|
|
|
|
{
|
2010-02-02 20:33:41 -08:00
|
|
|
|
*read = 0;
|
2004-09-07 09:18:59 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
while (*read < buf_size)
|
|
|
|
|
|
{
|
|
|
|
|
|
int chr;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
chr = scm_get_byte_or_eof (port);
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (chr == EOF)
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
else if (CHAR_IS_DELIMITER (chr))
|
|
|
|
|
|
{
|
|
|
|
|
|
scm_unget_byte (chr, port);
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
*buf = (char) chr;
|
|
|
|
|
|
buf++, (*read)++;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
return 1;
|
|
|
|
|
|
}
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
2012-05-04 22:36:27 +02:00
|
|
|
|
/* Like `read_token', but return either BUFFER, or a GC-allocated buffer
|
|
|
|
|
|
if the token doesn't fit in BUFFER_SIZE bytes. */
|
|
|
|
|
|
static char *
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
read_complete_token (SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
char *buffer, size_t buffer_size, size_t *read)
|
2010-02-02 20:33:41 -08:00
|
|
|
|
{
|
|
|
|
|
|
int overflow = 0;
|
2012-05-04 22:36:27 +02:00
|
|
|
|
size_t bytes_read, overflow_size = 0;
|
|
|
|
|
|
char *overflow_buffer = NULL;
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
do
|
|
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
overflow = read_token (port, opts, buffer, buffer_size, &bytes_read);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (bytes_read == 0)
|
|
|
|
|
|
break;
|
|
|
|
|
|
if (overflow || overflow_size != 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (overflow_size == 0)
|
|
|
|
|
|
{
|
2012-05-04 22:36:27 +02:00
|
|
|
|
overflow_buffer = scm_gc_malloc_pointerless (bytes_read, "read");
|
|
|
|
|
|
memcpy (overflow_buffer, buffer, bytes_read);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
overflow_size = bytes_read;
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
2012-05-06 22:23:58 +02:00
|
|
|
|
char *new_buf =
|
2012-05-04 22:36:27 +02:00
|
|
|
|
scm_gc_malloc_pointerless (overflow_size + bytes_read, "read");
|
|
|
|
|
|
|
|
|
|
|
|
memcpy (new_buf, overflow_buffer, overflow_size);
|
|
|
|
|
|
memcpy (new_buf + overflow_size, buffer, bytes_read);
|
|
|
|
|
|
|
|
|
|
|
|
overflow_buffer = new_buf;
|
2010-02-02 20:33:41 -08:00
|
|
|
|
overflow_size += bytes_read;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
}
|
2010-02-02 20:33:41 -08:00
|
|
|
|
while (overflow);
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (overflow_size)
|
|
|
|
|
|
*read = overflow_size;
|
2009-12-28 17:35:48 +01:00
|
|
|
|
else
|
2010-02-02 20:33:41 -08:00
|
|
|
|
*read = bytes_read;
|
|
|
|
|
|
|
2012-05-04 22:36:27 +02:00
|
|
|
|
return (overflow_size > 0 ? overflow_buffer : buffer);
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* Skip whitespace from PORT and return the first non-whitespace character
|
|
|
|
|
|
read. Raise an error on end-of-file. */
|
|
|
|
|
|
static int
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
flush_ws (SCM port, scm_t_read_opts *opts, const char *eoferr)
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
2012-02-08 03:00:15 -05:00
|
|
|
|
scm_t_wchar c;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
while (1)
|
* Makefile.in: Rebuilt.
* Makefile.am (libguile_la_SOURCES): Removed extchrs.c,
mbstrings.c.
(modinclude_HEADERS): Removed extchrs.h, mbstrings.h.
* unif.c (scm_vector_set_length_x): Don't handle multibyte
strings.
* tag.c (scm_utag_mb_string, scm_utag_mb_substring): Removed.
(scm_tag): Don't handle multibyte strings.
* read.c: Don't include mbstrings.h.
(scm_lreadr): Don't handle multibyte ports.
* kw.c: Don't include mbstrings.h.
* init.c: Don't include mbstrings.h.
(scm_boot_guile_1): Don't init mbstrings module.
* hash.c (scm_hasher): Don't handle mbstrings.
* gscm.c (gscm_run_scm): Don't init mbstrings module.
* gc.c (scm_gc_mark): Don't handle mbstrings.
(scm_gc_sweep): Likewise.
* eval.c (SCM_CEVAL): Don't handle mbstrings.
* eq.c (scm_equal_p): Use SCM_TYP7S, not SCM_TYP7SD.
* tags.h (SCM_TYP7SD): Removed.
(SCM_TYP7D): Removed.
(scm_tc7_mb_string): Removed.
(scm_tc7_mb_substring): Removed.
* print.c (scm_iprin1): Handle char printing directly. Don't
handle mbstrings.
Don't include "mbstrings.h".
* symbols.c (scm_intern_obarray_soft, scm_string_to_symbol,
scm_string_to_obarray_symbol, msymbolize): Don't set symbol's
multi-byte flag.
Don't include "mbstrings.h".
* symbols.h (SCM_SYMBOL_MULTI_BYTE_STRINGP): Removed.
(SCM_SYMBOL_SLOTS): Define as 4.
(SCM_ROSTRINGP): Use SCM_TYP7S, not SCM_TYP7SD.
* arbiters.c, backtrace.c, debug.c, dynl.c, eval.c, fluids.c,
gc.c, gsubr.c, ioext.c, kw.c, mallocs.c, numbers.c, ports.c,
print.c, read.c, regex-posix.c, root.c, srcprop.c, stackchk.c,
struct.c, threads.c, throw.c, unif.c, variable.c: Use new
("gen"-less) I/O function names.
* ports.c (scm_add_to_port_table): Don't set port's
representation.
* ports.h (scm_port_representation_type): Removed.
(scm_string_representation_type): Removed.
(struct scm_port_table ): Removed representation field.
(SCM_PORT_REPRESENTATION): Removed.
(SCM_SET_PORT_REPRESENTATION): Removed.
* genio.h: Use new function names.
* genio.c: Don't include "extchrs.h".
(scm_gen_putc, scm_gen_puts, scm_gen_write, scm_get_getc):
Removed.
(scm_putc, scm_puts, scm_lfwrite): No longer static.
(scm_getc): No longer static; handle line and column changes.
(scm_ungetc): Renamed from scm_gen_ungetc.
(scm_do_read_line): Renamed from scm_gen_read_line.
* libguile.h: Don't include "extchrs.h" or "mbstrings.h"
* extchrs.h, extchrs.c, mbstrings.h, mbstrings.c: Removed.
1997-10-15 17:18:32 +00:00
|
|
|
|
switch (c = scm_getc (port))
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
|
|
|
|
|
case EOF:
|
|
|
|
|
|
goteof:
|
|
|
|
|
|
if (eoferr)
|
2000-08-06 22:04:11 +00:00
|
|
|
|
{
|
2004-10-26 17:00:13 +00:00
|
|
|
|
scm_i_input_error (eoferr,
|
|
|
|
|
|
port,
|
|
|
|
|
|
"end of file",
|
|
|
|
|
|
SCM_EOL);
|
2000-08-06 22:04:11 +00:00
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
return c;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
case ';':
|
|
|
|
|
|
lp:
|
* Makefile.in: Rebuilt.
* Makefile.am (libguile_la_SOURCES): Removed extchrs.c,
mbstrings.c.
(modinclude_HEADERS): Removed extchrs.h, mbstrings.h.
* unif.c (scm_vector_set_length_x): Don't handle multibyte
strings.
* tag.c (scm_utag_mb_string, scm_utag_mb_substring): Removed.
(scm_tag): Don't handle multibyte strings.
* read.c: Don't include mbstrings.h.
(scm_lreadr): Don't handle multibyte ports.
* kw.c: Don't include mbstrings.h.
* init.c: Don't include mbstrings.h.
(scm_boot_guile_1): Don't init mbstrings module.
* hash.c (scm_hasher): Don't handle mbstrings.
* gscm.c (gscm_run_scm): Don't init mbstrings module.
* gc.c (scm_gc_mark): Don't handle mbstrings.
(scm_gc_sweep): Likewise.
* eval.c (SCM_CEVAL): Don't handle mbstrings.
* eq.c (scm_equal_p): Use SCM_TYP7S, not SCM_TYP7SD.
* tags.h (SCM_TYP7SD): Removed.
(SCM_TYP7D): Removed.
(scm_tc7_mb_string): Removed.
(scm_tc7_mb_substring): Removed.
* print.c (scm_iprin1): Handle char printing directly. Don't
handle mbstrings.
Don't include "mbstrings.h".
* symbols.c (scm_intern_obarray_soft, scm_string_to_symbol,
scm_string_to_obarray_symbol, msymbolize): Don't set symbol's
multi-byte flag.
Don't include "mbstrings.h".
* symbols.h (SCM_SYMBOL_MULTI_BYTE_STRINGP): Removed.
(SCM_SYMBOL_SLOTS): Define as 4.
(SCM_ROSTRINGP): Use SCM_TYP7S, not SCM_TYP7SD.
* arbiters.c, backtrace.c, debug.c, dynl.c, eval.c, fluids.c,
gc.c, gsubr.c, ioext.c, kw.c, mallocs.c, numbers.c, ports.c,
print.c, read.c, regex-posix.c, root.c, srcprop.c, stackchk.c,
struct.c, threads.c, throw.c, unif.c, variable.c: Use new
("gen"-less) I/O function names.
* ports.c (scm_add_to_port_table): Don't set port's
representation.
* ports.h (scm_port_representation_type): Removed.
(scm_string_representation_type): Removed.
(struct scm_port_table ): Removed representation field.
(SCM_PORT_REPRESENTATION): Removed.
(SCM_SET_PORT_REPRESENTATION): Removed.
* genio.h: Use new function names.
* genio.c: Don't include "extchrs.h".
(scm_gen_putc, scm_gen_puts, scm_gen_write, scm_get_getc):
Removed.
(scm_putc, scm_puts, scm_lfwrite): No longer static.
(scm_getc): No longer static; handle line and column changes.
(scm_ungetc): Renamed from scm_gen_ungetc.
(scm_do_read_line): Renamed from scm_gen_read_line.
* libguile.h: Don't include "extchrs.h" or "mbstrings.h"
* extchrs.h, extchrs.c, mbstrings.h, mbstrings.c: Removed.
1997-10-15 17:18:32 +00:00
|
|
|
|
switch (c = scm_getc (port))
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
|
|
|
|
|
case EOF:
|
|
|
|
|
|
goto goteof;
|
|
|
|
|
|
default:
|
|
|
|
|
|
goto lp;
|
|
|
|
|
|
case SCM_LINE_INCREMENTORS:
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
break;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2007-09-03 16:58:20 +00:00
|
|
|
|
case '#':
|
|
|
|
|
|
switch (c = scm_getc (port))
|
|
|
|
|
|
{
|
|
|
|
|
|
case EOF:
|
|
|
|
|
|
eoferr = "read_sharp";
|
|
|
|
|
|
goto goteof;
|
|
|
|
|
|
case '!':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_shebang (c, port, opts);
|
2007-09-03 16:58:20 +00:00
|
|
|
|
break;
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
case ';':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_commented_expression (c, port, opts);
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
break;
|
2009-10-19 22:38:34 +02:00
|
|
|
|
case '|':
|
|
|
|
|
|
if (scm_is_false (scm_get_hash_procedure (c)))
|
|
|
|
|
|
{
|
|
|
|
|
|
scm_read_r6rs_block_comment (c, port);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
/* fall through */
|
2007-09-03 16:58:20 +00:00
|
|
|
|
default:
|
|
|
|
|
|
scm_ungetc (c, port);
|
|
|
|
|
|
return '#';
|
|
|
|
|
|
}
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
case SCM_LINE_INCREMENTORS:
|
|
|
|
|
|
case SCM_SINGLE_SPACES:
|
|
|
|
|
|
case '\t':
|
|
|
|
|
|
break;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
default:
|
|
|
|
|
|
return c;
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
return 0;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* Token readers. */
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
static SCM scm_read_expression (SCM port, scm_t_read_opts *opts);
|
|
|
|
|
|
static SCM scm_read_sharp (int chr, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column);
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
2012-02-08 03:10:11 -05:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column)
|
2012-02-08 03:10:11 -05:00
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->record_positions_p)
|
2012-02-08 03:10:11 -05:00
|
|
|
|
scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
|
|
|
|
|
|
return x;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
1996-09-18 19:35:48 +00:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_sexp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#define FUNC_NAME "scm_i_lreadparen"
|
1996-09-18 19:35:48 +00:00
|
|
|
|
{
|
2011-05-24 21:25:11 +02:00
|
|
|
|
int c;
|
|
|
|
|
|
SCM tmp, tl, ans = SCM_EOL;
|
2012-10-26 17:20:16 -04:00
|
|
|
|
const int curly_list_p = (chr == '{') && opts->curly_infix_p;
|
|
|
|
|
|
const int terminating_char = ((chr == '{') ? '}'
|
|
|
|
|
|
: ((chr == '[') ? ']'
|
|
|
|
|
|
: ')'));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* Need to capture line and column numbers here. */
|
|
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
1996-10-25 08:30:26 +00:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
c = flush_ws (port, opts, FUNC_NAME);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (terminating_char == c)
|
|
|
|
|
|
return SCM_EOL;
|
1996-10-25 08:30:26 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
scm_ungetc (c, port);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
tmp = scm_read_expression (port, opts);
|
2011-07-01 12:20:52 +02:00
|
|
|
|
|
|
|
|
|
|
/* Note that it is possible for scm_read_expression to return
|
|
|
|
|
|
scm_sym_dot, but not as part of a dotted pair: as in #{.}#. So
|
|
|
|
|
|
check that it's a real dot by checking `c'. */
|
|
|
|
|
|
if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
ans = scm_read_expression (port, opts);
|
|
|
|
|
|
if (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
|
2007-07-22 16:30:13 +00:00
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "missing close paren",
|
|
|
|
|
|
SCM_EOL);
|
|
|
|
|
|
return ans;
|
|
|
|
|
|
}
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* Build the head of the list structure. */
|
|
|
|
|
|
ans = tl = scm_cons (tmp, SCM_EOL);
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
while (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
2007-07-22 16:30:13 +00:00
|
|
|
|
SCM new_tail;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-10-26 17:20:16 -04:00
|
|
|
|
if (c == ')' || (c == ']' && opts->square_brackets_p)
|
|
|
|
|
|
|| ((c == '}' || c == ']') && opts->curly_infix_p))
|
2010-07-13 21:53:41 +02:00
|
|
|
|
scm_i_input_error (FUNC_NAME, port,
|
|
|
|
|
|
"in pair: mismatched close paren: ~A",
|
|
|
|
|
|
scm_list_1 (SCM_MAKE_CHAR (c)));
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
scm_ungetc (c, port);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
tmp = scm_read_expression (port, opts);
|
2010-07-13 21:53:41 +02:00
|
|
|
|
|
2011-07-01 12:20:52 +02:00
|
|
|
|
/* See above note about scm_sym_dot. */
|
|
|
|
|
|
if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
SCM_SETCDR (tl, scm_read_expression (port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
c = flush_ws (port, opts, FUNC_NAME);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (terminating_char != c)
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port,
|
|
|
|
|
|
"in pair: missing close paren", SCM_EOL);
|
2012-10-26 17:20:16 -04:00
|
|
|
|
break;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2001-06-27 13:15:20 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
new_tail = scm_cons (tmp, SCM_EOL);
|
|
|
|
|
|
SCM_SETCDR (tl, new_tail);
|
|
|
|
|
|
tl = new_tail;
|
|
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-10-26 17:20:16 -04:00
|
|
|
|
if (curly_list_p)
|
|
|
|
|
|
{
|
|
|
|
|
|
/* In addition to finding the length, 'scm_ilength' checks for
|
|
|
|
|
|
improper or circular lists, in which case it returns -1. */
|
|
|
|
|
|
int len = scm_ilength (ans);
|
|
|
|
|
|
|
|
|
|
|
|
/* The (len == 0) case is handled above */
|
|
|
|
|
|
if (len == 1)
|
|
|
|
|
|
/* Return directly to avoid re-annotating the element's source
|
|
|
|
|
|
location with the position of the outer brace. Also, it
|
|
|
|
|
|
might not be possible to annotate the element. */
|
|
|
|
|
|
return scm_car (ans); /* {e} => e */
|
|
|
|
|
|
else if (len == 2)
|
|
|
|
|
|
; /* Leave the list unchanged: {e1 e2} => (e1 e2) */
|
|
|
|
|
|
else if (len >= 3 && (len & 1))
|
|
|
|
|
|
{
|
|
|
|
|
|
/* It's a proper list whose length is odd and at least 3. If
|
|
|
|
|
|
the elements at odd indices (the infix operator positions)
|
|
|
|
|
|
are all 'equal?', then it's a simple curly-infix list.
|
|
|
|
|
|
Otherwise it's a mixed curly-infix list. */
|
|
|
|
|
|
SCM op = scm_cadr (ans);
|
|
|
|
|
|
|
|
|
|
|
|
/* Check to see if the elements at odd indices are 'equal?' */
|
|
|
|
|
|
for (tl = scm_cdddr (ans); ; tl = scm_cddr (tl))
|
|
|
|
|
|
{
|
|
|
|
|
|
if (scm_is_null (tl))
|
|
|
|
|
|
{
|
|
|
|
|
|
/* Convert simple curly-infix list to prefix:
|
|
|
|
|
|
{a <op> b <op> ...} => (<op> a b ...) */
|
|
|
|
|
|
tl = ans;
|
|
|
|
|
|
while (scm_is_pair (scm_cdr (tl)))
|
|
|
|
|
|
{
|
|
|
|
|
|
tmp = scm_cddr (tl);
|
|
|
|
|
|
SCM_SETCDR (tl, tmp);
|
|
|
|
|
|
tl = tmp;
|
|
|
|
|
|
}
|
|
|
|
|
|
ans = scm_cons (op, ans);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (scm_is_false (scm_equal_p (op, scm_car (tl))))
|
|
|
|
|
|
{
|
|
|
|
|
|
/* Mixed curly-infix list: {e ...} => ($nfx$ e ...) */
|
|
|
|
|
|
ans = scm_cons (sym_nfx, ans);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
/* Mixed curly-infix (possibly improper) list:
|
|
|
|
|
|
{e . tail} => ($nfx$ e . tail) */
|
|
|
|
|
|
ans = scm_cons (sym_nfx, ans);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return maybe_annotate_source (ans, port, opts, line, column);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
#undef FUNC_NAME
|
2004-10-26 17:00:13 +00:00
|
|
|
|
|
2010-01-10 18:24:23 -08:00
|
|
|
|
|
|
|
|
|
|
/* Read a hexadecimal number NDIGITS in length. Put its value into the variable
|
2010-01-12 21:02:41 -08:00
|
|
|
|
C. If TERMINATOR is non-null, terminate early if the TERMINATOR character is
|
|
|
|
|
|
found. */
|
|
|
|
|
|
#define SCM_READ_HEX_ESCAPE(ndigits, terminator) \
|
|
|
|
|
|
do \
|
|
|
|
|
|
{ \
|
|
|
|
|
|
scm_t_wchar a; \
|
|
|
|
|
|
size_t i = 0; \
|
|
|
|
|
|
c = 0; \
|
|
|
|
|
|
while (i < ndigits) \
|
|
|
|
|
|
{ \
|
|
|
|
|
|
a = scm_getc (port); \
|
|
|
|
|
|
if (a == EOF) \
|
|
|
|
|
|
goto str_eof; \
|
|
|
|
|
|
if (terminator \
|
|
|
|
|
|
&& (a == (scm_t_wchar) terminator) \
|
|
|
|
|
|
&& (i > 0)) \
|
|
|
|
|
|
break; \
|
|
|
|
|
|
if ('0' <= a && a <= '9') \
|
|
|
|
|
|
a -= '0'; \
|
|
|
|
|
|
else if ('A' <= a && a <= 'F') \
|
|
|
|
|
|
a = a - 'A' + 10; \
|
|
|
|
|
|
else if ('a' <= a && a <= 'f') \
|
|
|
|
|
|
a = a - 'a' + 10; \
|
|
|
|
|
|
else \
|
|
|
|
|
|
{ \
|
|
|
|
|
|
c = a; \
|
|
|
|
|
|
goto bad_escaped; \
|
|
|
|
|
|
} \
|
|
|
|
|
|
c = c * 16 + a; \
|
|
|
|
|
|
i ++; \
|
|
|
|
|
|
} \
|
2010-01-10 18:24:23 -08:00
|
|
|
|
} while (0)
|
|
|
|
|
|
|
2011-01-21 08:57:39 +01:00
|
|
|
|
static void
|
|
|
|
|
|
skip_intraline_whitespace (SCM port)
|
|
|
|
|
|
{
|
|
|
|
|
|
scm_t_wchar c;
|
|
|
|
|
|
|
|
|
|
|
|
do
|
|
|
|
|
|
{
|
|
|
|
|
|
c = scm_getc (port);
|
|
|
|
|
|
if (c == EOF)
|
|
|
|
|
|
return;
|
|
|
|
|
|
}
|
|
|
|
|
|
while (c == '\t' || uc_is_general_category (c, UC_SPACE_SEPARATOR));
|
|
|
|
|
|
|
|
|
|
|
|
scm_ungetc (c, port);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#define FUNC_NAME "scm_lreadr"
|
|
|
|
|
|
{
|
|
|
|
|
|
/* For strings smaller than C_STR, this function creates only one Scheme
|
|
|
|
|
|
object (the string returned). */
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-05-07 00:32:01 +02:00
|
|
|
|
SCM str = SCM_EOL;
|
|
|
|
|
|
size_t c_str_len = 0;
|
|
|
|
|
|
scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE];
|
2004-10-29 14:45:19 +00:00
|
|
|
|
|
2012-02-08 15:51:38 -05:00
|
|
|
|
/* Need to capture line and column numbers here. */
|
|
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
while ('"' != (c = scm_getc (port)))
|
|
|
|
|
|
{
|
|
|
|
|
|
if (c == EOF)
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
{
|
|
|
|
|
|
str_eof:
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port,
|
|
|
|
|
|
"end of file in string constant", SCM_EOL);
|
|
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-05-07 00:32:01 +02:00
|
|
|
|
if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
|
|
|
|
|
|
{
|
|
|
|
|
|
str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
|
|
|
|
|
|
c_str_len = 0;
|
|
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (c == '\\')
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
{
|
|
|
|
|
|
switch (c = scm_getc (port))
|
|
|
|
|
|
{
|
|
|
|
|
|
case EOF:
|
|
|
|
|
|
goto str_eof;
|
|
|
|
|
|
case '"':
|
|
|
|
|
|
case '\\':
|
|
|
|
|
|
break;
|
|
|
|
|
|
case '\n':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->hungry_eol_escapes_p)
|
2011-01-21 08:57:39 +01:00
|
|
|
|
skip_intraline_whitespace (port);
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
continue;
|
|
|
|
|
|
case '0':
|
|
|
|
|
|
c = '\0';
|
|
|
|
|
|
break;
|
|
|
|
|
|
case 'f':
|
|
|
|
|
|
c = '\f';
|
|
|
|
|
|
break;
|
|
|
|
|
|
case 'n':
|
|
|
|
|
|
c = '\n';
|
|
|
|
|
|
break;
|
|
|
|
|
|
case 'r':
|
|
|
|
|
|
c = '\r';
|
|
|
|
|
|
break;
|
|
|
|
|
|
case 't':
|
|
|
|
|
|
c = '\t';
|
|
|
|
|
|
break;
|
|
|
|
|
|
case 'a':
|
|
|
|
|
|
c = '\007';
|
|
|
|
|
|
break;
|
|
|
|
|
|
case 'v':
|
|
|
|
|
|
c = '\v';
|
|
|
|
|
|
break;
|
2010-01-10 15:41:37 -08:00
|
|
|
|
case 'b':
|
|
|
|
|
|
c = '\010';
|
|
|
|
|
|
break;
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
case 'x':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->r6rs_escapes_p)
|
2010-01-12 21:02:41 -08:00
|
|
|
|
SCM_READ_HEX_ESCAPE (10, ';');
|
|
|
|
|
|
else
|
|
|
|
|
|
SCM_READ_HEX_ESCAPE (2, '\0');
|
2010-01-10 18:24:23 -08:00
|
|
|
|
break;
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
case 'u':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (!opts->r6rs_escapes_p)
|
2010-01-13 07:02:07 -08:00
|
|
|
|
{
|
|
|
|
|
|
SCM_READ_HEX_ESCAPE (4, '\0');
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
case 'U':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (!opts->r6rs_escapes_p)
|
2010-01-13 07:02:07 -08:00
|
|
|
|
{
|
|
|
|
|
|
SCM_READ_HEX_ESCAPE (6, '\0');
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
Add Unicode strings and symbols
This adds full Unicode strings as a datatype, and it adds some
minimal functionality. The terminal and port encoding is assumed
to be ISO-8859-1. Non-ISO-8859-1 characters are written or
input as string character escapes.
The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.
The process for writing to strings has been modified. There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.
To compile strings that may be wide, the VM storage of strings and
string-likes has changed.
Most string-using functions have not yet been updated and may break
when used with wide strings.
* module/language/assembly/compile-bytecode.scm (write-bytecode):
use variable width string bytecode format
* module/language/assembly.scm (byte-length): use variable width
bytecode format
* libguile/vm-i-loader.c (load-string, load-symbol):
(load-keyword, define): use variable-width bytecode format
* libguile/vm-engine.h (FETCH_WIDTH): new macro
* libguile/strings.h: new declarations
* libguile/strings.c (make_wide_stringbuf): new function
(widen_stringbuf): new function
(scm_i_make_wide_string): new function
(scm_i_is_narrow_string): new function
(scm_i_string_wide_chars): new function
(scm_i_string_start_writing): new function
(scm_i_string_ref): new function
(scm_i_string_set_x): new function
(scm_i_is_narrow_symbol): new function
(scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
(scm_string_width): new function
(unistring_escapes_to_guile_escapes): new function
(scm_to_stringn): new function
(scm_i_stringbuf_free): modify for wide strings
(scm_i_substring_copy): modify for wide strings
(scm_i_string_chars, scm_string_append): modify for wide strings
(scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
(scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
(scm_string, scm_i_deprecated_string_chars): modify for wide strings
(scm_from_locale_string, scm_from_locale_stringn): add null test
* libguile/srfi-13.c: add calls for scm_i_string_start_writing for
each call of scm_i_string_stop_writing
(scm_string_for_each): modify for wide strings
* libguile/socket.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/rw.c: add calls for scm_i_string_start_writing for each
call of scm_i_string_stop_writing
* libguile/read.c (scm_read_string): allow reading of wide strings
* libguile/print.h: add declaration for scm_charprint
* libguile/print.c (iprin1): print wide strings and add new string
escapes
(scm_charprint): new function
* libguile/ports.h: new declarations for scm_lfwrite_substr and
scm_lfwrite_str
* libguile/ports.c (update_port_lf): new function
(scm_lfwrite): use update_port_lf
(scm_lfwrite_substr): new function
(scm_lfwrite_str): new function
* test-suite/tests/asm-to-bytecode.test ("compiler"): add string
width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00
|
|
|
|
default:
|
|
|
|
|
|
bad_escaped:
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port,
|
|
|
|
|
|
"illegal character in escape sequence: ~S",
|
|
|
|
|
|
scm_list_1 (SCM_MAKE_CHAR (c)));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2012-05-07 00:32:01 +02:00
|
|
|
|
|
|
|
|
|
|
c_str[c_str_len++] = c;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
2012-05-07 00:32:01 +02:00
|
|
|
|
|
|
|
|
|
|
if (scm_is_null (str))
|
|
|
|
|
|
/* Fast path: we got a string that fits in C_STR. */
|
|
|
|
|
|
str = scm_from_utf32_stringn (c_str, c_str_len);
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
if (c_str_len > 0)
|
|
|
|
|
|
str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
|
|
|
|
|
|
|
|
|
|
|
|
str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return maybe_annotate_source (str, port, opts, line, column);
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2001-03-04 17:09:34 +00:00
|
|
|
|
#undef FUNC_NAME
|
|
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
2010-02-02 20:33:41 -08:00
|
|
|
|
SCM result, str = SCM_EOL;
|
2012-05-04 22:36:27 +02:00
|
|
|
|
char local_buffer[READER_BUFFER_SIZE], *buffer;
|
2010-02-02 20:33:41 -08:00
|
|
|
|
size_t bytes_read;
|
|
|
|
|
|
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-02-15 11:47:31 -05:00
|
|
|
|
/* Need to capture line and column numbers here. */
|
|
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
scm_ungetc (chr, port);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
|
2012-05-04 22:36:27 +02:00
|
|
|
|
&bytes_read);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
|
2012-05-04 22:36:27 +02:00
|
|
|
|
str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
|
|
|
|
|
|
result = scm_string_to_number (str, SCM_UNDEFINED);
|
2012-02-15 11:47:31 -05:00
|
|
|
|
if (scm_is_false (result))
|
2010-02-02 20:33:41 -08:00
|
|
|
|
{
|
|
|
|
|
|
/* Return a symbol instead of a number */
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->case_insensitive_p)
|
2010-02-02 20:33:41 -08:00
|
|
|
|
str = scm_string_downcase_x (str);
|
|
|
|
|
|
result = scm_string_to_symbol (str);
|
|
|
|
|
|
}
|
2012-02-15 11:47:31 -05:00
|
|
|
|
else if (SCM_NIMP (result))
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
result = maybe_annotate_source (result, port, opts, line, column);
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
SCM_COL (port) += scm_i_string_length (str);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
return result;
|
|
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
SCM result;
|
|
|
|
|
|
int ends_with_colon = 0;
|
2010-02-02 20:33:41 -08:00
|
|
|
|
size_t bytes_read;
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
int postfix = (opts->keyword_style == KEYWORD_STYLE_POSTFIX);
|
2012-05-04 22:36:27 +02:00
|
|
|
|
char local_buffer[READER_BUFFER_SIZE], *buffer;
|
2010-02-02 20:33:41 -08:00
|
|
|
|
scm_t_port *pt = SCM_PTAB_ENTRY (port);
|
|
|
|
|
|
SCM str;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
scm_ungetc (chr, port);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
|
2012-05-04 22:36:27 +02:00
|
|
|
|
&bytes_read);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (bytes_read > 0)
|
2012-05-04 22:36:27 +02:00
|
|
|
|
ends_with_colon = buffer[bytes_read - 1] == ':';
|
2008-04-15 19:52:43 +02:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (postfix && ends_with_colon && (bytes_read > 1))
|
|
|
|
|
|
{
|
2012-05-04 22:36:27 +02:00
|
|
|
|
str = scm_from_stringn (buffer, bytes_read - 1,
|
|
|
|
|
|
pt->encoding, pt->ilseq_handler);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->case_insensitive_p)
|
2010-02-02 20:33:41 -08:00
|
|
|
|
str = scm_string_downcase_x (str);
|
|
|
|
|
|
result = scm_symbol_to_keyword (scm_string_to_symbol (str));
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
else
|
2010-02-02 20:33:41 -08:00
|
|
|
|
{
|
2012-05-04 22:36:27 +02:00
|
|
|
|
str = scm_from_stringn (buffer, bytes_read,
|
|
|
|
|
|
pt->encoding, pt->ilseq_handler);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->case_insensitive_p)
|
2010-02-02 20:33:41 -08:00
|
|
|
|
str = scm_string_downcase_x (str);
|
|
|
|
|
|
result = scm_string_to_symbol (str);
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
SCM_COL (port) += scm_i_string_length (str);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
return result;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#define FUNC_NAME "scm_lreadr"
|
|
|
|
|
|
{
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
SCM result;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
size_t read;
|
2012-05-04 22:36:27 +02:00
|
|
|
|
char local_buffer[READER_BUFFER_SIZE], *buffer;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
unsigned int radix;
|
2010-02-02 20:33:41 -08:00
|
|
|
|
SCM str;
|
|
|
|
|
|
scm_t_port *pt;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
switch (chr)
|
|
|
|
|
|
{
|
|
|
|
|
|
case 'B':
|
|
|
|
|
|
case 'b':
|
|
|
|
|
|
radix = 2;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case 'o':
|
|
|
|
|
|
case 'O':
|
|
|
|
|
|
radix = 8;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case 'd':
|
|
|
|
|
|
case 'D':
|
|
|
|
|
|
radix = 10;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case 'x':
|
|
|
|
|
|
case 'X':
|
|
|
|
|
|
radix = 16;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
scm_ungetc (chr, port);
|
|
|
|
|
|
scm_ungetc ('#', port);
|
|
|
|
|
|
radix = 10;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
|
2012-05-04 22:36:27 +02:00
|
|
|
|
&read);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
|
|
|
|
|
|
pt = SCM_PTAB_ENTRY (port);
|
2012-05-04 22:36:27 +02:00
|
|
|
|
str = scm_from_stringn (buffer, read, pt->encoding, pt->ilseq_handler);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
|
|
|
|
|
|
result = scm_string_to_number (str, scm_from_uint (radix));
|
|
|
|
|
|
|
|
|
|
|
|
SCM_COL (port) += scm_i_string_length (str);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
if (scm_is_true (result))
|
|
|
|
|
|
return result;
|
|
|
|
|
|
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
|
|
|
|
|
|
|
|
|
|
|
|
return SCM_BOOL_F;
|
|
|
|
|
|
}
|
|
|
|
|
|
#undef FUNC_NAME
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_quote (int chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
SCM p;
|
2007-08-23 21:17:24 +00:00
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
switch (chr)
|
|
|
|
|
|
{
|
|
|
|
|
|
case '`':
|
|
|
|
|
|
p = scm_sym_quasiquote;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case '\'':
|
|
|
|
|
|
p = scm_sym_quote;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case ',':
|
|
|
|
|
|
{
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_t_wchar c;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
c = scm_getc (port);
|
|
|
|
|
|
if ('@' == c)
|
|
|
|
|
|
p = scm_sym_uq_splicing;
|
|
|
|
|
|
else
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
2007-07-22 16:30:13 +00:00
|
|
|
|
scm_ungetc (c, port);
|
|
|
|
|
|
p = scm_sym_unquote;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
break;
|
|
|
|
|
|
}
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
default:
|
|
|
|
|
|
fprintf (stderr, "%s: unhandled quote character (%i)\n",
|
2008-02-07 09:54:47 +00:00
|
|
|
|
"scm_read_quote", chr);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
abort ();
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
|
|
|
|
|
|
return maybe_annotate_source (p, port, opts, line, column);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
SCM_SYMBOL (sym_syntax, "syntax");
|
|
|
|
|
|
SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
|
|
|
|
|
|
SCM_SYMBOL (sym_unsyntax, "unsyntax");
|
|
|
|
|
|
SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_syntax (int chr, SCM port, scm_t_read_opts *opts)
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
{
|
|
|
|
|
|
SCM p;
|
|
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
|
|
|
|
|
|
|
|
|
|
|
switch (chr)
|
|
|
|
|
|
{
|
|
|
|
|
|
case '`':
|
|
|
|
|
|
p = sym_quasisyntax;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case '\'':
|
|
|
|
|
|
p = sym_syntax;
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
case ',':
|
|
|
|
|
|
{
|
|
|
|
|
|
int c;
|
|
|
|
|
|
|
|
|
|
|
|
c = scm_getc (port);
|
|
|
|
|
|
if ('@' == c)
|
|
|
|
|
|
p = sym_unsyntax_splicing;
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
scm_ungetc (c, port);
|
|
|
|
|
|
p = sym_unsyntax;
|
|
|
|
|
|
}
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
fprintf (stderr, "%s: unhandled syntax character (%i)\n",
|
|
|
|
|
|
"scm_read_syntax", chr);
|
|
|
|
|
|
abort ();
|
|
|
|
|
|
}
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
|
|
|
|
|
|
return maybe_annotate_source (p, port, opts, line, column);
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_nil (int chr, SCM port, scm_t_read_opts *opts)
|
2010-04-09 14:15:16 +02:00
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
SCM id = scm_read_mixed_case_symbol (chr, port, opts);
|
2010-04-09 14:15:16 +02:00
|
|
|
|
|
|
|
|
|
|
if (!scm_is_eq (id, sym_nil))
|
|
|
|
|
|
scm_i_input_error ("scm_read_nil", port,
|
|
|
|
|
|
"unexpected input while reading #nil: ~a",
|
|
|
|
|
|
scm_list_1 (id));
|
|
|
|
|
|
|
|
|
|
|
|
return SCM_ELISP_NIL;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM
|
2007-07-22 16:30:13 +00:00
|
|
|
|
scm_read_semicolon_comment (int chr, SCM port)
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
|
|
|
|
|
int c;
|
|
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
/* We use the get_byte here because there is no need to get the
|
|
|
|
|
|
locale correct with comment input. This presumes that newline
|
|
|
|
|
|
always represents itself no matter what the encoding is. */
|
|
|
|
|
|
for (c = scm_get_byte_or_eof (port);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
(c != EOF) && (c != '\n');
|
2010-02-15 20:45:58 -08:00
|
|
|
|
c = scm_get_byte_or_eof (port));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Sharp readers, i.e. readers called after a `#' sign has been read. */
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
|
|
|
|
|
scm_read_boolean (int chr, SCM port)
|
|
|
|
|
|
{
|
|
|
|
|
|
switch (chr)
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case 't':
|
|
|
|
|
|
case 'T':
|
|
|
|
|
|
return SCM_BOOL_T;
|
|
|
|
|
|
|
|
|
|
|
|
case 'f':
|
|
|
|
|
|
case 'F':
|
|
|
|
|
|
return SCM_BOOL_F;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#define FUNC_NAME "scm_lreadr"
|
|
|
|
|
|
{
|
2010-02-02 20:33:41 -08:00
|
|
|
|
char buffer[READER_CHAR_NAME_MAX_SIZE];
|
|
|
|
|
|
SCM charname;
|
|
|
|
|
|
size_t charname_len, bytes_read;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_t_wchar cp;
|
|
|
|
|
|
int overflow;
|
2010-02-02 20:33:41 -08:00
|
|
|
|
scm_t_port *pt;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE,
|
|
|
|
|
|
&bytes_read);
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
if (overflow)
|
2010-10-18 13:29:58 +02:00
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (bytes_read == 0)
|
1996-07-25 22:56:11 +00:00
|
|
|
|
{
|
2007-07-22 16:30:13 +00:00
|
|
|
|
chr = scm_getc (port);
|
|
|
|
|
|
if (chr == EOF)
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
|
|
|
|
|
|
"while reading character", SCM_EOL);
|
|
|
|
|
|
|
|
|
|
|
|
/* CHR must be a token delimiter, like a whitespace. */
|
|
|
|
|
|
return (SCM_MAKE_CHAR (chr));
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
pt = SCM_PTAB_ENTRY (port);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
2010-02-02 20:33:41 -08:00
|
|
|
|
/* Simple ASCII characters can be processed immediately. Also, simple
|
|
|
|
|
|
ISO-8859-1 characters can be processed immediately if the encoding for this
|
|
|
|
|
|
port is ISO-8859-1. */
|
|
|
|
|
|
if (bytes_read == 1 && ((unsigned char) buffer[0] <= 127 || pt->encoding == NULL))
|
|
|
|
|
|
{
|
|
|
|
|
|
SCM_COL (port) += 1;
|
|
|
|
|
|
return SCM_MAKE_CHAR (buffer[0]);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Otherwise, convert the buffer into a proper scheme string for
|
|
|
|
|
|
processing. */
|
|
|
|
|
|
charname = scm_from_stringn (buffer, bytes_read, pt->encoding,
|
|
|
|
|
|
pt->ilseq_handler);
|
|
|
|
|
|
charname_len = scm_i_string_length (charname);
|
|
|
|
|
|
SCM_COL (port) += charname_len;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
cp = scm_i_string_ref (charname, 0);
|
2010-02-02 20:33:41 -08:00
|
|
|
|
if (charname_len == 1)
|
|
|
|
|
|
return SCM_MAKE_CHAR (cp);
|
|
|
|
|
|
|
|
|
|
|
|
/* Ignore dotted circles, which may be used to keep combining characters from
|
|
|
|
|
|
combining with the backslash in #\charname. */
|
2009-09-03 07:47:26 -07:00
|
|
|
|
if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
|
|
|
|
|
|
return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
|
|
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
if (cp >= '0' && cp < '8')
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
/* Dirk:FIXME:: This type of character syntax is not R5RS
|
|
|
|
|
|
* compliant. Further, it should be verified that the constant
|
2009-08-29 07:14:49 -07:00
|
|
|
|
* does only consist of octal digits. */
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
SCM p = scm_string_to_number (charname, scm_from_uint (8));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (SCM_I_INUMP (p))
|
2009-08-29 07:14:49 -07:00
|
|
|
|
{
|
2010-11-19 11:29:26 +01:00
|
|
|
|
scm_t_wchar c = scm_to_uint32 (p);
|
2009-08-29 07:14:49 -07:00
|
|
|
|
if (SCM_IS_UNICODE_CHAR (c))
|
|
|
|
|
|
return SCM_MAKE_CHAR (c);
|
|
|
|
|
|
else
|
2010-07-17 04:16:57 -07:00
|
|
|
|
scm_i_input_error (FUNC_NAME, port,
|
2009-08-29 07:14:49 -07:00
|
|
|
|
"out-of-range octal character escape: ~a",
|
|
|
|
|
|
scm_list_1 (charname));
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
2010-07-17 04:16:57 -07:00
|
|
|
|
if (cp == 'x' && (charname_len > 1))
|
2010-01-12 21:02:41 -08:00
|
|
|
|
{
|
|
|
|
|
|
SCM p;
|
2010-07-17 04:16:57 -07:00
|
|
|
|
|
2010-01-12 21:02:41 -08:00
|
|
|
|
/* Convert from hex, skipping the initial 'x' character in CHARNAME */
|
|
|
|
|
|
p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
|
|
|
|
|
|
scm_from_uint (16));
|
|
|
|
|
|
if (SCM_I_INUMP (p))
|
|
|
|
|
|
{
|
2010-11-19 11:29:26 +01:00
|
|
|
|
scm_t_wchar c = scm_to_uint32 (p);
|
2010-01-12 21:02:41 -08:00
|
|
|
|
if (SCM_IS_UNICODE_CHAR (c))
|
|
|
|
|
|
return SCM_MAKE_CHAR (c);
|
|
|
|
|
|
else
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port,
|
|
|
|
|
|
"out-of-range hex character escape: ~a",
|
|
|
|
|
|
scm_list_1 (charname));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
/* The names of characters should never have non-Latin1
|
|
|
|
|
|
characters. */
|
|
|
|
|
|
if (scm_i_is_narrow_string (charname)
|
|
|
|
|
|
|| scm_i_try_narrow_string (charname))
|
2009-08-26 13:15:07 +02:00
|
|
|
|
{ SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
|
|
|
|
|
|
charname_len);
|
|
|
|
|
|
if (scm_is_true (ch))
|
|
|
|
|
|
return ch;
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_list_1 (charname));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2001-03-04 17:09:34 +00:00
|
|
|
|
#undef FUNC_NAME
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_keyword (int chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
SCM symbol;
|
|
|
|
|
|
|
|
|
|
|
|
/* Read the symbol that comprises the keyword. Doing this instead of
|
|
|
|
|
|
invoking a specific symbol reader function allows `scm_read_keyword ()'
|
|
|
|
|
|
to adapt to the delimiters currently valid of symbols.
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
XXX: This implementation allows sloppy syntaxes like `#: key'. */
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
symbol = scm_read_expression (port, opts);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (!scm_is_symbol (symbol))
|
2008-02-07 09:54:47 +00:00
|
|
|
|
scm_i_input_error ("scm_read_keyword", port,
|
2007-07-22 16:30:13 +00:00
|
|
|
|
"keyword prefix `~a' not followed by a symbol: ~s",
|
|
|
|
|
|
scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
|
|
|
|
|
|
|
|
|
|
|
|
return (scm_symbol_to_keyword (symbol));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_vector (int chr, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column)
|
1996-09-18 19:35:48 +00:00
|
|
|
|
{
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* Note: We call `scm_read_sexp ()' rather than READER here in order to
|
|
|
|
|
|
guarantee that it's going to do what we want. After all, this is an
|
|
|
|
|
|
implementation detail of `scm_read_vector ()', not a desirable
|
|
|
|
|
|
property. */
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port, opts)),
|
|
|
|
|
|
port, opts, line, column);
|
2012-02-08 15:51:38 -05:00
|
|
|
|
}
|
|
|
|
|
|
|
2012-10-22 23:23:45 -04:00
|
|
|
|
/* Helper used by scm_read_array */
|
|
|
|
|
|
static int
|
|
|
|
|
|
read_decimal_integer (SCM port, int c, ssize_t *resp)
|
|
|
|
|
|
{
|
|
|
|
|
|
ssize_t sign = 1;
|
|
|
|
|
|
ssize_t res = 0;
|
|
|
|
|
|
int got_it = 0;
|
|
|
|
|
|
|
|
|
|
|
|
if (c == '-')
|
|
|
|
|
|
{
|
|
|
|
|
|
sign = -1;
|
|
|
|
|
|
c = scm_getc (port);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
while ('0' <= c && c <= '9')
|
|
|
|
|
|
{
|
|
|
|
|
|
res = 10*res + c-'0';
|
|
|
|
|
|
got_it = 1;
|
|
|
|
|
|
c = scm_getc (port);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
if (got_it)
|
|
|
|
|
|
*resp = sign * res;
|
|
|
|
|
|
return c;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Read an array. This function can also read vectors and uniform
|
|
|
|
|
|
vectors. Also, the conflict between '#f' and '#f32' and '#f64' is
|
|
|
|
|
|
handled here.
|
|
|
|
|
|
|
2012-10-30 22:58:19 -04:00
|
|
|
|
C is the first character read after the '#'. */
|
2012-02-08 15:51:38 -05:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column)
|
2012-02-08 15:51:38 -05:00
|
|
|
|
{
|
2012-10-22 23:23:45 -04:00
|
|
|
|
ssize_t rank;
|
|
|
|
|
|
scm_t_wchar tag_buf[8];
|
|
|
|
|
|
int tag_len;
|
|
|
|
|
|
|
|
|
|
|
|
SCM tag, shape = SCM_BOOL_F, elements, array;
|
|
|
|
|
|
|
|
|
|
|
|
/* XXX - shortcut for ordinary vectors. Shouldn't be necessary but
|
|
|
|
|
|
the array code can not deal with zero-length dimensions yet, and
|
2012-10-30 22:58:19 -04:00
|
|
|
|
we want to allow zero-length vectors, of course. */
|
2012-10-22 23:23:45 -04:00
|
|
|
|
if (c == '(')
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return scm_read_vector (c, port, opts, line, column);
|
2012-10-22 23:23:45 -04:00
|
|
|
|
|
2012-10-30 22:58:19 -04:00
|
|
|
|
/* Disambiguate between '#f' and uniform floating point vectors. */
|
2012-10-22 23:23:45 -04:00
|
|
|
|
if (c == 'f')
|
|
|
|
|
|
{
|
|
|
|
|
|
c = scm_getc (port);
|
|
|
|
|
|
if (c != '3' && c != '6')
|
|
|
|
|
|
{
|
|
|
|
|
|
if (c != EOF)
|
|
|
|
|
|
scm_ungetc (c, port);
|
|
|
|
|
|
return SCM_BOOL_F;
|
|
|
|
|
|
}
|
|
|
|
|
|
rank = 1;
|
|
|
|
|
|
tag_buf[0] = 'f';
|
|
|
|
|
|
tag_len = 1;
|
|
|
|
|
|
goto continue_reading_tag;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Read rank. */
|
|
|
|
|
|
rank = 1;
|
|
|
|
|
|
c = read_decimal_integer (port, c, &rank);
|
|
|
|
|
|
if (rank < 0)
|
|
|
|
|
|
scm_i_input_error (NULL, port, "array rank must be non-negative",
|
|
|
|
|
|
SCM_EOL);
|
|
|
|
|
|
|
|
|
|
|
|
/* Read tag. */
|
|
|
|
|
|
tag_len = 0;
|
|
|
|
|
|
continue_reading_tag:
|
|
|
|
|
|
while (c != EOF && c != '(' && c != '@' && c != ':'
|
|
|
|
|
|
&& tag_len < sizeof tag_buf / sizeof tag_buf[0])
|
|
|
|
|
|
{
|
|
|
|
|
|
tag_buf[tag_len++] = c;
|
|
|
|
|
|
c = scm_getc (port);
|
|
|
|
|
|
}
|
|
|
|
|
|
if (tag_len == 0)
|
|
|
|
|
|
tag = SCM_BOOL_T;
|
2012-02-08 15:51:38 -05:00
|
|
|
|
else
|
2012-10-22 23:23:45 -04:00
|
|
|
|
{
|
|
|
|
|
|
tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len));
|
|
|
|
|
|
if (tag_len == sizeof tag_buf / sizeof tag_buf[0])
|
|
|
|
|
|
scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a",
|
|
|
|
|
|
scm_list_1 (tag));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Read shape. */
|
|
|
|
|
|
if (c == '@' || c == ':')
|
|
|
|
|
|
{
|
|
|
|
|
|
shape = SCM_EOL;
|
|
|
|
|
|
|
|
|
|
|
|
do
|
|
|
|
|
|
{
|
|
|
|
|
|
ssize_t lbnd = 0, len = 0;
|
|
|
|
|
|
SCM s;
|
|
|
|
|
|
|
|
|
|
|
|
if (c == '@')
|
|
|
|
|
|
{
|
|
|
|
|
|
c = scm_getc (port);
|
|
|
|
|
|
c = read_decimal_integer (port, c, &lbnd);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
s = scm_from_ssize_t (lbnd);
|
|
|
|
|
|
|
|
|
|
|
|
if (c == ':')
|
|
|
|
|
|
{
|
|
|
|
|
|
c = scm_getc (port);
|
|
|
|
|
|
c = read_decimal_integer (port, c, &len);
|
|
|
|
|
|
if (len < 0)
|
|
|
|
|
|
scm_i_input_error (NULL, port,
|
|
|
|
|
|
"array length must be non-negative",
|
|
|
|
|
|
SCM_EOL);
|
|
|
|
|
|
|
|
|
|
|
|
s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1));
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
shape = scm_cons (s, shape);
|
|
|
|
|
|
} while (c == '@' || c == ':');
|
|
|
|
|
|
|
|
|
|
|
|
shape = scm_reverse_x (shape, SCM_EOL);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Read nested lists of elements. */
|
|
|
|
|
|
if (c != '(')
|
|
|
|
|
|
scm_i_input_error (NULL, port,
|
|
|
|
|
|
"missing '(' in vector or array literal",
|
|
|
|
|
|
SCM_EOL);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
elements = scm_read_sexp (c, port, opts);
|
2012-10-22 23:23:45 -04:00
|
|
|
|
|
|
|
|
|
|
if (scm_is_false (shape))
|
|
|
|
|
|
shape = scm_from_ssize_t (rank);
|
|
|
|
|
|
else if (scm_ilength (shape) != rank)
|
|
|
|
|
|
scm_i_input_error
|
|
|
|
|
|
(NULL, port,
|
|
|
|
|
|
"the number of shape specifications must match the array rank",
|
|
|
|
|
|
SCM_EOL);
|
|
|
|
|
|
|
|
|
|
|
|
/* Handle special print syntax of rank zero arrays; see
|
|
|
|
|
|
scm_i_print_array for a rationale. */
|
|
|
|
|
|
if (rank == 0)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (!scm_is_pair (elements))
|
|
|
|
|
|
scm_i_input_error (NULL, port,
|
|
|
|
|
|
"too few elements in array literal, need 1",
|
|
|
|
|
|
SCM_EOL);
|
|
|
|
|
|
if (!scm_is_null (SCM_CDR (elements)))
|
|
|
|
|
|
scm_i_input_error (NULL, port,
|
|
|
|
|
|
"too many elements in array literal, want 1",
|
|
|
|
|
|
SCM_EOL);
|
|
|
|
|
|
elements = SCM_CAR (elements);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Construct array, annotate with source location, and return. */
|
|
|
|
|
|
array = scm_list_to_typed_array (tag, shape, elements);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return maybe_annotate_source (array, port, opts, line, column);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
1996-09-18 19:35:48 +00:00
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_srfi4_vector (int chr, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return scm_read_array (chr, port, opts, line, column);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
2009-06-19 00:47:11 +02:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column)
|
2009-06-19 00:47:11 +02:00
|
|
|
|
{
|
|
|
|
|
|
chr = scm_getc (port);
|
|
|
|
|
|
if (chr != 'u')
|
|
|
|
|
|
goto syntax;
|
|
|
|
|
|
|
|
|
|
|
|
chr = scm_getc (port);
|
|
|
|
|
|
if (chr != '8')
|
|
|
|
|
|
goto syntax;
|
|
|
|
|
|
|
|
|
|
|
|
chr = scm_getc (port);
|
|
|
|
|
|
if (chr != '(')
|
|
|
|
|
|
goto syntax;
|
|
|
|
|
|
|
2012-02-08 15:51:38 -05:00
|
|
|
|
return maybe_annotate_source
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
(scm_u8_list_to_bytevector (scm_read_sexp (chr, port, opts)),
|
|
|
|
|
|
port, opts, line, column);
|
2009-06-19 00:47:11 +02:00
|
|
|
|
|
|
|
|
|
|
syntax:
|
|
|
|
|
|
scm_i_input_error ("read_bytevector", port,
|
|
|
|
|
|
"invalid bytevector prefix",
|
|
|
|
|
|
SCM_MAKE_CHAR (chr));
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
/* Read the `#*10101'-style read syntax for bit vectors in Guile. This is
|
|
|
|
|
|
terribly inefficient but who cares? */
|
|
|
|
|
|
SCM s_bits = SCM_EOL;
|
|
|
|
|
|
|
|
|
|
|
|
for (chr = scm_getc (port);
|
|
|
|
|
|
(chr != EOF) && ((chr == '0') || (chr == '1'));
|
|
|
|
|
|
chr = scm_getc (port))
|
1996-09-18 19:35:48 +00:00
|
|
|
|
{
|
2007-07-22 16:30:13 +00:00
|
|
|
|
s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
|
1996-09-18 19:35:48 +00:00
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
if (chr != EOF)
|
|
|
|
|
|
scm_ungetc (chr, port);
|
|
|
|
|
|
|
2012-02-08 15:51:38 -05:00
|
|
|
|
return maybe_annotate_source
|
|
|
|
|
|
(scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
port, opts, line, column);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
2012-02-08 03:00:15 -05:00
|
|
|
|
static SCM
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
int bang_seen = 0;
|
|
|
|
|
|
|
|
|
|
|
|
for (;;)
|
1996-09-18 19:35:48 +00:00
|
|
|
|
{
|
2011-02-28 23:33:47 +01:00
|
|
|
|
int c = scm_getc (port);
|
2000-07-18 16:09:09 +00:00
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (c == EOF)
|
|
|
|
|
|
scm_i_input_error ("skip_block_comment", port,
|
|
|
|
|
|
"unterminated `#! ... !#' comment", SCM_EOL);
|
|
|
|
|
|
|
|
|
|
|
|
if (c == '!')
|
|
|
|
|
|
bang_seen = 1;
|
|
|
|
|
|
else if (c == '#' && bang_seen)
|
|
|
|
|
|
break;
|
|
|
|
|
|
else
|
|
|
|
|
|
bang_seen = 0;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-10-24 14:37:36 -04:00
|
|
|
|
static void set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
int value);
|
2012-10-26 17:20:16 -04:00
|
|
|
|
static void set_port_square_brackets_p (SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
int value);
|
|
|
|
|
|
static void set_port_curly_infix_p (SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
int value);
|
2012-10-24 14:37:36 -04:00
|
|
|
|
|
2011-05-08 16:25:01 +02:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_shebang (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
|
2010-05-27 09:20:53 -04:00
|
|
|
|
{
|
2012-10-23 00:29:07 -04:00
|
|
|
|
char name[READER_DIRECTIVE_NAME_MAX_SIZE + 1];
|
|
|
|
|
|
int c;
|
|
|
|
|
|
int i = 0;
|
|
|
|
|
|
|
|
|
|
|
|
while (i <= READER_DIRECTIVE_NAME_MAX_SIZE)
|
2010-05-27 09:20:53 -04:00
|
|
|
|
{
|
2012-10-23 00:29:07 -04:00
|
|
|
|
c = scm_getc (port);
|
|
|
|
|
|
if (c == EOF)
|
|
|
|
|
|
scm_i_input_error ("skip_block_comment", port,
|
|
|
|
|
|
"unterminated `#! ... !#' comment", SCM_EOL);
|
|
|
|
|
|
else if (('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || c == '-')
|
|
|
|
|
|
name[i++] = c;
|
|
|
|
|
|
else if (CHAR_IS_DELIMITER (c))
|
|
|
|
|
|
{
|
|
|
|
|
|
scm_ungetc (c, port);
|
|
|
|
|
|
name[i] = '\0';
|
|
|
|
|
|
if (0 == strcmp ("r6rs", name))
|
|
|
|
|
|
; /* Silently ignore */
|
2012-10-24 14:37:36 -04:00
|
|
|
|
else if (0 == strcmp ("fold-case", name))
|
|
|
|
|
|
set_port_case_insensitive_p (port, opts, 1);
|
|
|
|
|
|
else if (0 == strcmp ("no-fold-case", name))
|
|
|
|
|
|
set_port_case_insensitive_p (port, opts, 0);
|
2012-10-26 17:20:16 -04:00
|
|
|
|
else if (0 == strcmp ("curly-infix", name))
|
|
|
|
|
|
set_port_curly_infix_p (port, opts, 1);
|
|
|
|
|
|
else if (0 == strcmp ("curly-infix-and-bracket-lists", name))
|
|
|
|
|
|
{
|
|
|
|
|
|
set_port_curly_infix_p (port, opts, 1);
|
|
|
|
|
|
set_port_square_brackets_p (port, opts, 0);
|
|
|
|
|
|
}
|
2012-10-23 00:29:07 -04:00
|
|
|
|
else
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
2012-10-30 22:53:22 -04:00
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
scm_ungetc (c, port);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
2010-05-27 09:20:53 -04:00
|
|
|
|
}
|
2012-10-23 00:29:07 -04:00
|
|
|
|
while (i > 0)
|
|
|
|
|
|
scm_ungetc (name[--i], port);
|
|
|
|
|
|
return scm_read_scsh_block_comment (chr, port);
|
2010-05-27 09:20:53 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
2009-10-19 22:38:34 +02:00
|
|
|
|
static SCM
|
|
|
|
|
|
scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port)
|
|
|
|
|
|
{
|
|
|
|
|
|
/* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
|
|
|
|
|
|
nested. So care must be taken. */
|
|
|
|
|
|
int nesting_level = 1;
|
2011-10-05 20:41:11 +02:00
|
|
|
|
|
|
|
|
|
|
int a = scm_getc (port);
|
|
|
|
|
|
|
|
|
|
|
|
if (a == EOF)
|
|
|
|
|
|
scm_i_input_error ("scm_read_r6rs_block_comment", port,
|
|
|
|
|
|
"unterminated `#| ... |#' comment", SCM_EOL);
|
2009-10-19 22:38:34 +02:00
|
|
|
|
|
|
|
|
|
|
while (nesting_level > 0)
|
|
|
|
|
|
{
|
2011-10-05 20:41:11 +02:00
|
|
|
|
int b = scm_getc (port);
|
2009-10-19 22:38:34 +02:00
|
|
|
|
|
2011-10-05 20:41:11 +02:00
|
|
|
|
if (b == EOF)
|
2009-12-14 15:21:54 +01:00
|
|
|
|
scm_i_input_error ("scm_read_r6rs_block_comment", port,
|
2009-10-19 22:38:34 +02:00
|
|
|
|
"unterminated `#| ... |#' comment", SCM_EOL);
|
|
|
|
|
|
|
2011-10-05 20:41:11 +02:00
|
|
|
|
if (a == '|' && b == '#')
|
|
|
|
|
|
{
|
|
|
|
|
|
nesting_level--;
|
|
|
|
|
|
b = EOF;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (a == '#' && b == '|')
|
|
|
|
|
|
{
|
|
|
|
|
|
nesting_level++;
|
|
|
|
|
|
b = EOF;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
a = b;
|
2009-10-19 22:38:34 +02:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_commented_expression (scm_t_wchar chr, SCM port,
|
|
|
|
|
|
scm_t_read_opts *opts)
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
{
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_t_wchar c;
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
c = flush_ws (port, opts, (char *) NULL);
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
if (EOF == c)
|
|
|
|
|
|
scm_i_input_error ("read_commented_expression", port,
|
|
|
|
|
|
"no expression after #; comment", SCM_EOL);
|
|
|
|
|
|
scm_ungetc (c, port);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_expression (port, opts);
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
static SCM
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_read_extended_symbol (scm_t_wchar chr, SCM port)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
/* Guile's extended symbol read syntax looks like this:
|
|
|
|
|
|
|
|
|
|
|
|
#{This is all a symbol name}#
|
|
|
|
|
|
|
|
|
|
|
|
So here, CHR is expected to be `{'. */
|
2011-04-11 12:48:06 +02:00
|
|
|
|
int saw_brace = 0;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
size_t len = 0;
|
2011-03-20 23:34:42 +01:00
|
|
|
|
SCM buf = scm_i_make_string (1024, NULL, 0);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
buf = scm_i_string_start_writing (buf);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
while ((chr = scm_getc (port)) != EOF)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (saw_brace)
|
1996-09-18 19:35:48 +00:00
|
|
|
|
{
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (chr == '#')
|
|
|
|
|
|
{
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
saw_brace = 0;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_i_string_set_x (buf, len++, '}');
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
1996-09-18 19:35:48 +00:00
|
|
|
|
}
|
2011-04-11 12:48:06 +02:00
|
|
|
|
|
|
|
|
|
|
if (chr == '}')
|
2007-07-22 16:30:13 +00:00
|
|
|
|
saw_brace = 1;
|
2011-04-11 12:48:06 +02:00
|
|
|
|
else if (chr == '\\')
|
|
|
|
|
|
{
|
|
|
|
|
|
/* It used to be that print.c would print extended-read-syntax
|
|
|
|
|
|
symbols with backslashes before "non-standard" chars, but
|
|
|
|
|
|
this routine wouldn't do anything with those escapes.
|
|
|
|
|
|
Bummer. What we've done is to change print.c to output
|
|
|
|
|
|
R6RS hex escapes for those characters, relying on the fact
|
|
|
|
|
|
that the extended read syntax would never put a `\' before
|
|
|
|
|
|
an `x'. For now, we just ignore other instances of
|
|
|
|
|
|
backslash in the string. */
|
|
|
|
|
|
switch ((chr = scm_getc (port)))
|
|
|
|
|
|
{
|
|
|
|
|
|
case EOF:
|
|
|
|
|
|
goto done;
|
|
|
|
|
|
case 'x':
|
|
|
|
|
|
{
|
|
|
|
|
|
scm_t_wchar c;
|
|
|
|
|
|
|
|
|
|
|
|
SCM_READ_HEX_ESCAPE (10, ';');
|
|
|
|
|
|
scm_i_string_set_x (buf, len++, c);
|
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
|
|
str_eof:
|
|
|
|
|
|
chr = EOF;
|
|
|
|
|
|
goto done;
|
|
|
|
|
|
|
|
|
|
|
|
bad_escaped:
|
|
|
|
|
|
scm_i_string_stop_writing ();
|
|
|
|
|
|
scm_i_input_error ("scm_read_extended_symbol", port,
|
|
|
|
|
|
"illegal character in escape sequence: ~S",
|
|
|
|
|
|
scm_list_1 (SCM_MAKE_CHAR (c)));
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
default:
|
|
|
|
|
|
scm_i_string_set_x (buf, len++, chr);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
else
|
2011-04-11 12:48:06 +02:00
|
|
|
|
scm_i_string_set_x (buf, len++, chr);
|
2000-07-18 16:09:09 +00:00
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
if (len >= scm_i_string_length (buf) - 2)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
2009-11-17 01:26:25 +01:00
|
|
|
|
SCM addy;
|
|
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_i_string_stop_writing ();
|
2011-03-20 23:34:42 +01:00
|
|
|
|
addy = scm_i_make_string (1024, NULL, 0);
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
buf = scm_string_append (scm_list_2 (buf, addy));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
len = 0;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
buf = scm_i_string_start_writing (buf);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2011-04-11 12:48:06 +02:00
|
|
|
|
|
|
|
|
|
|
done:
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_i_string_stop_writing ();
|
2011-04-11 12:48:06 +02:00
|
|
|
|
if (chr == EOF)
|
|
|
|
|
|
scm_i_input_error ("scm_read_extended_symbol", port,
|
|
|
|
|
|
"end of file while reading symbol", SCM_EOL);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Top-level token readers, i.e., dispatchers. */
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_sharp_extension (int chr, SCM port, scm_t_read_opts *opts)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
SCM proc;
|
|
|
|
|
|
|
|
|
|
|
|
proc = scm_get_hash_procedure (chr);
|
|
|
|
|
|
if (scm_is_true (scm_procedure_p (proc)))
|
|
|
|
|
|
{
|
|
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 2;
|
|
|
|
|
|
SCM got;
|
|
|
|
|
|
|
|
|
|
|
|
got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
|
2011-05-24 21:25:11 +02:00
|
|
|
|
|
2012-10-23 00:21:12 -04:00
|
|
|
|
if (opts->record_positions_p && SCM_NIMP (got)
|
|
|
|
|
|
&& !scm_i_has_source_properties (got))
|
2011-05-24 21:25:11 +02:00
|
|
|
|
scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port));
|
|
|
|
|
|
|
|
|
|
|
|
return got;
|
1996-09-18 19:35:48 +00:00
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* The reader for the sharp `#' character. It basically dispatches reads
|
|
|
|
|
|
among the above token readers. */
|
|
|
|
|
|
static SCM
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
|
|
|
|
|
|
long line, int column)
|
2007-07-22 16:30:13 +00:00
|
|
|
|
#define FUNC_NAME "scm_lreadr"
|
|
|
|
|
|
{
|
|
|
|
|
|
SCM result;
|
|
|
|
|
|
|
|
|
|
|
|
chr = scm_getc (port);
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
result = scm_read_sharp_extension (chr, port, opts);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (!scm_is_eq (result, SCM_UNSPECIFIED))
|
|
|
|
|
|
return result;
|
|
|
|
|
|
|
|
|
|
|
|
switch (chr)
|
|
|
|
|
|
{
|
|
|
|
|
|
case '\\':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_character (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '(':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_vector (chr, port, opts, line, column));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case 's':
|
|
|
|
|
|
case 'u':
|
|
|
|
|
|
case 'f':
|
2011-04-05 19:42:06 -04:00
|
|
|
|
case 'c':
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* This one may return either a boolean or an SRFI-4 vector. */
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_srfi4_vector (chr, port, opts, line, column));
|
2009-06-19 00:47:11 +02:00
|
|
|
|
case 'v':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_bytevector (chr, port, opts, line, column));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '*':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_guile_bit_vector (chr, port, opts, line, column));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case 't':
|
|
|
|
|
|
case 'T':
|
|
|
|
|
|
case 'F':
|
|
|
|
|
|
return (scm_read_boolean (chr, port));
|
|
|
|
|
|
case ':':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_keyword (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
|
|
|
|
case '5': case '6': case '7': case '8': case '9':
|
|
|
|
|
|
case '@':
|
|
|
|
|
|
#if SCM_ENABLE_DEPRECATED
|
|
|
|
|
|
/* See below for 'i' and 'e'. */
|
|
|
|
|
|
case 'a':
|
|
|
|
|
|
case 'y':
|
|
|
|
|
|
case 'h':
|
|
|
|
|
|
case 'l':
|
|
|
|
|
|
#endif
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_array (chr, port, opts, line, column));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
case 'i':
|
|
|
|
|
|
case 'e':
|
|
|
|
|
|
#if SCM_ENABLE_DEPRECATED
|
|
|
|
|
|
{
|
|
|
|
|
|
/* When next char is '(', it really is an old-style
|
|
|
|
|
|
uniform array. */
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
scm_t_wchar next_c = scm_getc (port);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (next_c != EOF)
|
|
|
|
|
|
scm_ungetc (next_c, port);
|
|
|
|
|
|
if (next_c == '(')
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return scm_read_array (chr, port, opts, line, column);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* Fall through. */
|
|
|
|
|
|
}
|
|
|
|
|
|
#endif
|
|
|
|
|
|
case 'b':
|
|
|
|
|
|
case 'B':
|
|
|
|
|
|
case 'o':
|
|
|
|
|
|
case 'O':
|
|
|
|
|
|
case 'd':
|
|
|
|
|
|
case 'D':
|
|
|
|
|
|
case 'x':
|
|
|
|
|
|
case 'X':
|
|
|
|
|
|
case 'I':
|
|
|
|
|
|
case 'E':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_number_and_radix (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '{':
|
|
|
|
|
|
return (scm_read_extended_symbol (chr, port));
|
|
|
|
|
|
case '!':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_shebang (chr, port, opts));
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
case ';':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_commented_expression (chr, port, opts));
|
add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.
* libguile/read.c (flush_ws, scm_read_commented_expression)
(scm_read_sharp): Add support for commenting out expressions with #;.
(scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.
* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
didn't do anything at all. It's been there since 1997, but no Guile
code I've ever seen uses it, and it conflicts with #'x => (syntax x)
from modern Scheme.
* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
of bugs here.
2009-05-28 14:49:33 +02:00
|
|
|
|
case '`':
|
|
|
|
|
|
case '\'':
|
|
|
|
|
|
case ',':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_syntax (chr, port, opts));
|
2010-04-09 14:15:16 +02:00
|
|
|
|
case 'n':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_nil (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
default:
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
result = scm_read_sharp_extension (chr, port, opts);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (scm_is_eq (result, SCM_UNSPECIFIED))
|
2009-10-19 22:38:34 +02:00
|
|
|
|
{
|
|
|
|
|
|
/* To remain compatible with 1.8 and earlier, the following
|
|
|
|
|
|
characters have lower precedence than `read-hash-extend'
|
|
|
|
|
|
characters. */
|
|
|
|
|
|
switch (chr)
|
|
|
|
|
|
{
|
|
|
|
|
|
case '|':
|
|
|
|
|
|
return scm_read_r6rs_block_comment (chr, port);
|
|
|
|
|
|
default:
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
|
|
|
|
|
|
scm_list_1 (SCM_MAKE_CHAR (chr)));
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
2007-07-22 16:30:13 +00:00
|
|
|
|
else
|
|
|
|
|
|
return result;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
|
|
|
|
|
#undef FUNC_NAME
|
|
|
|
|
|
|
|
|
|
|
|
static SCM
|
2012-10-26 17:20:16 -04:00
|
|
|
|
read_inner_expression (SCM port, scm_t_read_opts *opts)
|
|
|
|
|
|
#define FUNC_NAME "read_inner_expression"
|
2007-07-22 16:30:13 +00:00
|
|
|
|
{
|
|
|
|
|
|
while (1)
|
|
|
|
|
|
{
|
2012-02-08 03:00:15 -05:00
|
|
|
|
scm_t_wchar chr;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
chr = scm_getc (port);
|
|
|
|
|
|
|
|
|
|
|
|
switch (chr)
|
|
|
|
|
|
{
|
|
|
|
|
|
case SCM_WHITE_SPACES:
|
|
|
|
|
|
case SCM_LINE_INCREMENTORS:
|
|
|
|
|
|
break;
|
|
|
|
|
|
case ';':
|
|
|
|
|
|
(void) scm_read_semicolon_comment (chr, port);
|
|
|
|
|
|
break;
|
2012-10-26 17:20:16 -04:00
|
|
|
|
case '{':
|
|
|
|
|
|
if (opts->curly_infix_p)
|
|
|
|
|
|
{
|
|
|
|
|
|
if (opts->neoteric_p)
|
|
|
|
|
|
return scm_read_sexp (chr, port, opts);
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
SCM expr;
|
|
|
|
|
|
|
|
|
|
|
|
/* Enable neoteric expressions within curly braces */
|
|
|
|
|
|
opts->neoteric_p = 1;
|
|
|
|
|
|
expr = scm_read_sexp (chr, port, opts);
|
|
|
|
|
|
opts->neoteric_p = 0;
|
|
|
|
|
|
return expr;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
return scm_read_mixed_case_symbol (chr, port, opts);
|
2010-01-15 22:24:31 +01:00
|
|
|
|
case '[':
|
2012-10-26 17:20:16 -04:00
|
|
|
|
if (opts->square_brackets_p)
|
|
|
|
|
|
return scm_read_sexp (chr, port, opts);
|
|
|
|
|
|
else if (opts->curly_infix_p)
|
|
|
|
|
|
{
|
|
|
|
|
|
/* The syntax of neoteric expressions requires that '[' be
|
|
|
|
|
|
a delimiter when curly-infix is enabled, so it cannot
|
|
|
|
|
|
be part of an unescaped symbol. We might as well do
|
|
|
|
|
|
something useful with it, so we adopt Kawa's convention:
|
|
|
|
|
|
[...] => ($bracket-list$ ...) */
|
|
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
|
|
|
|
|
return maybe_annotate_source
|
|
|
|
|
|
(scm_cons (sym_bracket_list, scm_read_sexp (chr, port, opts)),
|
|
|
|
|
|
port, opts, line, column);
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
return scm_read_mixed_case_symbol (chr, port, opts);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '(':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_sexp (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '"':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_string (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '\'':
|
|
|
|
|
|
case '`':
|
|
|
|
|
|
case ',':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_quote (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case '#':
|
|
|
|
|
|
{
|
2012-02-08 15:51:38 -05:00
|
|
|
|
long line = SCM_LINUM (port);
|
|
|
|
|
|
int column = SCM_COL (port) - 1;
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
SCM result = scm_read_sharp (chr, port, opts, line, column);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (scm_is_eq (result, SCM_UNSPECIFIED))
|
|
|
|
|
|
/* We read a comment or some such. */
|
|
|
|
|
|
break;
|
|
|
|
|
|
else
|
|
|
|
|
|
return result;
|
|
|
|
|
|
}
|
|
|
|
|
|
case ')':
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
|
|
|
|
|
|
break;
|
2012-10-26 17:20:16 -04:00
|
|
|
|
case '}':
|
|
|
|
|
|
if (opts->curly_infix_p)
|
|
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "unexpected \"}\"", SCM_EOL);
|
|
|
|
|
|
else
|
|
|
|
|
|
return scm_read_mixed_case_symbol (chr, port, opts);
|
2010-11-04 22:07:50 -07:00
|
|
|
|
case ']':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->square_brackets_p)
|
2010-11-04 22:07:50 -07:00
|
|
|
|
scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL);
|
|
|
|
|
|
/* otherwise fall through */
|
2007-07-22 16:30:13 +00:00
|
|
|
|
case EOF:
|
|
|
|
|
|
return SCM_EOF_VAL;
|
|
|
|
|
|
case ':':
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
if (opts->keyword_style == KEYWORD_STYLE_PREFIX)
|
|
|
|
|
|
return scm_symbol_to_keyword (scm_read_expression (port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
/* Fall through. */
|
|
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
|
{
|
|
|
|
|
|
if (((chr >= '0') && (chr <= '9'))
|
|
|
|
|
|
|| (strchr ("+-.", chr)))
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_number (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
else
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_mixed_case_symbol (chr, port, opts));
|
2007-07-22 16:30:13 +00:00
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
#undef FUNC_NAME
|
|
|
|
|
|
|
2012-10-26 17:20:16 -04:00
|
|
|
|
static SCM
|
|
|
|
|
|
scm_read_expression (SCM port, scm_t_read_opts *opts)
|
|
|
|
|
|
#define FUNC_NAME "scm_read_expression"
|
|
|
|
|
|
{
|
|
|
|
|
|
if (!opts->neoteric_p)
|
|
|
|
|
|
return read_inner_expression (port, opts);
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
long line = 0;
|
|
|
|
|
|
int column = 0;
|
|
|
|
|
|
SCM expr;
|
|
|
|
|
|
|
|
|
|
|
|
if (opts->record_positions_p)
|
|
|
|
|
|
{
|
|
|
|
|
|
/* We need to get the position of the first non-whitespace
|
|
|
|
|
|
character in order to correctly annotate neoteric
|
|
|
|
|
|
expressions. For example, for the expression 'f(x)', the
|
|
|
|
|
|
first call to 'read_inner_expression' reads the 'f' (which
|
|
|
|
|
|
cannot be annotated), and then we later read the '(x)' and
|
|
|
|
|
|
use it to construct the new list (f x). */
|
|
|
|
|
|
int c = flush_ws (port, opts, (char *) NULL);
|
|
|
|
|
|
if (c == EOF)
|
|
|
|
|
|
return SCM_EOF_VAL;
|
|
|
|
|
|
scm_ungetc (c, port);
|
|
|
|
|
|
line = SCM_LINUM (port);
|
|
|
|
|
|
column = SCM_COL (port);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
expr = read_inner_expression (port, opts);
|
|
|
|
|
|
|
|
|
|
|
|
/* 'expr' is the first component of the neoteric expression. Now
|
|
|
|
|
|
we loop, and as long as the next character is '(', '[', or '{',
|
|
|
|
|
|
(without any intervening whitespace), we use it to construct a
|
|
|
|
|
|
new expression. For example, f{n - 1}(x) => ((f (- n 1)) x). */
|
|
|
|
|
|
for (;;)
|
|
|
|
|
|
{
|
|
|
|
|
|
int chr = scm_getc (port);
|
|
|
|
|
|
|
|
|
|
|
|
if (chr == '(')
|
|
|
|
|
|
/* e(...) => (e ...) */
|
|
|
|
|
|
expr = scm_cons (expr, scm_read_sexp (chr, port, opts));
|
|
|
|
|
|
else if (chr == '[')
|
|
|
|
|
|
/* e[...] => ($bracket-apply$ e ...) */
|
|
|
|
|
|
expr = scm_cons (sym_bracket_apply,
|
|
|
|
|
|
scm_cons (expr,
|
|
|
|
|
|
scm_read_sexp (chr, port, opts)));
|
|
|
|
|
|
else if (chr == '{')
|
|
|
|
|
|
{
|
|
|
|
|
|
SCM arg = scm_read_sexp (chr, port, opts);
|
|
|
|
|
|
|
|
|
|
|
|
if (scm_is_null (arg))
|
|
|
|
|
|
expr = scm_list_1 (expr); /* e{} => (e) */
|
|
|
|
|
|
else
|
|
|
|
|
|
expr = scm_list_2 (expr, arg); /* e{...} => (e {...}) */
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
if (chr != EOF)
|
|
|
|
|
|
scm_ungetc (chr, port);
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
maybe_annotate_source (expr, port, opts, line, column);
|
|
|
|
|
|
}
|
|
|
|
|
|
return expr;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
#undef FUNC_NAME
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
|
|
|
|
|
|
/* Actual reader. */
|
|
|
|
|
|
|
2012-10-23 17:28:43 -04:00
|
|
|
|
static void init_read_options (SCM port, scm_t_read_opts *opts);
|
|
|
|
|
|
|
2007-07-22 16:30:13 +00:00
|
|
|
|
SCM_DEFINE (scm_read, "read", 0, 1, 0,
|
|
|
|
|
|
(SCM port),
|
|
|
|
|
|
"Read an s-expression from the input port @var{port}, or from\n"
|
|
|
|
|
|
"the current input port if @var{port} is not specified.\n"
|
|
|
|
|
|
"Any whitespace before the next token is discarded.")
|
|
|
|
|
|
#define FUNC_NAME s_scm_read
|
|
|
|
|
|
{
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
scm_t_read_opts opts;
|
2007-07-22 16:30:13 +00:00
|
|
|
|
int c;
|
|
|
|
|
|
|
|
|
|
|
|
if (SCM_UNBNDP (port))
|
|
|
|
|
|
port = scm_current_input_port ();
|
|
|
|
|
|
SCM_VALIDATE_OPINPORT (1, port);
|
|
|
|
|
|
|
2012-10-23 17:28:43 -04:00
|
|
|
|
init_read_options (port, &opts);
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
|
|
|
|
|
|
c = flush_ws (port, &opts, (char *) NULL);
|
2007-07-22 16:30:13 +00:00
|
|
|
|
if (EOF == c)
|
|
|
|
|
|
return SCM_EOF_VAL;
|
|
|
|
|
|
scm_ungetc (c, port);
|
|
|
|
|
|
|
Change reader to pass read options to helpers via explicit parameter.
* libguile/read.c (enum t_keyword_style, struct t_read_opts,
scm_t_read_opts): New types.
(init_read_options): New function.
(CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.
(scm_read): Call 'init_read_options', and pass 'opts' to helpers.
(flush_ws, maybe_annotate_source, read_complete_token, read_token,
scm_read_bytevector, scm_read_character,
scm_read_commented_expression, scm_read_expression,
scm_read_guile_bit_vector, scm_read_keyword,
scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
scm_read_vector, scm_read_array): Add 'opts' as an additional
parameter, and use it to look up read options. Previously the global
read options were consulted directly.
2012-10-23 17:11:41 -04:00
|
|
|
|
return (scm_read_expression (port, &opts));
|
1996-09-18 19:35:48 +00:00
|
|
|
|
}
|
2001-03-04 17:09:34 +00:00
|
|
|
|
#undef FUNC_NAME
|
1996-09-18 19:35:48 +00:00
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1997-03-11 03:57:04 +00:00
|
|
|
|
/* Manipulate the read-hash-procedures alist. This could be written in
|
|
|
|
|
|
Scheme, but maybe it will also be used by C code during initialisation. */
|
2000-01-05 19:05:23 +00:00
|
|
|
|
SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
|
1999-12-12 02:36:16 +00:00
|
|
|
|
(SCM chr, SCM proc),
|
2001-02-16 15:17:20 +00:00
|
|
|
|
"Install the procedure @var{proc} for reading expressions\n"
|
|
|
|
|
|
"starting with the character sequence @code{#} and @var{chr}.\n"
|
|
|
|
|
|
"@var{proc} will be called with two arguments: the character\n"
|
|
|
|
|
|
"@var{chr} and the port to read further data from. The object\n"
|
2007-01-06 18:20:35 +00:00
|
|
|
|
"returned will be the return value of @code{read}. \n"
|
|
|
|
|
|
"Passing @code{#f} for @var{proc} will remove a previous setting. \n"
|
|
|
|
|
|
)
|
1999-12-12 02:36:16 +00:00
|
|
|
|
#define FUNC_NAME s_scm_read_hash_extend
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
{
|
1997-03-08 22:52:56 +00:00
|
|
|
|
SCM this;
|
|
|
|
|
|
SCM prev;
|
|
|
|
|
|
|
2001-06-26 10:59:34 +00:00
|
|
|
|
SCM_VALIDATE_CHAR (1, chr);
|
2004-07-06 10:59:25 +00:00
|
|
|
|
SCM_ASSERT (scm_is_false (proc)
|
2004-07-27 15:41:49 +00:00
|
|
|
|
|| scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
|
2001-06-26 10:59:34 +00:00
|
|
|
|
proc, SCM_ARG2, FUNC_NAME);
|
1997-03-08 22:52:56 +00:00
|
|
|
|
|
1997-03-11 03:57:04 +00:00
|
|
|
|
/* Check if chr is already in the alist. */
|
2010-11-03 00:09:57 +01:00
|
|
|
|
this = scm_i_read_hash_procedures_ref ();
|
1997-03-11 03:57:04 +00:00
|
|
|
|
prev = SCM_BOOL_F;
|
1997-03-08 22:52:56 +00:00
|
|
|
|
while (1)
|
|
|
|
|
|
{
|
2004-09-22 17:41:37 +00:00
|
|
|
|
if (scm_is_null (this))
|
1997-03-08 22:52:56 +00:00
|
|
|
|
{
|
|
|
|
|
|
/* not found, so add it to the beginning. */
|
2004-07-06 10:59:25 +00:00
|
|
|
|
if (scm_is_true (proc))
|
1997-03-08 22:52:56 +00:00
|
|
|
|
{
|
2010-11-03 00:09:57 +01:00
|
|
|
|
SCM new = scm_cons (scm_cons (chr, proc),
|
|
|
|
|
|
scm_i_read_hash_procedures_ref ());
|
|
|
|
|
|
scm_i_read_hash_procedures_set_x (new);
|
1997-03-08 22:52:56 +00:00
|
|
|
|
}
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
2004-07-27 15:41:49 +00:00
|
|
|
|
if (scm_is_eq (chr, SCM_CAAR (this)))
|
1997-03-08 22:52:56 +00:00
|
|
|
|
{
|
|
|
|
|
|
/* already in the alist. */
|
2004-07-06 10:59:25 +00:00
|
|
|
|
if (scm_is_false (proc))
|
1997-03-11 03:57:04 +00:00
|
|
|
|
{
|
|
|
|
|
|
/* remove it. */
|
2004-07-06 10:59:25 +00:00
|
|
|
|
if (scm_is_false (prev))
|
1997-03-11 03:57:04 +00:00
|
|
|
|
{
|
2010-11-03 00:09:57 +01:00
|
|
|
|
SCM rest = SCM_CDR (scm_i_read_hash_procedures_ref ());
|
|
|
|
|
|
scm_i_read_hash_procedures_set_x (rest);
|
1997-03-11 03:57:04 +00:00
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
scm_set_cdr_x (prev, SCM_CDR (this));
|
|
|
|
|
|
}
|
1997-03-08 22:52:56 +00:00
|
|
|
|
else
|
1997-03-11 03:57:04 +00:00
|
|
|
|
{
|
|
|
|
|
|
/* replace it. */
|
|
|
|
|
|
scm_set_cdr_x (SCM_CAR (this), proc);
|
|
|
|
|
|
}
|
1997-03-08 22:52:56 +00:00
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
prev = this;
|
|
|
|
|
|
this = SCM_CDR (this);
|
|
|
|
|
|
}
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
|
|
|
|
|
|
return SCM_UNSPECIFIED;
|
|
|
|
|
|
}
|
1999-12-12 02:36:16 +00:00
|
|
|
|
#undef FUNC_NAME
|
1996-07-25 22:56:11 +00:00
|
|
|
|
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
/* Recover the read-hash procedure corresponding to char c. */
|
|
|
|
|
|
static SCM
|
1999-12-12 20:35:02 +00:00
|
|
|
|
scm_get_hash_procedure (int c)
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
{
|
2010-11-03 00:09:57 +01:00
|
|
|
|
SCM rest = scm_i_read_hash_procedures_ref ();
|
1997-03-08 22:52:56 +00:00
|
|
|
|
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
while (1)
|
|
|
|
|
|
{
|
2004-09-22 17:41:37 +00:00
|
|
|
|
if (scm_is_null (rest))
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
return SCM_BOOL_F;
|
|
|
|
|
|
|
2000-03-02 20:54:43 +00:00
|
|
|
|
if (SCM_CHAR (SCM_CAAR (rest)) == c)
|
* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.
* gdbint.c (gdb_read): update scm_lreadr usage.
* load.h: update prototypes.
* load.c (scm_primitive_load, scm_read_and_eval_x,
scm_primitive_load_path): remove case_insensitive_p, sharp arguments.
* read.h: add prototype for scm_read_hash_extend. Change args for
other prototypes.
* read.c (scm_read_hash_procedures): new variable.
(scm_read_hash_extend): new procedure.
(scm_get_hash_procedure): new procedure.
* (scm_lreadr): use scm_get_hash_procedure instead of an argument
for extended # processing.
(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
scm_read_token): remove case_i, sharp arguments. Change callers.
* read.h (SCM_N_READ_OPTIONS): increase to 3.
(SCM_CASE_INSENSITIVE_P): define.
* read.c: add case-insensitive option to scm_read_opts.
* (scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
to determine whether to convert symbol case.
(default_case_i): definition removed.
* read.c (scm_read_token): if case_i, downcase ic before doing
anything with it.
1997-03-08 18:58:24 +00:00
|
|
|
|
return SCM_CDAR (rest);
|
|
|
|
|
|
|
|
|
|
|
|
rest = SCM_CDR (rest);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes. (Patch thanks to Marius
Vollmer.)
1996-10-14 01:33:50 +00:00
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
#define SCM_ENCODING_SEARCH_SIZE (500)
|
|
|
|
|
|
|
2009-11-14 16:27:28 +01:00
|
|
|
|
/* Search the first few hundred characters of a file for an Emacs-like coding
|
|
|
|
|
|
declaration. Returns either NULL or a string whose storage has been
|
|
|
|
|
|
allocated with `scm_gc_malloc ()'. */
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
char *
|
2009-11-14 16:27:28 +01:00
|
|
|
|
scm_i_scan_for_encoding (SCM port)
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
{
|
2011-03-03 12:46:49 +01:00
|
|
|
|
scm_t_port *pt;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
char header[SCM_ENCODING_SEARCH_SIZE+1];
|
2010-07-16 05:39:52 -07:00
|
|
|
|
size_t bytes_read, encoding_length, i;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
char *encoding = NULL;
|
2013-01-30 15:30:31 +01:00
|
|
|
|
int utf8_bom = 0;
|
2010-07-16 05:39:52 -07:00
|
|
|
|
char *pos, *encoding_start;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
int in_comment;
|
|
|
|
|
|
|
2011-03-03 12:46:49 +01:00
|
|
|
|
pt = SCM_PTAB_ENTRY (port);
|
2009-11-27 17:00:51 +01:00
|
|
|
|
|
2011-03-03 12:46:49 +01:00
|
|
|
|
if (pt->rw_active == SCM_PORT_WRITE)
|
|
|
|
|
|
scm_flush (port);
|
2009-11-27 17:00:51 +01:00
|
|
|
|
|
2011-03-03 12:46:49 +01:00
|
|
|
|
if (pt->rw_random)
|
|
|
|
|
|
pt->rw_active = SCM_PORT_READ;
|
|
|
|
|
|
|
|
|
|
|
|
if (pt->read_pos == pt->read_end)
|
|
|
|
|
|
{
|
|
|
|
|
|
/* We can use the read buffer, and thus avoid a seek. */
|
|
|
|
|
|
if (scm_fill_input (port) == EOF)
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
|
|
bytes_read = pt->read_end - pt->read_pos;
|
|
|
|
|
|
if (bytes_read > SCM_ENCODING_SEARCH_SIZE)
|
|
|
|
|
|
bytes_read = SCM_ENCODING_SEARCH_SIZE;
|
|
|
|
|
|
|
|
|
|
|
|
if (bytes_read <= 1)
|
|
|
|
|
|
/* An unbuffered port -- don't scan. */
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
|
|
memcpy (header, pt->read_pos, bytes_read);
|
|
|
|
|
|
header[bytes_read] = '\0';
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
/* Try to read some bytes and then seek back. Not all ports
|
|
|
|
|
|
support seeking back; and indeed some file ports (like
|
|
|
|
|
|
/dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
|
|
|
|
|
|
check performed by SCM_FPORT_FDES---but fail to seek
|
|
|
|
|
|
backwards. Hence this block comes second. We prefer to use
|
|
|
|
|
|
the read buffer in-place. */
|
|
|
|
|
|
if (SCM_FPORTP (port) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port)))
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
|
|
bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
|
|
|
|
|
|
header[bytes_read] = '\0';
|
2013-01-30 15:30:31 +01:00
|
|
|
|
scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
|
2011-03-03 12:46:49 +01:00
|
|
|
|
}
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
2013-01-30 15:30:31 +01:00
|
|
|
|
if (bytes_read > 3
|
|
|
|
|
|
&& header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
|
|
|
|
|
|
utf8_bom = 1;
|
|
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
/* search past "coding[:=]" */
|
|
|
|
|
|
pos = header;
|
|
|
|
|
|
while (1)
|
|
|
|
|
|
{
|
|
|
|
|
|
if ((pos = strstr(pos, "coding")) == NULL)
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
|
|
|
|
pos += strlen("coding");
|
|
|
|
|
|
if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
|
|
|
|
|
|
(*pos == ':' || *pos == '='))
|
|
|
|
|
|
{
|
|
|
|
|
|
pos ++;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* skip spaces */
|
|
|
|
|
|
while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
|
|
|
|
|
|
(*pos == ' ' || *pos == '\t'))
|
|
|
|
|
|
pos ++;
|
|
|
|
|
|
|
|
|
|
|
|
/* grab the next token */
|
2010-07-16 05:39:52 -07:00
|
|
|
|
encoding_start = pos;
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
i = 0;
|
2010-07-16 05:39:52 -07:00
|
|
|
|
while (encoding_start + i - header <= SCM_ENCODING_SEARCH_SIZE
|
|
|
|
|
|
&& encoding_start + i - header < bytes_read
|
|
|
|
|
|
&& (isalnum ((int) encoding_start[i])
|
|
|
|
|
|
|| strchr ("_-.:/,+=()", encoding_start[i]) != NULL))
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
i++;
|
|
|
|
|
|
|
2010-07-16 05:39:52 -07:00
|
|
|
|
encoding_length = i;
|
|
|
|
|
|
if (encoding_length == 0)
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
return NULL;
|
|
|
|
|
|
|
2010-07-16 05:39:52 -07:00
|
|
|
|
encoding = scm_gc_strndup (encoding_start, encoding_length, "encoding");
|
|
|
|
|
|
for (i = 0; i < encoding_length; i++)
|
2009-08-27 07:35:39 -07:00
|
|
|
|
encoding[i] = toupper ((int) encoding[i]);
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
|
|
|
|
|
|
/* push backwards to make sure we were in a comment */
|
|
|
|
|
|
in_comment = 0;
|
2010-07-16 05:39:52 -07:00
|
|
|
|
pos = encoding_start;
|
|
|
|
|
|
while (pos >= header)
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
{
|
2011-03-31 14:46:21 +02:00
|
|
|
|
if (*pos == ';')
|
|
|
|
|
|
{
|
|
|
|
|
|
in_comment = 1;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
|
|
|
|
|
else if (*pos == '\n' || pos == header)
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
{
|
|
|
|
|
|
/* This wasn't in a semicolon comment. Check for a
|
|
|
|
|
|
hash-bang comment. */
|
|
|
|
|
|
char *beg = strstr (header, "#!");
|
|
|
|
|
|
char *end = strstr (header, "!#");
|
2011-03-31 14:46:21 +02:00
|
|
|
|
if (beg < encoding_start && encoding_start + encoding_length <= end)
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
in_comment = 1;
|
|
|
|
|
|
break;
|
|
|
|
|
|
}
|
2011-03-31 14:46:21 +02:00
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
pos --;
|
|
|
|
|
|
continue;
|
|
|
|
|
|
}
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
}
|
|
|
|
|
|
if (!in_comment)
|
2009-11-14 16:27:28 +01:00
|
|
|
|
/* This wasn't in a comment */
|
|
|
|
|
|
return NULL;
|
|
|
|
|
|
|
2013-01-30 15:30:31 +01:00
|
|
|
|
if (utf8_bom && strcmp(encoding, "UTF-8"))
|
|
|
|
|
|
scm_misc_error (NULL,
|
|
|
|
|
|
"the port input declares the encoding ~s but is encoded as UTF-8",
|
|
|
|
|
|
scm_list_1 (scm_from_locale_string (encoding)));
|
|
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
return encoding;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
|
|
|
|
|
|
(SCM port),
|
2009-11-23 18:51:25 +01:00
|
|
|
|
"Scans the port for an Emacs-like character coding declaration\n"
|
fix typos in the manual bits generated from source comments.
* libguile/bitvectors.c, libguile/chars.c,
libguile/deprecated.c, libguile/numbers.c, libguile/random.c,
libguile/read.c, libguile/root.c, libguile/srfi-1.c,
libguile/srfi-13.c, libguile/srfi-14.c, libguile/uniform.c:
Fix typos, add missing newlines.
2011-02-07 00:29:51 +01:00
|
|
|
|
"near the top of the contents of a port with random-accessible contents.\n"
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
"The coding declaration is of the form\n"
|
|
|
|
|
|
"@code{coding: XXXXX} and must appear in a scheme comment.\n"
|
|
|
|
|
|
"\n"
|
|
|
|
|
|
"Returns a string containing the character encoding of the file\n"
|
|
|
|
|
|
"if a declaration was found, or @code{#f} otherwise.\n")
|
|
|
|
|
|
#define FUNC_NAME s_scm_file_encoding
|
|
|
|
|
|
{
|
|
|
|
|
|
char *enc;
|
|
|
|
|
|
SCM s_enc;
|
2009-11-14 16:27:28 +01:00
|
|
|
|
|
2011-03-03 12:46:49 +01:00
|
|
|
|
SCM_VALIDATE_OPINPORT (SCM_ARG1, port);
|
|
|
|
|
|
|
2009-11-14 16:27:28 +01:00
|
|
|
|
enc = scm_i_scan_for_encoding (port);
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
if (enc == NULL)
|
|
|
|
|
|
return SCM_BOOL_F;
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
|
|
|
|
|
s_enc = scm_from_locale_string (enc);
|
|
|
|
|
|
return s_enc;
|
|
|
|
|
|
}
|
2009-11-14 16:27:28 +01:00
|
|
|
|
|
Add full Unicode capability to ports and the default reader
Ports are given two additional properties: a character encoding and
a conversion failure strategy. These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.
If unspecified, ports use a default value. The default value of these
properties is held in a fluid. The default character encoding can be
modified by calling setlocale.
ISO-8859-1 is treated specially. Since it is a native encoding of
strings, it can be processed more quickly. Source code is assumed to be
ISO-8859-1 unless otherwise specified. The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.
The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.
* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding
* module/system/base/compile.scm (compile-file): use source-code
file's self-declared encoding when compiling files
* libguile/strports.c: store string ports in locale encoding
(scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
(scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
new functions
* libguile/strings.h: new declaration for scm_i_string_contains_char
* libguile/strings.c (scm_i_string_contains_char): new function
(scm_from_stringn, scm_to_stringn): use NULL for Latin-1
(scm_from_locale_stringn, scm_to_locale_stringn): respect character
encoding of input and output ports
* libguile/read.h: declaration for scm_scan_for_encoding
* libguile/read.c:
(read_token): now takes scheme string instead of C string/length
(read_complete_token): new function
(scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
(scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
(scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
(scm_read_scsh_block_comment, scm_read_commented_expression)
(scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
(scm_read_expression): use scm_t_wchar for char type, use read_complete_token
(scm_scan_for_encoding): new function to find a file's character encoding
(scm_file_encoding): new function to find a port's character encoding
* libguile/rdelim.c: don't unpack strings
* libguile/print.h: declaration for modified function
scm_i_charprint
* libguile/print.c: use locale when printing characters and
strings
(scm_i_charprint): input parameter is now scm_t_wchar
(scm_simple_format): don't unpack strings
* libguile/posix.h: new declaration for scm_setbinary.
* libguile/posix.c (scm_setlocale): set default and stdio port
encodings based on the locale's character encoding
(scm_setbinary): new function
* libguile/ports.h (scm_t_port): add encoding and failed
conversion handler to port type. Declarations for new or modified
functions scm_getc, scm_unget_byte, scm_ungetc,
scm_i_get_port_encoding, scm_i_set_port_encoding_x,
scm_port_encoding, scm_set_port_encoding_x,
scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.
* libguile/ports.c: assign the current ports to zero on startup so
we can see if they've been set.
(scm_current_input_port, scm_current_output_port,
scm_current_error_port): return #f if the port is not yet
initialized
(scm_new_port_table_entry): set up a new port's encoding and
illegal sequence handler based on the thread's current defaults
(scm_i_remove_port): free port encoding name when port is removed
(scm_i_mode_bits_n): now takes a scheme string instead of a c
string and length. All callers changed.
(SCM_MBCHAR_BUF_SIZE): new const
(scm_getc): new function, since the scm_getc in inline.h is now
scm_get_byte_or_eof. This pulls one codepoint from a port.
(scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
(scm_unget_byte): new function, incorportaing the low-level functionality
of scm_ungetc
(scm_ungetc): uses scm_unget_byte
* libguile/numbers.h (scm_t_wchar): compilation order problem with
scm_t_wchar being use in functions in multiple headers. Forward
declare scm_t_wchar.
* libguile/load.c (scm_primitive_load): scan for file encoding at
top of file and use it to set the load port's encoding
* libguile/inline.h (scm_get_byte_or_eof): new function
incorporating most of the functionality of scm_getc.
* libguile/fports.c (fport_fill_input): now returns scm_t_wchar
* libguile/chars.h (scm_t_wchar): avoid compilation order problem
with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
|
|
|
|
return SCM_BOOL_F;
|
|
|
|
|
|
}
|
|
|
|
|
|
#undef FUNC_NAME
|
|
|
|
|
|
|
2012-10-23 17:28:43 -04:00
|
|
|
|
|
|
|
|
|
|
/* Per-port read options.
|
|
|
|
|
|
|
|
|
|
|
|
We store per-port read options in the 'port-read-options' key of the
|
|
|
|
|
|
port's alist, which is stored in 'scm_i_port_weak_hash'. The value
|
|
|
|
|
|
stored in the alist is a single integer that contains a two-bit field
|
|
|
|
|
|
for each read option.
|
|
|
|
|
|
|
|
|
|
|
|
If a bit field contains READ_OPTION_INHERIT (3), that indicates that
|
|
|
|
|
|
the applicable value should be inherited from the corresponding
|
|
|
|
|
|
global read option. Otherwise, the bit field contains the value of
|
|
|
|
|
|
the read option. For boolean read options that have been set
|
|
|
|
|
|
per-port, the possible values are 0 or 1. If the 'keyword_style'
|
|
|
|
|
|
read option has been set per-port, its possible values are those in
|
|
|
|
|
|
'enum t_keyword_style'. */
|
|
|
|
|
|
|
|
|
|
|
|
/* Key to read options in per-port alists. */
|
|
|
|
|
|
SCM_SYMBOL (sym_port_read_options, "port-read-options");
|
|
|
|
|
|
|
|
|
|
|
|
/* Offsets of bit fields for each per-port override */
|
|
|
|
|
|
#define READ_OPTION_COPY_SOURCE_P 0
|
|
|
|
|
|
#define READ_OPTION_RECORD_POSITIONS_P 2
|
|
|
|
|
|
#define READ_OPTION_CASE_INSENSITIVE_P 4
|
|
|
|
|
|
#define READ_OPTION_KEYWORD_STYLE 6
|
|
|
|
|
|
#define READ_OPTION_R6RS_ESCAPES_P 8
|
|
|
|
|
|
#define READ_OPTION_SQUARE_BRACKETS_P 10
|
|
|
|
|
|
#define READ_OPTION_HUNGRY_EOL_ESCAPES_P 12
|
2012-10-26 17:20:16 -04:00
|
|
|
|
#define READ_OPTION_CURLY_INFIX_P 14
|
2012-10-23 17:28:43 -04:00
|
|
|
|
|
2012-10-26 17:20:16 -04:00
|
|
|
|
/* The total width in bits of the per-port overrides */
|
|
|
|
|
|
#define READ_OPTIONS_NUM_BITS 16
|
2012-10-23 17:28:43 -04:00
|
|
|
|
|
|
|
|
|
|
#define READ_OPTIONS_INHERIT_ALL ((1UL << READ_OPTIONS_NUM_BITS) - 1)
|
|
|
|
|
|
#define READ_OPTIONS_MAX_VALUE READ_OPTIONS_INHERIT_ALL
|
|
|
|
|
|
|
|
|
|
|
|
#define READ_OPTION_MASK 3
|
|
|
|
|
|
#define READ_OPTION_INHERIT 3
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
|
set_port_read_option (SCM port, int option, int new_value)
|
|
|
|
|
|
{
|
|
|
|
|
|
SCM alist, scm_read_options;
|
|
|
|
|
|
unsigned int read_options;
|
|
|
|
|
|
|
|
|
|
|
|
new_value &= READ_OPTION_MASK;
|
|
|
|
|
|
scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
|
|
|
|
|
|
alist = scm_hashq_ref (scm_i_port_weak_hash, port, SCM_BOOL_F);
|
|
|
|
|
|
scm_read_options = scm_assq_ref (alist, sym_port_read_options);
|
|
|
|
|
|
if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
|
|
|
|
|
|
read_options = scm_to_uint (scm_read_options);
|
|
|
|
|
|
else
|
|
|
|
|
|
read_options = READ_OPTIONS_INHERIT_ALL;
|
|
|
|
|
|
read_options &= ~(READ_OPTION_MASK << option);
|
|
|
|
|
|
read_options |= new_value << option;
|
|
|
|
|
|
scm_read_options = scm_from_uint (read_options);
|
|
|
|
|
|
alist = scm_assq_set_x (alist, sym_port_read_options, scm_read_options);
|
|
|
|
|
|
scm_hashq_set_x (scm_i_port_weak_hash, port, alist);
|
|
|
|
|
|
scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-10-24 14:37:36 -04:00
|
|
|
|
/* Set OPTS and PORT's case-insensitivity according to VALUE. */
|
|
|
|
|
|
static void
|
|
|
|
|
|
set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value)
|
|
|
|
|
|
{
|
|
|
|
|
|
value = !!value;
|
|
|
|
|
|
opts->case_insensitive_p = value;
|
|
|
|
|
|
set_port_read_option (port, READ_OPTION_CASE_INSENSITIVE_P, value);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-10-26 17:20:16 -04:00
|
|
|
|
/* Set OPTS and PORT's square_brackets_p option according to VALUE. */
|
|
|
|
|
|
static void
|
|
|
|
|
|
set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, int value)
|
|
|
|
|
|
{
|
|
|
|
|
|
value = !!value;
|
|
|
|
|
|
opts->square_brackets_p = value;
|
|
|
|
|
|
set_port_read_option (port, READ_OPTION_SQUARE_BRACKETS_P, value);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Set OPTS and PORT's curly_infix_p option according to VALUE. */
|
|
|
|
|
|
static void
|
|
|
|
|
|
set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value)
|
|
|
|
|
|
{
|
|
|
|
|
|
value = !!value;
|
|
|
|
|
|
opts->curly_infix_p = value;
|
|
|
|
|
|
set_port_read_option (port, READ_OPTION_CURLY_INFIX_P, value);
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2012-10-23 17:28:43 -04:00
|
|
|
|
/* Initialize OPTS based on PORT's read options and the global read
|
|
|
|
|
|
options. */
|
|
|
|
|
|
static void
|
|
|
|
|
|
init_read_options (SCM port, scm_t_read_opts *opts)
|
|
|
|
|
|
{
|
|
|
|
|
|
SCM alist, val, scm_read_options;
|
|
|
|
|
|
unsigned int read_options, x;
|
|
|
|
|
|
|
|
|
|
|
|
scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
|
|
|
|
|
|
alist = scm_hashq_ref (scm_i_port_weak_hash, port, SCM_BOOL_F);
|
|
|
|
|
|
scm_read_options = scm_assq_ref (alist, sym_port_read_options);
|
|
|
|
|
|
scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
|
|
|
|
|
|
|
|
|
|
|
|
if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
|
|
|
|
|
|
read_options = scm_to_uint (scm_read_options);
|
|
|
|
|
|
else
|
|
|
|
|
|
read_options = READ_OPTIONS_INHERIT_ALL;
|
|
|
|
|
|
|
|
|
|
|
|
x = READ_OPTION_MASK & (read_options >> READ_OPTION_KEYWORD_STYLE);
|
|
|
|
|
|
if (x == READ_OPTION_INHERIT)
|
|
|
|
|
|
{
|
|
|
|
|
|
val = SCM_PACK (SCM_KEYWORD_STYLE);
|
|
|
|
|
|
if (scm_is_eq (val, scm_keyword_prefix))
|
|
|
|
|
|
x = KEYWORD_STYLE_PREFIX;
|
|
|
|
|
|
else if (scm_is_eq (val, scm_keyword_postfix))
|
|
|
|
|
|
x = KEYWORD_STYLE_POSTFIX;
|
|
|
|
|
|
else
|
|
|
|
|
|
x = KEYWORD_STYLE_HASH_PREFIX;
|
|
|
|
|
|
}
|
|
|
|
|
|
opts->keyword_style = x;
|
|
|
|
|
|
|
|
|
|
|
|
#define RESOLVE_BOOLEAN_OPTION(NAME, name) \
|
|
|
|
|
|
do \
|
|
|
|
|
|
{ \
|
|
|
|
|
|
x = READ_OPTION_MASK & (read_options >> READ_OPTION_ ## NAME); \
|
|
|
|
|
|
if (x == READ_OPTION_INHERIT) \
|
|
|
|
|
|
x = !!SCM_ ## NAME; \
|
|
|
|
|
|
opts->name = x; \
|
|
|
|
|
|
} \
|
|
|
|
|
|
while (0)
|
|
|
|
|
|
|
|
|
|
|
|
RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P, copy_source_p);
|
|
|
|
|
|
RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P, record_positions_p);
|
|
|
|
|
|
RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P, case_insensitive_p);
|
|
|
|
|
|
RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P, r6rs_escapes_p);
|
|
|
|
|
|
RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P, square_brackets_p);
|
|
|
|
|
|
RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p);
|
2012-10-26 17:20:16 -04:00
|
|
|
|
RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P, curly_infix_p);
|
2012-10-23 17:28:43 -04:00
|
|
|
|
|
|
|
|
|
|
#undef RESOLVE_BOOLEAN_OPTION
|
2012-10-26 17:20:16 -04:00
|
|
|
|
|
|
|
|
|
|
opts->neoteric_p = 0;
|
2012-10-23 17:28:43 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
1996-07-25 22:56:11 +00:00
|
|
|
|
void
|
|
|
|
|
|
scm_init_read ()
|
|
|
|
|
|
{
|
2010-11-03 00:09:57 +01:00
|
|
|
|
SCM read_hash_procs;
|
|
|
|
|
|
|
2011-11-23 12:21:22 +01:00
|
|
|
|
read_hash_procs = scm_make_fluid_with_default (SCM_EOL);
|
2010-11-03 00:09:57 +01:00
|
|
|
|
|
|
|
|
|
|
scm_i_read_hash_procedures =
|
|
|
|
|
|
SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs));
|
1997-03-08 22:52:56 +00:00
|
|
|
|
|
2007-01-19 19:26:36 +00:00
|
|
|
|
scm_init_opts (scm_read_options, scm_read_opts);
|
2000-04-21 14:16:44 +00:00
|
|
|
|
#include "libguile/read.x"
|
1996-07-25 22:56:11 +00:00
|
|
|
|
}
|
2000-03-19 19:01:16 +00:00
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
Local Variables:
|
|
|
|
|
|
c-file-style: "gnu"
|
|
|
|
|
|
End:
|
|
|
|
|
|
*/
|