guile/libguile/rdelim.c

223 lines
6.1 KiB
C
Raw Normal View History

/* Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2006,
* 2011 Free Software Foundation, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 3 of
* the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
* 02110-1301 USA
*/
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include "libguile/_scm.h"
#include <stdio.h>
#ifdef HAVE_STRING_H
#include <string.h>
#endif
#include "libguile/chars.h"
#include "libguile/modules.h"
#include "libguile/ports.h"
#include "libguile/rdelim.h"
#include "libguile/strings.h"
#include "libguile/strports.h"
#include "libguile/validate.h"
SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 3, 0,
(SCM delims, SCM str, SCM gobble, SCM port, SCM start, SCM end),
"Read characters from @var{port} into @var{str} until one of the\n"
"characters in the @var{delims} string is encountered. If\n"
"@var{gobble} is true, discard the delimiter character;\n"
"otherwise, leave it in the input stream for the next read. If\n"
"@var{port} is not specified, use the value of\n"
"@code{(current-input-port)}. If @var{start} or @var{end} are\n"
"specified, store data only into the substring of @var{str}\n"
"bounded by @var{start} and @var{end} (which default to the\n"
"beginning and end of the string, respectively).\n"
"\n"
" Return a pair consisting of the delimiter that terminated the\n"
"string and the number of characters read. If reading stopped\n"
"at the end of file, the delimiter returned is the\n"
"@var{eof-object}; if the string was filled without encountering\n"
"a delimiter, this value is @code{#f}.")
#define FUNC_NAME s_scm_read_delimited_x
{
* validate.h (SCM_NUM2{SIZE,PTRDIFF,SHORT,USHORT,BITS,UBITS,INT,UINT}[_DEF]): new macros. * unif.h: type renaming: scm_array -> scm_array_t scm_array_dim -> scm_array_dim_t the old names are deprecated, all in-Guile uses changed. * tags.h (scm_ubits_t): new typedef, representing unsigned scm_bits_t. * stacks.h: type renaming: scm_info_frame -> scm_info_frame_t scm_stack -> scm_stack_t the old names are deprecated, all in-Guile uses changed. * srcprop.h: type renaming: scm_srcprops -> scm_srcprops_t scm_srcprops_chunk -> scm_srcprops_chunk_t the old names are deprecated, all in-Guile uses changed. * gsubr.c, procs.c, print.c, ports.c, read.c, rdelim.c, ramap.c, rw.c, smob.c, sort.c, srcprop.c, stacks.c, strings.c, strop.c, strorder.c, strports.c, struct.c, symbols.c, unif.c, values.c, vectors.c, vports.c, weaks.c: various int/size_t -> size_t/scm_bits_t changes. * random.h: type renaming: scm_rstate -> scm_rstate_t scm_rng -> scm_rng_t scm_i_rstate -> scm_i_rstate_t the old names are deprecated, all in-Guile uses changed. * procs.h: type renaming: scm_subr_entry -> scm_subr_entry_t the old name is deprecated, all in-Guile uses changed. * options.h (scm_option_t.val): unsigned long -> scm_bits_t. type renaming: scm_option -> scm_option_t the old name is deprecated, all in-Guile uses changed. * objects.c: various long -> scm_bits_t changes. (scm_i_make_class_object): flags: unsigned long -> scm_ubits_t * numbers.h (SCM_FIXNUM_BIT): deprecated, renamed to SCM_I_FIXNUM_BIT. * num2integral.i.c: new file, multiply included by numbers.c, used to "templatize" the various integral <-> num conversion routines. * numbers.c (scm_mkbig, scm_big2num, scm_adjbig, scm_normbig, scm_copybig, scm_2ulong2big, scm_dbl2big, scm_big2dbl): deprecated. (scm_i_mkbig, scm_i_big2inum, scm_i_adjbig, scm_i_normbig, scm_i_copybig, scm_i_short2big, scm_i_ushort2big, scm_i_int2big, scm_i_uint2big, scm_i_long2big, scm_i_ulong2big, scm_i_bits2big, scm_i_ubits2big, scm_i_size2big, scm_i_ptrdiff2big, scm_i_long_long2big, scm_i_ulong_long2big, scm_i_dbl2big, scm_i_big2dbl, scm_short2num, scm_ushort2num, scm_int2num, scm_uint2num, scm_bits2num, scm_ubits2num, scm_size2num, scm_ptrdiff2num, scm_num2short, scm_num2ushort, scm_num2int, scm_num2uint, scm_num2bits, scm_num2ubits, scm_num2ptrdiff, scm_num2size): new functions. * modules.c (scm_module_reverse_lookup): i, n: int -> scm_bits_t.x * load.c: change int -> size_t in various places (where the variable is used to store a string length). (search-path): call scm_done_free, not scm_done_malloc. * list.c (scm_ilength): return a scm_bits_t, not long. some other {int,long} -> scm_bits_t changes. * hashtab.c: various [u]int -> scm_bits_t changes. scm_ihashx_closure -> scm_ihashx_closure_t (and made a typedef). (scm_ihashx): n: uint -> scm_bits_t use scm_bits2num instead of scm_ulong2num. * gsubr.c: various int -> scm_bits_t changes. * gh_data.c (gh_scm2double): no loss of precision any more. * gh.h (gh_str2scm): len: int -> size_t (gh_{get,set}_substr): start: int -> scm_bits_t, len: int -> size_t (gh_<num>2scm): n: int -> scm_bits_t (gh_*vector_length): return scm_[u]size_t, not unsigned long. (gh_length): return scm_bits_t, not unsigned long. * fports.h: type renaming: scm_fport -> scm_fport_t the old name is deprecated, all in-Guile uses changed. * fports.c (fport_fill_input): count: int -> scm_bits_t (fport_flush): init_size, remaining, count: int -> scm_bits_t * debug.h (scm_lookup_cstr, scm_lookup_soft, scm_evstr): removed those prototypes, as the functions they prototype don't exist. * fports.c (default_buffer_size): int -> size_t (scm_fport_buffer_add): read_size, write_size: int -> scm_bits_t default_size: int -> size_t (scm_setvbuf): csize: int -> scm_bits_t * fluids.c (n_fluids): int -> scm_bits_t (grow_fluids): old_length, i: int -> scm_bits_t (next_fluid_num, scm_fluid_ref, scm_fluid_set_x): n: int -> scm_bits_t (scm_c_with_fluids): flen, vlen: int -> scm_bits_t * filesys.c (s_scm_open_fdes): changed calls to SCM_NUM2LONG to the new and shiny SCM_NUM2INT. * extensions.c: extension -> extension_t (and made a typedef). * eval.h (SCM_IFRAME): cast to scm_bits_t, not int. just so there are no nasty surprises if/when the various deeply magic tag bits move somewhere else. * eval.c: changed the locals used to store results of SCM_IFRAME, scm_ilength and such to be of type scm_bits_t (and not int/long). (iqq): depth, edepth: int -> scm_bits_t (scm_eval_stack): int -> scm_bits_t (SCM_CEVAL): various vars are not scm_bits_t instead of int. (check_map_args, scm_map, scm_for_each): len: long -> scm_bits_t i: int -> scm_bits_t * environments.c: changed the many calls to scm_ulong2num to scm_ubits2num. (import_environment_fold): proc_as_ul: ulong -> scm_ubits_t * dynwind.c (scm_dowinds): delta: long -> scm_bits_t * debug.h: type renaming: scm_debug_info -> scm_debug_info_t scm_debug_frame -> scm_debug_frame_t the old names are deprecated, all in-Guile uses changed. (scm_debug_eframe_size): int -> scm_bits_t * debug.c (scm_init_debug): use scm_c_define instead of the deprecated scm_define. * continuations.h: type renaming: scm_contregs -> scm_contregs_t the old name is deprecated, all in-Guile uses changed. (scm_contregs_t.num_stack_items): size_t -> scm_bits_t (scm_contregs_t.num_stack_items): ulong -> scm_ubits_t * continuations.c (scm_make_continuation): change the type of stack_size form long to scm_bits_t. * ports.h: type renaming: scm_port_rw_active -> scm_port_rw_active_t (and made a typedef) scm_port -> scm_port_t scm_ptob_descriptor -> scm_ptob_descriptor_t the old names are deprecated, all in-Guile uses changed. (scm_port_t.entry): int -> scm_bits_t. (scm_port_t.line_number): int -> long. (scm_port_t.putback_buf_size): int -> size_t. * __scm.h (long_long, ulong_long): deprecated (they pollute the global namespace and have little value besides that). (SCM_BITS_LENGTH): new, is the bit size of scm_bits_t (i.e. of an SCM handle). (ifdef spaghetti): include sys/types.h and sys/stdtypes.h, if they exist (for size_t & ptrdiff_t) (scm_sizet): deprecated. * Makefile.am (noinst_HEADERS): add num2integral.i.c
2001-05-24 00:50:51 +00:00
size_t j;
size_t cstart;
size_t cend;
Add full Unicode capability to ports and the default reader Ports are given two additional properties: a character encoding and a conversion failure strategy. These properties have getters and setters. The new properties are used to convert any locale text to/from the internal representation of strings. If unspecified, ports use a default value. The default value of these properties is held in a fluid. The default character encoding can be modified by calling setlocale. ISO-8859-1 is treated specially. Since it is a native encoding of strings, it can be processed more quickly. Source code is assumed to be ISO-8859-1 unless otherwise specified. The encoding of a source code file can be given as 'coding: XXXXX' in a magic comment at the top of a file. The C functions that deal with encoding often use a null pointer as shorthand for the native Latin-1 encoding, for efficiency's sake. * test-suite/tests/encoding-iso88591.test: new tests * test-suite/tests/encoding-iso88597.test: new tests * test-suite/tests/encoding-utf8.test: new tests * test-suite/tests/encoding-escapes.test: new tests * test-suite/tests/numbers.test: declare 'binary' encoding * test-suite/tests/ports.test: declare 'binary' encoding * test-suite/tests/r6rs-ports.test: declare 'binary' encoding * module/system/base/compile.scm (compile-file): use source-code file's self-declared encoding when compiling files * libguile/strports.c: store string ports in locale encoding (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector) (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector): new functions * libguile/strings.h: new declaration for scm_i_string_contains_char * libguile/strings.c (scm_i_string_contains_char): new function (scm_from_stringn, scm_to_stringn): use NULL for Latin-1 (scm_from_locale_stringn, scm_to_locale_stringn): respect character encoding of input and output ports * libguile/read.h: declaration for scm_scan_for_encoding * libguile/read.c: (read_token): now takes scheme string instead of C string/length (read_complete_token): new function (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol) (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment) (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector) (scm_read_scsh_block_comment, scm_read_commented_expression) (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart) (scm_read_expression): use scm_t_wchar for char type, use read_complete_token (scm_scan_for_encoding): new function to find a file's character encoding (scm_file_encoding): new function to find a port's character encoding * libguile/rdelim.c: don't unpack strings * libguile/print.h: declaration for modified function scm_i_charprint * libguile/print.c: use locale when printing characters and strings (scm_i_charprint): input parameter is now scm_t_wchar (scm_simple_format): don't unpack strings * libguile/posix.h: new declaration for scm_setbinary. * libguile/posix.c (scm_setlocale): set default and stdio port encodings based on the locale's character encoding (scm_setbinary): new function * libguile/ports.h (scm_t_port): add encoding and failed conversion handler to port type. Declarations for new or modified functions scm_getc, scm_unget_byte, scm_ungetc, scm_i_get_port_encoding, scm_i_set_port_encoding_x, scm_port_encoding, scm_set_port_encoding_x, scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x, scm_port_conversion_strategy, scm_set_port_conversion_strategy_x. * libguile/ports.c: assign the current ports to zero on startup so we can see if they've been set. (scm_current_input_port, scm_current_output_port, scm_current_error_port): return #f if the port is not yet initialized (scm_new_port_table_entry): set up a new port's encoding and illegal sequence handler based on the thread's current defaults (scm_i_remove_port): free port encoding name when port is removed (scm_i_mode_bits_n): now takes a scheme string instead of a c string and length. All callers changed. (SCM_MBCHAR_BUF_SIZE): new const (scm_getc): new function, since the scm_getc in inline.h is now scm_get_byte_or_eof. This pulls one codepoint from a port. (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding (scm_unget_byte): new function, incorportaing the low-level functionality of scm_ungetc (scm_ungetc): uses scm_unget_byte * libguile/numbers.h (scm_t_wchar): compilation order problem with scm_t_wchar being use in functions in multiple headers. Forward declare scm_t_wchar. * libguile/load.c (scm_primitive_load): scan for file encoding at top of file and use it to set the load port's encoding * libguile/inline.h (scm_get_byte_or_eof): new function incorporating most of the functionality of scm_getc. * libguile/fports.c (fport_fill_input): now returns scm_t_wchar * libguile/chars.h (scm_t_wchar): avoid compilation order problem with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
scm_t_wchar c;
* validate.h (SCM_NUM2{SIZE,PTRDIFF,SHORT,USHORT,BITS,UBITS,INT,UINT}[_DEF]): new macros. * unif.h: type renaming: scm_array -> scm_array_t scm_array_dim -> scm_array_dim_t the old names are deprecated, all in-Guile uses changed. * tags.h (scm_ubits_t): new typedef, representing unsigned scm_bits_t. * stacks.h: type renaming: scm_info_frame -> scm_info_frame_t scm_stack -> scm_stack_t the old names are deprecated, all in-Guile uses changed. * srcprop.h: type renaming: scm_srcprops -> scm_srcprops_t scm_srcprops_chunk -> scm_srcprops_chunk_t the old names are deprecated, all in-Guile uses changed. * gsubr.c, procs.c, print.c, ports.c, read.c, rdelim.c, ramap.c, rw.c, smob.c, sort.c, srcprop.c, stacks.c, strings.c, strop.c, strorder.c, strports.c, struct.c, symbols.c, unif.c, values.c, vectors.c, vports.c, weaks.c: various int/size_t -> size_t/scm_bits_t changes. * random.h: type renaming: scm_rstate -> scm_rstate_t scm_rng -> scm_rng_t scm_i_rstate -> scm_i_rstate_t the old names are deprecated, all in-Guile uses changed. * procs.h: type renaming: scm_subr_entry -> scm_subr_entry_t the old name is deprecated, all in-Guile uses changed. * options.h (scm_option_t.val): unsigned long -> scm_bits_t. type renaming: scm_option -> scm_option_t the old name is deprecated, all in-Guile uses changed. * objects.c: various long -> scm_bits_t changes. (scm_i_make_class_object): flags: unsigned long -> scm_ubits_t * numbers.h (SCM_FIXNUM_BIT): deprecated, renamed to SCM_I_FIXNUM_BIT. * num2integral.i.c: new file, multiply included by numbers.c, used to "templatize" the various integral <-> num conversion routines. * numbers.c (scm_mkbig, scm_big2num, scm_adjbig, scm_normbig, scm_copybig, scm_2ulong2big, scm_dbl2big, scm_big2dbl): deprecated. (scm_i_mkbig, scm_i_big2inum, scm_i_adjbig, scm_i_normbig, scm_i_copybig, scm_i_short2big, scm_i_ushort2big, scm_i_int2big, scm_i_uint2big, scm_i_long2big, scm_i_ulong2big, scm_i_bits2big, scm_i_ubits2big, scm_i_size2big, scm_i_ptrdiff2big, scm_i_long_long2big, scm_i_ulong_long2big, scm_i_dbl2big, scm_i_big2dbl, scm_short2num, scm_ushort2num, scm_int2num, scm_uint2num, scm_bits2num, scm_ubits2num, scm_size2num, scm_ptrdiff2num, scm_num2short, scm_num2ushort, scm_num2int, scm_num2uint, scm_num2bits, scm_num2ubits, scm_num2ptrdiff, scm_num2size): new functions. * modules.c (scm_module_reverse_lookup): i, n: int -> scm_bits_t.x * load.c: change int -> size_t in various places (where the variable is used to store a string length). (search-path): call scm_done_free, not scm_done_malloc. * list.c (scm_ilength): return a scm_bits_t, not long. some other {int,long} -> scm_bits_t changes. * hashtab.c: various [u]int -> scm_bits_t changes. scm_ihashx_closure -> scm_ihashx_closure_t (and made a typedef). (scm_ihashx): n: uint -> scm_bits_t use scm_bits2num instead of scm_ulong2num. * gsubr.c: various int -> scm_bits_t changes. * gh_data.c (gh_scm2double): no loss of precision any more. * gh.h (gh_str2scm): len: int -> size_t (gh_{get,set}_substr): start: int -> scm_bits_t, len: int -> size_t (gh_<num>2scm): n: int -> scm_bits_t (gh_*vector_length): return scm_[u]size_t, not unsigned long. (gh_length): return scm_bits_t, not unsigned long. * fports.h: type renaming: scm_fport -> scm_fport_t the old name is deprecated, all in-Guile uses changed. * fports.c (fport_fill_input): count: int -> scm_bits_t (fport_flush): init_size, remaining, count: int -> scm_bits_t * debug.h (scm_lookup_cstr, scm_lookup_soft, scm_evstr): removed those prototypes, as the functions they prototype don't exist. * fports.c (default_buffer_size): int -> size_t (scm_fport_buffer_add): read_size, write_size: int -> scm_bits_t default_size: int -> size_t (scm_setvbuf): csize: int -> scm_bits_t * fluids.c (n_fluids): int -> scm_bits_t (grow_fluids): old_length, i: int -> scm_bits_t (next_fluid_num, scm_fluid_ref, scm_fluid_set_x): n: int -> scm_bits_t (scm_c_with_fluids): flen, vlen: int -> scm_bits_t * filesys.c (s_scm_open_fdes): changed calls to SCM_NUM2LONG to the new and shiny SCM_NUM2INT. * extensions.c: extension -> extension_t (and made a typedef). * eval.h (SCM_IFRAME): cast to scm_bits_t, not int. just so there are no nasty surprises if/when the various deeply magic tag bits move somewhere else. * eval.c: changed the locals used to store results of SCM_IFRAME, scm_ilength and such to be of type scm_bits_t (and not int/long). (iqq): depth, edepth: int -> scm_bits_t (scm_eval_stack): int -> scm_bits_t (SCM_CEVAL): various vars are not scm_bits_t instead of int. (check_map_args, scm_map, scm_for_each): len: long -> scm_bits_t i: int -> scm_bits_t * environments.c: changed the many calls to scm_ulong2num to scm_ubits2num. (import_environment_fold): proc_as_ul: ulong -> scm_ubits_t * dynwind.c (scm_dowinds): delta: long -> scm_bits_t * debug.h: type renaming: scm_debug_info -> scm_debug_info_t scm_debug_frame -> scm_debug_frame_t the old names are deprecated, all in-Guile uses changed. (scm_debug_eframe_size): int -> scm_bits_t * debug.c (scm_init_debug): use scm_c_define instead of the deprecated scm_define. * continuations.h: type renaming: scm_contregs -> scm_contregs_t the old name is deprecated, all in-Guile uses changed. (scm_contregs_t.num_stack_items): size_t -> scm_bits_t (scm_contregs_t.num_stack_items): ulong -> scm_ubits_t * continuations.c (scm_make_continuation): change the type of stack_size form long to scm_bits_t. * ports.h: type renaming: scm_port_rw_active -> scm_port_rw_active_t (and made a typedef) scm_port -> scm_port_t scm_ptob_descriptor -> scm_ptob_descriptor_t the old names are deprecated, all in-Guile uses changed. (scm_port_t.entry): int -> scm_bits_t. (scm_port_t.line_number): int -> long. (scm_port_t.putback_buf_size): int -> size_t. * __scm.h (long_long, ulong_long): deprecated (they pollute the global namespace and have little value besides that). (SCM_BITS_LENGTH): new, is the bit size of scm_bits_t (i.e. of an SCM handle). (ifdef spaghetti): include sys/types.h and sys/stdtypes.h, if they exist (for size_t & ptrdiff_t) (scm_sizet): deprecated. * Makefile.am (noinst_HEADERS): add num2integral.i.c
2001-05-24 00:50:51 +00:00
size_t num_delims;
SCM_VALIDATE_STRING (1, delims);
num_delims = scm_i_string_length (delims);
SCM_VALIDATE_STRING (2, str);
scm_i_get_substring_spec (scm_i_string_length (str),
start, &cstart, end, &cend);
if (SCM_UNBNDP (port))
2005-03-02 20:42:01 +00:00
port = scm_current_input_port ();
else
SCM_VALIDATE_OPINPORT (4, port);
for (j = cstart; j < cend; j++)
{
* validate.h (SCM_NUM2{SIZE,PTRDIFF,SHORT,USHORT,BITS,UBITS,INT,UINT}[_DEF]): new macros. * unif.h: type renaming: scm_array -> scm_array_t scm_array_dim -> scm_array_dim_t the old names are deprecated, all in-Guile uses changed. * tags.h (scm_ubits_t): new typedef, representing unsigned scm_bits_t. * stacks.h: type renaming: scm_info_frame -> scm_info_frame_t scm_stack -> scm_stack_t the old names are deprecated, all in-Guile uses changed. * srcprop.h: type renaming: scm_srcprops -> scm_srcprops_t scm_srcprops_chunk -> scm_srcprops_chunk_t the old names are deprecated, all in-Guile uses changed. * gsubr.c, procs.c, print.c, ports.c, read.c, rdelim.c, ramap.c, rw.c, smob.c, sort.c, srcprop.c, stacks.c, strings.c, strop.c, strorder.c, strports.c, struct.c, symbols.c, unif.c, values.c, vectors.c, vports.c, weaks.c: various int/size_t -> size_t/scm_bits_t changes. * random.h: type renaming: scm_rstate -> scm_rstate_t scm_rng -> scm_rng_t scm_i_rstate -> scm_i_rstate_t the old names are deprecated, all in-Guile uses changed. * procs.h: type renaming: scm_subr_entry -> scm_subr_entry_t the old name is deprecated, all in-Guile uses changed. * options.h (scm_option_t.val): unsigned long -> scm_bits_t. type renaming: scm_option -> scm_option_t the old name is deprecated, all in-Guile uses changed. * objects.c: various long -> scm_bits_t changes. (scm_i_make_class_object): flags: unsigned long -> scm_ubits_t * numbers.h (SCM_FIXNUM_BIT): deprecated, renamed to SCM_I_FIXNUM_BIT. * num2integral.i.c: new file, multiply included by numbers.c, used to "templatize" the various integral <-> num conversion routines. * numbers.c (scm_mkbig, scm_big2num, scm_adjbig, scm_normbig, scm_copybig, scm_2ulong2big, scm_dbl2big, scm_big2dbl): deprecated. (scm_i_mkbig, scm_i_big2inum, scm_i_adjbig, scm_i_normbig, scm_i_copybig, scm_i_short2big, scm_i_ushort2big, scm_i_int2big, scm_i_uint2big, scm_i_long2big, scm_i_ulong2big, scm_i_bits2big, scm_i_ubits2big, scm_i_size2big, scm_i_ptrdiff2big, scm_i_long_long2big, scm_i_ulong_long2big, scm_i_dbl2big, scm_i_big2dbl, scm_short2num, scm_ushort2num, scm_int2num, scm_uint2num, scm_bits2num, scm_ubits2num, scm_size2num, scm_ptrdiff2num, scm_num2short, scm_num2ushort, scm_num2int, scm_num2uint, scm_num2bits, scm_num2ubits, scm_num2ptrdiff, scm_num2size): new functions. * modules.c (scm_module_reverse_lookup): i, n: int -> scm_bits_t.x * load.c: change int -> size_t in various places (where the variable is used to store a string length). (search-path): call scm_done_free, not scm_done_malloc. * list.c (scm_ilength): return a scm_bits_t, not long. some other {int,long} -> scm_bits_t changes. * hashtab.c: various [u]int -> scm_bits_t changes. scm_ihashx_closure -> scm_ihashx_closure_t (and made a typedef). (scm_ihashx): n: uint -> scm_bits_t use scm_bits2num instead of scm_ulong2num. * gsubr.c: various int -> scm_bits_t changes. * gh_data.c (gh_scm2double): no loss of precision any more. * gh.h (gh_str2scm): len: int -> size_t (gh_{get,set}_substr): start: int -> scm_bits_t, len: int -> size_t (gh_<num>2scm): n: int -> scm_bits_t (gh_*vector_length): return scm_[u]size_t, not unsigned long. (gh_length): return scm_bits_t, not unsigned long. * fports.h: type renaming: scm_fport -> scm_fport_t the old name is deprecated, all in-Guile uses changed. * fports.c (fport_fill_input): count: int -> scm_bits_t (fport_flush): init_size, remaining, count: int -> scm_bits_t * debug.h (scm_lookup_cstr, scm_lookup_soft, scm_evstr): removed those prototypes, as the functions they prototype don't exist. * fports.c (default_buffer_size): int -> size_t (scm_fport_buffer_add): read_size, write_size: int -> scm_bits_t default_size: int -> size_t (scm_setvbuf): csize: int -> scm_bits_t * fluids.c (n_fluids): int -> scm_bits_t (grow_fluids): old_length, i: int -> scm_bits_t (next_fluid_num, scm_fluid_ref, scm_fluid_set_x): n: int -> scm_bits_t (scm_c_with_fluids): flen, vlen: int -> scm_bits_t * filesys.c (s_scm_open_fdes): changed calls to SCM_NUM2LONG to the new and shiny SCM_NUM2INT. * extensions.c: extension -> extension_t (and made a typedef). * eval.h (SCM_IFRAME): cast to scm_bits_t, not int. just so there are no nasty surprises if/when the various deeply magic tag bits move somewhere else. * eval.c: changed the locals used to store results of SCM_IFRAME, scm_ilength and such to be of type scm_bits_t (and not int/long). (iqq): depth, edepth: int -> scm_bits_t (scm_eval_stack): int -> scm_bits_t (SCM_CEVAL): various vars are not scm_bits_t instead of int. (check_map_args, scm_map, scm_for_each): len: long -> scm_bits_t i: int -> scm_bits_t * environments.c: changed the many calls to scm_ulong2num to scm_ubits2num. (import_environment_fold): proc_as_ul: ulong -> scm_ubits_t * dynwind.c (scm_dowinds): delta: long -> scm_bits_t * debug.h: type renaming: scm_debug_info -> scm_debug_info_t scm_debug_frame -> scm_debug_frame_t the old names are deprecated, all in-Guile uses changed. (scm_debug_eframe_size): int -> scm_bits_t * debug.c (scm_init_debug): use scm_c_define instead of the deprecated scm_define. * continuations.h: type renaming: scm_contregs -> scm_contregs_t the old name is deprecated, all in-Guile uses changed. (scm_contregs_t.num_stack_items): size_t -> scm_bits_t (scm_contregs_t.num_stack_items): ulong -> scm_ubits_t * continuations.c (scm_make_continuation): change the type of stack_size form long to scm_bits_t. * ports.h: type renaming: scm_port_rw_active -> scm_port_rw_active_t (and made a typedef) scm_port -> scm_port_t scm_ptob_descriptor -> scm_ptob_descriptor_t the old names are deprecated, all in-Guile uses changed. (scm_port_t.entry): int -> scm_bits_t. (scm_port_t.line_number): int -> long. (scm_port_t.putback_buf_size): int -> size_t. * __scm.h (long_long, ulong_long): deprecated (they pollute the global namespace and have little value besides that). (SCM_BITS_LENGTH): new, is the bit size of scm_bits_t (i.e. of an SCM handle). (ifdef spaghetti): include sys/types.h and sys/stdtypes.h, if they exist (for size_t & ptrdiff_t) (scm_sizet): deprecated. * Makefile.am (noinst_HEADERS): add num2integral.i.c
2001-05-24 00:50:51 +00:00
size_t k;
c = scm_getc (port);
for (k = 0; k < num_delims; k++)
{
Add full Unicode capability to ports and the default reader Ports are given two additional properties: a character encoding and a conversion failure strategy. These properties have getters and setters. The new properties are used to convert any locale text to/from the internal representation of strings. If unspecified, ports use a default value. The default value of these properties is held in a fluid. The default character encoding can be modified by calling setlocale. ISO-8859-1 is treated specially. Since it is a native encoding of strings, it can be processed more quickly. Source code is assumed to be ISO-8859-1 unless otherwise specified. The encoding of a source code file can be given as 'coding: XXXXX' in a magic comment at the top of a file. The C functions that deal with encoding often use a null pointer as shorthand for the native Latin-1 encoding, for efficiency's sake. * test-suite/tests/encoding-iso88591.test: new tests * test-suite/tests/encoding-iso88597.test: new tests * test-suite/tests/encoding-utf8.test: new tests * test-suite/tests/encoding-escapes.test: new tests * test-suite/tests/numbers.test: declare 'binary' encoding * test-suite/tests/ports.test: declare 'binary' encoding * test-suite/tests/r6rs-ports.test: declare 'binary' encoding * module/system/base/compile.scm (compile-file): use source-code file's self-declared encoding when compiling files * libguile/strports.c: store string ports in locale encoding (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector) (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector): new functions * libguile/strings.h: new declaration for scm_i_string_contains_char * libguile/strings.c (scm_i_string_contains_char): new function (scm_from_stringn, scm_to_stringn): use NULL for Latin-1 (scm_from_locale_stringn, scm_to_locale_stringn): respect character encoding of input and output ports * libguile/read.h: declaration for scm_scan_for_encoding * libguile/read.c: (read_token): now takes scheme string instead of C string/length (read_complete_token): new function (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol) (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment) (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector) (scm_read_scsh_block_comment, scm_read_commented_expression) (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart) (scm_read_expression): use scm_t_wchar for char type, use read_complete_token (scm_scan_for_encoding): new function to find a file's character encoding (scm_file_encoding): new function to find a port's character encoding * libguile/rdelim.c: don't unpack strings * libguile/print.h: declaration for modified function scm_i_charprint * libguile/print.c: use locale when printing characters and strings (scm_i_charprint): input parameter is now scm_t_wchar (scm_simple_format): don't unpack strings * libguile/posix.h: new declaration for scm_setbinary. * libguile/posix.c (scm_setlocale): set default and stdio port encodings based on the locale's character encoding (scm_setbinary): new function * libguile/ports.h (scm_t_port): add encoding and failed conversion handler to port type. Declarations for new or modified functions scm_getc, scm_unget_byte, scm_ungetc, scm_i_get_port_encoding, scm_i_set_port_encoding_x, scm_port_encoding, scm_set_port_encoding_x, scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x, scm_port_conversion_strategy, scm_set_port_conversion_strategy_x. * libguile/ports.c: assign the current ports to zero on startup so we can see if they've been set. (scm_current_input_port, scm_current_output_port, scm_current_error_port): return #f if the port is not yet initialized (scm_new_port_table_entry): set up a new port's encoding and illegal sequence handler based on the thread's current defaults (scm_i_remove_port): free port encoding name when port is removed (scm_i_mode_bits_n): now takes a scheme string instead of a c string and length. All callers changed. (SCM_MBCHAR_BUF_SIZE): new const (scm_getc): new function, since the scm_getc in inline.h is now scm_get_byte_or_eof. This pulls one codepoint from a port. (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding (scm_unget_byte): new function, incorportaing the low-level functionality of scm_ungetc (scm_ungetc): uses scm_unget_byte * libguile/numbers.h (scm_t_wchar): compilation order problem with scm_t_wchar being use in functions in multiple headers. Forward declare scm_t_wchar. * libguile/load.c (scm_primitive_load): scan for file encoding at top of file and use it to set the load port's encoding * libguile/inline.h (scm_get_byte_or_eof): new function incorporating most of the functionality of scm_getc. * libguile/fports.c (fport_fill_input): now returns scm_t_wchar * libguile/chars.h (scm_t_wchar): avoid compilation order problem with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
if (scm_i_string_ref (delims, k) == c)
{
if (scm_is_false (gobble))
scm_ungetc (c, port);
return scm_cons (SCM_MAKE_CHAR (c),
scm_from_size_t (j - cstart));
}
}
if (c == EOF)
return scm_cons (SCM_EOF_VAL,
scm_from_size_t (j - cstart));
scm_c_string_set_x (str, j, SCM_MAKE_CHAR (c));
}
return scm_cons (SCM_BOOL_F, scm_from_size_t (j - cstart));
}
#undef FUNC_NAME
/*
* %read-line
* truncates any terminating newline from its input, and returns
* a cons of the string read and its terminating character. Doing
* so makes it easy to implement the hairy `read-line' options
* efficiently in Scheme.
*/
SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
(SCM port),
"Read a newline-terminated line from @var{port}, allocating storage as\n"
"necessary. The newline terminator (if any) is removed from the string,\n"
"and a pair consisting of the line and its delimiter is returned. The\n"
"delimiter may be either a newline or the @var{eof-object}; if\n"
"@code{%read-line} is called at the end of file, it returns the pair\n"
"@code{(#<eof> . #<eof>)}.")
#define FUNC_NAME s_scm_read_line
{
/* Threshold under which the only allocation performed is that of the
resulting string and pair. */
#define LINE_BUFFER_SIZE 256
SCM line, strings, result;
scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
size_t index;
if (SCM_UNBNDP (port))
2005-03-02 20:42:01 +00:00
port = scm_current_input_port ();
SCM_VALIDATE_OPINPORT (1,port);
index = 0;
delim = 0;
strings = SCM_BOOL_F;
do
{
if (SCM_UNLIKELY (index >= LINE_BUFFER_SIZE))
{
/* The line is getting longer than BUF so store its current
contents in STRINGS. */
strings = scm_cons (scm_from_utf32_stringn (buf, index),
scm_is_false (strings) ? SCM_EOL : strings);
index = 0;
}
else
{
buf[index] = scm_getc (port);
switch (buf[index])
{
case EOF:
case '\n':
delim = buf[index];
break;
default:
index++;
}
}
}
while (delim == 0);
if (SCM_LIKELY (scm_is_false (strings)))
/* The fast path. */
line = scm_from_utf32_stringn (buf, index);
else
{
/* Aggregate the intermediary results. */
strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
line = scm_string_concatenate (scm_reverse (strings));
}
if (delim == EOF && scm_i_string_length (line) == 0)
result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
else
result = scm_cons (line,
delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
return result;
#undef LINE_BUFFER_SIZE
}
#undef FUNC_NAME
SCM_DEFINE (scm_write_line, "write-line", 1, 1, 0,
(SCM obj, SCM port),
"Display @var{obj} and a newline character to @var{port}. If\n"
"@var{port} is not specified, @code{(current-output-port)} is\n"
"used. This function is equivalent to:\n"
"@lisp\n"
"(display obj [port])\n"
"(newline [port])\n"
"@end lisp")
#define FUNC_NAME s_scm_write_line
{
scm_display (obj, port);
return scm_newline (port);
}
#undef FUNC_NAME
SCM
scm_init_rdelim_builtins (void)
{
#include "libguile/rdelim.x"
return SCM_UNSPECIFIED;
}
void
scm_init_rdelim (void)
{
scm_c_define_gsubr ("%init-rdelim-builtins", 0, 0, 0,
scm_init_rdelim_builtins);
}
/*
Local Variables:
c-file-style: "gnu"
End:
*/