guile/libguile/strings.h

312 lines
12 KiB
C
Raw Normal View History

#ifndef SCM_STRINGS_H
#define SCM_STRINGS_H
2001-08-31 14:42:31 +00:00
/* Copyright 1995-1998,2000-2001,2004-2006,2008-2011,2013,2015-2019
Free Software Foundation, Inc.
This file is part of Guile.
Guile is free software: you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Guile is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public
License along with Guile. If not, see
<https://www.gnu.org/licenses/>. */
#include <libguile/gc.h>
#include <libguile/error.h>
#include "libguile/inline.h"
#include <libguile/snarf.h>
/* String representation.
A string is a piece of a stringbuf. A stringbuf can be used by
more than one string. When a string is written to and the
stringbuf of that string is used by more than one string, a new
stringbuf is created. That is, strings are copy-on-write. This
behavior can be used to make the substring operation quite
efficient.
The implementation is tuned so that mutating a string is costly,
but just reading it is cheap and lock-free.
There are also mutation-sharing strings. They refer to a part of
an ordinary string. Writing to a mutation-sharing string just
writes to the ordinary string.
Internal, low level interface to the character arrays
- Use scm_i_is_narrow_string to determine whether the string is narrow or
Add Unicode strings and symbols This adds full Unicode strings as a datatype, and it adds some minimal functionality. The terminal and port encoding is assumed to be ISO-8859-1. Non-ISO-8859-1 characters are written or input as string character escapes. The string character escapes now have 3 forms: \xXX \uXXXX and \UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits. The process for writing to strings has been modified. There is now a function scm_i_string_start_writing that does the copy-on-write conversion if necessary. To compile strings that may be wide, the VM storage of strings and string-likes has changed. Most string-using functions have not yet been updated and may break when used with wide strings. * module/language/assembly/compile-bytecode.scm (write-bytecode): use variable width string bytecode format * module/language/assembly.scm (byte-length): use variable width bytecode format * libguile/vm-i-loader.c (load-string, load-symbol): (load-keyword, define): use variable-width bytecode format * libguile/vm-engine.h (FETCH_WIDTH): new macro * libguile/strings.h: new declarations * libguile/strings.c (make_wide_stringbuf): new function (widen_stringbuf): new function (scm_i_make_wide_string): new function (scm_i_is_narrow_string): new function (scm_i_string_wide_chars): new function (scm_i_string_start_writing): new function (scm_i_string_ref): new function (scm_i_string_set_x): new function (scm_i_is_narrow_symbol): new function (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function (scm_string_width): new function (unistring_escapes_to_guile_escapes): new function (scm_to_stringn): new function (scm_i_stringbuf_free): modify for wide strings (scm_i_substring_copy): modify for wide strings (scm_i_string_chars, scm_string_append): modify for wide strings (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf): (scm_string, scm_i_deprecated_string_chars): modify for 
wide strings (scm_from_locale_string, scm_from_locale_stringn): add null test * libguile/srfi-13.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing (scm_string_for_each): modify for wide strings * libguile/socket.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/rw.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/read.c (scm_read_string): allow reading of wide strings * libguile/print.h: add declaration for scm_charprint * libguile/print.c (iprin1): print wide strings and add new string escapes (scm_charprint): new function * libguile/ports.h: new declarations for scm_lfwrite_substr and scm_lfwrite_str * libguile/ports.c (update_port_lf): new function (scm_lfwrite): use update_port_lf (scm_lfwrite_substr): new function (scm_lfwrite_str): new function * test-suite/tests/asm-to-bytecode.test ("compiler"): add string width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00
wide.
- Use scm_i_string_chars or scm_i_string_wide_chars to get a
pointer to the byte or scm_t_wchar array of a string for reading.
Use scm_i_string_length to get the number of characters in that
array. The array is not null-terminated.
- The array is valid as long as the corresponding SCM object is
protected but only until the next SCM_TICK. During such a 'safe
point', strings might change their representation.
Add Unicode strings and symbols This adds full Unicode strings as a datatype, and it adds some minimal functionality. The terminal and port encoding is assumed to be ISO-8859-1. Non-ISO-8859-1 characters are written or input as string character escapes. The string character escapes now have 3 forms: \xXX \uXXXX and \UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits. The process for writing to strings has been modified. There is now a function scm_i_string_start_writing that does the copy-on-write conversion if necessary. To compile strings that may be wide, the VM storage of strings and string-likes has changed. Most string-using functions have not yet been updated and may break when used with wide strings. * module/language/assembly/compile-bytecode.scm (write-bytecode): use variable width string bytecode format * module/language/assembly.scm (byte-length): use variable width bytecode format * libguile/vm-i-loader.c (load-string, load-symbol): (load-keyword, define): use variable-width bytecode format * libguile/vm-engine.h (FETCH_WIDTH): new macro * libguile/strings.h: new declarations * libguile/strings.c (make_wide_stringbuf): new function (widen_stringbuf): new function (scm_i_make_wide_string): new function (scm_i_is_narrow_string): new function (scm_i_string_wide_chars): new function (scm_i_string_start_writing): new function (scm_i_string_ref): new function (scm_i_string_set_x): new function (scm_i_is_narrow_symbol): new function (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function (scm_string_width): new function (unistring_escapes_to_guile_escapes): new function (scm_to_stringn): new function (scm_i_stringbuf_free): modify for wide strings (scm_i_substring_copy): modify for wide strings (scm_i_string_chars, scm_string_append): modify for wide strings (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf): (scm_string, scm_i_deprecated_string_chars): modify for 
wide strings (scm_from_locale_string, scm_from_locale_stringn): add null test * libguile/srfi-13.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing (scm_string_for_each): modify for wide strings * libguile/socket.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/rw.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/read.c (scm_read_string): allow reading of wide strings * libguile/print.h: add declaration for scm_charprint * libguile/print.c (iprin1): print wide strings and add new string escapes (scm_charprint): new function * libguile/ports.h: new declarations for scm_lfwrite_substr and scm_lfwrite_str * libguile/ports.c (update_port_lf): new function (scm_lfwrite): use update_port_lf (scm_lfwrite_substr): new function (scm_lfwrite_str): new function * test-suite/tests/asm-to-bytecode.test ("compiler"): add string width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00
- Use scm_i_string_start_writing to get a version of the string
ready for reading and writing. This is a potentially costly
operation since it implements the copy-on-write behavior. When
done with the writing, call scm_i_string_stop_writing. You must
do this before the next SCM_TICK. (This means, before calling
almost any other scm_ function and you can't allow throws, of
course.)
- New strings can be created with scm_i_make_string or
scm_i_make_wide_string. This gives access to a writable pointer
that remains valid as long as nobody else makes a copy-on-write
substring of the string. Do not call scm_i_string_stop_writing
for this pointer.
- Alternately, scm_i_string_ref and scm_i_string_set_x can be used
to read and write strings without worrying about whether the
string is narrow or wide. scm_i_string_set_x still needs to be
bracketed by scm_i_string_start_writing and
scm_i_string_stop_writing.
Legacy interface
2004-09-20 21:47:41 +00:00
- SCM_STRINGP is just scm_is_string.
- SCM_STRING_CHARS uses scm_i_string_writable_chars and immediately
calls scm_i_string_stop_writing, hoping for the best. SCM_STRING_LENGTH
2004-09-20 21:47:41 +00:00
is the same as scm_i_string_length. SCM_STRING_CHARS will throw
an error for strings that are not null-terminated. There is
Add Unicode strings and symbols This adds full Unicode strings as a datatype, and it adds some minimal functionality. The terminal and port encoding is assumed to be ISO-8859-1. Non-ISO-8859-1 characters are written or input as string character escapes. The string character escapes now have 3 forms: \xXX \uXXXX and \UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits. The process for writing to strings has been modified. There is now a function scm_i_string_start_writing that does the copy-on-write conversion if necessary. To compile strings that may be wide, the VM storage of strings and string-likes has changed. Most string-using functions have not yet been updated and may break when used with wide strings. * module/language/assembly/compile-bytecode.scm (write-bytecode): use variable width string bytecode format * module/language/assembly.scm (byte-length): use variable width bytecode format * libguile/vm-i-loader.c (load-string, load-symbol): (load-keyword, define): use variable-width bytecode format * libguile/vm-engine.h (FETCH_WIDTH): new macro * libguile/strings.h: new declarations * libguile/strings.c (make_wide_stringbuf): new function (widen_stringbuf): new function (scm_i_make_wide_string): new function (scm_i_is_narrow_string): new function (scm_i_string_wide_chars): new function (scm_i_string_start_writing): new function (scm_i_string_ref): new function (scm_i_string_set_x): new function (scm_i_is_narrow_symbol): new function (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function (scm_string_width): new function (unistring_escapes_to_guile_escapes): new function (scm_to_stringn): new function (scm_i_stringbuf_free): modify for wide strings (scm_i_substring_copy): modify for wide strings (scm_i_string_chars, scm_string_append): modify for wide strings (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf): (scm_string, scm_i_deprecated_string_chars): modify for 
wide strings (scm_from_locale_string, scm_from_locale_stringn): add null test * libguile/srfi-13.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing (scm_string_for_each): modify for wide strings * libguile/socket.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/rw.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/read.c (scm_read_string): allow reading of wide strings * libguile/print.h: add declaration for scm_charprint * libguile/print.c (iprin1): print wide strings and add new string escapes (scm_charprint): new function * libguile/ports.h: new declarations for scm_lfwrite_substr and scm_lfwrite_str * libguile/ports.c (update_port_lf): new function (scm_lfwrite): use update_port_lf (scm_lfwrite_substr): new function (scm_lfwrite_str): new function * test-suite/tests/asm-to-bytecode.test ("compiler"): add string width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00
no wide version of this interface.
*/
/* A type indicating what strategy to take when string locale
conversion is unsuccessful. Each enumerator takes the value of the
like-named SCM_ICONVEH_* constant. */
typedef enum
{
/* Same value as SCM_ICONVEH_ERROR. NOTE(review): presumably makes a
failed conversion raise an error -- confirm against strings.c. */
SCM_FAILED_CONVERSION_ERROR = SCM_ICONVEH_ERROR,
/* Same value as SCM_ICONVEH_QUESTION_MARK. NOTE(review): presumably
substitutes `?' for unconvertible characters -- confirm. */
SCM_FAILED_CONVERSION_QUESTION_MARK = SCM_ICONVEH_QUESTION_MARK,
/* Same value as SCM_ICONVEH_ESCAPE_SEQUENCE. NOTE(review): presumably
substitutes an escape sequence for unconvertible characters --
confirm. */
SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE = SCM_ICONVEH_ESCAPE_SEQUENCE
} scm_t_string_failed_conversion_handler;
decruftify scm_sys_protects * libguile/root.h * libguile/root.c (scm_sys_protects): It used to be that for some reason we'd define a special array of "protected" values. This was a little silly, always, but with the BDW GC it's completely unnecessary. Also many of these variables were unused, and none of them were good API. So remove this array, and either eliminate, make static, or make internal the various values. * libguile/snarf.h: No need to generate calls to scm_permanent_object. * guile-readline/readline.c (scm_init_readline): No need to call scm_permanent_object. * libguile/array-map.c (ramap, rafe): Remove the dubious nullvect optimizations. * libguile/async.c (scm_init_async): No need to init scm_asyncs, it is no more. * libguile/eval.c (scm_init_eval): No need to init scm_listofnull, it is no more. * libguile/gc.c: Make scm_protects a static var. (scm_storage_prehistory): Change the sanity check to use the address of protects. (scm_init_gc_protect_object): No need to clear the scm_sys_protects, as it is no more. * libguile/keywords.c: Make the keyword obarray a static var. * libguile/numbers.c: Make flo0 a static var. * libguile/objprop.c: Make object_whash a static var. * libguile/properties.c: Make properties_whash a static var. * libguile/srcprop.h: * libguile/srcprop.c: Make scm_source_whash a global with internal linkage. * libguile/strings.h: * libguile/strings.c: Make scm_nullstr a global with internal linkage. * libguile/vectors.c (scm_init_vectors): No need to init scm_nullvect, it's unused.
2009-12-05 12:38:32 +01:00
/* Global SCM value declared SCM_INTERNAL, i.e. not part of the public
API. NOTE(review): presumably the shared empty string -- confirm
against its definition in strings.c. */
SCM_INTERNAL SCM scm_nullstr;
Port conversion strategies internally are symbols * libguile/ports.h (scm_t_port): Represent the conversion strategy as a symbol, to make things easier for Scheme. Rename to "conversion_strategy". (scm_c_make_port_with_encoding): Change to take encoding and conversion_strategy arguments as symbols. (scm_i_string_failed_conversion_handler): New internal helper, to turn a symbol to a scm_t_string_failed_conversion_handler. (scm_i_default_port_encoding): Return the default port encoding as a symbol. (scm_i_default_port_conversion_strategy) (scm_i_set_default_port_conversion_strategy): Rename from scm_i_default_port_conversion_handler et al. Take and return Scheme symbols. * libguile/foreign.c (scm_string_to_pointer, scm_pointer_to_string): Use scm_i_default_string_failed_conversion_handler instead of scm_i_default_port_conversion_handler. * libguile/print.c (PORT_CONVERSION_HANDLER): Update definition. (print_normal_symbol): Use PORT_CONVERSION_HANDLER. * libguile/r6rs-ports.c (make_bytevector_input_port): (make_custom_binary_input_port, make_bytevector_output_port): Adapt to changes in scm_c_make_port_with_encoding. * libguile/strings.h: * libguile/strings.c (scm_i_default_string_failed_conversion_handler): New helper. (scm_from_locale_stringn, scm_from_port_stringn): (scm_to_locale_stringn, scm_to_port_stringn): Adapt to interface changes. * libguile/strports.c (scm_mkstrport): Adapt to scm_c_make_port_with_encoding change. * libguile/ports.c (scm_c_make_port): Adapt to scm_c_make_port_with_encoding change. (ascii_toupper, encoding_matches, canonicalize_encoding): Move down in the file. (peek_codepoint, get_codepoint, scm_ungetc): Adapt to port conversion strategy change. Remove duplicate case in get_codepoint. (scm_init_ports): Move symbol initializations to the same place.
2016-05-04 10:31:21 +02:00
/* Internal helper: takes no arguments and returns the default
scm_t_string_failed_conversion_handler, i.e. the strategy to use for
unsuccessful string conversions when none is given explicitly.
NOTE(review): where the default comes from is not visible in this
header -- see strings.c. */
SCM_INTERNAL scm_t_string_failed_conversion_handler
scm_i_default_string_failed_conversion_handler (void);
/* C predicate on a SCM value, returning a C int. Declared SCM_INLINE;
its definition is not among the declarations that follow. */
SCM_INLINE int scm_is_string (SCM x);

/* Scheme-level string procedures: every argument (including indices
and counts) and every result is a SCM value. */
SCM_API SCM scm_string_p (SCM x);
SCM_API SCM scm_string (SCM chrs);
SCM_API SCM scm_make_string (SCM k, SCM chr);
SCM_API SCM scm_string_length (SCM str);
SCM_API SCM scm_string_utf8_length (SCM str);
SCM_API SCM scm_string_bytes_per_char (SCM str);
SCM_API SCM scm_string_ref (SCM str, SCM k);
SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr);

/* Substring variants taking SCM start/end positions. NOTE(review):
presumably these differ in whether the result is copy-on-write,
read-only, mutation-sharing or a fresh copy (see the representation
notes at the top of this file) -- confirm in strings.c. */
SCM_API SCM scm_substring (SCM str, SCM start, SCM end);
SCM_API SCM scm_substring_read_only (SCM str, SCM start, SCM end);
SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM end);
SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
SCM_API SCM scm_string_append (SCM args);

/* Build a SCM string from LEN bytes at STR in the named ENCODING;
HANDLER selects the strategy for unsuccessful conversion. */
SCM_API SCM scm_from_stringn (const char *str, size_t len, const char *encoding,
scm_t_string_failed_conversion_handler handler);

/* C-level counterparts: lengths and counts are plain size_t. */
SCM_API SCM scm_c_make_string (size_t len, SCM chr);
SCM_API size_t scm_c_string_length (SCM str);
SCM_API size_t scm_c_string_utf8_length (SCM str);
2008-07-05 20:10:44 +02:00
/* C-level accessors: positions and lengths are size_t rather than SCM
integers; string and character values remain SCM. */
SCM_API size_t scm_c_symbol_length (SCM sym);
SCM_API SCM scm_c_string_ref (SCM str, size_t pos);
SCM_API void scm_c_string_set_x (SCM str, size_t pos, SCM chr);

/* C-level substring variants, mirroring the scm_substring* procedures
but with size_t bounds. */
SCM_API SCM scm_c_substring (SCM str, size_t start, size_t end);
SCM_API SCM scm_c_substring_read_only (SCM str, size_t start, size_t end);
SCM_API SCM scm_c_substring_shared (SCM str, size_t start, size_t end);
SCM_API SCM scm_c_substring_copy (SCM str, size_t start, size_t end);
/* Use locale encoding for user input, user output, or interacting with
the C library. Use latin1 for ASCII, and for literals in source
code. Use utf8 for interaction with modern libraries which deal in
UTF-8. Otherwise use scm_to_stringn or scm_from_stringn with a
specific encoding.

Naming scheme visible in the prototypes below: scm_from_* build a
SCM string from a C buffer, scm_to_* produce a C buffer from a SCM
string, and the *n variants take or report an explicit length
(LEN / *LENP) instead of relying on a terminator.

NOTE(review): the pointers returned by scm_to_* are presumably
heap-allocated and owned by the caller, and scm_take_* presumably
take ownership of STR -- confirm against the Guile manual. */

/* Locale encoding. */
SCM_API SCM scm_from_locale_string (const char *str);
SCM_API SCM scm_from_locale_stringn (const char *str, size_t len);
SCM_API SCM scm_take_locale_string (char *str);
SCM_API SCM scm_take_locale_stringn (char *str, size_t len);
SCM_API char *scm_to_locale_string (SCM str);
SCM_API char *scm_to_locale_stringn (SCM str, size_t *lenp);

/* Latin-1. */
SCM_API SCM scm_from_latin1_string (const char *str);
SCM_API SCM scm_from_latin1_stringn (const char *str, size_t len);
SCM_API char *scm_to_latin1_string (SCM str);
SCM_API char *scm_to_latin1_stringn (SCM str, size_t *lenp);

/* UTF-8. */
SCM_API char *scm_to_utf8_string (SCM str);
SCM_API char *scm_to_utf8_stringn (SCM str, size_t *lenp);
SCM_API SCM scm_from_utf8_string (const char *str);
SCM_API SCM scm_from_utf8_stringn (const char *str, size_t len);

/* UTF-32, exchanged as arrays of scm_t_wchar. */
SCM_API scm_t_wchar *scm_to_utf32_string (SCM str);
SCM_API scm_t_wchar *scm_to_utf32_stringn (SCM str, size_t *lenp);
SCM_API SCM scm_from_utf32_string (const scm_t_wchar *str);
SCM_API SCM scm_from_utf32_stringn (const scm_t_wchar *str, size_t len);

/* Conversions parameterized by a PORT argument. NOTE(review):
presumably they use that port's encoding and conversion strategy --
confirm in strings.c. */
SCM_API char *scm_to_port_string (SCM str, SCM port);
SCM_API char *scm_to_port_stringn (SCM str, size_t *lenp, SCM port);
SCM_API SCM scm_from_port_string (const char *str, SCM port);
SCM_API SCM scm_from_port_stringn (const char *str, size_t len, SCM port);

/* General form: explicit ENCODING name and failed-conversion HANDLER. */
SCM_API char *scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
scm_t_string_failed_conversion_handler handler);

/* Takes a caller-supplied buffer BUF and a size limit MAX_LEN and
returns a size_t. NOTE(review): presumably the return value is the
full converted length even when it exceeds MAX_LEN -- confirm. */
SCM_API size_t scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len);

/* Unicode normalization forms NFD, NFKD, NFC and NFKC (per the
function names). */
SCM_API SCM scm_string_normalize_nfd (SCM str);
SCM_API SCM scm_string_normalize_nfkd (SCM str);
SCM_API SCM scm_string_normalize_nfc (SCM str);
SCM_API SCM scm_string_normalize_nfkc (SCM str);

/* NOTE(review): presumably builds a Scheme list of strings from an
argc/argv pair -- confirm in strings.c. */
SCM_API SCM scm_makfromstrs (int argc, char **argv);
/* Snarfing support. See snarf.h. Only available when
SCM_SUPPORT_STATIC_ALLOCATION is defined. */
#ifdef SCM_SUPPORT_STATIC_ALLOCATION

/* Define a static const object named C_NAME consisting of two
scm_t_bits words followed by a char buffer. word_0 is initialized
to scm_tc7_stringbuf, word_1 to sizeof (CONTENTS) - 1, and the
buffer (of size sizeof (CONTENTS)) to CONTENTS itself. CONTENTS
therefore has to be something that both sizeof and a char-array
initializer accept, i.e. a string literal; word_1 is then its length
without the terminating NUL. The expansion does not end in a
semicolon. */
#define SCM_IMMUTABLE_STRINGBUF(c_name, contents) \
static SCM_UNUSED const \
struct \
{ \
scm_t_bits word_0; \
scm_t_bits word_1; \
const char buffer[sizeof (contents)]; \
} \
c_name = \
{ \
scm_tc7_stringbuf, \
sizeof (contents) - 1, \
contents \
}

/* First define a stringbuf for CONTENTS whose name is C_NAME with
`_stringbuf' appended (via scm_i_paste), then pass C_NAME to
SCM_IMMUTABLE_DOUBLE_CELL together with four words: the
scm_tc7_ro_string tag, the address of that stringbuf, 0, and
sizeof (CONTENTS) - 1. NOTE(review): the last two words are
presumably the start offset and the length of the string -- confirm
against the string layout in strings.c. */
#define SCM_IMMUTABLE_STRING(c_name, contents) \
SCM_IMMUTABLE_STRINGBUF (scm_i_paste (c_name, _stringbuf), contents); \
SCM_IMMUTABLE_DOUBLE_CELL (c_name, \
scm_tc7_ro_string, \
(scm_t_bits) &scm_i_paste (c_name, \
_stringbuf), \
(scm_t_bits) 0, \
(scm_t_bits) (sizeof (contents) - 1))
#endif /* SCM_SUPPORT_STATIC_ALLOCATION */
/* internal constants */
/* Type tag for read-only strings: the scm_tc7_string tag plus 0x200. */
#define scm_tc7_ro_string (scm_tc7_string + 0x200)
/* Flags for wide and mutable stringbufs. NOTE(review): presumably
stored in the stringbuf's first word alongside scm_tc7_stringbuf --
confirm in strings.c. */
#define SCM_I_STRINGBUF_F_WIDE 0x400
#define SCM_I_STRINGBUF_F_MUTABLE 0x800
/* Internal printer hook for stringbuf objects: takes the object EXP,
the destination PORT and the current print state PSTATE. */
SCM_INTERNAL void scm_i_print_stringbuf (SCM exp, SCM port,
scm_print_state *pstate);
/* internal accessor functions. Arguments must be valid. */

/* Create a new narrow (char) or wide (scm_t_wchar) string of LEN
characters. As described at the top of this file, creation gives
access to a writable pointer that stays valid as long as nobody makes
a copy-on-write substring of the string; do not call
scm_i_string_stop_writing for it. NOTE(review): that pointer is
presumably returned through DATAP, and READ_ONLY_P presumably selects
a read-only string -- confirm in strings.c. */
SCM_INTERNAL SCM scm_i_make_string (size_t len, char **datap,
int read_only_p);
SCM_INTERNAL SCM scm_i_make_wide_string (size_t len, scm_t_wchar **datap,
int read_only_p);
2008-05-14 05:20:47 +02:00
/* Internal substring constructors with size_t bounds. They follow the
"Arguments must be valid" rule for internal accessors.
NOTE(review): presumably that means no type or range checking is
performed here -- confirm in strings.c. */
SCM_INTERNAL SCM scm_i_substring (SCM str, size_t start, size_t end);
SCM_INTERNAL SCM scm_i_substring_read_only (SCM str, size_t start, size_t end);
SCM_INTERNAL SCM scm_i_substring_shared (SCM str, size_t start, size_t end);
SCM_INTERNAL SCM scm_i_substring_copy (SCM str, size_t start, size_t end);
/* Number of characters in the character array of STR. */
SCM_INTERNAL size_t scm_i_string_length (SCM str);
All literal constants are read-only * libguile/array-handle.c (initialize_vector_handle): Add mutable_p argument. Unless the vector handle is mutable, null out its writable_elements member. (scm_array_get_handle): Adapt to determine mutability of the various arrays. (scm_array_handle_elements, scm_array_handle_writable_elements): Reverse the sense: instead of implementing read-only in terms of read-write, go the other way around, adding an assertion in the read-write case that the array handle is mutable. * libguile/array-map.c (racp): Assert that the destination is mutable. * libguile/bitvectors.c (SCM_F_BITVECTOR_IMMUTABLE, IS_BITVECTOR): (IS_MUTABLE_BITVECTOR): Add a flag to indicate immutability. (scm_i_bitvector_bits): Fix indentation. (scm_i_is_mutable_bitvector): New helper. (scm_array_handle_bit_elements) ((scm_array_handle_bit_writable_elements): Build writable_elements in terms of elements. (scm_bitvector_elements, scm_bitvector_writable_elements): Likewise. (scm_c_bitvector_set_x): Require a mutable bitvector for the fast-path. (scm_bitvector_to_list, scm_bit_count): Use read-only elements() function. * libguile/bitvectors.h (scm_i_is_mutable_bitvector): New decl. * libguile/bytevectors.c (INTEGER_ACCESSOR_PROLOGUE): (INTEGER_GETTER_PROLOGUE, INTEGER_SETTER_PROLOGUE): (INTEGER_REF, INTEGER_NATIVE_REF, INTEGER_SET, INTEGER_NATIVE_SET): (GENERIC_INTEGER_ACCESSOR_PROLOGUE): (GENERIC_INTEGER_GETTER_PROLOGUE, GENERIC_INTEGER_SETTER_PROLOGUE): (LARGE_INTEGER_NATIVE_REF, LARGE_INTEGER_NATIVE_SET): (IEEE754_GETTER_PROLOGUE, IEEE754_SETTER_PROLOGUE): (IEEE754_REF, IEEE754_NATIVE_REF, IEEE754_SET, IEEE754_NATIVE_SET): Setters require a mutable bytevector. (SCM_BYTEVECTOR_SET_FLAG): New helper. (SCM_BYTEVECTOR_SET_CONTIGUOUS_P, SCM_BYTEVECTOR_SET_ELEMENT_TYPE): Remove helpers. (SCM_VALIDATE_MUTABLE_BYTEVECTOR): New helper. (make_bytevector, make_bytevector_from_buffer): Use SCM_SET_BYTEVECTOR_FLAGS. 
(scm_c_bytevector_set_x, scm_bytevector_fill_x) (scm_bytevector_copy_x): Require a mutable bytevector. * libguile/bytevectors.h (SCM_F_BYTEVECTOR_CONTIGUOUS) (SCM_F_BYTEVECTOR_IMMUTABLE, SCM_MUTABLE_BYTEVECTOR_P): New definitions. * libguile/bytevectors.h (SCM_BYTEVECTOR_CONTIGUOUS_P): Just access one bit. * libguile/srfi-4.c (DEFINE_SRFI_4_C_FUNCS): Implement writable_elements() in terms of elements(). * libguile/strings.c (scm_i_string_is_mutable): New helper. * libguile/uniform.c (scm_array_handle_uniform_elements): (scm_array_handle_uniform_writable_elements): Implement writable_elements in terms of elements. * libguile/vectors.c (SCM_VALIDATE_MUTABLE_VECTOR): New helper. (scm_vector_elements, scm_vector_writable_elements): Implement writable_elements in terms of elements. (scm_c_vector_set_x): Require a mutable vector. * libguile/vectors.h (SCM_F_VECTOR_IMMUTABLE, SCM_I_IS_MUTABLE_VECTOR): New definitions. * libguile/vm-engine.c (VM_VALIDATE_MUTABLE_BYTEVECTOR): (VM_VALIDATE_MUTABLE_VECTOR, vector-set!, vector-set!/immediate) (BV_BOUNDED_SET, BV_SET): Require mutable bytevector/vector. * libguile/vm.c (vm_error_not_a_mutable_bytevector): (vm_error_not_a_mutable_vector): New definitions. * module/system/vm/assembler.scm (link-data): Mark residualized vectors, bytevectors, and bitvectors as being read-only.
2017-04-18 14:56:48 +02:00
/* Mutability predicate for STR, returning a C int. NOTE(review):
presumably nonzero means STR may be written to -- confirm in
strings.c. */
SCM_INTERNAL int scm_i_string_is_mutable (SCM str);
2008-05-14 05:20:47 +02:00
/* Read access to the character array of STR. Per the notes at the top
of this file: use scm_i_string_chars for the byte array and
scm_i_string_wide_chars for the scm_t_wchar array (pick one with
scm_i_is_narrow_string); the array is not null-terminated and is
only valid while STR is protected and until the next SCM_TICK.
scm_i_string_writable_chars is what the legacy SCM_STRING_CHARS
interface uses. */
SCM_API /* FIXME: not internal */ const char *scm_i_string_chars (SCM str);
SCM_API /* FIXME: not internal */ char *scm_i_string_writable_chars (SCM str);
SCM_INTERNAL const scm_t_wchar *scm_i_string_wide_chars (SCM str);
/* NOTE(review): presumably returns the character data regardless of
width, hence the const void * -- confirm in strings.c. */
SCM_INTERNAL const void *scm_i_string_data (SCM str);
Add Unicode strings and symbols This adds full Unicode strings as a datatype, and it adds some minimal functionality. The terminal and port encoding is assumed to be ISO-8859-1. Non-ISO-8859-1 characters are written or input as string character escapes. The string character escapes now have 3 forms: \xXX \uXXXX and \UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits. The process for writing to strings has been modified. There is now a function scm_i_string_start_writing that does the copy-on-write conversion if necessary. To compile strings that may be wide, the VM storage of strings and string-likes has changed. Most string-using functions have not yet been updated and may break when used with wide strings. * module/language/assembly/compile-bytecode.scm (write-bytecode): use variable width string bytecode format * module/language/assembly.scm (byte-length): use variable width bytecode format * libguile/vm-i-loader.c (load-string, load-symbol): (load-keyword, define): use variable-width bytecode format * libguile/vm-engine.h (FETCH_WIDTH): new macro * libguile/strings.h: new declarations * libguile/strings.c (make_wide_stringbuf): new function (widen_stringbuf): new function (scm_i_make_wide_string): new function (scm_i_is_narrow_string): new function (scm_i_string_wide_chars): new function (scm_i_string_start_writing): new function (scm_i_string_ref): new function (scm_i_string_set_x): new function (scm_i_is_narrow_symbol): new function (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function (scm_string_width): new function (unistring_escapes_to_guile_escapes): new function (scm_to_stringn): new function (scm_i_stringbuf_free): modify for wide strings (scm_i_substring_copy): modify for wide strings (scm_i_string_chars, scm_string_append): modify for wide strings (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf): (scm_string, scm_i_deprecated_string_chars): modify for 
wide strings (scm_from_locale_string, scm_from_locale_stringn): add null test * libguile/srfi-13.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing (scm_string_for_each): modify for wide strings * libguile/socket.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/rw.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/read.c (scm_read_string): allow reading of wide strings * libguile/print.h: add declaration for scm_charprint * libguile/print.c (iprin1): print wide strings and add new string escapes (scm_charprint): new function * libguile/ports.h: new declarations for scm_lfwrite_substr and scm_lfwrite_str * libguile/ports.c (update_port_lf): new function (scm_lfwrite): use update_port_lf (scm_lfwrite_substr): new function (scm_lfwrite_str): new function * test-suite/tests/asm-to-bytecode.test ("compiler"): add string width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00
/* Get a version of STR that is ready for reading and writing. This is
potentially costly because it implements the copy-on-write behavior.
Must be paired with scm_i_string_stop_writing before the next
SCM_TICK (see the notes at the top of this file). */
SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
2008-05-14 05:20:47 +02:00
/* End a write section opened with scm_i_string_start_writing. Takes no
argument; must be called before the next SCM_TICK. */
SCM_INTERNAL void scm_i_string_stop_writing (void);
Add Unicode strings and symbols This adds full Unicode strings as a datatype, and it adds some minimal functionality. The terminal and port encoding is assumed to be ISO-8859-1. Non-ISO-8859-1 characters are written or input as string character escapes. The string character escapes now have 3 forms: \xXX \uXXXX and \UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits. The process for writing to strings has been modified. There is now a function scm_i_string_start_writing that does the copy-on-write conversion if necessary. To compile strings that may be wide, the VM storage of strings and string-likes has changed. Most string-using functions have not yet been updated and may break when used with wide strings. * module/language/assembly/compile-bytecode.scm (write-bytecode): use variable width string bytecode format * module/language/assembly.scm (byte-length): use variable width bytecode format * libguile/vm-i-loader.c (load-string, load-symbol): (load-keyword, define): use variable-width bytecode format * libguile/vm-engine.h (FETCH_WIDTH): new macro * libguile/strings.h: new declarations * libguile/strings.c (make_wide_stringbuf): new function (widen_stringbuf): new function (scm_i_make_wide_string): new function (scm_i_is_narrow_string): new function (scm_i_string_wide_chars): new function (scm_i_string_start_writing): new function (scm_i_string_ref): new function (scm_i_string_set_x): new function (scm_i_is_narrow_symbol): new function (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function (scm_string_width): new function (unistring_escapes_to_guile_escapes): new function (scm_to_stringn): new function (scm_i_stringbuf_free): modify for wide strings (scm_i_substring_copy): modify for wide strings (scm_i_string_chars, scm_string_append): modify for wide strings (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf): (scm_string, scm_i_deprecated_string_chars): modify for 
wide strings (scm_from_locale_string, scm_from_locale_stringn): add null test * libguile/srfi-13.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing (scm_string_for_each): modify for wide strings * libguile/socket.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/rw.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/read.c (scm_read_string): allow reading of wide strings * libguile/print.h: add declaration for scm_charprint * libguile/print.c (iprin1): print wide strings and add new string escapes (scm_charprint): new function * libguile/ports.h: new declarations for scm_lfwrite_substr and scm_lfwrite_str * libguile/ports.c (update_port_lf): new function (scm_lfwrite): use update_port_lf (scm_lfwrite_substr): new function (scm_lfwrite_str): new function * test-suite/tests/asm-to-bytecode.test ("compiler"): add string width byte to sting-like asm tests
2009-08-08 02:35:00 -07:00
/* Tell whether STR is narrow or wide; the result decides between
scm_i_string_chars and scm_i_string_wide_chars. NOTE(review):
presumably nonzero means narrow -- confirm in strings.c. */
SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
/* Read the character at index X of STR without the caller having to
care whether STR is narrow or wide. */
SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
Add full Unicode capability to ports and the default reader Ports are given two additional properties: a character encoding and a conversion failure strategy. These properties have getters and setters. The new properties are used to convert any locale text to/from the internal representation of strings. If unspecified, ports use a default value. The default value of these properties is held in a fluid. The default character encoding can be modified by calling setlocale. ISO-8859-1 is treated specially. Since it is a native encoding of strings, it can be processed more quickly. Source code is assumed to be ISO-8859-1 unless otherwise specified. The encoding of a source code file can be given as 'coding: XXXXX' in a magic comment at the top of a file. The C functions that deal with encoding often use a null pointer as shorthand for the native Latin-1 encoding, for efficiency's sake. * test-suite/tests/encoding-iso88591.test: new tests * test-suite/tests/encoding-iso88597.test: new tests * test-suite/tests/encoding-utf8.test: new tests * test-suite/tests/encoding-escapes.test: new tests * test-suite/tests/numbers.test: declare 'binary' encoding * test-suite/tests/ports.test: declare 'binary' encoding * test-suite/tests/r6rs-ports.test: declare 'binary' encoding * module/system/base/compile.scm (compile-file): use source-code file's self-declared encoding when compiling files * libguile/strports.c: store string ports in locale encoding (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector) (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector): new functions * libguile/strings.h: new declaration for scm_i_string_contains_char * libguile/strings.c (scm_i_string_contains_char): new function (scm_from_stringn, scm_to_stringn): use NULL for Latin-1 (scm_from_locale_stringn, scm_to_locale_stringn): respect character encoding of input and output ports * libguile/read.h: declaration for scm_scan_for_encoding * libguile/read.c: (read_token): now takes 
scheme string instead of C string/length (read_complete_token): new function (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol) (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment) (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector) (scm_read_scsh_block_comment, scm_read_commented_expression) (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart) (scm_read_expression): use scm_t_wchar for char type, use read_complete_token (scm_scan_for_encoding): new function to find a file's character encoding (scm_file_encoding): new function to find a port's character encoding * libguile/rdelim.c: don't unpack strings * libguile/print.h: declaration for modified function scm_i_charprint * libguile/print.c: use locale when printing characters and strings (scm_i_charprint): input parameter is now scm_t_wchar (scm_simple_format): don't unpack strings * libguile/posix.h: new declaration for scm_setbinary. * libguile/posix.c (scm_setlocale): set default and stdio port encodings based on the locale's character encoding (scm_setbinary): new function * libguile/ports.h (scm_t_port): add encoding and failed conversion handler to port type. Declarations for new or modified functions scm_getc, scm_unget_byte, scm_ungetc, scm_i_get_port_encoding, scm_i_set_port_encoding_x, scm_port_encoding, scm_set_port_encoding_x, scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x, scm_port_conversion_strategy, scm_set_port_conversion_strategy_x. * libguile/ports.c: assign the current ports to zero on startup so we can see if they've been set. (scm_current_input_port, scm_current_output_port, scm_current_error_port): return #f if the port is not yet initialized (scm_new_port_table_entry): set up a new port's encoding and illegal sequence handler based on the thread's current defaults (scm_i_remove_port): free port encoding name when port is removed (scm_i_mode_bits_n): now takes a scheme string instead of a c string and length. 
All callers changed. (SCM_MBCHAR_BUF_SIZE): new const (scm_getc): new function, since the scm_getc in inline.h is now scm_get_byte_or_eof. This pulls one codepoint from a port. (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding (scm_unget_byte): new function, incorporating the low-level functionality of scm_ungetc (scm_ungetc): uses scm_unget_byte * libguile/numbers.h (scm_t_wchar): compilation order problem with scm_t_wchar being used in functions in multiple headers. Forward declare scm_t_wchar. * libguile/load.c (scm_primitive_load): scan for file encoding at top of file and use it to set the load port's encoding * libguile/inline.h (scm_get_byte_or_eof): new function incorporating most of the functionality of scm_getc. * libguile/fports.c (fport_fill_input): now returns scm_t_wchar * libguile/chars.h (scm_t_wchar): avoid compilation order problem with declaration of scm_t_wchar
2009-08-25 07:54:37 -07:00
/* Internal string queries.  Both take a Scheme string and return an
   int.  Note that scm_i_string_contains_char takes a plain `char', not
   a scm_t_wchar, and scm_i_string_strcmp takes a C string plus a
   size_t START_X.
   NOTE(review): presumably the first reports whether C occurs in STR
   and the second compares SSTR, starting at index START_X, with CSTR;
   the meaning of the return values is not visible from this header --
   confirm against strings.c.  */
SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr);
Add Unicode strings and symbols This adds full Unicode strings as a datatype, and it adds some minimal functionality. The terminal and port encoding is assumed to be ISO-8859-1. Non-ISO-8859-1 characters are written or input as string character escapes. The string character escapes now have 3 forms: \xXX \uXXXX and \UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits. The process for writing to strings has been modified. There is now a function scm_i_string_start_writing that does the copy-on-write conversion if necessary. To compile strings that may be wide, the VM storage of strings and string-likes has changed. Most string-using functions have not yet been updated and may break when used with wide strings. * module/language/assembly/compile-bytecode.scm (write-bytecode): use variable width string bytecode format * module/language/assembly.scm (byte-length): use variable width bytecode format * libguile/vm-i-loader.c (load-string, load-symbol): (load-keyword, define): use variable-width bytecode format * libguile/vm-engine.h (FETCH_WIDTH): new macro * libguile/strings.h: new declarations * libguile/strings.c (make_wide_stringbuf): new function (widen_stringbuf): new function (scm_i_make_wide_string): new function (scm_i_is_narrow_string): new function (scm_i_string_wide_chars): new function (scm_i_string_start_writing): new function (scm_i_string_ref): new function (scm_i_string_set_x): new function (scm_i_is_narrow_symbol): new function (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function (scm_string_width): new function (unistring_escapes_to_guile_escapes): new function (scm_to_stringn): new function (scm_i_stringbuf_free): modify for wide strings (scm_i_substring_copy): modify for wide strings (scm_i_string_chars, scm_string_append): modify for wide strings (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf): (scm_string, scm_i_deprecated_string_chars): modify for 
wide strings (scm_from_locale_string, scm_from_locale_stringn): add null test * libguile/srfi-13.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing (scm_string_for_each): modify for wide strings * libguile/socket.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/rw.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/read.c (scm_read_string): allow reading of wide strings * libguile/print.h: add declaration for scm_charprint * libguile/print.c (iprin1): print wide strings and add new string escapes (scm_charprint): new function * libguile/ports.h: new declarations for scm_lfwrite_substr and scm_lfwrite_str * libguile/ports.c (update_port_lf): new function (scm_lfwrite): use update_port_lf (scm_lfwrite_substr): new function (scm_lfwrite_str): new function * test-suite/tests/asm-to-bytecode.test ("compiler"): add string width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
/* Internal string mutator: takes a string STR, a size_t position P and
   a character CHR of type scm_t_wchar; returns nothing.
   NOTE(review): presumably stores CHR at index P of STR -- confirm
   against strings.c, including any locking or start/stop-writing
   protocol the caller must follow.  */
SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
/* internal functions related to symbols. */
2008-05-14 05:20:47 +02:00
/* Internal symbol constructors; both return an SCM.  They differ only
   in how the name is supplied: scm_i_make_symbol takes NAME as an SCM
   value, while scm_i_c_make_symbol takes a C character pointer NAME
   together with an explicit length LEN.  Both additionally take FLAGS
   (scm_t_bits), a HASH value (unsigned long) and PROPS (SCM).
   NOTE(review): presumably HASH is the already-computed hash of the
   name -- confirm against the callers.  */
SCM_INTERNAL SCM scm_i_make_symbol (SCM name, scm_t_bits flags,
unsigned long hash, SCM props);
SCM_INTERNAL SCM
scm_i_c_make_symbol (const char *name, size_t len,
scm_t_bits flags, unsigned long hash, SCM props);
2008-05-14 05:20:47 +02:00
/* Returns a pointer to const char for the symbol SYM.
   NOTE(review): presumably only valid for narrow symbols (see
   scm_i_is_narrow_symbol) and not guaranteed to be NUL-terminated --
   confirm against strings.c.  */
SCM_INTERNAL const char *scm_i_symbol_chars (SCM sym);
Add Unicode strings and symbols This adds full Unicode strings as a datatype, and it adds some minimal functionality. The terminal and port encoding is assumed to be ISO-8859-1. Non-ISO-8859-1 characters are written or input as string character escapes. The string character escapes now have 3 forms: \xXX \uXXXX and \UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits. The process for writing to strings has been modified. There is now a function scm_i_string_start_writing that does the copy-on-write conversion if necessary. To compile strings that may be wide, the VM storage of strings and string-likes has changed. Most string-using functions have not yet been updated and may break when used with wide strings. * module/language/assembly/compile-bytecode.scm (write-bytecode): use variable width string bytecode format * module/language/assembly.scm (byte-length): use variable width bytecode format * libguile/vm-i-loader.c (load-string, load-symbol): (load-keyword, define): use variable-width bytecode format * libguile/vm-engine.h (FETCH_WIDTH): new macro * libguile/strings.h: new declarations * libguile/strings.c (make_wide_stringbuf): new function (widen_stringbuf): new function (scm_i_make_wide_string): new function (scm_i_is_narrow_string): new function (scm_i_string_wide_chars): new function (scm_i_string_start_writing): new function (scm_i_string_ref): new function (scm_i_string_set_x): new function (scm_i_is_narrow_symbol): new function (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function (scm_string_width): new function (unistring_escapes_to_guile_escapes): new function (scm_to_stringn): new function (scm_i_stringbuf_free): modify for wide strings (scm_i_substring_copy): modify for wide strings (scm_i_string_chars, scm_string_append): modify for wide strings (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf): (scm_string, scm_i_deprecated_string_chars): modify for 
wide strings (scm_from_locale_string, scm_from_locale_stringn): add null test * libguile/srfi-13.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing (scm_string_for_each): modify for wide strings * libguile/socket.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/rw.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/read.c (scm_read_string): allow reading of wide strings * libguile/print.h: add declaration for scm_charprint * libguile/print.c (iprin1): print wide strings and add new string escapes (scm_charprint): new function * libguile/ports.h: new declarations for scm_lfwrite_substr and scm_lfwrite_str * libguile/ports.c (update_port_lf): new function (scm_lfwrite): use update_port_lf (scm_lfwrite_substr): new function (scm_lfwrite_str): new function * test-suite/tests/asm-to-bytecode.test ("compiler"): add string width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
/* Returns a pointer to const scm_t_wchar for the symbol SYM.
   NOTE(review): presumably the wide counterpart of scm_i_symbol_chars,
   valid only when SYM is not narrow -- confirm against strings.c.  */
SCM_INTERNAL const scm_t_wchar *scm_i_symbol_wide_chars (SCM sym);
2008-05-14 05:20:47 +02:00
/* Returns a size_t for the symbol SYM.
   NOTE(review): presumably the length of the symbol's name counted in
   characters rather than bytes -- confirm against strings.c.  */
SCM_INTERNAL size_t scm_i_symbol_length (SCM sym);
Add Unicode strings and symbols This adds full Unicode strings as a datatype, and it adds some minimal functionality. The terminal and port encoding is assumed to be ISO-8859-1. Non-ISO-8859-1 characters are written or input as string character escapes. The string character escapes now have 3 forms: \xXX \uXXXX and \UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits. The process for writing to strings has been modified. There is now a function scm_i_string_start_writing that does the copy-on-write conversion if necessary. To compile strings that may be wide, the VM storage of strings and string-likes has changed. Most string-using functions have not yet been updated and may break when used with wide strings. * module/language/assembly/compile-bytecode.scm (write-bytecode): use variable width string bytecode format * module/language/assembly.scm (byte-length): use variable width bytecode format * libguile/vm-i-loader.c (load-string, load-symbol): (load-keyword, define): use variable-width bytecode format * libguile/vm-engine.h (FETCH_WIDTH): new macro * libguile/strings.h: new declarations * libguile/strings.c (make_wide_stringbuf): new function (widen_stringbuf): new function (scm_i_make_wide_string): new function (scm_i_is_narrow_string): new function (scm_i_string_wide_chars): new function (scm_i_string_start_writing): new function (scm_i_string_ref): new function (scm_i_string_set_x): new function (scm_i_is_narrow_symbol): new function (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function (scm_string_width): new function (unistring_escapes_to_guile_escapes): new function (scm_to_stringn): new function (scm_i_stringbuf_free): modify for wide strings (scm_i_substring_copy): modify for wide strings (scm_i_string_chars, scm_string_append): modify for wide strings (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf): (scm_string, scm_i_deprecated_string_chars): modify for 
wide strings (scm_from_locale_string, scm_from_locale_stringn): add null test * libguile/srfi-13.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing (scm_string_for_each): modify for wide strings * libguile/socket.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/rw.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/read.c (scm_read_string): allow reading of wide strings * libguile/print.h: add declaration for scm_charprint * libguile/print.c (iprin1): print wide strings and add new string escapes (scm_charprint): new function * libguile/ports.h: new declarations for scm_lfwrite_substr and scm_lfwrite_str * libguile/ports.c (update_port_lf): new function (scm_lfwrite): use update_port_lf (scm_lfwrite_substr): new function (scm_lfwrite_str): new function * test-suite/tests/asm-to-bytecode.test ("compiler"): add string width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
/* Width-related helpers; both take a single SCM argument named STR and
   return an int.
   NOTE(review): presumably scm_i_is_narrow_symbol is a predicate on the
   storage width of a symbol (despite the parameter being named STR),
   and scm_i_try_narrow_string attempts to convert a string to narrow
   storage and reports whether the result is narrow -- confirm against
   strings.c.  */
SCM_INTERNAL int scm_i_is_narrow_symbol (SCM str);
SCM_INTERNAL int scm_i_try_narrow_string (SCM str);
2008-05-14 05:20:47 +02:00
/* Takes a symbol SYM and two size_t bounds START and END; returns an
   SCM.
   NOTE(review): presumably returns a string covering the characters of
   SYM's name in [START, END) -- confirm the bounds convention and the
   result type against strings.c.  */
SCM_INTERNAL SCM scm_i_symbol_substring (SCM sym, size_t start, size_t end);
Add Unicode strings and symbols This adds full Unicode strings as a datatype, and it adds some minimal functionality. The terminal and port encoding is assumed to be ISO-8859-1. Non-ISO-8859-1 characters are written or input as string character escapes. The string character escapes now have 3 forms: \xXX \uXXXX and \UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits. The process for writing to strings has been modified. There is now a function scm_i_string_start_writing that does the copy-on-write conversion if necessary. To compile strings that may be wide, the VM storage of strings and string-likes has changed. Most string-using functions have not yet been updated and may break when used with wide strings. * module/language/assembly/compile-bytecode.scm (write-bytecode): use variable width string bytecode format * module/language/assembly.scm (byte-length): use variable width bytecode format * libguile/vm-i-loader.c (load-string, load-symbol): (load-keyword, define): use variable-width bytecode format * libguile/vm-engine.h (FETCH_WIDTH): new macro * libguile/strings.h: new declarations * libguile/strings.c (make_wide_stringbuf): new function (widen_stringbuf): new function (scm_i_make_wide_string): new function (scm_i_is_narrow_string): new function (scm_i_string_wide_chars): new function (scm_i_string_start_writing): new function (scm_i_string_ref): new function (scm_i_string_set_x): new function (scm_i_is_narrow_symbol): new function (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function (scm_string_width): new function (unistring_escapes_to_guile_escapes): new function (scm_to_stringn): new function (scm_i_stringbuf_free): modify for wide strings (scm_i_substring_copy): modify for wide strings (scm_i_string_chars, scm_string_append): modify for wide strings (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf): (scm_string, scm_i_deprecated_string_chars): modify for 
wide strings (scm_from_locale_string, scm_from_locale_stringn): add null test * libguile/srfi-13.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing (scm_string_for_each): modify for wide strings * libguile/socket.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/rw.c: add calls for scm_i_string_start_writing for each call of scm_i_string_stop_writing * libguile/read.c (scm_read_string): allow reading of wide strings * libguile/print.h: add declaration for scm_charprint * libguile/print.c (iprin1): print wide strings and add new string escapes (scm_charprint): new function * libguile/ports.h: new declarations for scm_lfwrite_substr and scm_lfwrite_str * libguile/ports.c (update_port_lf): new function (scm_lfwrite): use update_port_lf (scm_lfwrite_substr): new function (scm_lfwrite_str): new function * test-suite/tests/asm-to-bytecode.test ("compiler"): add string width byte to string-like asm tests
2009-08-08 02:35:00 -07:00
/* scm_i_symbol_ref takes a symbol SYM and a size_t index X and returns
   a scm_t_wchar.
   NOTE(review): presumably the character at index X of SYM's name --
   confirm against strings.c.  */
SCM_INTERNAL scm_t_wchar scm_i_symbol_ref (SCM sym, size_t x);
/* Error reporters for character-set conversion.  Both take the name of
   the reporting subroutine SUBR, an int ERR, a MESSAGE and the PORT
   involved; scm_encoding_error additionally takes CHR.  Both return
   void.
   NOTE(review): presumably ERR is an errno value and these functions
   signal a Scheme error rather than returning -- confirm against
   strings.c.  */
SCM_INTERNAL void scm_encoding_error (const char *subr, int err,
const char *message, SCM port, SCM chr);
SCM_INTERNAL void scm_decoding_error (const char *subr, int err,
const char *message, SCM port);
/* internal utility functions. */
2008-05-14 05:20:47 +02:00
/* Takes a Scheme value LIST and returns a `char **'.
   NOTE(review): presumably converts a list of strings to an array of C
   strings; how the array is allocated, terminated and released is not
   visible from this header -- confirm against strings.c.  */
SCM_INTERNAL char **scm_i_allocate_string_pointers (SCM list);
/* Takes a length LEN and two Scheme values START and END, and writes
   results through the size_t pointers CSTART and CEND; returns void.
   NOTE(review): presumably converts START/END to C indices validated
   against LEN, with some default for unspecified bounds -- confirm
   against strings.c.  */
SCM_INTERNAL void scm_i_get_substring_spec (size_t len,
SCM start, size_t *cstart,
SCM end, size_t *cend);
/* Debugging functions.  Unlike the internal helpers in this header,
   these are declared SCM_API.  Each returns an SCM; the two dump
   functions take one SCM argument.  scm_sys_stringbuf_hist takes no
   arguments and is declared only when SCM_STRING_LENGTH_HISTOGRAM is
   defined.  */
SCM_API SCM scm_sys_string_dump (SCM);
SCM_API SCM scm_sys_symbol_dump (SCM);
#ifdef SCM_STRING_LENGTH_HISTOGRAM
SCM_API SCM scm_sys_stringbuf_hist (void);
#endif
#if SCM_CAN_INLINE || defined SCM_INLINE_C_IMPLEMENTING_INLINES
/* This definition is compiled either when inlining is available or
   when this header is being included from inline.c.  */

/* Return the result of SCM_HAS_TYP7 (x, scm_tc7_string), i.e. whether
   X carries the string type tag.  */
SCM_INLINE_IMPLEMENTATION int
scm_is_string (SCM x)
{
  const int tagged_as_string = SCM_HAS_TYP7 (x, scm_tc7_string);

  return tagged_as_string;
}
#endif
/* Assert that STR is a string.  Expands to a single statement that
   passes the result of scm_is_string (STR), together with STR, POS,
   FUNC_NAME and the type name "string", to SCM_ASSERT_TYPE.

   FUNC_NAME is not a macro parameter, so it must be defined at the
   point of use.  STR appears twice in the expansion, so avoid passing
   an expression with side effects.  The do { ... } while (0) wrapper
   lets the macro be used like an ordinary statement followed by a
   semicolon.  */
#define SCM_VALIDATE_STRING(pos, str) \
do { \
SCM_ASSERT_TYPE (scm_is_string (str), str, pos, FUNC_NAME, "string"); \
} while (0)
2008-05-14 05:20:47 +02:00
/* Initialization entry point for this module; takes no arguments and
   returns nothing.
   NOTE(review): presumably called once during libguile startup to
   register the string primitives -- confirm against strings.c and
   init.c.  */
SCM_INTERNAL void scm_init_strings (void);
#endif /* SCM_STRINGS_H */