guile/libguile/strings.c

/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */


#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <alloca.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <uninorm.h>
#include <unistr.h>
#include <uniconv.h>

#include "striconveh.h"

#include "libguile/_scm.h"
#include "libguile/chars.h"
#include "libguile/root.h"
#include "libguile/strings.h"
#include "libguile/error.h"
#include "libguile/generalized-vectors.h"
#include "libguile/deprecation.h"
#include "libguile/validate.h"
#include "libguile/private-options.h"


/* {Strings}
 */


/* Stringbufs 
 *
 * XXX - keeping an accurate refcount during GC seems to be quite
 * tricky, so we just keep score of whether a stringbuf might be
 * shared, not whether it definitely is.  
 *
 * The scheme I (mvo) tried to keep an accurate reference count would
 * recount all strings that point to a stringbuf during the mark-phase
 * of the GC.  This was done since one cannot access the stringbuf of
 * a string when that string is freed (in order to decrease the
 * reference count).  The memory of the stringbuf might have been
 * reused already for something completely different.
 *
 * This recounted worked for a small number of threads beating on
 * cow-strings, but it failed randomly with more than 10 threads, say.
 * I couldn't figure out what went wrong, so I used the conservative
 * approach implemented below.
 *
 * There are 2 storage strategies for stringbufs: 8-bit and wide.  8-bit
 * strings are ISO-8859-1-encoded strings; wide strings are 32-bit (UCS-4)
 * strings.
 */

/* The size in words of the stringbuf header (type tag + size).  */
#define STRINGBUF_HEADER_SIZE   2U

#define STRINGBUF_HEADER_BYTES  (STRINGBUF_HEADER_SIZE * sizeof (SCM))

#define STRINGBUF_F_SHARED      SCM_I_STRINGBUF_F_SHARED
#define STRINGBUF_F_WIDE        SCM_I_STRINGBUF_F_WIDE

#define STRINGBUF_TAG           scm_tc7_stringbuf
#define STRINGBUF_SHARED(buf)   (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
#define STRINGBUF_WIDE(buf)     (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)

#define STRINGBUF_CONTENTS(buf) ((void *)				\
                                 SCM_CELL_OBJECT_LOC (buf,		\
						      STRINGBUF_HEADER_SIZE))
#define STRINGBUF_CHARS(buf)    ((unsigned char *) STRINGBUF_CONTENTS (buf))
#define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *) STRINGBUF_CONTENTS (buf))

#define STRINGBUF_LENGTH(buf)   (SCM_CELL_WORD_1 (buf))

#define SET_STRINGBUF_SHARED(buf)					\
  do									\
    {									\
      /* Don't modify BUF if it's already marked as shared since it might be \
	 a read-only, statically allocated stringbuf.  */		\
      if (SCM_LIKELY (!STRINGBUF_SHARED (buf)))				\
	SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
    }									\
  while (0)

#ifdef SCM_STRING_LENGTH_HISTOGRAM
static size_t lenhist[1001];
#endif

/* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
   characters. */
static SCM
make_stringbuf (size_t len)
{
  /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
     scm_i_symbol_chars, all stringbufs are null-terminated.  Once
     SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
     has been changed for scm_i_symbol_chars, this null-termination
     can be dropped.
  */

  SCM buf;

#ifdef SCM_STRING_LENGTH_HISTOGRAM
  if (len < 1000)
    lenhist[len]++;
  else
    lenhist[1000]++;
#endif

  buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + len + 1,
					    "string"));

  SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG);
  SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);

  STRINGBUF_CHARS (buf)[len] = 0;

  return buf;
}

/* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
   characters.  */
static SCM
make_wide_stringbuf (size_t len)
{
  SCM buf;
  size_t raw_len;

#ifdef SCM_STRING_LENGTH_HISTOGRAM
  if (len < 1000)
    lenhist[len]++;
  else
    lenhist[1000]++;
#endif

  raw_len = (len + 1) * sizeof (scm_t_wchar);
  buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + raw_len,
					    "string"));

  SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG | STRINGBUF_F_WIDE);
  SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);

  STRINGBUF_WIDE_CHARS (buf)[len] = 0;

  return buf;
}

/* Return a UCS-4-encoded stringbuf containing the (possibly Latin-1-encoded)
   characters from BUF.  */
static SCM
wide_stringbuf (SCM buf)
{
  SCM new_buf;

  if (STRINGBUF_WIDE (buf))
    new_buf = buf;
  else
    {
      size_t i, len;
      scm_t_wchar *mem;

      len = STRINGBUF_LENGTH (buf);

      new_buf = make_wide_stringbuf (len);

      mem = STRINGBUF_WIDE_CHARS (new_buf);
      for (i = 0; i < len; i++)
	mem[i] = (scm_t_wchar) STRINGBUF_CHARS (buf)[i];
      mem[len] = 0;
    }

  return new_buf;
}

/* Return a Latin-1-encoded stringbuf containing the (possibly UCS-4-encoded)
   characters from BUF, if possible.  */
static SCM
narrow_stringbuf (SCM buf)
{
  SCM new_buf;

  if (!STRINGBUF_WIDE (buf))
    new_buf = buf;
  else
    {
      size_t i, len;
      scm_t_wchar *wmem;
      unsigned char *mem;

      len = STRINGBUF_LENGTH (buf);
      wmem = STRINGBUF_WIDE_CHARS (buf);

      for (i = 0; i < len; i++)
	if (wmem[i] > 0xFF)
	  /* BUF cannot be narrowed.  */
	  return buf;

      new_buf = make_stringbuf (len);

      mem = STRINGBUF_CHARS (new_buf);
      for (i = 0; i < len; i++)
	mem[i] = (unsigned char) wmem[i];
      mem[len] = 0;
    }

  return new_buf;
}

scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;


/* Copy-on-write strings.
 */

#define STRING_TAG            scm_tc7_string

#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
#define STRING_START(str)     ((size_t)SCM_CELL_WORD_2(str))
#define STRING_LENGTH(str)    ((size_t)SCM_CELL_WORD_3(str))

#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))

#define IS_STRING(str)        (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)

/* Read-only strings.
 */

#define RO_STRING_TAG         scm_tc7_ro_string
#define IS_RO_STRING(str)     (SCM_CELL_TYPE(str)==RO_STRING_TAG)

/* Mutation-sharing substrings
 */

#define SH_STRING_TAG       (scm_tc7_string + 0x100)

#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
/* START and LENGTH as for STRINGs. */

#define IS_SH_STRING(str)   (SCM_CELL_TYPE(str)==SH_STRING_TAG)

SCM scm_nullstr;

/* Create a scheme string with space for LEN 8-bit Latin-1-encoded
   characters.  CHARSP, if not NULL, will be set to location of the
   char array.  */
SCM
scm_i_make_string (size_t len, char **charsp)
{
  SCM buf = make_stringbuf (len);
  SCM res;
  if (charsp)
    *charsp = (char *) STRINGBUF_CHARS (buf);
  res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
			 (scm_t_bits)0, (scm_t_bits) len);
  return res;
}

/* Create a scheme string with space for LEN 32-bit UCS-4-encoded
   characters.  CHARSP, if not NULL, will be set to location of the
   character array.  */
SCM
scm_i_make_wide_string (size_t len, scm_t_wchar **charsp)
{
  SCM buf = make_wide_stringbuf (len);
  SCM res;
  if (charsp)
    *charsp = STRINGBUF_WIDE_CHARS (buf);
  res = scm_double_cell (STRING_TAG, SCM_UNPACK (buf),
                         (scm_t_bits) 0, (scm_t_bits) len);
  return res;
}

static void
validate_substring_args (SCM str, size_t start, size_t end)
{
  if (!IS_STRING (str))
    scm_wrong_type_arg_msg (NULL, 0, str, "string");
  if (start > STRING_LENGTH (str))
    scm_out_of_range (NULL, scm_from_size_t (start));
  if (end > STRING_LENGTH (str) || end < start)
    scm_out_of_range (NULL, scm_from_size_t (end));
}

static inline void
get_str_buf_start (SCM *str, SCM *buf, size_t *start)
{
  *start = STRING_START (*str);
  if (IS_SH_STRING (*str))
    {
      *str = SH_STRING_STRING (*str);
      *start += STRING_START (*str);
    }
  *buf = STRING_STRINGBUF (*str);
}

SCM
scm_i_substring (SCM str, size_t start, size_t end)
{
  SCM buf;
  size_t str_start;
  get_str_buf_start (&str, &buf, &str_start);
  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
  SET_STRINGBUF_SHARED (buf);
  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
  return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
			  (scm_t_bits)str_start + start,
			  (scm_t_bits) end - start);
}

SCM
scm_i_substring_read_only (SCM str, size_t start, size_t end)
{
  SCM buf;
  size_t str_start;
  get_str_buf_start (&str, &buf, &str_start);
  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
  SET_STRINGBUF_SHARED (buf);
  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
  return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
			  (scm_t_bits)str_start + start,
			  (scm_t_bits) end - start);
}

SCM
scm_i_substring_copy (SCM str, size_t start, size_t end)
{
  size_t len = end - start;
  SCM buf, my_buf;
  size_t str_start;
  get_str_buf_start (&str, &buf, &str_start);
  if (scm_i_is_narrow_string (str))
    {
      my_buf = make_stringbuf (len);
      memcpy (STRINGBUF_CHARS (my_buf),
              STRINGBUF_CHARS (buf) + str_start + start, len);
    }
  else
    {
      my_buf = make_wide_stringbuf (len);
      u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
               (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start 
                                 + start), len);
      /* Even though this string is wide, the substring may be narrow.
         Consider adding code to narrow the string.  */
    }
  scm_remember_upto_here_1 (buf);
  return scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
                          (scm_t_bits) 0, (scm_t_bits) len);
}

SCM
scm_i_substring_shared (SCM str, size_t start, size_t end)
{
  if (start == 0 && end == STRING_LENGTH (str))
    return str;
  else 
    {
      size_t len = end - start;
      if (IS_SH_STRING (str))
	{
	  start += STRING_START (str);
	  str = SH_STRING_STRING (str);
	}
      return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
			      (scm_t_bits)start, (scm_t_bits) len);
    }
}

SCM
scm_c_substring (SCM str, size_t start, size_t end)
{
  validate_substring_args (str, start, end);
  return scm_i_substring (str, start, end);
}

SCM
scm_c_substring_read_only (SCM str, size_t start, size_t end)
{
  validate_substring_args (str, start, end);
  return scm_i_substring_read_only (str, start, end);
}

SCM
scm_c_substring_copy (SCM str, size_t start, size_t end)
{
  validate_substring_args (str, start, end);
  return scm_i_substring_copy (str, start, end);
}

SCM
scm_c_substring_shared (SCM str, size_t start, size_t end)
{
  validate_substring_args (str, start, end);
  return scm_i_substring_shared (str, start, end);
}


/* Internal accessors
 */

/* Returns the number of characters in STR.  This may be different
   than the memory size of the string storage.  */
size_t
scm_i_string_length (SCM str)
{
  return STRING_LENGTH (str);
}

/* True if the string is 'narrow', meaning it has a 8-bit Latin-1
   encoding.  False if it is 'wide', having a 32-bit UCS-4
   encoding.  */
int
scm_i_is_narrow_string (SCM str)
{
  return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
}

/* Try to coerce a string to be narrow.  It if is narrow already, do
   nothing.  If it is wide, shrink it to narrow if none of its
   characters are above 0xFF.  Return true if the string is narrow or
   was made to be narrow.  */
int
scm_i_try_narrow_string (SCM str)
{
  SET_STRING_STRINGBUF (str, narrow_stringbuf (STRING_STRINGBUF (str)));

  return scm_i_is_narrow_string (str);
}

/* Return a pointer to the raw data of the string, which can be either Latin-1
   or UCS-4 encoded data, depending on `scm_i_is_narrow_string (STR)'.  */
const void *
scm_i_string_data (SCM str)
{
  SCM buf;
  size_t start;
  const char *data;

  get_str_buf_start (&str, &buf, &start);

  data = STRINGBUF_CONTENTS (buf);
  data += start * (scm_i_is_narrow_string (str) ? 1 : 4);

  return data;
}

/* Returns a pointer to the 8-bit Latin-1 encoded character array of
   STR.  */
const char *
scm_i_string_chars (SCM str)
{
  SCM buf;
  size_t start;
  get_str_buf_start (&str, &buf, &start);
  if (scm_i_is_narrow_string (str))
    return (const char *) STRINGBUF_CHARS (buf) + start;
  else
    scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s",
                    scm_list_1 (str));
  return NULL;
}

/* Returns a pointer to the 32-bit UCS-4 encoded character array of
   STR.  */
const scm_t_wchar *
scm_i_string_wide_chars (SCM str)
{
  SCM buf;
  size_t start;

  get_str_buf_start (&str, &buf, &start);
  if (!scm_i_is_narrow_string (str))
    return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf) + start;
  else
    scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
                    scm_list_1 (str));
}

/* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
   a new string buffer, so that it can be modified without modifying
   other strings.  Also, lock the string mutex.  Later, one must call
   scm_i_string_stop_writing to unlock the mutex.  */
SCM
scm_i_string_start_writing (SCM orig_str)
{
  SCM buf, str = orig_str;
  size_t start;

  get_str_buf_start (&str, &buf, &start);
  if (IS_RO_STRING (str))
    scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));

  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
  if (STRINGBUF_SHARED (buf))
    {
      /* Clone the stringbuf.  */
      size_t len = STRING_LENGTH (str);
      SCM new_buf;

      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);

      if (scm_i_is_narrow_string (str))
        {
          new_buf = make_stringbuf (len);
          memcpy (STRINGBUF_CHARS (new_buf),
                  STRINGBUF_CHARS (buf) + STRING_START (str), len);

        }
      else
        {
          new_buf = make_wide_stringbuf (len);
          u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
                   (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) 
                                     + STRING_START (str)), len);
        }

      SET_STRING_STRINGBUF (str, new_buf);
      start -= STRING_START (str);

      /* FIXME: The following operations are not atomic, so other threads
	 looking at STR may see an inconsistent state.  Nevertheless it can't
	 hurt much since (i) accessing STR while it is being mutated can't
	 yield a crash, and (ii) concurrent accesses to STR should be
	 protected by a mutex at the application level.  The latter may not
	 apply when STR != ORIG_STR, though.  */
      SET_STRING_START (str, 0);
      SET_STRING_STRINGBUF (str, new_buf);

      buf = new_buf;

      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
    }
  return orig_str;
}

/* Return a pointer to the 8-bit Latin-1 chars of a string.  */
char *
scm_i_string_writable_chars (SCM str)
{
  SCM buf;
  size_t start;

  get_str_buf_start (&str, &buf, &start);
  if (scm_i_is_narrow_string (str))
    return (char *) STRINGBUF_CHARS (buf) + start;
  else
    scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s",
                    scm_list_1 (str));
  return NULL;
}

/* Return a pointer to the UCS-4 codepoints of a string.  */
static scm_t_wchar *
scm_i_string_writable_wide_chars (SCM str)
{
  SCM buf;
  size_t start;

  get_str_buf_start (&str, &buf, &start);
  if (!scm_i_is_narrow_string (str))
    return STRINGBUF_WIDE_CHARS (buf) + start;
  else
    scm_misc_error (NULL, "Invalid write access of chars of narrow string: ~s",
                    scm_list_1 (str));
}

/* Unlock the string mutex that was locked when
   scm_i_string_start_writing was called.  */
void
scm_i_string_stop_writing (void)
{
  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
}

/* Return the Xth character of STR as a UCS-4 codepoint.  */
scm_t_wchar
scm_i_string_ref (SCM str, size_t x)
{
  if (scm_i_is_narrow_string (str))
    return (scm_t_wchar) (unsigned char) (scm_i_string_chars (str)[x]);
  else
    return scm_i_string_wide_chars (str)[x];
}

/* Returns index+1 of the first char in STR that matches C, or
   0 if the char is not found.  */
int
scm_i_string_contains_char (SCM str, char ch)
{
  size_t i;
  size_t len = scm_i_string_length (str);

  i = 0;
  if (scm_i_is_narrow_string (str))
    {
      while (i < len)
        {
          if (scm_i_string_chars (str)[i] == ch)
            return i+1;
          i++;
        }
    }
  else
    {
      while (i < len)
        {
          if (scm_i_string_wide_chars (str)[i] 
              == (unsigned char) ch)
            return i+1;
          i++;
        }
    }
  return 0;
}

int 
scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
{
  if (scm_i_is_narrow_string (sstr))
    {
      const char *a = scm_i_string_chars (sstr) + start_x;
      const char *b = cstr;
      return strncmp (a, b, strlen(b));
    }
  else
    {
      size_t i;
      const scm_t_wchar *a = scm_i_string_wide_chars (sstr) + start_x;
      const char *b = cstr;
      for (i = 0; i < strlen (b); i++)
        {
          if (a[i] != (unsigned char) b[i])
            return 1;
        }
    }
  return 0;
}

/* Set the Pth character of STR to UCS-4 codepoint CHR. */
void
scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
{
  if (chr > 0xFF && scm_i_is_narrow_string (str))
    SET_STRING_STRINGBUF (str, wide_stringbuf (STRING_STRINGBUF (str)));

  if (scm_i_is_narrow_string (str))
    {
      char *dst = scm_i_string_writable_chars (str);
      dst[p] = chr;
    }
  else
    {
      scm_t_wchar *dst = scm_i_string_writable_wide_chars (str);
      dst[p] = chr;
    }
}


/* Symbols.

   Basic symbol creation and accessing is done here, the rest is in
   symbols.[hc].  This has been done to keep stringbufs and the
   internals of strings and string-like objects confined to this file.
*/

#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1

SCM
scm_i_make_symbol (SCM name, scm_t_bits flags,
		   unsigned long hash, SCM props)
{
  SCM buf;
  size_t start = STRING_START (name);
  size_t length = STRING_LENGTH (name);

  if (IS_SH_STRING (name))
    {
      name = SH_STRING_STRING (name);
      start += STRING_START (name);
    }
  buf = SYMBOL_STRINGBUF (name);

  if (start == 0 && length == STRINGBUF_LENGTH (buf))
    {
      /* reuse buf. */
      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
      SET_STRINGBUF_SHARED (buf);
      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
    }
  else
    {
      /* make new buf. */
      if (scm_i_is_narrow_string (name))
        {
          SCM new_buf = make_stringbuf (length);
          memcpy (STRINGBUF_CHARS (new_buf),
                  STRINGBUF_CHARS (buf) + start, length);
          buf = new_buf;
        }
      else
        {
          SCM new_buf = make_wide_stringbuf (length);
          u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
                   (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
                   length);
          buf = new_buf;
        }
    }
  return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
			  (scm_t_bits) hash, SCM_UNPACK (props));
}

SCM
scm_i_c_make_symbol (const char *name, size_t len,
		     scm_t_bits flags, unsigned long hash, SCM props)
{
  SCM buf = make_stringbuf (len);
  memcpy (STRINGBUF_CHARS (buf), name, len);

  return scm_immutable_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
				    (scm_t_bits) hash, SCM_UNPACK (props));
}

/* Returns the number of characters in SYM.  This may be different
   from the memory size of SYM.  */
size_t
scm_i_symbol_length (SCM sym)
{
  return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
}

size_t
scm_c_symbol_length (SCM sym)
#define FUNC_NAME "scm_c_symbol_length"
{
  SCM_VALIDATE_SYMBOL (1, sym);

  return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
}
#undef FUNC_NAME

/* True if the name of SYM is stored as a Latin-1 encoded string.
   False if it is stored as a 32-bit UCS-4-encoded string.  */
int
scm_i_is_narrow_symbol (SCM sym)
{
  SCM buf;

  buf = SYMBOL_STRINGBUF (sym);
  return !STRINGBUF_WIDE (buf);
}

/* Returns a pointer to the 8-bit Latin-1 encoded character array that
   contains the name of SYM.  */
const char *
scm_i_symbol_chars (SCM sym)
{
  SCM buf;

  buf = SYMBOL_STRINGBUF (sym);
  if (!STRINGBUF_WIDE (buf))
    return (const char *) STRINGBUF_CHARS (buf);
  else
    scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S",
                    scm_list_1 (sym));
}

/* Return a pointer to the 32-bit UCS-4-encoded character array of a
   symbol's name.  */
const scm_t_wchar *
scm_i_symbol_wide_chars (SCM sym)
{
  SCM buf;

  buf = SYMBOL_STRINGBUF (sym);
  if (STRINGBUF_WIDE (buf))
    return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf);
  else
    scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S",
                    scm_list_1 (sym));
}

SCM
scm_i_symbol_substring (SCM sym, size_t start, size_t end)
{
  SCM buf = SYMBOL_STRINGBUF (sym);
  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
  SET_STRINGBUF_SHARED (buf);
  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
  return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
			  (scm_t_bits)start, (scm_t_bits) end - start);
}

/* Returns the Xth character of symbol SYM as a UCS-4 codepoint.  */
scm_t_wchar
scm_i_symbol_ref (SCM sym, size_t x)
{
  if (scm_i_is_narrow_symbol (sym))
    return (scm_t_wchar) (unsigned char) (scm_i_symbol_chars (sym)[x]);
  else
    return scm_i_symbol_wide_chars (sym)[x];
}

/* Debugging
 */

SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str), 
            "Returns an association list containing debugging information\n"
            "for @var{str}. The association list has the following entries."
            "@table @code\n"
            "@item string\n"
            "The string itself.\n"
            "@item start\n"
            "The start index of the string into its stringbuf\n"
            "@item length\n"
            "The length of the string\n"
            "@item shared\n"
            "If this string is a substring, it returns its parent string.\n"
            "Otherwise, it returns @code{#f}\n"
            "@item read-only\n"
            "@code{#t} if the string is read-only\n"
            "@item stringbuf-chars\n"
            "A new string containing this string's stringbuf's characters\n"
            "@item stringbuf-length\n"
            "The number of characters in this stringbuf\n"
            "@item stringbuf-shared\n"
            "@code{#t} if this stringbuf is shared\n"
            "@item stringbuf-wide\n"
            "@code{#t} if this stringbuf's characters are stored in a\n"
            "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
            "buffer\n"
            "@end table")
#define FUNC_NAME s_scm_sys_string_dump
{
  SCM e1, e2, e3, e4, e5, e6, e7, e8, e9;
  SCM buf;
  SCM_VALIDATE_STRING (1, str);

  /* String info */
  e1 = scm_cons (scm_from_latin1_symbol ("string"),
                 str);
  e2 = scm_cons (scm_from_latin1_symbol ("start"),
                 scm_from_size_t (STRING_START (str)));
  e3 = scm_cons (scm_from_latin1_symbol ("length"),
                 scm_from_size_t (STRING_LENGTH (str)));

  if (IS_SH_STRING (str))
    {
      e4 = scm_cons (scm_from_latin1_symbol ("shared"),
                     SH_STRING_STRING (str));
      buf = STRING_STRINGBUF (SH_STRING_STRING (str));
    }
  else
    {
      e4 = scm_cons (scm_from_latin1_symbol ("shared"),
                     SCM_BOOL_F);
      buf = STRING_STRINGBUF (str);
    }

  if (IS_RO_STRING (str))
    e5 = scm_cons (scm_from_latin1_symbol ("read-only"),
                   SCM_BOOL_T);
  else
    e5 = scm_cons (scm_from_latin1_symbol ("read-only"),
                   SCM_BOOL_F);

  /* Stringbuf info */
  if (!STRINGBUF_WIDE (buf))
    {
      size_t len = STRINGBUF_LENGTH (buf);
      char *cbuf;
      SCM sbc = scm_i_make_string (len, &cbuf);
      memcpy (cbuf, STRINGBUF_CHARS (buf), len);
      e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
                     sbc);
    }
  else
    {
      size_t len = STRINGBUF_LENGTH (buf);
      scm_t_wchar *cbuf;
      SCM sbc = scm_i_make_wide_string (len, &cbuf);
      u32_cpy ((scm_t_uint32 *) cbuf, 
               (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
      e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
                     sbc);
    }
  e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"), 
                 scm_from_size_t (STRINGBUF_LENGTH (buf)));
  if (STRINGBUF_SHARED (buf))
    e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"), 
                   SCM_BOOL_T);
  else
    e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"), 
                   SCM_BOOL_F);
  if (STRINGBUF_WIDE (buf))
    e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
		   SCM_BOOL_T);
  else
    e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
		   SCM_BOOL_F);

  return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, e9, SCM_UNDEFINED);
}
#undef FUNC_NAME

SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
            "Returns an association list containing debugging information\n"
            "for @var{sym}. The association list has the following entries."
            "@table @code\n"
            "@item symbol\n"
            "The symbol itself\n"
            "@item hash\n"
            "Its hash value\n"
            "@item interned\n"
            "@code{#t} if it is an interned symbol\n"
            "@item stringbuf-chars\n"
            "A new string containing this symbols's stringbuf's characters\n"
            "@item stringbuf-length\n"
            "The number of characters in this stringbuf\n"
            "@item stringbuf-shared\n"
            "@code{#t} if this stringbuf is shared\n"
            "@item stringbuf-wide\n"
            "@code{#t} if this stringbuf's characters are stored in a\n"
            "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
            "buffer\n"
            "@end table")
#define FUNC_NAME s_scm_sys_symbol_dump
{
  SCM e1, e2, e3, e4, e5, e6, e7;
  SCM buf;
  SCM_VALIDATE_SYMBOL (1, sym);
  e1 = scm_cons (scm_from_latin1_symbol ("symbol"),
                 sym);
  e2 = scm_cons (scm_from_latin1_symbol ("hash"),
                 scm_from_ulong (scm_i_symbol_hash (sym)));
  e3 = scm_cons (scm_from_latin1_symbol ("interned"),
                 scm_symbol_interned_p (sym));
  buf = SYMBOL_STRINGBUF (sym);

  /* Stringbuf info */
  if (!STRINGBUF_WIDE (buf))
    {
      size_t len = STRINGBUF_LENGTH (buf);
      char *cbuf;
      SCM sbc = scm_i_make_string (len, &cbuf);
      memcpy (cbuf, STRINGBUF_CHARS (buf), len);
      e4 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
                     sbc);
    }
  else
    {
      size_t len = STRINGBUF_LENGTH (buf);
      scm_t_wchar *cbuf;
      SCM sbc = scm_i_make_wide_string (len, &cbuf);
      u32_cpy ((scm_t_uint32 *) cbuf, 
               (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
      e4 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
                     sbc);
    }
  e5 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"), 
                 scm_from_size_t (STRINGBUF_LENGTH (buf)));
  if (STRINGBUF_SHARED (buf))
    e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"), 
                   SCM_BOOL_T);
  else
    e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"), 
                   SCM_BOOL_F);
  if (STRINGBUF_WIDE (buf))
    e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
                    SCM_BOOL_T);
  else
    e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
                    SCM_BOOL_F);
  return scm_list_n (e1, e2, e3, e4, e5, e6, e7, SCM_UNDEFINED);

}
#undef FUNC_NAME

#ifdef SCM_STRING_LENGTH_HISTOGRAM

SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, (void), "")
#define FUNC_NAME s_scm_sys_stringbuf_hist
{
  int i;
  for (i = 0; i < 1000; i++)
    if (lenhist[i])
      fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
  fprintf (stderr, ">999: %u\n", lenhist[1000]);
  return SCM_UNSPECIFIED;
}
#undef FUNC_NAME

#endif


SCM_DEFINE (scm_string_p, "string?", 1, 0, 0, 
	    (SCM obj),
	    "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
#define FUNC_NAME s_scm_string_p
{
  return scm_from_bool (IS_STRING (obj));
}
#undef FUNC_NAME


SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);

SCM_DEFINE (scm_string, "string", 0, 0, 1, 
            (SCM chrs),
	    "@deffnx {Scheme Procedure} list->string chrs\n"
	    "Return a newly allocated string composed of the arguments,\n"
	    "@var{chrs}.")
#define FUNC_NAME s_scm_string
{
  SCM result = SCM_BOOL_F;
  SCM rest;
  size_t len;
  size_t p = 0;
  long i;
  int wide = 0;

  /* Verify that this is a list of chars.  */
  i = scm_ilength (chrs);
  SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);

  len = (size_t) i;
  rest = chrs;

  while (len > 0 && scm_is_pair (rest))
    {
      SCM elt = SCM_CAR (rest);
      SCM_VALIDATE_CHAR (SCM_ARGn, elt);
      if (SCM_CHAR (elt) > 0xFF)
        wide = 1;
      rest = SCM_CDR (rest);
      len--;
      scm_remember_upto_here_1 (elt);
    }

  /* Construct a string containing this list of chars.  */
  len = (size_t) i;
  rest = chrs;

  if (wide == 0)
    {
      char *buf;

      result = scm_i_make_string (len, NULL);
      result = scm_i_string_start_writing (result);
      buf = scm_i_string_writable_chars (result);
      while (len > 0 && scm_is_pair (rest))
        {
          SCM elt = SCM_CAR (rest);
          buf[p] = (unsigned char) SCM_CHAR (elt);
          p++;
          rest = SCM_CDR (rest);
          len--;
          scm_remember_upto_here_1 (elt);
        }
    }
  else
    {
      scm_t_wchar *buf;

      result = scm_i_make_wide_string (len, NULL);
      result = scm_i_string_start_writing (result);
      buf = scm_i_string_writable_wide_chars (result);
      while (len > 0 && scm_is_pair (rest))
        {
          SCM elt = SCM_CAR (rest);
          buf[p] = SCM_CHAR (elt);
          p++;
          rest = SCM_CDR (rest);
          len--;
          scm_remember_upto_here_1 (elt);
        }
    }
  scm_i_string_stop_writing ();

  if (len > 0)
    scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
  if (!scm_is_null (rest))
    scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");

  return result;
}
#undef FUNC_NAME

SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
            (SCM k, SCM chr),
	    "Return a newly allocated string of\n"
            "length @var{k}.  If @var{chr} is given, then all elements of\n"
	    "the string are initialized to @var{chr}, otherwise the contents\n"
	    "of the @var{string} are all set to @var{#\nul}.")
#define FUNC_NAME s_scm_make_string
{
  return scm_c_make_string (scm_to_size_t (k), chr);
}
#undef FUNC_NAME

SCM
scm_c_make_string (size_t len, SCM chr)
#define FUNC_NAME NULL
{
  size_t p;
  char *contents = NULL;
  SCM res = scm_i_make_string (len, &contents);

  /* If no char is given, initialize string contents to NULL.  */
  if (SCM_UNBNDP (chr))
    memset (contents, 0, len);
  else
    {
      SCM_VALIDATE_CHAR (0, chr);
      res = scm_i_string_start_writing (res);
      for (p = 0; p < len; p++)
        scm_i_string_set_x (res, p, SCM_CHAR (chr));
      scm_i_string_stop_writing ();
    }

  return res;
}
#undef FUNC_NAME

SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0, 
	    (SCM string),
	    "Return the number of characters in @var{string}.")
#define FUNC_NAME s_scm_string_length
{
  SCM_VALIDATE_STRING (1, string);
  return scm_from_size_t (STRING_LENGTH (string));
}
#undef FUNC_NAME

SCM_DEFINE (scm_string_bytes_per_char, "string-bytes-per-char", 1, 0, 0,
            (SCM string),
            "Return the bytes used to represent a character in @var{string}."
            "This will return 1 or 4.")
#define FUNC_NAME s_scm_string_bytes_per_char
{
  SCM_VALIDATE_STRING (1, string);
  if (!scm_i_is_narrow_string (string))
    return scm_from_int (4);

  return scm_from_int (1);
}
#undef FUNC_NAME

size_t
scm_c_string_length (SCM string)
{
  if (!IS_STRING (string))
    scm_wrong_type_arg_msg (NULL, 0, string, "string");
  return STRING_LENGTH (string);
}

SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
            (SCM str, SCM k),
            "Return character @var{k} of @var{str} using zero-origin\n"
            "indexing. @var{k} must be a valid index of @var{str}.")
#define FUNC_NAME s_scm_string_ref
{
  size_t len;
  unsigned long idx;

  SCM_VALIDATE_STRING (1, str);

  len = scm_i_string_length (str);
  if (SCM_LIKELY (len > 0))
    idx = scm_to_unsigned_integer (k, 0, len - 1);
  else
    scm_out_of_range (NULL, k);

  if (scm_i_is_narrow_string (str))
    return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
  else
    return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[idx]);
}
#undef FUNC_NAME

SCM
scm_c_string_ref (SCM str, size_t p)
{
  if (p >= scm_i_string_length (str))
    scm_out_of_range (NULL, scm_from_size_t (p));
  if (scm_i_is_narrow_string (str))
    return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
  else
    return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[p]);

}

SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
            (SCM str, SCM k, SCM chr),
            "Store @var{chr} in element @var{k} of @var{str} and return\n"
            "an unspecified value. @var{k} must be a valid index of\n"
            "@var{str}.")
#define FUNC_NAME s_scm_string_set_x
{
  size_t len;
  unsigned long idx;

  SCM_VALIDATE_STRING (1, str);

  len = scm_i_string_length (str);
  if (SCM_LIKELY (len > 0))
    idx = scm_to_unsigned_integer (k, 0, len - 1);
  else
    scm_out_of_range (NULL, k);

  SCM_VALIDATE_CHAR (3, chr);
  str = scm_i_string_start_writing (str);
  scm_i_string_set_x (str, idx, SCM_CHAR (chr));
  scm_i_string_stop_writing ();

  return SCM_UNSPECIFIED;
}
#undef FUNC_NAME

void
scm_c_string_set_x (SCM str, size_t p, SCM chr)
{
  if (p >= scm_i_string_length (str))
    scm_out_of_range (NULL, scm_from_size_t (p));
  str = scm_i_string_start_writing (str);
  scm_i_string_set_x (str, p, SCM_CHAR (chr));
  scm_i_string_stop_writing ();
}

SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
	    (SCM str, SCM start, SCM end),
	    "Return a newly allocated string formed from the characters\n"
            "of @var{str} beginning with index @var{start} (inclusive) and\n"
	    "ending with index @var{end} (exclusive).\n"
            "@var{str} must be a string, @var{start} and @var{end} must be\n"
	    "exact integers satisfying:\n\n"
            "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
#define FUNC_NAME s_scm_substring
{
  size_t len, from, to;

  SCM_VALIDATE_STRING (1, str);
  len = scm_i_string_length (str);
  from = scm_to_unsigned_integer (start, 0, len);
  if (SCM_UNBNDP (end))
    to = len;
  else
    to = scm_to_unsigned_integer (end, from, len);
  return scm_i_substring (str, from, to);
}
#undef FUNC_NAME

SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
	    (SCM str, SCM start, SCM end),
	    "Return a newly allocated string formed from the characters\n"
            "of @var{str} beginning with index @var{start} (inclusive) and\n"
	    "ending with index @var{end} (exclusive).\n"
            "@var{str} must be a string, @var{start} and @var{end} must be\n"
	    "exact integers satisfying:\n"
	    "\n"
            "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
	    "\n"
	    "The returned string is read-only.\n")
#define FUNC_NAME s_scm_substring_read_only
{
  size_t len, from, to;

  SCM_VALIDATE_STRING (1, str);
  len = scm_i_string_length (str);
  from = scm_to_unsigned_integer (start, 0, len);
  if (SCM_UNBNDP (end))
    to = len;
  else
    to = scm_to_unsigned_integer (end, from, len);
  return scm_i_substring_read_only (str, from, to);
}
#undef FUNC_NAME

SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
	    (SCM str, SCM start, SCM end),
	    "Return a newly allocated string formed from the characters\n"
            "of @var{str} beginning with index @var{start} (inclusive) and\n"
	    "ending with index @var{end} (exclusive).\n"
            "@var{str} must be a string, @var{start} and @var{end} must be\n"
	    "exact integers satisfying:\n\n"
            "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
#define FUNC_NAME s_scm_substring_copy
{
  /* For the Scheme version, START is mandatory, but for the C
     version, it is optional.  See scm_string_copy in srfi-13.c for a
     rationale.
  */

  size_t from, to;

  SCM_VALIDATE_STRING (1, str);
  scm_i_get_substring_spec (scm_i_string_length (str),
			    start, &from, end, &to);
  return scm_i_substring_copy (str, from, to);
}
#undef FUNC_NAME

SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
	    (SCM str, SCM start, SCM end),
	    "Return string that indirectly refers to the characters\n"
            "of @var{str} beginning with index @var{start} (inclusive) and\n"
	    "ending with index @var{end} (exclusive).\n"
            "@var{str} must be a string, @var{start} and @var{end} must be\n"
	    "exact integers satisfying:\n\n"
            "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
#define FUNC_NAME s_scm_substring_shared
{
  size_t len, from, to;

  SCM_VALIDATE_STRING (1, str);
  len = scm_i_string_length (str);
  from = scm_to_unsigned_integer (start, 0, len);
  if (SCM_UNBNDP (end))
    to = len;
  else
    to = scm_to_unsigned_integer (end, from, len);
  return scm_i_substring_shared (str, from, to);
}
#undef FUNC_NAME

SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1, 
            (SCM args),
            "Return a newly allocated string whose characters form the\n"
            "concatenation of the given strings, @var{args}.")
#define FUNC_NAME s_scm_string_append
{
  SCM res;
  size_t len = 0;
  int wide = 0;
  SCM l, s;
  size_t i;
  union
  {
    char *narrow;
    scm_t_wchar *wide;
  } data;

  SCM_VALIDATE_REST_ARGUMENT (args);
  for (l = args; !scm_is_null (l); l = SCM_CDR (l))
    {
      s = SCM_CAR (l);
      SCM_VALIDATE_STRING (SCM_ARGn, s);
      len += scm_i_string_length (s);
      if (!scm_i_is_narrow_string (s))
        wide = 1;
    }
  data.narrow = NULL;
  if (!wide)
    res = scm_i_make_string (len, &data.narrow);
  else
    res = scm_i_make_wide_string (len, &data.wide);

  for (l = args; !scm_is_null (l); l = SCM_CDR (l))
    {
      size_t len;
      s = SCM_CAR (l);
      SCM_VALIDATE_STRING (SCM_ARGn, s);
      len = scm_i_string_length (s);
      if (!wide)
        {
          memcpy (data.narrow, scm_i_string_chars (s), len);
          data.narrow += len;
        }
      else
        {
          if (scm_i_is_narrow_string (s))
            {
              for (i = 0; i < scm_i_string_length (s); i++)
                data.wide[i] = (unsigned char) scm_i_string_chars (s)[i];
            }
          else
            u32_cpy ((scm_t_uint32 *) data.wide,
                     (scm_t_uint32 *) scm_i_string_wide_chars (s), len);
          data.wide += len;
        }
      scm_remember_upto_here_1 (s);
    }
  return res;
}
#undef FUNC_NAME


/* Charset conversion error handling.  */

SCM_SYMBOL (scm_encoding_error_key, "encoding-error");
SCM_SYMBOL (scm_decoding_error_key, "decoding-error");

/* Raise an exception informing that character CHR could not be written
   to PORT in its current encoding.  */
void
scm_encoding_error (const char *subr, int err, const char *message,
		    SCM port, SCM chr)
{
  scm_throw (scm_encoding_error_key,
	     scm_list_n (scm_from_locale_string (subr),
			 scm_from_locale_string (message),
			 scm_from_int (err),
			 port, chr,
			 SCM_UNDEFINED));
}

/* Raise an exception informing of an encoding error on PORT.  This
   means that a character could not be written in PORT's encoding.  */
void
scm_decoding_error (const char *subr, int err, const char *message, SCM port)
{
  scm_throw (scm_decoding_error_key,
	     scm_list_n (scm_from_locale_string (subr),
			 scm_from_locale_string (message),
			 scm_from_int (err),
			 port,
			 SCM_UNDEFINED));
}


/* String conversion to/from C.  */

SCM
scm_from_stringn (const char *str, size_t len, const char *encoding,
                  scm_t_string_failed_conversion_handler handler)
{
  size_t u32len, i;
  scm_t_wchar *u32;
  int wide = 0;
  SCM res;

  /* The order of these checks is important. */
  if (!str && len != 0)
    scm_misc_error ("scm_from_stringn", "NULL string pointer", SCM_EOL);
  if (len == (size_t) -1)
    len = strlen (str);
  if (len == 0)
    return scm_nullstr;

  if (encoding == NULL)
    {
      /* If encoding is null, use Latin-1.  */
      char *buf;
      res = scm_i_make_string (len, &buf);
      memcpy (buf, str, len);
      return res;
    }

  u32len = 0;
  u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
                                                (enum iconv_ilseq_handler)
                                                handler,
                                                str, len,
                                                NULL,
                                                NULL, &u32len);

  if (SCM_UNLIKELY (u32 == NULL))
    {
      /* Raise an error and pass the raw C string as a bytevector to the `throw'
	 handler.  */
      SCM bv;
      signed char *buf;

      buf = scm_gc_malloc_pointerless (len, "bytevector");
      memcpy (buf, str, len);
      bv = scm_c_take_bytevector (buf, len);

      scm_decoding_error (__func__, errno,
			  "input locale conversion error", bv);
    }

  i = 0;
  while (i < u32len)
    if (u32[i++] > 0xFF)
      {
        wide = 1;
        break;
      }

  if (!wide)
    {
      char *dst;
      res = scm_i_make_string (u32len, &dst);
      for (i = 0; i < u32len; i ++)
        dst[i] = (unsigned char) u32[i];
      dst[u32len] = '\0';
    }
  else
    {
      scm_t_wchar *wdst;
      res = scm_i_make_wide_string (u32len, &wdst);
      u32_cpy ((scm_t_uint32 *) wdst, (scm_t_uint32 *) u32, u32len);
      wdst[u32len] = 0;
    }

  free (u32);
  return res;
}

SCM
scm_from_locale_string (const char *str)
{
  return scm_from_locale_stringn (str, -1);
}

SCM
scm_from_locale_stringn (const char *str, size_t len)
{
  const char *enc;
  scm_t_string_failed_conversion_handler hndl;
  SCM inport;
  scm_t_port *pt;

  inport = scm_current_input_port ();
  if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
    {
      pt = SCM_PTAB_ENTRY (inport);
      enc = pt->encoding;
      hndl = pt->ilseq_handler;
    }
  else
    {
      enc = NULL;
      hndl = SCM_FAILED_CONVERSION_ERROR;
    }

  return scm_from_stringn (str, len, enc, hndl);
}

SCM
scm_from_latin1_string (const char *str)
{
  return scm_from_latin1_stringn (str, -1);
}

SCM
scm_from_latin1_stringn (const char *str, size_t len)
{
  char *buf;
  SCM result;

  if (len == (size_t) -1)
    len = strlen (str);

  /* Make a narrow string and copy STR as is.  */
  result = scm_i_make_string (len, &buf);
  memcpy (buf, str, len);

  return result;
}

SCM
scm_from_utf8_string (const char *str)
{
  return scm_from_utf8_stringn (str, -1);
}

SCM
scm_from_utf8_stringn (const char *str, size_t len)
{
  return scm_from_stringn (str, len, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
}

SCM
scm_from_utf32_string (const scm_t_wchar *str)
{
  return scm_from_utf32_stringn (str, -1);
}

SCM
scm_from_utf32_stringn (const scm_t_wchar *str, size_t len)
{
  SCM result;
  scm_t_wchar *buf;

  if (len == (size_t) -1)
    len = u32_strlen ((uint32_t *) str);

  result = scm_i_make_wide_string (len, &buf);
  memcpy (buf, str, len * sizeof (scm_t_wchar));
  scm_i_try_narrow_string (result);

  return result;
}

/* Create a new scheme string from the C string STR.  The memory of
   STR may be used directly as storage for the new string.  */
/* FIXME: GC-wise, the only way to use the memory area pointed to by STR
   would be to register a finalizer to eventually free(3) STR, which isn't
   worth it.  Should we just deprecate the `scm_take_' functions?  */
SCM
scm_take_locale_stringn (char *str, size_t len)
{
  SCM res;

  res = scm_from_locale_stringn (str, len);
  free (str);

  return res;
}

SCM
scm_take_locale_string (char *str)
{
  return scm_take_locale_stringn (str, -1);
}

/* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
   *LENP-byte locale-encoded string, to `\xXX', `\uXXXX', or `\UXXXXXX'.
   Set *LENP to the size of the resulting string.

   FIXME: This is a hack we should get rid of.  See
   <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00004.html>
   for details.  */
static void
unistring_escapes_to_guile_escapes (char *buf, size_t *lenp)
{
  char *before, *after;
  size_t i, j;

  before = buf;
  after = buf;
  i = 0;
  j = 0;
  while (i < *lenp)
    {
      if ((i <= *lenp - 6)
          && before[i] == '\\'
          && before[i + 1] == 'u'
          && before[i + 2] == '0' && before[i + 3] == '0')
        {
          /* Convert \u00NN to \xNN */
          after[j] = '\\';
          after[j + 1] = 'x';
          after[j + 2] = tolower ((int) before[i + 4]);
          after[j + 3] = tolower ((int) before[i + 5]);
          i += 6;
          j += 4;
        }
      else if ((i <= *lenp - 10)
               && before[i] == '\\'
               && before[i + 1] == 'U'
               && before[i + 2] == '0' && before[i + 3] == '0')
        {
          /* Convert \U00NNNNNN to \UNNNNNN */
          after[j] = '\\';
          after[j + 1] = 'U';
          after[j + 2] = tolower ((int) before[i + 4]);
          after[j + 3] = tolower ((int) before[i + 5]);
          after[j + 4] = tolower ((int) before[i + 6]);
          after[j + 5] = tolower ((int) before[i + 7]);
          after[j + 6] = tolower ((int) before[i + 8]);
          after[j + 7] = tolower ((int) before[i + 9]);
          i += 10;
          j += 8;
        }
      else
        {
          after[j] = before[i];
          i++;
          j++;
        }
    }
  *lenp = j;
}

/* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
   *LENP-byte locale-encoded string, to `\xXXXX;'.  Set *LEN to the size
   of the resulting string.  BUF must be large enough to handle the
   worst case when `\uXXXX' escapes (6 characters) are replaced by
   `\xXXXX;' (7 characters).  */
static void
unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
{
  char *before, *after;
  size_t i, j;
  /* The worst case is if the input string contains all 4-digit hex escapes.
     "\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */
  size_t max_out_len = (*lenp * 7) / 6 + 1;
  size_t nzeros, ndigits;

  before = buf;
  after = alloca (max_out_len);
  i = 0;
  j = 0;
  while (i < *lenp)
    {
      if (((i <= *lenp - 6) && before[i] == '\\' && before[i + 1] == 'u')
          || ((i <= *lenp - 10) && before[i] == '\\' && before[i + 1] == 'U'))
        {
          if (before[i + 1] == 'u')
            ndigits = 4;
          else if (before[i + 1] == 'U')
            ndigits = 8;
          else
            abort ();

          /* Add the R6RS hex escape initial sequence.  */
          after[j] = '\\';
          after[j + 1] = 'x';

          /* Move string positions to the start of the hex numbers.  */
          i += 2;
          j += 2;

          /* Find the number of initial zeros in this hex number.  */
          nzeros = 0;
          while (before[i + nzeros] == '0' && nzeros < ndigits)
            nzeros++;

          /* Copy the number, skipping initial zeros, and then move the string
             positions.  */
          if (nzeros == ndigits)
            {
              after[j] = '0';
              i += ndigits;
              j += 1;
            }
          else
            {
              int pos;
              for (pos = 0; pos < ndigits - nzeros; pos++)
                after[j + pos] = tolower ((int) before[i + nzeros + pos]);
              i += ndigits;
              j += (ndigits - nzeros);
            }

          /* Add terminating semicolon.  */
          after[j] = ';';
          j++;
        }
      else
        {
          after[j] = before[i];
          i++;
          j++;
        }
    }
  *lenp = j;
  memcpy (before, after, j);
}

char *
scm_to_locale_string (SCM str)
{
  return scm_to_locale_stringn (str, NULL);
}

char *
scm_to_locale_stringn (SCM str, size_t *lenp)
{
  SCM outport;
  scm_t_port *pt;
  const char *enc;

  outport = scm_current_output_port ();
  if (!SCM_UNBNDP (outport) && SCM_OPOUTPORTP (outport))
    {
      pt = SCM_PTAB_ENTRY (outport);
      enc = pt->encoding;
    }
  else
    enc = NULL;

  return scm_to_stringn (str, lenp, 
                         enc,
                         scm_i_get_conversion_strategy (SCM_BOOL_F));
}

char *
scm_to_latin1_string (SCM str)
{
  return scm_to_latin1_stringn (str, NULL);
}

char *
scm_to_latin1_stringn (SCM str, size_t *lenp)
#define FUNC_NAME "scm_to_latin1_stringn"
{
  char *result;

  SCM_VALIDATE_STRING (1, str);

  if (scm_i_is_narrow_string (str))
    {
      if (lenp)
	*lenp = scm_i_string_length (str);

      result = scm_strdup (scm_i_string_data (str));
    }
  else
    result = scm_to_stringn (str, lenp, NULL,
			     SCM_FAILED_CONVERSION_ERROR);

  return result;
}
#undef FUNC_NAME

char *
scm_to_utf8_string (SCM str)
{
  return scm_to_utf8_stringn (str, NULL);
}

char *
scm_to_utf8_stringn (SCM str, size_t *lenp)
{
  return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
}

scm_t_wchar *
scm_to_utf32_string (SCM str)
{
  return scm_to_utf32_stringn (str, NULL);
}

scm_t_wchar *
scm_to_utf32_stringn (SCM str, size_t *lenp)
#define FUNC_NAME "scm_to_utf32_stringn"
{
  scm_t_wchar *result;

  SCM_VALIDATE_STRING (1, str);

  if (scm_i_is_narrow_string (str))
    result = (scm_t_wchar *)
      scm_to_stringn (str, lenp, "UTF-32",
		      SCM_FAILED_CONVERSION_ERROR);
  else
    {
      size_t len;

      len = scm_i_string_length (str);
      if (lenp)
	*lenp = len;

      result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
      memcpy (result, scm_i_string_wide_chars (str),
	      len * sizeof (scm_t_wchar));
      result[len] = 0;
    }

  return result;
}
#undef FUNC_NAME

/* Return a malloc(3)-allocated buffer containing the contents of STR encoded
   according to ENCODING.  If LENP is non-NULL, set it to the size in bytes of
   the returned buffer.  If the conversion to ENCODING fails, apply the strategy
   defined by HANDLER.  */
char *
scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
                scm_t_string_failed_conversion_handler handler)
{
  char *buf;
  size_t ilen, len, i;
  int ret;
  const char *enc;

  if (!scm_is_string (str))
    scm_wrong_type_arg_msg (NULL, 0, str, "string");
  ilen = scm_i_string_length (str);

  if (ilen == 0)
    {
      buf = scm_malloc (1);
      buf[0] = '\0';
      if (lenp)
        *lenp = 0;
      return buf;
    }

  if (lenp == NULL)
    for (i = 0; i < ilen; i++)
      if (scm_i_string_ref (str, i) == '\0')
        scm_misc_error (NULL,
                        "string contains #\\nul character: ~S",
                        scm_list_1 (str));

  if (scm_i_is_narrow_string (str) && (encoding == NULL))
    {
      /* If using native Latin-1 encoding, just copy the string
         contents.  */
      if (lenp)
        {
          buf = scm_malloc (ilen);
          memcpy (buf, scm_i_string_chars (str), ilen);
          *lenp = ilen;
          return buf;
        }
      else
        {
          buf = scm_malloc (ilen + 1);
          memcpy (buf, scm_i_string_chars (str), ilen);
          buf[ilen] = '\0';
          return buf;
        }
    }


  buf = NULL;
  len = 0;
  enc = encoding;
  if (enc == NULL)
    enc = "ISO-8859-1";
  if (scm_i_is_narrow_string (str))
    {
      ret = mem_iconveh (scm_i_string_chars (str), ilen,
                         "ISO-8859-1", enc,
                         (enum iconv_ilseq_handler) handler, NULL,
                         &buf, &len);

      if (ret != 0)
        scm_encoding_error (__func__, errno,
			    "cannot convert narrow string to output locale",
			    SCM_BOOL_F,
			    /* FIXME: Faulty character unknown.  */
			    SCM_BOOL_F);
    }
  else
    {
      buf = u32_conv_to_encoding (enc,
                                  (enum iconv_ilseq_handler) handler,
                                  (scm_t_uint32 *) scm_i_string_wide_chars (str),
                                  ilen,
                                  NULL,
                                  NULL, &len);
      if (buf == NULL)
        scm_encoding_error (__func__, errno,
			    "cannot convert wide string to output locale",
			    SCM_BOOL_F,
			    /* FIXME: Faulty character unknown.  */
			    SCM_BOOL_F);
    }
  if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
    {
      if (SCM_R6RS_ESCAPES_P)
	{
	  /* The worst case is if the input string contains all 4-digit
	     hex escapes.  "\uXXXX" (six characters) becomes "\xXXXX;"
	     (seven characters).  Make BUF large enough to hold
	     that.  */
	  buf = scm_realloc (buf, (len * 7) / 6 + 1);
	  unistring_escapes_to_r6rs_escapes (buf, &len);
	}
      else
        unistring_escapes_to_guile_escapes (buf, &len);

      buf = scm_realloc (buf, len);
    }
  if (lenp)
    *lenp = len;
  else
    {
      buf = scm_realloc (buf, len + 1);
      buf[len] = '\0';
    }

  scm_remember_upto_here_1 (str);
  return buf;
}

size_t
scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
{
  size_t len;
  char *result = NULL;
  if (!scm_is_string (str))
    scm_wrong_type_arg_msg (NULL, 0, str, "string");
  result = scm_to_locale_stringn (str, &len);

  memcpy (buf, result, (len > max_len) ? max_len : len);
  free (result);

  scm_remember_upto_here_1 (str);
  return len;
}


/* Unicode string normalization.  */

/* This function is a partial clone of SCM_STRING_TO_U32_BUF from 
   libguile/i18n.c.  It would be useful to have this factored out into a more
   convenient location, but its use of alloca makes that tricky to do. */

static SCM 
normalize_str (SCM string, uninorm_t form)
{
  SCM ret;
  scm_t_uint32 *w_str;
  scm_t_wchar *cbuf;
  size_t rlen, len = scm_i_string_length (string);
  
  if (scm_i_is_narrow_string (string))
    {
      size_t i;
      const char *buf = scm_i_string_chars (string);
      
      w_str = alloca (sizeof (scm_t_wchar) * (len + 1));
      
      for (i = 0; i < len; i ++)
	w_str[i] = (unsigned char) buf[i];
      w_str[len] = 0;
    }
  else 
    w_str = (scm_t_uint32 *) scm_i_string_wide_chars (string);

  w_str = u32_normalize (form, w_str, len, NULL, &rlen);  
  
  ret = scm_i_make_wide_string (rlen, &cbuf);
  u32_cpy ((scm_t_uint32 *) cbuf, w_str, rlen);
  free (w_str);

  scm_i_try_narrow_string (ret);

  return ret;
}

SCM_DEFINE (scm_string_normalize_nfc, "string-normalize-nfc", 1, 0, 0,
	    (SCM string),
	    "Returns the NFC normalized form of @var{string}.")
#define FUNC_NAME s_scm_string_normalize_nfc
{
  SCM_VALIDATE_STRING (1, string);
  return normalize_str (string, UNINORM_NFC);
}
#undef FUNC_NAME

SCM_DEFINE (scm_string_normalize_nfd, "string-normalize-nfd", 1, 0, 0,
	    (SCM string),
	    "Returns the NFD normalized form of @var{string}.")
#define FUNC_NAME s_scm_string_normalize_nfd
{
  SCM_VALIDATE_STRING (1, string);
  return normalize_str (string, UNINORM_NFD);
}
#undef FUNC_NAME

SCM_DEFINE (scm_string_normalize_nfkc, "string-normalize-nfkc", 1, 0, 0,
	    (SCM string),
	    "Returns the NFKC normalized form of @var{string}.")
#define FUNC_NAME s_scm_string_normalize_nfkc
{
  SCM_VALIDATE_STRING (1, string);
  return normalize_str (string, UNINORM_NFKC);
}
#undef FUNC_NAME

SCM_DEFINE (scm_string_normalize_nfkd, "string-normalize-nfkd", 1, 0, 0,
	    (SCM string),
	    "Returns the NFKD normalized form of @var{string}.")
#define FUNC_NAME s_scm_string_normalize_nfkd
{
  SCM_VALIDATE_STRING (1, string);
  return normalize_str (string, UNINORM_NFKD);
}
#undef FUNC_NAME

/* converts C scm_array of strings to SCM scm_list of strings. */
/* If argc < 0, a null terminated scm_array is assumed. */
SCM
scm_makfromstrs (int argc, char **argv)
{
  int i = argc;
  SCM lst = SCM_EOL;
  if (0 > i)
    for (i = 0; argv[i]; i++);
  while (i--)
    lst = scm_cons (scm_from_locale_string (argv[i]), lst);
  return lst;
}

/* Return a newly allocated array of char pointers to each of the strings
   in args, with a terminating NULL pointer.  */

char **
scm_i_allocate_string_pointers (SCM list)
#define FUNC_NAME "scm_i_allocate_string_pointers"
{
  char **result;
  int len = scm_ilength (list);
  int i;

  if (len < 0)
    scm_wrong_type_arg_msg (NULL, 0, list, "proper list");

  result = scm_gc_malloc ((len + 1) * sizeof (char *),
			  "string pointers");
  result[len] = NULL;

  /* The list might be have been modified in another thread, so
     we check LIST before each access.
   */
  for (i = 0; i < len && scm_is_pair (list); i++)
    {
      SCM str;
      size_t len;

      str = SCM_CAR (list);
      len = scm_c_string_length (str);

      result[i] = scm_gc_malloc_pointerless (len + 1, "string pointers");
      memcpy (result[i], scm_i_string_chars (str), len);
      result[i][len] = '\0';

      list = SCM_CDR (list);
    }

  return result;
}
#undef FUNC_NAME

void
scm_i_get_substring_spec (size_t len,
			  SCM start, size_t *cstart,
			  SCM end, size_t *cend)
{
  if (SCM_UNBNDP (start))
    *cstart = 0;
  else
    *cstart = scm_to_unsigned_integer (start, 0, len);

  if (SCM_UNBNDP (end))
    *cend = len;
  else
    *cend = scm_to_unsigned_integer (end, *cstart, len);
}
		  
#if SCM_ENABLE_DEPRECATED

/* When these definitions are removed, it becomes reasonable to use
   read-only strings for string literals.  For that, change the reader
   to create string literals with scm_c_substring_read_only instead of
   with scm_c_substring_copy.
*/

int
scm_i_deprecated_stringp (SCM str)
{
  scm_c_issue_deprecation_warning
    ("SCM_STRINGP is deprecated.  Use scm_is_string instead.");
  
  return scm_is_string (str);
}

char *
scm_i_deprecated_string_chars (SCM str)
{
  char *chars;

  scm_c_issue_deprecation_warning
    ("SCM_STRING_CHARS is deprecated.  See the manual for alternatives.");

  /* We don't accept shared substrings here since they are not
     null-terminated.
  */
  if (IS_SH_STRING (str))
    scm_misc_error (NULL,
		    "SCM_STRING_CHARS does not work with shared substrings",
		    SCM_EOL);

  /* We explicitly test for read-only strings to produce a better
     error message.
  */

  if (IS_RO_STRING (str))
    scm_misc_error (NULL,
		    "SCM_STRING_CHARS does not work with read-only strings",
		    SCM_EOL);

  /* The following is still wrong, of course...
   */
  str = scm_i_string_start_writing (str);
  chars = scm_i_string_writable_chars (str);
  scm_i_string_stop_writing ();
  return chars;
}

size_t
scm_i_deprecated_string_length (SCM str)
{
  scm_c_issue_deprecation_warning
    ("SCM_STRING_LENGTH is deprecated.  Use scm_c_string_length instead.");
  return scm_c_string_length (str);
}

#endif

static SCM
string_handle_ref (scm_t_array_handle *h, size_t index)
{
  return scm_c_string_ref (h->array, index);
}

static void
string_handle_set (scm_t_array_handle *h, size_t index, SCM val)
{
  scm_c_string_set_x (h->array, index, val);
}

static void
string_get_handle (SCM v, scm_t_array_handle *h)
{
  h->array = v;
  h->ndims = 1;
  h->dims = &h->dim0;
  h->dim0.lbnd = 0;
  h->dim0.ubnd = scm_c_string_length (v) - 1;
  h->dim0.inc = 1;
  h->element_type = SCM_ARRAY_ELEMENT_TYPE_CHAR;
  h->elements = h->writable_elements = NULL;
}

SCM_ARRAY_IMPLEMENTATION (scm_tc7_string, 0x7f,
                          string_handle_ref, string_handle_set,
                          string_get_handle)
SCM_VECTOR_IMPLEMENTATION (SCM_ARRAY_ELEMENT_TYPE_CHAR, scm_make_string)

void
scm_init_strings ()
{
  scm_nullstr = scm_i_make_string (0, NULL);

#include "libguile/strings.x"
}


/*
  Local Variables:
  c-file-style: "gnu"
  End:
*/
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								/* Copyright (C) 1995,1996,1998,2000,2001, 2004, 2006, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								 *
-												Changed license terms to the plain LGPL thru-out.

											
										
										
											2003-04-05 19:15:35 +00:00
+								 * This library is free software; you can redistribute it and/or
-												Change Guile license to LGPLv3+

(Not quite finished, the following will be done tomorrow.
   module/srfi/*.scm
   module/rnrs/*.scm
   module/scripts/*.scm
   testsuite/*.scm
   guile-readline/*
)

											
										
										
											2009-06-17 00:22:09 +01:00
+								 * modify it under the terms of the GNU Lesser General Public License
 								 * as published by the Free Software Foundation; either version 3 of
 								 * the License, or (at your option) any later version.
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								 *
-												Change Guile license to LGPLv3+

(Not quite finished, the following will be done tomorrow.
   module/srfi/*.scm
   module/rnrs/*.scm
   module/scripts/*.scm
   testsuite/*.scm
   guile-readline/*
)

											
										
										
											2009-06-17 00:22:09 +01:00
+								 * This library is distributed in the hope that it will be useful, but
 								 * WITHOUT ANY WARRANTY; without even the implied warranty of
-												Changed license terms to the plain LGPL thru-out.

											
										
										
											2003-04-05 19:15:35 +00:00
+								 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 								 * Lesser General Public License for more details.
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								 *
-												Changed license terms to the plain LGPL thru-out.

											
										
										
											2003-04-05 19:15:35 +00:00
+								 * You should have received a copy of the GNU Lesser General Public
 								 * License along with this library; if not, write to the Free Software
-												Change Guile license to LGPLv3+

(Not quite finished, the following will be done tomorrow.
   module/srfi/*.scm
   module/rnrs/*.scm
   module/scripts/*.scm
   testsuite/*.scm
   guile-readline/*
)

											
										
										
											2009-06-17 00:22:09 +01:00
+								 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 								 * 02110-1301 USA
-												Changed license terms to the plain LGPL thru-out.

											
										
										
											2003-04-05 19:15:35 +00:00
+								 */
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Include <config.h> in all C files; use `#ifdef HAVE_CONFIG_H' rather than `#if'.

											
										
										
											2008-09-13 15:35:27 +02:00
+								#ifdef HAVE_CONFIG_H
 								# include <config.h>
 								#endif
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Include <alloca.h> wherever `alloca' is used.

Patch provided by <carlo.bramix@libero.it> (tiny change).

* libguile/control.c, libguile/fluids.c, libguile/foreign.c,
  libguile/hashtab.c, libguile/strings.c: Include <alloca.h>.

											
										
										
											2010-11-19 14:14:53 +01:00
+								#include <alloca.h>
-												* strings.c: #include <string.h>. (Thanks to Bill Schottstaedt.)

											
										
										
											2000-06-13 19:08:25 +00:00
+								#include <string.h>
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								#include <stdio.h>
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								#include <ctype.h>
-												Support for Unicode string normalization functions

* libguile/strings.c, libguile/strings.h (normalize_str,
  scm_string_normalize_nfc, scm_string_normalize_nfd, scm_normalize_nfkc,
  scm_string_normalize_nfkd): New functions.
* test-suite/tests/strings.test: Unit tests for `string-normalize-nfc',
  `string-normalize-nfd', `string-normalize-nfkc', and
  `string-normalize-nfkd'.
* doc/ref/api-data.texi (String Comparison): Documentation for normalization
  functions.

											
										
										
											2010-01-03 01:06:05 -05:00
+								#include <uninorm.h>
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								#include <unistr.h>
-												Don't include libunistring headers in Guile public headers

This requres the creation of a new type
scm_t_string_failed_conversion_handler to replace libunistring's
enum iconveh_ilseq_handler.

* libguile/strings.h: don't include <uniconv.h>
(scm_t_string_failed_conversion_handler): new enum type
(SCM_FAILED_CONVERSION_ERROR, SCM_FAILED_CONVERSION_QUESTION_MARK):
(SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE): new enum type values

* libguile/strings.c (scm_to_stringn): now takes type
scm_t_string_failed_conversion_handler.  All callers changed.

* libguile/print.c: include <uniconv.h>

* libguile/ports.c (scm_lfwrite_substr): use
scm_t_string_conversion_handler's constants

* libguile/gen-scmconfig.c (SCM_ICONVEH_ERROR):
(SCM_ICONVEH_QUESTION_MARK, SCM_ICONVEH_ESCAPE_SEQUENCE): store
iconveh_ilseq_hander constants as #define's

											
										
										
											2009-08-12 08:30:59 -07:00
+								#include <uniconv.h>
-												* strings.c: #include <string.h>. (Thanks to Bill Schottstaedt.)

											
										
										
											2000-06-13 19:08:25 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								#include "striconveh.h"
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/_scm.h"
 								#include "libguile/chars.h"
-												* Make SCM_DEBUG_CELL_ACCESSES=1 work with GUILE_DEBUG_FREELIST.

											
										
										
											2001-05-15 09:45:10 +00:00
+								#include "libguile/root.h"
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/strings.h"
-												Use `encoding-error' instead of `misc-error' for string encoding errors.

* libguile/strings.c (scm_encoding_error): New function.
  (scm_from_stringn, scm_to_stringn): Use it instead of `scm_misc_error ()'.

* test-suite/lib.scm (exception:encoding-error): Adjust accordingly.

* test-suite/tests/encoding-escapes.test (exception:conversion):
  Remove.  Use `exception:encoding-error' instead.

* test-suite/tests/encoding-iso88591.test: Likewise.

* test-suite/tests/encoding-iso88597.test: Likewise.

* test-suite/tests/encoding-utf8.test: Likewise.

											
										
										
											2010-01-07 00:37:10 +01:00
+								#include "libguile/error.h"
-												add registry of vector constructors, make-generalized-vector

* libguile/generalized-vectors.h:
* libguile/generalized-vectors.c: Add a registry of vector constructors.
  (scm_make_generalized_vector): New public function, constructs a
  vector of a given type.

* libguile/bitvectors.c:
* libguile/bytevectors.c:
* libguile/srfi-4.c:
* libguile/strings.c:
* libguile/vectors.c: Register vector constructors.

* libguile/extensions.c (scm_init_extensions): No need to NULL the list
  of registered extensions here, the static init does it for us. Allows
  scm_c_register_extension to be called before scm_init_extensions.

* libguile/init.c (scm_i_init_guile): Move array initialization earlier,
  so e.g. scm_init_strings has access to a valid list of array element
  types when registering its vector constructor.

											
										
										
											2009-07-18 12:43:54 +02:00
+								#include "libguile/generalized-vectors.h"
-													* list.h (scm_list_1, scm_list_2, scm_list_3, scm_list_4, scm_list_5,
	scm_list_n): New functions.
	(SCM_LIST0, SCM_LIST1, SCM_LIST2, SCM_LIST3, SCM_LIST4, SCM_LIST5,
	SCM_LIST6, SCM_LIST7, SCM_LIST8, SCM_LIST9, scm_listify): Deprecated.
	(lots of files): Use the new functions.

	* goops.c (CALL_GF1, CALL_GF2, CALL_GF3, CALL_GF4): Use scm_call_N.

	* strings.c: #include "libguile/deprecation.h".

											
										
										
											2001-06-28 01:11:59 +00:00
+								#include "libguile/deprecation.h"
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/validate.h"
-												R6RS string escapes broken on string output

scm_to_stringn failed to do the necessary escape conversion for
R6RS hex escapes

* libguile/strings.c (unistring_escapes_to_r6rs_escapes): new function
  (scm_to_stringn): use new function when r6rs hex escapes are enabled

* test-suite/tests/reader.test: new test for string display

											
										
										
											2010-01-23 09:15:10 -08:00
+								#include "libguile/private-options.h"
-													* list.h (scm_list_1, scm_list_2, scm_list_3, scm_list_4, scm_list_5,
	scm_list_n): New functions.
	(SCM_LIST0, SCM_LIST1, SCM_LIST2, SCM_LIST3, SCM_LIST4, SCM_LIST5,
	SCM_LIST6, SCM_LIST7, SCM_LIST8, SCM_LIST9, scm_listify): Deprecated.
	(lots of files): Use the new functions.

	* goops.c (CALL_GF1, CALL_GF2, CALL_GF3, CALL_GF4): Use scm_call_N.

	* strings.c: #include "libguile/deprecation.h".

											
										
										
											2001-06-28 01:11:59 +00:00
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
 								/* {Strings}
 								 */
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
 								/* Stringbufs
 								 *
 								 * XXX - keeping an accurate refcount during GC seems to be quite
 								 * tricky, so we just keep score of whether a stringbuf might be
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								 * shared, not whether it definitely is.
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								 *
 								 * The scheme I (mvo) tried to keep an accurate reference count would
 								 * recount all strings that point to a stringbuf during the mark-phase
 								 * of the GC.  This was done since one cannot access the stringbuf of
 								 * a string when that string is freed (in order to decrease the
 								 * reference count).  The memory of the stringbuf might have been
 								 * reused already for something completely different.
 								 *
 								 * This recounted worked for a small number of threads beating on
 								 * cow-strings, but it failed randomly with more than 10 threads, say.
 								 * I couldn't figure out what went wrong, so I used the conservative
 								 * approach implemented below.
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								 *
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								 * There are 2 storage strategies for stringbufs: 8-bit and wide.  8-bit
 								 * strings are ISO-8859-1-encoded strings; wide strings are 32-bit (UCS-4)
 								 * strings.
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								 */
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								/* The size in words of the stringbuf header (type tag + size).  */
 								#define STRINGBUF_HEADER_SIZE   2U
 								#define STRINGBUF_HEADER_BYTES  (STRINGBUF_HEADER_SIZE * sizeof (SCM))
-												Expose some of the string/stringbuf internal flags and tags.

* libguile/strings.h (scm_tc7_ro_string, SCM_I_STRINGBUF_F_SHARED,
  SCM_I_STRINGBUF_F_INLINE): New macros.

* libguile/strings.c (STRINGBUF_F_SHARED): Alias for
  `SCM_I_STRINGBUF_F_SHARED'.
  (STRINGBUF_F_INLINE): Alias for `SCM_I_STRINGBUF_F_INLINE'.
  (RO_STRING_TAG): Alias for `scm_tc7_ro_string'.

											
										
										
											2009-01-13 23:59:17 +01:00
+								#define STRINGBUF_F_SHARED      SCM_I_STRINGBUF_F_SHARED
-												Merge branch 'boehm-demers-weiser-gc' into bdw-gc-static-alloc

Conflicts:
	acinclude.m4
	libguile/strings.c

											
										
										
											2009-09-02 00:07:27 +02:00
+								#define STRINGBUF_F_WIDE        SCM_I_STRINGBUF_F_WIDE
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
 								#define STRINGBUF_TAG           scm_tc7_stringbuf
 								#define STRINGBUF_SHARED(buf)   (SCM_CELL_WORD_0(buf) & STRINGBUF_F_SHARED)
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								#define STRINGBUF_WIDE(buf)     (SCM_CELL_WORD_0(buf) & STRINGBUF_F_WIDE)
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
-												Add `scm_i_string_data'.

* libguile/strings.c (STRINGBUF_CONTENTS): New macro.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Use it.
  (scm_i_string_data): New function.

* libguile/strings.h (scm_i_string_data): New declaration.

											
										
										
											2010-07-04 18:38:53 +02:00
+								#define STRINGBUF_CONTENTS(buf) ((void *)				\
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								                                 SCM_CELL_OBJECT_LOC (buf,		\
 														      STRINGBUF_HEADER_SIZE))
-												Add `scm_i_string_data'.

* libguile/strings.c (STRINGBUF_CONTENTS): New macro.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Use it.
  (scm_i_string_data): New function.

* libguile/strings.h (scm_i_string_data): New declaration.

											
										
										
											2010-07-04 18:38:53 +02:00
+								#define STRINGBUF_CHARS(buf)    ((unsigned char *) STRINGBUF_CONTENTS (buf))
 								#define STRINGBUF_WIDE_CHARS(buf) ((scm_t_wchar *) STRINGBUF_CONTENTS (buf))
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
-												Add `scm_i_string_data'.

* libguile/strings.c (STRINGBUF_CONTENTS): New macro.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Use it.
  (scm_i_string_data): New function.

* libguile/strings.h (scm_i_string_data): New declaration.

											
										
										
											2010-07-04 18:38:53 +02:00
+								#define STRINGBUF_LENGTH(buf)   (SCM_CELL_WORD_1 (buf))
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
-												Fix invalid writes to read-only stringbufs.

* libguile/strings.c (SET_STRINGBUF_SHARED): Don't modify BUF if it's
  already marked as shared since it might be a read-only stringbuf.
  This error can be caught when linking with GNU ld with "-z relro".

											
										
										
											2009-02-01 01:54:41 +01:00
+								#define SET_STRINGBUF_SHARED(buf)					\
 								  do									\
 								    {									\
 								      /* Don't modify BUF if it's already marked as shared since it might be \
 									 a read-only, statically allocated stringbuf.  */		\
 								      if (SCM_LIKELY (!STRINGBUF_SHARED (buf)))				\
 									SCM_SET_CELL_WORD_0 ((buf), SCM_CELL_WORD_0 (buf) | STRINGBUF_F_SHARED); \
 								    }									\
 								  while (0)
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
-												Remove references to undefined macros.

The intent is to allow compilation with `-Wundef', which in turn should
make it easier to catch erroneous uses of nonexistent macros.

* libguile/__scm.h: Don't assume `BUILDING_LIBGUILE' is defined.

* libguile/conv-uinteger.i.c (SCM_TO_TYPE_PROTO): Remove unneeded CPP
  conditional on `TYPE_MIN == 0'.

* libguile/fports.c: Check for the definition of `HAVE_CHSIZE' and
  `HAVE_FTRUNCATE', not for their value.

* libguile/ports.c: Likewise.

* libguile/numbers.c (guile_ieee_init): Likewise with `HAVE_DINFINITY'
  and `HAVE_DQNAN'.

* test-suite/standalone/test-conversion.c (ieee_init): Likewise.

* libguile/strings.c: Likewise with `SCM_STRING_LENGTH_HISTOGRAM'.

* libguile/strings.h: Likewise.

* libguile/tags.h: Likewise with `HAVE_INTTYPES_H' and `HAVE_STDINT_H'.

* libguile/threads.c: Likewise with `HAVE_PTHREAD_GET_STACKADDR_NP'.

* libguile/vm-engine.c (VM_NAME): Likewise with `VM_CHECK_IP'.

* libguile/gen-scmconfig.c (main): Use "#ifdef HAVE_", not "#if HAVE_".

* libguile/socket.c (scm_setsockopt): Likewise.

											
										
										
											2009-11-17 23:40:51 +01:00
+								#ifdef SCM_STRING_LENGTH_HISTOGRAM
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								static size_t lenhist[1001];
 								#endif
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Make a stringbuf with space for LEN 8-bit Latin-1-encoded
 								   characters. */
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								static SCM
 								make_stringbuf (size_t len)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  /* XXX - for the benefit of SCM_STRING_CHARS, SCM_SYMBOL_CHARS and
 								     scm_i_symbol_chars, all stringbufs are null-terminated.  Once
 								     SCM_STRING_CHARS and SCM_SYMBOL_CHARS are removed and the code
 								     has been changed for scm_i_symbol_chars, this null-termination
 								     can be dropped.
 								  */
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								  SCM buf;
-												Remove references to undefined macros.

The intent is to allow compilation with `-Wundef', which in turn should
make it easier to catch erroneous uses of nonexistent macros.

* libguile/__scm.h: Don't assume `BUILDING_LIBGUILE' is defined.

* libguile/conv-uinteger.i.c (SCM_TO_TYPE_PROTO): Remove unneeded CPP
  conditional on `TYPE_MIN == 0'.

* libguile/fports.c: Check for the definition of `HAVE_CHSIZE' and
  `HAVE_FTRUNCATE', not for their value.

* libguile/ports.c: Likewise.

* libguile/numbers.c (guile_ieee_init): Likewise with `HAVE_DINFINITY'
  and `HAVE_DQNAN'.

* test-suite/standalone/test-conversion.c (ieee_init): Likewise.

* libguile/strings.c: Likewise with `SCM_STRING_LENGTH_HISTOGRAM'.

* libguile/strings.h: Likewise.

* libguile/tags.h: Likewise with `HAVE_INTTYPES_H' and `HAVE_STDINT_H'.

* libguile/threads.c: Likewise with `HAVE_PTHREAD_GET_STACKADDR_NP'.

* libguile/vm-engine.c (VM_NAME): Likewise with `VM_CHECK_IP'.

* libguile/gen-scmconfig.c (main): Use "#ifdef HAVE_", not "#if HAVE_".

* libguile/socket.c (scm_setsockopt): Likewise.

											
										
										
											2009-11-17 23:40:51 +01:00
+								#ifdef SCM_STRING_LENGTH_HISTOGRAM
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  if (len < 1000)
 								    lenhist[len]++;
 								  else
 								    lenhist[1000]++;
 								#endif
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								  buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + len + 1,
 													    "string"));
 								  SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG);
 								  SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
 								  STRINGBUF_CHARS (buf)[len] = 0;
 								  return buf;
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								}
-												* strings.c (scm_make_shared_substring, scm_read_only_string_p):
Deprecated.
(scm_string_length, scm_string_ref, scm_substring,
scm_string_append): Don't accept symbols as arguments (R5RS).

											
										
										
											2000-08-26 20:56:48 +00:00
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Make a stringbuf with space for LEN 32-bit UCS-4-encoded
 								   characters.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								static SCM
 								make_wide_stringbuf (size_t len)
 								{
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								  SCM buf;
 								  size_t raw_len;
-												Remove references to undefined macros.

The intent is to allow compilation with `-Wundef', which in turn should
make it easier to catch erroneous uses of nonexistent macros.

* libguile/__scm.h: Don't assume `BUILDING_LIBGUILE' is defined.

* libguile/conv-uinteger.i.c (SCM_TO_TYPE_PROTO): Remove unneeded CPP
  conditional on `TYPE_MIN == 0'.

* libguile/fports.c: Check for the definition of `HAVE_CHSIZE' and
  `HAVE_FTRUNCATE', not for their value.

* libguile/ports.c: Likewise.

* libguile/numbers.c (guile_ieee_init): Likewise with `HAVE_DINFINITY'
  and `HAVE_DQNAN'.

* test-suite/standalone/test-conversion.c (ieee_init): Likewise.

* libguile/strings.c: Likewise with `SCM_STRING_LENGTH_HISTOGRAM'.

* libguile/strings.h: Likewise.

* libguile/tags.h: Likewise with `HAVE_INTTYPES_H' and `HAVE_STDINT_H'.

* libguile/threads.c: Likewise with `HAVE_PTHREAD_GET_STACKADDR_NP'.

* libguile/vm-engine.c (VM_NAME): Likewise with `VM_CHECK_IP'.

* libguile/gen-scmconfig.c (main): Use "#ifdef HAVE_", not "#if HAVE_".

* libguile/socket.c (scm_setsockopt): Likewise.

											
										
										
											2009-11-17 23:40:51 +01:00
+								#ifdef SCM_STRING_LENGTH_HISTOGRAM
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  if (len < 1000)
 								    lenhist[len]++;
 								  else
 								    lenhist[1000]++;
 								#endif
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								  raw_len = (len + 1) * sizeof (scm_t_wchar);
 								  buf = PTR2SCM (scm_gc_malloc_pointerless (STRINGBUF_HEADER_BYTES + raw_len,
 													    "string"));
 								  SCM_SET_CELL_TYPE (buf, STRINGBUF_TAG | STRINGBUF_F_WIDE);
 								  SCM_SET_CELL_WORD_1 (buf, (scm_t_bits) len);
 								  STRINGBUF_WIDE_CHARS (buf)[len] = 0;
 								  return buf;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								}
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								/* Return a UCS-4-encoded stringbuf containing the (possibly Latin-1-encoded)
 								   characters from BUF.  */
 								static SCM
 								wide_stringbuf (SCM buf)
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								{
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								  SCM new_buf;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
 								  if (STRINGBUF_WIDE (buf))
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								    new_buf = buf;
 								  else
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								    {
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								      size_t i, len;
 								      scm_t_wchar *mem;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								      len = STRINGBUF_LENGTH (buf);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								      new_buf = make_wide_stringbuf (len);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								      mem = STRINGBUF_WIDE_CHARS (new_buf);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								      for (i = 0; i < len; i++)
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+									mem[i] = (scm_t_wchar) STRINGBUF_CHARS (buf)[i];
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								      mem[len] = 0;
 								    }
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
 								  return new_buf;
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								}
-													* strings.h: don't use SCM_P.  don't include <string.h>.
	* error.c, gh_data.c, ports.c, script.c, strop.c: include <string.h>.

	* strings.c (scm_string_ref): make the 2nd argument compulsory.
	previously it defaulted to zero for no good reason that I can see.
	use a local variable for SCM_INUM (k).  replace
	SCM_VALIDATE_INUM_DEF with SCM_VALIDATE_INUM_COPY.

	(scm_makfromstr): cosmetic changes.

	(scm_string): Accept only chars in the list, not strings, for
	conformance to R5RS (particularly for list->string, which is
	supposed to be the inverse of string->list.)  remove
	SCM_DEFER_INTS/SCM_ALLOW_INTS, which is unnecessary since
	scm_makstr handles the cell allocation.  when reporting wrong-type
	arg, don't report the position as 1.

	* posix.c (scm_init_posix): intern PIPE_BUF if it's defined.

	* boot-9.scm (find-and-link-dynamic-module): pass strings, not symbols,
	to string-append.

											
										
										
											2000-01-31 18:29:56 +00:00
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								/* Return a Latin-1-encoded stringbuf containing the (possibly UCS-4-encoded)
 								   characters from BUF, if possible.  */
 								static SCM
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
+								narrow_stringbuf (SCM buf)
 								{
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								  SCM new_buf;
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
 								  if (!STRINGBUF_WIDE (buf))
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								    new_buf = buf;
 								  else
 								    {
 								      size_t i, len;
 								      scm_t_wchar *wmem;
 								      unsigned char *mem;
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								      len = STRINGBUF_LENGTH (buf);
 								      wmem = STRINGBUF_WIDE_CHARS (buf);
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								      for (i = 0; i < len; i++)
 									if (wmem[i] > 0xFF)
 									  /* BUF cannot be narrowed.  */
 									  return buf;
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								      new_buf = make_stringbuf (len);
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								      mem = STRINGBUF_CHARS (new_buf);
 								      for (i = 0; i < len; i++)
 									mem[i] = (unsigned char) wmem[i];
 								      mem[len] = 0;
 								    }
 								  return new_buf;
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
+								}
-												See ChangeLog from 2005-03-02.

											
										
										
											2005-03-02 20:42:01 +00:00
+								scm_i_pthread_mutex_t stringbuf_write_mutex = SCM_I_PTHREAD_MUTEX_INITIALIZER;
-													* strings.h: don't use SCM_P.  don't include <string.h>.
	* error.c, gh_data.c, ports.c, script.c, strop.c: include <string.h>.

	* strings.c (scm_string_ref): make the 2nd argument compulsory.
	previously it defaulted to zero for no good reason that I can see.
	use a local variable for SCM_INUM (k).  replace
	SCM_VALIDATE_INUM_DEF with SCM_VALIDATE_INUM_COPY.

	(scm_makfromstr): cosmetic changes.

	(scm_string): Accept only chars in the list, not strings, for
	conformance to R5RS (particularly for list->string, which is
	supposed to be the inverse of string->list.)  remove
	SCM_DEFER_INTS/SCM_ALLOW_INTS, which is unnecessary since
	scm_makstr handles the cell allocation.  when reporting wrong-type
	arg, don't report the position as 1.

	* posix.c (scm_init_posix): intern PIPE_BUF if it's defined.

	* boot-9.scm (find-and-link-dynamic-module): pass strings, not symbols,
	to string-append.

											
										
										
											2000-01-31 18:29:56 +00:00
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								/* Copy-on-write strings.
 								 */
-													* strings.h: don't use SCM_P.  don't include <string.h>.
	* error.c, gh_data.c, ports.c, script.c, strop.c: include <string.h>.

	* strings.c (scm_string_ref): make the 2nd argument compulsory.
	previously it defaulted to zero for no good reason that I can see.
	use a local variable for SCM_INUM (k).  replace
	SCM_VALIDATE_INUM_DEF with SCM_VALIDATE_INUM_COPY.

	(scm_makfromstr): cosmetic changes.

	(scm_string): Accept only chars in the list, not strings, for
	conformance to R5RS (particularly for list->string, which is
	supposed to be the inverse of string->list.)  remove
	SCM_DEFER_INTS/SCM_ALLOW_INTS, which is unnecessary since
	scm_makstr handles the cell allocation.  when reporting wrong-type
	arg, don't report the position as 1.

	* posix.c (scm_init_posix): intern PIPE_BUF if it's defined.

	* boot-9.scm (find-and-link-dynamic-module): pass strings, not symbols,
	to string-append.

											
										
										
											2000-01-31 18:29:56 +00:00
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								#define STRING_TAG            scm_tc7_string
-													* strings.h: don't use SCM_P.  don't include <string.h>.
	* error.c, gh_data.c, ports.c, script.c, strop.c: include <string.h>.

	* strings.c (scm_string_ref): make the 2nd argument compulsory.
	previously it defaulted to zero for no good reason that I can see.
	use a local variable for SCM_INUM (k).  replace
	SCM_VALIDATE_INUM_DEF with SCM_VALIDATE_INUM_COPY.

	(scm_makfromstr): cosmetic changes.

	(scm_string): Accept only chars in the list, not strings, for
	conformance to R5RS (particularly for list->string, which is
	supposed to be the inverse of string->list.)  remove
	SCM_DEFER_INTS/SCM_ALLOW_INTS, which is unnecessary since
	scm_makstr handles the cell allocation.  when reporting wrong-type
	arg, don't report the position as 1.

	* posix.c (scm_init_posix): intern PIPE_BUF if it's defined.

	* boot-9.scm (find-and-link-dynamic-module): pass strings, not symbols,
	to string-append.

											
										
										
											2000-01-31 18:29:56 +00:00
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								#define STRING_STRINGBUF(str) (SCM_CELL_OBJECT_1(str))
 								#define STRING_START(str)     ((size_t)SCM_CELL_WORD_2(str))
 								#define STRING_LENGTH(str)    ((size_t)SCM_CELL_WORD_3(str))
-													* strings.h: don't use SCM_P.  don't include <string.h>.
	* error.c, gh_data.c, ports.c, script.c, strop.c: include <string.h>.

	* strings.c (scm_string_ref): make the 2nd argument compulsory.
	previously it defaulted to zero for no good reason that I can see.
	use a local variable for SCM_INUM (k).  replace
	SCM_VALIDATE_INUM_DEF with SCM_VALIDATE_INUM_COPY.

	(scm_makfromstr): cosmetic changes.

	(scm_string): Accept only chars in the list, not strings, for
	conformance to R5RS (particularly for list->string, which is
	supposed to be the inverse of string->list.)  remove
	SCM_DEFER_INTS/SCM_ALLOW_INTS, which is unnecessary since
	scm_makstr handles the cell allocation.  when reporting wrong-type
	arg, don't report the position as 1.

	* posix.c (scm_init_posix): intern PIPE_BUF if it's defined.

	* boot-9.scm (find-and-link-dynamic-module): pass strings, not symbols,
	to string-append.

											
										
										
											2000-01-31 18:29:56 +00:00
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								#define SET_STRING_STRINGBUF(str,buf) (SCM_SET_CELL_OBJECT_1(str,buf))
 								#define SET_STRING_START(str,start) (SCM_SET_CELL_WORD_2(str,start))
 								#define IS_STRING(str)        (SCM_NIMP(str) && SCM_TYP7(str) == STRING_TAG)
-												(scm_substring_read_only,
scm_c_substring_read_only, scm_i_substring_read_only): New.
(RO_STRING_TAG, IS_RO_STRING): New.
(scm_i_string_writable_chars): Bail on read-only strings.

											
										
										
											2004-09-22 13:54:15 +00:00
+								/* Read-only strings.
 								 */
-												Expose some of the string/stringbuf internal flags and tags.

* libguile/strings.h (scm_tc7_ro_string, SCM_I_STRINGBUF_F_SHARED,
  SCM_I_STRINGBUF_F_INLINE): New macros.

* libguile/strings.c (STRINGBUF_F_SHARED): Alias for
  `SCM_I_STRINGBUF_F_SHARED'.
  (STRINGBUF_F_INLINE): Alias for `SCM_I_STRINGBUF_F_INLINE'.
  (RO_STRING_TAG): Alias for `scm_tc7_ro_string'.

											
										
										
											2009-01-13 23:59:17 +01:00
+								#define RO_STRING_TAG         scm_tc7_ro_string
-												(scm_substring_read_only,
scm_c_substring_read_only, scm_i_substring_read_only): New.
(RO_STRING_TAG, IS_RO_STRING): New.
(scm_i_string_writable_chars): Bail on read-only strings.

											
										
										
											2004-09-22 13:54:15 +00:00
+								#define IS_RO_STRING(str)     (SCM_CELL_TYPE(str)==RO_STRING_TAG)
-												(get_str_buf_start): New helper function.
(scm_i_substring, scm_i_substring_copy, scm_i_substring_shared,
scm_i_string_char, scm_i_string_writable_chars): Use it.
(scm_i_substring_copy): Make START argument optional for C
callers, for upcoming SRFI-13 integration.

											
										
										
											2004-08-22 19:29:19 +00:00
+								/* Mutation-sharing substrings
 								 */
 								#define SH_STRING_TAG       (scm_tc7_string + 0x100)
 								#define SH_STRING_STRING(sh) (SCM_CELL_OBJECT_1(sh))
 								/* START and LENGTH as for STRINGs. */
 								#define IS_SH_STRING(str)   (SCM_CELL_TYPE(str)==SH_STRING_TAG)
-												decruftify scm_sys_protects

* libguile/root.h
* libguile/root.c (scm_sys_protects): It used to be that for some reason
  we'd define a special array of "protected" values. This was a little
  silly, always, but with the BDW GC it's completely unnecessary. Also
  many of these variables were unused, and none of them were good API.
  So remove this array, and either eliminate, make static, or make
  internal the various values.

* libguile/snarf.h: No need to generate calls to scm_permanent_object.

* guile-readline/readline.c (scm_init_readline): No need to call
  scm_permanent_object.

* libguile/array-map.c (ramap, rafe): Remove the dubious nullvect
  optimizations.

* libguile/async.c (scm_init_async): No need to init scm_asyncs, it is
  no more.

* libguile/eval.c (scm_init_eval): No need to init scm_listofnull, it is
  no more.

* libguile/gc.c: Make scm_protects a static var.
  (scm_storage_prehistory): Change the sanity check to use the address
  of protects.
  (scm_init_gc_protect_object): No need to clear the scm_sys_protects,
  as it is no more.

* libguile/keywords.c: Make the keyword obarray a static var.
* libguile/numbers.c: Make flo0 a static var.
* libguile/objprop.c: Make object_whash a static var.
* libguile/properties.c: Make properties_whash a static var.

* libguile/srcprop.h:
* libguile/srcprop.c: Make scm_source_whash a global with internal
  linkage.

* libguile/strings.h:
* libguile/strings.c: Make scm_nullstr a global with internal linkage.

* libguile/vectors.c (scm_init_vectors): No need to init scm_nullvect,
  it's unused.

											
										
										
											2009-12-05 12:38:32 +01:00
+								SCM scm_nullstr;
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Create a scheme string with space for LEN 8-bit Latin-1-encoded
 								   characters.  CHARSP, if not NULL, will be set to location of the
 								   char array.  */
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								SCM
 								scm_i_make_string (size_t len, char **charsp)
 								{
 								  SCM buf = make_stringbuf (len);
 								  SCM res;
 								  if (charsp)
-												Avoid double-casts of stringbuf

Conversion from char to scm_t_wchar require an intermediate cast to
unsigned char.  By changing the return type of SCM_STRINGBUF_INLINE_CHARS
to unsigned char *, doublecasts in the code can be avoided.  Also,
some clarification of return types.

* libguile/strings.c (STRINGBUF_OUTLINE_CHARS)
(STRINGBUF_INLINE_CHARS): now returns unsigned char *; all callers changed.

											
										
										
											2009-08-18 21:14:56 -07:00
+								    *charsp = (char *) STRINGBUF_CHARS (buf);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  res = scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
 											 (scm_t_bits)0, (scm_t_bits) len);
 								  return res;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Create a scheme string with space for LEN 32-bit UCS-4-encoded
 								   characters.  CHARSP, if not NULL, will be set to location of the
 								   character array.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								SCM
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								scm_i_make_wide_string (size_t len, scm_t_wchar **charsp)
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								{
 								  SCM buf = make_wide_stringbuf (len);
 								  SCM res;
 								  if (charsp)
 								    *charsp = STRINGBUF_WIDE_CHARS (buf);
 								  res = scm_double_cell (STRING_TAG, SCM_UNPACK (buf),
 								                         (scm_t_bits) 0, (scm_t_bits) len);
 								  return res;
 								}
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								static void
 								validate_substring_args (SCM str, size_t start, size_t end)
 								{
 								  if (!IS_STRING (str))
 								    scm_wrong_type_arg_msg (NULL, 0, str, "string");
 								  if (start > STRING_LENGTH (str))
 								    scm_out_of_range (NULL, scm_from_size_t (start));
 								  if (end > STRING_LENGTH (str) || end < start)
 								    scm_out_of_range (NULL, scm_from_size_t (end));
 								}
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												(get_str_buf_start): New helper function.
(scm_i_substring, scm_i_substring_copy, scm_i_substring_shared,
scm_i_string_char, scm_i_string_writable_chars): Use it.
(scm_i_substring_copy): Make START argument optional for C
callers, for upcoming SRFI-13 integration.

											
										
										
											2004-08-22 19:29:19 +00:00
+								static inline void
 								get_str_buf_start (SCM *str, SCM *buf, size_t *start)
 								{
 								  *start = STRING_START (*str);
 								  if (IS_SH_STRING (*str))
 								    {
 								      *str = SH_STRING_STRING (*str);
 								      *start += STRING_START (*str);
 								    }
 								  *buf = STRING_STRINGBUF (*str);
 								}
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								SCM
 								scm_i_substring (SCM str, size_t start, size_t end)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												(get_str_buf_start): New helper function.
(scm_i_substring, scm_i_substring_copy, scm_i_substring_shared,
scm_i_string_char, scm_i_string_writable_chars): Use it.
(scm_i_substring_copy): Make START argument optional for C
callers, for upcoming SRFI-13 integration.

											
										
										
											2004-08-22 19:29:19 +00:00
+								  SCM buf;
 								  size_t str_start;
 								  get_str_buf_start (&str, &buf, &str_start);
-												See ChangeLog from 2005-03-02.

											
										
										
											2005-03-02 20:42:01 +00:00
+								  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  SET_STRINGBUF_SHARED (buf);
-												See ChangeLog from 2005-03-02.

											
										
										
											2005-03-02 20:42:01 +00:00
+								  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  return scm_double_cell (STRING_TAG, SCM_UNPACK(buf),
-												(get_str_buf_start): New helper function.
(scm_i_substring, scm_i_substring_copy, scm_i_substring_shared,
scm_i_string_char, scm_i_string_writable_chars): Use it.
(scm_i_substring_copy): Make START argument optional for C
callers, for upcoming SRFI-13 integration.

											
										
										
											2004-08-22 19:29:19 +00:00
+											  (scm_t_bits)str_start + start,
 											  (scm_t_bits) end - start);
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												(scm_substring_read_only,
scm_c_substring_read_only, scm_i_substring_read_only): New.
(RO_STRING_TAG, IS_RO_STRING): New.
(scm_i_string_writable_chars): Bail on read-only strings.

											
										
										
											2004-09-22 13:54:15 +00:00
+								SCM
 								scm_i_substring_read_only (SCM str, size_t start, size_t end)
 								{
-												Revert "Make literal strings (i.e., returned by `read') read-only."

This reverts commit fb2f8886c4d537b0c7d3e9e78a8d4e5e272a36f4.

The rationale is that `read' must return mutable strings, as reported
by szgyg <szgyg@ludens.elte.hu>.

											
										
										
											2008-10-09 22:21:33 +02:00
+								  SCM buf;
 								  size_t str_start;
 								  get_str_buf_start (&str, &buf, &str_start);
 								  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
 								  SET_STRINGBUF_SHARED (buf);
 								  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
 								  return scm_double_cell (RO_STRING_TAG, SCM_UNPACK(buf),
 											  (scm_t_bits)str_start + start,
 											  (scm_t_bits) end - start);
-												(scm_substring_read_only,
scm_c_substring_read_only, scm_i_substring_read_only): New.
(RO_STRING_TAG, IS_RO_STRING): New.
(scm_i_string_writable_chars): Bail on read-only strings.

											
										
										
											2004-09-22 13:54:15 +00:00
+								}
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								SCM
 								scm_i_substring_copy (SCM str, size_t start, size_t end)
 								{
 								  size_t len = end - start;
--09-03  Stefan Jahn  <stefan@lkcc.org>

        * configure.in (isinf): Let configure find the isinf() function
        on MinGW32 systems.

2004-09-03  Stefan Jahn  <stefan@lkcc.org>

        * threads.c (scm_threads_mark_stacks):  Fixed local variable
        definitions.

        * strings.c (scm_i_substring_copy, s_scm_string_append): Fixed
        local variable definitions.

        * stime.c (_POSIX_C_SOURCE):  Do not define this item on
        MinGW32 because it conflicts with its pthread headers.
        (s_scm_mktime): Consider the HAVE_STRUCT_TM_TM_ZONE define.
        (s_scm_strftime): Using scm_from_locale_string() instead of
        scm_makfrom0str().

        * posix.c (s_scm_putenv): Fixed typo in the !HAVE_UNSETENV
        part.

        * numbers.c (scm_init_numbers): Removed check_sanity() call
        inside GUILE_DEBUG.  The function has been removed somewhen...

        * filesys.c (_POSIX_C_SOURCE): Do not define this item on
        MinGW32 because it conflicts with its pthread headers.

2004-09-03  Stefan Jahn  <stefan@lkcc.org>

        * srfi-1.c, srfi-1.h: Renamed any 'lst1' into 'list1' because
        lst1 is a #define on Win32 systems.

											
										
										
											2004-09-03 19:45:37 +00:00
+								  SCM buf, my_buf;
-												(get_str_buf_start): New helper function.
(scm_i_substring, scm_i_substring_copy, scm_i_substring_shared,
scm_i_string_char, scm_i_string_writable_chars): Use it.
(scm_i_substring_copy): Make START argument optional for C
callers, for upcoming SRFI-13 integration.

											
										
										
											2004-08-22 19:29:19 +00:00
+								  size_t str_start;
 								  get_str_buf_start (&str, &buf, &str_start);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  if (scm_i_is_narrow_string (str))
 								    {
 								      my_buf = make_stringbuf (len);
 								      memcpy (STRINGBUF_CHARS (my_buf),
 								              STRINGBUF_CHARS (buf) + str_start + start, len);
 								    }
 								  else
 								    {
 								      my_buf = make_wide_stringbuf (len);
 								      u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (my_buf),
 								               (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf) + str_start
 								                                 + start), len);
 								      /* Even though this string is wide, the substring may be narrow.
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								         Consider adding code to narrow the string.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								    }
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  scm_remember_upto_here_1 (buf);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  return scm_double_cell (STRING_TAG, SCM_UNPACK (my_buf),
 								                          (scm_t_bits) 0, (scm_t_bits) len);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								}
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												(get_str_buf_start): New helper function.
(scm_i_substring, scm_i_substring_copy, scm_i_substring_shared,
scm_i_string_char, scm_i_string_writable_chars): Use it.
(scm_i_substring_copy): Make START argument optional for C
callers, for upcoming SRFI-13 integration.

											
										
										
											2004-08-22 19:29:19 +00:00
+								SCM
 								scm_i_substring_shared (SCM str, size_t start, size_t end)
 								{
 								  if (start == 0 && end == STRING_LENGTH (str))
 								    return str;
 								  else
 								    {
 								      size_t len = end - start;
 								      if (IS_SH_STRING (str))
 									{
 									  start += STRING_START (str);
 									  str = SH_STRING_STRING (str);
 									}
 								      return scm_double_cell (SH_STRING_TAG, SCM_UNPACK(str),
 											      (scm_t_bits)start, (scm_t_bits) len);
 								    }
 								}
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								SCM
 								scm_c_substring (SCM str, size_t start, size_t end)
 								{
 								  validate_substring_args (str, start, end);
 								  return scm_i_substring (str, start, end);
 								}
-													* ioext.c (scm_do_read_line): Rewritten to use memchr to find the
	newline.  A bit faster, and definitely hairier.
	(scm_read_line): Count newlines here instead.
	* strings.c (scm_take_str): New function.
	(scm_take0str): Reimplement in terms of scm_take_str.  * strings.h
	(scm_take_str): New declaration.  * ioext.c (scm_read_line): Use
	scm_take_str, to avoid copying the string.
	Add some simple-minded support for line buffered ports.
	* ports.h (SCM_BUFLINE): New flag for ports.
	* init.c (scm_init_standard_ports): Request line-buffering on
	the standard output port.
*	* ports.c (scm_mode_bits): Recognize 'l' as a request for line
	buffering.
	(scm_putc, scm_puts, scm_lfwrite): If the port is line-buffered,
	and there's a newline to be written, flush the port.
	* ports.c: (scm_lseek): clear buffers even if just reading current
	position.
	* fports.c (local_fclose): call local_fflush unconditionally.
	(various): don't use the scm_must... memory procs.
	* ports.h (scm_port): make read_pos a pointer to const.
	strports.c: take care of rw_active and rw_randow.
	fports.c: scm_fport_drain_input: removed.  do it all in ports.c.
	strports.c (scm_mkstrport): check that pos is reasonable.
	ioext.c (scm_ftell, scm_fseek): use lseek.
	(SCM_CLEAR_BUFFERS): macro deleted.
	ioext.c (redirect_port: use ptob fflush, read_flush.
	ports.h (scm_ptobfuns): add ftruncate.
	ports.c (scm_newptob): set ftruncate.
	adjust ptob tables.
*	ports.c (scm_ftruncate): new procedure.
	fports.c (local_ftrunate), strports.c (str_ftruncate): new procs.
	strports.c (st_seek, st_grow_port): new procs.
	fports.h (scm_port): change size types from int to off_t.
	ports.c (scm_init_ports): initialise the seek symbols here
	instead of in ioext.c.
	strports.c (scm_call_with_output_string): start with an empty
	string, so seek and ftruncate can be used.
	* ports.h (scm_ptobfuns): add a read_flush procedure which is the
	equivalent to fflush for the read buffer.
	* ports.c (scm_newptob): set read_flush.
	ports.c (void_port_ptob): set read_flush.
	fports.c (local_read_flush): new proc.  add to ptob.
	strport.c (st_read_flush): likewise.
	vport.c (sf_read_flush): likewise.
	fports.h (struct scm_fport): remove random member.  there's nothing
	left but fdes.  leaving it as a struct to allow for future changes.
	fports.c: replace usage of scm_fport::random with scm_port::rw_random.
	ports.c: (scm_putc, scm_puts, scm_lfwrite): call the read_flush
	ptob proc if the read buffer is filled.
	* ports.h (scm_port): add a rw_random member and replace
	reading and writing members with rw_active member.
	SCM_PORT_READ/SCM_PORT_WRITE: new values.
	* ports.h (struct scm_port_table): add writing and reading members
	to replace write_needs_seek: it isn't good enough for non-fports.
	ports.c, ioext.c, fports.c: corresponding changes.
	(struct scm_port_table): give it a typedef and rename to scm_port.
	ports.c, fports.c, strports.c, vports.c, ioext.c, ports.h:
	corresponding changes.
	* ports.c (scm_newptob): bugfix: set seek member.
*	* (scm_lseek): new procedure, using code from ioext.c:scm_fseek
	and generalised to all port types.
	* scmsigs.c (scm_init_scmsigs): set the SA_RESTART flag for all
	signals (it was only being done for handlers installed from Scheme).
	Otherwise (for example) SIGSTOP followed by SIGCONT on an interpreter
	waiting for input caused an EINTR error from read.
	* ports.h (struct scm_port_table): make all the char members
	unsigned, so they convert to int without becoming negative if large.
	* fports.c (scm_fdes_wait_for_input): forgot to check compilation
	with threads enabled.  rename this procedure to
	fport_wait_for_input and take a port instead of a fdes.
	use scm_fport_input_waiting_p instead of scm_fdes_waiting_p.
	* readline.c (scm_readline): Applied a patch from Greg Harvey to
	get readline support working again: use fdopen to get FILE objects.
	* gc.c (scm_init_storage): install an atexit proc to flush the
	ports.
	(cleanup): the new proc.  it sets a global variable which can be
	checked by the ptob flush procs to avoid trying to throw
	exceptions during exit.  not very pleasant but it seems more reliable.
	* fports.c (local_fflush): check terminating variable and if set
	don't throw exception.
	* CHECKME: that the atexit proc is installed if unexec used.
	* throw.c (scm_handle_by_message): don't flush all ports here.
	it still causes bus errors.
	* fports.h (SCM_FPORT_CLEAR_BUFFERS): rename to SCM_CLEAR_BUFFERS
	and move to ioext.c.
	* fports.c (scm_fdes_waiting_p): merged into fport_input_waiting_p.
	* ports.c (scm_char_ready_p): check the port buffer and call the
	ptob entry if needed.
	* ports.h (scm_ptobfuns): input_waiting_p added.  change all the
	ptob initialisers.  use it in char-ready
	* ioext.c (scm_do_read_line): moved from ports.c.  make it static.
	* vports.c (sfflush): modified to write a char (since softports
	currently use shortbuf.)
	* fports.c (scm_standard_stream_to_port): moved to init.c and
	made static.
	* init.c (scm_init_standard_ports): make stdout and stderr
	unbuffered if connected to a terminal.  with stdio they
	were line-buffered by default.
	* ports.h (scm_ptobfuns): change fflush return to void.
	change flush proc definitions.
	* strports.c (scm_call_with_output_string): get size from
	buffer instead of port stream.
	(scm_strprint_obj): likewise.
	(st_flush): new proc.
	* ports.h (struct scm_port_table): added write_end member,
	as an optimisation.  set it where write_buf_size is set.
	* ports.h (struct scm_port_table): change stream from void *
	back to SCM.  SCM presumably must be large enough to hold a
	pointer (and probably vice versa but who knows.)
	(SCM_SSTREAM): deleted.  change users back to SCM_STREAM.
	(scm_puts): rewritten
	* fports.c (local_ffwrite, local_fputs): removed.
	* strports.c (stputc, stputs, stwrite): dyked out (FIXME)
	* vports.c (sfputc, sfputs, sfwrite) likewise.
	* ports.c (write_void_port, puts_void_port): removed.
	(putc_void_port, getc_void_port, fgets_void_port): likewise.
	* ports.c (scm_lfwrite): rewritten using fport.c version.
	* fports.c (local_fputc): deleted.
	* ports.c (scm_add_to_port_table): initialise write_needs_seek.
	* ports.h (scm_ptobfuns): add seek function pointer.
	* fports.c: set it to local_seek, new procedure.
	* fports.h (SCM_MAYBE_DRAIN_INPUT): moved to ports.c.
	use ptob for seek. take ptob instead of fport arg.
	* ports.h (struct scm_port_table): new member write_needs_seek,
	replaces reading member in fport struct.
	* vports.c (sfgetc): store the getted char into the buffer.
	rename to sf_fill_buffer and install it for fill-buffer in ptob.
	the Scheme interface is still a	procedure that gets a char.
	(scm_make_soft_port): set up the port buffer (shortbuf).
	* fports.c (local_fgetc, local_fgets): deleted.
	* strports.c (stgetc): likewise.
	* ports.c: scm_generic_fgets: likewise.
	* ports.h (scm_ptobfuns): add fill_buffer.
	* ports.c (scm_newptob): assign it.
	* strports.c (scm_mkstrport): set up the buffer.
	put just the string into the stream, not cons (pos stream).
	(stfill_buffer): new proc.
	* ports.h: fport buffer moved into port table: to be
	used for all port types.
	* throw.c (scm_handle_by_message): flush ports at exit.
	* socket.c (scm_sock_fd_to_port): use scm_fdes_to_port.
	(scm_getsockopt, scm_setsockopt, scm_shutdown, scm_connect,
	scm_bind, scm_listen, scm_accept, scm_getsockname,
	scm_getpeername, scm_recv, scm_send, scm_recvfrom,
	scm_sendto,
	use SCM_FPORT_FDES.  use SCM_OPFPORTP not SCM_FPORTP.
	* posix.c (scm_getgroups): use SCM_ALLOW/DEFER_INTS.
	(scm_ttyname): use SCM_FPORT_FDES.
	(scm_tcgetpgrp, scm_tcsetpgrp): likewise.
	* ioext.c (scm_isatty_p): use SCM_FPORT_FDES.
	(scm_fdes_to_ports): modified.
	(scm_fdopen): use scm_fdes_to_port.
	* ports.c (scm_init_ports): don't try to flush ports using
	atexit().  it's too late, errors will cause SEGV.
	* fports.c (scm_fport_buffer_add): new procedure.
	* fports.h (SCM_FDES_RANDOM_P): new macro.  use it in
	scm_fdes_to_port and scm_redirect_port.
	* ioext.c (scm_redirect_port): use setvbuf to set buffers in the
	new port.  reset fp->random.
	* fports.c (scm_fdes_to_port), ports.c (scm_void_port),
	filesys.c (scm_opendir):
	restore defer interrupts while the port is constructed.
*	(scm_setvbuf): if mode is _IOFBF and size is not supplied,
	derive buffer size from fdes or use a default.
	(scm_fdes_to_port): use setvbuf instead of creating the buffers
	directly.
	vports.c (various places): use SCM_SSTREAM.
	strports.c: likewise.
	* gdbint.c: likewise.
	* ports.h (SCM_SSTREAM): new macro.
	* fports.c (scm_input_waiting_p): use scm_return_first, since port
	may be removed from the stack by the tail call to scm_fdes_waiting_p.
	* fports.h (SCM_CLEAR_BUFFERS): new macro.
	* ports.c (scm_force_output): call scm_fflush.
	* print.c (scm_newline): don't check errno for EPIPE (it wouldn't
*	reach this point.)  don't flush port (if scm_cur_outp).
	* fports.h (SCM_FPORT_FDES): new macro.
	* vports.c (sfflush): don't need to set errno.
	* ports.c: install scm_flush_all_ports to be run on exit.
	ports.c fports.c ioext.c posix.c socket.c net_db.c filesys.c:
	removed all uses of SCM_DEFER/ALLOW ints for now.  they were mainly
	just protecting errno.  some may need to be put back.
	* scmsigs.c (take_signal): save and restore errno while this
	proc runs.
	*fports.c (print_pipe_port, local_pclose, scm_pipob): deleted.
*	open-pipe, close-pipe are emulated in (ice-9 popen)
	ports.c (scm_ports_prehistory): don't init scm_pipob.
	ports.h (scm_tc16_pipe): deleted.
	posix.c (scm_open_pipe, scm_close_pipe): deleted.
	* ioext.c (scm_primitive_move_to_fdes): use fport.
	* fport.c (scm_fport_fill_buffer): flush write buffer if needed.
	change arg type from scm_fport to SCM port.
	fport.h (SCM_SETFDES): removed.
	(SCM_MAYBE_DRAIN_INPUT): new macro.
	* ioext.c (scm_dup_to_fdes): use SCM_FSTREAM.
	(scm_ftell): always use lseek and account for the buffer.
	(scm_fileno): use fport buffer.
	(scm_fseek): clear fport buffers.  always use lseek.
	* posix.c (scm_pipe): use fport buffer.
	* unif.c: include fports.h instead of genio.h.
	* fports.c (scm_fdes_wait_for_input, scm_fport_fill_buffer): new
	procedures.
	(local_fgetc): use them.
	(local_ffwrite): use buffer.
	(local_fgets): use buffer.
	(scm_setbuf0): deleted.
	(scm_setvbuf): set the buffer.
	(scm_setfileno): deleted.
	(scm_evict_ports): set fdes directly.
*	(scm_freopen): deleted.  doesn't seem useful in Guile.
	(scm_stdio_to_port): deleted.
	fports.h (struct scm_fport): add shortbuf member to avoid separate
	code for unbuffered ports.
	(SCM_FPORTP, SCM_OPFPORTP, SCM_OPINFPORTP, SCM_OPOUTFPORTP): moved
	from ports.h.
	* genio.c, genio.h: move contents into ports.c, ports.h.  The
	division wasn't useful.
	* fports.c, fports.h (scm_fport_drain_input): new procedure.
	* ports.c (scm_drain_input): call scm_fport_drain_input.
	* scm_fdes_waiting_p: new procedure.
	* fports.c (scm_fdes_to_port): allocate read and/or write buffers.
	(scm_input_waiting_p): check the buffer.
	(local_fgetc, local_fflush, local_fputc): likewise.
	* fports.h (scm_fport): read/write_buf,_pos,_buf_end,,_buf_size:
	new members.
	* init.c (scm_init_standard_ports): pass fdes instead of FILE *.
*	* ports.c (scm_drain_input): new procedure.
	ports.h: prototype.
	* fports.c (FPORT_READ_SAFE, FPORT_WRITE_SAFE, FPORT_ALL_OKAY,
	pre_read, pre_write): removed.
	(local_fputc, local_fputs, local_ffwrite): use write, not stdio.
	(scm_standard_stream_to_port): change first arg from FILE * to
	int fdes.
	(local_fflush): flush fdes, not FILE *.
	* fports.h (SCM_NOFTELL): removed.
	* genio.c, ports.c: don't include filesys.h.
	* genio.c (scm_getc): don't use scm_internal_select if FPORT.
	do it in fports.c:local_fgetc.
	* genio.c: don't use SCM_SYSCALL when calling ptob procedures.
	do it where it's needed in the port smobs.
	* filesys.c (scm_input_waiting_p): moved to fports.c, stdio
	  buffer support removed.  take SCM arg, not FILE *.
	* filesys.h: prototype moved too.
	* fports.c (scm_fdes_to_port): new procedure.
	(local_fgetc): use read not fgetc.
	(local_fclose): use close, not fclose.
	(local_fgets): use read, not fgets
	* fports.h: prototype for scm_fdes_to_port.
	* fports.h (scm_fport): new struct.
	* fports.c (scm_open_file): use open, not fopen.
	#include fcntl.h
	* ports.h (struct scm_port_table): change stream from SCM to void *.
	* ports.c (scm_add_to_port_table): check for memory allocation error.
	(scm_prinport): remove MSDOS hair.
	(scm_void_port): set stream to 0 instead of SCM_BOOL_F.
	(scm_close_port): don't throw errors: do it in fports.c.

											
										
										
											1999-06-09 12:19:58 +00:00
-												(scm_substring_read_only,
scm_c_substring_read_only, scm_i_substring_read_only): New.
(RO_STRING_TAG, IS_RO_STRING): New.
(scm_i_string_writable_chars): Bail on read-only strings.

											
										
										
											2004-09-22 13:54:15 +00:00
+								SCM
 								scm_c_substring_read_only (SCM str, size_t start, size_t end)
 								{
 								  validate_substring_args (str, start, end);
 								  return scm_i_substring_read_only (str, start, end);
 								}
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								SCM
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								scm_c_substring_copy (SCM str, size_t start, size_t end)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  validate_substring_args (str, start, end);
 								  return scm_i_substring_copy (str, start, end);
 								}
 								SCM
 								scm_c_substring_shared (SCM str, size_t start, size_t end)
 								{
 								  validate_substring_args (str, start, end);
 								  return scm_i_substring_shared (str, start, end);
 								}
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Remove unused GC string/symbol functions.

* libguile/strings.c (scm_i_stringbuf_mark, scm_i_stringbuf_free,
  scm_i_string_mark, scm_i_string_free, scm_i_symbol_mark,
  scm_i_symbol_free): Remove.

* libguile/strings.h: Remove corresponding declarations.

											
										
										
											2008-09-15 22:59:08 +02:00
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								/* Internal accessors
 								 */
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Returns the number of characters in STR.  This may be different
 								   than the memory size of the string storage.  */
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								size_t
 								scm_i_string_length (SCM str)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  return STRING_LENGTH (str);
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* True if the string is 'narrow', meaning it has a 8-bit Latin-1
 								   encoding.  False if it is 'wide', having a 32-bit UCS-4
 								   encoding.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								int
 								scm_i_is_narrow_string (SCM str)
 								{
 								  return !STRINGBUF_WIDE (STRING_STRINGBUF (str));
 								}
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
+								/* Try to coerce a string to be narrow.  It if is narrow already, do
 								   nothing.  If it is wide, shrink it to narrow if none of its
 								   characters are above 0xFF.  Return true if the string is narrow or
 								   was made to be narrow.  */
 								int
 								scm_i_try_narrow_string (SCM str)
 								{
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								  SET_STRING_STRINGBUF (str, narrow_stringbuf (STRING_STRINGBUF (str)));
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
 								  return scm_i_is_narrow_string (str);
 								}
-												Add `scm_i_string_data'.

* libguile/strings.c (STRINGBUF_CONTENTS): New macro.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Use it.
  (scm_i_string_data): New function.

* libguile/strings.h (scm_i_string_data): New declaration.

											
										
										
											2010-07-04 18:38:53 +02:00
+								/* Return a pointer to the raw data of the string, which can be either Latin-1
 								   or UCS-4 encoded data, depending on `scm_i_is_narrow_string (STR)'.  */
 								const void *
 								scm_i_string_data (SCM str)
 								{
 								  SCM buf;
 								  size_t start;
 								  const char *data;
 								  get_str_buf_start (&str, &buf, &start);
 								  data = STRINGBUF_CONTENTS (buf);
 								  data += start * (scm_i_is_narrow_string (str) ? 1 : 4);
 								  return data;
 								}
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Returns a pointer to the 8-bit Latin-1 encoded character array of
 								   STR.  */
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								const char *
 								scm_i_string_chars (SCM str)
 								{
 								  SCM buf;
-												(get_str_buf_start): New helper function.
(scm_i_substring, scm_i_substring_copy, scm_i_substring_shared,
scm_i_string_char, scm_i_string_writable_chars): Use it.
(scm_i_substring_copy): Make START argument optional for C
callers, for upcoming SRFI-13 integration.

											
										
										
											2004-08-22 19:29:19 +00:00
+								  size_t start;
 								  get_str_buf_start (&str, &buf, &start);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  if (scm_i_is_narrow_string (str))
-												Avoid double-casts of stringbuf

Conversion from char to scm_t_wchar require an intermediate cast to
unsigned char.  By changing the return type of SCM_STRINGBUF_INLINE_CHARS
to unsigned char *, doublecasts in the code can be avoided.  Also,
some clarification of return types.

* libguile/strings.c (STRINGBUF_OUTLINE_CHARS)
(STRINGBUF_INLINE_CHARS): now returns unsigned char *; all callers changed.

											
										
										
											2009-08-18 21:14:56 -07:00
+								    return (const char *) STRINGBUF_CHARS (buf) + start;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  else
 								    scm_misc_error (NULL, "Invalid read access of chars of wide string: ~s",
 								                    scm_list_1 (str));
 								  return NULL;
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								}
-												* Added function scm_str2string.  Thanks to Martin Baulig.

											
										
										
											2001-09-12 19:53:57 +00:00
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Returns a pointer to the 32-bit UCS-4 encoded character array of
 								   STR.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								const scm_t_wchar *
 								scm_i_string_wide_chars (SCM str)
 								{
 								  SCM buf;
 								  size_t start;
 								  get_str_buf_start (&str, &buf, &start);
 								  if (!scm_i_is_narrow_string (str))
-												Avoid double-casts of stringbuf

Conversion from char to scm_t_wchar require an intermediate cast to
unsigned char.  By changing the return type of SCM_STRINGBUF_INLINE_CHARS
to unsigned char *, doublecasts in the code can be avoided.  Also,
some clarification of return types.

* libguile/strings.c (STRINGBUF_OUTLINE_CHARS)
(STRINGBUF_INLINE_CHARS): now returns unsigned char *; all callers changed.

											
										
										
											2009-08-18 21:14:56 -07:00
+								    return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf) + start;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  else
 								    scm_misc_error (NULL, "Invalid read access of chars of narrow string: ~s",
 								                    scm_list_1 (str));
 								}
 								/* If the buffer in ORIG_STR is shared, copy ORIG_STR's characters to
 								   a new string buffer, so that it can be modified without modifying
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								   other strings.  Also, lock the string mutex.  Later, one must call
 								   scm_i_string_stop_writing to unlock the mutex.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								SCM
 								scm_i_string_start_writing (SCM orig_str)
-												* Added function scm_str2string.  Thanks to Martin Baulig.

											
										
										
											2001-09-12 19:53:57 +00:00
+								{
-												(scm_substring_read_only,
scm_c_substring_read_only, scm_i_substring_read_only): New.
(RO_STRING_TAG, IS_RO_STRING): New.
(scm_i_string_writable_chars): Bail on read-only strings.

											
										
										
											2004-09-22 13:54:15 +00:00
+								  SCM buf, str = orig_str;
-												(get_str_buf_start): New helper function.
(scm_i_substring, scm_i_substring_copy, scm_i_substring_shared,
scm_i_string_char, scm_i_string_writable_chars): Use it.
(scm_i_substring_copy): Make START argument optional for C
callers, for upcoming SRFI-13 integration.

											
										
										
											2004-08-22 19:29:19 +00:00
+								  size_t start;
-												(scm_substring_read_only,
scm_c_substring_read_only, scm_i_substring_read_only): New.
(RO_STRING_TAG, IS_RO_STRING): New.
(scm_i_string_writable_chars): Bail on read-only strings.

											
										
										
											2004-09-22 13:54:15 +00:00
-												(get_str_buf_start): New helper function.
(scm_i_substring, scm_i_substring_copy, scm_i_substring_shared,
scm_i_string_char, scm_i_string_writable_chars): Use it.
(scm_i_substring_copy): Make START argument optional for C
callers, for upcoming SRFI-13 integration.

											
										
										
											2004-08-22 19:29:19 +00:00
+								  get_str_buf_start (&str, &buf, &start);
-												(scm_substring_read_only,
scm_c_substring_read_only, scm_i_substring_read_only): New.
(RO_STRING_TAG, IS_RO_STRING): New.
(scm_i_string_writable_chars): Bail on read-only strings.

											
										
										
											2004-09-22 13:54:15 +00:00
+								  if (IS_RO_STRING (str))
 								    scm_misc_error (NULL, "string is read-only: ~s", scm_list_1 (orig_str));
-												See ChangeLog from 2005-03-02.

											
										
										
											2005-03-02 20:42:01 +00:00
+								  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  if (STRINGBUF_SHARED (buf))
 								    {
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								      /* Clone the stringbuf.  */
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								      size_t len = STRING_LENGTH (str);
 								      SCM new_buf;
-												See ChangeLog from 2005-03-02.

											
										
										
											2005-03-02 20:42:01 +00:00
+								      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								      if (scm_i_is_narrow_string (str))
 								        {
 								          new_buf = make_stringbuf (len);
 								          memcpy (STRINGBUF_CHARS (new_buf),
 								                  STRINGBUF_CHARS (buf) + STRING_START (str), len);
 								        }
 								      else
 								        {
 								          new_buf = make_wide_stringbuf (len);
 								          u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
 								                   (scm_t_uint32 *) (STRINGBUF_WIDE_CHARS (buf)
 								                                     + STRING_START (str)), len);
 								        }
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
 								      SET_STRING_STRINGBUF (str, new_buf);
 								      start -= STRING_START (str);
-												Remove use of `scm_i_thread_put_to_sleep ()' in the string code.

* libguile/strings.c (scm_i_string_writable_chars): Remove use of
  `scm_i_thread_put_to_sleep ()'.  This leaves a race condition,
  which is hopefully not harmful.

											
										
										
											2008-09-17 20:41:41 +02:00
 								      /* FIXME: The following operations are not atomic, so other threads
 									 looking at STR may see an inconsistent state.  Nevertheless it can't
 									 hurt much since (i) accessing STR while it is being mutated can't
 									 yield a crash, and (ii) concurrent accesses to STR should be
 									 protected by a mutex at the application level.  The latter may not
 									 apply when STR != ORIG_STR, though.  */
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								      SET_STRING_START (str, 0);
-												Remove use of `scm_i_thread_put_to_sleep ()' in the string code.

* libguile/strings.c (scm_i_string_writable_chars): Remove use of
  `scm_i_thread_put_to_sleep ()'.  This leaves a race condition,
  which is hopefully not harmful.

											
										
										
											2008-09-17 20:41:41 +02:00
+								      SET_STRING_STRINGBUF (str, new_buf);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
 								      buf = new_buf;
-												See ChangeLog from 2005-03-02.

											
										
										
											2005-03-02 20:42:01 +00:00
+								      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								    }
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  return orig_str;
 								}
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Return a pointer to the 8-bit Latin-1 chars of a string.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								char *
 								scm_i_string_writable_chars (SCM str)
 								{
 								  SCM buf;
 								  size_t start;
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  get_str_buf_start (&str, &buf, &start);
 								  if (scm_i_is_narrow_string (str))
-												Avoid double-casts of stringbuf

Conversion from char to scm_t_wchar require an intermediate cast to
unsigned char.  By changing the return type of SCM_STRINGBUF_INLINE_CHARS
to unsigned char *, doublecasts in the code can be avoided.  Also,
some clarification of return types.

* libguile/strings.c (STRINGBUF_OUTLINE_CHARS)
(STRINGBUF_INLINE_CHARS): now returns unsigned char *; all callers changed.

											
										
										
											2009-08-18 21:14:56 -07:00
+								    return (char *) STRINGBUF_CHARS (buf) + start;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  else
 								    scm_misc_error (NULL, "Invalid write access of chars of wide string: ~s",
 								                    scm_list_1 (str));
 								  return NULL;
-												* Added function scm_str2string.  Thanks to Martin Baulig.

											
										
										
											2001-09-12 19:53:57 +00:00
+								}
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Return a pointer to the UCS-4 codepoints of a string.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								static scm_t_wchar *
 								scm_i_string_writable_wide_chars (SCM str)
 								{
 								  SCM buf;
 								  size_t start;
 								  get_str_buf_start (&str, &buf, &start);
 								  if (!scm_i_is_narrow_string (str))
 								    return STRINGBUF_WIDE_CHARS (buf) + start;
 								  else
-												Misleading error message text in scm_i_string_writable_wide_chars

* libguile/strings.c (scm_i_string_writable_wide_chars): change error text

											
										
										
											2009-08-19 21:25:23 -07:00
+								    scm_misc_error (NULL, "Invalid write access of chars of narrow string: ~s",
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								                    scm_list_1 (str));
-												* Added function scm_str2string.  Thanks to Martin Baulig.

											
										
										
											2001-09-12 19:53:57 +00:00
+								}
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Unlock the string mutex that was locked when
 								   scm_i_string_start_writing was called.  */
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								void
 								scm_i_string_stop_writing (void)
 								{
-												See ChangeLog from 2005-03-02.

											
										
										
											2005-03-02 20:42:01 +00:00
+								  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								}
-												* Added function scm_str2string.  Thanks to Martin Baulig.

											
										
										
											2001-09-12 19:53:57 +00:00
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Return the Xth character of STR as a UCS-4 codepoint.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								scm_t_wchar
 								scm_i_string_ref (SCM str, size_t x)
 								{
 								  if (scm_i_is_narrow_string (str))
 								    return (scm_t_wchar) (unsigned char) (scm_i_string_chars (str)[x]);
 								  else
 								    return scm_i_string_wide_chars (str)[x];
 								}
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								/* Returns index+1 of the first char in STR that matches C, or
 if the char is not found.  */
 								int
 								scm_i_string_contains_char (SCM str, char ch)
 								{
 								  size_t i;
 								  size_t len = scm_i_string_length (str);
 								  i = 0;
 								  if (scm_i_is_narrow_string (str))
 								    {
 								      while (i < len)
 								        {
 								          if (scm_i_string_chars (str)[i] == ch)
 								            return i+1;
 								          i++;
 								        }
 								    }
 								  else
 								    {
 								      while (i < len)
 								        {
 								          if (scm_i_string_wide_chars (str)[i]
 								              == (unsigned char) ch)
 								            return i+1;
 								          i++;
 								        }
 								    }
 								  return 0;
 								}
-												Use string accessors for string->number conversion

* libguile/numbers.c (scm_i_print_fraction): use string accessors
  (XDIGIT2UINT): use libunistring function
  (mem2uinteger, mem2integer, mem2decimal_from_point, mem2ureal)
  (mem2complex): take scheme string instead of c string; use accessors
  (scm_i_string_to_number): new function
  (scm_c_locale_string_to_number): use scm_i_string_to_number

* libguile/numbers.h: declaration for scm_i_string_to_number

* libguile/strings.c (scm_i_string_strcmp): new function

* libguile/strings.h: declaration for scm_i_string_strcmp

											
										
										
											2009-08-21 09:18:30 -07:00
+								int
 								scm_i_string_strcmp (SCM sstr, size_t start_x, const char *cstr)
 								{
 								  if (scm_i_is_narrow_string (sstr))
 								    {
 								      const char *a = scm_i_string_chars (sstr) + start_x;
 								      const char *b = cstr;
 								      return strncmp (a, b, strlen(b));
 								    }
 								  else
 								    {
 								      size_t i;
 								      const scm_t_wchar *a = scm_i_string_wide_chars (sstr) + start_x;
 								      const char *b = cstr;
 								      for (i = 0; i < strlen (b); i++)
 								        {
 								          if (a[i] != (unsigned char) b[i])
 								            return 1;
 								        }
 								    }
 								  return 0;
 								}
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Set the Pth character of STR to UCS-4 codepoint CHR. */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								void
 								scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr)
 								{
 								  if (chr > 0xFF && scm_i_is_narrow_string (str))
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								    SET_STRING_STRINGBUF (str, wide_stringbuf (STRING_STRINGBUF (str)));
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
 								  if (scm_i_is_narrow_string (str))
 								    {
 								      char *dst = scm_i_string_writable_chars (str);
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
+								      dst[p] = chr;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								    }
 								  else
 								    {
 								      scm_t_wchar *dst = scm_i_string_writable_wide_chars (str);
 								      dst[p] = chr;
 								    }
 								}
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								/* Symbols.
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								   Basic symbol creation and accessing is done here, the rest is in
 								   symbols.[hc].  This has been done to keep stringbufs and the
 								   internals of strings and string-like objects confined to this file.
 								*/
 								#define SYMBOL_STRINGBUF SCM_CELL_OBJECT_1
 								SCM
-												* strings.h, strings.c (scm_i_make_symbol): Added FLAGS parameter.
* symbols.h, symbols.c (SCM_I_F_SYMBOL_UNINTERNED,
scm_i_symbol_is_interned, scm_i_mem2symbol,
scm_i_mem2uninternedsymbol): Use it to store uninternedness flag.

											
										
										
											2004-08-26 15:40:13 +00:00
+								scm_i_make_symbol (SCM name, scm_t_bits flags,
 										   unsigned long hash, SCM props)
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								{
 								  SCM buf;
 								  size_t start = STRING_START (name);
 								  size_t length = STRING_LENGTH (name);
 								  if (IS_SH_STRING (name))
 								    {
 								      name = SH_STRING_STRING (name);
 								      start += STRING_START (name);
 								    }
 								  buf = SYMBOL_STRINGBUF (name);
 								  if (start == 0 && length == STRINGBUF_LENGTH (buf))
 								    {
 								      /* reuse buf. */
-												See ChangeLog from 2005-03-02.

											
										
										
											2005-03-02 20:42:01 +00:00
+								      scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								      SET_STRINGBUF_SHARED (buf);
-												See ChangeLog from 2005-03-02.

											
										
										
											2005-03-02 20:42:01 +00:00
+								      scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								    }
 								  else
 								    {
 								      /* make new buf. */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								      if (scm_i_is_narrow_string (name))
 								        {
 								          SCM new_buf = make_stringbuf (length);
 								          memcpy (STRINGBUF_CHARS (new_buf),
 								                  STRINGBUF_CHARS (buf) + start, length);
 								          buf = new_buf;
 								        }
 								      else
 								        {
 								          SCM new_buf = make_wide_stringbuf (length);
 								          u32_cpy ((scm_t_uint32 *) STRINGBUF_WIDE_CHARS (new_buf),
 								                   (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf) + start,
 								                   length);
 								          buf = new_buf;
 								        }
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								    }
-												* strings.h, strings.c (scm_i_make_symbol): Added FLAGS parameter.
* symbols.h, symbols.c (SCM_I_F_SYMBOL_UNINTERNED,
scm_i_symbol_is_interned, scm_i_mem2symbol,
scm_i_mem2uninternedsymbol): Use it to store uninternedness flag.

											
										
										
											2004-08-26 15:40:13 +00:00
+								  return scm_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+											  (scm_t_bits) hash, SCM_UNPACK (props));
 								}
-												patches by Ludovic CourtÃ¨s for symbol generation.

											
										
										
											2006-01-24 20:30:09 +00:00
+								SCM
 								scm_i_c_make_symbol (const char *name, size_t len,
 										     scm_t_bits flags, unsigned long hash, SCM props)
 								{
 								  SCM buf = make_stringbuf (len);
 								  memcpy (STRINGBUF_CHARS (buf), name, len);
-												Use immutable double-cells for symbols.

* libguile/strings.c (scm_i_make_symbol): Use `scm_immutable_double_cell ()'.

											
										
										
											2008-09-16 12:12:38 +02:00
+								  return scm_immutable_double_cell (scm_tc7_symbol | flags, SCM_UNPACK (buf),
 												    (scm_t_bits) hash, SCM_UNPACK (props));
-												patches by Ludovic CourtÃ¨s for symbol generation.

											
										
										
											2006-01-24 20:30:09 +00:00
+								}
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Returns the number of characters in SYM.  This may be different
 								   from the memory size of SYM.  */
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								size_t
 								scm_i_symbol_length (SCM sym)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												Add `scm_c_symbol_length ()'.

											
										
										
											2008-07-05 20:10:44 +02:00
+								size_t
 								scm_c_symbol_length (SCM sym)
 								#define FUNC_NAME "scm_c_symbol_length"
 								{
 								  SCM_VALIDATE_SYMBOL (1, sym);
 								  return STRINGBUF_LENGTH (SYMBOL_STRINGBUF (sym));
 								}
 								#undef FUNC_NAME
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* True if the name of SYM is stored as a Latin-1 encoded string.
 								   False if it is stored as a 32-bit UCS-4-encoded string.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								int
 								scm_i_is_narrow_symbol (SCM sym)
 								{
 								  SCM buf;
 								  buf = SYMBOL_STRINGBUF (sym);
 								  return !STRINGBUF_WIDE (buf);
 								}
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Returns a pointer to the 8-bit Latin-1 encoded character array that
 								   contains the name of SYM.  */
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								const char *
 								scm_i_symbol_chars (SCM sym)
 								{
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  SCM buf;
 								  buf = SYMBOL_STRINGBUF (sym);
 								  if (!STRINGBUF_WIDE (buf))
-												Avoid double-casts of stringbuf

Conversion from char to scm_t_wchar require an intermediate cast to
unsigned char.  By changing the return type of SCM_STRINGBUF_INLINE_CHARS
to unsigned char *, doublecasts in the code can be avoided.  Also,
some clarification of return types.

* libguile/strings.c (STRINGBUF_OUTLINE_CHARS)
(STRINGBUF_INLINE_CHARS): now returns unsigned char *; all callers changed.

											
										
										
											2009-08-18 21:14:56 -07:00
+								    return (const char *) STRINGBUF_CHARS (buf);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  else
 								    scm_misc_error (NULL, "Invalid access of chars of a wide symbol ~S",
 								                    scm_list_1 (sym));
 								}
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Return a pointer to the 32-bit UCS-4-encoded character array of a
 								   symbol's name.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								const scm_t_wchar *
 								scm_i_symbol_wide_chars (SCM sym)
 								{
 								  SCM buf;
 								  buf = SYMBOL_STRINGBUF (sym);
 								  if (STRINGBUF_WIDE (buf))
-												Avoid double-casts of stringbuf

Conversion from char to scm_t_wchar require an intermediate cast to
unsigned char.  By changing the return type of SCM_STRINGBUF_INLINE_CHARS
to unsigned char *, doublecasts in the code can be avoided.  Also,
some clarification of return types.

* libguile/strings.c (STRINGBUF_OUTLINE_CHARS)
(STRINGBUF_INLINE_CHARS): now returns unsigned char *; all callers changed.

											
										
										
											2009-08-18 21:14:56 -07:00
+								    return (const scm_t_wchar *) STRINGBUF_WIDE_CHARS (buf);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  else
 								    scm_misc_error (NULL, "Invalid access of chars of a narrow symbol ~S",
 								                    scm_list_1 (sym));
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								}
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												* Replace function scm_makstr with new function scm_allocate_string.

											
										
										
											2001-03-22 12:52:03 +00:00
+								SCM
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								scm_i_symbol_substring (SCM sym, size_t start, size_t end)
-												* Replace function scm_makstr with new function scm_allocate_string.

											
										
										
											2001-03-22 12:52:03 +00:00
+								{
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  SCM buf = SYMBOL_STRINGBUF (sym);
-												See ChangeLog from 2005-03-02.

											
										
										
											2005-03-02 20:42:01 +00:00
+								  scm_i_pthread_mutex_lock (&stringbuf_write_mutex);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  SET_STRINGBUF_SHARED (buf);
-												See ChangeLog from 2005-03-02.

											
										
										
											2005-03-02 20:42:01 +00:00
+								  scm_i_pthread_mutex_unlock (&stringbuf_write_mutex);
-												Make `symbol->string' return a read-only string.

* libguile/strings.c (scm_i_symbol_substring): Return a read-only string
  since R5RS requires `symbol->string' to return a read-only string.
  Reported by Bill Schottstaedt <bil@ccrma.Stanford.EDU>.

* test-suite/tests/symbols.test: Add `define-module' clause.
  (exception:immutable-string): Adjust to current exception.
  ("symbol->string")["result is an immutable string"]: Use
  `pass-if-exception' instead of `expect-fail-exception'.

* NEWS: Update.

											
										
										
											2008-09-22 22:51:36 +02:00
+								  return scm_double_cell (RO_STRING_TAG, SCM_UNPACK (buf),
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+											  (scm_t_bits)start, (scm_t_bits) end - start);
 								}
-												* Replace function scm_makstr with new function scm_allocate_string.

											
										
										
											2001-03-22 12:52:03 +00:00
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Returns the Xth character of symbol SYM as a UCS-4 codepoint.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								scm_t_wchar
 								scm_i_symbol_ref (SCM sym, size_t x)
 								{
 								  if (scm_i_is_narrow_symbol (sym))
 								    return (scm_t_wchar) (unsigned char) (scm_i_symbol_chars (sym)[x]);
 								  else
 								    return scm_i_symbol_wide_chars (sym)[x];
 								}
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								/* Debugging
 								 */
-												* Replace function scm_makstr with new function scm_allocate_string.

											
										
										
											2001-03-22 12:52:03 +00:00
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								SCM_DEFINE (scm_sys_string_dump, "%string-dump", 1, 0, 0, (SCM str),
 								            "Returns an association list containing debugging information\n"
 								            "for @var{str}. The association list has the following entries."
 								            "@table @code\n"
 								            "@item string\n"
 								            "The string itself.\n"
 								            "@item start\n"
 								            "The start index of the string into its stringbuf\n"
 								            "@item length\n"
 								            "The length of the string\n"
 								            "@item shared\n"
 								            "If this string is a substring, it returns its parent string.\n"
 								            "Otherwise, it returns @code{#f}\n"
-												Fix %string-dump and %symbol-dump fields

        * libguile/strings.c (scm_sys_string_dump): don't print
        stringbuf. Print read-only status.
        (scm_sys_symbol_dump): don't print stringbuf.  Print interned
        status.

											
										
										
											2009-08-10 22:18:47 -07:00
+								            "@item read-only\n"
 								            "@code{#t} if the string is read-only\n"
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								            "@item stringbuf-chars\n"
 								            "A new string containing this string's stringbuf's characters\n"
 								            "@item stringbuf-length\n"
 								            "The number of characters in this stringbuf\n"
 								            "@item stringbuf-shared\n"
 								            "@code{#t} if this stringbuf is shared\n"
 								            "@item stringbuf-wide\n"
 								            "@code{#t} if this stringbuf's characters are stored in a\n"
 								            "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
 								            "buffer\n"
 								            "@end table")
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								#define FUNC_NAME s_scm_sys_string_dump
 								{
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								  SCM e1, e2, e3, e4, e5, e6, e7, e8, e9;
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								  SCM buf;
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  SCM_VALIDATE_STRING (1, str);
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
 								  /* String info */
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								  e1 = scm_cons (scm_from_latin1_symbol ("string"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                 str);
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								  e2 = scm_cons (scm_from_latin1_symbol ("start"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                 scm_from_size_t (STRING_START (str)));
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								  e3 = scm_cons (scm_from_latin1_symbol ("length"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                 scm_from_size_t (STRING_LENGTH (str)));
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  if (IS_SH_STRING (str))
 								    {
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								      e4 = scm_cons (scm_from_latin1_symbol ("shared"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                     SH_STRING_STRING (str));
 								      buf = STRING_STRINGBUF (SH_STRING_STRING (str));
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								    }
 								  else
 								    {
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								      e4 = scm_cons (scm_from_latin1_symbol ("shared"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                     SCM_BOOL_F);
 								      buf = STRING_STRINGBUF (str);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								    }
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
-												Fix %string-dump and %symbol-dump fields

        * libguile/strings.c (scm_sys_string_dump): don't print
        stringbuf. Print read-only status.
        (scm_sys_symbol_dump): don't print stringbuf.  Print interned
        status.

											
										
										
											2009-08-10 22:18:47 -07:00
+								  if (IS_RO_STRING (str))
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								    e5 = scm_cons (scm_from_latin1_symbol ("read-only"),
-												Fix %string-dump and %symbol-dump fields

        * libguile/strings.c (scm_sys_string_dump): don't print
        stringbuf. Print read-only status.
        (scm_sys_symbol_dump): don't print stringbuf.  Print interned
        status.

											
										
										
											2009-08-10 22:18:47 -07:00
+								                   SCM_BOOL_T);
 								  else
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								    e5 = scm_cons (scm_from_latin1_symbol ("read-only"),
-												Fix %string-dump and %symbol-dump fields

        * libguile/strings.c (scm_sys_string_dump): don't print
        stringbuf. Print read-only status.
        (scm_sys_symbol_dump): don't print stringbuf.  Print interned
        status.

											
										
										
											2009-08-10 22:18:47 -07:00
+								                   SCM_BOOL_F);
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								  /* Stringbuf info */
 								  if (!STRINGBUF_WIDE (buf))
 								    {
 								      size_t len = STRINGBUF_LENGTH (buf);
 								      char *cbuf;
 								      SCM sbc = scm_i_make_string (len, &cbuf);
 								      memcpy (cbuf, STRINGBUF_CHARS (buf), len);
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								      e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                     sbc);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								    }
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								  else
 								    {
 								      size_t len = STRINGBUF_LENGTH (buf);
 								      scm_t_wchar *cbuf;
 								      SCM sbc = scm_i_make_wide_string (len, &cbuf);
 								      u32_cpy ((scm_t_uint32 *) cbuf,
 								               (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								      e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                     sbc);
 								    }
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								  e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                 scm_from_size_t (STRINGBUF_LENGTH (buf)));
 								  if (STRINGBUF_SHARED (buf))
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								    e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                   SCM_BOOL_T);
 								  else
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								    e8 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                   SCM_BOOL_F);
 								  if (STRINGBUF_WIDE (buf))
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								    e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+										   SCM_BOOL_T);
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								  else
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								    e9 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+										   SCM_BOOL_F);
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								  return scm_list_n (e1, e2, e3, e4, e5, e6, e7, e8, e9, SCM_UNDEFINED);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								}
 								#undef FUNC_NAME
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								SCM_DEFINE (scm_sys_symbol_dump, "%symbol-dump", 1, 0, 0, (SCM sym),
 								            "Returns an association list containing debugging information\n"
 								            "for @var{sym}. The association list has the following entries."
 								            "@table @code\n"
 								            "@item symbol\n"
 								            "The symbol itself\n"
 								            "@item hash\n"
 								            "Its hash value\n"
-												Fix %string-dump and %symbol-dump fields

        * libguile/strings.c (scm_sys_string_dump): don't print
        stringbuf. Print read-only status.
        (scm_sys_symbol_dump): don't print stringbuf.  Print interned
        status.

											
										
										
											2009-08-10 22:18:47 -07:00
+								            "@item interned\n"
 								            "@code{#t} if it is an interned symbol\n"
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								            "@item stringbuf-chars\n"
 								            "A new string containing this symbols's stringbuf's characters\n"
 								            "@item stringbuf-length\n"
 								            "The number of characters in this stringbuf\n"
 								            "@item stringbuf-shared\n"
 								            "@code{#t} if this stringbuf is shared\n"
 								            "@item stringbuf-wide\n"
 								            "@code{#t} if this stringbuf's characters are stored in a\n"
 								            "32-bit buffer, or @code{#f} if they are stored in an 8-bit\n"
 								            "buffer\n"
 								            "@end table")
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								#define FUNC_NAME s_scm_sys_symbol_dump
 								{
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								  SCM e1, e2, e3, e4, e5, e6, e7;
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								  SCM buf;
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  SCM_VALIDATE_SYMBOL (1, sym);
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								  e1 = scm_cons (scm_from_latin1_symbol ("symbol"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                 sym);
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								  e2 = scm_cons (scm_from_latin1_symbol ("hash"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                 scm_from_ulong (scm_i_symbol_hash (sym)));
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								  e3 = scm_cons (scm_from_latin1_symbol ("interned"),
-												Fix %string-dump and %symbol-dump fields

        * libguile/strings.c (scm_sys_string_dump): don't print
        stringbuf. Print read-only status.
        (scm_sys_symbol_dump): don't print stringbuf.  Print interned
        status.

											
										
										
											2009-08-10 22:18:47 -07:00
+								                 scm_symbol_interned_p (sym));
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								  buf = SYMBOL_STRINGBUF (sym);
 								  /* Stringbuf info */
 								  if (!STRINGBUF_WIDE (buf))
 								    {
 								      size_t len = STRINGBUF_LENGTH (buf);
 								      char *cbuf;
 								      SCM sbc = scm_i_make_string (len, &cbuf);
 								      memcpy (cbuf, STRINGBUF_CHARS (buf), len);
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								      e4 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                     sbc);
 								    }
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  else
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								    {
 								      size_t len = STRINGBUF_LENGTH (buf);
 								      scm_t_wchar *cbuf;
 								      SCM sbc = scm_i_make_wide_string (len, &cbuf);
 								      u32_cpy ((scm_t_uint32 *) cbuf,
 								               (scm_t_uint32 *) STRINGBUF_WIDE_CHARS (buf), len);
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								      e4 = scm_cons (scm_from_latin1_symbol ("stringbuf-chars"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                     sbc);
 								    }
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								  e5 = scm_cons (scm_from_latin1_symbol ("stringbuf-length"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                 scm_from_size_t (STRINGBUF_LENGTH (buf)));
 								  if (STRINGBUF_SHARED (buf))
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								    e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                   SCM_BOOL_T);
 								  else
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								    e6 = scm_cons (scm_from_latin1_symbol ("stringbuf-shared"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                   SCM_BOOL_F);
 								  if (STRINGBUF_WIDE (buf))
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								    e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                    SCM_BOOL_T);
 								  else
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								    e7 = scm_cons (scm_from_latin1_symbol ("stringbuf-wide"),
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
+								                    SCM_BOOL_F);
-												Remove the distinction between inline/outline storage for stringbufs.

* libguile/strings.c (STRINGBUF_HEADER_SIZE, STRINGBUF_HEADER_BYTES):
  New macros.
  (STRINGBUF_F_INLINE, STRINGBUF_INLINE, STRINGBUF_OUTLINE_CHARS,
  STRINGBUF_OUTLINE_LENGTH, STRINGBUF_INLINE_CHARS,
  STRINGBUF_INLINE_LENGTH, STRINGBUF_MAX_INLINE_LEN): Remove.
  (STRINGBUF_CHARS, STRINGBUF_WIDE_CHARS): Adjust to return a fixed
  location.
  (STRINGBUF_LENGTH): Get the length from word 1.
  (make_stringbuf, make_wide_stringbuf): Adjust to use a contiguous
  memory region.
  (wide_stringbuf): Renamed from `widen_stringbuf'.  Adjust similarly.
  Return the new stringbuf.  Callers updated.
  (narrow_stringbuf): Likewise.
  (scm_sys_string_dump, scm_sys_symbol_dump): Remove `stringbuf-inline'
  pair.

* test-suite/tests/strings.test ("string internals")["null strings are
  inlined", "short Latin-1 encoded strings are inlined", "long Latin-1
  encoded strings are not inlined", "short UCS-4 encoded strings are not
  inlined", "long UCS-4 encoded strings are not inlined"]: Remove.

* test-suite/tests/symbols.test ("symbol internals")["null symbols are
  inlined", "short Latin-1 encoded symbols are inlined", "long Latin-1
  encoded symbols are not inlined", "short UCS-4 encoded symbols are not
  inlined", "long UCS-4 encoded symbols are not inlined"]: Remove.

											
										
										
											2009-09-01 02:02:43 +02:00
+								  return scm_list_n (e1, e2, e3, e4, e5, e6, e7, SCM_UNDEFINED);
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								}
 								#undef FUNC_NAME
-												Remove references to undefined macros.

The intent is to allow compilation with `-Wundef', which in turn should
make it easier to catch erroneous uses of nonexistent macros.

* libguile/__scm.h: Don't assume `BUILDING_LIBGUILE' is defined.

* libguile/conv-uinteger.i.c (SCM_TO_TYPE_PROTO): Remove unneeded CPP
  conditional on `TYPE_MIN == 0'.

* libguile/fports.c: Check for the definition of `HAVE_CHSIZE' and
  `HAVE_FTRUNCATE', not for their value.

* libguile/ports.c: Likewise.

* libguile/numbers.c (guile_ieee_init): Likewise with `HAVE_DINFINITY'
  and `HAVE_DQNAN'.

* test-suite/standalone/test-conversion.c (ieee_init): Likewise.

* libguile/strings.c: Likewise with `SCM_STRING_LENGTH_HISTOGRAM'.

* libguile/strings.h: Likewise.

* libguile/tags.h: Likewise with `HAVE_INTTYPES_H' and `HAVE_STDINT_H'.

* libguile/threads.c: Likewise with `HAVE_PTHREAD_GET_STACKADDR_NP'.

* libguile/vm-engine.c (VM_NAME): Likewise with `VM_CHECK_IP'.

* libguile/gen-scmconfig.c (main): Use "#ifdef HAVE_", not "#if HAVE_".

* libguile/socket.c (scm_setsockopt): Likewise.

											
										
										
											2009-11-17 23:40:51 +01:00
+								#ifdef SCM_STRING_LENGTH_HISTOGRAM
-												Improve %string-dump and %symbol-dump

%string-dump and %symbol-dump are modified to return assocation lists
of string and symbol attributes instead of printing to stderr.  They
are no longer conditional on SCM_DEBUG.

        * libguile/strings.c (scm_sys_string_dump)
        (scm_sys_symbol_dump): now returns alist of properties.  No longer
        require that SCM_DEBUG be defined.
        (scm_sys_stringbuf_hist): now conditional on
        SCM_STRING_LENGTH_HISTOGRAM

        * libguile/strings.h: scm_sys_string_dump and scm_sys_symbol dump
        are now declared as API

											
										
										
											2009-08-10 00:09:33 -07:00
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								SCM_DEFINE (scm_sys_stringbuf_hist, "%stringbuf-hist", 0, 0, 0, (void), "")
-												(get_str_buf_start): New helper function.
(scm_i_substring, scm_i_substring_copy, scm_i_substring_shared,
scm_i_string_char, scm_i_string_writable_chars): Use it.
(scm_i_substring_copy): Make START argument optional for C
callers, for upcoming SRFI-13 integration.

											
										
										
											2004-08-22 19:29:19 +00:00
+								#define FUNC_NAME s_scm_sys_stringbuf_hist
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								{
 								  int i;
 								  for (i = 0; i < 1000; i++)
 								    if (lenhist[i])
 								      fprintf (stderr, " %3d: %u\n", i, lenhist[i]);
 								  fprintf (stderr, ">999: %u\n", lenhist[1000]);
 								  return SCM_UNSPECIFIED;
-												* Replace function scm_makstr with new function scm_allocate_string.

											
										
										
											2001-03-22 12:52:03 +00:00
+								}
 								#undef FUNC_NAME
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								#endif
 								SCM_DEFINE (scm_string_p, "string?", 1, 0, 0,
 									    (SCM obj),
 									    "Return @code{#t} if @var{obj} is a string, else @code{#f}.")
 								#define FUNC_NAME s_scm_string_p
 								{
 								  return scm_from_bool (IS_STRING (obj));
 								}
 								#undef FUNC_NAME
 								SCM_REGISTER_PROC (s_scm_list_to_string, "list->string", 1, 0, 0, scm_string);
 								SCM_DEFINE (scm_string, "string", 0, 0, 1,
 								            (SCM chrs),
 									    "@deffnx {Scheme Procedure} list->string chrs\n"
 									    "Return a newly allocated string composed of the arguments,\n"
 									    "@var{chrs}.")
 								#define FUNC_NAME s_scm_string
 								{
-												Try to optimize scm_string for speed

* libguile/strings.c (scm_string): optimize for speed

											
										
										
											2009-08-19 21:26:11 -07:00
+								  SCM result = SCM_BOOL_F;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  SCM rest;
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  size_t len;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  size_t p = 0;
 								  long i;
-												Try to optimize scm_string for speed

* libguile/strings.c (scm_string): optimize for speed

											
										
										
											2009-08-19 21:26:11 -07:00
+								  int wide = 0;
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  /* Verify that this is a list of chars.  */
 								  i = scm_ilength (chrs);
-												Regression, scm_string fails to test for circular lists

* libguile/string.c (scm_string): Restores the functionality
  where scm_string tests for circular lists

* test-suite/tests/strings.test: add test for circular lists

											
										
										
											2009-08-12 09:21:37 -07:00
+								  SCM_ASSERT (i >= 0, chrs, SCM_ARG1, FUNC_NAME);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  len = (size_t) i;
 								  rest = chrs;
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  while (len > 0 && scm_is_pair (rest))
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								    {
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								      SCM elt = SCM_CAR (rest);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								      SCM_VALIDATE_CHAR (SCM_ARGn, elt);
-												Try to optimize scm_string for speed

* libguile/strings.c (scm_string): optimize for speed

											
										
										
											2009-08-19 21:26:11 -07:00
+								      if (SCM_CHAR (elt) > 0xFF)
 								        wide = 1;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								      rest = SCM_CDR (rest);
 								      len--;
 								      scm_remember_upto_here_1 (elt);
 								    }
 								  /* Construct a string containing this list of chars.  */
 								  len = (size_t) i;
 								  rest = chrs;
-												Try to optimize scm_string for speed

* libguile/strings.c (scm_string): optimize for speed

											
										
										
											2009-08-19 21:26:11 -07:00
+								  if (wide == 0)
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								    {
-												Remove references to undefined macros.

The intent is to allow compilation with `-Wundef', which in turn should
make it easier to catch erroneous uses of nonexistent macros.

* libguile/__scm.h: Don't assume `BUILDING_LIBGUILE' is defined.

* libguile/conv-uinteger.i.c (SCM_TO_TYPE_PROTO): Remove unneeded CPP
  conditional on `TYPE_MIN == 0'.

* libguile/fports.c: Check for the definition of `HAVE_CHSIZE' and
  `HAVE_FTRUNCATE', not for their value.

* libguile/ports.c: Likewise.

* libguile/numbers.c (guile_ieee_init): Likewise with `HAVE_DINFINITY'
  and `HAVE_DQNAN'.

* test-suite/standalone/test-conversion.c (ieee_init): Likewise.

* libguile/strings.c: Likewise with `SCM_STRING_LENGTH_HISTOGRAM'.

* libguile/strings.h: Likewise.

* libguile/tags.h: Likewise with `HAVE_INTTYPES_H' and `HAVE_STDINT_H'.

* libguile/threads.c: Likewise with `HAVE_PTHREAD_GET_STACKADDR_NP'.

* libguile/vm-engine.c (VM_NAME): Likewise with `VM_CHECK_IP'.

* libguile/gen-scmconfig.c (main): Use "#ifdef HAVE_", not "#if HAVE_".

* libguile/socket.c (scm_setsockopt): Likewise.

											
										
										
											2009-11-17 23:40:51 +01:00
+								      char *buf;
-												Try to optimize scm_string for speed

* libguile/strings.c (scm_string): optimize for speed

											
										
										
											2009-08-19 21:26:11 -07:00
+								      result = scm_i_make_string (len, NULL);
 								      result = scm_i_string_start_writing (result);
-												Remove references to undefined macros.

The intent is to allow compilation with `-Wundef', which in turn should
make it easier to catch erroneous uses of nonexistent macros.

* libguile/__scm.h: Don't assume `BUILDING_LIBGUILE' is defined.

* libguile/conv-uinteger.i.c (SCM_TO_TYPE_PROTO): Remove unneeded CPP
  conditional on `TYPE_MIN == 0'.

* libguile/fports.c: Check for the definition of `HAVE_CHSIZE' and
  `HAVE_FTRUNCATE', not for their value.

* libguile/ports.c: Likewise.

* libguile/numbers.c (guile_ieee_init): Likewise with `HAVE_DINFINITY'
  and `HAVE_DQNAN'.

* test-suite/standalone/test-conversion.c (ieee_init): Likewise.

* libguile/strings.c: Likewise with `SCM_STRING_LENGTH_HISTOGRAM'.

* libguile/strings.h: Likewise.

* libguile/tags.h: Likewise with `HAVE_INTTYPES_H' and `HAVE_STDINT_H'.

* libguile/threads.c: Likewise with `HAVE_PTHREAD_GET_STACKADDR_NP'.

* libguile/vm-engine.c (VM_NAME): Likewise with `VM_CHECK_IP'.

* libguile/gen-scmconfig.c (main): Use "#ifdef HAVE_", not "#if HAVE_".

* libguile/socket.c (scm_setsockopt): Likewise.

											
										
										
											2009-11-17 23:40:51 +01:00
+								      buf = scm_i_string_writable_chars (result);
-												Try to optimize scm_string for speed

* libguile/strings.c (scm_string): optimize for speed

											
										
										
											2009-08-19 21:26:11 -07:00
+								      while (len > 0 && scm_is_pair (rest))
 								        {
 								          SCM elt = SCM_CAR (rest);
 								          buf[p] = (unsigned char) SCM_CHAR (elt);
 								          p++;
 								          rest = SCM_CDR (rest);
 								          len--;
 								          scm_remember_upto_here_1 (elt);
 								        }
 								    }
 								  else
 								    {
-												Remove references to undefined macros.

The intent is to allow compilation with `-Wundef', which in turn should
make it easier to catch erroneous uses of nonexistent macros.

* libguile/__scm.h: Don't assume `BUILDING_LIBGUILE' is defined.

* libguile/conv-uinteger.i.c (SCM_TO_TYPE_PROTO): Remove unneeded CPP
  conditional on `TYPE_MIN == 0'.

* libguile/fports.c: Check for the definition of `HAVE_CHSIZE' and
  `HAVE_FTRUNCATE', not for their value.

* libguile/ports.c: Likewise.

* libguile/numbers.c (guile_ieee_init): Likewise with `HAVE_DINFINITY'
  and `HAVE_DQNAN'.

* test-suite/standalone/test-conversion.c (ieee_init): Likewise.

* libguile/strings.c: Likewise with `SCM_STRING_LENGTH_HISTOGRAM'.

* libguile/strings.h: Likewise.

* libguile/tags.h: Likewise with `HAVE_INTTYPES_H' and `HAVE_STDINT_H'.

* libguile/threads.c: Likewise with `HAVE_PTHREAD_GET_STACKADDR_NP'.

* libguile/vm-engine.c (VM_NAME): Likewise with `VM_CHECK_IP'.

* libguile/gen-scmconfig.c (main): Use "#ifdef HAVE_", not "#if HAVE_".

* libguile/socket.c (scm_setsockopt): Likewise.

											
										
										
											2009-11-17 23:40:51 +01:00
+								      scm_t_wchar *buf;
-												Try to optimize scm_string for speed

* libguile/strings.c (scm_string): optimize for speed

											
										
										
											2009-08-19 21:26:11 -07:00
+								      result = scm_i_make_wide_string (len, NULL);
 								      result = scm_i_string_start_writing (result);
-												Remove references to undefined macros.

The intent is to allow compilation with `-Wundef', which in turn should
make it easier to catch erroneous uses of nonexistent macros.

* libguile/__scm.h: Don't assume `BUILDING_LIBGUILE' is defined.

* libguile/conv-uinteger.i.c (SCM_TO_TYPE_PROTO): Remove unneeded CPP
  conditional on `TYPE_MIN == 0'.

* libguile/fports.c: Check for the definition of `HAVE_CHSIZE' and
  `HAVE_FTRUNCATE', not for their value.

* libguile/ports.c: Likewise.

* libguile/numbers.c (guile_ieee_init): Likewise with `HAVE_DINFINITY'
  and `HAVE_DQNAN'.

* test-suite/standalone/test-conversion.c (ieee_init): Likewise.

* libguile/strings.c: Likewise with `SCM_STRING_LENGTH_HISTOGRAM'.

* libguile/strings.h: Likewise.

* libguile/tags.h: Likewise with `HAVE_INTTYPES_H' and `HAVE_STDINT_H'.

* libguile/threads.c: Likewise with `HAVE_PTHREAD_GET_STACKADDR_NP'.

* libguile/vm-engine.c (VM_NAME): Likewise with `VM_CHECK_IP'.

* libguile/gen-scmconfig.c (main): Use "#ifdef HAVE_", not "#if HAVE_".

* libguile/socket.c (scm_setsockopt): Likewise.

											
										
										
											2009-11-17 23:40:51 +01:00
+								      buf = scm_i_string_writable_wide_chars (result);
-												Try to optimize scm_string for speed

* libguile/strings.c (scm_string): optimize for speed

											
										
										
											2009-08-19 21:26:11 -07:00
+								      while (len > 0 && scm_is_pair (rest))
 								        {
 								          SCM elt = SCM_CAR (rest);
 								          buf[p] = SCM_CHAR (elt);
 								          p++;
 								          rest = SCM_CDR (rest);
 								          len--;
 								          scm_remember_upto_here_1 (elt);
 								        }
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								    }
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  scm_i_string_stop_writing ();
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  if (len > 0)
 								    scm_misc_error (NULL, "list changed while constructing string", SCM_EOL);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  if (!scm_is_null (rest))
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								    scm_wrong_type_arg_msg (NULL, 0, chrs, "proper list");
 								  return result;
 								}
 								#undef FUNC_NAME
-												* Replace function scm_makstr with new function scm_allocate_string.

											
										
										
											2001-03-22 12:52:03 +00:00
-												* *.[ch]: Whitespace changes -- added space after SCM_VALIDATE_*
macros and SCM_DEFINE macros to match GNU coding standards.

											
										
										
											2000-01-05 19:25:37 +00:00
+								SCM_DEFINE (scm_make_string, "make-string", 1, 1, 0,
-												* tag.c:  Added doc for `tag', but mark as deprecated since Mikael
suggests removing tag.c altogether (and using a new `class-of'
instead).

* strings.c:  Added documentation from Gregg A. Reynolds.  Edited
a bit by me to use FOO instead of @var{foo} and to have the
summary come before preconditions on input.  Also dropped trailing
(rnrs) note.

* gsubr.c: Do not use SCM_DEFINE for `gsubr-apply'. Register the
function with scm_make_subr_opt w/ last arg of 0 so it is not
visible at the Scheme level. Mikael says (on devel-guile) that
this is the right thing because the first arg to the proc is the
guts of a compiled closure and shouldn't be exposed to the Scheme
level.

											
										
										
											2000-01-26 18:07:07 +00:00
+								            (SCM k, SCM chr),
-												(scm_string_p, scm_make_string, scm_read_only_string_p, scm_string_length)
(scm_string_ref, scm_string_set_x, scm_substring, scm_string_append):
Added texinfo markup.

											
										
										
											2001-02-17 11:27:41 +00:00
+									    "Return a newly allocated string of\n"
 								            "length @var{k}.  If @var{chr} is given, then all elements of\n"
 									    "the string are initialized to @var{chr}, otherwise the contents\n"
-												make-string et al nulls memory if not given an initializer

* libguile/gc-malloc.c: Add a note that the gc-malloc does not clear the
  memory block, so users need to make sure it is initialized.

* libguile/bitvectors.c (scm_c_make_bitvector):
* libguile/bytevectors.c (scm_make_bytevector):
* libguile/strings.c (scm_c_make_string): If no initializer is given,
  initialize the bytes to 0. Prevents information leakage if an app uses
  make-string et al without initializers.

* libguile/foreign.c (make_cif): Initialize this too, to prevent leakage
  in the struct holes. Paranoia...

											
										
										
											2010-12-04 19:31:20 +01:00
+									    "of the @var{string} are all set to @var{#\nul}.")
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#define FUNC_NAME s_scm_make_string
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  return scm_c_make_string (scm_to_size_t (k), chr);
 								}
 								#undef FUNC_NAME
 								SCM
 								scm_c_make_string (size_t len, SCM chr)
 								#define FUNC_NAME NULL
 								{
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  size_t p;
-												make-string et al nulls memory if not given an initializer

* libguile/gc-malloc.c: Add a note that the gc-malloc does not clear the
  memory block, so users need to make sure it is initialized.

* libguile/bitvectors.c (scm_c_make_bitvector):
* libguile/bytevectors.c (scm_make_bytevector):
* libguile/strings.c (scm_c_make_string): If no initializer is given,
  initialize the bytes to 0. Prevents information leakage if an app uses
  make-string et al without initializers.

* libguile/foreign.c (make_cif): Initialize this too, to prevent leakage
  in the struct holes. Paranoia...

											
										
										
											2010-12-04 19:31:20 +01:00
+								  char *contents = NULL;
 								  SCM res = scm_i_make_string (len, &contents);
-												* Fixed parameter checking for make-string.
* Corrected a bug introduced with the last patch.

											
										
										
											2001-02-08 11:40:51 +00:00
-												make-string et al nulls memory if not given an initializer

* libguile/gc-malloc.c: Add a note that the gc-malloc does not clear the
  memory block, so users need to make sure it is initialized.

* libguile/bitvectors.c (scm_c_make_bitvector):
* libguile/bytevectors.c (scm_make_bytevector):
* libguile/strings.c (scm_c_make_string): If no initializer is given,
  initialize the bytes to 0. Prevents information leakage if an app uses
  make-string et al without initializers.

* libguile/foreign.c (make_cif): Initialize this too, to prevent leakage
  in the struct holes. Paranoia...

											
										
										
											2010-12-04 19:31:20 +01:00
+								  /* If no char is given, initialize string contents to NULL.  */
 								  if (SCM_UNBNDP (chr))
 								    memset (contents, 0, len);
 								  else
-												* deprecated.h, deprecated.c, numbers.h (SCM_INUMP, SCM_NINUMP,
SCM_INUM): Deprecated by reenaming them to SCM_I_INUMP, SCM_I_NINUMP
and SCM_I_INUM, respectively and adding deprecated versions to
deprecated.h and deprecated.c.  Changed all uses to either use the
SCM_I_ variants or scm_is_*, scm_to_*, or scm_from_*, as appropriate.

											
										
										
											2004-07-23 15:43:02 +00:00
+								    {
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								      SCM_VALIDATE_CHAR (0, chr);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								      res = scm_i_string_start_writing (res);
 								      for (p = 0; p < len; p++)
 								        scm_i_string_set_x (res, p, SCM_CHAR (chr));
 								      scm_i_string_stop_writing ();
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    }
-												* deprecated.h, deprecated.c, numbers.h (SCM_INUMP, SCM_NINUMP,
SCM_INUM): Deprecated by reenaming them to SCM_I_INUMP, SCM_I_NINUMP
and SCM_I_INUM, respectively and adding deprecated versions to
deprecated.h and deprecated.c.  Changed all uses to either use the
SCM_I_ variants or scm_is_*, scm_to_*, or scm_from_*, as appropriate.

											
										
										
											2004-07-23 15:43:02 +00:00
 								  return res;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#undef FUNC_NAME
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												* *.[ch]: Whitespace changes -- added space after SCM_VALIDATE_*
macros and SCM_DEFINE macros to match GNU coding standards.

											
										
										
											2000-01-05 19:25:37 +00:00
+								SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
-												(scm_string_p, scm_make_string, scm_read_only_string_p, scm_string_length)
(scm_string_ref, scm_string_set_x, scm_substring, scm_string_append):
Added texinfo markup.

											
										
										
											2001-02-17 11:27:41 +00:00
+									    (SCM string),
 									    "Return the number of characters in @var{string}.")
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#define FUNC_NAME s_scm_string_length
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												* Some more work to get rid of SCM_LENGTH
* Eliminated some cell type bit fiddling
* Various minor changes

											
										
										
											2000-10-25 11:01:03 +00:00
+								  SCM_VALIDATE_STRING (1, string);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  return scm_from_size_t (STRING_LENGTH (string));
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#undef FUNC_NAME
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Rename string-width to string-bytes-per-char

* libguile/strings.h: rename scm_string_width to scm_string_bytes_per_char

* libguile/strings.c (scm_string_width): renamed to scm_string_bytes_per_char
  (scm_string_bytes_per_char): renamed from scm_string_width

* module/language/assembly/compile-bytecode.scm (write-bytecode): string-width
  -> string-bytes-per-char

* module/language/glil/compile-assembly.scm (dump-object): string-width
  -> string-bytes-per-char

											
										
										
											2009-08-19 21:24:23 -07:00
+								SCM_DEFINE (scm_string_bytes_per_char, "string-bytes-per-char", 1, 0, 0,
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								            (SCM string),
 								            "Return the bytes used to represent a character in @var{string}."
 								            "This will return 1 or 4.")
-												Rename string-width to string-bytes-per-char

* libguile/strings.h: rename scm_string_width to scm_string_bytes_per_char

* libguile/strings.c (scm_string_width): renamed to scm_string_bytes_per_char
  (scm_string_bytes_per_char): renamed from scm_string_width

* module/language/assembly/compile-bytecode.scm (write-bytecode): string-width
  -> string-bytes-per-char

* module/language/glil/compile-assembly.scm (dump-object): string-width
  -> string-bytes-per-char

											
										
										
											2009-08-19 21:24:23 -07:00
+								#define FUNC_NAME s_scm_string_bytes_per_char
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								{
 								  SCM_VALIDATE_STRING (1, string);
 								  if (!scm_i_is_narrow_string (string))
 								    return scm_from_int (4);
 								  return scm_from_int (1);
 								}
 								#undef FUNC_NAME
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								size_t
 								scm_c_string_length (SCM string)
 								{
 								  if (!IS_STRING (string))
 								    scm_wrong_type_arg_msg (NULL, 0, string, "string");
 								  return STRING_LENGTH (string);
 								}
-													* strings.h: don't use SCM_P.  don't include <string.h>.
	* error.c, gh_data.c, ports.c, script.c, strop.c: include <string.h>.

	* strings.c (scm_string_ref): make the 2nd argument compulsory.
	previously it defaulted to zero for no good reason that I can see.
	use a local variable for SCM_INUM (k).  replace
	SCM_VALIDATE_INUM_DEF with SCM_VALIDATE_INUM_COPY.

	(scm_makfromstr): cosmetic changes.

	(scm_string): Accept only chars in the list, not strings, for
	conformance to R5RS (particularly for list->string, which is
	supposed to be the inverse of string->list.)  remove
	SCM_DEFER_INTS/SCM_ALLOW_INTS, which is unnecessary since
	scm_makstr handles the cell allocation.  when reporting wrong-type
	arg, don't report the position as 1.

	* posix.c (scm_init_posix): intern PIPE_BUF if it's defined.

	* boot-9.scm (find-and-link-dynamic-module): pass strings, not symbols,
	to string-append.

											
										
										
											2000-01-31 18:29:56 +00:00
+								SCM_DEFINE (scm_string_ref, "string-ref", 2, 0, 0,
-												* tag.c:  Added doc for `tag', but mark as deprecated since Mikael
suggests removing tag.c altogether (and using a new `class-of'
instead).

* strings.c:  Added documentation from Gregg A. Reynolds.  Edited
a bit by me to use FOO instead of @var{foo} and to have the
summary come before preconditions on input.  Also dropped trailing
(rnrs) note.

* gsubr.c: Do not use SCM_DEFINE for `gsubr-apply'. Register the
function with scm_make_subr_opt w/ last arg of 0 so it is not
visible at the Scheme level. Mikael says (on devel-guile) that
this is the right thing because the first arg to the proc is the
guts of a compiled closure and shouldn't be exposed to the Scheme
level.

											
										
										
											2000-01-26 18:07:07 +00:00
+								            (SCM str, SCM k),
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								            "Return character @var{k} of @var{str} using zero-origin\n"
 								            "indexing. @var{k} must be a valid index of @var{str}.")
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#define FUNC_NAME s_scm_string_ref
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												Fix sloppy bound checking in `string-{ref,set!}' with the empty string.

* libguile/strings.c (scm_string_ref): Add proper range checking for the
  empty string.
  (scm_string_set_x): Likewise.
  Reported by Bill Schottstaedt <bil@ccrma.Stanford.EDU>.

* test-suite/tests/strings.test ("string-ref"): New test prefix.
  ("string-set!")["empty string", "empty string and non-zero index",
  "out of range", "negative index", "regular string"]: New tests.

* NEWS: Update.

											
										
										
											2008-12-02 19:42:39 +01:00
+								  size_t len;
-												* validate.h, deprecated.h (SCM_VALIDATE_INUM, SCM_VALIDATE_INUM_COPY,
SCM_VALIDATE_BIGINT, SCM_VALIDATE_INUM_MIN,
SCM_VALIDATE_INUM_MIN_COPY,
SCM_VALIDATE_INUM_MIN_DEF_COPY,SCM_VALIDATE_INUM_DEF,
SCM_VALIDATE_INUM_DEF_COPY, SCM_VALIDATE_INUM_RANGE,
SCM_VALIDATE_INUM_RANGE_COPY): Deprecated because they make the
fixnum/bignum distinction visible.  Changed all uses to scm_to_size_t
or similar.

											
										
										
											2004-07-10 14:35:36 +00:00
+								  unsigned long idx;
-													* strings.h: don't use SCM_P.  don't include <string.h>.
	* error.c, gh_data.c, ports.c, script.c, strop.c: include <string.h>.

	* strings.c (scm_string_ref): make the 2nd argument compulsory.
	previously it defaulted to zero for no good reason that I can see.
	use a local variable for SCM_INUM (k).  replace
	SCM_VALIDATE_INUM_DEF with SCM_VALIDATE_INUM_COPY.

	(scm_makfromstr): cosmetic changes.

	(scm_string): Accept only chars in the list, not strings, for
	conformance to R5RS (particularly for list->string, which is
	supposed to be the inverse of string->list.)  remove
	SCM_DEFER_INTS/SCM_ALLOW_INTS, which is unnecessary since
	scm_makstr handles the cell allocation.  when reporting wrong-type
	arg, don't report the position as 1.

	* posix.c (scm_init_posix): intern PIPE_BUF if it's defined.

	* boot-9.scm (find-and-link-dynamic-module): pass strings, not symbols,
	to string-append.

											
										
										
											2000-01-31 18:29:56 +00:00
-												* Some more work to get rid of SCM_LENGTH
* Eliminated some cell type bit fiddling
* Various minor changes

											
										
										
											2000-10-25 11:01:03 +00:00
+								  SCM_VALIDATE_STRING (1, str);
-												Fix sloppy bound checking in `string-{ref,set!}' with the empty string.

* libguile/strings.c (scm_string_ref): Add proper range checking for the
  empty string.
  (scm_string_set_x): Likewise.
  Reported by Bill Schottstaedt <bil@ccrma.Stanford.EDU>.

* test-suite/tests/strings.test ("string-ref"): New test prefix.
  ("string-set!")["empty string", "empty string and non-zero index",
  "out of range", "negative index", "regular string"]: New tests.

* NEWS: Update.

											
										
										
											2008-12-02 19:42:39 +01:00
 								  len = scm_i_string_length (str);
 								  if (SCM_LIKELY (len > 0))
 								    idx = scm_to_unsigned_integer (k, 0, len - 1);
 								  else
 								    scm_out_of_range (NULL, k);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  if (scm_i_is_narrow_string (str))
 								    return SCM_MAKE_CHAR (scm_i_string_chars (str)[idx]);
 								  else
 								    return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[idx]);
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#undef FUNC_NAME
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								SCM
 								scm_c_string_ref (SCM str, size_t p)
 								{
 								  if (p >= scm_i_string_length (str))
 								    scm_out_of_range (NULL, scm_from_size_t (p));
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  if (scm_i_is_narrow_string (str))
 								    return SCM_MAKE_CHAR (scm_i_string_chars (str)[p]);
 								  else
 								    return SCM_MAKE_CHAR (scm_i_string_wide_chars (str)[p]);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								}
-												* Deprecated SCM_RWSTRINGP and SCM_VALIDATE_RWSTRING.
* Prepared SCM_STRING_U?CHARS to replace SCM_ROU?CHARS.

											
										
										
											2000-11-21 17:32:38 +00:00
-												* *.[ch]: Whitespace changes -- added space after SCM_VALIDATE_*
macros and SCM_DEFINE macros to match GNU coding standards.

											
										
										
											2000-01-05 19:25:37 +00:00
+								SCM_DEFINE (scm_string_set_x, "string-set!", 3, 0, 0,
-												* tag.c:  Added doc for `tag', but mark as deprecated since Mikael
suggests removing tag.c altogether (and using a new `class-of'
instead).

* strings.c:  Added documentation from Gregg A. Reynolds.  Edited
a bit by me to use FOO instead of @var{foo} and to have the
summary come before preconditions on input.  Also dropped trailing
(rnrs) note.

* gsubr.c: Do not use SCM_DEFINE for `gsubr-apply'. Register the
function with scm_make_subr_opt w/ last arg of 0 so it is not
visible at the Scheme level. Mikael says (on devel-guile) that
this is the right thing because the first arg to the proc is the
guts of a compiled closure and shouldn't be exposed to the Scheme
level.

											
										
										
											2000-01-26 18:07:07 +00:00
+								            (SCM str, SCM k, SCM chr),
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								            "Store @var{chr} in element @var{k} of @var{str} and return\n"
 								            "an unspecified value. @var{k} must be a valid index of\n"
 								            "@var{str}.")
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#define FUNC_NAME s_scm_string_set_x
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												Fix sloppy bound checking in `string-{ref,set!}' with the empty string.

* libguile/strings.c (scm_string_ref): Add proper range checking for the
  empty string.
  (scm_string_set_x): Likewise.
  Reported by Bill Schottstaedt <bil@ccrma.Stanford.EDU>.

* test-suite/tests/strings.test ("string-ref"): New test prefix.
  ("string-set!")["empty string", "empty string and non-zero index",
  "out of range", "negative index", "regular string"]: New tests.

* NEWS: Update.

											
										
										
											2008-12-02 19:42:39 +01:00
+								  size_t len;
-												* validate.h, deprecated.h (SCM_VALIDATE_INUM, SCM_VALIDATE_INUM_COPY,
SCM_VALIDATE_BIGINT, SCM_VALIDATE_INUM_MIN,
SCM_VALIDATE_INUM_MIN_COPY,
SCM_VALIDATE_INUM_MIN_DEF_COPY,SCM_VALIDATE_INUM_DEF,
SCM_VALIDATE_INUM_DEF_COPY, SCM_VALIDATE_INUM_RANGE,
SCM_VALIDATE_INUM_RANGE_COPY): Deprecated because they make the
fixnum/bignum distinction visible.  Changed all uses to scm_to_size_t
or similar.

											
										
										
											2004-07-10 14:35:36 +00:00
+								  unsigned long idx;
-												* Deprecated SCM_RWSTRINGP and SCM_VALIDATE_RWSTRING.
* Prepared SCM_STRING_U?CHARS to replace SCM_ROU?CHARS.

											
										
										
											2000-11-21 17:32:38 +00:00
+								  SCM_VALIDATE_STRING (1, str);
-												Fix sloppy bound checking in `string-{ref,set!}' with the empty string.

* libguile/strings.c (scm_string_ref): Add proper range checking for the
  empty string.
  (scm_string_set_x): Likewise.
  Reported by Bill Schottstaedt <bil@ccrma.Stanford.EDU>.

* test-suite/tests/strings.test ("string-ref"): New test prefix.
  ("string-set!")["empty string", "empty string and non-zero index",
  "out of range", "negative index", "regular string"]: New tests.

* NEWS: Update.

											
										
										
											2008-12-02 19:42:39 +01:00
 								  len = scm_i_string_length (str);
 								  if (SCM_LIKELY (len > 0))
 								    idx = scm_to_unsigned_integer (k, 0, len - 1);
 								  else
 								    scm_out_of_range (NULL, k);
--07-20  Han-Wen  <hanwen@cs.uu.nl>

* *.c: add space after commas everywhere.

* *.c: use SCM_VECTOR_SET everywhere, where a vector is written.
Document cases where SCM_WRITABLE_VELTS() is used.

* vectors.h (SCM_VELTS): prepare for write barrier, and let
SCM_VELTS() return a const pointer
(SCM_VECTOR_SET): add macro.

* autogen.sh (mscripts): find and check version number of
autoconf. Complain if 2.53 is not found.

											
										
										
											2002-07-20 14:08:34 +00:00
+								  SCM_VALIDATE_CHAR (3, chr);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  str = scm_i_string_start_writing (str);
 								  scm_i_string_set_x (str, idx, SCM_CHAR (chr));
 								  scm_i_string_stop_writing ();
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								  return SCM_UNSPECIFIED;
 								}
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#undef FUNC_NAME
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								void
 								scm_c_string_set_x (SCM str, size_t p, SCM chr)
 								{
 								  if (p >= scm_i_string_length (str))
 								    scm_out_of_range (NULL, scm_from_size_t (p));
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  str = scm_i_string_start_writing (str);
 								  scm_i_string_set_x (str, p, SCM_CHAR (chr));
 								  scm_i_string_stop_writing ();
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								}
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												* *.[ch]: Whitespace changes -- added space after SCM_VALIDATE_*
macros and SCM_DEFINE macros to match GNU coding standards.

											
										
										
											2000-01-05 19:25:37 +00:00
+								SCM_DEFINE (scm_substring, "substring", 2, 1, 0,
-												(scm_string_p, scm_make_string, scm_read_only_string_p, scm_string_length)
(scm_string_ref, scm_string_set_x, scm_substring, scm_string_append):
Added texinfo markup.

											
										
										
											2001-02-17 11:27:41 +00:00
+									    (SCM str, SCM start, SCM end),
 									    "Return a newly allocated string formed from the characters\n"
 								            "of @var{str} beginning with index @var{start} (inclusive) and\n"
 									    "ending with index @var{end} (exclusive).\n"
 								            "@var{str} must be a string, @var{start} and @var{end} must be\n"
 									    "exact integers satisfying:\n\n"
 								            "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#define FUNC_NAME s_scm_substring
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  size_t len, from, to;
-												* Eliminated use of SCM_ASSERT to check for range errors.
* Fix some error reporting code in list.c
* Added some test cases.

											
										
										
											2000-06-30 10:46:35 +00:00
-												* Some more work to get rid of SCM_LENGTH
* Eliminated some cell type bit fiddling
* Various minor changes

											
										
										
											2000-10-25 11:01:03 +00:00
+								  SCM_VALIDATE_STRING (1, str);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  len = scm_i_string_length (str);
 								  from = scm_to_unsigned_integer (start, 0, len);
-												* validate.h, deprecated.h (SCM_VALIDATE_INUM, SCM_VALIDATE_INUM_COPY,
SCM_VALIDATE_BIGINT, SCM_VALIDATE_INUM_MIN,
SCM_VALIDATE_INUM_MIN_COPY,
SCM_VALIDATE_INUM_MIN_DEF_COPY,SCM_VALIDATE_INUM_DEF,
SCM_VALIDATE_INUM_DEF_COPY, SCM_VALIDATE_INUM_RANGE,
SCM_VALIDATE_INUM_RANGE_COPY): Deprecated because they make the
fixnum/bignum distinction visible.  Changed all uses to scm_to_size_t
or similar.

											
										
										
											2004-07-10 14:35:36 +00:00
+								  if (SCM_UNBNDP (end))
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								    to = len;
-												* validate.h, deprecated.h (SCM_VALIDATE_INUM, SCM_VALIDATE_INUM_COPY,
SCM_VALIDATE_BIGINT, SCM_VALIDATE_INUM_MIN,
SCM_VALIDATE_INUM_MIN_COPY,
SCM_VALIDATE_INUM_MIN_DEF_COPY,SCM_VALIDATE_INUM_DEF,
SCM_VALIDATE_INUM_DEF_COPY, SCM_VALIDATE_INUM_RANGE,
SCM_VALIDATE_INUM_RANGE_COPY): Deprecated because they make the
fixnum/bignum distinction visible.  Changed all uses to scm_to_size_t
or similar.

											
										
										
											2004-07-10 14:35:36 +00:00
+								  else
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								    to = scm_to_unsigned_integer (end, from, len);
 								  return scm_i_substring (str, from, to);
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#undef FUNC_NAME
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												(scm_substring_read_only,
scm_c_substring_read_only, scm_i_substring_read_only): New.
(RO_STRING_TAG, IS_RO_STRING): New.
(scm_i_string_writable_chars): Bail on read-only strings.

											
										
										
											2004-09-22 13:54:15 +00:00
+								SCM_DEFINE (scm_substring_read_only, "substring/read-only", 2, 1, 0,
 									    (SCM str, SCM start, SCM end),
 									    "Return a newly allocated string formed from the characters\n"
 								            "of @var{str} beginning with index @var{start} (inclusive) and\n"
 									    "ending with index @var{end} (exclusive).\n"
 								            "@var{str} must be a string, @var{start} and @var{end} must be\n"
 									    "exact integers satisfying:\n"
 									    "\n"
 								            "0 <= @var{start} <= @var{end} <= (string-length @var{str}).\n"
 									    "\n"
 									    "The returned string is read-only.\n")
 								#define FUNC_NAME s_scm_substring_read_only
 								{
 								  size_t len, from, to;
 								  SCM_VALIDATE_STRING (1, str);
 								  len = scm_i_string_length (str);
 								  from = scm_to_unsigned_integer (start, 0, len);
 								  if (SCM_UNBNDP (end))
 								    to = len;
 								  else
 								    to = scm_to_unsigned_integer (end, from, len);
 								  return scm_i_substring_read_only (str, from, to);
 								}
 								#undef FUNC_NAME
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								SCM_DEFINE (scm_substring_copy, "substring/copy", 2, 1, 0,
 									    (SCM str, SCM start, SCM end),
 									    "Return a newly allocated string formed from the characters\n"
 								            "of @var{str} beginning with index @var{start} (inclusive) and\n"
 									    "ending with index @var{end} (exclusive).\n"
 								            "@var{str} must be a string, @var{start} and @var{end} must be\n"
 									    "exact integers satisfying:\n\n"
 								            "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
 								#define FUNC_NAME s_scm_substring_copy
 								{
-												(get_str_buf_start): New helper function.
(scm_i_substring, scm_i_substring_copy, scm_i_substring_shared,
scm_i_string_char, scm_i_string_writable_chars): Use it.
(scm_i_substring_copy): Make START argument optional for C
callers, for upcoming SRFI-13 integration.

											
										
										
											2004-08-22 19:29:19 +00:00
+								  /* For the Scheme version, START is mandatory, but for the C
 								     version, it is optional.  See scm_string_copy in srfi-13.c for a
 								     rationale.
 								  */
 								  size_t from, to;
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
 								  SCM_VALIDATE_STRING (1, str);
-												(get_str_buf_start): New helper function.
(scm_i_substring, scm_i_substring_copy, scm_i_substring_shared,
scm_i_string_char, scm_i_string_writable_chars): Use it.
(scm_i_substring_copy): Make START argument optional for C
callers, for upcoming SRFI-13 integration.

											
										
										
											2004-08-22 19:29:19 +00:00
+								  scm_i_get_substring_spec (scm_i_string_length (str),
 											    start, &from, end, &to);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  return scm_i_substring_copy (str, from, to);
 								}
 								#undef FUNC_NAME
 								SCM_DEFINE (scm_substring_shared, "substring/shared", 2, 1, 0,
 									    (SCM str, SCM start, SCM end),
 									    "Return string that indirectly refers to the characters\n"
 								            "of @var{str} beginning with index @var{start} (inclusive) and\n"
 									    "ending with index @var{end} (exclusive).\n"
 								            "@var{str} must be a string, @var{start} and @var{end} must be\n"
 									    "exact integers satisfying:\n\n"
 								            "0 <= @var{start} <= @var{end} <= (string-length @var{str}).")
 								#define FUNC_NAME s_scm_substring_shared
 								{
 								  size_t len, from, to;
 								  SCM_VALIDATE_STRING (1, str);
 								  len = scm_i_string_length (str);
 								  from = scm_to_unsigned_integer (start, 0, len);
 								  if (SCM_UNBNDP (end))
 								    to = len;
 								  else
 								    to = scm_to_unsigned_integer (end, from, len);
 								  return scm_i_substring_shared (str, from, to);
 								}
 								#undef FUNC_NAME
-												* Eliminated use of SCM_ASSERT to check for range errors.
* Fix some error reporting code in list.c
* Added some test cases.

											
										
										
											2000-06-30 10:46:35 +00:00
-												* *.[ch]: Whitespace changes -- added space after SCM_VALIDATE_*
macros and SCM_DEFINE macros to match GNU coding standards.

											
										
										
											2000-01-05 19:25:37 +00:00
+								SCM_DEFINE (scm_string_append, "string-append", 0, 0, 1,
-												* tag.c:  Added doc for `tag', but mark as deprecated since Mikael
suggests removing tag.c altogether (and using a new `class-of'
instead).

* strings.c:  Added documentation from Gregg A. Reynolds.  Edited
a bit by me to use FOO instead of @var{foo} and to have the
summary come before preconditions on input.  Also dropped trailing
(rnrs) note.

* gsubr.c: Do not use SCM_DEFINE for `gsubr-apply'. Register the
function with scm_make_subr_opt w/ last arg of 0 so it is not
visible at the Scheme level. Mikael says (on devel-guile) that
this is the right thing because the first arg to the proc is the
guts of a compiled closure and shouldn't be exposed to the Scheme
level.

											
										
										
											2000-01-26 18:07:07 +00:00
+								            (SCM args),
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								            "Return a newly allocated string whose characters form the\n"
-												(scm_string_p, scm_make_string, scm_read_only_string_p, scm_string_length)
(scm_string_ref, scm_string_set_x, scm_substring, scm_string_append):
Added texinfo markup.

											
										
										
											2001-02-17 11:27:41 +00:00
+								            "concatenation of the given strings, @var{args}.")
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#define FUNC_NAME s_scm_string_append
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
 								  SCM res;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  size_t len = 0;
 								  int wide = 0;
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								  SCM l, s;
-												Some signed/unsigned comparison and conversions

* libguile/ports.c (scm_lfwrite_str, scm_lfwrite_substr): signed/unsigned
  conversion and comparison

* libguile/strings.c (scm_string_append): signed/unsigned comparison

											
										
										
											2009-08-12 08:50:12 -07:00
+								  size_t i;
-												Avoid unitialized and unused warnings in scm_string_append

* libguile/strings.c (scm_string_append): avoid warnings

											
										
										
											2009-08-11 22:01:20 -07:00
+								  union
 								  {
 								    char *narrow;
 								    scm_t_wchar *wide;
 								  } data;
-												* Unified some rest argument checking and handling.

											
										
										
											2000-05-18 08:47:52 +00:00
 								  SCM_VALIDATE_REST_ARGUMENT (args);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  for (l = args; !scm_is_null (l); l = SCM_CDR (l))
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								    {
 								      s = SCM_CAR (l);
 								      SCM_VALIDATE_STRING (SCM_ARGn, s);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								      len += scm_i_string_length (s);
 								      if (!scm_i_is_narrow_string (s))
 								        wide = 1;
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								    }
-												Avoid unitialized and unused warnings in scm_string_append

* libguile/strings.c (scm_string_append): avoid warnings

											
										
										
											2009-08-11 22:01:20 -07:00
+								  data.narrow = NULL;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  if (!wide)
-												Avoid unitialized and unused warnings in scm_string_append

* libguile/strings.c (scm_string_append): avoid warnings

											
										
										
											2009-08-11 22:01:20 -07:00
+								    res = scm_i_make_string (len, &data.narrow);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  else
-												Avoid unitialized and unused warnings in scm_string_append

* libguile/strings.c (scm_string_append): avoid warnings

											
										
										
											2009-08-11 22:01:20 -07:00
+								    res = scm_i_make_wide_string (len, &data.wide);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
 								  for (l = args; !scm_is_null (l); l = SCM_CDR (l))
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								    {
--09-03  Stefan Jahn  <stefan@lkcc.org>

        * configure.in (isinf): Let configure find the isinf() function
        on MinGW32 systems.

2004-09-03  Stefan Jahn  <stefan@lkcc.org>

        * threads.c (scm_threads_mark_stacks):  Fixed local variable
        definitions.

        * strings.c (scm_i_substring_copy, s_scm_string_append): Fixed
        local variable definitions.

        * stime.c (_POSIX_C_SOURCE):  Do not define this item on
        MinGW32 because it conflicts with its pthread headers.
        (s_scm_mktime): Consider the HAVE_STRUCT_TM_TM_ZONE define.
        (s_scm_strftime): Using scm_from_locale_string() instead of
        scm_makfrom0str().

        * posix.c (s_scm_putenv): Fixed typo in the !HAVE_UNSETENV
        part.

        * numbers.c (scm_init_numbers): Removed check_sanity() call
        inside GUILE_DEBUG.  The function has been removed somewhen...

        * filesys.c (_POSIX_C_SOURCE): Do not define this item on
        MinGW32 because it conflicts with its pthread headers.

2004-09-03  Stefan Jahn  <stefan@lkcc.org>

        * srfi-1.c, srfi-1.h: Renamed any 'lst1' into 'list1' because
        lst1 is a #define on Win32 systems.

											
										
										
											2004-09-03 19:45:37 +00:00
+								      size_t len;
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								      s = SCM_CAR (l);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								      SCM_VALIDATE_STRING (SCM_ARGn, s);
--09-03  Stefan Jahn  <stefan@lkcc.org>

        * configure.in (isinf): Let configure find the isinf() function
        on MinGW32 systems.

2004-09-03  Stefan Jahn  <stefan@lkcc.org>

        * threads.c (scm_threads_mark_stacks):  Fixed local variable
        definitions.

        * strings.c (scm_i_substring_copy, s_scm_string_append): Fixed
        local variable definitions.

        * stime.c (_POSIX_C_SOURCE):  Do not define this item on
        MinGW32 because it conflicts with its pthread headers.
        (s_scm_mktime): Consider the HAVE_STRUCT_TM_TM_ZONE define.
        (s_scm_strftime): Using scm_from_locale_string() instead of
        scm_makfrom0str().

        * posix.c (s_scm_putenv): Fixed typo in the !HAVE_UNSETENV
        part.

        * numbers.c (scm_init_numbers): Removed check_sanity() call
        inside GUILE_DEBUG.  The function has been removed somewhen...

        * filesys.c (_POSIX_C_SOURCE): Do not define this item on
        MinGW32 because it conflicts with its pthread headers.

2004-09-03  Stefan Jahn  <stefan@lkcc.org>

        * srfi-1.c, srfi-1.h: Renamed any 'lst1' into 'list1' because
        lst1 is a #define on Win32 systems.

											
										
										
											2004-09-03 19:45:37 +00:00
+								      len = scm_i_string_length (s);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								      if (!wide)
 								        {
-												Avoid unitialized and unused warnings in scm_string_append

* libguile/strings.c (scm_string_append): avoid warnings

											
										
										
											2009-08-11 22:01:20 -07:00
+								          memcpy (data.narrow, scm_i_string_chars (s), len);
 								          data.narrow += len;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								        }
 								      else
 								        {
 								          if (scm_i_is_narrow_string (s))
 								            {
 								              for (i = 0; i < scm_i_string_length (s); i++)
-												Avoid unitialized and unused warnings in scm_string_append

* libguile/strings.c (scm_string_append): avoid warnings

											
										
										
											2009-08-11 22:01:20 -07:00
+								                data.wide[i] = (unsigned char) scm_i_string_chars (s)[i];
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								            }
 								          else
-												Avoid unitialized and unused warnings in scm_string_append

* libguile/strings.c (scm_string_append): avoid warnings

											
										
										
											2009-08-11 22:01:20 -07:00
+								            u32_cpy ((scm_t_uint32 *) data.wide,
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								                     (scm_t_uint32 *) scm_i_string_wide_chars (s), len);
-												Avoid unitialized and unused warnings in scm_string_append

* libguile/strings.c (scm_string_append): avoid warnings

											
										
										
											2009-08-11 22:01:20 -07:00
+								          data.wide += len;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								        }
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								      scm_remember_upto_here_1 (s);
 								    }
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								  return res;
 								}
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#undef FUNC_NAME
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												* eval.[ch] (scm_deval_args):  Made static.

* srcprop.c (scm_source_property):  Remove redundant SCM_IMP
test.

* strings.c (scm_c_string2str):  Clarified comment.  Replaced
THINKME by FIXME for uniformness.  Removed question about whether
arguments need to be protected from garbage collection:  Arguments
must be protected as any other variable.

											
										
										
											2002-01-10 21:11:22 +00:00
-												Use `encoding-error' instead of `misc-error' for string encoding errors.

* libguile/strings.c (scm_encoding_error): New function.
  (scm_from_stringn, scm_to_stringn): Use it instead of `scm_misc_error ()'.

* test-suite/lib.scm (exception:encoding-error): Adjust accordingly.

* test-suite/tests/encoding-escapes.test (exception:conversion):
  Remove.  Use `exception:encoding-error' instead.

* test-suite/tests/encoding-iso88591.test: Likewise.

* test-suite/tests/encoding-iso88597.test: Likewise.

* test-suite/tests/encoding-utf8.test: Likewise.

											
										
										
											2010-01-07 00:37:10 +01:00
-												Have `read-char' & co. throw to `decoding-error'.

* libguile/ports.c (scm_read_char): Mention `decoding-error' in the
  docstring.
  (get_codepoint): Change to return an error code; add `codepoint'
  output parameter.  Don't raise an error from here.
  (scm_getc): Raise an error with `scm_decoding_error' if
  `get_codepoint' returns an error.
  (scm_peek_char): Likewise.  Update docstring.

* libguile/strings.c (scm_decoding_error_key): New variable.
  (scm_decoding_error): New function.
  (scm_from_stringn): Use `scm_decoding_error' instead of
  `scm_encoding_error'.

* libguile/strings.h (scm_decoding_error): New declaration.

* test-suite/tests/ports.test ("string ports")["read-char, wrong
  encoding, error"]: Change to expect `decoding-error'.  Make sure PORT
  points past the error.
  ["read-char, wrong encoding, escape"]: Likewise.
  ["peek-char, wrong encoding, error"]: New test.

* test-suite/tests/r6rs-ports.test ("7.2.11 Binary
  Output")["put-bytevector with wrong-encoding string port"]: Change to
  expect `decoding-error'.
  ("8.2.6  Input and output ports")["transcoded-port [error handling
  mode = raise]"]: Likewise.

* test-suite/tests/rdelim.test ("read-line")["decoding error", "decoding
  error, substitute"]: New tests.

* doc/ref/api-io.texi (Reading): Update documentation of `read-char' and
  `peek-char'.
  (Line/Delimited): Update documentation of `read-line'.

											
										
										
											2011-02-02 15:52:56 +01:00
+								/* Charset conversion error handling.  */
-												Use `encoding-error' instead of `misc-error' for string encoding errors.

* libguile/strings.c (scm_encoding_error): New function.
  (scm_from_stringn, scm_to_stringn): Use it instead of `scm_misc_error ()'.

* test-suite/lib.scm (exception:encoding-error): Adjust accordingly.

* test-suite/tests/encoding-escapes.test (exception:conversion):
  Remove.  Use `exception:encoding-error' instead.

* test-suite/tests/encoding-iso88591.test: Likewise.

* test-suite/tests/encoding-iso88597.test: Likewise.

* test-suite/tests/encoding-utf8.test: Likewise.

											
										
										
											2010-01-07 00:37:10 +01:00
 								SCM_SYMBOL (scm_encoding_error_key, "encoding-error");
-												Have `read-char' & co. throw to `decoding-error'.

* libguile/ports.c (scm_read_char): Mention `decoding-error' in the
  docstring.
  (get_codepoint): Change to return an error code; add `codepoint'
  output parameter.  Don't raise an error from here.
  (scm_getc): Raise an error with `scm_decoding_error' if
  `get_codepoint' returns an error.
  (scm_peek_char): Likewise.  Update docstring.

* libguile/strings.c (scm_decoding_error_key): New variable.
  (scm_decoding_error): New function.
  (scm_from_stringn): Use `scm_decoding_error' instead of
  `scm_encoding_error'.

* libguile/strings.h (scm_decoding_error): New declaration.

* test-suite/tests/ports.test ("string ports")["read-char, wrong
  encoding, error"]: Change to expect `decoding-error'.  Make sure PORT
  points past the error.
  ["read-char, wrong encoding, escape"]: Likewise.
  ["peek-char, wrong encoding, error"]: New test.

* test-suite/tests/r6rs-ports.test ("7.2.11 Binary
  Output")["put-bytevector with wrong-encoding string port"]: Change to
  expect `decoding-error'.
  ("8.2.6  Input and output ports")["transcoded-port [error handling
  mode = raise]"]: Likewise.

* test-suite/tests/rdelim.test ("read-line")["decoding error", "decoding
  error, substitute"]: New tests.

* doc/ref/api-io.texi (Reading): Update documentation of `read-char' and
  `peek-char'.
  (Line/Delimited): Update documentation of `read-line'.

											
										
										
											2011-02-02 15:52:56 +01:00
+								SCM_SYMBOL (scm_decoding_error_key, "decoding-error");
-												Change `scm_encoding_error' to pass the port and faulty character.

* libguile/strings.c (scm_encoding_error): Remove the `from', `to', and
  `string_or_bv' parameters; add `port' and `chr'.
  (scm_to_stringn): Update accordingly.

* libguile/strings.h (scm_encoding_error): Update accordingly.

* libguile/ports.c (scm_ungetc): Update accordingly.

* libguile/print.c (iprin1, scm_write_char): Update accordingly.

* test-suite/tests/encoding-escapes.test ("display output
  errors")["ultima", "Rashomon"]: Check the arguments of
  `encoding-error'.
  ["tekniko"]: New test.

* test-suite/tests/ports.test ("string ports")["wrong encoding"]: Adjust
  to new `encoding-error' arguments.

											
										
										
											2011-02-02 17:38:03 +01:00
+								/* Raise an exception informing that character CHR could not be written
 								   to PORT in its current encoding.  */
-												Expose `scm_encoding_error'.

* libguile/strings.c (scm_encoding_error): Make public.

* libguile/strings.h (scm_encoding_error): New internal declaration.

											
										
										
											2010-07-15 18:45:29 +02:00
+								void
-												Improve encoding error reporting.

* libguile/strings.c (scm_encoding_error): Change arguments to convey
  more information.  Raise the error with `scm_throw ()', passing all
  the information to the handler.
  (scm_from_stringn, scm_to_stringn): Update accordingly.

* test-suite/tests/ports.test ("string ports")["wrong encoding"]: Check
  the arguments passed to the `throw' handler.

* test-suite/tests/r6rs-ports.test ("7.2.11 Binary
  Output")["put-bytevector with wrong-encoding string port"]: Likewise.

											
										
										
											2010-03-18 20:23:12 +01:00
+								scm_encoding_error (const char *subr, int err, const char *message,
-												Change `scm_encoding_error' to pass the port and faulty character.

* libguile/strings.c (scm_encoding_error): Remove the `from', `to', and
  `string_or_bv' parameters; add `port' and `chr'.
  (scm_to_stringn): Update accordingly.

* libguile/strings.h (scm_encoding_error): Update accordingly.

* libguile/ports.c (scm_ungetc): Update accordingly.

* libguile/print.c (iprin1, scm_write_char): Update accordingly.

* test-suite/tests/encoding-escapes.test ("display output
  errors")["ultima", "Rashomon"]: Check the arguments of
  `encoding-error'.
  ["tekniko"]: New test.

* test-suite/tests/ports.test ("string ports")["wrong encoding"]: Adjust
  to new `encoding-error' arguments.

											
										
										
											2011-02-02 17:38:03 +01:00
+										    SCM port, SCM chr)
-												Improve encoding error reporting.

* libguile/strings.c (scm_encoding_error): Change arguments to convey
  more information.  Raise the error with `scm_throw ()', passing all
  the information to the handler.
  (scm_from_stringn, scm_to_stringn): Update accordingly.

* test-suite/tests/ports.test ("string ports")["wrong encoding"]: Check
  the arguments passed to the `throw' handler.

* test-suite/tests/r6rs-ports.test ("7.2.11 Binary
  Output")["put-bytevector with wrong-encoding string port"]: Likewise.

											
										
										
											2010-03-18 20:23:12 +01:00
+								{
 								  scm_throw (scm_encoding_error_key,
 									     scm_list_n (scm_from_locale_string (subr),
 											 scm_from_locale_string (message),
 											 scm_from_int (err),
-												Change `scm_encoding_error' to pass the port and faulty character.

* libguile/strings.c (scm_encoding_error): Remove the `from', `to', and
  `string_or_bv' parameters; add `port' and `chr'.
  (scm_to_stringn): Update accordingly.

* libguile/strings.h (scm_encoding_error): Update accordingly.

* libguile/ports.c (scm_ungetc): Update accordingly.

* libguile/print.c (iprin1, scm_write_char): Update accordingly.

* test-suite/tests/encoding-escapes.test ("display output
  errors")["ultima", "Rashomon"]: Check the arguments of
  `encoding-error'.
  ["tekniko"]: New test.

* test-suite/tests/ports.test ("string ports")["wrong encoding"]: Adjust
  to new `encoding-error' arguments.

											
										
										
											2011-02-02 17:38:03 +01:00
+											 port, chr,
-												Improve encoding error reporting.

* libguile/strings.c (scm_encoding_error): Change arguments to convey
  more information.  Raise the error with `scm_throw ()', passing all
  the information to the handler.
  (scm_from_stringn, scm_to_stringn): Update accordingly.

* test-suite/tests/ports.test ("string ports")["wrong encoding"]: Check
  the arguments passed to the `throw' handler.

* test-suite/tests/r6rs-ports.test ("7.2.11 Binary
  Output")["put-bytevector with wrong-encoding string port"]: Likewise.

											
										
										
											2010-03-18 20:23:12 +01:00
+											 SCM_UNDEFINED));
-												Use `encoding-error' instead of `misc-error' for string encoding errors.

* libguile/strings.c (scm_encoding_error): New function.
  (scm_from_stringn, scm_to_stringn): Use it instead of `scm_misc_error ()'.

* test-suite/lib.scm (exception:encoding-error): Adjust accordingly.

* test-suite/tests/encoding-escapes.test (exception:conversion):
  Remove.  Use `exception:encoding-error' instead.

* test-suite/tests/encoding-iso88591.test: Likewise.

* test-suite/tests/encoding-iso88597.test: Likewise.

* test-suite/tests/encoding-utf8.test: Likewise.

											
										
										
											2010-01-07 00:37:10 +01:00
+								}
-												Have `read-char' & co. throw to `decoding-error'.

* libguile/ports.c (scm_read_char): Mention `decoding-error' in the
  docstring.
  (get_codepoint): Change to return an error code; add `codepoint'
  output parameter.  Don't raise an error from here.
  (scm_getc): Raise an error with `scm_decoding_error' if
  `get_codepoint' returns an error.
  (scm_peek_char): Likewise.  Update docstring.

* libguile/strings.c (scm_decoding_error_key): New variable.
  (scm_decoding_error): New function.
  (scm_from_stringn): Use `scm_decoding_error' instead of
  `scm_encoding_error'.

* libguile/strings.h (scm_decoding_error): New declaration.

* test-suite/tests/ports.test ("string ports")["read-char, wrong
  encoding, error"]: Change to expect `decoding-error'.  Make sure PORT
  points past the error.
  ["read-char, wrong encoding, escape"]: Likewise.
  ["peek-char, wrong encoding, error"]: New test.

* test-suite/tests/r6rs-ports.test ("7.2.11 Binary
  Output")["put-bytevector with wrong-encoding string port"]: Change to
  expect `decoding-error'.
  ("8.2.6  Input and output ports")["transcoded-port [error handling
  mode = raise]"]: Likewise.

* test-suite/tests/rdelim.test ("read-line")["decoding error", "decoding
  error, substitute"]: New tests.

* doc/ref/api-io.texi (Reading): Update documentation of `read-char' and
  `peek-char'.
  (Line/Delimited): Update documentation of `read-line'.

											
										
										
											2011-02-02 15:52:56 +01:00
+								/* Raise an exception informing of an encoding error on PORT.  This
 								   means that a character could not be written in PORT's encoding.  */
 								void
 								scm_decoding_error (const char *subr, int err, const char *message, SCM port)
 								{
 								  scm_throw (scm_decoding_error_key,
 									     scm_list_n (scm_from_locale_string (subr),
 											 scm_from_locale_string (message),
 											 scm_from_int (err),
 											 port,
 											 SCM_UNDEFINED));
 								}
 								/* String conversion to/from C.  */
-												Fix encoding errors with strings returned by string ports

String ports, being 8-bit, store strings using the character encoding
of the port.  This fixes a bug where the default character encoding, and
not the port's encoding, was being used to convert the string port data
back to a string.

* libguile/strports.c: extra comments
  (scm_strport_to_string):  use port's encoding when converting port data
  to a string

* libguile/strings.c (scm_i_from_stringn): renamed from scm_from_stringn
  and made internal.  All callers changed.
  (scm_from_stringn): renamed to scm_i_from_stringn.

* libguile/strings.h: declaration for scm_i_from_stringn

											
										
										
											2009-08-30 15:41:56 -07:00
+								SCM
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
+								scm_from_stringn (const char *str, size_t len, const char *encoding,
 								                  scm_t_string_failed_conversion_handler handler)
 								{
 								  size_t u32len, i;
 								  scm_t_wchar *u32;
 								  int wide = 0;
 								  SCM res;
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								  /* The order of these checks is important. */
-												fix scm_from_stringn empty string case

* libguile/strings.c (scm_from_stringn): Fix empty string case
  (oops...).

											
										
										
											2011-01-07 22:16:10 -08:00
+								  if (!str && len != 0)
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								    scm_misc_error ("scm_from_stringn", "NULL string pointer", SCM_EOL);
 								  if (len == (size_t) -1)
 								    len = strlen (str);
-												fix scm_from_stringn empty string case

* libguile/strings.c (scm_from_stringn): Fix empty string case
  (oops...).

											
										
										
											2011-01-07 22:16:10 -08:00
+								  if (len == 0)
 								    return scm_nullstr;
-												Fix encoding errors with strings returned by string ports

String ports, being 8-bit, store strings using the character encoding
of the port.  This fixes a bug where the default character encoding, and
not the port's encoding, was being used to convert the string port data
back to a string.

* libguile/strports.c: extra comments
  (scm_strport_to_string):  use port's encoding when converting port data
  to a string

* libguile/strings.c (scm_i_from_stringn): renamed from scm_from_stringn
  and made internal.  All callers changed.
  (scm_from_stringn): renamed to scm_i_from_stringn.

* libguile/strings.h: declaration for scm_i_from_stringn

											
										
										
											2009-08-30 15:41:56 -07:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  if (encoding == NULL)
 								    {
 								      /* If encoding is null, use Latin-1.  */
 								      char *buf;
 								      res = scm_i_make_string (len, &buf);
 								      memcpy (buf, str, len);
 								      return res;
 								    }
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
+								  u32len = 0;
 								  u32 = (scm_t_wchar *) u32_conv_from_encoding (encoding,
 								                                                (enum iconv_ilseq_handler)
 								                                                handler,
 								                                                str, len,
 								                                                NULL,
 								                                                NULL, &u32len);
-												Improve encoding error reporting.

* libguile/strings.c (scm_encoding_error): Change arguments to convey
  more information.  Raise the error with `scm_throw ()', passing all
  the information to the handler.
  (scm_from_stringn, scm_to_stringn): Update accordingly.

* test-suite/tests/ports.test ("string ports")["wrong encoding"]: Check
  the arguments passed to the `throw' handler.

* test-suite/tests/r6rs-ports.test ("7.2.11 Binary
  Output")["put-bytevector with wrong-encoding string port"]: Likewise.

											
										
										
											2010-03-18 20:23:12 +01:00
+								  if (SCM_UNLIKELY (u32 == NULL))
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
+								    {
-												Improve encoding error reporting.

* libguile/strings.c (scm_encoding_error): Change arguments to convey
  more information.  Raise the error with `scm_throw ()', passing all
  the information to the handler.
  (scm_from_stringn, scm_to_stringn): Update accordingly.

* test-suite/tests/ports.test ("string ports")["wrong encoding"]: Check
  the arguments passed to the `throw' handler.

* test-suite/tests/r6rs-ports.test ("7.2.11 Binary
  Output")["put-bytevector with wrong-encoding string port"]: Likewise.

											
										
										
											2010-03-18 20:23:12 +01:00
+								      /* Raise an error and pass the raw C string as a bytevector to the `throw'
 									 handler.  */
 								      SCM bv;
 								      signed char *buf;
 								      buf = scm_gc_malloc_pointerless (len, "bytevector");
 								      memcpy (buf, str, len);
 								      bv = scm_c_take_bytevector (buf, len);
-												Have `read-char' & co. throw to `decoding-error'.

* libguile/ports.c (scm_read_char): Mention `decoding-error' in the
  docstring.
  (get_codepoint): Change to return an error code; add `codepoint'
  output parameter.  Don't raise an error from here.
  (scm_getc): Raise an error with `scm_decoding_error' if
  `get_codepoint' returns an error.
  (scm_peek_char): Likewise.  Update docstring.

* libguile/strings.c (scm_decoding_error_key): New variable.
  (scm_decoding_error): New function.
  (scm_from_stringn): Use `scm_decoding_error' instead of
  `scm_encoding_error'.

* libguile/strings.h (scm_decoding_error): New declaration.

* test-suite/tests/ports.test ("string ports")["read-char, wrong
  encoding, error"]: Change to expect `decoding-error'.  Make sure PORT
  points past the error.
  ["read-char, wrong encoding, escape"]: Likewise.
  ["peek-char, wrong encoding, error"]: New test.

* test-suite/tests/r6rs-ports.test ("7.2.11 Binary
  Output")["put-bytevector with wrong-encoding string port"]: Change to
  expect `decoding-error'.
  ("8.2.6  Input and output ports")["transcoded-port [error handling
  mode = raise]"]: Likewise.

* test-suite/tests/rdelim.test ("read-line")["decoding error", "decoding
  error, substitute"]: New tests.

* doc/ref/api-io.texi (Reading): Update documentation of `read-char' and
  `peek-char'.
  (Line/Delimited): Update documentation of `read-line'.

											
										
										
											2011-02-02 15:52:56 +01:00
+								      scm_decoding_error (__func__, errno,
 											  "input locale conversion error", bv);
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
+								    }
 								  i = 0;
 								  while (i < u32len)
 								    if (u32[i++] > 0xFF)
 								      {
 								        wide = 1;
 								        break;
 								      }
 								  if (!wide)
 								    {
 								      char *dst;
 								      res = scm_i_make_string (u32len, &dst);
 								      for (i = 0; i < u32len; i ++)
 								        dst[i] = (unsigned char) u32[i];
 								      dst[u32len] = '\0';
 								    }
 								  else
 								    {
 								      scm_t_wchar *wdst;
 								      res = scm_i_make_wide_string (u32len, &wdst);
 								      u32_cpy ((scm_t_uint32 *) wdst, (scm_t_uint32 *) u32, u32len);
 								      wdst[u32len] = 0;
 								    }
 								  free (u32);
 								  return res;
 								}
-												Provide non-locale C/Scheme string conversion functions

* doc/ref/api-data.texi: document scm_to_stringn, scm_from_stringn,
  scm_to_latin1_stringn, and scm_from_latin1_stringn
* libguile/strings.h (scm_to_stringn): make public
  (scm_to_latin1_stringn): new declaration
  (scm_from_latin1_stringn): new declaration
* libguile/strings.c (scm_to_latin1_stringn): new function
  (scm_from_latin1_stringn): new function

											
										
										
											2010-09-12 08:29:31 -07:00
+								SCM
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								scm_from_locale_string (const char *str)
-												Provide non-locale C/Scheme string conversion functions

* doc/ref/api-data.texi: document scm_to_stringn, scm_from_stringn,
  scm_to_latin1_stringn, and scm_from_latin1_stringn
* libguile/strings.h (scm_to_stringn): make public
  (scm_to_latin1_stringn): new declaration
  (scm_from_latin1_stringn): new declaration
* libguile/strings.c (scm_to_latin1_stringn): new function
  (scm_from_latin1_stringn): new function

											
										
										
											2010-09-12 08:29:31 -07:00
+								{
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								  return scm_from_locale_stringn (str, -1);
-												Provide non-locale C/Scheme string conversion functions

* doc/ref/api-data.texi: document scm_to_stringn, scm_from_stringn,
  scm_to_latin1_stringn, and scm_from_latin1_stringn
* libguile/strings.h (scm_to_stringn): make public
  (scm_to_latin1_stringn): new declaration
  (scm_from_latin1_stringn): new declaration
* libguile/strings.c (scm_to_latin1_stringn): new function
  (scm_from_latin1_stringn): new function

											
										
										
											2010-09-12 08:29:31 -07:00
+								}
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								SCM
 								scm_from_locale_stringn (const char *str, size_t len)
 								{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  const char *enc;
 								  scm_t_string_failed_conversion_handler hndl;
 								  SCM inport;
 								  scm_t_port *pt;
--12-08  Stefan Jahn  <stefan@lkcc.org>

        * strings.c (scm_c_string2str): New function.  Converts a
        given Scheme string into a C string.  Also put in two
        THINKME's regarding the malloc policy for the missing converter
        routines.

											
										
										
											2001-12-08 12:50:37 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  inport = scm_current_input_port ();
 								  if (!SCM_UNBNDP (inport) && SCM_OPINPORTP (inport))
 								    {
 								      pt = SCM_PTAB_ENTRY (inport);
 								      enc = pt->encoding;
 								      hndl = pt->ilseq_handler;
 								    }
 								  else
 								    {
 								      enc = NULL;
 								      hndl = SCM_FAILED_CONVERSION_ERROR;
 								    }
 								  return scm_from_stringn (str, len, enc, hndl);
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								}
--12-08  Stefan Jahn  <stefan@lkcc.org>

        * strings.c (scm_c_string2str): New function.  Converts a
        given Scheme string into a C string.  Also put in two
        THINKME's regarding the malloc policy for the missing converter
        routines.

											
										
										
											2001-12-08 12:50:37 +00:00
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								SCM
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								scm_from_latin1_string (const char *str)
--12-08  Stefan Jahn  <stefan@lkcc.org>

        * strings.c (scm_c_string2str): New function.  Converts a
        given Scheme string into a C string.  Also put in two
        THINKME's regarding the malloc policy for the missing converter
        routines.

											
										
										
											2001-12-08 12:50:37 +00:00
+								{
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								  return scm_from_latin1_stringn (str, -1);
 								}
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								SCM
 								scm_from_latin1_stringn (const char *str, size_t len)
 								{
-												Optimize `scm_{to,from}_latin1_string'.

* libguile/strings.c (scm_from_latin1_stringn): Directly return a narrow
  string instead of going through `scm_from_stringn'.
  (scm_to_latin1_stringn): Directly return a copy of STR's raw bytes when
  it's narrow.

											
										
										
											2011-01-25 23:24:36 +01:00
+								  char *buf;
 								  SCM result;
 								  if (len == (size_t) -1)
 								    len = strlen (str);
 								  /* Make a narrow string and copy STR as is.  */
 								  result = scm_i_make_string (len, &buf);
 								  memcpy (buf, str, len);
 								  return result;
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								}
--12-08  Stefan Jahn  <stefan@lkcc.org>

        * strings.c (scm_c_string2str): New function.  Converts a
        given Scheme string into a C string.  Also put in two
        THINKME's regarding the malloc policy for the missing converter
        routines.

											
										
										
											2001-12-08 12:50:37 +00:00
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
+								SCM
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								scm_from_utf8_string (const char *str)
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
+								{
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								  return scm_from_utf8_stringn (str, -1);
 								}
 								SCM
 								scm_from_utf8_stringn (const char *str, size_t len)
 								{
 								  return scm_from_stringn (str, len, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
+								}
-												Add `scm_{to,from}_utf32_string'.

* libguile/strings.c (scm_from_utf32_string, scm_from_utf32_stringn,
  scm_to_utf32_string, scm_to_utf32_stringn): New functions.

* libguile/strings.h (scm_from_utf32_string, scm_from_utf32_stringn,
  scm_to_utf32_string, scm_to_utf32_stringn): New declarations.

* doc/ref/api-data.texi (Conversion to/from C): Document
  `scm_{to,from}_{utf8,utf32}_stringn'.

											
										
										
											2011-01-25 23:36:35 +01:00
+								SCM
 								scm_from_utf32_string (const scm_t_wchar *str)
 								{
 								  return scm_from_utf32_stringn (str, -1);
 								}
 								SCM
 								scm_from_utf32_stringn (const scm_t_wchar *str, size_t len)
 								{
 								  SCM result;
 								  scm_t_wchar *buf;
 								  if (len == (size_t) -1)
 								    len = u32_strlen ((uint32_t *) str);
 								  result = scm_i_make_wide_string (len, &buf);
 								  memcpy (buf, str, len * sizeof (scm_t_wchar));
 								  scm_i_try_narrow_string (result);
 								  return result;
 								}
-												More comments for string functions

        * libguile/strings.c: comments

											
										
										
											2009-08-09 18:06:59 -07:00
+								/* Create a new scheme string from the C string STR.  The memory of
 								   STR may be used directly as storage for the new string.  */
-												Fix leaky handling of `scm_take_locale_{symbol,string} ()'.

* libguile/strings.c (scm_i_take_stringbufn, scm_i_c_take_symbol):
  Remove.
  (scm_take_locale_stringn): Rewrite in terms of `scm_from_locale_stringn ()'.

* libguile/strings.h (scm_i_c_take_symbol, scm_i_take_stringbufn):
  Remove declarations.

											
										
										
											2009-09-01 00:38:40 +02:00
+								/* FIXME: GC-wise, the only way to use the memory area pointed to by STR
 								   would be to register a finalizer to eventually free(3) STR, which isn't
 								   worth it.  Should we just deprecate the `scm_take_' functions?  */
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								SCM
 								scm_take_locale_stringn (char *str, size_t len)
 								{
-												Fix leaky handling of `scm_take_locale_{symbol,string} ()'.

* libguile/strings.c (scm_i_take_stringbufn, scm_i_c_take_symbol):
  Remove.
  (scm_take_locale_stringn): Rewrite in terms of `scm_from_locale_stringn ()'.

* libguile/strings.h (scm_i_c_take_symbol, scm_i_take_stringbufn):
  Remove declarations.

											
										
										
											2009-09-01 00:38:40 +02:00
+								  SCM res;
-												(scm_take_locale_stringn): Use realloc to make room for
null-terminator, rather than mallocing a whole new block.
(scm_take_locale_string): Use scm_take_locale_stringn len==-1.

											
										
										
											2005-06-24 21:43:36 +00:00
-												Fix leaky handling of `scm_take_locale_{symbol,string} ()'.

* libguile/strings.c (scm_i_take_stringbufn, scm_i_c_take_symbol):
  Remove.
  (scm_take_locale_stringn): Rewrite in terms of `scm_from_locale_stringn ()'.

* libguile/strings.h (scm_i_c_take_symbol, scm_i_take_stringbufn):
  Remove declarations.

											
										
										
											2009-09-01 00:38:40 +02:00
+								  res = scm_from_locale_stringn (str, len);
 								  free (str);
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
 								  return res;
 								}
-												(scm_take_locale_stringn): Use realloc to make room for
null-terminator, rather than mallocing a whole new block.
(scm_take_locale_string): Use scm_take_locale_stringn len==-1.

											
										
										
											2005-06-24 21:43:36 +00:00
+								SCM
 								scm_take_locale_string (char *str)
 								{
 								  return scm_take_locale_stringn (str, -1);
 								}
-												Fix write-beyond-end-of-string error in the conversion to R6RS string escapes.

Reported by Mike Gran <spk121@yahoo.com>.

* libguile/strings.c (scm_i_unistring_escapes_to_guile_escapes,
  scm_i_unistring_escapes_to_r6rs_escapes): Augment comments.
  (scm_to_stringn): When `handler ==
  SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE && SCM_R6RS_ESCAPES_P', realloc
  BUF so that it's large enough for the worst case.

* libguile/print.c (display_character): When `result != NULL && strategy
  == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE && SCM_R6RS_ESCAPES_P', make
  LOCALE_ENCODED large enough to hold an R6RS escape.

											
										
										
											2010-09-15 23:32:28 +02:00
+								/* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
 								   *LENP-byte locale-encoded string, to `\xXX', `\uXXXX', or `\UXXXXXX'.
-												Hide the string escaping hacks.

* libguile/strings.c (scm_i_unistring_escapes_to_guile_escapes): Rename
  to...
  (unistring_escapes_to_guile_escapes): ... this.  Make `static'.
  (scm_i_unistring_escapes_to_r6rs_escapes): Rename to...
  (unistring_escapes_to_r6rs_escapes): ... this.  Make `static'.

* libguile/strings.h (scm_i_unistring_escapes_to_guile_escapes,
  scm_i_unistring_escapes_to_r6rs_escapes): Remove declarations.

											
										
										
											2011-01-23 00:13:27 +01:00
+								   Set *LENP to the size of the resulting string.
 								   FIXME: This is a hack we should get rid of.  See
 								   <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00004.html>
 								   for details.  */
 								static void
 								unistring_escapes_to_guile_escapes (char *buf, size_t *lenp)
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								{
 								  char *before, *after;
 								  size_t i, j;
-												Internally expose `scm_i_unistring_escapes_to_{guile,r6rs}_escapes'.

* libguile/strings.c (unistring_escapes_to_guile_escapes): Rename to...
  (scm_i_unistring_escapes_to_guile_escapes): ... this.  Change `char **bufp'
  to `char *buf'; leave realloc responsibility to the caller.  Update caller.
  (unistring_escapes_to_r6rs_escapes): Rename to...
  (scm_i_unistring_escapes_to_r6rs_escapes): ... this.  Likewise.

											
										
										
											2010-09-14 16:04:26 +02:00
+								  before = buf;
 								  after = buf;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  i = 0;
 								  j = 0;
 								  while (i < *lenp)
 								    {
 								      if ((i <= *lenp - 6)
 								          && before[i] == '\\'
 								          && before[i + 1] == 'u'
 								          && before[i + 2] == '0' && before[i + 3] == '0')
 								        {
 								          /* Convert \u00NN to \xNN */
 								          after[j] = '\\';
 								          after[j + 1] = 'x';
-												Only pass ints to tolower and toupper

* libguile/strings.c (unistring_escapes_to_guile_escapes): cast
  tolower's parameter to int

* libguile/read.c (CHAR_DOWNCASE): cast tolower's parameter to int

											
										
										
											2009-08-11 21:12:52 -07:00
+								          after[j + 2] = tolower ((int) before[i + 4]);
 								          after[j + 3] = tolower ((int) before[i + 5]);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								          i += 6;
 								          j += 4;
 								        }
 								      else if ((i <= *lenp - 10)
 								               && before[i] == '\\'
 								               && before[i + 1] == 'U'
 								               && before[i + 2] == '0' && before[i + 3] == '0')
 								        {
 								          /* Convert \U00NNNNNN to \UNNNNNN */
 								          after[j] = '\\';
 								          after[j + 1] = 'U';
-												Only pass ints to tolower and toupper

* libguile/strings.c (unistring_escapes_to_guile_escapes): cast
  tolower's parameter to int

* libguile/read.c (CHAR_DOWNCASE): cast tolower's parameter to int

											
										
										
											2009-08-11 21:12:52 -07:00
+								          after[j + 2] = tolower ((int) before[i + 4]);
 								          after[j + 3] = tolower ((int) before[i + 5]);
 								          after[j + 4] = tolower ((int) before[i + 6]);
 								          after[j + 5] = tolower ((int) before[i + 7]);
 								          after[j + 6] = tolower ((int) before[i + 8]);
 								          after[j + 7] = tolower ((int) before[i + 9]);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								          i += 10;
 								          j += 8;
 								        }
 								      else
 								        {
 								          after[j] = before[i];
 								          i++;
 								          j++;
 								        }
 								    }
 								  *lenp = j;
 								}
-												Fix write-beyond-end-of-string error in the conversion to R6RS string escapes.

Reported by Mike Gran <spk121@yahoo.com>.

* libguile/strings.c (scm_i_unistring_escapes_to_guile_escapes,
  scm_i_unistring_escapes_to_r6rs_escapes): Augment comments.
  (scm_to_stringn): When `handler ==
  SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE && SCM_R6RS_ESCAPES_P', realloc
  BUF so that it's large enough for the worst case.

* libguile/print.c (display_character): When `result != NULL && strategy
  == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE && SCM_R6RS_ESCAPES_P', make
  LOCALE_ENCODED large enough to hold an R6RS escape.

											
										
										
											2010-09-15 23:32:28 +02:00
+								/* Change libunistring escapes (`\uXXXX' and `\UXXXXXXXX') in BUF, a
 								   *LENP-byte locale-encoded string, to `\xXXXX;'.  Set *LEN to the size
 								   of the resulting string.  BUF must be large enough to handle the
 								   worst case when `\uXXXX' escapes (6 characters) are replaced by
 								   `\xXXXX;' (7 characters).  */
-												Hide the string escaping hacks.

* libguile/strings.c (scm_i_unistring_escapes_to_guile_escapes): Rename
  to...
  (unistring_escapes_to_guile_escapes): ... this.  Make `static'.
  (scm_i_unistring_escapes_to_r6rs_escapes): Rename to...
  (unistring_escapes_to_r6rs_escapes): ... this.  Make `static'.

* libguile/strings.h (scm_i_unistring_escapes_to_guile_escapes,
  scm_i_unistring_escapes_to_r6rs_escapes): Remove declarations.

											
										
										
											2011-01-23 00:13:27 +01:00
+								static void
 								unistring_escapes_to_r6rs_escapes (char *buf, size_t *lenp)
-												R6RS string escapes broken on string output

scm_to_stringn failed to do the necessary escape conversion for
R6RS hex escapes

* libguile/strings.c (unistring_escapes_to_r6rs_escapes): new function
  (scm_to_stringn): use new function when r6rs hex escapes are enabled

* test-suite/tests/reader.test: new test for string display

											
										
										
											2010-01-23 09:15:10 -08:00
+								{
 								  char *before, *after;
 								  size_t i, j;
 								  /* The worst case is if the input string contains all 4-digit hex escapes.
 								     "\uXXXX" (six characters) becomes "\xXXXX;" (seven characters) */
 								  size_t max_out_len = (*lenp * 7) / 6 + 1;
 								  size_t nzeros, ndigits;
-												Internally expose `scm_i_unistring_escapes_to_{guile,r6rs}_escapes'.

* libguile/strings.c (unistring_escapes_to_guile_escapes): Rename to...
  (scm_i_unistring_escapes_to_guile_escapes): ... this.  Change `char **bufp'
  to `char *buf'; leave realloc responsibility to the caller.  Update caller.
  (unistring_escapes_to_r6rs_escapes): Rename to...
  (scm_i_unistring_escapes_to_r6rs_escapes): ... this.  Likewise.

											
										
										
											2010-09-14 16:04:26 +02:00
+								  before = buf;
-												R6RS string escapes broken on string output

scm_to_stringn failed to do the necessary escape conversion for
R6RS hex escapes

* libguile/strings.c (unistring_escapes_to_r6rs_escapes): new function
  (scm_to_stringn): use new function when r6rs hex escapes are enabled

* test-suite/tests/reader.test: new test for string display

											
										
										
											2010-01-23 09:15:10 -08:00
+								  after = alloca (max_out_len);
 								  i = 0;
 								  j = 0;
 								  while (i < *lenp)
 								    {
 								      if (((i <= *lenp - 6) && before[i] == '\\' && before[i + 1] == 'u')
 								          || ((i <= *lenp - 10) && before[i] == '\\' && before[i + 1] == 'U'))
 								        {
 								          if (before[i + 1] == 'u')
 								            ndigits = 4;
 								          else if (before[i + 1] == 'U')
 								            ndigits = 8;
 								          else
 								            abort ();
 								          /* Add the R6RS hex escape initial sequence.  */
 								          after[j] = '\\';
 								          after[j + 1] = 'x';
 								          /* Move string positions to the start of the hex numbers.  */
 								          i += 2;
 								          j += 2;
 								          /* Find the number of initial zeros in this hex number.  */
 								          nzeros = 0;
 								          while (before[i + nzeros] == '0' && nzeros < ndigits)
 								            nzeros++;
 								          /* Copy the number, skipping initial zeros, and then move the string
 								             positions.  */
 								          if (nzeros == ndigits)
 								            {
 								              after[j] = '0';
 								              i += ndigits;
 								              j += 1;
 								            }
 								          else
 								            {
 								              int pos;
 								              for (pos = 0; pos < ndigits - nzeros; pos++)
 								                after[j + pos] = tolower ((int) before[i + nzeros + pos]);
 								              i += ndigits;
 								              j += (ndigits - nzeros);
 								            }
 								          /* Add terminating semicolon.  */
 								          after[j] = ';';
 								          j++;
 								        }
 								      else
 								        {
 								          after[j] = before[i];
 								          i++;
 								          j++;
 								        }
 								    }
 								  *lenp = j;
 								  memcpy (before, after, j);
 								}
-												Provide non-locale C/Scheme string conversion functions

* doc/ref/api-data.texi: document scm_to_stringn, scm_from_stringn,
  scm_to_latin1_stringn, and scm_from_latin1_stringn
* libguile/strings.h (scm_to_stringn): make public
  (scm_to_latin1_stringn): new declaration
  (scm_from_latin1_stringn): new declaration
* libguile/strings.c (scm_to_latin1_stringn): new function
  (scm_from_latin1_stringn): new function

											
										
										
											2010-09-12 08:29:31 -07:00
+								char *
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								scm_to_locale_string (SCM str)
-												Provide non-locale C/Scheme string conversion functions

* doc/ref/api-data.texi: document scm_to_stringn, scm_from_stringn,
  scm_to_latin1_stringn, and scm_from_latin1_stringn
* libguile/strings.h (scm_to_stringn): make public
  (scm_to_latin1_stringn): new declaration
  (scm_from_latin1_stringn): new declaration
* libguile/strings.c (scm_to_latin1_stringn): new function
  (scm_from_latin1_stringn): new function

											
										
										
											2010-09-12 08:29:31 -07:00
+								{
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								  return scm_to_locale_stringn (str, NULL);
-												Provide non-locale C/Scheme string conversion functions

* doc/ref/api-data.texi: document scm_to_stringn, scm_from_stringn,
  scm_to_latin1_stringn, and scm_from_latin1_stringn
* libguile/strings.h (scm_to_stringn): make public
  (scm_to_latin1_stringn): new declaration
  (scm_from_latin1_stringn): new declaration
* libguile/strings.c (scm_to_latin1_stringn): new function
  (scm_from_latin1_stringn): new function

											
										
										
											2010-09-12 08:29:31 -07:00
+								}
-												R6RS string escapes broken on string output

scm_to_stringn failed to do the necessary escape conversion for
R6RS hex escapes

* libguile/strings.c (unistring_escapes_to_r6rs_escapes): new function
  (scm_to_stringn): use new function when r6rs hex escapes are enabled

* test-suite/tests/reader.test: new test for string display

											
										
										
											2010-01-23 09:15:10 -08:00
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								char *
-												Fix encoding errors with strings returned by string ports

String ports, being 8-bit, store strings using the character encoding
of the port.  This fixes a bug where the default character encoding, and
not the port's encoding, was being used to convert the string port data
back to a string.

* libguile/strports.c: extra comments
  (scm_strport_to_string):  use port's encoding when converting port data
  to a string

* libguile/strings.c (scm_i_from_stringn): renamed from scm_from_stringn
  and made internal.  All callers changed.
  (scm_from_stringn): renamed to scm_i_from_stringn.

* libguile/strings.h: declaration for scm_i_from_stringn

											
										
										
											2009-08-30 15:41:56 -07:00
+								scm_to_locale_stringn (SCM str, size_t *lenp)
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  SCM outport;
 								  scm_t_port *pt;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  const char *enc;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  outport = scm_current_output_port ();
 								  if (!SCM_UNBNDP (outport) && SCM_OPOUTPORTP (outport))
 								    {
 								      pt = SCM_PTAB_ENTRY (outport);
 								      enc = pt->encoding;
 								    }
 								  else
 								    enc = NULL;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  return scm_to_stringn (str, lenp,
 								                         enc,
 								                         scm_i_get_conversion_strategy (SCM_BOOL_F));
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								}
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								char *
 								scm_to_latin1_string (SCM str)
 								{
 								  return scm_to_latin1_stringn (str, NULL);
 								}
 								char *
 								scm_to_latin1_stringn (SCM str, size_t *lenp)
-												Optimize `scm_{to,from}_latin1_string'.

* libguile/strings.c (scm_from_latin1_stringn): Directly return a narrow
  string instead of going through `scm_from_stringn'.
  (scm_to_latin1_stringn): Directly return a copy of STR's raw bytes when
  it's narrow.

											
										
										
											2011-01-25 23:24:36 +01:00
+								#define FUNC_NAME "scm_to_latin1_stringn"
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								{
-												Optimize `scm_{to,from}_latin1_string'.

* libguile/strings.c (scm_from_latin1_stringn): Directly return a narrow
  string instead of going through `scm_from_stringn'.
  (scm_to_latin1_stringn): Directly return a copy of STR's raw bytes when
  it's narrow.

											
										
										
											2011-01-25 23:24:36 +01:00
+								  char *result;
 								  SCM_VALIDATE_STRING (1, str);
 								  if (scm_i_is_narrow_string (str))
 								    {
 								      if (lenp)
 									*lenp = scm_i_string_length (str);
 								      result = scm_strdup (scm_i_string_data (str));
 								    }
 								  else
 								    result = scm_to_stringn (str, lenp, NULL,
 											     SCM_FAILED_CONVERSION_ERROR);
 								  return result;
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
+								}
-												Optimize `scm_{to,from}_latin1_string'.

* libguile/strings.c (scm_from_latin1_stringn): Directly return a narrow
  string instead of going through `scm_from_stringn'.
  (scm_to_latin1_stringn): Directly return a copy of STR's raw bytes when
  it's narrow.

											
										
										
											2011-01-25 23:24:36 +01:00
+								#undef FUNC_NAME
-												add scm_{to,from}_{utf8,latin1}_string{n,}

* libguile/strings.h:
* libguile/strings.c (scm_from_latin1_string, scm_to_latin1_string): New
  functions, in terms of the latin1_stringn variants.
  (scm_from_utf8_string, scm_from_utf8_stringn)
  (scm_to_utf8_string, scm_to_utf8_stringn): New functions.
  (scm_i_from_utf8_string, scm_i_to_utf8_string): Removed these internal
  functions.
  (scm_from_stringn): Handle -1 as a length. Unlike the previous
  behavior of scm_from_locale_string (NULL), which returned the empty
  string, we now raise an error.  The null pointer is not the same as
  the empty string.

* libguile/stime.c (scm_strftime, scm_strptime): Adapt to publishing of
  utf8 functions.

											
										
										
											2011-01-05 18:21:54 -06:00
 								char *
 								scm_to_utf8_string (SCM str)
 								{
 								  return scm_to_utf8_stringn (str, NULL);
 								}
 								char *
 								scm_to_utf8_stringn (SCM str, size_t *lenp)
 								{
 								  return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
 								}
-												Add `scm_{to,from}_utf32_string'.

* libguile/strings.c (scm_from_utf32_string, scm_from_utf32_stringn,
  scm_to_utf32_string, scm_to_utf32_stringn): New functions.

* libguile/strings.h (scm_from_utf32_string, scm_from_utf32_stringn,
  scm_to_utf32_string, scm_to_utf32_stringn): New declarations.

* doc/ref/api-data.texi (Conversion to/from C): Document
  `scm_{to,from}_{utf8,utf32}_stringn'.

											
										
										
											2011-01-25 23:36:35 +01:00
+								scm_t_wchar *
 								scm_to_utf32_string (SCM str)
 								{
 								  return scm_to_utf32_stringn (str, NULL);
 								}
 								scm_t_wchar *
 								scm_to_utf32_stringn (SCM str, size_t *lenp)
 								#define FUNC_NAME "scm_to_utf32_stringn"
 								{
 								  scm_t_wchar *result;
 								  SCM_VALIDATE_STRING (1, str);
 								  if (scm_i_is_narrow_string (str))
 								    result = (scm_t_wchar *)
 								      scm_to_stringn (str, lenp, "UTF-32",
 										      SCM_FAILED_CONVERSION_ERROR);
 								  else
 								    {
 								      size_t len;
 								      len = scm_i_string_length (str);
 								      if (lenp)
 									*lenp = len;
 								      result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
 								      memcpy (result, scm_i_string_wide_chars (str),
 									      len * sizeof (scm_t_wchar));
 								      result[len] = 0;
 								    }
 								  return result;
 								}
 								#undef FUNC_NAME
-												Add in-source documentation of `scm_to_stringn ()'.

* libguile/strings.c (scm_to_stringn): Add documentation comment.

											
										
										
											2010-01-06 23:52:40 +01:00
+								/* Return a malloc(3)-allocated buffer containing the contents of STR encoded
 								   according to ENCODING.  If LENP is non-NULL, set it to the size in bytes of
 								   the returned buffer.  If the conversion to ENCODING fails, apply the strategy
 								   defined by HANDLER.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								char *
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
+								scm_to_stringn (SCM str, size_t *lenp, const char *encoding,
-												Don't include libunistring headers in Guile public headers

This requres the creation of a new type
scm_t_string_failed_conversion_handler to replace libunistring's
enum iconveh_ilseq_handler.

* libguile/strings.h: don't include <uniconv.h>
(scm_t_string_failed_conversion_handler): new enum type
(SCM_FAILED_CONVERSION_ERROR, SCM_FAILED_CONVERSION_QUESTION_MARK):
(SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE): new enum type values

* libguile/strings.c (scm_to_stringn): now takes type
scm_t_string_failed_conversion_handler.  All callers changed.

* libguile/print.c: include <uniconv.h>

* libguile/ports.c (scm_lfwrite_substr): use
scm_t_string_conversion_handler's constants

* libguile/gen-scmconfig.c (SCM_ICONVEH_ERROR):
(SCM_ICONVEH_QUESTION_MARK, SCM_ICONVEH_ESCAPE_SEQUENCE): store
iconveh_ilseq_hander constants as #define's

											
										
										
											2009-08-12 08:30:59 -07:00
+								                scm_t_string_failed_conversion_handler handler)
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								{
 								  char *buf;
 								  size_t ilen, len, i;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  int ret;
 								  const char *enc;
--12-08  Stefan Jahn  <stefan@lkcc.org>

        * strings.c (scm_c_string2str): New function.  Converts a
        given Scheme string into a C string.  Also put in two
        THINKME's regarding the malloc policy for the missing converter
        routines.

											
										
										
											2001-12-08 12:50:37 +00:00
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  if (!scm_is_string (str))
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								    scm_wrong_type_arg_msg (NULL, 0, str, "string");
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  ilen = scm_i_string_length (str);
 								  if (ilen == 0)
 								    {
 								      buf = scm_malloc (1);
 								      buf[0] = '\0';
 								      if (lenp)
 								        *lenp = 0;
 								      return buf;
 								    }
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								  if (lenp == NULL)
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								    for (i = 0; i < ilen; i++)
 								      if (scm_i_string_ref (str, i) == '\0')
 								        scm_misc_error (NULL,
 								                        "string contains #\\nul character: ~S",
 								                        scm_list_1 (str));
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  if (scm_i_is_narrow_string (str) && (encoding == NULL))
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								    {
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								      /* If using native Latin-1 encoding, just copy the string
 								         contents.  */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								      if (lenp)
 								        {
 								          buf = scm_malloc (ilen);
 								          memcpy (buf, scm_i_string_chars (str), ilen);
 								          *lenp = ilen;
 								          return buf;
 								        }
 								      else
 								        {
 								          buf = scm_malloc (ilen + 1);
 								          memcpy (buf, scm_i_string_chars (str), ilen);
 								          buf[ilen] = '\0';
 								          return buf;
 								        }
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								    }
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
-												Modify socket and time functions for wide strings

* libguile/socket.c (scm_recv): receive the message without holding the
  stringbuf writing lock
  (scm_send): try to narrow a string before using it

* libguile/stime.c (strftime): convert string to UTF-8 so that it can
  be safely passed to strftime
  (strptime): convert input string to UTF-8 so that it can be safely
  passed through strptime

* libguile/strings.c (narrow_stringbuf): new function
  (scm_i_try_narrow_string): new function

* libguile/strings.h: new declaration for scm_i_try_narrow_string

											
										
										
											2009-08-23 06:50:45 -07:00
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  buf = NULL;
 								  len = 0;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  enc = encoding;
 								  if (enc == NULL)
 								    enc = "ISO-8859-1";
 								  if (scm_i_is_narrow_string (str))
 								    {
 								      ret = mem_iconveh (scm_i_string_chars (str), ilen,
 								                         "ISO-8859-1", enc,
 								                         (enum iconv_ilseq_handler) handler, NULL,
 								                         &buf, &len);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								      if (ret != 0)
-												Improve encoding error reporting.

* libguile/strings.c (scm_encoding_error): Change arguments to convey
  more information.  Raise the error with `scm_throw ()', passing all
  the information to the handler.
  (scm_from_stringn, scm_to_stringn): Update accordingly.

* test-suite/tests/ports.test ("string ports")["wrong encoding"]: Check
  the arguments passed to the `throw' handler.

* test-suite/tests/r6rs-ports.test ("7.2.11 Binary
  Output")["put-bytevector with wrong-encoding string port"]: Likewise.

											
										
										
											2010-03-18 20:23:12 +01:00
+								        scm_encoding_error (__func__, errno,
-												Change `scm_encoding_error' to pass the port and faulty character.

* libguile/strings.c (scm_encoding_error): Remove the `from', `to', and
  `string_or_bv' parameters; add `port' and `chr'.
  (scm_to_stringn): Update accordingly.

* libguile/strings.h (scm_encoding_error): Update accordingly.

* libguile/ports.c (scm_ungetc): Update accordingly.

* libguile/print.c (iprin1, scm_write_char): Update accordingly.

* test-suite/tests/encoding-escapes.test ("display output
  errors")["ultima", "Rashomon"]: Check the arguments of
  `encoding-error'.
  ["tekniko"]: New test.

* test-suite/tests/ports.test ("string ports")["wrong encoding"]: Adjust
  to new `encoding-error' arguments.

											
										
										
											2011-02-02 17:38:03 +01:00
+											    "cannot convert narrow string to output locale",
 											    SCM_BOOL_F,
 											    /* FIXME: Faulty character unknown.  */
 											    SCM_BOOL_F);
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								    }
 								  else
 								    {
-												R6RS string escapes broken on string output

scm_to_stringn failed to do the necessary escape conversion for
R6RS hex escapes

* libguile/strings.c (unistring_escapes_to_r6rs_escapes): new function
  (scm_to_stringn): use new function when r6rs hex escapes are enabled

* test-suite/tests/reader.test: new test for string display

											
										
										
											2010-01-23 09:15:10 -08:00
+								      buf = u32_conv_to_encoding (enc,
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								                                  (enum iconv_ilseq_handler) handler,
-												R6RS string escapes broken on string output

scm_to_stringn failed to do the necessary escape conversion for
R6RS hex escapes

* libguile/strings.c (unistring_escapes_to_r6rs_escapes): new function
  (scm_to_stringn): use new function when r6rs hex escapes are enabled

* test-suite/tests/reader.test: new test for string display

											
										
										
											2010-01-23 09:15:10 -08:00
+								                                  (scm_t_uint32 *) scm_i_string_wide_chars (str),
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								                                  ilen,
 								                                  NULL,
 								                                  NULL, &len);
 								      if (buf == NULL)
-												Improve encoding error reporting.

* libguile/strings.c (scm_encoding_error): Change arguments to convey
  more information.  Raise the error with `scm_throw ()', passing all
  the information to the handler.
  (scm_from_stringn, scm_to_stringn): Update accordingly.

* test-suite/tests/ports.test ("string ports")["wrong encoding"]: Check
  the arguments passed to the `throw' handler.

* test-suite/tests/r6rs-ports.test ("7.2.11 Binary
  Output")["put-bytevector with wrong-encoding string port"]: Likewise.

											
										
										
											2010-03-18 20:23:12 +01:00
+								        scm_encoding_error (__func__, errno,
-												Change `scm_encoding_error' to pass the port and faulty character.

* libguile/strings.c (scm_encoding_error): Remove the `from', `to', and
  `string_or_bv' parameters; add `port' and `chr'.
  (scm_to_stringn): Update accordingly.

* libguile/strings.h (scm_encoding_error): Update accordingly.

* libguile/ports.c (scm_ungetc): Update accordingly.

* libguile/print.c (iprin1, scm_write_char): Update accordingly.

* test-suite/tests/encoding-escapes.test ("display output
  errors")["ultima", "Rashomon"]: Check the arguments of
  `encoding-error'.
  ["tekniko"]: New test.

* test-suite/tests/ports.test ("string ports")["wrong encoding"]: Adjust
  to new `encoding-error' arguments.

											
										
										
											2011-02-02 17:38:03 +01:00
+											    "cannot convert wide string to output locale",
 											    SCM_BOOL_F,
 											    /* FIXME: Faulty character unknown.  */
 											    SCM_BOOL_F);
-												R6RS string escapes broken on string output

scm_to_stringn failed to do the necessary escape conversion for
R6RS hex escapes

* libguile/strings.c (unistring_escapes_to_r6rs_escapes): new function
  (scm_to_stringn): use new function when r6rs hex escapes are enabled

* test-suite/tests/reader.test: new test for string display

											
										
										
											2010-01-23 09:15:10 -08:00
+								    }
 								  if (handler == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE)
 								    {
 								      if (SCM_R6RS_ESCAPES_P)
-												Fix write-beyond-end-of-string error in the conversion to R6RS string escapes.

Reported by Mike Gran <spk121@yahoo.com>.

* libguile/strings.c (scm_i_unistring_escapes_to_guile_escapes,
  scm_i_unistring_escapes_to_r6rs_escapes): Augment comments.
  (scm_to_stringn): When `handler ==
  SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE && SCM_R6RS_ESCAPES_P', realloc
  BUF so that it's large enough for the worst case.

* libguile/print.c (display_character): When `result != NULL && strategy
  == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE && SCM_R6RS_ESCAPES_P', make
  LOCALE_ENCODED large enough to hold an R6RS escape.

											
										
										
											2010-09-15 23:32:28 +02:00
+									{
 									  /* The worst case is if the input string contains all 4-digit
 									     hex escapes.  "\uXXXX" (six characters) becomes "\xXXXX;"
 									     (seven characters).  Make BUF large enough to hold
 									     that.  */
 									  buf = scm_realloc (buf, (len * 7) / 6 + 1);
-												Hide the string escaping hacks.

* libguile/strings.c (scm_i_unistring_escapes_to_guile_escapes): Rename
  to...
  (unistring_escapes_to_guile_escapes): ... this.  Make `static'.
  (scm_i_unistring_escapes_to_r6rs_escapes): Rename to...
  (unistring_escapes_to_r6rs_escapes): ... this.  Make `static'.

* libguile/strings.h (scm_i_unistring_escapes_to_guile_escapes,
  scm_i_unistring_escapes_to_r6rs_escapes): Remove declarations.

											
										
										
											2011-01-23 00:13:27 +01:00
+									  unistring_escapes_to_r6rs_escapes (buf, &len);
-												Fix write-beyond-end-of-string error in the conversion to R6RS string escapes.

Reported by Mike Gran <spk121@yahoo.com>.

* libguile/strings.c (scm_i_unistring_escapes_to_guile_escapes,
  scm_i_unistring_escapes_to_r6rs_escapes): Augment comments.
  (scm_to_stringn): When `handler ==
  SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE && SCM_R6RS_ESCAPES_P', realloc
  BUF so that it's large enough for the worst case.

* libguile/print.c (display_character): When `result != NULL && strategy
  == SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE && SCM_R6RS_ESCAPES_P', make
  LOCALE_ENCODED large enough to hold an R6RS escape.

											
										
										
											2010-09-15 23:32:28 +02:00
+									}
-												R6RS string escapes broken on string output

scm_to_stringn failed to do the necessary escape conversion for
R6RS hex escapes

* libguile/strings.c (unistring_escapes_to_r6rs_escapes): new function
  (scm_to_stringn): use new function when r6rs hex escapes are enabled

* test-suite/tests/reader.test: new test for string display

											
										
										
											2010-01-23 09:15:10 -08:00
+								      else
-												Hide the string escaping hacks.

* libguile/strings.c (scm_i_unistring_escapes_to_guile_escapes): Rename
  to...
  (unistring_escapes_to_guile_escapes): ... this.  Make `static'.
  (scm_i_unistring_escapes_to_r6rs_escapes): Rename to...
  (unistring_escapes_to_r6rs_escapes): ... this.  Make `static'.

* libguile/strings.h (scm_i_unistring_escapes_to_guile_escapes,
  scm_i_unistring_escapes_to_r6rs_escapes): Remove declarations.

											
										
										
											2011-01-23 00:13:27 +01:00
+								        unistring_escapes_to_guile_escapes (buf, &len);
-												Internally expose `scm_i_unistring_escapes_to_{guile,r6rs}_escapes'.

* libguile/strings.c (unistring_escapes_to_guile_escapes): Rename to...
  (scm_i_unistring_escapes_to_guile_escapes): ... this.  Change `char **bufp'
  to `char *buf'; leave realloc responsibility to the caller.  Update caller.
  (unistring_escapes_to_r6rs_escapes): Rename to...
  (scm_i_unistring_escapes_to_r6rs_escapes): ... this.  Likewise.

											
										
										
											2010-09-14 16:04:26 +02:00
 								      buf = scm_realloc (buf, len);
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								    }
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  if (lenp)
--12-08  Stefan Jahn  <stefan@lkcc.org>

        * strings.c (scm_c_string2str): New function.  Converts a
        given Scheme string into a C string.  Also put in two
        THINKME's regarding the malloc policy for the missing converter
        routines.

											
										
										
											2001-12-08 12:50:37 +00:00
+								    *lenp = len;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  else
 								    {
 								      buf = scm_realloc (buf, len + 1);
 								      buf[len] = '\0';
 								    }
-												* eval.[ch] (scm_deval_args):  Made static.

* srcprop.c (scm_source_property):  Remove redundant SCM_IMP
test.

* strings.c (scm_c_string2str):  Clarified comment.  Replaced
THINKME by FIXME for uniformness.  Removed question about whether
arguments need to be protected from garbage collection:  Arguments
must be protected as any other variable.

											
										
										
											2002-01-10 21:11:22 +00:00
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								  scm_remember_upto_here_1 (str);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  return buf;
--12-08  Stefan Jahn  <stefan@lkcc.org>

        * strings.c (scm_c_string2str): New function.  Converts a
        given Scheme string into a C string.  Also put in two
        THINKME's regarding the malloc policy for the missing converter
        routines.

											
										
										
											2001-12-08 12:50:37 +00:00
+								}
--01-28  Stefan Jahn  <stefan@lkcc.org>

        * configure.in (guile_cv_have_uint32_t): Look also in
        `stdint.h' for uint32_t.

2002-01-28  Stefan Jahn  <stefan@lkcc.org>

        * symbols.c (scm_c_symbol2str): New function, replacement for
        `gh_scm2newsymbol()'.

        * strings.c (scm_c_substring2str): New function.  Proper
        replacement for `gh_get_substr()'.

        * socket.c: Include `stdint.h' if available for the `uint32_t'
        declaration.

        * scmsigs.c (s_scm_sigaction): Initialize `chandler' (inhibits
        compiler warning).

        * backtrace.c: Include `lang.h' for GUILE_DEBUG conditional.

											
										
										
											2002-01-28 21:15:55 +00:00
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								size_t
 								scm_to_locale_stringbuf (SCM str, char *buf, size_t max_len)
 								{
 								  size_t len;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  char *result = NULL;
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  if (!scm_is_string (str))
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								    scm_wrong_type_arg_msg (NULL, 0, str, "string");
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  result = scm_to_locale_stringn (str, &len);
 								  memcpy (buf, result, (len > max_len) ? max_len : len);
 								  free (result);
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								  scm_remember_upto_here_1 (str);
 								  return len;
 								}
--01-28  Stefan Jahn  <stefan@lkcc.org>

        * configure.in (guile_cv_have_uint32_t): Look also in
        `stdint.h' for uint32_t.

2002-01-28  Stefan Jahn  <stefan@lkcc.org>

        * symbols.c (scm_c_symbol2str): New function, replacement for
        `gh_scm2newsymbol()'.

        * strings.c (scm_c_substring2str): New function.  Proper
        replacement for `gh_get_substr()'.

        * socket.c: Include `stdint.h' if available for the `uint32_t'
        declaration.

        * scmsigs.c (s_scm_sigaction): Initialize `chandler' (inhibits
        compiler warning).

        * backtrace.c: Include `lang.h' for GUILE_DEBUG conditional.

											
										
										
											2002-01-28 21:15:55 +00:00
-												Use `encoding-error' instead of `misc-error' for string encoding errors.

* libguile/strings.c (scm_encoding_error): New function.
  (scm_from_stringn, scm_to_stringn): Use it instead of `scm_misc_error ()'.

* test-suite/lib.scm (exception:encoding-error): Adjust accordingly.

* test-suite/tests/encoding-escapes.test (exception:conversion):
  Remove.  Use `exception:encoding-error' instead.

* test-suite/tests/encoding-iso88591.test: Likewise.

* test-suite/tests/encoding-iso88597.test: Likewise.

* test-suite/tests/encoding-utf8.test: Likewise.

											
										
										
											2010-01-07 00:37:10 +01:00
 								/* Unicode string normalization.  */
-												Support for Unicode string normalization functions

* libguile/strings.c, libguile/strings.h (normalize_str,
  scm_string_normalize_nfc, scm_string_normalize_nfd, scm_normalize_nfkc,
  scm_string_normalize_nfkd): New functions.
* test-suite/tests/strings.test: Unit tests for `string-normalize-nfc',
  `string-normalize-nfd', `string-normalize-nfkc', and
  `string-normalize-nfkd'.
* doc/ref/api-data.texi (String Comparison): Documentation for normalization
  functions.

											
										
										
											2010-01-03 01:06:05 -05:00
+								/* This function is a partial clone of SCM_STRING_TO_U32_BUF from
 								   libguile/i18n.c.  It would be useful to have this factored out into a more
 								   convenient location, but its use of alloca makes that tricky to do. */
 								static SCM
 								normalize_str (SCM string, uninorm_t form)
 								{
 								  SCM ret;
 								  scm_t_uint32 *w_str;
 								  scm_t_wchar *cbuf;
 								  size_t rlen, len = scm_i_string_length (string);
 								  if (scm_i_is_narrow_string (string))
 								    {
 								      size_t i;
 								      const char *buf = scm_i_string_chars (string);
 								      w_str = alloca (sizeof (scm_t_wchar) * (len + 1));
 								      for (i = 0; i < len; i ++)
 									w_str[i] = (unsigned char) buf[i];
 								      w_str[len] = 0;
 								    }
-												Attempt to narrow normalized strings.

* libguile/strings.c (normalize_str): Clean up indentation.  Add call to
  scm_i_try_narrow_string.

											
										
										
											2010-01-06 23:01:05 -05:00
+								  else
 								    w_str = (scm_t_uint32 *) scm_i_string_wide_chars (string);
-												Support for Unicode string normalization functions

* libguile/strings.c, libguile/strings.h (normalize_str,
  scm_string_normalize_nfc, scm_string_normalize_nfd, scm_normalize_nfkc,
  scm_string_normalize_nfkd): New functions.
* test-suite/tests/strings.test: Unit tests for `string-normalize-nfc',
  `string-normalize-nfd', `string-normalize-nfkc', and
  `string-normalize-nfkd'.
* doc/ref/api-data.texi (String Comparison): Documentation for normalization
  functions.

											
										
										
											2010-01-03 01:06:05 -05:00
+								  w_str = u32_normalize (form, w_str, len, NULL, &rlen);
 								  ret = scm_i_make_wide_string (rlen, &cbuf);
 								  u32_cpy ((scm_t_uint32 *) cbuf, w_str, rlen);
 								  free (w_str);
-												Attempt to narrow normalized strings.

* libguile/strings.c (normalize_str): Clean up indentation.  Add call to
  scm_i_try_narrow_string.

											
										
										
											2010-01-06 23:01:05 -05:00
 								  scm_i_try_narrow_string (ret);
-												Support for Unicode string normalization functions

* libguile/strings.c, libguile/strings.h (normalize_str,
  scm_string_normalize_nfc, scm_string_normalize_nfd, scm_normalize_nfkc,
  scm_string_normalize_nfkd): New functions.
* test-suite/tests/strings.test: Unit tests for `string-normalize-nfc',
  `string-normalize-nfd', `string-normalize-nfkc', and
  `string-normalize-nfkd'.
* doc/ref/api-data.texi (String Comparison): Documentation for normalization
  functions.

											
										
										
											2010-01-03 01:06:05 -05:00
+								  return ret;
 								}
 								SCM_DEFINE (scm_string_normalize_nfc, "string-normalize-nfc", 1, 0, 0,
 									    (SCM string),
 									    "Returns the NFC normalized form of @var{string}.")
 								#define FUNC_NAME s_scm_string_normalize_nfc
 								{
 								  SCM_VALIDATE_STRING (1, string);
 								  return normalize_str (string, UNINORM_NFC);
 								}
 								#undef FUNC_NAME
 								SCM_DEFINE (scm_string_normalize_nfd, "string-normalize-nfd", 1, 0, 0,
 									    (SCM string),
 									    "Returns the NFD normalized form of @var{string}.")
 								#define FUNC_NAME s_scm_string_normalize_nfd
 								{
 								  SCM_VALIDATE_STRING (1, string);
 								  return normalize_str (string, UNINORM_NFD);
 								}
 								#undef FUNC_NAME
 								SCM_DEFINE (scm_string_normalize_nfkc, "string-normalize-nfkc", 1, 0, 0,
 									    (SCM string),
 									    "Returns the NFKC normalized form of @var{string}.")
 								#define FUNC_NAME s_scm_string_normalize_nfkc
 								{
 								  SCM_VALIDATE_STRING (1, string);
 								  return normalize_str (string, UNINORM_NFKC);
 								}
 								#undef FUNC_NAME
 								SCM_DEFINE (scm_string_normalize_nfkd, "string-normalize-nfkd", 1, 0, 0,
 									    (SCM string),
 									    "Returns the NFKD normalized form of @var{string}.")
 								#define FUNC_NAME s_scm_string_normalize_nfkd
 								{
 								  SCM_VALIDATE_STRING (1, string);
 								  return normalize_str (string, UNINORM_NFKD);
 								}
 								#undef FUNC_NAME
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								/* converts C scm_array of strings to SCM scm_list of strings. */
 								/* If argc < 0, a null terminated scm_array is assumed. */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								SCM
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								scm_makfromstrs (int argc, char **argv)
 								{
 								  int i = argc;
 								  SCM lst = SCM_EOL;
 								  if (0 > i)
 								    for (i = 0; argv[i]; i++);
 								  while (i--)
 								    lst = scm_cons (scm_from_locale_string (argv[i]), lst);
 								  return lst;
 								}
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								/* Return a newly allocated array of char pointers to each of the strings
 								   in args, with a terminating NULL pointer.  */
 								char **
 								scm_i_allocate_string_pointers (SCM list)
-												Use `scm_gc malloc_pointerless ()' in `scm_i allocate_string_pointers ()'.

* libguile/dynl.c (free_string_pointers): Remove.
  (scm_dynamic_args_call): Remove reference to `free_string_pointers ()'
  and remove dynwind.

* libguile/posix.c (free_string_pointers): Remove.
  (scm_execl, scm_execlp, scm_execle, scm_environ): Remove references
  to `free_string_pointers ()'.

* libguile/simpos.c (free_string_pointers): Remove.
  (scm_system_star): Remove reference to `free_string_pointers ()',
  remove enclosing dynwind.

* libguile/strings.c (scm_i_allocate_string_pointers): Use
  `scm_gc_malloc_pointerless ()' and `scm_gc_malloc ()'
  instead of `scm_malloc ()' and `scm_to_locale_string ()',
  so that the result is automatically GC'd when no longer
  referenced.  Remove unneeded dynwind.
  (scm_i_free_string_pointers): Remove.

* libguile/strings.h (scm_i_free_string_pointers): Remove declaration.

											
										
										
											2009-01-18 15:33:37 +01:00
+								#define FUNC_NAME "scm_i_allocate_string_pointers"
--01-28  Stefan Jahn  <stefan@lkcc.org>

        * configure.in (guile_cv_have_uint32_t): Look also in
        `stdint.h' for uint32_t.

2002-01-28  Stefan Jahn  <stefan@lkcc.org>

        * symbols.c (scm_c_symbol2str): New function, replacement for
        `gh_scm2newsymbol()'.

        * strings.c (scm_c_substring2str): New function.  Proper
        replacement for `gh_get_substr()'.

        * socket.c: Include `stdint.h' if available for the `uint32_t'
        declaration.

        * scmsigs.c (s_scm_sigaction): Initialize `chandler' (inhibits
        compiler warning).

        * backtrace.c: Include `lang.h' for GUILE_DEBUG conditional.

											
										
										
											2002-01-28 21:15:55 +00:00
+								{
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								  char **result;
 								  int len = scm_ilength (list);
 								  int i;
 								  if (len < 0)
 								    scm_wrong_type_arg_msg (NULL, 0, list, "proper list");
-												Use `scm_gc malloc_pointerless ()' in `scm_i allocate_string_pointers ()'.

* libguile/dynl.c (free_string_pointers): Remove.
  (scm_dynamic_args_call): Remove reference to `free_string_pointers ()'
  and remove dynwind.

* libguile/posix.c (free_string_pointers): Remove.
  (scm_execl, scm_execlp, scm_execle, scm_environ): Remove references
  to `free_string_pointers ()'.

* libguile/simpos.c (free_string_pointers): Remove.
  (scm_system_star): Remove reference to `free_string_pointers ()',
  remove enclosing dynwind.

* libguile/strings.c (scm_i_allocate_string_pointers): Use
  `scm_gc_malloc_pointerless ()' and `scm_gc_malloc ()'
  instead of `scm_malloc ()' and `scm_to_locale_string ()',
  so that the result is automatically GC'd when no longer
  referenced.  Remove unneeded dynwind.
  (scm_i_free_string_pointers): Remove.

* libguile/strings.h (scm_i_free_string_pointers): Remove declaration.

											
										
										
											2009-01-18 15:33:37 +01:00
+								  result = scm_gc_malloc ((len + 1) * sizeof (char *),
 											  "string pointers");
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								  result[len] = NULL;
 								  /* The list might be have been modified in another thread, so
 								     we check LIST before each access.
 								   */
-												*** empty log message ***

											
										
										
											2004-09-22 17:41:37 +00:00
+								  for (i = 0; i < len && scm_is_pair (list); i++)
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								    {
-												Use `scm_gc malloc_pointerless ()' in `scm_i allocate_string_pointers ()'.

* libguile/dynl.c (free_string_pointers): Remove.
  (scm_dynamic_args_call): Remove reference to `free_string_pointers ()'
  and remove dynwind.

* libguile/posix.c (free_string_pointers): Remove.
  (scm_execl, scm_execlp, scm_execle, scm_environ): Remove references
  to `free_string_pointers ()'.

* libguile/simpos.c (free_string_pointers): Remove.
  (scm_system_star): Remove reference to `free_string_pointers ()',
  remove enclosing dynwind.

* libguile/strings.c (scm_i_allocate_string_pointers): Use
  `scm_gc_malloc_pointerless ()' and `scm_gc_malloc ()'
  instead of `scm_malloc ()' and `scm_to_locale_string ()',
  so that the result is automatically GC'd when no longer
  referenced.  Remove unneeded dynwind.
  (scm_i_free_string_pointers): Remove.

* libguile/strings.h (scm_i_free_string_pointers): Remove declaration.

											
										
										
											2009-01-18 15:33:37 +01:00
+								      SCM str;
 								      size_t len;
 								      str = SCM_CAR (list);
 								      len = scm_c_string_length (str);
 								      result[i] = scm_gc_malloc_pointerless (len + 1, "string pointers");
 								      memcpy (result[i], scm_i_string_chars (str), len);
 								      result[i][len] = '\0';
-												* strings.h, deprecated.h (SCM_STRING_COERCE_0TERMINATION_X):
Moved from string.h to deprecated.h.

* deprecated.c, deprecated.h (SCM_CHARS, SCM_LENGTH): Removed.

* strings.h, strings.c (SCM_MAKE_STRING_TAG): Rename dto
SCM_I_MAKE_STRING_TAG, changed all uses.
(SCM_STRING_CHARS, SCM_STRING_UCHARS, SCM_STRING_LENGTH): Renamed
to SCM_I_STRING_CHARS, SCM_I_STRING_UCHARS, and SCM_I_LENGTH
respectively.  For a short time, the old names are still there as
aliases.  Not all uses have been changed yet, but the ones in
strings.c have.
(SCM_STRING_MAX_LEN): Do not hardcode to 24 bits, compute from
SCM_T_BITS_MAX.
(scm_is_string, scm_from_locale_string, scm_from_locale_stringn,
scm_take_locale_string, scm_take_locale_stringn,
scm_to_locale_string, scm_to_locale_stringn,
scm_to_locale_stringbuf): New.
(scm_c_string2str, scm_c_substring2str): Deprecated by moving to
deprecated.[hc]. Implemented in terms of the new functions above.
(scm_take_str, scm_take0str, scm_mem2string, scm_str2string,
scm_makfrom0str): Reimplemented in terms of the new functions from
above.  They will be discouraged shortly.
(scm_substring): Do not use scm_mem2string.
(scm_i_allocate_string_pointers, scm_i_free_string_pointers): New,
to replace similar code from posix.c, simpos.c, and dynl.c.
(scm_string_append): Use memcpy instead of explicit loop.  Do not
use register keyword.  Use plain 'char' instead of 'unsigned
char'.

											
										
										
											2004-08-10 13:20:59 +00:00
+								      list = SCM_CDR (list);
 								    }
 								  return result;
--01-28  Stefan Jahn  <stefan@lkcc.org>

        * configure.in (guile_cv_have_uint32_t): Look also in
        `stdint.h' for uint32_t.

2002-01-28  Stefan Jahn  <stefan@lkcc.org>

        * symbols.c (scm_c_symbol2str): New function, replacement for
        `gh_scm2newsymbol()'.

        * strings.c (scm_c_substring2str): New function.  Proper
        replacement for `gh_get_substr()'.

        * socket.c: Include `stdint.h' if available for the `uint32_t'
        declaration.

        * scmsigs.c (s_scm_sigaction): Initialize `chandler' (inhibits
        compiler warning).

        * backtrace.c: Include `lang.h' for GUILE_DEBUG conditional.

											
										
										
											2002-01-28 21:15:55 +00:00
+								}
-												Use `scm_gc malloc_pointerless ()' in `scm_i allocate_string_pointers ()'.

* libguile/dynl.c (free_string_pointers): Remove.
  (scm_dynamic_args_call): Remove reference to `free_string_pointers ()'
  and remove dynwind.

* libguile/posix.c (free_string_pointers): Remove.
  (scm_execl, scm_execlp, scm_execle, scm_environ): Remove references
  to `free_string_pointers ()'.

* libguile/simpos.c (free_string_pointers): Remove.
  (scm_system_star): Remove reference to `free_string_pointers ()',
  remove enclosing dynwind.

* libguile/strings.c (scm_i_allocate_string_pointers): Use
  `scm_gc_malloc_pointerless ()' and `scm_gc_malloc ()'
  instead of `scm_malloc ()' and `scm_to_locale_string ()',
  so that the result is automatically GC'd when no longer
  referenced.  Remove unneeded dynwind.
  (scm_i_free_string_pointers): Remove.

* libguile/strings.h (scm_i_free_string_pointers): Remove declaration.

											
										
										
											2009-01-18 15:33:37 +01:00
+								#undef FUNC_NAME
-												* eval.[ch] (scm_deval_args):  Made static.

* srcprop.c (scm_source_property):  Remove redundant SCM_IMP
test.

* strings.c (scm_c_string2str):  Clarified comment.  Replaced
THINKME by FIXME for uniformness.  Removed question about whether
arguments need to be protected from garbage collection:  Arguments
must be protected as any other variable.

											
										
										
											2002-01-10 21:11:22 +00:00
-												* strings.h, strings.c (scm_i_get_substring_spec): New.

* socket.c, rw.c, deprecated.h, validate.h
(SCM_VALIDATE_STRING_COPY): Deprecated.  Replaced all uses with
SCM_VALIDATE_STRING plus SCM_I_STRING_CHARS or
scm_to_locale_string, etc.
(SCM_VALIDATE_SUBSTRING_SPEC_COPY): Deprecated.  Replaced as
above, plus scm_i_get_substring_spec.

											
										
										
											2004-08-12 17:43:41 +00:00
+								void
 								scm_i_get_substring_spec (size_t len,
 											  SCM start, size_t *cstart,
 											  SCM end, size_t *cend)
 								{
 								  if (SCM_UNBNDP (start))
 								    *cstart = 0;
 								  else
 								    *cstart = scm_to_unsigned_integer (start, 0, len);
 								  if (SCM_UNBNDP (end))
 								    *cend = len;
 								  else
 								    *cend = scm_to_unsigned_integer (end, *cstart, len);
 								}
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								#if SCM_ENABLE_DEPRECATED
-												(SCM_STRING_CHARS): Explicitely reject read-only strings with an error
message that blames SCM_STRING_CHARS.

											
										
										
											2004-09-29 17:58:39 +00:00
+								/* When these definitions are removed, it becomes reasonable to use
 								   read-only strings for string literals.  For that, change the reader
 								   to create string literals with scm_c_substring_read_only instead of
 								   with scm_c_substring_copy.
 								*/
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								int
-												Turn all deprecated features that once were macros but are now
functions back into macros.

											
										
										
											2005-01-12 11:10:02 +00:00
+								scm_i_deprecated_stringp (SCM str)
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								{
 								  scm_c_issue_deprecation_warning
 								    ("SCM_STRINGP is deprecated.  Use scm_is_string instead.");
-												(SCM_STRINGP): Accept all strings.
(SCM_STRING_CHARS): Reject shared substrings here.

											
										
										
											2004-08-27 12:45:37 +00:00
+								  return scm_is_string (str);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								}
 								char *
-												Turn all deprecated features that once were macros but are now
functions back into macros.

											
										
										
											2005-01-12 11:10:02 +00:00
+								scm_i_deprecated_string_chars (SCM str)
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								{
 								  char *chars;
 								  scm_c_issue_deprecation_warning
 								    ("SCM_STRING_CHARS is deprecated.  See the manual for alternatives.");
-												(SCM_STRINGP): Accept all strings.
(SCM_STRING_CHARS): Reject shared substrings here.

											
										
										
											2004-08-27 12:45:37 +00:00
+								  /* We don't accept shared substrings here since they are not
 								     null-terminated.
 								  */
 								  if (IS_SH_STRING (str))
-												Fix stylistic issues revealed by "make syntax-check".

* libguile/gc-malloc.c (scm_must_free): Remove unnecessary `if' before
  `free ()'.

* libguile/stime.c (scm_localtime, scm_mktime): Likewise.

* libguile/eval.i.c (ceval): Don't cast the result of alloca(3).

* libguile/i18n.c (SCM_STRING_TO_U32_BUF): Likewise.

* test-suite/standalone/test-unwind.c: Likewise.

* libguile/strings.c (scm_i_deprecated_string_chars): Don't end error
  message in period.

											
										
										
											2009-11-17 23:07:10 +01:00
+								    scm_misc_error (NULL,
 										    "SCM_STRING_CHARS does not work with shared substrings",
-												(SCM_STRINGP): Accept all strings.
(SCM_STRING_CHARS): Reject shared substrings here.

											
										
										
											2004-08-27 12:45:37 +00:00
+										    SCM_EOL);
-												Fix `explicitely' typos, should be `explicitly'

											
										
										
											2009-05-20 18:50:52 +01:00
+								  /* We explicitly test for read-only strings to produce a better
-												(SCM_STRING_CHARS): Explicitely reject read-only strings with an error
message that blames SCM_STRING_CHARS.

											
										
										
											2004-09-29 17:58:39 +00:00
+								     error message.
 								  */
 								  if (IS_RO_STRING (str))
-												Fix stylistic issues revealed by "make syntax-check".

* libguile/gc-malloc.c (scm_must_free): Remove unnecessary `if' before
  `free ()'.

* libguile/stime.c (scm_localtime, scm_mktime): Likewise.

* libguile/eval.i.c (ceval): Don't cast the result of alloca(3).

* libguile/i18n.c (SCM_STRING_TO_U32_BUF): Likewise.

* test-suite/standalone/test-unwind.c: Likewise.

* libguile/strings.c (scm_i_deprecated_string_chars): Don't end error
  message in period.

											
										
										
											2009-11-17 23:07:10 +01:00
+								    scm_misc_error (NULL,
 										    "SCM_STRING_CHARS does not work with read-only strings",
-												(SCM_STRING_CHARS): Explicitely reject read-only strings with an error
message that blames SCM_STRING_CHARS.

											
										
										
											2004-09-29 17:58:39 +00:00
+										    SCM_EOL);
-												Fix stylistic issues revealed by "make syntax-check".

* libguile/gc-malloc.c (scm_must_free): Remove unnecessary `if' before
  `free ()'.

* libguile/stime.c (scm_localtime, scm_mktime): Likewise.

* libguile/eval.i.c (ceval): Don't cast the result of alloca(3).

* libguile/i18n.c (SCM_STRING_TO_U32_BUF): Likewise.

* test-suite/standalone/test-unwind.c: Likewise.

* libguile/strings.c (scm_i_deprecated_string_chars): Don't end error
  message in period.

											
										
										
											2009-11-17 23:07:10 +01:00
-												(SCM_STRINGP): Accept all strings.
(SCM_STRING_CHARS): Reject shared substrings here.

											
										
										
											2004-08-27 12:45:37 +00:00
+								  /* The following is still wrong, of course...
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								   */
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to sting-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  str = scm_i_string_start_writing (str);
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  chars = scm_i_string_writable_chars (str);
 								  scm_i_string_stop_writing ();
 								  return chars;
 								}
 								size_t
-												Turn all deprecated features that once were macros but are now
functions back into macros.

											
										
										
											2005-01-12 11:10:02 +00:00
+								scm_i_deprecated_string_length (SCM str)
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								{
 								  scm_c_issue_deprecation_warning
 								    ("SCM_STRING_LENGTH is deprecated.  Use scm_c_string_length instead.");
 								  return scm_c_string_length (str);
 								}
 								#endif
-												add generic array implementation facility

* libguile/array-handle.c (scm_i_register_array_implementation):
  (scm_i_array_implementation_for_obj): Add generic array facility,
  which will (in a few commits) detangle the array code.
  (scm_array_get_handle): Use the generic array facility. Note that
  scm_t_array_handle no longer has ref and set function pointers;
  instead it has a pointer to the array implementation. It is unlikely
  that code out there used these functions, however, as the supported
  way was through scm_array_handle_ref/set_x.
  (scm_array_handle_pos): Move this function here from arrays.c.
  (scm_array_handle_element_type): New function, returns a Scheme value
  representing the type of element stored in this array.

* libguile/array-handle.h (scm_t_array_element_type): New enum, for
  generically determining the type of an array.
  (scm_array_handle_rank):
  (scm_array_handle_dims): These are now just #defines.

* libguile/arrays.c:
* libguile/bitvectors.c:
* libguile/bytevectors.c:
* libguile/srfi-4.c:
* libguile/strings.c:
* libguile/vectors.c: Register array implementations for all of these.

* libguile/inline.h: Update for array_handle_ref/set change.
* libguile/deprecated.h: Need to include arrays.h now.

											
										
										
											2009-07-19 15:04:40 +02:00
+								static SCM
 								string_handle_ref (scm_t_array_handle *h, size_t index)
 								{
 								  return scm_c_string_ref (h->array, index);
 								}
 								static void
 								string_handle_set (scm_t_array_handle *h, size_t index, SCM val)
 								{
 								  scm_c_string_set_x (h->array, index, val);
 								}
 								static void
 								string_get_handle (SCM v, scm_t_array_handle *h)
 								{
 								  h->array = v;
 								  h->ndims = 1;
 								  h->dims = &h->dim0;
 								  h->dim0.lbnd = 0;
 								  h->dim0.ubnd = scm_c_string_length (v) - 1;
 								  h->dim0.inc = 1;
 								  h->element_type = SCM_ARRAY_ELEMENT_TYPE_CHAR;
 								  h->elements = h->writable_elements = NULL;
 								}
-												fix bug in string array implementation type mask

* libguile/strings.c (SCM_ARRAY_IMPLEMENTATION): The mask for the string
  array implementation should be 0x7f, without masking out 0x2.
  Otherwise numbers were being thought to be vectors!

* test-suite/tests/unif.test: Add test.

* libguile/vectors.c (SCM_ARRAY_IMPLEMENTATION): Only register one
  implementation, because weak vectors can be checked with the mask &
  ~2, and the functions are the same.

											
										
										
											2009-12-29 12:35:13 +01:00
+								SCM_ARRAY_IMPLEMENTATION (scm_tc7_string, 0x7f,
-												add generic array implementation facility

* libguile/array-handle.c (scm_i_register_array_implementation):
  (scm_i_array_implementation_for_obj): Add generic array facility,
  which will (in a few commits) detangle the array code.
  (scm_array_get_handle): Use the generic array facility. Note that
  scm_t_array_handle no longer has ref and set function pointers;
  instead it has a pointer to the array implementation. It is unlikely
  that code out there used these functions, however, as the supported
  way was through scm_array_handle_ref/set_x.
  (scm_array_handle_pos): Move this function here from arrays.c.
  (scm_array_handle_element_type): New function, returns a Scheme value
  representing the type of element stored in this array.

* libguile/array-handle.h (scm_t_array_element_type): New enum, for
  generically determining the type of an array.
  (scm_array_handle_rank):
  (scm_array_handle_dims): These are now just #defines.

* libguile/arrays.c:
* libguile/bitvectors.c:
* libguile/bytevectors.c:
* libguile/srfi-4.c:
* libguile/strings.c:
* libguile/vectors.c: Register array implementations for all of these.

* libguile/inline.h: Update for array_handle_ref/set change.
* libguile/deprecated.h: Need to include arrays.h now.

											
										
										
											2009-07-19 15:04:40 +02:00
+								                          string_handle_ref, string_handle_set,
-												Remove extraneous semicolon after `SCM_ARRAY_IMPLEMENTATION' & co.

* libguile/bitvectors.c, libguile/srfi-4.c, libguile/strings.c,
  libguile/vectors.c: Remove extraneous semicolon after
  `SCM_ARRAY_IMPLEMENTATION' and `SCM_VECTOR_IMPLEMENTATION'
  invocations.

											
										
										
											2009-12-15 00:53:13 +01:00
+								                          string_get_handle)
 								SCM_VECTOR_IMPLEMENTATION (SCM_ARRAY_ELEMENT_TYPE_CHAR, scm_make_string)
-												add generic array implementation facility

* libguile/array-handle.c (scm_i_register_array_implementation):
  (scm_i_array_implementation_for_obj): Add generic array facility,
  which will (in a few commits) detangle the array code.
  (scm_array_get_handle): Use the generic array facility. Note that
  scm_t_array_handle no longer has ref and set function pointers;
  instead it has a pointer to the array implementation. It is unlikely
  that code out there used these functions, however, as the supported
  way was through scm_array_handle_ref/set_x.
  (scm_array_handle_pos): Move this function here from arrays.c.
  (scm_array_handle_element_type): New function, returns a Scheme value
  representing the type of element stored in this array.

* libguile/array-handle.h (scm_t_array_element_type): New enum, for
  generically determining the type of an array.
  (scm_array_handle_rank):
  (scm_array_handle_dims): These are now just #defines.

* libguile/arrays.c:
* libguile/bitvectors.c:
* libguile/bytevectors.c:
* libguile/srfi-4.c:
* libguile/strings.c:
* libguile/vectors.c: Register array implementations for all of these.

* libguile/inline.h: Update for array_handle_ref/set change.
* libguile/deprecated.h: Need to include arrays.h now.

											
										
										
											2009-07-19 15:04:40 +02:00
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								void
 								scm_init_strings ()
 								{
-												* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 16:49:42 +00:00
+								  scm_nullstr = scm_i_make_string (0, NULL);
-												* Make SCM_DEBUG_CELL_ACCESSES=1 work with GUILE_DEBUG_FREELIST.

											
										
										
											2001-05-15 09:45:10 +00:00
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/strings.x"
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-													* *.[hc]: add Emacs magic at the end of file, to ensure GNU
 	indentation style.

											
										
										
											2000-03-19 19:01:16 +00:00
 								/*
 								  Local Variables:
 								  c-file-style: "gnu"
 								  End:
 								*/