guile/libguile/read.c

/* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
 *   2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */


#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <stdio.h>
#include <ctype.h>
#include <limits.h>
#include <string.h>
#include <unistd.h>
#include <unicase.h>
#include <unictype.h>

#include "libguile/_scm.h"
#include "libguile/bytevectors.h"
#include "libguile/chars.h"
#include "libguile/eval.h"
#include "libguile/arrays.h"
#include "libguile/bitvectors.h"
#include "libguile/keywords.h"
#include "libguile/alist.h"
#include "libguile/srcprop.h"
#include "libguile/hashtab.h"
#include "libguile/hash.h"
#include "libguile/ports.h"
#include "libguile/fports.h"
#include "libguile/root.h"
#include "libguile/strings.h"
#include "libguile/strports.h"
#include "libguile/vectors.h"
#include "libguile/validate.h"
#include "libguile/srfi-4.h"
#include "libguile/srfi-13.h"

#include "libguile/read.h"
#include "libguile/private-options.h"


/* The symbol `.'.  scm_read_sexp compares the datum it has just read
   against this symbol to recognize dotted pairs.  */
SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
/* The symbols `prefix' and `postfix'; presumably the non-#f values
   accepted by the `keywords' read option (see scm_read_opts).  */
SCM_SYMBOL (scm_keyword_prefix, "prefix");
SCM_SYMBOL (scm_keyword_postfix, "postfix");
/* The symbol `nil'; scm_read_nil checks the symbol it reads against
   it.  */
SCM_SYMBOL (sym_nil, "nil");

/* SRFI-105 curly infix expression support.  scm_read_sexp prepends
   `$nfx$' to curly-infix lists that it cannot convert to prefix form.
   NOTE(review): the two `$bracket-...$' symbols are not used by the
   code in this part of the file -- confirm where they are consumed.  */
SCM_SYMBOL (sym_nfx, "$nfx$");
SCM_SYMBOL (sym_bracket_list, "$bracket-list$");
SCM_SYMBOL (sym_bracket_apply, "$bracket-apply$");

/* Table of the reader options.  scm_read_options hands this table to
   `scm_options'.  Each entry holds an option type, the option's name,
   what appears to be its initial value (TODO confirm against the
   definition of `scm_t_option'), and a documentation string.  The
   table ends with an all-zero entry.  */
scm_t_option scm_read_opts[] =
  {
    { SCM_OPTION_BOOLEAN, "copy", 0,
      "Copy source code expressions." },
    { SCM_OPTION_BOOLEAN, "positions", 1,
      "Record positions of source code expressions." },
    { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
      "Convert symbols to lower case."},
    { SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS,
      "Style of keyword recognition: #f, 'prefix or 'postfix."},
    { SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
      "Use R6RS variable-length character and string hex escapes."},
    { SCM_OPTION_BOOLEAN, "square-brackets", 1,
      "Treat `[' and `]' as parentheses, for R6RS compatibility."},
    { SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0,
      "In strings, consume leading whitespace after an escaped end-of-line."},
    { SCM_OPTION_BOOLEAN, "curly-infix", 0,
      "Support SRFI-105 curly infix expressions."},
    { 0, },
  };
 
/* Internal read options structure.  This is initialized by 'scm_read'
   from the global and per-port read options, and a pointer is passed
   down to all helper functions. */

/* How keywords are spelled.  scm_read_mixed_case_symbol turns a
   token with a trailing colon into a keyword when the style is
   KEYWORD_STYLE_POSTFIX.  */
enum t_keyword_style
  {
    KEYWORD_STYLE_HASH_PREFIX,
    KEYWORD_STYLE_PREFIX,
    KEYWORD_STYLE_POSTFIX
  };

/* One flag per reader option.  The field names mirror the option names
   in scm_read_opts.  */
struct t_read_opts
{
  enum t_keyword_style keyword_style;
  /* Presumably mirrors the `copy' option; not consulted by the helpers
     in this part of the file.  */
  unsigned int copy_source_p        : 1;
  /* When set, maybe_annotate_source attaches source properties.  */
  unsigned int record_positions_p   : 1;
  /* When set, symbol names are downcased before interning.  */
  unsigned int case_insensitive_p   : 1;
  /* Selects the `\x...;' form of hex escapes in strings and disables
     `\u' and `\U' (see scm_read_string).  */
  unsigned int r6rs_escapes_p       : 1;
  /* When set, `[' and `]' are token delimiters.  */
  unsigned int square_brackets_p    : 1;
  /* When set, whitespace following an escaped newline in a string is
     skipped.  */
  unsigned int hungry_eol_escapes_p : 1;
  /* When set, `{', `}', `[' and `]' are token delimiters and
     scm_read_sexp rewrites `{...}' lists.  */
  unsigned int curly_infix_p        : 1;
  /* NOTE(review): not consulted by the helpers in this part of the
     file; presumably enables SRFI-105 neoteric expressions.  */
  unsigned int neoteric_p           : 1;
};

typedef struct t_read_opts scm_t_read_opts;


/*
  Give meaningful error messages for errors

  We use the format

  FILE:LINE:COL: MESSAGE
  This happened in ....

  This is not standard GNU format, but the test-suite likes the real
  message to be in front.

 */


/* Signal a `read-error' about PORT.  The error text is MESSAGE
   prefixed with the port's file name (or "#<unknown port>" when the
   file name is not a string) and its line and column numbers, each
   incremented by one.  FUNCTION, when non-NULL, names the reporting
   procedure; ARG is handed on to `scm_error_scm' unchanged.  */
void
scm_i_input_error (char const *function,
		   SCM port, const char *message, SCM arg)
{
  SCM file_name, out, text, subr;

  if (scm_is_string (SCM_FILENAME (port)))
    file_name = SCM_FILENAME (port);
  else
    file_name = scm_from_locale_string ("#<unknown port>");

  /* Format the "FILE:LINE:COL: MESSAGE" text through a string port.  */
  out = scm_open_output_string ();
  scm_simple_format (out,
                     scm_from_locale_string ("~A:~S:~S: ~A"),
                     scm_list_4 (file_name,
                                 scm_from_long (SCM_LINUM (port) + 1),
                                 scm_from_int (SCM_COL (port) + 1),
                                 scm_from_locale_string (message)));

  text = scm_get_output_string (out);
  scm_close_output_port (out);

  subr = function ? scm_from_locale_string (function) : SCM_BOOL_F;
  scm_error_scm (scm_from_latin1_symbol ("read-error"),
                 subr, text, arg, SCM_BOOL_F);
}


SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0, 
            (SCM setting),
	    "Option interface for the read options. Instead of using\n"
	    "this procedure directly, use the procedures @code{read-enable},\n"
	    "@code{read-disable}, @code{read-set!} and @code{read-options}.")
#define FUNC_NAME s_scm_read_options
{
  /* Query or update the option table according to SETTING.  */
  SCM ans = scm_options (setting,
			 scm_read_opts,
			 FUNC_NAME);
  /* Copying source expressions implies recording their positions, so
     force position recording on whenever copying is enabled.  */
  if (SCM_COPY_SOURCE_P)
    SCM_RECORD_POSITIONS_P = 1;
  return ans;
}
#undef FUNC_NAME

/* A fluid referring to an association list mapping extra hash
   characters to procedures.  The pointer itself is not initialized in
   this part of the file; it must be set before either accessor below
   is called.  */
static SCM *scm_i_read_hash_procedures;

/* Return the current value of the hash-procedures fluid.  */
static SCM
scm_i_read_hash_procedures_ref (void)
{
  return scm_fluid_ref (*scm_i_read_hash_procedures);
}

/* Set the hash-procedures fluid to VALUE.  */
static void
scm_i_read_hash_procedures_set_x (SCM value)
{
  scm_fluid_set_x (*scm_i_read_hash_procedures, value);
}


/* Token readers.  */


/* Size, in bytes, of the on-stack C buffer used to read symbols and
   numbers.  Longer tokens spill into a GC-allocated buffer (see
   read_complete_token).  */
#define READER_BUFFER_SIZE            128

/* Number of 32-bit codepoints in the buffer used to read strings.  */
#define READER_STRING_BUFFER_SIZE     128

/* The maximum size, in bytes, of Scheme character names; longer names
   are rejected by scm_read_character.  */
#define READER_CHAR_NAME_MAX_SIZE      50

/* The maximum size of reader directive names.  */
#define READER_DIRECTIVE_NAME_MAX_SIZE 50


/* `isblank' is only in C99.  Note that, unlike `isblank', this
   predicate also accepts newline, form feed and carriage return.  The
   argument is evaluated several times, so it must be free of side
   effects.  */
#define CHAR_IS_BLANK_(_chr)					\
  (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n')	\
   || ((_chr) == '\f') || ((_chr) == '\r'))

#ifdef MSDOS
/* On MS-DOS, character 26 (Ctrl-Z) also counts as blank.  The macro
   must test its own argument, _CHR, rather than whatever variable named
   `chr' happens to be in scope at the point of use.  */
# define CHAR_IS_BLANK(_chr)			\
  ((CHAR_IS_BLANK_ (_chr)) || ((_chr) == 26))
#else
# define CHAR_IS_BLANK CHAR_IS_BLANK_
#endif


/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
   structure'').  */
#define CHAR_IS_R5RS_DELIMITER(c)				\
  (CHAR_IS_BLANK (c)						\
   || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')

/* Delimiters under the current read options: the R5RS delimiters, plus
   square brackets when either the square-brackets or the curly-infix
   option is set, plus curly braces when the curly-infix option is set.
   This macro reads a variable named `opts' (a pointer to
   scm_t_read_opts) that must be in scope at the point of use.  */
#define CHAR_IS_DELIMITER(c)                                    \
  (CHAR_IS_R5RS_DELIMITER (c)                                   \
   || (((c) == ']' || (c) == '[') && (opts->square_brackets_p   \
                                      || opts->curly_infix_p))  \
   || (((c) == '}' || (c) == '{') && opts->curly_infix_p))

/* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
   Structure''.  Only the lower-case letters are tested.  */
#define CHAR_IS_EXPONENT_MARKER(_chr)				\
  (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f')	\
   || ((_chr) == 'd') || ((_chr) == 'l'))

/* Forward declarations of readers and helpers that are used (for
   instance by flush_ws) before their definitions: the SCSH and R6RS
   block-comment readers, the `#;' commented-expression reader, the
   `#!' reader, and the lookup of user-defined hash procedures.  */
static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
static SCM scm_read_commented_expression (scm_t_wchar, SCM, scm_t_read_opts *);
static SCM scm_read_shebang (scm_t_wchar, SCM, scm_t_read_opts *);
static SCM scm_get_hash_procedure (int);

/* Read bytes from PORT into the pre-allocated buffer BUF until
   end-of-file or a delimiter (e.g., a whitespace) is seen, or until
   BUF_SIZE bytes have been stored.  A delimiter is pushed back onto
   PORT instead of being consumed.  *READ is set to the number of bytes
   stored in BUF.  Return zero if the token ended before BUF was filled;
   return non-zero if BUF was filled, in which case more of the token
   may follow.  */
static int
read_token (SCM port, scm_t_read_opts *opts,
            char *buf, size_t buf_size, size_t *read)
{
  *read = 0;

  for (;;)
    {
      int chr;

      if (*read >= buf_size)
        /* BUF is full; the caller has to come back for the rest.  */
        return 1;

      chr = scm_get_byte_or_eof (port);
      if (chr == EOF)
        return 0;

      if (CHAR_IS_DELIMITER (chr))
        {
          scm_unget_byte (chr, port);
          return 0;
        }

      buf[(*read)++] = (char) chr;
    }
}

/* Like `read_token', but read the whole token however long it is.
   Return BUFFER if the token fits in BUFFER_SIZE bytes, or else a
   GC-allocated buffer holding the complete token.  *READ is set to the
   token's length in bytes.  The result is not NUL-terminated.  */
static char *
read_complete_token (SCM port, scm_t_read_opts *opts,
                     char *buffer, size_t buffer_size, size_t *read)
{
  char *accum = NULL;
  size_t accum_len = 0, chunk_len;
  int more;

  do
    {
      more = read_token (port, opts, buffer, buffer_size, &chunk_len);
      if (chunk_len == 0)
        break;

      /* As soon as one chunk has filled BUFFER, every chunk (that one
         included) is appended to a freshly allocated accumulator.  */
      if (more || accum_len != 0)
        {
          char *grown =
            scm_gc_malloc_pointerless (accum_len + chunk_len, "read");

          if (accum_len != 0)
            memcpy (grown, accum, accum_len);
          memcpy (grown + accum_len, buffer, chunk_len);

          accum = grown;
          accum_len += chunk_len;
        }
    }
  while (more);

  if (accum_len != 0)
    {
      *read = accum_len;
      return accum;
    }

  *read = chunk_len;
  return buffer;
}

/* Skip whitespace and comments from PORT and return the first character
   read that belongs to neither.  `;' line comments, `#!' blocks, `#;'
   commented expressions and `#|' block comments are skipped.  For any
   other `#' sequence, the character after the `#' is pushed back and
   '#' is returned.

   On end-of-file: if EOFERR is non-NULL an error is raised, with EOFERR
   passed as the name of the reporting function; otherwise EOF is
   returned.  End-of-file right after a `#' always raises an error.  */
static int
flush_ws (SCM port, scm_t_read_opts *opts, const char *eoferr)
{
  scm_t_wchar c;
  while (1)
    switch (c = scm_getc (port))
      {
      case EOF:
      goteof:
	if (eoferr)
	  {
	    scm_i_input_error (eoferr,
			       port,
			       "end of file",
			       SCM_EOL);
	  }
	return c;

      case ';':
	/* Line comment: discard everything up to and including the end
	   of the line.  */
      lp:
	switch (c = scm_getc (port))
	  {
	  case EOF:
	    goto goteof;
	  default:
	    goto lp;
	  case SCM_LINE_INCREMENTORS:
	    break;
	  }
	break;

      case '#':
	switch (c = scm_getc (port))
	  {
	  case EOF:
	    /* Forces the error path at `goteof', even when the caller
	       passed a NULL EOFERR.  */
	    eoferr = "read_sharp";
	    goto goteof;
	  case '!':
	    scm_read_shebang (c, port, opts);
	    break;
	  case ';':
	    scm_read_commented_expression (c, port, opts);
	    break;
	  case '|':
	    /* `#|' starts a block comment only when no hash procedure
	       is registered for `|'.  */
	    if (scm_is_false (scm_get_hash_procedure (c)))
	      {
		scm_read_r6rs_block_comment (c, port);
		break;
	      }
	    /* fall through */
	  default:
	    /* Not a comment: leave the character after `#' for the
	       caller.  */
	    scm_ungetc (c, port);
	    return '#';
	  }
	break;

      case SCM_LINE_INCREMENTORS:
      case SCM_SINGLE_SPACES:
      case '\t':
	break;

      default:
	return c;
      }

  /* Not reached: the loop above only exits through `return'.  */
  return 0;
}


/* Token readers.  */

/* Forward declarations: the readers below (e.g. scm_read_sexp) call
   scm_read_expression before its definition.  */
static SCM scm_read_expression (SCM port, scm_t_read_opts *opts);
static SCM scm_read_sharp (int chr, SCM port, scm_t_read_opts *opts,
                           long line, int column);


/* Return X.  When position recording is enabled in OPTS, first attach
   LINE, COLUMN and PORT's file name to X as its source properties.  */
static SCM
maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts,
                       long line, int column)
{
  if (!opts->record_positions_p)
    return x;

  scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
  return x;
}

/* Read the elements of a list from PORT, up to the closing delimiter
   that matches CHR (`)' by default, `]' for `[', `}' for `{'), and
   return the list.  Presumably CHR, the opening delimiter, has already
   been consumed by the caller.  Dotted tails are supported.  When CHR
   is `{' and the curly-infix option is set in OPTS, the list is then
   rewritten as an SRFI-105 curly-infix expression.  Errors (including
   end-of-file) are reported with scm_i_input_error.  */
static SCM
scm_read_sexp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
#define FUNC_NAME "scm_i_lreadparen"
{
  int c;
  SCM tmp, tl, ans = SCM_EOL;
  const int curly_list_p = (chr == '{') && opts->curly_infix_p;
  const int terminating_char = ((chr == '{') ? '}'
                                : ((chr == '[') ? ']'
                                   : ')'));

  /* Need to capture line and column numbers here. */
  long line = SCM_LINUM (port);
  int column = SCM_COL (port) - 1;

  /* Empty list: returned as is, without source annotation.  */
  c = flush_ws (port, opts, FUNC_NAME);
  if (terminating_char == c)
    return SCM_EOL;

  scm_ungetc (c, port);
  tmp = scm_read_expression (port, opts);

  /* Note that it is possible for scm_read_expression to return
     scm_sym_dot, but not as part of a dotted pair: as in #{.}#.  So
     check that it's a real dot by checking `c'.  */
  if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
    {
      /* `( . x)': the result is X itself; it is returned without
         source annotation and without curly-infix processing.  */
      ans = scm_read_expression (port, opts);
      if (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
	scm_i_input_error (FUNC_NAME, port, "missing close paren",
			   SCM_EOL);
      return ans;
    }

  /* Build the head of the list structure. */
  ans = tl = scm_cons (tmp, SCM_EOL);

  /* Append the remaining elements; TL always points at the last
     pair.  */
  while (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
    {
      SCM new_tail;

      /* A closing delimiter other than the expected one.  */
      if (c == ')' || (c == ']' && opts->square_brackets_p)
          || ((c == '}' || c == ']') && opts->curly_infix_p))
        scm_i_input_error (FUNC_NAME, port,
                           "in pair: mismatched close paren: ~A",
                           scm_list_1 (SCM_MAKE_CHAR (c)));

      scm_ungetc (c, port);
      tmp = scm_read_expression (port, opts);

      /* See above note about scm_sym_dot.  */
      if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
	{
	  /* Dotted tail: exactly one datum, then the closing
	     delimiter.  */
	  SCM_SETCDR (tl, scm_read_expression (port, opts));

	  c = flush_ws (port, opts, FUNC_NAME);
	  if (terminating_char != c)
	    scm_i_input_error (FUNC_NAME, port,
			       "in pair: missing close paren", SCM_EOL);
	  break;
	}

      new_tail = scm_cons (tmp, SCM_EOL);
      SCM_SETCDR (tl, new_tail);
      tl = new_tail;
    }

  if (curly_list_p)
    {
      /* In addition to finding the length, 'scm_ilength' checks for
         improper or circular lists, in which case it returns -1. */
      int len = scm_ilength (ans);

      /* The (len == 0) case is handled above */
      if (len == 1)
        /* Return directly to avoid re-annotating the element's source
           location with the position of the outer brace.  Also, it
           might not be possible to annotate the element. */
        return scm_car (ans);  /* {e} => e */
      else if (len == 2)
        ;  /* Leave the list unchanged: {e1 e2} => (e1 e2) */
      else if (len >= 3 && (len & 1))
        {
          /* It's a proper list whose length is odd and at least 3.  If
             the elements at odd indices (the infix operator positions)
             are all 'equal?', then it's a simple curly-infix list.
             Otherwise it's a mixed curly-infix list. */
          SCM op = scm_cadr (ans);

          /* Check to see if the elements at odd indices are 'equal?' */
          for (tl = scm_cdddr (ans); ; tl = scm_cddr (tl))
            {
              if (scm_is_null (tl))
                {
                  /* Convert simple curly-infix list to prefix:
                     {a <op> b <op> ...} => (<op> a b ...) */
                  /* The operators are unlinked in place: each operand
                     pair's cdr is made to skip the following
                     operator.  */
                  tl = ans;
                  while (scm_is_pair (scm_cdr (tl)))
                    {
                      tmp = scm_cddr (tl);
                      SCM_SETCDR (tl, tmp);
                      tl = tmp;
                    }
                  ans = scm_cons (op, ans);
                  break;
                }
              else if (scm_is_false (scm_equal_p (op, scm_car (tl))))
                {
                  /* Mixed curly-infix list: {e ...} => ($nfx$ e ...) */
                  ans = scm_cons (sym_nfx, ans);
                  break;
                }
            }
        }
      else
        /* Mixed curly-infix (possibly improper) list:
           {e . tail} => ($nfx$ e . tail) */
        ans = scm_cons (sym_nfx, ans);
    }

  return maybe_annotate_source (ans, port, opts, line, column);
}
#undef FUNC_NAME


/* Read a hexadecimal number of at most NDIGITS digits and put its value
   into the variable C.  If TERMINATOR is non-null, terminate early,
   consuming it, if the TERMINATOR character is found after at least one
   digit has been read.

   This macro is not self-contained: it reads from a variable named
   `port' and assigns to a variable named `c', both of which must be in
   scope at the point of use.  On end-of-file it jumps to the label
   `str_eof'; on any other non-hexadecimal character it stores that
   character in C and jumps to the label `bad_escaped'.  Both labels
   must exist in the enclosing function.  */
#define SCM_READ_HEX_ESCAPE(ndigits, terminator)                   \
  do                                                               \
    {                                                              \
      scm_t_wchar a;                                               \
      size_t i = 0;                                                \
      c = 0;                                                       \
      while (i < ndigits)                                          \
        {                                                          \
          a = scm_getc (port);                                     \
          if (a == EOF)                                            \
            goto str_eof;                                          \
          if (terminator                                           \
              && (a == (scm_t_wchar) terminator)                   \
              && (i > 0))                                          \
            break;                                                 \
          if ('0' <= a && a <= '9')                                \
            a -= '0';                                              \
          else if ('A' <= a && a <= 'F')                           \
            a = a - 'A' + 10;                                      \
          else if ('a' <= a && a <= 'f')                           \
            a = a - 'a' + 10;                                      \
          else                                                     \
            {                                                      \
              c = a;                                               \
              goto bad_escaped;                                    \
            }                                                      \
          c = c * 16 + a;                                          \
          i ++;                                                    \
        }                                                          \
    } while (0)

/* Consume tabs and characters of the Unicode `space separator' general
   category from PORT.  The first character that is neither is pushed
   back onto PORT.  Stop silently at end-of-file.  */
static void
skip_intraline_whitespace (SCM port)
{
  for (;;)
    {
      scm_t_wchar ch = scm_getc (port);

      if (ch == EOF)
        return;

      if (ch != '\t' && !uc_is_general_category (ch, UC_SPACE_SEPARATOR))
        {
          scm_ungetc (ch, port);
          return;
        }
    }
}

/* Read a string literal from PORT, up to its closing double quote, and
   return it as a Scheme string, processing backslash escapes on the
   way.  Presumably the opening quote has already been consumed by the
   caller; CHR is not used.  End-of-file and invalid escapes are
   reported with scm_i_input_error.  */
static SCM
scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
#define FUNC_NAME "scm_lreadr"
{
  /* For strings smaller than C_STR, this function creates only one Scheme
     object (the string returned).  */

  /* STR accumulates, in reverse order, the chunks flushed from C_STR
     each time it fills up.  */
  SCM str = SCM_EOL;
  size_t c_str_len = 0;
  scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE];

  /* Need to capture line and column numbers here. */
  long line = SCM_LINUM (port);
  int column = SCM_COL (port) - 1;

  while ('"' != (c = scm_getc (port)))
    {
      if (c == EOF)
        {
          /* Also the target of `goto str_eof' below and inside
             SCM_READ_HEX_ESCAPE.  */
        str_eof:
          scm_i_input_error (FUNC_NAME, port,
                             "end of file in string constant", SCM_EOL);
        }

      /* Flush C_STR to a string chunk before it overflows.  */
      if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
	{
	  str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
	  c_str_len = 0;
	}

      if (c == '\\')
        {
          switch (c = scm_getc (port))
            {
            case EOF:
              goto str_eof;
            case '"':
            case '\\':
              break;
            case '\n':
              /* An escaped newline adds nothing to the string.  */
              if (opts->hungry_eol_escapes_p)
                skip_intraline_whitespace (port);
              continue;
            case '0':
              c = '\0';
              break;
            case 'f':
              c = '\f';
              break;
            case 'n':
              c = '\n';
              break;
            case 'r':
              c = '\r';
              break;
            case 't':
              c = '\t';
              break;
            case 'a':
              c = '\007';
              break;
            case 'v':
              c = '\v';
              break;
            case 'b':
              c = '\010';
              break;
            case 'x':
              /* R6RS: up to 10 hex digits ended by `;'.  Otherwise:
                 exactly 2 hex digits.  */
              if (opts->r6rs_escapes_p)
                SCM_READ_HEX_ESCAPE (10, ';');
              else
                SCM_READ_HEX_ESCAPE (2, '\0');
              break;
            case 'u':
              if (!opts->r6rs_escapes_p)
                {
                  SCM_READ_HEX_ESCAPE (4, '\0');
                  break;
                }
              /* With R6RS escapes, `\u' is invalid: fall through to the
                 error case.  */
            case 'U':
              if (!opts->r6rs_escapes_p)
                {
                  SCM_READ_HEX_ESCAPE (6, '\0');
                  break;
                }
              /* With R6RS escapes, `\U' is invalid: fall through to the
                 error case.  */
            default:
              /* Also the target of `goto bad_escaped' inside
                 SCM_READ_HEX_ESCAPE.  */
            bad_escaped:
              scm_i_input_error (FUNC_NAME, port,
                                 "illegal character in escape sequence: ~S",
                                 scm_list_1 (SCM_MAKE_CHAR (c)));
            }
        }

      c_str[c_str_len++] = c;
    }

  if (scm_is_null (str))
    /* Fast path: we got a string that fits in C_STR.  */
    str = scm_from_utf32_stringn (c_str, c_str_len);
  else
    {
      /* Add what is left in C_STR, then join the chunks, which were
         accumulated in reverse order.  */
      if (c_str_len > 0)
	str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);

      str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED);
    }

  return maybe_annotate_source (str, port, opts, line, column);
}
#undef FUNC_NAME


/* Read a token that starts with CHR from PORT and return it as a number
   if it parses as one, or as a symbol otherwise (downcased first when
   the case-insensitive option is set).  Numbers for which SCM_NIMP
   holds are annotated with their source position.  PORT's column is
   advanced by the token's length in characters.  */
static SCM
scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
{
  char stack_buf[READER_BUFFER_SIZE];
  char *token;
  size_t token_len;
  SCM text, value;
  scm_t_port *pt = SCM_PTAB_ENTRY (port);

  /* The position must be captured before the token is consumed.  */
  const long line = SCM_LINUM (port);
  const int column = SCM_COL (port) - 1;

  scm_ungetc (chr, port);
  token = read_complete_token (port, opts, stack_buf, sizeof stack_buf,
                               &token_len);
  text = scm_from_stringn (token, token_len,
                           pt->encoding, pt->ilseq_handler);

  value = scm_string_to_number (text, SCM_UNDEFINED);
  if (scm_is_true (value))
    {
      if (SCM_NIMP (value))
        value = maybe_annotate_source (value, port, opts, line, column);
    }
  else
    {
      /* Not a number after all: return a symbol instead.  */
      if (opts->case_insensitive_p)
        text = scm_string_downcase_x (text);
      value = scm_string_to_symbol (text);
    }

  SCM_COL (port) += scm_i_string_length (text);
  return value;
}

/* Read a token that starts with CHR from PORT and return it as a
   symbol, downcased first when the case-insensitive option is set.
   With the postfix keyword style, a token of at least two bytes that
   ends with `:' is returned as a keyword named by the token minus that
   colon.  PORT's column is advanced by the length, in characters, of
   the name that was interned.  */
static SCM
scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
{
  char stack_buf[READER_BUFFER_SIZE];
  char *token;
  size_t token_len, name_len;
  int as_keyword;
  SCM name, result;
  scm_t_port *pt = SCM_PTAB_ENTRY (port);

  scm_ungetc (chr, port);
  token = read_complete_token (port, opts, stack_buf, sizeof stack_buf,
                               &token_len);

  as_keyword = (opts->keyword_style == KEYWORD_STYLE_POSTFIX
                && token_len > 1
                && token[token_len - 1] == ':');
  name_len = as_keyword ? token_len - 1 : token_len;

  name = scm_from_stringn (token, name_len,
                           pt->encoding, pt->ilseq_handler);
  if (opts->case_insensitive_p)
    name = scm_string_downcase_x (name);

  result = scm_string_to_symbol (name);
  if (as_keyword)
    result = scm_symbol_to_keyword (result);

  SCM_COL (port) += scm_i_string_length (name);
  return result;
}

/* Read a number introduced by a radix prefix.  CHR is the character
   that selects the radix: `b', `o', `d' or `x', in either case.  Any
   other CHR is pushed back onto PORT together with a `#', and the token
   is parsed in base 10.  Raise a read error if the token is not a
   number.  PORT's column is advanced by the token's length in
   characters.  */
static SCM
scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
#define FUNC_NAME "scm_lreadr"
{
  char stack_buf[READER_BUFFER_SIZE];
  char *token;
  size_t token_len;
  unsigned int radix;
  SCM text, result;
  scm_t_port *pt;

  switch (chr)
    {
    case 'b': case 'B':
      radix = 2;
      break;

    case 'o': case 'O':
      radix = 8;
      break;

    case 'd': case 'D':
      radix = 10;
      break;

    case 'x': case 'X':
      radix = 16;
      break;

    default:
      /* Not a radix character: give both characters back so that they
         become part of the token read below.  */
      scm_ungetc (chr, port);
      scm_ungetc ('#', port);
      radix = 10;
      break;
    }

  token = read_complete_token (port, opts, stack_buf, sizeof stack_buf,
                               &token_len);

  pt = SCM_PTAB_ENTRY (port);
  text = scm_from_stringn (token, token_len,
                           pt->encoding, pt->ilseq_handler);
  result = scm_string_to_number (text, scm_from_uint (radix));

  SCM_COL (port) += scm_i_string_length (text);

  if (scm_is_false (result))
    {
      scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
      return SCM_BOOL_F;
    }

  return result;
}
#undef FUNC_NAME

/* Read the datum that follows the quoting character CHR and return the
   two-element list (SYM DATUM), where SYM is `quasiquote' for a
   backquote, `quote' for a single quote, `unquote-splicing' for `,@'
   and `unquote' for a lone comma.  The list is annotated with the
   position of CHR.  Any other CHR aborts the program.  */
static SCM
scm_read_quote (int chr, SCM port, scm_t_read_opts *opts)
{
  SCM head, form;
  const long line = SCM_LINUM (port);
  const int column = SCM_COL (port) - 1;

  if (chr == '`')
    head = scm_sym_quasiquote;
  else if (chr == '\'')
    head = scm_sym_quote;
  else if (chr == ',')
    {
      /* Peek at the next character to tell `,@' from `,'.  */
      scm_t_wchar next = scm_getc (port);

      if (next == '@')
        head = scm_sym_uq_splicing;
      else
        {
          scm_ungetc (next, port);
          head = scm_sym_unquote;
        }
    }
  else
    {
      fprintf (stderr, "%s: unhandled quote character (%i)\n",
               "scm_read_quote", chr);
      abort ();
    }

  form = scm_cons2 (head, scm_read_expression (port, opts), SCM_EOL);
  return maybe_annotate_source (form, port, opts, line, column);
}

/* Symbols that scm_read_syntax puts at the head of the forms it
   builds.  */
SCM_SYMBOL (sym_syntax, "syntax");
SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
SCM_SYMBOL (sym_unsyntax, "unsyntax");
SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");

/* Read the datum that follows the syntax-quoting character CHR and
   return the two-element list (SYM DATUM), where SYM is `quasisyntax'
   for a backquote, `syntax' for a single quote, `unsyntax-splicing' for
   `,@' and `unsyntax' for a lone comma.  The list is annotated with the
   position recorded on entry.  Any other CHR aborts the program.  */
static SCM
scm_read_syntax (int chr, SCM port, scm_t_read_opts *opts)
{
  SCM head, form;
  const long line = SCM_LINUM (port);
  const int column = SCM_COL (port) - 1;

  if (chr == '`')
    head = sym_quasisyntax;
  else if (chr == '\'')
    head = sym_syntax;
  else if (chr == ',')
    {
      /* Peek at the next character to tell `,@' from `,'.  */
      int next = scm_getc (port);

      if (next == '@')
        head = sym_unsyntax_splicing;
      else
        {
          scm_ungetc (next, port);
          head = sym_unsyntax;
        }
    }
  else
    {
      fprintf (stderr, "%s: unhandled syntax character (%i)\n",
               "scm_read_syntax", chr);
      abort ();
    }

  form = scm_cons2 (head, scm_read_expression (port, opts), SCM_EOL);
  return maybe_annotate_source (form, port, opts, line, column);
}

/* Read a symbol that starts with CHR from PORT and check that it is
   `nil'.  Return SCM_ELISP_NIL if so; raise a read error otherwise.  */
static SCM
scm_read_nil (int chr, SCM port, scm_t_read_opts *opts)
{
  SCM name = scm_read_mixed_case_symbol (chr, port, opts);

  if (scm_is_eq (name, sym_nil))
    return SCM_ELISP_NIL;

  scm_i_input_error ("scm_read_nil", port,
                     "unexpected input while reading #nil: ~a",
                     scm_list_1 (name));
  return SCM_ELISP_NIL;
}
  
/* Discard bytes from PORT up to and including the next newline, or up
   to end-of-file.  CHR is not used.  Return SCM_UNSPECIFIED.  */
static SCM
scm_read_semicolon_comment (int chr, SCM port)
{
  int byte;

  /* Reading raw bytes is enough here: the contents of a comment need
     not be decoded.  This presumes that a newline always represents
     itself whatever the port's encoding.  */
  do
    byte = scm_get_byte_or_eof (port);
  while (byte != EOF && byte != '\n');

  return SCM_UNSPECIFIED;
}


/* Sharp readers, i.e. readers called after a `#' sign has been read.  */

/* Map the character CHR to a boolean: `t' or `T' yields SCM_BOOL_T,
   `f' or `F' yields SCM_BOOL_F.  Any other character yields
   SCM_UNSPECIFIED.  PORT is not used.  */
static SCM
scm_read_boolean (int chr, SCM port)
{
  if (chr == 't' || chr == 'T')
    return SCM_BOOL_T;

  if (chr == 'f' || chr == 'F')
    return SCM_BOOL_F;

  return SCM_UNSPECIFIED;
}

/* Read a character literal from PORT (presumably the text that follows
   `#\') and return the character.  The incoming value of CHR is not
   used.  Accepted forms, tried in this order: a single character; a
   dotted circle followed by one character; an octal number; `x'
   followed by a hexadecimal number; a character name.  Anything else
   raises a read error, as does a name that does not fit in
   READER_CHAR_NAME_MAX_SIZE bytes.  */
static SCM
scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
#define FUNC_NAME "scm_lreadr"
{
  char buffer[READER_CHAR_NAME_MAX_SIZE];
  SCM charname;
  size_t charname_len, bytes_read;
  scm_t_wchar cp;
  int overflow;
  scm_t_port *pt;

  overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE,
                         &bytes_read);
  if (overflow)
    scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL);

  /* An empty token means that the next thing on PORT is either a
     delimiter, which is then the character being denoted, or
     end-of-file.  */
  if (bytes_read == 0)
    {
      chr = scm_getc (port);
      if (chr == EOF)
	scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
			   "while reading character", SCM_EOL);

      /* CHR must be a token delimiter, like a whitespace.  */
      return (SCM_MAKE_CHAR (chr));
    }

  pt = SCM_PTAB_ENTRY (port);

  /* Simple ASCII characters can be processed immediately.  Also, simple
     ISO-8859-1 characters can be processed immediately if the encoding for this
     port is ISO-8859-1.  */
  /* NOTE(review): BUFFER holds plain `char', which may be signed; this
     relies on SCM_MAKE_CHAR coping with a negative argument -- confirm
     against its definition.  */
  if (bytes_read == 1 && ((unsigned char) buffer[0] <= 127 || pt->encoding == NULL))
    {
      SCM_COL (port) += 1;
      return SCM_MAKE_CHAR (buffer[0]);
    }

  /* Otherwise, convert the buffer into a proper scheme string for
     processing.  */
  charname = scm_from_stringn (buffer, bytes_read, pt->encoding,
			       pt->ilseq_handler);
  charname_len = scm_i_string_length (charname);
  SCM_COL (port) += charname_len;
  cp = scm_i_string_ref (charname, 0);
  if (charname_len == 1)
    return SCM_MAKE_CHAR (cp);

  /* Ignore dotted circles, which may be used to keep combining characters from
     combining with the backslash in #\charname.  */
  if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
    return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));

  if (cp >= '0' && cp < '8')
    {
      /* Dirk:FIXME::  This type of character syntax is not R5RS
       * compliant.  Further, it should be verified that the constant
       * does only consist of octal digits.  */
      SCM p = scm_string_to_number (charname, scm_from_uint (8));
      if (SCM_I_INUMP (p))
        {
          scm_t_wchar c = scm_to_uint32 (p);
          if (SCM_IS_UNICODE_CHAR (c))
            return SCM_MAKE_CHAR (c);
          else
            scm_i_input_error (FUNC_NAME, port,
                               "out-of-range octal character escape: ~a",
                               scm_list_1 (charname));
        }
      /* Not an octal number: go on and try the other forms.  */
    }

  if (cp == 'x' && (charname_len > 1))
    {
      SCM p;

      /* Convert from hex, skipping the initial 'x' character in CHARNAME */
      p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
                                scm_from_uint (16));
      if (SCM_I_INUMP (p))
        {
          scm_t_wchar c = scm_to_uint32 (p);
          if (SCM_IS_UNICODE_CHAR (c))
            return SCM_MAKE_CHAR (c);
          else
            scm_i_input_error (FUNC_NAME, port,
                               "out-of-range hex character escape: ~a",
                               scm_list_1 (charname));
        }
      /* Not a hexadecimal number: it may still be a character name
         that starts with `x'.  */
    }

  /* The names of characters should never have non-Latin1
     characters.  */
  if (scm_i_is_narrow_string (charname)
      || scm_i_try_narrow_string (charname))
    { SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
                                       charname_len);
      if (scm_is_true (ch))
        return ch;
    }

  scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
		     scm_list_1 (charname));

  return SCM_UNSPECIFIED;
}
#undef FUNC_NAME

/* Read the expression that follows a keyword prefix and return the
   keyword it names.  CHR, the prefix character, is only used in the
   error message.  Raise a read error if the expression is not a
   symbol.  */
static SCM
scm_read_keyword (int chr, SCM port, scm_t_read_opts *opts)
{
  /* Going through the generic expression reader, rather than through a
     specific symbol reader, lets keywords follow whatever delimiters
     are currently valid for symbols.

     XXX: As a side effect, sloppy syntaxes like `#:  key' are
     accepted.  */
  SCM name = scm_read_expression (port, opts);

  if (scm_is_symbol (name))
    return scm_symbol_to_keyword (name);

  scm_i_input_error ("scm_read_keyword", port,
                     "keyword prefix `~a' not followed by a symbol: ~s",
                     scm_list_2 (SCM_MAKE_CHAR (chr), name));

  return scm_symbol_to_keyword (name);
}

/* Read a list with scm_read_sexp, CHR being its opening delimiter, and
   return it converted to a vector annotated with the source position
   LINE and COLUMN.  */
static SCM
scm_read_vector (int chr, SCM port, scm_t_read_opts *opts,
                 long line, int column)
{
  /* `scm_read_sexp ()' is called directly, rather than through the
     generic reader, to guarantee that a plain list is read.  This is an
     implementation detail of `scm_read_vector ()', not a desirable
     property.  */
  SCM elements = scm_read_sexp (chr, port, opts);
  SCM vector = scm_vector (elements);

  return maybe_annotate_source (vector, port, opts, line, column);
}

/* Helper used by scm_read_array.  Parse an optionally negative decimal
   integer from PORT.  C is the first character of the candidate number
   and has already been read by the caller.

   If at least one digit is found, store the value in *RESP; otherwise
   leave *RESP untouched, so that the caller's default remains in
   effect.  Return the first character read that is not part of the
   number.  Note that a lone `-' is consumed even when no digit
   follows.

   Raise a read error if the magnitude does not fit in an `ssize_t',
   instead of overflowing, which would be undefined behavior.  */
static int
read_decimal_integer (SCM port, int c, ssize_t *resp)
{
  ssize_t sign = 1;
  ssize_t res = 0;
  int got_it = 0;

  if (c == '-')
    {
      sign = -1;
      c = scm_getc (port);
    }

  while ('0' <= c && c <= '9')
    {
      const int digit = c - '0';

      /* Check that `10 * res + digit' fits before computing it.  */
      if (res > (SSIZE_MAX - digit) / 10)
        scm_i_input_error ("read_decimal_integer", port,
                           "number too large", SCM_EOL);

      res = 10 * res + digit;
      got_it = 1;
      c = scm_getc (port);
    }

  if (got_it)
    *resp = sign * res;
  return c;
}

/* Read an array.  This function can also read vectors and uniform
   vectors.  Also, the conflict between '#f' and '#f32' and '#f64' is
   handled here.

   C is the first character read after the '#'.  LINE and COLUMN are
   handed to `maybe_annotate_source ()' for the object that is returned.

   The pieces consumed from PORT are, in order: an optional decimal rank
   (1 when absent), an optional type tag of fewer than 8 characters, an
   optional shape made of `@LOWER-BOUND' and/or `:LENGTH' items, and the
   parenthesized elements.

   Returns SCM_BOOL_F when C is 'f' and the next character is neither '3'
   nor '6'; otherwise returns the array built by
   `scm_list_to_typed_array ()'.  Malformed input is reported through
   `scm_i_input_error ()'.  */
static SCM
scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column)
{
  ssize_t rank;
  scm_t_wchar tag_buf[8];
  int tag_len;

  SCM tag, shape = SCM_BOOL_F, elements, array;

  /* XXX - shortcut for ordinary vectors.  Shouldn't be necessary but
     the array code can not deal with zero-length dimensions yet, and
     we want to allow zero-length vectors, of course. */
  if (c == '(')
    return scm_read_vector (c, port, opts, line, column);

  /* Disambiguate between '#f' and uniform floating point vectors. */
  if (c == 'f')
    {
      c = scm_getc (port);
      if (c != '3' && c != '6')
	{
	  /* Plain `#f': give back the lookahead character, if any.  */
	  if (c != EOF)
	    scm_ungetc (c, port);
	  return SCM_BOOL_F;
	}
      /* The 'f' is already part of the tag; C now holds the '3' or '6'
	 and is picked up by the tag loop below.  */
      rank = 1;
      tag_buf[0] = 'f';
      tag_len = 1;
      goto continue_reading_tag;
    }

  /* Read rank. */
  rank = 1;
  c = read_decimal_integer (port, c, &rank);
  if (rank < 0)
    scm_i_input_error (NULL, port, "array rank must be non-negative",
		       SCM_EOL);

  /* Read tag. */
  tag_len = 0;
 continue_reading_tag:
  /* Accumulate tag characters until EOF, the start of the shape or of
     the elements, or until the buffer is full.  */
  while (c != EOF && c != '(' && c != '@' && c != ':'
         && tag_len < sizeof tag_buf / sizeof tag_buf[0])
    {
      tag_buf[tag_len++] = c;
      c = scm_getc (port);
    }
  if (tag_len == 0)
    tag = SCM_BOOL_T;
  else
    {
      tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len));
      /* A completely filled buffer means the tag was too long.  */
      if (tag_len == sizeof tag_buf / sizeof tag_buf[0])
        scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a",
                           scm_list_1 (tag));
    }

  /* Read shape. */
  if (c == '@' || c == ':')
    {
      shape = SCM_EOL;

      /* Each iteration reads the specification of one dimension.  */
      do
	{
	  ssize_t lbnd = 0, len = 0;
	  SCM s;

	  if (c == '@')
	    {
	      c = scm_getc (port);
	      c = read_decimal_integer (port, c, &lbnd);
	    }

	  /* With only a lower bound, the dimension is that integer.  */
	  s = scm_from_ssize_t (lbnd);

	  if (c == ':')
	    {
	      c = scm_getc (port);
	      c = read_decimal_integer (port, c, &len);
	      if (len < 0)
		scm_i_input_error (NULL, port,
				   "array length must be non-negative",
				   SCM_EOL);

	      /* With a length, the dimension is the list
		 (LOWER-BOUND UPPER-BOUND), both bounds inclusive.  */
	      s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1));
	    }

	  shape = scm_cons (s, shape);
	} while (c == '@' || c == ':');

      /* Dimensions were consed in reverse order.  */
      shape = scm_reverse_x (shape, SCM_EOL);
    }

  /* Read nested lists of elements. */
  if (c != '(')
    scm_i_input_error (NULL, port,
		       "missing '(' in vector or array literal",
		       SCM_EOL);
  elements = scm_read_sexp (c, port, opts);

  /* Without an explicit shape, the rank alone describes the array.  */
  if (scm_is_false (shape))
    shape = scm_from_ssize_t (rank);
  else if (scm_ilength (shape) != rank)
    scm_i_input_error
      (NULL, port,
       "the number of shape specifications must match the array rank",
       SCM_EOL);

  /* Handle special print syntax of rank zero arrays; see
     scm_i_print_array for a rationale. */
  if (rank == 0)
    {
      if (!scm_is_pair (elements))
	scm_i_input_error (NULL, port,
			   "too few elements in array literal, need 1",
			   SCM_EOL);
      if (!scm_is_null (SCM_CDR (elements)))
	scm_i_input_error (NULL, port,
			   "too many elements in array literal, want 1",
			   SCM_EOL);
      elements = SCM_CAR (elements);
    }

  /* Construct array, annotate with source location, and return. */
  array = scm_list_to_typed_array (tag, shape, elements);
  return maybe_annotate_source (array, port, opts, line, column);
}

/* Read an SRFI-4 uniform vector.  All the work is delegated to
   `scm_read_array ()', whose result is returned unchanged.  */
static SCM
scm_read_srfi4_vector (int chr, SCM port, scm_t_read_opts *opts,
                       long line, int column)
{
  SCM result = scm_read_array (chr, port, opts, line, column);

  return result;
}

/* Read a bytevector literal.  The characters `u', `8' and `(' are
   expected next on PORT, in that order; the value of CHR on entry is
   ignored.  The elements are then read as a list and converted with
   `scm_u8_list_to_bytevector ()'; the result is annotated with LINE and
   COLUMN.  Any other prefix raises an input error.  */
static SCM
scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
                     long line, int column)
{
  chr = scm_getc (port);
  if (chr != 'u')
    goto syntax;

  chr = scm_getc (port);
  if (chr != '8')
    goto syntax;

  chr = scm_getc (port);
  if (chr != '(')
    goto syntax;

  return maybe_annotate_source
    (scm_u8_list_to_bytevector (scm_read_sexp (chr, port, opts)),
     port, opts, line, column);

 syntax:
  /* The message has no format directive, so the argument list must be
     empty.  CHR cannot be used here anyway: it may be EOF, which is not
     a valid character.  */
  scm_i_input_error ("read_bytevector", port,
		     "invalid bytevector prefix",
		     SCM_EOL);
  return SCM_UNSPECIFIED;
}

/* Read Guile's `#*10101'-style bit vector syntax from PORT.  The value of
   CHR on entry is ignored.  Reading stops at the first character that is
   neither '0' nor '1'; that character is pushed back unless it is EOF.
   The resulting bit vector is annotated with LINE and COLUMN.  */
static SCM
scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
                           long line, int column)
{
  /* The bits are first collected in a list, in reverse order.  This is
     terribly inefficient but who cares?  */
  SCM reversed_bits = SCM_EOL;

  chr = scm_getc (port);
  while (chr == '0' || chr == '1')
    {
      SCM bit = (chr == '1') ? SCM_BOOL_T : SCM_BOOL_F;

      reversed_bits = scm_cons (bit, reversed_bits);
      chr = scm_getc (port);
    }

  if (chr != EOF)
    scm_ungetc (chr, port);

  return maybe_annotate_source
    (scm_bitvector (scm_reverse_x (reversed_bits, SCM_EOL)),
     port, opts, line, column);
}

/* Skip a SCSH-style `#! ... !#' block comment on PORT.  Characters are
   consumed up to and including the first `!#' sequence.  Reaching EOF
   first is an input error.  CHR is unused.  Always returns
   SCM_UNSPECIFIED.  */
static SCM
scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
{
  /* Non-zero when the previously read character was a `!'.  */
  int after_bang = 0;

  for (;;)
    {
      int c = scm_getc (port);

      if (c == EOF)
        scm_i_input_error ("skip_block_comment", port,
                           "unterminated `#! ... !#' comment", SCM_EOL);

      if (c == '#' && after_bang)
        break;

      after_bang = (c == '!');
    }

  return SCM_UNSPECIFIED;
}

static void set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts,
                                         int value);
static void set_port_square_brackets_p (SCM port, scm_t_read_opts *opts,
                                        int value);
static void set_port_curly_infix_p (SCM port, scm_t_read_opts *opts,
                                    int value);

/* Handle input that follows `#!'.  If the following characters form one
   of the known reader directives (`r6rs', `fold-case', `no-fold-case',
   `curly-infix', `curly-infix-and-bracket-lists') terminated by a
   delimiter, the directive is applied to PORT and OPTS.  Otherwise the
   characters read so far are pushed back and the input is skipped as a
   `#! ... !#' block comment.  Returns SCM_UNSPECIFIED when a directive was
   recognised, else the result of `scm_read_scsh_block_comment ()'.  */
static SCM
scm_read_shebang (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
{
  char name[READER_DIRECTIVE_NAME_MAX_SIZE + 1];
  int len = 0;

  while (len <= READER_DIRECTIVE_NAME_MAX_SIZE)
    {
      int is_delimiter;
      int c = scm_getc (port);

      if (c == EOF)
        {
          scm_i_input_error ("skip_block_comment", port,
                             "unterminated `#! ... !#' comment", SCM_EOL);
          continue;
        }

      /* Directive names are made of lower-case letters, digits and `-'.  */
      if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-')
        {
          name[len++] = c;
          continue;
        }

      /* Any other character ends the candidate name and is given back.  */
      is_delimiter = CHAR_IS_DELIMITER (c);
      scm_ungetc (c, port);
      if (!is_delimiter)
        break;

      name[len] = '\0';
      if (strcmp (name, "r6rs") == 0)
        {
          /* Silently ignore */
        }
      else if (strcmp (name, "fold-case") == 0)
        set_port_case_insensitive_p (port, opts, 1);
      else if (strcmp (name, "no-fold-case") == 0)
        set_port_case_insensitive_p (port, opts, 0);
      else if (strcmp (name, "curly-infix") == 0)
        set_port_curly_infix_p (port, opts, 1);
      else if (strcmp (name, "curly-infix-and-bracket-lists") == 0)
        {
          set_port_curly_infix_p (port, opts, 1);
          set_port_square_brackets_p (port, opts, 0);
        }
      else
        break;

      return SCM_UNSPECIFIED;
    }

  /* Not a directive: push the name back, last character first, and
     treat everything as a block comment.  */
  while (len > 0)
    {
      len--;
      scm_ungetc (name[len], port);
    }

  return scm_read_scsh_block_comment (chr, port);
}

/* Skip an SRFI-30/R6RS `#| ... |#' block comment on PORT.  Such comments
   may be nested, unlike SCSH-style ones, so the nesting depth is tracked
   and reading ends only when the outermost comment is closed.  Reaching
   EOF first is an input error.  CHR is unused.  Always returns
   SCM_UNSPECIFIED.  */
static SCM
scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port)
{
  int depth = 1;
  int prev, cur;

  prev = scm_getc (port);
  if (prev == EOF)
    scm_i_input_error ("scm_read_r6rs_block_comment", port,
                       "unterminated `#| ... |#' comment", SCM_EOL);

  do
    {
      cur = scm_getc (port);
      if (cur == EOF)
        scm_i_input_error ("scm_read_r6rs_block_comment", port,
                           "unterminated `#| ... |#' comment", SCM_EOL);

      if (prev == '|' && cur == '#')
        {
          /* A comment was closed.  Forget CUR so that it cannot also
             start a new two-character marker.  */
          depth--;
          cur = EOF;
        }
      else if (prev == '#' && cur == '|')
        {
          /* A nested comment was opened; forget CUR likewise.  */
          depth++;
          cur = EOF;
        }

      prev = cur;
    }
  while (depth > 0);

  return SCM_UNSPECIFIED;
}

/* Handle a `#;' datum comment: skip whitespace, then read the following
   expression from PORT and throw it away.  It is an input error if EOF is
   reached before any expression.  CHR is unused.  Always returns
   SCM_UNSPECIFIED.  */
static SCM
scm_read_commented_expression (scm_t_wchar chr, SCM port,
                               scm_t_read_opts *opts)
{
  scm_t_wchar first = flush_ws (port, opts, (char *) NULL);

  if (first == EOF)
    scm_i_input_error ("read_commented_expression", port,
                       "no expression after #; comment", SCM_EOL);

  /* Give the first significant character back to the expression
     reader, then drop whatever it reads.  */
  scm_ungetc (first, port);
  (void) scm_read_expression (port, opts);

  return SCM_UNSPECIFIED;
}

/* Read a symbol written in Guile's extended read syntax from PORT and
   return it.  The symbol's characters are accumulated in a string buffer
   that grows by 1024 characters whenever it is nearly full.  Reaching EOF
   before the closing `}#' is an input error.  */
static SCM
scm_read_extended_symbol (scm_t_wchar chr, SCM port)
{
  /* Guile's extended symbol read syntax looks like this:

       #{This is all a symbol name}#

     So here, CHR is expected to be `{'.  */
  int saw_brace = 0;
  size_t len = 0;
  SCM buf = scm_i_make_string (1024, NULL, 0);

  buf = scm_i_string_start_writing (buf);

  while ((chr = scm_getc (port)) != EOF)
    {
      if (saw_brace)
	{
	  if (chr == '#')
	    {
	      /* `}#' terminates the symbol.  */
	      break;
	    }
	  else
	    {
	      /* The previous `}' was an ordinary character after all.  */
	      saw_brace = 0;
	      scm_i_string_set_x (buf, len++, '}');
	    }
	}

      if (chr == '}')
	saw_brace = 1;
      else if (chr == '\\')
        {
          /* It used to be that print.c would print extended-read-syntax
             symbols with backslashes before "non-standard" chars, but
             this routine wouldn't do anything with those escapes.
             Bummer.  What we've done is to change print.c to output
             R6RS hex escapes for those characters, relying on the fact
             that the extended read syntax would never put a `\' before
             an `x'.  For now, we just ignore other instances of
             backslash in the string.  */
          switch ((chr = scm_getc (port)))
            {
            case EOF:
              goto done;
            case 'x':
              {
                scm_t_wchar c;
                
                /* NOTE(review): SCM_READ_HEX_ESCAPE is defined elsewhere;
                   it presumably stores the decoded character in C and
                   jumps to `str_eof' or `bad_escaped' on failure --
                   confirm against its definition.  */
                SCM_READ_HEX_ESCAPE (10, ';');
                scm_i_string_set_x (buf, len++, c);
                break;

              str_eof:
                chr = EOF;
                goto done;

              bad_escaped:
                scm_i_string_stop_writing ();
                scm_i_input_error ("scm_read_extended_symbol", port,
                                   "illegal character in escape sequence: ~S",
                                   scm_list_1 (SCM_MAKE_CHAR (c)));
                break;
              }
            default:
	      scm_i_string_set_x (buf, len++, chr);
              break;
            }
        }
      else
        scm_i_string_set_x (buf, len++, chr);

      /* At most two characters are stored per iteration, hence the
         margin of 2.  When the buffer is nearly full, extend it by
         another 1024 characters.  LEN must be left alone here: the
         characters stored so far are still at the start of the new
         buffer, and resetting LEN would overwrite them and truncate the
         symbol to its tail.  */
      if (len >= scm_i_string_length (buf) - 2)
	{
	  SCM addy;

	  scm_i_string_stop_writing ();
	  addy = scm_i_make_string (1024, NULL, 0);
	  buf = scm_string_append (scm_list_2 (buf, addy));
	  buf = scm_i_string_start_writing (buf);
	}
    }

 done:
  scm_i_string_stop_writing ();
  if (chr == EOF)
    scm_i_input_error ("scm_read_extended_symbol", port,
                       "end of file while reading symbol", SCM_EOL);

  return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
}


/* Top-level token readers, i.e., dispatchers.  */

/* Try the user-installed reader for `#' followed by CHR.  When
   `scm_get_hash_procedure ()' yields a procedure for CHR, it is called
   with the character and PORT, and its result is returned.  If position
   recording is enabled, and the result is a non-immediate without source
   properties, the position where the `#' was read is attached to it.
   Returns SCM_UNSPECIFIED when no procedure is installed for CHR.  */
static SCM
scm_read_sharp_extension (int chr, SCM port, scm_t_read_opts *opts)
{
  SCM proc = scm_get_hash_procedure (chr);
  SCM got;
  long line;
  int column;

  if (!scm_is_true (scm_procedure_p (proc)))
    return SCM_UNSPECIFIED;

  /* Two characters, `#' and CHR, have been consumed already.  */
  line = SCM_LINUM (port);
  column = SCM_COL (port) - 2;

  got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);

  if (opts->record_positions_p && SCM_NIMP (got)
      && !scm_i_has_source_properties (got))
    scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port));

  return got;
}

/* The reader for the sharp `#' character.  It basically dispatches reads
   among the above token readers.

   The value of CHR on entry is ignored: the character following the `#'
   is read from PORT here.  LINE and COLUMN are forwarded to the readers
   that annotate their result with a source position.

   A reader installed for that character (see
   `scm_read_sharp_extension ()') takes precedence over the built-in
   syntaxes.  Returns the object read, or SCM_UNSPECIFIED when the input
   was a comment or a reader directive rather than a datum.  An unknown
   character raises an input error.  */
static SCM
scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
                long line, int column)
#define FUNC_NAME "scm_lreadr"
{
  SCM result;

  chr = scm_getc (port);

  /* Give user-installed readers the first chance.  */
  result = scm_read_sharp_extension (chr, port, opts);
  if (!scm_is_eq (result, SCM_UNSPECIFIED))
    return result;

  switch (chr)
    {
    case '\\':
      return (scm_read_character (chr, port, opts));
    case '(':
      return (scm_read_vector (chr, port, opts, line, column));
    case 's':
    case 'u':
    case 'f':
    case 'c':
      /* This one may return either a boolean or an SRFI-4 vector.  */
      return (scm_read_srfi4_vector (chr, port, opts, line, column));
    case 'v':
      return (scm_read_bytevector (chr, port, opts, line, column));
    case '*':
      return (scm_read_guile_bit_vector (chr, port, opts, line, column));
    case 't':
    case 'T':
    case 'F':
      return (scm_read_boolean (chr, port));
    case ':':
      return (scm_read_keyword (chr, port, opts));
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case '@':
#if SCM_ENABLE_DEPRECATED
      /* See below for 'i' and 'e'. */
    case 'a':
    case 'y':
    case 'h':
    case 'l':
#endif
      return (scm_read_array (chr, port, opts, line, column));

    case 'i':
    case 'e':
#if SCM_ENABLE_DEPRECATED
      {
	/* When next char is '(', it really is an old-style
	   uniform array. */
	scm_t_wchar next_c = scm_getc (port);
	if (next_c != EOF)
	  scm_ungetc (next_c, port);
	if (next_c == '(')
	  return scm_read_array (chr, port, opts, line, column);
	/* Fall through. */
      }
#endif
      /* Otherwise `#i' and `#e' are number prefixes, like those below.  */
    case 'b':
    case 'B':
    case 'o':
    case 'O':
    case 'd':
    case 'D':
    case 'x':
    case 'X':
    case 'I':
    case 'E':
      return (scm_read_number_and_radix (chr, port, opts));
    case '{':
      return (scm_read_extended_symbol (chr, port));
    case '!':
      return (scm_read_shebang (chr, port, opts));
    case ';':
      return (scm_read_commented_expression (chr, port, opts));
    case '`':
    case '\'':
    case ',':
      return (scm_read_syntax (chr, port, opts));
    case 'n':
      return (scm_read_nil (chr, port, opts));
    default:
      /* NOTE(review): the extension reader was already tried at the top
	 of this function, so getting here means that attempt yielded
	 SCM_UNSPECIFIED.  If a procedure is installed for CHR, this call
	 invokes it a second time.  */
      result = scm_read_sharp_extension (chr, port, opts);
      if (scm_is_eq (result, SCM_UNSPECIFIED))
	{
	  /* To remain compatible with 1.8 and earlier, the following
	     characters have lower precedence than `read-hash-extend'
	     characters.  */
	  switch (chr)
	    {
	    case '|':
	      return scm_read_r6rs_block_comment (chr, port);
	    default:
	      scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
				 scm_list_1 (SCM_MAKE_CHAR (chr)));
	    }
	}
      else
	return result;
    }

  return SCM_UNSPECIFIED;
}
#undef FUNC_NAME

/* Read one datum from PORT, without any neoteric-expression processing,
   and return it.  Whitespace and comments are skipped.  The first
   significant character selects the token reader to invoke.  Returns
   SCM_EOF_VAL at end of input.  Unexpected closing delimiters raise an
   input error, except for `]' when square brackets are disabled (see
   below).  */
static SCM
read_inner_expression (SCM port, scm_t_read_opts *opts)
#define FUNC_NAME "read_inner_expression"
{
  while (1)
    {
      scm_t_wchar chr;

      chr = scm_getc (port);

      switch (chr)
	{
	case SCM_WHITE_SPACES:
	case SCM_LINE_INCREMENTORS:
	  break;
	case ';':
	  (void) scm_read_semicolon_comment (chr, port);
	  break;
        case '{':
          if (opts->curly_infix_p)
            {
              if (opts->neoteric_p)
                return scm_read_sexp (chr, port, opts);
              else
                {
                  SCM expr;

                  /* Enable neoteric expressions within curly braces */
                  opts->neoteric_p = 1;
                  expr = scm_read_sexp (chr, port, opts);
                  opts->neoteric_p = 0;
                  return expr;
                }
            }
          else
            return scm_read_mixed_case_symbol (chr, port, opts);
	case '[':
          if (opts->square_brackets_p)
            return scm_read_sexp (chr, port, opts);
          else if (opts->curly_infix_p)
            {
              /* The syntax of neoteric expressions requires that '[' be
                 a delimiter when curly-infix is enabled, so it cannot
                 be part of an unescaped symbol.  We might as well do
                 something useful with it, so we adopt Kawa's convention:
                 [...] => ($bracket-list$ ...) */
              long line = SCM_LINUM (port);
              int column = SCM_COL (port) - 1;
              return maybe_annotate_source
                (scm_cons (sym_bracket_list, scm_read_sexp (chr, port, opts)),
                 port, opts, line, column);
            }
          else
            return scm_read_mixed_case_symbol (chr, port, opts);
	case '(':
	  return (scm_read_sexp (chr, port, opts));
	case '"':
	  return (scm_read_string (chr, port, opts));
	case '\'':
	case '`':
	case ',':
	  return (scm_read_quote (chr, port, opts));
	case '#':
	  {
            long line  = SCM_LINUM (port);
            int column = SCM_COL (port) - 1;
	    SCM result = scm_read_sharp (chr, port, opts, line, column);
	    if (scm_is_eq (result, SCM_UNSPECIFIED))
	      /* We read a comment or some such.  */
	      break;
	    else
	      return result;
	  }
	case ')':
	  scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
	  break;
        case '}':
          if (opts->curly_infix_p)
            scm_i_input_error (FUNC_NAME, port, "unexpected \"}\"", SCM_EOL);
          else
            return scm_read_mixed_case_symbol (chr, port, opts);
	case ']':
          if (opts->square_brackets_p)
            scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL);
          /* otherwise fall through: a `]' met while square brackets are
             disabled is reported like end of input.  */
	case EOF:
	  return SCM_EOF_VAL;
	case ':':
	  if (opts->keyword_style == KEYWORD_STYLE_PREFIX)
	    return scm_symbol_to_keyword (scm_read_expression (port, opts));
	  /* Fall through.  */

	default:
	  {
	    /* CHR is a wide character, so it is compared directly here.
	       `strchr ()' would convert it to `char', which makes NUL
	       match the string terminator and makes code points whose
	       low byte equals `+', `-' or `.' (e.g. U+012B) look like
	       the start of a number.  */
	    if (((chr >= '0') && (chr <= '9'))
		|| chr == '+' || chr == '-' || chr == '.')
	      return (scm_read_number (chr, port, opts));
	    else
	      return (scm_read_mixed_case_symbol (chr, port, opts));
	  }
	}
    }
}
#undef FUNC_NAME

/* Read one expression from PORT and return it.  When neoteric
   expressions are disabled this is just `read_inner_expression ()'.
   Otherwise, any `(', `[' or `{' group that immediately follows the first
   datum, with no whitespace in between, is folded into it; for example
   f{n - 1}(x) => ((f (- n 1)) x).  */
static SCM
scm_read_expression (SCM port, scm_t_read_opts *opts)
#define FUNC_NAME "scm_read_expression"
{
  long line = 0;
  int column = 0;
  SCM expr;

  if (!opts->neoteric_p)
    return read_inner_expression (port, opts);

  if (opts->record_positions_p)
    {
      /* Remember where the first non-whitespace character is, so that
         the lists built below can be annotated correctly.  In 'f(x)',
         for instance, the first datum read is 'f', which cannot carry
         an annotation; the list (f x) constructed afterwards has to get
         the position of that 'f'.  */
      int first = flush_ws (port, opts, (char *) NULL);

      if (first == EOF)
        return SCM_EOF_VAL;
      scm_ungetc (first, port);
      line = SCM_LINUM (port);
      column = SCM_COL (port);
    }

  /* First component of the neoteric expression.  */
  expr = read_inner_expression (port, opts);

  /* Keep wrapping EXPR for as long as an opening delimiter follows
     directly.  */
  for (;;)
    {
      int chr = scm_getc (port);
      SCM rest;

      switch (chr)
        {
        case '(':
          /* e(...) => (e ...) */
          rest = scm_read_sexp (chr, port, opts);
          expr = scm_cons (expr, rest);
          break;

        case '[':
          /* e[...] => ($bracket-apply$ e ...) */
          rest = scm_read_sexp (chr, port, opts);
          expr = scm_cons (sym_bracket_apply, scm_cons (expr, rest));
          break;

        case '{':
          rest = scm_read_sexp (chr, port, opts);
          if (scm_is_null (rest))
            expr = scm_list_1 (expr);        /* e{} => (e) */
          else
            expr = scm_list_2 (expr, rest);  /* e{...} => (e {...}) */
          break;

        default:
          /* Anything else ends the neoteric expression.  */
          if (chr != EOF)
            scm_ungetc (chr, port);
          return expr;
        }

      maybe_annotate_source (expr, port, opts, line, column);
    }
}
#undef FUNC_NAME


/* Actual reader.  */

static void init_read_options (SCM port, scm_t_read_opts *opts);

SCM_DEFINE (scm_read, "read", 0, 1, 0, 
            (SCM port),
	    "Read an s-expression from the input port @var{port}, or from\n"
	    "the current input port if @var{port} is not specified.\n"
	    "Any whitespace before the next token is discarded.")
#define FUNC_NAME s_scm_read
{
  scm_t_read_opts opts;
  int first;

  /* Default to the current input port.  */
  if (SCM_UNBNDP (port))
    port = scm_current_input_port ();
  SCM_VALIDATE_OPINPORT (1, port);

  init_read_options (port, &opts);

  /* Skip leading whitespace; report end of input right away.  */
  first = flush_ws (port, &opts, (char *) NULL);
  if (first != EOF)
    {
      scm_ungetc (first, port);
      return scm_read_expression (port, &opts);
    }

  return SCM_EOF_VAL;
}
#undef FUNC_NAME


/* Add, replace or remove an entry of the read-hash-procedures alist.
   This could be written in Scheme, but maybe it will also be used by C
   code during initialisation.  */
SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
            (SCM chr, SCM proc),
	    "Install the procedure @var{proc} for reading expressions\n"
	    "starting with the character sequence @code{#} and @var{chr}.\n"
	    "@var{proc} will be called with two arguments:  the character\n"
	    "@var{chr} and the port to read further data from. The object\n"
	    "returned will be the return value of @code{read}. \n"
	    "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
	    )
#define FUNC_NAME s_scm_read_hash_extend
{
  SCM entry;
  SCM before = SCM_BOOL_F;

  SCM_VALIDATE_CHAR (1, chr);
  SCM_ASSERT (scm_is_false (proc)
	      || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
	      proc, SCM_ARG2, FUNC_NAME);

  /* Look for the pair whose key is CHR; BEFORE trails one pair behind
     ENTRY and stays #f while ENTRY is the head of the alist.  */
  for (entry = scm_i_read_hash_procedures_ref ();
       !scm_is_null (entry) && !scm_is_eq (chr, SCM_CAAR (entry));
       before = entry, entry = SCM_CDR (entry))
    ;

  if (scm_is_null (entry))
    {
      /* Not found: push a new association, unless asked to remove.  */
      if (scm_is_true (proc))
        {
          SCM extended = scm_cons (scm_cons (chr, proc),
                                   scm_i_read_hash_procedures_ref ());
          scm_i_read_hash_procedures_set_x (extended);
        }
    }
  else if (scm_is_false (proc))
    {
      /* Found, and removal requested: unlink the pair.  */
      if (scm_is_false (before))
        {
          SCM tail = SCM_CDR (scm_i_read_hash_procedures_ref ());
          scm_i_read_hash_procedures_set_x (tail);
        }
      else
        scm_set_cdr_x (before, SCM_CDR (entry));
    }
  else
    {
      /* Found: replace the procedure in place.  */
      scm_set_cdr_x (SCM_CAR (entry), proc);
    }

  return SCM_UNSPECIFIED;
}
#undef FUNC_NAME

/* Look up character C in the read-hash-procedures alist.  Returns the
   value associated with the first matching entry, or SCM_BOOL_F when
   there is none.  */
static SCM
scm_get_hash_procedure (int c)
{
  SCM entry;

  for (entry = scm_i_read_hash_procedures_ref ();
       !scm_is_null (entry);
       entry = SCM_CDR (entry))
    {
      if (SCM_CHAR (SCM_CAAR (entry)) == c)
        return SCM_CDAR (entry);
    }

  return SCM_BOOL_F;
}

#define SCM_ENCODING_SEARCH_SIZE (500)

/* Search the first few hundred characters of a file for an Emacs-like coding
   declaration (`coding:' or `coding=' followed by a name).

   At most SCM_ENCODING_SEARCH_SIZE bytes of PORT are examined, either
   directly in its read buffer or by reading and seeking back.  The
   declaration only counts when it appears after a `;' on the same line
   or inside a `#! ... !#' block.

   Returns NULL when nothing usable is found, or the upper-cased encoding
   name in storage obtained from `scm_gc_strndup ()'.  Signals an error
   when the data starts with a UTF-8 byte-order mark but declares an
   encoding other than UTF-8.  */
char *
scm_i_scan_for_encoding (SCM port)
{
  scm_t_port *pt;
  char header[SCM_ENCODING_SEARCH_SIZE+1];
  size_t bytes_read, encoding_length, i;
  char *encoding = NULL;
  int utf8_bom = 0;
  char *pos, *encoding_start;
  int in_comment;

  pt = SCM_PTAB_ENTRY (port);

  if (pt->rw_active == SCM_PORT_WRITE)
    scm_flush (port);

  if (pt->rw_random)
    pt->rw_active = SCM_PORT_READ;

  if (pt->read_pos == pt->read_end)
    {
      /* We can use the read buffer, and thus avoid a seek. */
      if (scm_fill_input (port) == EOF)
        return NULL;

      bytes_read = pt->read_end - pt->read_pos;
      if (bytes_read > SCM_ENCODING_SEARCH_SIZE)
        bytes_read = SCM_ENCODING_SEARCH_SIZE;

      if (bytes_read <= 1)
        /* An unbuffered port -- don't scan.  */
        return NULL;

      memcpy (header, pt->read_pos, bytes_read);
      header[bytes_read] = '\0';
    }
  else
    {
      /* Try to read some bytes and then seek back.  Not all ports
         support seeking back; and indeed some file ports (like
         /dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
         check performed by SCM_FPORT_FDES---but fail to seek
         backwards.  Hence this block comes second.  We prefer to use
         the read buffer in-place.  */
      if (SCM_FPORTP (port) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port)))
        return NULL;

      bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
      header[bytes_read] = '\0';
      scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
    }

  if (bytes_read > 3 
      && header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
    utf8_bom = 1;

  /* search past "coding[:=]" */
  pos = header;
  while (1)
    {
      if ((pos = strstr(pos, "coding")) == NULL)
        return NULL;

      pos += strlen("coding");
      if (pos - header >= SCM_ENCODING_SEARCH_SIZE || 
          (*pos == ':' || *pos == '='))
        {
          pos ++;
          break;
        }
    }

  /* skip spaces */
  while (pos - header <= SCM_ENCODING_SEARCH_SIZE && 
	 (*pos == ' ' || *pos == '\t'))
    pos ++;

  /* grab the next token.  A NUL byte ends the token explicitly, since
     `strchr ()' would otherwise match it against the terminator of the
     character set.  The <ctype.h> functions are given `unsigned char'
     values, as a negative `char' is not a valid argument for them.  */
  encoding_start = pos;
  i = 0;
  while (encoding_start + i - header <= SCM_ENCODING_SEARCH_SIZE
         && encoding_start + i - header < bytes_read
         && encoding_start[i] != '\0'
	 && (isalnum ((unsigned char) encoding_start[i])
	     || strchr ("_-.:/,+=()", encoding_start[i]) != NULL))
    i++;

  encoding_length = i;
  if (encoding_length == 0)
    return NULL;

  encoding = scm_gc_strndup (encoding_start, encoding_length, "encoding");
  for (i = 0; i < encoding_length; i++)
    encoding[i] = toupper ((unsigned char) encoding[i]);

  /* push backwards to make sure we were in a comment */
  in_comment = 0;
  pos = encoding_start;
  while (pos >= header)
    {
      if (*pos == ';')
	{
	  in_comment = 1;
	  break;
	}
      else if (*pos == '\n' || pos == header)
	{
	  /* This wasn't in a semicolon comment. Check for a
	   hash-bang comment.  Both markers must actually be present:
	   comparing against a NULL `beg' would wrongly accept a
	   declaration that is merely followed by "!#".  */
	  char *beg = strstr (header, "#!");
	  char *end = strstr (header, "!#");
	  if (beg != NULL && end != NULL
	      && beg < encoding_start
	      && encoding_start + encoding_length <= end)
	    in_comment = 1;
	  break;
	}
      else
        {
          pos --;
          continue;
        }
    }
  if (!in_comment)
    /* This wasn't in a comment */
    return NULL;

  if (utf8_bom && strcmp(encoding, "UTF-8"))
    scm_misc_error (NULL,
		    "the port input declares the encoding ~s but is encoded as UTF-8",
		    scm_list_1 (scm_from_locale_string (encoding)));

  return encoding;
}

SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
            (SCM port),
            "Scans the port for an Emacs-like character coding declaration\n"
            "near the top of the contents of a port with random-accessible contents.\n"
            "The coding declaration is of the form\n"
            "@code{coding: XXXXX} and must appear in a scheme comment.\n"
            "\n"
            "Returns a string containing the character encoding of the file\n"
            "if a declaration was found, or @code{#f} otherwise.\n")
#define FUNC_NAME s_scm_file_encoding
{
  char *declared;

  SCM_VALIDATE_OPINPORT (SCM_ARG1, port);

  /* NULL from the scanner means that no declaration was found.  */
  declared = scm_i_scan_for_encoding (port);

  return (declared != NULL) ? scm_from_locale_string (declared) : SCM_BOOL_F;
}
#undef FUNC_NAME


/* Per-port read options.

   We store per-port read options in the 'port-read-options' key of the
   port's alist, which is stored in 'scm_i_port_weak_hash'.  The value
   stored in the alist is a single integer that contains a two-bit field
   for each read option.

   If a bit field contains READ_OPTION_INHERIT (3), that indicates that
   the applicable value should be inherited from the corresponding
   global read option.  Otherwise, the bit field contains the value of
   the read option.  For boolean read options that have been set
   per-port, the possible values are 0 or 1.  If the 'keyword_style'
   read option has been set per-port, its possible values are those in
   'enum t_keyword_style'. */

/* Key to read options in per-port alists. */
SCM_SYMBOL (sym_port_read_options, "port-read-options");

/* Offsets of bit fields for each per-port override.  Every field is two
   bits wide (see READ_OPTION_MASK), hence the step of 2 between
   consecutive offsets. */
#define READ_OPTION_COPY_SOURCE_P          0
#define READ_OPTION_RECORD_POSITIONS_P     2
#define READ_OPTION_CASE_INSENSITIVE_P     4
#define READ_OPTION_KEYWORD_STYLE          6
#define READ_OPTION_R6RS_ESCAPES_P         8
#define READ_OPTION_SQUARE_BRACKETS_P     10
#define READ_OPTION_HUNGRY_EOL_ESCAPES_P  12
#define READ_OPTION_CURLY_INFIX_P         14

/* The total width in bits of the per-port overrides */
#define READ_OPTIONS_NUM_BITS             16

/* All READ_OPTIONS_NUM_BITS bits set, i.e. every two-bit field holds
   READ_OPTION_INHERIT.  This is also the largest value accepted for the
   stored integer. */
#define READ_OPTIONS_INHERIT_ALL  ((1UL << READ_OPTIONS_NUM_BITS) - 1)
#define READ_OPTIONS_MAX_VALUE    READ_OPTIONS_INHERIT_ALL

/* Mask selecting one two-bit field once shifted down to bit 0, and the
   field value meaning "use the global read option". */
#define READ_OPTION_MASK     3
#define READ_OPTION_INHERIT  3

/* Store NEW_VALUE, truncated to two bits, in the bit field at offset
   OPTION of PORT's per-port read options.  When PORT has no valid stored
   options yet, the other fields start out as "inherit".  The update is
   performed with `scm_i_port_table_mutex' held.  */
static void
set_port_read_option (SCM port, int option, int new_value)
{
  SCM alist, stored;
  unsigned int bits;

  new_value &= READ_OPTION_MASK;

  scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);

  alist = scm_hashq_ref (scm_i_port_weak_hash, port, SCM_BOOL_F);
  stored = scm_assq_ref (alist, sym_port_read_options);
  bits = scm_is_unsigned_integer (stored, 0, READ_OPTIONS_MAX_VALUE)
    ? scm_to_uint (stored)
    : READ_OPTIONS_INHERIT_ALL;

  /* Clear the field, then write the new value into it.  */
  bits &= ~(READ_OPTION_MASK << option);
  bits |= new_value << option;

  stored = scm_from_uint (bits);
  alist = scm_assq_set_x (alist, sym_port_read_options, stored);
  scm_hashq_set_x (scm_i_port_weak_hash, port, alist);

  scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
}

/* Record VALUE, normalised to 0 or 1, as the case-insensitivity setting
   both in OPTS and in PORT's per-port read options. */
static void
set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value)
{
  int flag = value ? 1 : 0;

  opts->case_insensitive_p = flag;
  set_port_read_option (port, READ_OPTION_CASE_INSENSITIVE_P, flag);
}

/* Record VALUE, normalised to 0 or 1, as the square_brackets_p setting
   both in OPTS and in PORT's per-port read options. */
static void
set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, int value)
{
  int flag = value ? 1 : 0;

  opts->square_brackets_p = flag;
  set_port_read_option (port, READ_OPTION_SQUARE_BRACKETS_P, flag);
}

/* Record VALUE, normalised to 0 or 1, as the curly_infix_p setting both
   in OPTS and in PORT's per-port read options. */
static void
set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value)
{
  int flag = value ? 1 : 0;

  opts->curly_infix_p = flag;
  set_port_read_option (port, READ_OPTION_CURLY_INFIX_P, flag);
}

/* Fill in OPTS for a read from PORT.  Each option takes its per-port
   value when one is stored for PORT, and the value of the corresponding
   global read option otherwise.  `neoteric_p' always starts out as 0. */
static void
init_read_options (SCM port, scm_t_read_opts *opts)
{
  SCM alist, stored, style;
  unsigned int bits, field;

  /* Fetch the integer holding PORT's overrides, if there is one.  */
  scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
  alist = scm_hashq_ref (scm_i_port_weak_hash, port, SCM_BOOL_F);
  stored = scm_assq_ref (alist, sym_port_read_options);
  scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);

  bits = scm_is_unsigned_integer (stored, 0, READ_OPTIONS_MAX_VALUE)
    ? scm_to_uint (stored)
    : READ_OPTIONS_INHERIT_ALL;

  /* The keyword style is the only non-boolean option.  */
  field = READ_OPTION_MASK & (bits >> READ_OPTION_KEYWORD_STYLE);
  if (field == READ_OPTION_INHERIT)
    {
      style = SCM_PACK (SCM_KEYWORD_STYLE);
      if (scm_is_eq (style, scm_keyword_prefix))
        field = KEYWORD_STYLE_PREFIX;
      else if (scm_is_eq (style, scm_keyword_postfix))
        field = KEYWORD_STYLE_POSTFIX;
      else
        field = KEYWORD_STYLE_HASH_PREFIX;
    }
  opts->keyword_style = field;

  /* Boolean options: use the per-port field, or the truth value of the
     global option SCM_<NAME> when the field says "inherit".  */
#define RESOLVE_BOOLEAN_OPTION(NAME, name)                              \
  do                                                                    \
    {                                                                   \
      field = READ_OPTION_MASK & (bits >> READ_OPTION_ ## NAME);        \
      if (field == READ_OPTION_INHERIT)                                 \
        field = !!SCM_ ## NAME;                                         \
      opts->name = field;                                               \
    }                                                                   \
  while (0)

  RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P,        copy_source_p);
  RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P,   record_positions_p);
  RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P,   case_insensitive_p);
  RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P,       r6rs_escapes_p);
  RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P,    square_brackets_p);
  RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p);
  RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P,        curly_infix_p);

#undef RESOLVE_BOOLEAN_OPTION

  opts->neoteric_p = 0;
}

/* Set up the reader's global state: create a fluid whose default value
   is the empty list, bind it to `%read-hash-procedures' and remember
   the location of that binding in 'scm_i_read_hash_procedures', then
   register the read options table.  The generated file libguile/read.x
   is included at the end of the body (presumably the snarfed
   initialization code for this file). */
void
scm_init_read ()
{
  SCM read_hash_procs = scm_make_fluid_with_default (SCM_EOL);
  SCM hash_procs_var = scm_c_define ("%read-hash-procedures", read_hash_procs);

  scm_i_read_hash_procedures = SCM_VARIABLE_LOC (hash_procs_var);

  scm_init_opts (scm_read_options, scm_read_opts);
#include "libguile/read.x"
}

/*
  Local Variables:
  c-file-style: "gnu"
  End:
*/
-												Remove incorrect comment in read.c

* libguile/read.c (scm_read_sharp): Remove incorrect comment that
  claims that scm_read_boolean might return a SRFI-4 vector.

											
										
										
											2012-02-08 03:14:17 -05:00
+								/* Copyright (C) 1995, 1996, 1997, 1999, 2000, 2001, 2003, 2004, 2006,
-												Revert "detect and consume byte-order marks for textual ports"

This reverts commit b2cb557d75e4daf8c7c8cd43313f4cc51d9a3f1b, which was
pushed accidentally.

											
										
										
											2013-01-30 15:30:31 +01:00
+								 *   2007, 2008, 2009, 2010, 2011, 2012 Free Software Foundation, Inc.
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								 *
-												Changed license terms to the plain LGPL thru-out.

											
										
										
											2003-04-05 19:15:35 +00:00
+								 * This library is free software; you can redistribute it and/or
-												Change Guile license to LGPLv3+

(Not quite finished, the following will be done tomorrow.
   module/srfi/*.scm
   module/rnrs/*.scm
   module/scripts/*.scm
   testsuite/*.scm
   guile-readline/*
)

											
										
										
											2009-06-17 00:22:09 +01:00
+								 * modify it under the terms of the GNU Lesser General Public License
 								 * as published by the Free Software Foundation; either version 3 of
 								 * the License, or (at your option) any later version.
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								 *
-												Change Guile license to LGPLv3+

(Not quite finished, the following will be done tomorrow.
   module/srfi/*.scm
   module/rnrs/*.scm
   module/scripts/*.scm
   testsuite/*.scm
   guile-readline/*
)

											
										
										
											2009-06-17 00:22:09 +01:00
+								 * This library is distributed in the hope that it will be useful, but
 								 * WITHOUT ANY WARRANTY; without even the implied warranty of
-												Changed license terms to the plain LGPL thru-out.

											
										
										
											2003-04-05 19:15:35 +00:00
+								 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 								 * Lesser General Public License for more details.
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								 *
-												Changed license terms to the plain LGPL thru-out.

											
										
										
											2003-04-05 19:15:35 +00:00
+								 * You should have received a copy of the GNU Lesser General Public
 								 * License along with this library; if not, write to the Free Software
-												Change Guile license to LGPLv3+

(Not quite finished, the following will be done tomorrow.
   module/srfi/*.scm
   module/rnrs/*.scm
   module/scripts/*.scm
   testsuite/*.scm
   guile-readline/*
)

											
										
										
											2009-06-17 00:22:09 +01:00
+								 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 								 * 02110-1301 USA
-												Changed license terms to the plain LGPL thru-out.

											
										
										
											2003-04-05 19:15:35 +00:00
+								 */
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#ifdef HAVE_CONFIG_H
 								# include <config.h>
 								#endif
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								#include <stdio.h>
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#include <ctype.h>
 								#include <string.h>
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								#include <unistd.h>
 								#include <unicase.h>
-												implement r6rs hungry escaped EOL

* libguile/private-options.h (SCM_HUNGRY_EOL_ESCAPES_P): New private
  option.
* libguile/read.c: Define SCM_HUNGRY_EOL_ESCAPES_P, defaulting to #f.
  (skip_intraline_whitespace): New helper.
  (scm_read_string): If SCM_HUNGRY_EOL_ESCAPES_P,
  skip_intraline_whitespace after an escaped EOL.

* test-suite/tests/reader.test ("read-options"): Add test.

											
										
										
											2011-01-21 08:57:39 +01:00
+								#include <unictype.h>
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/_scm.h"
-												Implement R6RS bytevector read syntax.

* libguile/read.c (scm_read_bytevector): New function.
  (scm_read_sharp): Add `v' case for bytevectors.

* test-suite/lib.scm (exception:read-error): New variable.

* test-suite/tests/bytevectors.test ("Datum Syntax"): New test set.

											
										
										
											2009-06-19 00:47:11 +02:00
+								#include "libguile/bytevectors.h"
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/chars.h"
 								#include "libguile/eval.h"
-												rename unif.[ch] to arrays.[ch]

* libguile/Makefile.am:
* libguile/unif.c:
* libguile/unif.h:
* libguile/arrays.c:
* libguile/arrays.h: Rename unif.[ch] to arrays.[ch].

* libguile.h:
* libguile/array-handle.c:
* libguile/array-map.c:
* libguile/bitvectors.c:
* libguile/bytevectors.c:
* libguile/eq.c:
* libguile/gc-card.c:
* libguile/gc-malloc.c:
* libguile/gc-mark.c:
* libguile/gc.c:
* libguile/init.c:
* libguile/inline.h:
* libguile/print.c:
* libguile/random.c:
* libguile/read.c:
* libguile/socket.c:
* libguile/sort.c:
* libguile/srfi-4.c:
* libguile/srfi-4.h:
* libguile/strports.c:
* libguile/vectors.c:
* libguile/vectors.h: Update includers.

											
										
										
											2009-07-17 01:08:35 +02:00
+								#include "libguile/arrays.h"
-												bitvector exodus from unif.[ch]

* libguile/Makefile.am:
* libguile/unif.c:
* libguile/unif.h:
* libguile/bitvectors.c:
* libguile/bitvectors.h: Move bitvector functionality out of unif.[ch].

* libguile/array-handle.c:
* libguile/array-map.c:
* libguile/init.c:
* libguile/read.c:
* libguile/srfi-4.c:
* libguile/vectors.c: Oh, what a tangled web we weave...

											
										
										
											2009-07-17 00:58:32 +02:00
+								#include "libguile/bitvectors.h"
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/keywords.h"
 								#include "libguile/alist.h"
 								#include "libguile/srcprop.h"
 								#include "libguile/hashtab.h"
 								#include "libguile/hash.h"
 								#include "libguile/ports.h"
-												Disable encoding scanning on non-seekable file ports.

* libguile/read.c (scm_i_scan_for_encoding): Don't attempt to scan
  non-seekable file ports.

											
										
										
											2009-11-27 17:00:51 +01:00
+								#include "libguile/fports.h"
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/root.h"
 								#include "libguile/strings.h"
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
+								#include "libguile/strports.h"
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/vectors.h"
 								#include "libguile/validate.h"
-												* read.c (scm_lreadr): Call scm_i_read_homogenous_vector for '#f',
'#u', and '#s'.

* read.h, read.c (scm_i_input_error): Renamed from scm_input_error
and made non-static.  Changed all uses.

											
										
										
											2004-10-26 17:00:13 +00:00
+								#include "libguile/srfi-4.h"
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#include "libguile/srfi-13.h"
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/read.h"
-												* backtrace.c, debug.c, debug.h, deprecation.c, eq.c, eval.c
eval.h, gsubr.c, init.c, macros.c, print.c, print.h, read.c,
read.h, stacks.c, symbols.c, throw.c: use private-options.h

* private-options.h: new file: contain hardcoded option
definitions.

											
										
										
											2007-01-22 15:14:40 +00:00
+								#include "libguile/private-options.h"
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-													* eval.c, eval.h, read.c, read.h (scm_sym_dot): Moved from eval to
	read.  This will allow to make the definition in read.c static.

											
										
										
											2003-05-06 20:17:26 +00:00
+								SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
-													* read.h (SCM_N_READ_OPTIONS): increase SCM_N_READ_OPTIONS to 4.
	(SCM_KEYWORD_STYLE): defined.

	* read.c (scm_read_opts): add a keywords option.  This isn't a
	boolean option, in case someone wants to add support for DSSSL
	keywords too.
	Setup scm_keyword_prefix symbol.
	(scm_lreadr): Only process keywords if SCM_KEYWORD_STYLE is
	set to 'prefix.
*	I've left keyword support disabled by default, since it doesn't
	seem to break the module system and it gives R4RS standard behaviour.
	It can be reactivated with (read-set! keywords 'prefix).

											
										
										
											1997-03-10 06:49:15 +00:00
+								SCM_SYMBOL (scm_keyword_prefix, "prefix");
-												Add support for SRFI-88-like postfix keyword read syntax.

											
										
										
											2008-04-15 19:52:43 +02:00
+								SCM_SYMBOL (scm_keyword_postfix, "postfix");
-												add read syntax for #nil

* libguile/evalext.c (scm_self_evaluating_p): #nil is self-evaluating.

* libguile/read.c (scm_read_nil, scm_read_sharp): Add read syntax for
  #nil.

											
										
										
											2010-04-09 14:15:16 +02:00
+								SCM_SYMBOL (sym_nil, "nil");
-													* read.h (SCM_N_READ_OPTIONS): increase SCM_N_READ_OPTIONS to 4.
	(SCM_KEYWORD_STYLE): defined.

	* read.c (scm_read_opts): add a keywords option.  This isn't a
	boolean option, in case someone wants to add support for DSSSL
	keywords too.
	Setup scm_keyword_prefix symbol.
	(scm_lreadr): Only process keywords if SCM_KEYWORD_STYLE is
	set to 'prefix.
*	I've left keyword support disabled by default, since it doesn't
	seem to break the module system and it gives R4RS standard behaviour.
	It can be reactivated with (read-set! keywords 'prefix).

											
										
										
											1997-03-10 06:49:15 +00:00
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								/* SRFI-105 curly infix expression support */
 								SCM_SYMBOL (sym_nfx, "$nfx$");
 								SCM_SYMBOL (sym_bracket_list, "$bracket-list$");
 								SCM_SYMBOL (sym_bracket_apply, "$bracket-apply$");
 								scm_t_option scm_read_opts[] =
 								  {
 								    { SCM_OPTION_BOOLEAN, "copy", 0,
 								      "Copy source code expressions." },
 								    { SCM_OPTION_BOOLEAN, "positions", 1,
 								      "Record positions of source code expressions." },
 								    { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
 								      "Convert symbols to lower case."},
 								    { SCM_OPTION_SCM, "keywords", (scm_t_bits) SCM_BOOL_F_BITS,
 								      "Style of keyword recognition: #f, 'prefix or 'postfix."},
 								    { SCM_OPTION_BOOLEAN, "r6rs-hex-escapes", 0,
 								      "Use R6RS variable-length character and string hex escapes."},
 								    { SCM_OPTION_BOOLEAN, "square-brackets", 1,
 								      "Treat `[' and `]' as parentheses, for R6RS compatibility."},
 								    { SCM_OPTION_BOOLEAN, "hungry-eol-escapes", 0,
 								      "In strings, consume leading whitespace after an escaped end-of-line."},
 								    { SCM_OPTION_BOOLEAN, "curly-infix", 0,
 								      "Support SRFI-105 curly infix expressions."},
 								    { 0, },
 								  };
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
 								/* Internal read options structure.  This is initialized by 'scm_read'
-												Implement per-port read options.

* libguile/read.c (scm_t_read_opts): Update comment to mention the
  per-port read options.

  (sym_port_read_options): New variable.

  (set_port_read_option): New function.

  (init_read_options): Add new 'port' parameter, and consult the
  per-port read option overrides when initializing the 'scm_t_read_opts'
  struct.  Move to bottom of file.

  (scm_read): Pass 'port' parameter to init_read_options.

											
										
										
											2012-10-23 17:28:43 -04:00
+								   from the global and per-port read options, and a pointer is passed
 								   down to all helper functions. */
 								enum t_keyword_style
 								  {
 								    KEYWORD_STYLE_HASH_PREFIX,
 								    KEYWORD_STYLE_PREFIX,
 								    KEYWORD_STYLE_POSTFIX
 								  };
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
-												Implement per-port read options.

* libguile/read.c (scm_t_read_opts): Update comment to mention the
  per-port read options.

  (sym_port_read_options): New variable.

  (set_port_read_option): New function.

  (init_read_options): Add new 'port' parameter, and consult the
  per-port read option overrides when initializing the 'scm_t_read_opts'
  struct.  Move to bottom of file.

  (scm_read): Pass 'port' parameter to init_read_options.

											
										
										
											2012-10-23 17:28:43 -04:00
+								struct t_read_opts
 								{
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  enum t_keyword_style keyword_style;
 								  unsigned int copy_source_p        : 1;
 								  unsigned int record_positions_p   : 1;
 								  unsigned int case_insensitive_p   : 1;
 								  unsigned int r6rs_escapes_p       : 1;
 								  unsigned int square_brackets_p    : 1;
 								  unsigned int hungry_eol_escapes_p : 1;
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								  unsigned int curly_infix_p        : 1;
 								  unsigned int neoteric_p           : 1;
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								};
 								typedef struct t_read_opts scm_t_read_opts;
-												*	* read.c: New procedure: scm_read_options

											
										
										
											1996-08-20 17:11:25 +00:00
-												(INPUT_ERROR): Prepare for file:line:column error
messages for errors in scm_lreadr() and friends.

											
										
										
											2002-08-04 23:33:28 +00:00
+								/*
 								  Give meaningful error messages for errors
 								  We use the format
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
+								  FILE:LINE:COL: MESSAGE
-												(INPUT_ERROR): Prepare for file:line:column error
messages for errors in scm_lreadr() and friends.

											
										
										
											2002-08-04 23:33:28 +00:00
+								  This happened in ....
 								  This is not standard GNU format, but the test-suite likes the real
 								  message to be in front.
 								 */
-												* read.c (scm_lreadr): Call scm_i_read_homogenous_vector for '#f',
'#u', and '#s'.

* read.h, read.c (scm_i_input_error): Renamed from scm_input_error
and made non-static.  Changed all uses.

											
										
										
											2004-10-26 17:00:13 +00:00
+								void
 								scm_i_input_error (char const *function,
 										   SCM port, const char *message, SCM arg)
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
+								{
-												(scm_input_error): Use a SCM value for 'fn', not a C string.  This
avoids a conversion round-trip.

											
										
										
											2004-08-10 13:54:01 +00:00
+								  SCM fn = (scm_is_string (SCM_FILENAME(port))
 									    ? SCM_FILENAME(port)
 									    : scm_from_locale_string ("#<unknown port>"));
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
-												(scm_input_error): Use a SCM value for 'fn', not a C string.  This
avoids a conversion round-trip.

											
										
										
											2004-08-10 13:54:01 +00:00
+								  SCM string_port = scm_open_output_string ();
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
+								  SCM string = SCM_EOL;
 								  scm_simple_format (string_port,
-												(scm_i_casei_streq): New, for counted strings.

* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 17:17:43 +00:00
+										     scm_from_locale_string ("~A:~S:~S: ~A"),
-												(scm_input_error): Use a SCM value for 'fn', not a C string.  This
avoids a conversion round-trip.

											
										
										
											2004-08-10 13:54:01 +00:00
+										     scm_list_4 (fn,
-												merge from 1.8 branch

											
										
										
											2007-03-07 23:35:55 +00:00
+												 scm_from_long (SCM_LINUM (port) + 1),
-												* numbers.h, numbers.c, discouraged.h, discouraged.c (scm_short2num,
scm_ushort2num, scm_int2num, scm_uint2num, scm_long2num,
scm_ulong2num, scm_size2num, scm_ptrdiff2num, scm_num2short,
scm_num2ushort, scm_num2int, scm_num2uint, scm_num2long,
scm_num2ulong, scm_num2size, scm_num2ptrdiff, scm_long_long2num,
scm_ulong_long2num, scm_num2long_long, scm_num2ulong_long):
Discouraged by moving to discouraged.h and discouraged.c and
reimplementing in terms of scm_from_* and scm_to_*.  Changed all uses
to the new scm_from_* and scm_to_* functions.

											
										
										
											2004-08-02 16:14:04 +00:00
+												 scm_from_int (SCM_COL (port) + 1),
-												(scm_i_casei_streq): New, for counted strings.

* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 17:17:43 +00:00
+												 scm_from_locale_string (message)));
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
 								  string = scm_get_output_string (string_port);
 								  scm_close_output_port (string_port);
-												use scm_from_latin1_symboln for string literals and load-symbol

* libguile/bytevectors.c:
* libguile/eval.c:
* libguile/goops.c:
* libguile/i18n.c:
* libguile/load.c:
* libguile/memoize.c:
* libguile/modules.c:
* libguile/ports.c:
* libguile/print.c:
* libguile/procs.c:
* libguile/programs.c:
* libguile/read.c:
* libguile/script.c:
* libguile/srfi-14.c:
* libguile/stacks.c:
* libguile/strings.c:
* libguile/throw.c:
* libguile/vm.c: Use scm_from_latin1_symboln to make symbols from string
  literals, because they aren't in the user's locale -- they are in
  ASCII, and we can optimize this case.

* libguile/vm-i-loader.c: Also use scm_from_latin1_symboln when loading
  narrow symbols.

											
										
										
											2011-01-07 09:08:58 -08:00
+								  scm_error_scm (scm_from_latin1_symbol ("read-error"),
-												* read.c (scm_lreadr): Call scm_i_read_homogenous_vector for '#f',
'#u', and '#s'.

* read.h, read.c (scm_i_input_error): Renamed from scm_input_error
and made non-static.  Changed all uses.

											
										
										
											2004-10-26 17:00:13 +00:00
+										 function? scm_from_locale_string (function) : SCM_BOOL_F,
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
+										 string,
-												(scm_input_error): Pass arg list parameter to scm_error_scm,
rather than SCM_EOL.  Needed by "Unknown # object" case in scm_lreadr.

											
										
										
											2003-06-04 16:36:03 +00:00
+										 arg,
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
+										 SCM_BOOL_F);
 								}
-												(INPUT_ERROR): Prepare for file:line:column error
messages for errors in scm_lreadr() and friends.

											
										
										
											2002-08-04 23:33:28 +00:00
-												*.[ch]: Replace GUILE_PROC w/ SCM_DEFINE.

											
										
										
											2000-01-05 19:05:23 +00:00
+								SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								            (SCM setting),
-												(scm_read_options, scm_read, scm_read_hash_extend): Added docstrings.

											
										
										
											2001-02-16 15:17:20 +00:00
+									    "Option interface for the read options. Instead of using\n"
 									    "this procedure directly, use the procedures @code{read-enable},\n"
-												Change 3 instances of @var to @code in docstrings.

											
										
										
											2002-03-15 09:40:57 +00:00
+									    "@code{read-disable}, @code{read-set!} and @code{read-options}.")
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#define FUNC_NAME s_scm_read_options
-												*	* read.c: New procedure: scm_read_options

											
										
										
											1996-08-20 17:11:25 +00:00
+								{
-												* eval.c, print.h, print.c, read.h, read.c: Modifications to
run-time options.

											
										
										
											1996-08-23 01:20:34 +00:00
+								  SCM ans = scm_options (setting,
 											 scm_read_opts,
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+											 FUNC_NAME);
-												* eval.c, print.h, print.c, read.h, read.c: Modifications to
run-time options.

											
										
										
											1996-08-23 01:20:34 +00:00
+								  if (SCM_COPY_SOURCE_P)
 								    SCM_RECORD_POSITIONS_P = 1;
-												*	* read.c: New procedure: scm_read_options

											
										
										
											1996-08-20 17:11:25 +00:00
+								  return ans;
 								}
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#undef FUNC_NAME
-												*	* read.c: New procedure: scm_read_options

											
										
										
											1996-08-20 17:11:25 +00:00
-												Use a fluid for the list of the reader's "hash procedures"

This allows customizing the reader behavior for a dynamic extent more easily.

* libguile/read.c (scm_read_hash_procedures): Renamed to
  `scm_i_read_hash_procedures'.
  (scm_i_read_hash_procedures_ref, scm_i_read_hash_procedures_set_x):
  New (internal) accessor functions for the fluid.
  (scm_read_hash_extend, scm_get_hash_procedure): Use these accessor
  functions.
  (scm_init_read): Create the fluid, named `%read-hash-procedures' instead of
  the previous plain list `read-hash-procedures'.

* test-suite/tests/reader.test: Adapt the "R6RS/SRFI-30 block comment
  syntax overridden" test to make use of the fluid.

* module/ice-9/deprecated.scm (read-hash-procedures):
  New identifier macro -- backward-compatibility shim.

Signed-off-by: Ludovic Courtès <ludo@gnu.org>

											
										
										
											2010-11-03 00:09:57 +01:00
+								/* A fluid referring to an association list mapping extra hash
 								   characters to procedures.  */
 								static SCM *scm_i_read_hash_procedures;
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
-												Remove inline and register attributes from read.c

* libguile/read.c: Remove all 'inline' and 'register' attributes.

											
										
										
											2012-02-08 03:00:15 -05:00
+								static SCM
-												Use a fluid for the list of the reader's "hash procedures"

This allows customizing the reader behavior for a dynamic extent more easily.

* libguile/read.c (scm_read_hash_procedures): Renamed to
  `scm_i_read_hash_procedures'.
  (scm_i_read_hash_procedures_ref, scm_i_read_hash_procedures_set_x):
  New (internal) accessor functions for the fluid.
  (scm_read_hash_extend, scm_get_hash_procedure): Use these accessor
  functions.
  (scm_init_read): Create the fluid, named `%read-hash-procedures' instead of
  the previous plain list `read-hash-procedures'.

* test-suite/tests/reader.test: Adapt the "R6RS/SRFI-30 block comment
  syntax overridden" test to make use of the fluid.

* module/ice-9/deprecated.scm (read-hash-procedures):
  New identifier macro -- backward-compatibility shim.

Signed-off-by: Ludovic Courtès <ludo@gnu.org>

											
										
										
											2010-11-03 00:09:57 +01:00
+								scm_i_read_hash_procedures_ref (void)
 								{
 								  return scm_fluid_ref (*scm_i_read_hash_procedures);
 								}
-												Remove inline and register attributes from read.c

* libguile/read.c: Remove all 'inline' and 'register' attributes.

											
										
										
											2012-02-08 03:00:15 -05:00
+								static void
-												Use a fluid for the list of the reader's "hash procedures"

This allows customizing the reader behavior for a dynamic extent more easily.

* libguile/read.c (scm_read_hash_procedures): Renamed to
  `scm_i_read_hash_procedures'.
  (scm_i_read_hash_procedures_ref, scm_i_read_hash_procedures_set_x):
  New (internal) accessor functions for the fluid.
  (scm_read_hash_extend, scm_get_hash_procedure): Use these accessor
  functions.
  (scm_init_read): Create the fluid, named `%read-hash-procedures' instead of
  the previous plain list `read-hash-procedures'.

* test-suite/tests/reader.test: Adapt the "R6RS/SRFI-30 block comment
  syntax overridden" test to make use of the fluid.

* module/ice-9/deprecated.scm (read-hash-procedures):
  New identifier macro -- backward-compatibility shim.

Signed-off-by: Ludovic Courtès <ludo@gnu.org>

											
										
										
											2010-11-03 00:09:57 +01:00
+								scm_i_read_hash_procedures_set_x (SCM value)
 								{
 								  scm_fluid_set_x (*scm_i_read_hash_procedures, value);
 								}
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								/* Token readers.  */
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								/* Size of the C buffer used to read symbols and numbers.  */
 								#define READER_BUFFER_SIZE            128
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Optimize `scm_read_string'.

According to the new benchmarks, this leads a 5% speed improvement when
reading small strings, and a 27% improvement when reading large strings.

* libguile/read.c (READER_STRING_BUFFER_SIZE): Change to 128; update
  comment to mention codepoints.
  (scm_read_string): Make `str' a list of strings, instead of a string.
  Store characters read in buffer `c_str'.  Cons to STR when C_STR is
  full, and concatenate/reverse at the end.

* benchmark-suite/benchmarks/read.bm (small, large): New variables.
  Set %DEFAULT-PORT-ENCODING to "UTF-8".
  ("read")["small strings", "large strings"]: New benchmarks.

											
										
										
											2012-05-07 00:32:01 +02:00
+								/* Number of 32-bit codepoints in the buffer used to read strings.  */
 								#define READER_STRING_BUFFER_SIZE     128
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								/* The maximum size of Scheme character names.  */
 								#define READER_CHAR_NAME_MAX_SIZE      50
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												Generalize scm_read_shebang to handle other reader directives.

* libguile/read.c (READER_DIRECTIVE_NAME_MAX_SIZE): New C macro.
  (scm_read_shebang): Rewrite to handle arbitrary reader directives.

											
										
										
											2012-10-23 00:29:07 -04:00
+								/* The maximum size of reader directive names.  */
 								#define READER_DIRECTIVE_NAME_MAX_SIZE 50
-												* Don't call scm_vector_set_length_x for non-vector arguments.

											
										
										
											2000-10-06 16:51:08 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								/* `isblank' is only in C99.  */
 								#define CHAR_IS_BLANK_(_chr)					\
 								  (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n')	\
-												Changes from arch/CVS synchronization

											
										
										
											2007-10-17 21:56:10 +00:00
+								   || ((_chr) == '\f') || ((_chr) == '\r'))
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								#ifdef MSDOS
 								/* On MSDOS, character code 26 (Ctrl-Z) is treated as blank in addition
 								   to the characters accepted by CHAR_IS_BLANK_.  The macro argument
 								   is evaluated more than once, so it must be free of side effects.  */
 								# define CHAR_IS_BLANK(_chr)			\
 								  ((CHAR_IS_BLANK_ (_chr)) || ((_chr) == 26))
 								#else
 								/* Elsewhere, CHAR_IS_BLANK is simply an alias for CHAR_IS_BLANK_.  */
 								# define CHAR_IS_BLANK CHAR_IS_BLANK_
 								#endif
 								/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
 								   structure'').  */
 								#define CHAR_IS_R5RS_DELIMITER(c)				\
 								  (CHAR_IS_BLANK (c)						\
-												Minor tweaks to delimiter handling in read.c

* libguile/read.c (CHAR_IS_R5RS_DELIMITER, CHAR_IS_DELIMITER): Move the
  '[' and ']' delimiters from CHAR_IS_R5RS_DELIMITER to
  CHAR_IS_DELIMITER.  Parenthesize all references to the macro
  parameter.  Don't check the global square-brackets read option until
  after we know the character is '[' or ']'.
  (scm_read_sexp): Don't check the global square-brackets read option
  until after we know the character is ']'.

											
										
										
											2012-10-22 23:28:56 -04:00
+								   || (c) == ')' || (c) == '(' || (c) == ';' || (c) == '"')
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Minor tweaks to delimiter handling in read.c

* libguile/read.c (CHAR_IS_R5RS_DELIMITER, CHAR_IS_DELIMITER): Move the
  '[' and ']' delimiters from CHAR_IS_R5RS_DELIMITER to
  CHAR_IS_DELIMITER.  Parenthesize all references to the macro
  parameter.  Don't check the global square-brackets read option until
  after we know the character is '[' or ']'.
  (scm_read_sexp): Don't check the global square-brackets read option
  until after we know the character is ']'.

											
										
										
											2012-10-22 23:28:56 -04:00
+								#define CHAR_IS_DELIMITER(c)                                    \
 								  (CHAR_IS_R5RS_DELIMITER (c)                                   \
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								   || (((c) == ']' || (c) == '[') && (opts->square_brackets_p   \
 								                                      || opts->curly_infix_p))  \
 								   || (((c) == '}' || (c) == '{') && opts->curly_infix_p))
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								/* Evaluate to non-zero when _CHR is one of the exponent marker
 								   letters `e', `s', `f', `d' or `l' (see R5RS section 7.1.1,
 								   ``Lexical Structure'').  Only these lower-case letters are
 								   compared.  _CHR is evaluated more than once, so it must be free
 								   of side effects.  */
 								#define CHAR_IS_EXPONENT_MARKER(_chr)				\
 								  (((_chr) == 'e')						\
 								   || ((_chr) == 's')						\
 								   || ((_chr) == 'f')						\
 								   || ((_chr) == 'd')						\
 								   || ((_chr) == 'l'))
-												Changes from arch/CVS synchronization

											
										
										
											2007-09-03 16:58:20 +00:00
+								/* Read an SCSH block comment.  */
-												Remove inline and register attributes from read.c

* libguile/read.c: Remove all 'inline' and 'register' attributes.

											
										
										
											2012-02-08 03:00:15 -05:00
+								static SCM scm_read_scsh_block_comment (scm_t_wchar, SCM);
-												Add support for R6RS/SRFI-30 nested block comments.

Suggested by Andreas Rottmann <a.rottmann@gmx.at>.

* libguile/read.c (flush_ws, scm_read_sharp): Add support for
  R6RS/SRFI-30 block comments.
  (scm_read_r6rs_block_comment): New function.

* test-suite/tests/reader.test (exception:unterminated-block-comment):
  Adjust to match both block comment styles.
  ("reading")["R6RS/SRFI-30 block comment", "R6RS/SRFI-30 nested block
  comment", "R6RS/SRFI-30 block comment syntax overridden"]: New tests.
  ("exceptions")["R6RS/SRFI-30 unterminated nested block comment"]: New
  test.

* doc/ref/api-evaluation.texi (Block Comments): Mention SRFI-30/R6RS
  block comments.

* doc/ref/srfi-modules.texi (SRFI-30): New node.

											
										
										
											2009-10-19 22:38:34 +02:00
+								static SCM scm_read_r6rs_block_comment (scm_t_wchar, SCM);
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								static SCM scm_read_commented_expression (scm_t_wchar, SCM, scm_t_read_opts *);
 								static SCM scm_read_shebang (scm_t_wchar, SCM, scm_t_read_opts *);
-												Add support for R6RS/SRFI-30 nested block comments.

Suggested by Andreas Rottmann <a.rottmann@gmx.at>.

* libguile/read.c (flush_ws, scm_read_sharp): Add support for
  R6RS/SRFI-30 block comments.
  (scm_read_r6rs_block_comment): New function.

* test-suite/tests/reader.test (exception:unterminated-block-comment):
  Adjust to match both block comment styles.
  ("reading")["R6RS/SRFI-30 block comment", "R6RS/SRFI-30 nested block
  comment", "R6RS/SRFI-30 block comment syntax overridden"]: New tests.
  ("exceptions")["R6RS/SRFI-30 unterminated nested block comment"]: New
  test.

* doc/ref/api-evaluation.texi (Block Comments): Mention SRFI-30/R6RS
  block comments.

* doc/ref/srfi-modules.texi (SRFI-30): New node.

											
										
										
											2009-10-19 22:38:34 +02:00
+								static SCM scm_get_hash_procedure (int);
-												Changes from arch/CVS synchronization

											
										
										
											2007-09-03 16:58:20 +00:00
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								/* Read from PORT until a delimiter (e.g., a whitespace) is read.  Put the
 								   result in the pre-allocated buffer BUF.  Return zero if the whole token has
 								   fewer than BUF_SIZE bytes, non-zero otherwise. READ will be set to the number of
 								   bytes actually read.  */
-												Remove inline and register attributes from read.c

* libguile/read.c: Remove all 'inline' and 'register' attributes.

											
										
										
											2012-02-08 03:00:15 -05:00
+								static int
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								read_token (SCM port, scm_t_read_opts *opts,
 								            char *buf, size_t buf_size, size_t *read)
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+								{
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								   *read = 0;
-												(scm_flush_ws): Detect "#!"-style comments here.
(scm_lreadr): Abort on seeing "#!", which should no longer happen.
(skip_scsh_block_comment): Use scm_input_error instead of
scm_misc_error in case of EOF.

											
										
										
											2004-09-07 09:18:59 +00:00
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								   while (*read < buf_size)
 								     {
 								       int chr;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								       chr = scm_get_byte_or_eof (port);
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								       if (chr == EOF)
 								        return 0;
 								      else if (CHAR_IS_DELIMITER (chr))
 								        {
 								          scm_unget_byte (chr, port);
 								          return 0;
 								        }
 								      else
 								        {
 								          *buf = (char) chr;
 								          buf++, (*read)++;
 								        }
 								     }
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								   return 1;
 								 }
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+								/* Like `read_token', but return either BUFFER, or a GC-allocated buffer
 								   if the token doesn't fit in BUFFER_SIZE bytes.  */
 								static char *
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								read_complete_token (SCM port, scm_t_read_opts *opts,
 								                     char *buffer, size_t buffer_size, size_t *read)
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								{
 								  int overflow = 0;
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+								  size_t bytes_read, overflow_size = 0;
 								  char *overflow_buffer = NULL;
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  do
 								    {
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      overflow = read_token (port, opts, buffer, buffer_size, &bytes_read);
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								      if (bytes_read == 0)
 								        break;
 								      if (overflow || overflow_size != 0)
 								        {
 								          if (overflow_size == 0)
 								            {
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+								              overflow_buffer = scm_gc_malloc_pointerless (bytes_read, "read");
 								              memcpy (overflow_buffer, buffer, bytes_read);
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								              overflow_size = bytes_read;
 								            }
 								          else
 								            {
-												read: Avoid `void *' pointer arithmetic.

* libguile/read.c (read_complete_token): Make `new_buf' a `char *' to
  avoid pointer arithmetic on `void *'.

											
										
										
											2012-05-06 22:23:58 +02:00
+									      char *new_buf =
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+										scm_gc_malloc_pointerless (overflow_size + bytes_read, "read");
 									      memcpy (new_buf, overflow_buffer, overflow_size);
 								              memcpy (new_buf + overflow_size, buffer, bytes_read);
 									      overflow_buffer = new_buf;
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								              overflow_size += bytes_read;
 								            }
 								        }
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								    }
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  while (overflow);
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  if (overflow_size)
 								    *read = overflow_size;
-												Fix bugs reading long tokens

The commit "don't take string-write mutex in read.c:read_token", from
8b0d7b9d94b9f142dc4f08ce12b345321359b3cd, had a number of bugs. Not sure
how I missed these before.

* libguile/read.c (read_token): Remove a couple of bogus
  scm_i_string_stop_writing () calls, now that we no longer take the
  string-write mutex.
  (read_complete_token): read_token really needs a fresh buffer, which
  was not the case when we are reading long tokens and thus hit the
  overflow case. Fixes fractions.test.

											
										
										
											2009-12-28 17:35:48 +01:00
+								  else
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								    *read = bytes_read;
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+								  return (overflow_size > 0 ? overflow_buffer : buffer);
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								}
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								/* Skip whitespace from PORT and return the first non-whitespace character
 								   read.  Raise an error on end-of-file.  */
 								static int
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								flush_ws (SCM port, scm_t_read_opts *opts, const char *eoferr)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												Remove inline and register attributes from read.c

* libguile/read.c: Remove all 'inline' and 'register' attributes.

											
										
										
											2012-02-08 03:00:15 -05:00
+								  scm_t_wchar c;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								  while (1)
-													* Makefile.in: Rebuilt.
	* Makefile.am (libguile_la_SOURCES): Removed extchrs.c,
	mbstrings.c.
	(modinclude_HEADERS): Removed extchrs.h, mbstrings.h.
	* unif.c (scm_vector_set_length_x): Don't handle multibyte
	strings.
	* tag.c (scm_utag_mb_string, scm_utag_mb_substring): Removed.
	(scm_tag): Don't handle multibyte strings.
	* read.c: Don't include mbstrings.h.
	(scm_lreadr): Don't handle multibyte ports.
	* kw.c: Don't include mbstrings.h.
	* init.c: Don't include mbstrings.h.
	(scm_boot_guile_1): Don't init mbstrings module.
	* hash.c (scm_hasher): Don't handle mbstrings.
	* gscm.c (gscm_run_scm): Don't init mbstrings module.
	* gc.c (scm_gc_mark): Don't handle mbstrings.
	(scm_gc_sweep): Likewise.
	* eval.c (SCM_CEVAL): Don't handle mbstrings.
	* eq.c (scm_equal_p): Use SCM_TYP7S, not SCM_TYP7SD.
	* tags.h (SCM_TYP7SD): Removed.
	(SCM_TYP7D): Removed.
	(scm_tc7_mb_string): Removed.
	(scm_tc7_mb_substring): Removed.
	* print.c (scm_iprin1): Handle char printing directly.  Don't
	handle mbstrings.
	Don't include "mbstrings.h".
	* symbols.c (scm_intern_obarray_soft, scm_string_to_symbol,
	scm_string_to_obarray_symbol, msymbolize): Don't set symbol's
	multi-byte flag.
	Don't include "mbstrings.h".
	* symbols.h (SCM_SYMBOL_MULTI_BYTE_STRINGP): Removed.
	(SCM_SYMBOL_SLOTS): Define as 4.
	(SCM_ROSTRINGP): Use SCM_TYP7S, not SCM_TYP7SD.
	* arbiters.c, backtrace.c, debug.c, dynl.c, eval.c, fluids.c,
	gc.c, gsubr.c, ioext.c, kw.c, mallocs.c, numbers.c, ports.c,
	print.c, read.c, regex-posix.c, root.c, srcprop.c, stackchk.c,
	struct.c, threads.c, throw.c, unif.c, variable.c: Use new
	("gen"-less) I/O function names.
	* ports.c (scm_add_to_port_table): Don't set port's
	representation.
	* ports.h (scm_port_representation_type): Removed.
	(scm_string_representation_type): Removed.
	(struct scm_port_table ): Removed representation field.
	(SCM_PORT_REPRESENTATION): Removed.
	(SCM_SET_PORT_REPRESENTATION): Removed.
	* genio.h: Use new function names.
	* genio.c: Don't include "extchrs.h".
	(scm_gen_putc, scm_gen_puts, scm_gen_write, scm_get_getc):
	Removed.
	(scm_putc, scm_puts, scm_lfwrite): No longer static.
	(scm_getc): No longer static; handle line and column changes.
	(scm_ungetc): Renamed from scm_gen_ungetc.
	(scm_do_read_line): Renamed from scm_gen_read_line.
	* libguile.h: Don't include "extchrs.h" or "mbstrings.h"
	* extchrs.h, extchrs.c, mbstrings.h, mbstrings.c: Removed.

											
										
										
											1997-10-15 17:18:32 +00:00
+								    switch (c = scm_getc (port))
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								      {
 								      case EOF:
 								      goteof:
 									if (eoferr)
-												* read.c (scm_flush_ws): Include filename in error message when it
is not `#f'.

											
										
										
											2000-08-06 22:04:11 +00:00
+									  {
-												* read.c (scm_lreadr): Call scm_i_read_homogenous_vector for '#f',
'#u', and '#s'.

* read.h, read.c (scm_i_input_error): Renamed from scm_input_error
and made non-static.  Changed all uses.

											
										
										
											2004-10-26 17:00:13 +00:00
+									    scm_i_input_error (eoferr,
 											       port,
 											       "end of file",
 											       SCM_EOL);
-												* read.c (scm_flush_ws): Include filename in error message when it
is not `#f'.

											
										
										
											2000-08-06 22:04:11 +00:00
+									  }
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+									return c;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								      case ';':
 								      lp:
-													* Makefile.in: Rebuilt.
	* Makefile.am (libguile_la_SOURCES): Removed extchrs.c,
	mbstrings.c.
	(modinclude_HEADERS): Removed extchrs.h, mbstrings.h.
	* unif.c (scm_vector_set_length_x): Don't handle multibyte
	strings.
	* tag.c (scm_utag_mb_string, scm_utag_mb_substring): Removed.
	(scm_tag): Don't handle multibyte strings.
	* read.c: Don't include mbstrings.h.
	(scm_lreadr): Don't handle multibyte ports.
	* kw.c: Don't include mbstrings.h.
	* init.c: Don't include mbstrings.h.
	(scm_boot_guile_1): Don't init mbstrings module.
	* hash.c (scm_hasher): Don't handle mbstrings.
	* gscm.c (gscm_run_scm): Don't init mbstrings module.
	* gc.c (scm_gc_mark): Don't handle mbstrings.
	(scm_gc_sweep): Likewise.
	* eval.c (SCM_CEVAL): Don't handle mbstrings.
	* eq.c (scm_equal_p): Use SCM_TYP7S, not SCM_TYP7SD.
	* tags.h (SCM_TYP7SD): Removed.
	(SCM_TYP7D): Removed.
	(scm_tc7_mb_string): Removed.
	(scm_tc7_mb_substring): Removed.
	* print.c (scm_iprin1): Handle char printing directly.  Don't
	handle mbstrings.
	Don't include "mbstrings.h".
	* symbols.c (scm_intern_obarray_soft, scm_string_to_symbol,
	scm_string_to_obarray_symbol, msymbolize): Don't set symbol's
	multi-byte flag.
	Don't include "mbstrings.h".
	* symbols.h (SCM_SYMBOL_MULTI_BYTE_STRINGP): Removed.
	(SCM_SYMBOL_SLOTS): Define as 4.
	(SCM_ROSTRINGP): Use SCM_TYP7S, not SCM_TYP7SD.
	* arbiters.c, backtrace.c, debug.c, dynl.c, eval.c, fluids.c,
	gc.c, gsubr.c, ioext.c, kw.c, mallocs.c, numbers.c, ports.c,
	print.c, read.c, regex-posix.c, root.c, srcprop.c, stackchk.c,
	struct.c, threads.c, throw.c, unif.c, variable.c: Use new
	("gen"-less) I/O function names.
	* ports.c (scm_add_to_port_table): Don't set port's
	representation.
	* ports.h (scm_port_representation_type): Removed.
	(scm_string_representation_type): Removed.
	(struct scm_port_table ): Removed representation field.
	(SCM_PORT_REPRESENTATION): Removed.
	(SCM_SET_PORT_REPRESENTATION): Removed.
	* genio.h: Use new function names.
	* genio.c: Don't include "extchrs.h".
	(scm_gen_putc, scm_gen_puts, scm_gen_write, scm_get_getc):
	Removed.
	(scm_putc, scm_puts, scm_lfwrite): No longer static.
	(scm_getc): No longer static; handle line and column changes.
	(scm_ungetc): Renamed from scm_gen_ungetc.
	(scm_do_read_line): Renamed from scm_gen_read_line.
	* libguile.h: Don't include "extchrs.h" or "mbstrings.h"
	* extchrs.h, extchrs.c, mbstrings.h, mbstrings.c: Removed.

											
										
										
											1997-10-15 17:18:32 +00:00
+									switch (c = scm_getc (port))
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+									  {
 									  case EOF:
 									    goto goteof;
 									  default:
 									    goto lp;
 									  case SCM_LINE_INCREMENTORS:
 									    break;
 									  }
 									break;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-09-03 16:58:20 +00:00
+								      case '#':
 									switch (c = scm_getc (port))
 									  {
 									  case EOF:
 									    eoferr = "read_sharp";
 									    goto goteof;
 									  case '!':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+									    scm_read_shebang (c, port, opts);
-												Changes from arch/CVS synchronization

											
										
										
											2007-09-03 16:58:20 +00:00
+									    break;
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+									  case ';':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+									    scm_read_commented_expression (c, port, opts);
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+									    break;
-												Add support for R6RS/SRFI-30 nested block comments.

Suggested by Andreas Rottmann <a.rottmann@gmx.at>.

* libguile/read.c (flush_ws, scm_read_sharp): Add support for
  R6RS/SRFI-30 block comments.
  (scm_read_r6rs_block_comment): New function.

* test-suite/tests/reader.test (exception:unterminated-block-comment):
  Adjust to match both block comment styles.
  ("reading")["R6RS/SRFI-30 block comment", "R6RS/SRFI-30 nested block
  comment", "R6RS/SRFI-30 block comment syntax overridden"]: New tests.
  ("exceptions")["R6RS/SRFI-30 unterminated nested block comment"]: New
  test.

* doc/ref/api-evaluation.texi (Block Comments): Mention SRFI-30/R6RS
  block comments.

* doc/ref/srfi-modules.texi (SRFI-30): New node.

											
										
										
											2009-10-19 22:38:34 +02:00
+									  case '|':
 									    if (scm_is_false (scm_get_hash_procedure (c)))
 									      {
 										scm_read_r6rs_block_comment (c, port);
 										break;
 									      }
 									    /* fall through */
-												Changes from arch/CVS synchronization

											
										
										
											2007-09-03 16:58:20 +00:00
+									  default:
 									    scm_ungetc (c, port);
 									    return '#';
 									  }
 									break;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								      case SCM_LINE_INCREMENTORS:
 								      case SCM_SINGLE_SPACES:
 								      case '\t':
 									break;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								      default:
 									return c;
 								      }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  return 0;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								/* Token readers.  */
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								static SCM scm_read_expression (SCM port, scm_t_read_opts *opts);
 								static SCM scm_read_sharp (int chr, SCM port, scm_t_read_opts *opts,
 								                           long line, int column);
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Add and use maybe_annotate_source helper in read.c

* libguile/read.c (maybe_annotate_source): New static helper function.
  (scm_read_sexp, scm_read_quote, scm_read_syntax): Use
  'maybe_annotate_source'.

											
										
										
											2012-02-08 03:10:11 -05:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								maybe_annotate_source (SCM x, SCM port, scm_t_read_opts *opts,
 								                       long line, int column)
-												Add and use maybe_annotate_source helper in read.c

* libguile/read.c (maybe_annotate_source): New static helper function.
  (scm_read_sexp, scm_read_quote, scm_read_syntax): Use
  'maybe_annotate_source'.

											
										
										
											2012-02-08 03:10:11 -05:00
+								{
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  if (opts->record_positions_p)
-												Add and use maybe_annotate_source helper in read.c

* libguile/read.c (maybe_annotate_source): New static helper function.
  (scm_read_sexp, scm_read_quote, scm_read_syntax): Use
  'maybe_annotate_source'.

											
										
										
											2012-02-08 03:10:11 -05:00
+								    scm_i_set_source_properties_x (x, line, column, SCM_FILENAME (port));
 								  return x;
 								}
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_sexp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#define FUNC_NAME "scm_i_lreadparen"
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								{
-												read + source properties simplification

* libguile/srcprop.h: Remove internal scm_source_whash declaration.
* libguile/srcprop.c (scm_i_set_source_properties_x)
  (scm_i_has_source_properties): New helpers.
  (scm_source_whash): Make static.

* libguile/read.c (scm_read_sexp): Remove register declarations here;
  let's trust the compiler.  Remove code to incrementally build up a
  copy; instead let's let scm_i_set_source_properties_x handle copying
  the expression if needed.
  (scm_read_quote, scm_read_syntax): Use scm_i_set_source_properties_x.
  (recsexpr): Remove this helper from 1996.
  (scm_read_sharp_extension): Instead of trying to recursively label
  sharp-read subforms with source properties, just label the outside
  form and rely on the macro-expander to propagate it down.

											
										
										
											2011-05-24 21:25:11 +02:00
+								  int c;
 								  SCM tmp, tl, ans = SCM_EOL;
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								  const int curly_list_p = (chr == '{') && opts->curly_infix_p;
 								  const int terminating_char = ((chr == '{') ? '}'
 								                                : ((chr == '[') ? ']'
 								                                   : ')'));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  /* Need to capture line and column numbers here. */
 								  long line = SCM_LINUM (port);
 								  int column = SCM_COL (port) - 1;
-												* read.c (scm_lreadr): Recognize SCSH-style block comments; text
between `#!' and `!#' is ignored.
(skip_scsh_block_comment): New function.

											
										
										
											1996-10-25 08:30:26 +00:00
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  c = flush_ws (port, opts, FUNC_NAME);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  if (terminating_char == c)
 								    return SCM_EOL;
-												* read.c (scm_lreadr): Recognize SCSH-style block comments; text
between `#!' and `!#' is ignored.
(skip_scsh_block_comment): New function.

											
										
										
											1996-10-25 08:30:26 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  scm_ungetc (c, port);
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  tmp = scm_read_expression (port, opts);
-												fix '(a #{.} b)

* libguile/read.c (scm_read_sexp): Don't confuse `#{.}#' with `.' for
  the purpose of reading dotted pairs.  Thanks to CRLF0710 for the
  report.

* test-suite/tests/reader.test ("#{}#"): Add test.

											
										
										
											2011-07-01 12:20:52 +02:00
 								  /* Note that it is possible for scm_read_expression to return
 								     scm_sym_dot, but not as part of a dotted pair: as in #{.}#.  So
 								     check that it's a real dot by checking `c'.  */
 								  if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    {
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      ans = scm_read_expression (port, opts);
 								      if (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									scm_i_input_error (FUNC_NAME, port, "missing close paren",
 											   SCM_EOL);
 								      return ans;
 								    }
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  /* Build the head of the list structure. */
 								  ans = tl = scm_cons (tmp, SCM_EOL);
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  while (terminating_char != (c = flush_ws (port, opts, FUNC_NAME)))
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    {
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      SCM new_tail;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								      if (c == ')' || (c == ']' && opts->square_brackets_p)
 								          || ((c == '}' || c == ']') && opts->curly_infix_p))
-												fix '(] infinite loop

* libguile/read.c (scm_read_sexp): Fix reader infinite loop. Thanks to
  Bill Schottstaedt for the report.
* test-suite/tests/reader.test: Add test.

											
										
										
											2010-07-13 21:53:41 +02:00
+								        scm_i_input_error (FUNC_NAME, port,
 								                           "in pair: mismatched close paren: ~A",
 								                           scm_list_1 (SCM_MAKE_CHAR (c)));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      scm_ungetc (c, port);
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      tmp = scm_read_expression (port, opts);
-												fix '(] infinite loop

* libguile/read.c (scm_read_sexp): Fix reader infinite loop. Thanks to
  Bill Schottstaedt for the report.
* test-suite/tests/reader.test: Add test.

											
										
										
											2010-07-13 21:53:41 +02:00
-												fix '(a #{.} b)

* libguile/read.c (scm_read_sexp): Don't confuse `#{.}#' with `.' for
  the purpose of reading dotted pairs.  Thanks to CRLF0710 for the
  report.

* test-suite/tests/reader.test ("#{}#"): Add test.

											
										
										
											2011-07-01 12:20:52 +02:00
+								      /* See above note about scm_sym_dot.  */
 								      if (c == '.' && scm_is_eq (scm_sym_dot, tmp))
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+									{
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+									  SCM_SETCDR (tl, scm_read_expression (port, opts));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+									  c = flush_ws (port, opts, FUNC_NAME);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									  if (terminating_char != c)
 									    scm_i_input_error (FUNC_NAME, port,
 											       "in pair: missing close paren", SCM_EOL);
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+									  break;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+									}
-													* read.c (scm_lreadr): When reading a hash token, check for a
	user-defined hash procedure first, so that overriding the builtin
	hash characters is possible (this was needed for implementing
	SRFI-4's read syntax `f32(...)').

	* num2integral.i.c: Use scm_t_signed_bits instead of scm_t_bits,
	because the latter is unsigned now and breaks comparisons like
	(n < (scm_t_signed_bits)MIN_VALUE).

											
										
										
											2001-06-27 13:15:20 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      new_tail = scm_cons (tmp, SCM_EOL);
 								      SCM_SETCDR (tl, new_tail);
 								      tl = new_tail;
 								    }
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								  if (curly_list_p)
 								    {
 								      /* In addition to finding the length, 'scm_ilength' checks for
 								         improper or circular lists, in which case it returns -1. */
 								      int len = scm_ilength (ans);
 								      /* The (len == 0) case is handled above */
 								      if (len == 1)
 								        /* Return directly to avoid re-annotating the element's source
 								           location with the position of the outer brace.  Also, it
 								           might not be possible to annotate the element. */
 								        return scm_car (ans);  /* {e} => e */
 								      else if (len == 2)
 								        ;  /* Leave the list unchanged: {e1 e2} => (e1 e2) */
 								      else if (len >= 3 && (len & 1))
 								        {
 								          /* It's a proper list whose length is odd and at least 3.  If
 								             the elements at odd indices (the infix operator positions)
 								             are all 'equal?', then it's a simple curly-infix list.
 								             Otherwise it's a mixed curly-infix list. */
 								          SCM op = scm_cadr (ans);
 								          /* Check to see if the elements at odd indices are 'equal?' */
 								          for (tl = scm_cdddr (ans); ; tl = scm_cddr (tl))
 								            {
 								              if (scm_is_null (tl))
 								                {
 								                  /* Convert simple curly-infix list to prefix:
 								                     {a <op> b <op> ...} => (<op> a b ...) */
 								                  tl = ans;
 								                  while (scm_is_pair (scm_cdr (tl)))
 								                    {
 								                      tmp = scm_cddr (tl);
 								                      SCM_SETCDR (tl, tmp);
 								                      tl = tmp;
 								                    }
 								                  ans = scm_cons (op, ans);
 								                  break;
 								                }
 								              else if (scm_is_false (scm_equal_p (op, scm_car (tl))))
 								                {
 								                  /* Mixed curly-infix list: {e ...} => ($nfx$ e ...) */
 								                  ans = scm_cons (sym_nfx, ans);
 								                  break;
 								                }
 								            }
 								        }
 								      else
 								        /* Mixed curly-infix (possibly improper) list:
 								           {e . tail} => ($nfx$ e . tail) */
 								        ans = scm_cons (sym_nfx, ans);
 								    }
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  return maybe_annotate_source (ans, port, opts, line, column);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								}
 								#undef FUNC_NAME
-												* read.c (scm_lreadr): Call scm_i_read_homogenous_vector for '#f',
'#u', and '#s'.

* read.h, read.c (scm_i_input_error): Renamed from scm_input_error
and made non-static.  Changed all uses.

											
										
										
											2004-10-26 17:00:13 +00:00
-												Refactor repeated code in scm_read_string

* libguile/read.c (SCM_READ_HEX_ESCAPE): new macro
  (scm_read_string): use new macro SCM_READ_HEX_ESCAPE

											
										
										
											2010-01-10 18:24:23 -08:00
 								/* Read a hexadecimal number NDIGITS in length.  Put its value into the variable
-												Reader option for R6RS hex escapes

This adds a reader option 'r6rs-hex-escapes that modifies the
behavior of numeric escapes in characters and strings.  When enabled,
variable-length character hex escapes (#\xNNN) are allowed and become
the default output format for numerically-escaped characters.  Also,
string hex escapes switch to a semicolon terminated hex escape (\xNNNN;).

* libguile/print.c (PRINT_CHAR_ESCAPE): new macro
  (iprin1): use new macro PRINT_CHAR_ESCAPE

* libguile/private-options.h (SCM_R6RS_ESCAPES_P): new #define

* libguile/read.c (scm_read_opts): add new option r6rs-hex-escapes
  (SCM_READ_HEX_ESCAPE): modify to take a terminator parameter
  (scm_read_string): parse R6RS hex string escapes
  (scm_read_character): parse R6RS hex character escapes

* test-suite/tests/chars.test (with-read-options): new procedure
  (R6RS hex escapes): new tests

* test-suite/tests/strings.test (with-read-options): new procedure
  (R6RS hex escapes): new tests

											
										
										
											2010-01-12 21:02:41 -08:00
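+								   C.  If TERMINATOR is non-null, terminate early if the TERMINATOR character is
 								   found after at least one digit has been read.  Characters are read
 								   from PORT.  Jumps to the `str_eof' label on end of file, and to the
 								   `bad_escaped' label (with C set to the offending character) when a
 								   non-hexadecimal character is read.  */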
 								#define SCM_READ_HEX_ESCAPE(ndigits, terminator)                   \
 								  do                                                               \
 								    {                                                              \
 								      scm_t_wchar a;                                               \
 								      size_t i = 0;                                                \
 								      c = 0;                                                       \
 								      while (i < ndigits)                                          \
 								        {                                                          \
 								          a = scm_getc (port);                                     \
 								          if (a == EOF)                                            \
 								            goto str_eof;                                          \
 								          if (terminator                                           \
 								              && (a == (scm_t_wchar) terminator)                   \
 								              && (i > 0))                                          \
 								            break;                                                 \
 								          if ('0' <= a && a <= '9')                                \
 								            a -= '0';                                              \
 								          else if ('A' <= a && a <= 'F')                           \
 								            a = a - 'A' + 10;                                      \
 								          else if ('a' <= a && a <= 'f')                           \
 								            a = a - 'a' + 10;                                      \
 								          else                                                     \
 								            {                                                      \
 								              c = a;                                               \
 								              goto bad_escaped;                                    \
 								            }                                                      \
 								          c = c * 16 + a;                                          \
 								          i ++;                                                    \
 								        }                                                          \
-												Refactor repeated code in scm_read_string

* libguile/read.c (SCM_READ_HEX_ESCAPE): new macro
  (scm_read_string): use new macro SCM_READ_HEX_ESCAPE

											
										
										
											2010-01-10 18:24:23 -08:00
+								    } while (0)
-												implement r6rs hungry escaped EOL

* libguile/private-options.h (SCM_HUNGRY_EOL_ESCAPES_P): New private
  option.
* libguile/read.c: Define SCM_HUNGRY_EOL_ESCAPES_P, defaulting to #f.
  (skip_intraline_whitespace): New helper.
  (scm_read_string): If SCM_HUNGRY_EOL_ESCAPES_P,
  skip_intraline_whitespace after an escaped EOL.

* test-suite/tests/reader.test ("read-options"): Add test.

											
										
										
											2011-01-21 08:57:39 +01:00
+								/* Skip over a run of intraline whitespace on PORT: tab characters and
 								   characters in the UC_SPACE_SEPARATOR general category.  The first
 								   character that is neither is pushed back onto PORT; if end of file
 								   is reached first, nothing is pushed back.  */
 								static void
 								skip_intraline_whitespace (SCM port)
 								{
 								  for (;;)
 								    {
 								      scm_t_wchar ch = scm_getc (port);
 								      if (ch == EOF)
 								        return;
 								      if (ch != '\t' && !uc_is_general_category (ch, UC_SPACE_SEPARATOR))
 								        {
 								          /* First non-whitespace character: leave it for the caller.  */
 								          scm_ungetc (ch, port);
 								          return;
 								        }
 								    }
 								}
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_string (int chr, SCM port, scm_t_read_opts *opts)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#define FUNC_NAME "scm_lreadr"
 								{
 								  /* For strings smaller than C_STR, this function creates only one Scheme
 								     object (the string returned).  */
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Optimize `scm_read_string'.

According to the new benchmarks, this leads to a 5% speed improvement when
reading small strings, and a 27% improvement when reading large strings.

* libguile/read.c (READER_STRING_BUFFER_SIZE): Change to 128; update
  comment to mention codepoints.
  (scm_read_string): Make `str' a list of strings, instead of a string.
  Store characters read in buffer `c_str'.  Cons to STR when C_STR is
  full, and concatenate/reverse at the end.

* benchmark-suite/benchmarks/read.bm (small, large): New variables.
  Set %DEFAULT-PORT-ENCODING to "UTF-8".
  ("read")["small strings", "large strings"]: New benchmarks.

											
										
										
											2012-05-07 00:32:01 +02:00
+								  SCM str = SCM_EOL;
 								  size_t c_str_len = 0;
 								  scm_t_wchar c, c_str[READER_STRING_BUFFER_SIZE];
-												(scm_lreadr): Call scm_i_read_array for all characters following '#'
that can start an array.  Explicitly disambiguate 'i' and 'e' between
introducing numbers and uniform vectors.  Do not call
scm_i_read_homogenous_vector, since that is also handled by
scm_i_read_array now.

											
										
										
											2004-10-29 14:45:19 +00:00
-												Add source properties to many more types of data

* libguile/read.c (scm_read_array): New internal helper that
  calls scm_i_read_array and sets its source property if the
  'positions' reader option is set.

  (scm_read_string): Set source properties on strings if the 'positions'
  reader option is set.

  (scm_read_vector, scm_read_srfi4_vector, scm_read_bytevector,
  scm_read_guile_bitvector, scm_read_sharp): Add new arguments for the
  'line' and 'column' of the first character of the datum being read.
  Set source properties if the 'positions' reader option is set.

  (scm_read_expression): Pass 'line' and 'column' to scm_read_sharp.

* doc/ref/api-debug.texi (Source Properties): Update manual.

											
										
										
											2012-02-08 15:51:38 -05:00
+								  /* Need to capture line and column numbers here. */
 								  long line = SCM_LINUM (port);
 								  int column = SCM_COL (port) - 1;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  while ('"' != (c = scm_getc (port)))
 								    {
 								      if (c == EOF)
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								        {
 								        str_eof:
 								          scm_i_input_error (FUNC_NAME, port,
 								                             "end of file in string constant", SCM_EOL);
 								        }
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Optimize `scm_read_string'.

According to the new benchmarks, this leads to a 5% speed improvement when
reading small strings, and a 27% improvement when reading large strings.

* libguile/read.c (READER_STRING_BUFFER_SIZE): Change to 128; update
  comment to mention codepoints.
  (scm_read_string): Make `str' a list of strings, instead of a string.
  Store characters read in buffer `c_str'.  Cons to STR when C_STR is
  full, and concatenate/reverse at the end.

* benchmark-suite/benchmarks/read.bm (small, large): New variables.
  Set %DEFAULT-PORT-ENCODING to "UTF-8".
  ("read")["small strings", "large strings"]: New benchmarks.

											
										
										
											2012-05-07 00:32:01 +02:00
+								      if (c_str_len + 1 >= READER_STRING_BUFFER_SIZE)
 									{
 									  str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
 									  c_str_len = 0;
 									}
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      if (c == '\\')
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								        {
 								          switch (c = scm_getc (port))
 								            {
 								            case EOF:
 								              goto str_eof;
 								            case '"':
 								            case '\\':
 								              break;
 								            case '\n':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								              if (opts->hungry_eol_escapes_p)
-												implement r6rs hungry escaped EOL

* libguile/private-options.h (SCM_HUNGRY_EOL_ESCAPES_P): New private
  option.
* libguile/read.c: Define SCM_HUNGRY_EOL_ESCAPES_P, defaulting to #f.
  (skip_intraline_whitespace): New helper.
  (scm_read_string): If SCM_HUNGRY_EOL_ESCAPES_P,
  skip_intraline_whitespace after an escaped EOL.

* test-suite/tests/reader.test ("read-options"): Add test.

											
										
										
											2011-01-21 08:57:39 +01:00
+								                skip_intraline_whitespace (port);
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								              continue;
 								            case '0':
 								              c = '\0';
 								              break;
 								            case 'f':
 								              c = '\f';
 								              break;
 								            case 'n':
 								              c = '\n';
 								              break;
 								            case 'r':
 								              c = '\r';
 								              break;
 								            case 't':
 								              c = '\t';
 								              break;
 								            case 'a':
 								              c = '\007';
 								              break;
 								            case 'v':
 								              c = '\v';
 								              break;
-												Add R6RS backspace string escape

R6RS suggests that '\b' should be a string escape for the backspace
character.

* libguile/read.c (scm_read_string): parse backspace escape

* test-suite/tests/strings.test (R6RS backslash escapes): new test
  (Guile extensions backslash escapes): remove R6RS escapes from test.

* doc/ref/api-data.texi (Strings): document new string escape

											
										
										
											2010-01-10 15:41:37 -08:00
+								            case 'b':
 								              c = '\010';
 								              break;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								            case 'x':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								              if (opts->r6rs_escapes_p)
-												Reader option for R6RS hex escapes

This adds a reader option 'r6rs-hex-escapes that modifies the
behavior of numeric escapes in characters and strings.  When enabled,
variable-length character hex escapes (#\xNNN) are allowed and become
the default output format for numerically-escaped characters.  Also,
string hex escapes switch to a semicolon terminated hex escape (\xNNNN;).

* libguile/print.c (PRINT_CHAR_ESCAPE): new macro
  (iprin1): use new macro PRINT_CHAR_ESCAPE

* libguile/private-options.h (SCM_R6RS_ESCAPES_P): new #define

* libguile/read.c (scm_read_opts): add new option r6rs-hex-escapes
  (SCM_READ_HEX_ESCAPE): modify to take a terminator parameter
  (scm_read_string): parse R6RS hex string escapes
  (scm_read_character): parse R6RS hex character escapes

* test-suite/tests/chars.test (with-read-options): new procedure
  (R6RS hex escapes): new tests

* test-suite/tests/strings.test (with-read-options): new procedure
  (R6RS hex escapes): new tests

											
										
										
											2010-01-12 21:02:41 -08:00
+								                SCM_READ_HEX_ESCAPE (10, ';');
 								              else
 								                SCM_READ_HEX_ESCAPE (2, '\0');
-												Refactor repeated code in scm_read_string

* libguile/read.c (SCM_READ_HEX_ESCAPE): new macro
  (scm_read_string): use new macro SCM_READ_HEX_ESCAPE

											
										
										
											2010-01-10 18:24:23 -08:00
+								              break;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								            case 'u':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								              if (!opts->r6rs_escapes_p)
-												Disable \u and \U escapes when r6rs-hex-escapes enabled

When the reader option 'r6rs-hex-escapes is enabled, the \uNNNN and
\UNNNNNN string escape sequences should be disabled.

* libguile/read.c (scm_read_string): added checks for SCM_R6RS_ESCAPES_P

											
										
										
											2010-01-13 07:02:07 -08:00
+								                {
 								                  SCM_READ_HEX_ESCAPE (4, '\0');
 								                  break;
 								                }
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								            case 'U':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								              if (!opts->r6rs_escapes_p)
-												Disable \u and \U escapes when r6rs-hex-escapes enabled

When the reader option 'r6rs-hex-escapes is enabled, the \uNNNN and
\UNNNNNN string escape sequences should be disabled.

* libguile/read.c (scm_read_string): added checks for SCM_R6RS_ESCAPES_P

											
										
										
											2010-01-13 07:02:07 -08:00
+								                {
 								                  SCM_READ_HEX_ESCAPE (6, '\0');
 								                  break;
 								                }
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								            default:
 								            bad_escaped:
 								              scm_i_input_error (FUNC_NAME, port,
 								                                 "illegal character in escape sequence: ~S",
 								                                 scm_list_1 (SCM_MAKE_CHAR (c)));
 								            }
 								        }
-												Optimize `scm_read_string'.

According to the new benchmarks, this leads to a 5% speed improvement when
reading small strings, and a 27% improvement when reading large strings.

* libguile/read.c (READER_STRING_BUFFER_SIZE): Change to 128; update
  comment to mention codepoints.
  (scm_read_string): Make `str' a list of strings, instead of a string.
  Store characters read in buffer `c_str'.  Cons to STR when C_STR is
  full, and concatenate/reverse at the end.

* benchmark-suite/benchmarks/read.bm (small, large): New variables.
  Set %DEFAULT-PORT-ENCODING to "UTF-8".
  ("read")["small strings", "large strings"]: New benchmarks.

											
										
										
											2012-05-07 00:32:01 +02:00
 								      c_str[c_str_len++] = c;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    }
-												Optimize `scm_read_string'.

According to the new benchmarks, this leads to a 5% speed improvement when
reading small strings, and a 27% improvement when reading large strings.

* libguile/read.c (READER_STRING_BUFFER_SIZE): Change to 128; update
  comment to mention codepoints.
  (scm_read_string): Make `str' a list of strings, instead of a string.
  Store characters read in buffer `c_str'.  Cons to STR when C_STR is
  full, and concatenate/reverse at the end.

* benchmark-suite/benchmarks/read.bm (small, large): New variables.
  Set %DEFAULT-PORT-ENCODING to "UTF-8".
  ("read")["small strings", "large strings"]: New benchmarks.

											
										
										
											2012-05-07 00:32:01 +02:00
 								  if (scm_is_null (str))
 								    /* Fast path: we got a string that fits in C_STR.  */
 								    str = scm_from_utf32_stringn (c_str, c_str_len);
 								  else
 								    {
 								      if (c_str_len > 0)
 									str = scm_cons (scm_from_utf32_stringn (c_str, c_str_len), str);
 								      str = scm_string_concatenate_reverse (str, SCM_UNDEFINED, SCM_UNDEFINED);
 								    }
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  return maybe_annotate_source (str, port, opts, line, column);
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												* Eliminate some calls to scm_wta.

											
										
										
											2001-03-04 17:09:34 +00:00
+								#undef FUNC_NAME
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_number (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  SCM result, str = SCM_EOL;
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+								  char local_buffer[READER_BUFFER_SIZE], *buffer;
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  size_t bytes_read;
 								  scm_t_port *pt = SCM_PTAB_ENTRY (port);
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Add support for source properties on non-immediate numbers

* libguile/read.c (scm_read_number): Set source properties on
  non-immediate numbers if the 'positions' reader option is set.

* doc/ref/api-debug.texi (Source Properties): Update manual.

											
										
										
											2012-02-15 11:47:31 -05:00
+								  /* Need to capture line and column numbers here. */
 								  long line = SCM_LINUM (port);
 								  int column = SCM_COL (port) - 1;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  scm_ungetc (chr, port);
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+												&bytes_read);
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+								  str = scm_from_stringn (buffer, bytes_read, pt->encoding, pt->ilseq_handler);
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
 								  result = scm_string_to_number (str, SCM_UNDEFINED);
-												Add support for source properties on non-immediate numbers

* libguile/read.c (scm_read_number): Set source properties on
  non-immediate numbers if the 'positions' reader option is set.

* doc/ref/api-debug.texi (Source Properties): Update manual.

											
										
										
											2012-02-15 11:47:31 -05:00
+								  if (scm_is_false (result))
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								    {
 								      /* Return a symbol instead of a number */
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      if (opts->case_insensitive_p)
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								        str = scm_string_downcase_x (str);
 								      result = scm_string_to_symbol (str);
 								    }
-												Add support for source properties on non-immediate numbers

* libguile/read.c (scm_read_number): Set source properties on
  non-immediate numbers if the 'positions' reader option is set.

* doc/ref/api-debug.texi (Source Properties): Update manual.

											
										
										
											2012-02-15 11:47:31 -05:00
+								  else if (SCM_NIMP (result))
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								    result = maybe_annotate_source (result, port, opts, line, column);
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  SCM_COL (port) += scm_i_string_length (str);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  return result;
 								}
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  SCM result;
 								  int ends_with_colon = 0;
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  size_t bytes_read;
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  int postfix = (opts->keyword_style == KEYWORD_STYLE_POSTFIX);
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+								  char local_buffer[READER_BUFFER_SIZE], *buffer;
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  scm_t_port *pt = SCM_PTAB_ENTRY (port);
 								  SCM str;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  scm_ungetc (chr, port);
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+												&bytes_read);
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  if (bytes_read > 0)
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+								    ends_with_colon = buffer[bytes_read - 1] == ':';
-												Add support for SRFI-88-like postfix keyword read syntax.

											
										
										
											2008-04-15 19:52:43 +02:00
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  if (postfix && ends_with_colon && (bytes_read > 1))
 								    {
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+								      str = scm_from_stringn (buffer, bytes_read - 1,
 											      pt->encoding, pt->ilseq_handler);
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      if (opts->case_insensitive_p)
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								        str = scm_string_downcase_x (str);
 								      result = scm_symbol_to_keyword (scm_string_to_symbol (str));
 								    }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  else
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								    {
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+								      str = scm_from_stringn (buffer, bytes_read,
 											      pt->encoding, pt->ilseq_handler);
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      if (opts->case_insensitive_p)
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								        str = scm_string_downcase_x (str);
 								      result = scm_string_to_symbol (str);
 								    }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  SCM_COL (port) += scm_i_string_length (str);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  return result;
 								}
 								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_number_and_radix (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#define FUNC_NAME "scm_lreadr"
 								{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  SCM result;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  size_t read;
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+								  char local_buffer[READER_BUFFER_SIZE], *buffer;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  unsigned int radix;
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  SCM str;
 								  scm_t_port *pt;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  switch (chr)
 								    {
 								    case 'B':
 								    case 'b':
 								      radix = 2;
 								      break;
 								    case 'o':
 								    case 'O':
 								      radix = 8;
 								      break;
 								    case 'd':
 								    case 'D':
 								      radix = 10;
 								      break;
 								    case 'x':
 								    case 'X':
 								      radix = 16;
 								      break;
 								    default:
 								      scm_ungetc (chr, port);
 								      scm_ungetc ('#', port);
 								      radix = 10;
 								    }
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  buffer = read_complete_token (port, opts, local_buffer, sizeof local_buffer,
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+												&read);
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
 								  pt = SCM_PTAB_ENTRY (port);
-												Simplify the reader's `read_complete_token'.

* libguile/read.c (read_token): Remove unneeded `const' before `size_t'.
  (read_complete_token): Remove `overflow_buffer' parameter; return
  `char *' instead of `int'.  Allocate the overflow buffer with
  `scm_gc_malloc_pointerless' instead of `scm_malloc'.  Return either
  the overflow buffer or BUFFER.
  (scm_read_number, scm_read_mixed_case_symbol,
  scm_read_number_and_radix): Rename `buffer' to `local_buffer', and
  `overflow_buffer' to `buffer'.  Remove `overflow'.  Adjust code to new
  `read_complete_token'.

											
										
										
											2012-05-04 22:36:27 +02:00
+								  str = scm_from_stringn (buffer, read, pt->encoding, pt->ilseq_handler);
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
 								  result = scm_string_to_number (str, scm_from_uint (radix));
 								  SCM_COL (port) += scm_i_string_length (str);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  if (scm_is_true (result))
 								    return result;
 								  scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
 								  return SCM_BOOL_F;
 								}
 								#undef FUNC_NAME
 								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_quote (int chr, SCM port, scm_t_read_opts *opts)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								{
 								  SCM p;
-												Changes from arch/CVS synchronization

											
										
										
											2007-08-23 21:17:24 +00:00
+								  long line = SCM_LINUM (port);
 								  int column = SCM_COL (port) - 1;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  switch (chr)
 								    {
 								    case '`':
 								      p = scm_sym_quasiquote;
 								      break;
 								    case '\'':
 								      p = scm_sym_quote;
 								      break;
 								    case ',':
 								      {
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									scm_t_wchar c;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 									c = scm_getc (port);
 									if ('@' == c)
 									  p = scm_sym_uq_splicing;
 									else
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+									  {
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									    scm_ungetc (c, port);
 									    p = scm_sym_unquote;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+									  }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									break;
 								      }
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    default:
 								      fprintf (stderr, "%s: unhandled quote character (%i)\n",
-												More compilation fixes with Sun CC (bug #21378).

											
										
										
											2008-02-07 09:54:47 +00:00
+									       "scm_read_quote", chr);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      abort ();
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    }
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
 								  return maybe_annotate_source (p, port, opts, line, column);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								}
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								SCM_SYMBOL (sym_syntax, "syntax");
 								SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
 								SCM_SYMBOL (sym_unsyntax, "unsyntax");
 								SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");
 								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_syntax (int chr, SCM port, scm_t_read_opts *opts)
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								{
 								  SCM p;
 								  long line = SCM_LINUM (port);
 								  int column = SCM_COL (port) - 1;
 								  switch (chr)
 								    {
 								    case '`':
 								      p = sym_quasisyntax;
 								      break;
 								    case '\'':
 								      p = sym_syntax;
 								      break;
 								    case ',':
 								      {
 									int c;
 									c = scm_getc (port);
 									if ('@' == c)
 									  p = sym_unsyntax_splicing;
 									else
 									  {
 									    scm_ungetc (c, port);
 									    p = sym_unsyntax;
 									  }
 									break;
 								      }
 								    default:
 								      fprintf (stderr, "%s: unhandled syntax character (%i)\n",
 									       "scm_read_syntax", chr);
 								      abort ();
 								    }
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  p = scm_cons2 (p, scm_read_expression (port, opts), SCM_EOL);
 								  return maybe_annotate_source (p, port, opts, line, column);
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								}
-												Remove inline and register attributes from read.c

* libguile/read.c: Remove all 'inline' and 'register' attributes.

											
										
										
											2012-02-08 03:00:15 -05:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_nil (int chr, SCM port, scm_t_read_opts *opts)
-												add read syntax for #nil

* libguile/evalext.c (scm_self_evaluating_p): #nil is self-evaluating.

* libguile/read.c (scm_read_nil, scm_read_sharp): Add read syntax for
  #nil.

											
										
										
											2010-04-09 14:15:16 +02:00
+								{
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  SCM id = scm_read_mixed_case_symbol (chr, port, opts);
-												add read syntax for #nil

* libguile/evalext.c (scm_self_evaluating_p): #nil is self-evaluating.

* libguile/read.c (scm_read_nil, scm_read_sharp): Add read syntax for
  #nil.

											
										
										
											2010-04-09 14:15:16 +02:00
 								  if (!scm_is_eq (id, sym_nil))
 								    scm_i_input_error ("scm_read_nil", port,
 								                       "unexpected input while reading #nil: ~a",
 								                       scm_list_1 (id));
 								  return SCM_ELISP_NIL;
 								}
-												Remove inline and register attributes from read.c

* libguile/read.c: Remove all 'inline' and 'register' attributes.

											
										
										
											2012-02-08 03:00:15 -05:00
+								static SCM
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								scm_read_semicolon_comment (int chr, SCM port)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
 								  /* Discard the remainder of the current line: bytes are consumed
 								     from PORT up to and including the next newline, or until EOF is
 								     reached.  CHR is not used.  Always returns SCM_UNSPECIFIED.  */
 								  int c;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  /* We use the get_byte here because there is no need to get the
 								     locale correct with comment input. This presumes that newline
 								     always represents itself no matter what the encoding is.  */
 								  for (c = scm_get_byte_or_eof (port);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								       (c != EOF) && (c != '\n');
-												Faster read of semicolon comments

There is no need to do character encoding processing within
semicolon comments.

* libguile/read.c (scm_read_semicolon_comment): changed

											
										
										
											2010-02-15 20:45:58 -08:00
+								       c = scm_get_byte_or_eof (port));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  return SCM_UNSPECIFIED;
 								}
 								/* Sharp readers, i.e. readers called after a `#' sign has been read.  */
 								/* Map CHR to a boolean object: SCM_BOOL_T when CHR is t or T,
 								   SCM_BOOL_F when CHR is f or F.  Any other CHR falls out of the
 								   switch and yields SCM_UNSPECIFIED.  PORT is not read from here.  */
 								static SCM
 								scm_read_boolean (int chr, SCM port)
 								{
 								  switch (chr)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    {
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    case 't':
 								    case 'T':
 								      return SCM_BOOL_T;
 								    case 'f':
 								    case 'F':
 								      return SCM_BOOL_F;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  return SCM_UNSPECIFIED;
 								}
 								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_character (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#define FUNC_NAME "scm_lreadr"
 								{
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  char buffer[READER_CHAR_NAME_MAX_SIZE];
 								  SCM charname;
 								  size_t charname_len, bytes_read;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  scm_t_wchar cp;
 								  int overflow;
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  scm_t_port *pt;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  overflow = read_token (port, opts, buffer, READER_CHAR_NAME_MAX_SIZE,
 								                         &bytes_read);
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  if (overflow)
-												uninitialized var in scm_read_character

* libguile/read.c (scm_read_character): Fix error condition where
  charname could be uninitialized.

											
										
										
											2010-10-18 13:29:58 +02:00
+								    scm_i_input_error (FUNC_NAME, port, "character name too long", SCM_EOL);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  if (bytes_read == 0)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    {
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      chr = scm_getc (port);
 								      if (chr == EOF)
 									scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
 											   "while reading character", SCM_EOL);
 								      /* CHR must be a token delimiter, like a whitespace.  */
 								      return (SCM_MAKE_CHAR (chr));
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  pt = SCM_PTAB_ENTRY (port);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  /* Simple ASCII characters can be processed immediately.  Also, simple
 								     ISO-8859-1 characters can be processed immediately if the encoding for this
 								     port is ISO-8859-1.  */
 								  if (bytes_read == 1 && ((unsigned char) buffer[0] <= 127 || pt->encoding == NULL))
 								    {
 								      SCM_COL (port) += 1;
 								      return SCM_MAKE_CHAR (buffer[0]);
 								    }
 								  /* Otherwise, convert the buffer into a proper scheme string for
 								     processing.  */
 								  charname = scm_from_stringn (buffer, bytes_read, pt->encoding,
 											       pt->ilseq_handler);
 								  charname_len = scm_i_string_length (charname);
 								  SCM_COL (port) += charname_len;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  cp = scm_i_string_ref (charname, 0);
-												Optimize reader by preferring stack-allocated buffers

* libguile/read.c (read_token): now takes a C buffer instead of a SCM
  string.  All callers changed.
  (read_complete_token): now takes C buffers, not SCM strings.  No longer
  does port position updates or encoding processing.  All callers changed.
  (scm_read_number, scm_read_mixed_case_symbol, scm_read_number_and_radix)
  (scm_read_character): Do port updates and string processing no longer
  done by read_complete_token. Some reordering for optimization.

											
										
										
											2010-02-02 20:33:41 -08:00
+								  if (charname_len == 1)
 								    return SCM_MAKE_CHAR (cp);
 								  /* Ignore dotted circles, which may be used to keep combining characters from
 								     combining with the backslash in #\charname.  */
-												Modify read and print of combining characters

Since combining characters, such as accents, modify the appearance of the
previous letter, it looks awkward in its character literal form (#\name)
since it modified the backslash.  This instead prints the combining
character on a small circle.

* libguile/chars.h (SCM_CODEPOINT_DOTTED_CIRCLE): new #define

* libguile/print.c (iprint1): print combining characters on dotted circles

* libguile/read.c (scm_read_character): parse the combination of combining
  characters and dotted circles

											
										
										
											2009-09-03 07:47:26 -07:00
+								  if (cp == SCM_CODEPOINT_DOTTED_CIRCLE && charname_len == 2)
 								    return SCM_MAKE_CHAR (scm_i_string_ref (charname, 1));
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  if (cp >= '0' && cp < '8')
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    {
 								      /* Dirk:FIXME::  This type of character syntax is not R5RS
 								       * compliant.  Further, it should be verified that the constant
-												Range check octal-escaped characters

* libguile/read.c (scm_read_character): range check octal escapes

											
										
										
											2009-08-29 07:14:49 -07:00
+								       * does only consist of octal digits.  */
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								      SCM p = scm_string_to_number (charname, scm_from_uint (8));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      if (SCM_I_INUMP (p))
-												Range check octal-escaped characters

* libguile/read.c (scm_read_character): range check octal escapes

											
										
										
											2009-08-29 07:14:49 -07:00
+								        {
-												fix a number of assumptions that a long could hold an inum

* libguile/bytevectors.c:
* libguile/goops.c:
* libguile/instructions.c:
* libguile/numbers.c:
* libguile/random.c:
* libguile/read.c:
* libguile/vm-i-scheme.c: Fix a number of assumptions that a long could
  hold an inum. This is not the case on platforms whose void* is larger
  than their long.

* libguile/numbers.c (scm_i_inum2big): New helper, only implemented for
  sizeof(void*) == sizeof(long); produces a compile error on other
  platforms. Basically gmp doesn't have a nice interface for converting
  between mpz values and intmax_t.

											
										
										
											2010-11-19 11:29:26 +01:00
+								          scm_t_wchar c = scm_to_uint32 (p);
-												Range check octal-escaped characters

* libguile/read.c (scm_read_character): range check octal escapes

											
										
										
											2009-08-29 07:14:49 -07:00
+								          if (SCM_IS_UNICODE_CHAR (c))
 								            return SCM_MAKE_CHAR (c);
 								          else
-												Enable character hex escapes by default

R6RS character hex escapes do not conflict with legacy Guile octal
character escapes, so they can be enabled by default.

* libguile/read.c (scm_read_character): modified
* test-suite/tests/reader.test: modify character escape tests
* doc/ref/api-data.texi: modified
* doc/ref/api-options.texi: modified

											
										
										
											2010-07-17 04:16:57 -07:00
+								            scm_i_input_error (FUNC_NAME, port,
-												Range check octal-escaped characters

* libguile/read.c (scm_read_character): range check octal escapes

											
										
										
											2009-08-29 07:14:49 -07:00
+								                               "out-of-range octal character escape: ~a",
 								                               scm_list_1 (charname));
 								        }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    }
-												Enable character hex escapes by default

R6RS character hex escapes do not conflict with legacy Guile octal
character escapes, so they can be enabled by default.

* libguile/read.c (scm_read_character): modified
* test-suite/tests/reader.test: modify character escape tests
* doc/ref/api-data.texi: modified
* doc/ref/api-options.texi: modified

											
										
										
											2010-07-17 04:16:57 -07:00
+								  if (cp == 'x' && (charname_len > 1))
-												Reader option for R6RS hex escapes

This adds a reader option 'r6rs-hex-escapes that modifies the
behavior of numeric escapes in characters and strings.  When enabled,
variable-length character hex escapes (#\xNNN) are allowed and become
the default output format for numerically-escaped characters.  Also,
string hex escapes switch to a semicolon terminated hex escape (\xNNNN;).

* libguile/print.c (PRINT_CHAR_ESCAPE): new macro
  (iprin1): use new macro PRINT_CHAR_ESCAPE

* libguile/private-options.h (SCM_R6RS_ESCAPES_P): new #define

* libguile/read.c (scm_read_opts): add new option r6rs-hex-escapes
  (SCM_READ_HEX_ESCAPE): modify to take a terminator parameter
  (scm_read_string): parse R6RS hex string escapes
  (scm_read_character): parse R6RS hex character escapes

* test-suite/tests/chars.test (with-read-options): new procedure
  (R6RS hex escapes): new tests

* test-suite/tests/strings.test (with-read-options): new procedure
  (R6RS hex escapes): new tests

											
										
										
											2010-01-12 21:02:41 -08:00
+								    {
 								      SCM p;
-												Enable character hex escapes by default

R6RS character hex escapes do not conflict with legacy Guile octal
character escapes, so they can be enabled by default.

* libguile/read.c (scm_read_character): modified
* test-suite/tests/reader.test: modify character escape tests
* doc/ref/api-data.texi: modified
* doc/ref/api-options.texi: modified

											
										
										
											2010-07-17 04:16:57 -07:00
-												Reader option for R6RS hex escapes

This adds a reader option 'r6rs-hex-escapes that modifies the
behavior of numeric escapes in characters and strings.  When enabled,
variable-length character hex escapes (#\xNNN) are allowed and become
the default output format for numerically-escaped characters.  Also,
string hex escapes switch to a semicolon terminated hex escape (\xNNNN;).

* libguile/print.c (PRINT_CHAR_ESCAPE): new macro
  (iprin1): use new macro PRINT_CHAR_ESCAPE

* libguile/private-options.h (SCM_R6RS_ESCAPES_P): new #define

* libguile/read.c (scm_read_opts): add new option r6rs-hex-escapes
  (SCM_READ_HEX_ESCAPE): modify to take a terminator parameter
  (scm_read_string): parse R6RS hex string escapes
  (scm_read_character): parse R6RS hex character escapes

* test-suite/tests/chars.test (with-read-options): new procedure
  (R6RS hex escapes): new tests

* test-suite/tests/strings.test (with-read-options): new procedure
  (R6RS hex escapes): new tests

											
										
										
											2010-01-12 21:02:41 -08:00
+								      /* Convert from hex, skipping the initial 'x' character in CHARNAME */
 								      p = scm_string_to_number (scm_c_substring (charname, 1, charname_len),
 								                                scm_from_uint (16));
 								      if (SCM_I_INUMP (p))
 								        {
-												fix a number of assumptions that a long could hold an inum

* libguile/bytevectors.c:
* libguile/goops.c:
* libguile/instructions.c:
* libguile/numbers.c:
* libguile/random.c:
* libguile/read.c:
* libguile/vm-i-scheme.c: Fix a number of assumptions that a long could
  hold an inum. This is not the case on platforms whose void* is larger
  than their long.

* libguile/numbers.c (scm_i_inum2big): New helper, only implemented for
  sizeof(void*) == sizeof(long); produces a compile error on other
  platforms. Basically gmp doesn't have a nice interface for converting
  between mpz values and intmax_t.

											
										
										
											2010-11-19 11:29:26 +01:00
+								          scm_t_wchar c = scm_to_uint32 (p);
-												Reader option for R6RS hex escapes

This adds a reader option 'r6rs-hex-escapes that modifies the
behavior of numeric escapes in characters and strings.  When enabled,
variable-length character hex escapes (#\xNNN) are allowed and become
the default output format for numerically-escaped characters.  Also,
string hex escapes switch to a semicolon terminated hex escape (\xNNNN;).

* libguile/print.c (PRINT_CHAR_ESCAPE): new macro
  (iprin1): use new macro PRINT_CHAR_ESCAPE

* libguile/private-options.h (SCM_R6RS_ESCAPES_P): new #define

* libguile/read.c (scm_read_opts): add new option r6rs-hex-escapes
  (SCM_READ_HEX_ESCAPE): modify to take a terminator parameter
  (scm_read_string): parse R6RS hex string escapes
  (scm_read_character): parse R6RS hex character escapes

* test-suite/tests/chars.test (with-read-options): new procedure
  (R6RS hex escapes): new tests

* test-suite/tests/strings.test (with-read-options): new procedure
  (R6RS hex escapes): new tests

											
										
										
											2010-01-12 21:02:41 -08:00
+								          if (SCM_IS_UNICODE_CHAR (c))
 								            return SCM_MAKE_CHAR (c);
 								          else
 								            scm_i_input_error (FUNC_NAME, port,
 								                               "out-of-range hex character escape: ~a",
 								                               scm_list_1 (charname));
 								        }
 								    }
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  /* The names of characters should never have non-Latin1
 								     characters.  */
 								  if (scm_i_is_narrow_string (charname)
 								      || scm_i_try_narrow_string (charname))
-												fix uninitialized variable in scm_read_character

* libguile/read.c (scm_read_character): Fix uninitialized variable.

											
										
										
											2009-08-26 13:15:07 +02:00
+								    { SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
 								                                       charname_len);
 								      if (scm_is_true (ch))
 								        return ch;
 								    }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+										     scm_list_1 (charname));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  return SCM_UNSPECIFIED;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												* Eliminate some calls to scm_wta.

											
										
										
											2001-03-04 17:09:34 +00:00
+								#undef FUNC_NAME
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Remove inline and register attributes from read.c

* libguile/read.c: Remove all 'inline' and 'register' attributes.

											
										
										
											2012-02-08 03:00:15 -05:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_keyword (int chr, SCM port, scm_t_read_opts *opts)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								{
 								  SCM symbol;
 								  /* Read the symbol that comprises the keyword.  Doing this instead of
 								     invoking a specific symbol reader function allows `scm_read_keyword ()'
 								     to adapt to the delimiters currently valid of symbols.
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								     XXX: This implementation allows sloppy syntaxes like `#:  key'.  */
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  symbol = scm_read_expression (port, opts);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  if (!scm_is_symbol (symbol))
-												More compilation fixes with Sun CC (bug #21378).

											
										
										
											2008-02-07 09:54:47 +00:00
+								    scm_i_input_error ("scm_read_keyword", port,
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+										       "keyword prefix `~a' not followed by a symbol: ~s",
 										       scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
 								  return (scm_symbol_to_keyword (symbol));
 								}
-												Remove inline and register attributes from read.c

* libguile/read.c: Remove all 'inline' and 'register' attributes.

											
										
										
											2012-02-08 03:00:15 -05:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_vector (int chr, SCM port, scm_t_read_opts *opts,
 								                 long line, int column)
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								{
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  /* Note: We call `scm_read_sexp ()' rather than READER here in order to
 								     guarantee that it's going to do what we want.  After all, this is an
 								     implementation detail of `scm_read_vector ()', not a desirable
 								     property.  */
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  return maybe_annotate_source (scm_vector (scm_read_sexp (chr, port, opts)),
 								                                port, opts, line, column);
-												Add source properties to many more types of data

* libguile/read.c (scm_read_array): New internal helper that
  calls scm_i_read_array and sets its source property if the
  'positions' reader option is set.

  (scm_read_string): Set source properties on strings if the 'positions'
  reader option is set.

  (scm_read_vector, scm_read_srfi4_vector, scm_read_bytevector,
  scm_read_guile_bitvector, scm_read_sharp): Add new arguments for the
  'line' and 'column' of the first character of the datum being read.
  Set source properties if the 'positions' reader option is set.

  (scm_read_expression): Pass 'line' and 'column' to scm_read_sharp.

* doc/ref/api-debug.texi (Source Properties): Update manual.

											
										
										
											2012-02-08 15:51:38 -05:00
+								}
-												Move array reader from arrays.c to read.c

* libguile/arrays.c (read_decimal_integer): Move to read.c.
  (scm_i_read_array): Remove.  Incorporate the code into the
  'scm_read_array' static function in read.c.

* libguile/arrays.h (scm_i_read_array): Remove prototype.

* libguile/read.c (read_decimal_integer): Move here from arrays.c.
  (scm_read_array): Incorporate the code from 'scm_i_read_array'.  Call
  'scm_read_vector' and 'scm_read_sexp' instead of 'scm_read'.

											
										
										
											2012-10-22 23:23:45 -04:00
+								/* Helper used by scm_read_array */
 								/* Parse a decimal integer with an optional leading '-' from PORT.  C
 								   is the first character of the candidate number and has already been
 								   read; any further characters are fetched with scm_getc.
 								   If at least one digit is seen, the signed value is stored in *RESP;
 								   otherwise *RESP is left untouched, so whatever default the caller
 								   stored there survives (note that a lone '-' is still consumed).
 								   Returns the first character read that is not part of the number.
 								   A digit sequence too large for ssize_t signals a read error instead
 								   of overflowing, which would be undefined behaviour.  */
 								static int
 								read_decimal_integer (SCM port, int c, ssize_t *resp)
 								{
 								  /* Largest ssize_t value, computed without depending on SSIZE_MAX;
 								     assumes ssize_t is the signed counterpart of size_t.  */
 								  const ssize_t max = (ssize_t) (((size_t) -1) / 2);
 								  ssize_t sign = 1;
 								  ssize_t res = 0;
 								  int got_it = 0;
 								  if (c == '-')
 								    {
 								      sign = -1;
 								      c = scm_getc (port);
 								    }
 								  while ('0' <= c && c <= '9')
 								    {
 								      int digit = c - '0';
 								      /* Refuse the digit if 10*res + digit would exceed MAX.  */
 								      if (res > (max - digit) / 10)
 								        scm_i_input_error (NULL, port,
 								                           "integer too large in array literal",
 								                           SCM_EOL);
 								      res = 10 * res + digit;
 								      got_it = 1;
 								      c = scm_getc (port);
 								    }
 								  if (got_it)
 								    *resp = sign * res;
 								  return c;
 								}
 								/* Read an array.  This function can also read vectors and uniform
 								   vectors.  Also, the conflict between '#f' and '#f32' and '#f64' is
 								   handled here.
-												Fix formatting of comments in scm_read_array.

* libguile/read.c (scm_read_array): Fix formatting of comments.

											
										
										
											2012-10-30 22:58:19 -04:00
+								   C is the first character read after the '#'. */
-												Add source properties to many more types of data

* libguile/read.c (scm_read_array): New internal helper that
  calls scm_i_read_array and sets its source property if the
  'positions' reader option is set.

  (scm_read_string): Set source properties on strings if the 'positions'
  reader option is set.

  (scm_read_vector, scm_read_srfi4_vector, scm_read_bytevector,
  scm_read_guile_bitvector, scm_read_sharp): Add new arguments for the
  'line' and 'column' of the first character of the datum being read.
  Set source properties if the 'positions' reader option is set.

  (scm_read_expression): Pass 'line' and 'column' to scm_read_sharp.

* doc/ref/api-debug.texi (Source Properties): Update manual.

											
										
										
											2012-02-08 15:51:38 -05:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_array (int c, SCM port, scm_t_read_opts *opts, long line, int column)
-												Add source properties to many more types of data

* libguile/read.c (scm_read_array): New internal helper that
  calls scm_i_read_array and sets its source property if the
  'positions' reader option is set.

  (scm_read_string): Set source properties on strings if the 'positions'
  reader option is set.

  (scm_read_vector, scm_read_srfi4_vector, scm_read_bytevector,
  scm_read_guile_bitvector, scm_read_sharp): Add new arguments for the
  'line' and 'column' of the first character of the datum being read.
  Set source properties if the 'positions' reader option is set.

  (scm_read_expression): Pass 'line' and 'column' to scm_read_sharp.

* doc/ref/api-debug.texi (Source Properties): Update manual.

											
										
										
											2012-02-08 15:51:38 -05:00
+								{
-												Move array reader from arrays.c to read.c

* libguile/arrays.c (read_decimal_integer): Move to read.c.
  (scm_i_read_array): Remove.  Incorporate the code into the
  'scm_read_array' static function in read.c.

* libguile/arrays.h (scm_i_read_array): Remove prototype.

* libguile/read.c (read_decimal_integer): Move here from arrays.c.
  (scm_read_array): Incorporate the code from 'scm_i_read_array'.  Call
  'scm_read_vector' and 'scm_read_sexp' instead of 'scm_read'.

											
										
										
											2012-10-22 23:23:45 -04:00
+								  ssize_t rank;
 								  scm_t_wchar tag_buf[8];
 								  int tag_len;
 								  SCM tag, shape = SCM_BOOL_F, elements, array;
 								  /* XXX - shortcut for ordinary vectors.  Shouldn't be necessary but
 								     the array code can not deal with zero-length dimensions yet, and
-												Fix formatting of comments in scm_read_array.

* libguile/read.c (scm_read_array): Fix formatting of comments.

											
										
										
											2012-10-30 22:58:19 -04:00
+								     we want to allow zero-length vectors, of course. */
-												Move array reader from arrays.c to read.c

* libguile/arrays.c (read_decimal_integer): Move to read.c.
  (scm_i_read_array): Remove.  Incorporate the code into the
  'scm_read_array' static function in read.c.

* libguile/arrays.h (scm_i_read_array): Remove prototype.

* libguile/read.c (read_decimal_integer): Move here from arrays.c.
  (scm_read_array): Incorporate the code from 'scm_i_read_array'.  Call
  'scm_read_vector' and 'scm_read_sexp' instead of 'scm_read'.

											
										
										
											2012-10-22 23:23:45 -04:00
+								  if (c == '(')
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								    return scm_read_vector (c, port, opts, line, column);
-												Move array reader from arrays.c to read.c

* libguile/arrays.c (read_decimal_integer): Move to read.c.
  (scm_i_read_array): Remove.  Incorporate the code into the
  'scm_read_array' static function in read.c.

* libguile/arrays.h (scm_i_read_array): Remove prototype.

* libguile/read.c (read_decimal_integer): Move here from arrays.c.
  (scm_read_array): Incorporate the code from 'scm_i_read_array'.  Call
  'scm_read_vector' and 'scm_read_sexp' instead of 'scm_read'.

											
										
										
											2012-10-22 23:23:45 -04:00
-												Fix formatting of comments in scm_read_array.

* libguile/read.c (scm_read_array): Fix formatting of comments.

											
										
										
											2012-10-30 22:58:19 -04:00
+								  /* Disambiguate between '#f' and uniform floating point vectors. */
-												Move array reader from arrays.c to read.c

* libguile/arrays.c (read_decimal_integer): Move to read.c.
  (scm_i_read_array): Remove.  Incorporate the code into the
  'scm_read_array' static function in read.c.

* libguile/arrays.h (scm_i_read_array): Remove prototype.

* libguile/read.c (read_decimal_integer): Move here from arrays.c.
  (scm_read_array): Incorporate the code from 'scm_i_read_array'.  Call
  'scm_read_vector' and 'scm_read_sexp' instead of 'scm_read'.

											
										
										
											2012-10-22 23:23:45 -04:00
+								  if (c == 'f')
 								    {
 								      c = scm_getc (port);
 								      if (c != '3' && c != '6')
 									{
 									  if (c != EOF)
 									    scm_ungetc (c, port);
 									  return SCM_BOOL_F;
 									}
 								      rank = 1;
 								      tag_buf[0] = 'f';
 								      tag_len = 1;
 								      goto continue_reading_tag;
 								    }
 								  /* Read rank. */
 								  rank = 1;
 								  c = read_decimal_integer (port, c, &rank);
 								  if (rank < 0)
 								    scm_i_input_error (NULL, port, "array rank must be non-negative",
 										       SCM_EOL);
 								  /* Read tag. */
 								  tag_len = 0;
 								 continue_reading_tag:
 								  while (c != EOF && c != '(' && c != '@' && c != ':'
 								         && tag_len < sizeof tag_buf / sizeof tag_buf[0])
 								    {
 								      tag_buf[tag_len++] = c;
 								      c = scm_getc (port);
 								    }
 								  if (tag_len == 0)
 								    tag = SCM_BOOL_T;
-												Add source properties to many more types of data

* libguile/read.c (scm_read_array): New internal helper that
  calls scm_i_read_array and sets its source property if the
  'positions' reader option is set.

  (scm_read_string): Set source properties on strings if the 'positions'
  reader option is set.

  (scm_read_vector, scm_read_srfi4_vector, scm_read_bytevector,
  scm_read_guile_bitvector, scm_read_sharp): Add new arguments for the
  'line' and 'column' of the first character of the datum being read.
  Set source properties if the 'positions' reader option is set.

  (scm_read_expression): Pass 'line' and 'column' to scm_read_sharp.

* doc/ref/api-debug.texi (Source Properties): Update manual.

											
										
										
											2012-02-08 15:51:38 -05:00
+								  else
-												Move array reader from arrays.c to read.c

* libguile/arrays.c (read_decimal_integer): Move to read.c.
  (scm_i_read_array): Remove.  Incorporate the code into the
  'scm_read_array' static function in read.c.

* libguile/arrays.h (scm_i_read_array): Remove prototype.

* libguile/read.c (read_decimal_integer): Move here from arrays.c.
  (scm_read_array): Incorporate the code from 'scm_i_read_array'.  Call
  'scm_read_vector' and 'scm_read_sexp' instead of 'scm_read'.

											
										
										
											2012-10-22 23:23:45 -04:00
+								    {
 								      tag = scm_string_to_symbol (scm_from_utf32_stringn (tag_buf, tag_len));
 								      if (tag_len == sizeof tag_buf / sizeof tag_buf[0])
 								        scm_i_input_error (NULL, port, "invalid array tag, starting with: ~a",
 								                           scm_list_1 (tag));
 								    }
 								  /* Read shape. */
 								  if (c == '@' || c == ':')
 								    {
 								      shape = SCM_EOL;
 								      do
 									{
 									  ssize_t lbnd = 0, len = 0;
 									  SCM s;
 									  if (c == '@')
 									    {
 									      c = scm_getc (port);
 									      c = read_decimal_integer (port, c, &lbnd);
 									    }
 									  s = scm_from_ssize_t (lbnd);
 									  if (c == ':')
 									    {
 									      c = scm_getc (port);
 									      c = read_decimal_integer (port, c, &len);
 									      if (len < 0)
 										scm_i_input_error (NULL, port,
 												   "array length must be non-negative",
 												   SCM_EOL);
 									      s = scm_list_2 (s, scm_from_ssize_t (lbnd+len-1));
 									    }
 									  shape = scm_cons (s, shape);
 									} while (c == '@' || c == ':');
 								      shape = scm_reverse_x (shape, SCM_EOL);
 								    }
 								  /* Read nested lists of elements. */
 								  if (c != '(')
 								    scm_i_input_error (NULL, port,
 										       "missing '(' in vector or array literal",
 										       SCM_EOL);
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  elements = scm_read_sexp (c, port, opts);
-												Move array reader from arrays.c to read.c

* libguile/arrays.c (read_decimal_integer): Move to read.c.
  (scm_i_read_array): Remove.  Incorporate the code into the
  'scm_read_array' static function in read.c.

* libguile/arrays.h (scm_i_read_array): Remove prototype.

* libguile/read.c (read_decimal_integer): Move here from arrays.c.
  (scm_read_array): Incorporate the code from 'scm_i_read_array'.  Call
  'scm_read_vector' and 'scm_read_sexp' instead of 'scm_read'.

											
										
										
											2012-10-22 23:23:45 -04:00
 								  if (scm_is_false (shape))
 								    shape = scm_from_ssize_t (rank);
 								  else if (scm_ilength (shape) != rank)
 								    scm_i_input_error
 								      (NULL, port,
 								       "the number of shape specifications must match the array rank",
 								       SCM_EOL);
 								  /* Handle special print syntax of rank zero arrays; see
 								     scm_i_print_array for a rationale. */
 								  if (rank == 0)
 								    {
 								      if (!scm_is_pair (elements))
 									scm_i_input_error (NULL, port,
 											   "too few elements in array literal, need 1",
 											   SCM_EOL);
 								      if (!scm_is_null (SCM_CDR (elements)))
 									scm_i_input_error (NULL, port,
 											   "too many elements in array literal, want 1",
 											   SCM_EOL);
 								      elements = SCM_CAR (elements);
 								    }
 								  /* Construct array, annotate with source location, and return. */
 								  array = scm_list_to_typed_array (tag, shape, elements);
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  return maybe_annotate_source (array, port, opts, line, column);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								}
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
-												Remove inline and register attributes from read.c

* libguile/read.c: Remove all 'inline' and 'register' attributes.

											
										
										
											2012-02-08 03:00:15 -05:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_srfi4_vector (int chr, SCM port, scm_t_read_opts *opts,
 								                       long line, int column)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								{
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  return scm_read_array (chr, port, opts, line, column);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								}
-												Implement R6RS bytevector read syntax.

* libguile/read.c (scm_read_bytevector): New function.
  (scm_read_sharp): Add `v' case for bytevectors.

* test-suite/lib.scm (exception:read-error): New variable.

* test-suite/tests/bytevectors.test ("Datum Syntax"): New test set.

											
										
										
											2009-06-19 00:47:11 +02:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_bytevector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
 								                     long line, int column)
-												Implement R6RS bytevector read syntax.

* libguile/read.c (scm_read_bytevector): New function.
  (scm_read_sharp): Add `v' case for bytevectors.

* test-suite/lib.scm (exception:read-error): New variable.

* test-suite/tests/bytevectors.test ("Datum Syntax"): New test set.

											
										
										
											2009-06-19 00:47:11 +02:00
+								{
 								  chr = scm_getc (port);
 								  if (chr != 'u')
 								    goto syntax;
 								  chr = scm_getc (port);
 								  if (chr != '8')
 								    goto syntax;
 								  chr = scm_getc (port);
 								  if (chr != '(')
 								    goto syntax;
-												Add source properties to many more types of data

* libguile/read.c (scm_read_array): New internal helper that
  calls scm_i_read_array and sets its source property if the
  'positions' reader option is set.

  (scm_read_string): Set source properties on strings if the 'positions'
  reader option is set.

  (scm_read_vector, scm_read_srfi4_vector, scm_read_bytevector,
  scm_read_guile_bit_vector, scm_read_sharp): Add new arguments for the
  'line' and 'column' of the first character of the datum being read.
  Set source properties if the 'positions' reader option is set.

  (scm_read_expression): Pass 'line' and 'column' to scm_read_sharp.

* doc/ref/api-debug.texi (Source Properties): Update manual.

											
										
										
											2012-02-08 15:51:38 -05:00
+								  return maybe_annotate_source
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								    (scm_u8_list_to_bytevector (scm_read_sexp (chr, port, opts)),
 								     port, opts, line, column);
-												Implement R6RS bytevector read syntax.

* libguile/read.c (scm_read_bytevector): New function.
  (scm_read_sharp): Add `v' case for bytevectors.

* test-suite/lib.scm (exception:read-error): New variable.

* test-suite/tests/bytevectors.test ("Datum Syntax"): New test set.

											
										
										
											2009-06-19 00:47:11 +02:00
 								 syntax:
 								  scm_i_input_error ("read_bytevector", port,
 										     "invalid bytevector prefix",
 										     SCM_MAKE_CHAR (chr));
 								  return SCM_UNSPECIFIED;
 								}
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_guile_bit_vector (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
 								                           long line, int column)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								{
 								  /* Read the `#*10101'-style read syntax for bit vectors in Guile.  This is
 								     terribly inefficient but who cares?  */
 								  SCM s_bits = SCM_EOL;
 								  for (chr = scm_getc (port);
 								       (chr != EOF) && ((chr == '0') || (chr == '1'));
 								       chr = scm_getc (port))
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of position counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								    {
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of position counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								    }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  if (chr != EOF)
 								    scm_ungetc (chr, port);
-												Add source properties to many more types of data

* libguile/read.c (scm_read_array): New internal helper that
  calls scm_i_read_array and sets its source property if the
  'positions' reader option is set.

  (scm_read_string): Set source properties on strings if the 'positions'
  reader option is set.

  (scm_read_vector, scm_read_srfi4_vector, scm_read_bytevector,
  scm_read_guile_bit_vector, scm_read_sharp): Add new arguments for the
  'line' and 'column' of the first character of the datum being read.
  Set source properties if the 'positions' reader option is set.

  (scm_read_expression): Pass 'line' and 'column' to scm_read_sharp.

* doc/ref/api-debug.texi (Source Properties): Update manual.

											
										
										
											2012-02-08 15:51:38 -05:00
+								  return maybe_annotate_source
 								    (scm_bitvector (scm_reverse_x (s_bits, SCM_EOL)),
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								     port, opts, line, column);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								}
-												Remove inline and register attributes from read.c

* libguile/read.c: Remove all 'inline' and 'register' attributes.

											
										
										
											2012-02-08 03:00:15 -05:00
+								static SCM
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								{
 								  int bang_seen = 0;
 								  for (;;)
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of position counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								    {
-												Have `read' update line/column numbers when reading SCSH block comments.

* libguile/read.c (scm_read_scsh_block_comment): Use `scm_getc' instead
  of `scm_get_byte_or_eof'.

* test-suite/tests/reader.test ("read-options")["position of SCSH block
  comment"]: New test.

											
										
										
											2011-02-28 23:33:47 +01:00
+								      int c = scm_getc (port);
-												* Don't use return value from SCM_SETCDR or SCM_WHASHSET.

											
										
										
											2000-07-18 16:09:09 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      if (c == EOF)
 									scm_i_input_error ("skip_block_comment", port,
 											   "unterminated `#! ... !#' comment", SCM_EOL);
 								      if (c == '!')
 									bang_seen = 1;
 								      else if (c == '#' && bang_seen)
 									break;
 								      else
 									bang_seen = 0;
 								    }
 								  return SCM_UNSPECIFIED;
 								}
-												Implement #!fold-case and #!no-fold-case reader directives.

* libguile/read.c (set_port_case_insensitive_p): New function.

  (scm_read_shebang): Handle #!fold-case and #!no-fold-case.

* doc/ref/api-evaluation.texi (Case Sensitivity, Scheme Read): Document
  the #!fold-case and #!no-fold-case reader directives.

* test-suite/tests/reader.test ("per-port-read-options"): Add tests.

											
										
										
											2012-10-24 14:37:36 -04:00
+								static void set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts,
 								                                         int value);
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								static void set_port_square_brackets_p (SCM port, scm_t_read_opts *opts,
 								                                        int value);
 								static void set_port_curly_infix_p (SCM port, scm_t_read_opts *opts,
 								                                    int value);
-												Implement #!fold-case and #!no-fold-case reader directives.

* libguile/read.c (set_port_case_insensitive_p): New function.

  (scm_read_shebang): Handle #!fold-case and #!no-fold-case.

* doc/ref/api-evaluation.texi (Case Sensitivity, Scheme Read): Document
  the #!fold-case and #!no-fold-case reader directives.

* test-suite/tests/reader.test ("per-port-read-options"): Add tests.

											
										
										
											2012-10-24 14:37:36 -04:00
-												Make the definition of `scm_read_shebang' match its declaration.

* libguile/read.c (scm_read_shebang): Remove the `inline' keyword.

											
										
										
											2011-05-08 16:25:01 +02:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_shebang (scm_t_wchar chr, SCM port, scm_t_read_opts *opts)
-												Support for the #!r6rs lexeme.

* libguile/read.c (scm_read_shebang): New function;
  (scm_read_sharp): Call scm_read_shebang on '!', which delegates to
  scm_read_scsh_block_comment as necessary.
* test-suite/tests/reader.test ("R6RS lexeme comment", "partial R6RS
  lexeme comment"): New tests.

											
										
										
											2010-05-27 09:20:53 -04:00
+								{
-												Generalize scm_read_shebang to handle other reader directives.

* libguile/read.c (READER_DIRECTIVE_NAME_MAX_SIZE): New C macro.
  (scm_read_shebang): Rewrite to handle arbitrary reader directives.

											
										
										
											2012-10-23 00:29:07 -04:00
+								  char name[READER_DIRECTIVE_NAME_MAX_SIZE + 1];
 								  int c;
 								  int i = 0;
 								  while (i <= READER_DIRECTIVE_NAME_MAX_SIZE)
-												Support for the #!r6rs lexeme.

* libguile/read.c (scm_read_shebang): New function;
  (scm_read_sharp): Call scm_read_shebang on '!', which delegates to
  scm_read_scsh_block_comment as necessary.
* test-suite/tests/reader.test ("R6RS lexeme comment", "partial R6RS
  lexeme comment"): New tests.

											
										
										
											2010-05-27 09:20:53 -04:00
+								    {
-												Generalize scm_read_shebang to handle other reader directives.

* libguile/read.c (READER_DIRECTIVE_NAME_MAX_SIZE): New C macro.
  (scm_read_shebang): Rewrite to handle arbitrary reader directives.

											
										
										
											2012-10-23 00:29:07 -04:00
+								      c = scm_getc (port);
 								      if (c == EOF)
 									scm_i_input_error ("skip_block_comment", port,
 											   "unterminated `#! ... !#' comment", SCM_EOL);
 								      else if (('a' <= c && c <= 'z') || ('0' <= c && c <= '9') || c == '-')
 								        name[i++] = c;
 								      else if (CHAR_IS_DELIMITER (c))
 								        {
 								          scm_ungetc (c, port);
 								          name[i] = '\0';
 								          if (0 == strcmp ("r6rs", name))
 								            ;  /* Silently ignore */
-												Implement #!fold-case and #!no-fold-case reader directives.

* libguile/read.c (set_port_case_insensitive_p): New function.

  (scm_read_shebang): Handle #!fold-case and #!no-fold-case.

* doc/ref/api-evaluation.texi (Case Sensitivity, Scheme Read): Document
  the #!fold-case and #!no-fold-case reader directives.

* test-suite/tests/reader.test ("per-port-read-options"): Add tests.

											
										
										
											2012-10-24 14:37:36 -04:00
+								          else if (0 == strcmp ("fold-case", name))
 								            set_port_case_insensitive_p (port, opts, 1);
 								          else if (0 == strcmp ("no-fold-case", name))
 								            set_port_case_insensitive_p (port, opts, 0);
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								          else if (0 == strcmp ("curly-infix", name))
 								            set_port_curly_infix_p (port, opts, 1);
 								          else if (0 == strcmp ("curly-infix-and-bracket-lists", name))
 								            {
 								              set_port_curly_infix_p (port, opts, 1);
 								              set_port_square_brackets_p (port, opts, 0);
 								            }
-												Generalize scm_read_shebang to handle other reader directives.

* libguile/read.c (READER_DIRECTIVE_NAME_MAX_SIZE): New C macro.
  (scm_read_shebang): Rewrite to handle arbitrary reader directives.

											
										
										
											2012-10-23 00:29:07 -04:00
+								          else
 								            break;
 								          return SCM_UNSPECIFIED;
 								        }
-												scm_read_shebang: handle non-ascii characters properly.

* libguile/read.c (scm_read_shebang): Abort scan for reader directive
  if a character other than [-a-z0-9] is encountered.

											
										
										
											2012-10-30 22:53:22 -04:00
+								      else
 								        {
 								          scm_ungetc (c, port);
 								          break;
 								        }
-												Support for the #!r6rs lexeme.

* libguile/read.c (scm_read_shebang): New function;
  (scm_read_sharp): Call scm_read_shebang on '!', which delegates to
  scm_read_scsh_block_comment as necessary.
* test-suite/tests/reader.test ("R6RS lexeme comment", "partial R6RS
  lexeme comment"): New tests.

											
										
										
											2010-05-27 09:20:53 -04:00
+								    }
-												Generalize scm_read_shebang to handle other reader directives.

* libguile/read.c (READER_DIRECTIVE_NAME_MAX_SIZE): New C macro.
  (scm_read_shebang): Rewrite to handle arbitrary reader directives.

											
										
										
											2012-10-23 00:29:07 -04:00
+								  while (i > 0)
 								    scm_ungetc (name[--i], port);
 								  return scm_read_scsh_block_comment (chr, port);
-												Support for the #!r6rs lexeme.

* libguile/read.c (scm_read_shebang): New function;
  (scm_read_sharp): Call scm_read_shebang on '!', which delegates to
  scm_read_scsh_block_comment as necessary.
* test-suite/tests/reader.test ("R6RS lexeme comment", "partial R6RS
  lexeme comment"): New tests.

											
										
										
											2010-05-27 09:20:53 -04:00
+								}
-												Add support for R6RS/SRFI-30 nested block comments.

Suggested by Andreas Rottmann <a.rottmann@gmx.at>.

* libguile/read.c (flush_ws, scm_read_sharp): Add support for
  R6RS/SRFI-30 block comments.
  (scm_read_r6rs_block_comment): New function.

* test-suite/tests/reader.test (exception:unterminated-block-comment):
  Adjust to match both block comment styles.
  ("reading")["R6RS/SRFI-30 block comment", "R6RS/SRFI-30 nested block
  comment", "R6RS/SRFI-30 block comment syntax overridden"]: New tests.
  ("exceptions")["R6RS/SRFI-30 unterminated nested block comment"]: New
  test.

* doc/ref/api-evaluation.texi (Block Comments): Mention SRFI-30/R6RS
  block comments.

* doc/ref/srfi-modules.texi (SRFI-30): New node.

											
										
										
											2009-10-19 22:38:34 +02:00
+								static SCM
 								scm_read_r6rs_block_comment (scm_t_wchar chr, SCM port)
 								{
 								  /* Unlike SCSH-style block comments, SRFI-30/R6RS block comments may be
 								     nested.  So care must be taken.  */
 								  int nesting_level = 1;
-												fix reading of #||||#

* libguile/read.c (scm_read_r6rs_block_comment):
* test-suite/tests/reader.test ("reading"): Fix reading of #||||#,
  originally reported in bug debbugs.gnu.org/9672, by Bruno Haible.
  Thanks, Bruno!

											
										
										
											2011-10-05 20:41:11 +02:00
 								  int a = scm_getc (port);
 								  if (a == EOF)
 								    scm_i_input_error ("scm_read_r6rs_block_comment", port,
 								                       "unterminated `#| ... |#' comment", SCM_EOL);
-												Add support for R6RS/SRFI-30 nested block comments.

Suggested by Andreas Rottmann <a.rottmann@gmx.at>.

* libguile/read.c (flush_ws, scm_read_sharp): Add support for
  R6RS/SRFI-30 block comments.
  (scm_read_r6rs_block_comment): New function.

* test-suite/tests/reader.test (exception:unterminated-block-comment):
  Adjust to match both block comment styles.
  ("reading")["R6RS/SRFI-30 block comment", "R6RS/SRFI-30 nested block
  comment", "R6RS/SRFI-30 block comment syntax overridden"]: New tests.
  ("exceptions")["R6RS/SRFI-30 unterminated nested block comment"]: New
  test.

* doc/ref/api-evaluation.texi (Block Comments): Mention SRFI-30/R6RS
  block comments.

* doc/ref/srfi-modules.texi (SRFI-30): New node.

											
										
										
											2009-10-19 22:38:34 +02:00
 								  while (nesting_level > 0)
 								    {
-												fix reading of #||||#

* libguile/read.c (scm_read_r6rs_block_comment):
* test-suite/tests/reader.test ("reading"): Fix reading of #||||#,
  originally reported in bug debbugs.gnu.org/9672, by Bruno Haible.
  Thanks, Bruno!

											
										
										
											2011-10-05 20:41:11 +02:00
+								      int b = scm_getc (port);
-												Add support for R6RS/SRFI-30 nested block comments.

Suggested by Andreas Rottmann <a.rottmann@gmx.at>.

* libguile/read.c (flush_ws, scm_read_sharp): Add support for
  R6RS/SRFI-30 block comments.
  (scm_read_r6rs_block_comment): New function.

* test-suite/tests/reader.test (exception:unterminated-block-comment):
  Adjust to match both block comment styles.
  ("reading")["R6RS/SRFI-30 block comment", "R6RS/SRFI-30 nested block
  comment", "R6RS/SRFI-30 block comment syntax overridden"]: New tests.
  ("exceptions")["R6RS/SRFI-30 unterminated nested block comment"]: New
  test.

* doc/ref/api-evaluation.texi (Block Comments): Mention SRFI-30/R6RS
  block comments.

* doc/ref/srfi-modules.texi (SRFI-30): New node.

											
										
										
											2009-10-19 22:38:34 +02:00
-												fix reading of #||||#

* libguile/read.c (scm_read_r6rs_block_comment):
* test-suite/tests/reader.test ("reading"): Fix reading of #||||#,
  originally reported in bug debbugs.gnu.org/9672, by Bruno Haible.
  Thanks, Bruno!

											
										
										
											2011-10-05 20:41:11 +02:00
+								      if (b == EOF)
-												Remove uses of the non-standard `__FUNCTION__'.

* libguile/gc.c (scm_gc_sweep): Replace `__FUNCTION__' by `FUNC_NAME'.

* libguile/read.c (scm_read_r6rs_block_comment): Likewise.

											
										
										
											2009-12-14 15:21:54 +01:00
+									scm_i_input_error ("scm_read_r6rs_block_comment", port,
-												Add support for R6RS/SRFI-30 nested block comments.

Suggested by Andreas Rottmann <a.rottmann@gmx.at>.

* libguile/read.c (flush_ws, scm_read_sharp): Add support for
  R6RS/SRFI-30 block comments.
  (scm_read_r6rs_block_comment): New function.

* test-suite/tests/reader.test (exception:unterminated-block-comment):
  Adjust to match both block comment styles.
  ("reading")["R6RS/SRFI-30 block comment", "R6RS/SRFI-30 nested block
  comment", "R6RS/SRFI-30 block comment syntax overridden"]: New tests.
  ("exceptions")["R6RS/SRFI-30 unterminated nested block comment"]: New
  test.

* doc/ref/api-evaluation.texi (Block Comments): Mention SRFI-30/R6RS
  block comments.

* doc/ref/srfi-modules.texi (SRFI-30): New node.

											
										
										
											2009-10-19 22:38:34 +02:00
+											   "unterminated `#| ... |#' comment", SCM_EOL);
-												fix reading of #||||#

* libguile/read.c (scm_read_r6rs_block_comment):
* test-suite/tests/reader.test ("reading"): Fix reading of #||||#,
  originally reported in bug debbugs.gnu.org/9672, by Bruno Haible.
  Thanks, Bruno!

											
										
										
											2011-10-05 20:41:11 +02:00
+								      if (a == '|' && b == '#')
 								        {
 								          nesting_level--;
 								          b = EOF;
 								        }
 								      else if (a == '#' && b == '|')
 								        {
 								          nesting_level++;
 								          b = EOF;
 								        }
 								      a = b;
-												Add support for R6RS/SRFI-30 nested block comments.

Suggested by Andreas Rottmann <a.rottmann@gmx.at>.

* libguile/read.c (flush_ws, scm_read_sharp): Add support for
  R6RS/SRFI-30 block comments.
  (scm_read_r6rs_block_comment): New function.

* test-suite/tests/reader.test (exception:unterminated-block-comment):
  Adjust to match both block comment styles.
  ("reading")["R6RS/SRFI-30 block comment", "R6RS/SRFI-30 nested block
  comment", "R6RS/SRFI-30 block comment syntax overridden"]: New tests.
  ("exceptions")["R6RS/SRFI-30 unterminated nested block comment"]: New
  test.

* doc/ref/api-evaluation.texi (Block Comments): Mention SRFI-30/R6RS
  block comments.

* doc/ref/srfi-modules.texi (SRFI-30): New node.

											
										
										
											2009-10-19 22:38:34 +02:00
+								    }
 								  return SCM_UNSPECIFIED;
 								}
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_commented_expression (scm_t_wchar chr, SCM port,
 								                               scm_t_read_opts *opts)
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  scm_t_wchar c;
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  c = flush_ws (port, opts, (char *) NULL);
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								  if (EOF == c)
 								    scm_i_input_error ("read_commented_expression", port,
 								                       "no expression after #; comment", SCM_EOL);
 								  scm_ungetc (c, port);
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  scm_read_expression (port, opts);
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								  return SCM_UNSPECIFIED;
 								}
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static SCM
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								scm_read_extended_symbol (scm_t_wchar chr, SCM port)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								{
 								  /* Guile's extended symbol read syntax looks like this:
 								       #{This is all a symbol name}#
 								     So here, CHR is expected to be `{'.  */
-												read-extended-symbol handles backslash better, including r6rs hex escapes

* libguile/read.c (scm_read_extended_symbol): Interpret '\' as an escape
  character.  Due to some historical oddities we have to support '\'
  before any character, but since we never emitted '\' in front of
  "normal" characters like 'x' we can interpret "\x..;" to be an R6RS
  hex escape.

* test-suite/tests/reader.test ("#{}#"): Add tests.

											
										
										
											2011-04-11 12:48:06 +02:00
+								  int saw_brace = 0;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  size_t len = 0;
-												Make VM string literals immutable.

* libguile/strings.c (scm_i_make_string, scm_i_make_wide_string): Add
  `read_only_p' parameter.  All callers updated.

* libguile/vm-i-loader.c (load_string, load_wide_string): Push read-only
  strings.

* test-suite/tests/strings.test ("literals"): New test prefix.

											
										
										
											2011-03-20 23:34:42 +01:00
+								  SCM buf = scm_i_make_string (1024, NULL, 0);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  buf = scm_i_string_start_writing (buf);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  while ((chr = scm_getc (port)) != EOF)
 								    {
 								      if (saw_brace)
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+									{
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									  if (chr == '#')
 									    {
 									      break;
 									    }
 									  else
 									    {
 									      saw_brace = 0;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									      scm_i_string_set_x (buf, len++, '}');
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									    }
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+									}
-												read-extended-symbol handles backslash better, including r6rs hex escapes

* libguile/read.c (scm_read_extended_symbol): Interpret '\' as an escape
  character.  Due to some historical oddities we have to support '\'
  before any character, but since we never emitted '\' in front of
  "normal" characters like 'x' we can interpret "\x..;" to be an R6RS
  hex escape.

* test-suite/tests/reader.test ("#{}#"): Add tests.

											
										
										
											2011-04-11 12:48:06 +02:00
 								      if (chr == '}')
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									saw_brace = 1;
-												read-extended-symbol handles backslash better, including r6rs hex escapes

* libguile/read.c (scm_read_extended_symbol): Interpret '\' as an escape
  character.  Due to some historical oddities we have to support '\'
  before any character, but since we never emitted '\' in front of
  "normal" characters like 'x' we can interpret "\x..;" to be an R6RS
  hex escape.

* test-suite/tests/reader.test ("#{}#"): Add tests.

											
										
										
											2011-04-11 12:48:06 +02:00
+								      else if (chr == '\\')
 								        {
 								          /* It used to be that print.c would print extended-read-syntax
 								             symbols with backslashes before "non-standard" chars, but
 								             this routine wouldn't do anything with those escapes.
 								             Bummer.  What we've done is to change print.c to output
 								             R6RS hex escapes for those characters, relying on the fact
 								             that the extended read syntax would never put a `\' before
 								             an `x'.  For now, we just ignore other instances of
 								             backslash in the string.  */
 								          switch ((chr = scm_getc (port)))
 								            {
 								            case EOF:
 								              goto done;
 								            case 'x':
 								              {
 								                scm_t_wchar c;
 								                SCM_READ_HEX_ESCAPE (10, ';');
 								                scm_i_string_set_x (buf, len++, c);
 								                break;
 								              str_eof:
 								                chr = EOF;
 								                goto done;
 								              bad_escaped:
 								                scm_i_string_stop_writing ();
 								                scm_i_input_error ("scm_read_extended_symbol", port,
 								                                   "illegal character in escape sequence: ~S",
 								                                   scm_list_1 (SCM_MAKE_CHAR (c)));
 								                break;
 								              }
 								            default:
 									      scm_i_string_set_x (buf, len++, chr);
 								              break;
 								            }
 								        }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      else
-												read-extended-symbol handles backslash better, including r6rs hex escapes

* libguile/read.c (scm_read_extended_symbol): Interpret '\' as an escape
  character.  Due to some historical oddities we have to support '\'
  before any character, but since we never emitted '\' in front of
  "normal" characters like 'x' we can interpret "\x..;" to be an R6RS
  hex escape.

* test-suite/tests/reader.test ("#{}#"): Add tests.

											
										
										
											2011-04-11 12:48:06 +02:00
+								        scm_i_string_set_x (buf, len++, chr);
-												* Don't use return value from SCM_SETCDR or SCM_WHASHSET.

											
										
										
											2000-07-18 16:09:09 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								      if (len >= scm_i_string_length (buf) - 2)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									{
-												Fix C99-style declarations after statements.

* libguile/eval.i.c (ceval): Move declarations before statements.

* libguile/read.c (scm_read_extended_symbol): Likewise.

* libguile/struct.c (scm_make_struct_layout): Likewise.

* libguile/threads.c (fat_mutex_unlock): Likewise.

* libguile/vm-i-system.c (br_if_nargs_ne, br_if_nargs_lt): Likewise.

* libguile/vm.c (make_vm): Likewise.

											
										
										
											2009-11-17 01:26:25 +01:00
+									  SCM addy;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									  scm_i_string_stop_writing ();
-												Make VM string literals immutable.

* libguile/strings.c (scm_i_make_string, scm_i_make_wide_string): Add
  `read_only_p' parameter.  All callers updated.

* libguile/vm-i-loader.c (load_string, load_wide_string): Push read-only
  strings.

* test-suite/tests/strings.test ("literals"): New test prefix.

											
										
										
											2011-03-20 23:34:42 +01:00
+									  addy = scm_i_make_string (1024, NULL, 0);
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									  buf = scm_string_append (scm_list_2 (buf, addy));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									  len = 0;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									  buf = scm_i_string_start_writing (buf);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									}
 								    }
-												read-extended-symbol handles backslash better, including r6rs hex escapes

* libguile/read.c (scm_read_extended_symbol): Interpret '\' as an escape
  character.  Due to some historical oddities we have to support '\'
  before any character, but since we never emitted '\' in front of
  "normal" characters like 'x' we can interpret "\x..;" to be an R6RS
  hex escape.

* test-suite/tests/reader.test ("#{}#"): Add tests.

											
										
										
											2011-04-11 12:48:06 +02:00
 								 done:
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  scm_i_string_stop_writing ();
-												read-extended-symbol handles backslash better, including r6rs hex escapes

* libguile/read.c (scm_read_extended_symbol): Interpret '\' as an escape
  character.  Due to some historical oddities we have to support '\'
  before any character, but since we never emitted '\' in front of
  "normal" characters like 'x' we can interpret "\x..;" to be an R6RS
  hex escape.

* test-suite/tests/reader.test ("#{}#"): Add tests.

											
										
										
											2011-04-11 12:48:06 +02:00
+								  if (chr == EOF)
 								    scm_i_input_error ("scm_read_extended_symbol", port,
 								                       "end of file while reading symbol", SCM_EOL);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								}
 								/* Top-level token readers, i.e., dispatchers.  */
 								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_sharp_extension (int chr, SCM port, scm_t_read_opts *opts)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								{
 								  SCM proc;
 								  proc = scm_get_hash_procedure (chr);
 								  if (scm_is_true (scm_procedure_p (proc)))
 								    {
 								      long line = SCM_LINUM (port);
 								      int column = SCM_COL (port) - 2;
 								      SCM got;
 								      got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
-												read + source properties simplification

* libguile/srcprop.h: Remove internal scm_source_whash declaration.
* libguile/srcprop.c (scm_i_set_source_properties_x)
  (scm_i_has_source_properties): New helpers.
  (scm_source_whash): Make static.

* libguile/read.c (scm_read_sexp): Remove register declarations here;
  let's trust the compiler.  Remove code to incrementally build up a
  copy; instead let's let scm_i_set_source_properties_x handle copying
  the expression if needed.
  (scm_read_quote, scm_read_syntax): Use scm_i_set_source_properties_x.
  (recsexpr): Remove this helper from 1996.
  (scm_read_sharp_extension): Instead of trying to recursively label
  sharp-read subforms with source properties, just label the outside
  form and rely on the macro-expander to propagate it down.

											
										
										
											2011-05-24 21:25:11 +02:00
-												Add source properties to more datum types in scm_read_sharp_extension.

* libguile/read.c (scm_read_sharp_extension): Attach source properties
  to the result of a custom token reader if the returned datum is not
  immediate.  Previously, source properties were added to pairs only.

											
										
										
											2012-10-23 00:21:12 -04:00
+								      if (opts->record_positions_p && SCM_NIMP (got)
 								          && !scm_i_has_source_properties (got))
-												read + source properties simplification

* libguile/srcprop.h: Remove internal scm_source_whash declaration.
* libguile/srcprop.c (scm_i_set_source_properties_x)
  (scm_i_has_source_properties): New helpers.
  (scm_source_whash): Make static.

* libguile/read.c (scm_read_sexp): Remove register declarations here;
  let's trust the compiler.  Remove code to incrementally build up a
  copy; instead let's let scm_i_set_source_properties_x handle copying
  the expression if needed.
  (scm_read_quote, scm_read_syntax): Use scm_i_set_source_properties_x.
  (recsexpr): Remove this helper from 1996.
  (scm_read_sharp_extension): Instead of trying to recursively label
  sharp-read subforms with source properties, just label the outside
  form and rely on the macro-expander to propagate it down.

											
										
										
											2011-05-24 21:25:11 +02:00
+								        scm_i_set_source_properties_x (got, line, column, SCM_FILENAME (port));
 								      return got;
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								    }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  return SCM_UNSPECIFIED;
 								}
 								/* The reader for the sharp `#' character.  It basically dispatches reads
 								   among the above token readers.   */
 								static SCM
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								scm_read_sharp (scm_t_wchar chr, SCM port, scm_t_read_opts *opts,
 								                long line, int column)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#define FUNC_NAME "scm_lreadr"
 								{
 								  SCM result;
 								  chr = scm_getc (port);
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  result = scm_read_sharp_extension (chr, port, opts);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  if (!scm_is_eq (result, SCM_UNSPECIFIED))
 								    return result;
 								  switch (chr)
 								    {
 								    case '\\':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      return (scm_read_character (chr, port, opts));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    case '(':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      return (scm_read_vector (chr, port, opts, line, column));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    case 's':
 								    case 'u':
 								    case 'f':
-												Undeprecate read syntax for uniform complex vectors

* libguile/read.c (scm_read_sharp): Move the "#c..." case outside of
  #if SCM_ENABLE_DEPRECATED, and to the same section which handles
  "#s...", "#u..." and "#f...".
  Thanks to Andreas Rottmann <a.rottmann@gmx.at> for the bug report.

											
										
										
											2011-04-05 19:42:06 -04:00
+								    case 'c':
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      /* This one may return either a boolean or an SRFI-4 vector.  */
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      return (scm_read_srfi4_vector (chr, port, opts, line, column));
-												Implement R6RS bytevector read syntax.

* libguile/read.c (scm_read_bytevector): New function.
  (scm_read_sharp): Add `v' case for bytevectors.

* test-suite/lib.scm (exception:read-error): New variable.

* test-suite/tests/bytevectors.test ("Datum Syntax"): New test set.

											
										
										
											2009-06-19 00:47:11 +02:00
+								    case 'v':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      return (scm_read_bytevector (chr, port, opts, line, column));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    case '*':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      return (scm_read_guile_bit_vector (chr, port, opts, line, column));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    case 't':
 								    case 'T':
 								    case 'F':
 								      return (scm_read_boolean (chr, port));
 								    case ':':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      return (scm_read_keyword (chr, port, opts));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    case '0': case '1': case '2': case '3': case '4':
 								    case '5': case '6': case '7': case '8': case '9':
 								    case '@':
 								#if SCM_ENABLE_DEPRECATED
 								      /* See below for 'i' and 'e'. */
 								    case 'a':
 								    case 'y':
 								    case 'h':
 								    case 'l':
 								#endif
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      return (scm_read_array (chr, port, opts, line, column));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								    case 'i':
 								    case 'e':
 								#if SCM_ENABLE_DEPRECATED
 								      {
 									/* When next char is '(', it really is an old-style
 									   uniform array. */
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									scm_t_wchar next_c = scm_getc (port);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									if (next_c != EOF)
 									  scm_ungetc (next_c, port);
 									if (next_c == '(')
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+									  return scm_read_array (chr, port, opts, line, column);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									/* Fall through. */
 								      }
 								#endif
 								    case 'b':
 								    case 'B':
 								    case 'o':
 								    case 'O':
 								    case 'd':
 								    case 'D':
 								    case 'x':
 								    case 'X':
 								    case 'I':
 								    case 'E':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      return (scm_read_number_and_radix (chr, port, opts));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    case '{':
 								      return (scm_read_extended_symbol (chr, port));
 								    case '!':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      return (scm_read_shebang (chr, port, opts));
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								    case ';':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      return (scm_read_commented_expression (chr, port, opts));
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								    case '`':
 								    case '\'':
 								    case ',':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      return (scm_read_syntax (chr, port, opts));
-												add read syntax for #nil

* libguile/evalext.c (scm_self_evaluating_p): #nil is self-evaluating.

* libguile/read.c (scm_read_nil, scm_read_sharp): Add read syntax for
  #nil.

											
										
										
											2010-04-09 14:15:16 +02:00
+								    case 'n':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      return (scm_read_nil (chr, port, opts));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    default:
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								      result = scm_read_sharp_extension (chr, port, opts);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      if (scm_is_eq (result, SCM_UNSPECIFIED))
-												Add support for R6RS/SRFI-30 nested block comments.

Suggested by Andreas Rottmann <a.rottmann@gmx.at>.

* libguile/read.c (flush_ws, scm_read_sharp): Add support for
  R6RS/SRFI-30 block comments.
  (scm_read_r6rs_block_comment): New function.

* test-suite/tests/reader.test (exception:unterminated-block-comment):
  Adjust to match both block comment styles.
  ("reading")["R6RS/SRFI-30 block comment", "R6RS/SRFI-30 nested block
  comment", "R6RS/SRFI-30 block comment syntax overridden"]: New tests.
  ("exceptions")["R6RS/SRFI-30 unterminated nested block comment"]: New
  test.

* doc/ref/api-evaluation.texi (Block Comments): Mention SRFI-30/R6RS
  block comments.

* doc/ref/srfi-modules.texi (SRFI-30): New node.

											
										
										
											2009-10-19 22:38:34 +02:00
+									{
 									  /* To remain compatible with 1.8 and earlier, the following
 									     characters have lower precedence than `read-hash-extend'
 									     characters.  */
 									  switch (chr)
 									    {
 									    case '|':
 									      return scm_read_r6rs_block_comment (chr, port);
 									    default:
 									      scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
 												 scm_list_1 (SCM_MAKE_CHAR (chr)));
 									    }
 									}
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      else
 									return result;
 								    }
 								  return SCM_UNSPECIFIED;
 								}
 								#undef FUNC_NAME
 								static SCM
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								read_inner_expression (SCM port, scm_t_read_opts *opts)
 								#define FUNC_NAME "read_inner_expression"
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								{
 								  while (1)
 								    {
-												Remove inline and register attributes from read.c

* libguile/read.c: Remove all 'inline' and 'register' attributes.

											
										
										
											2012-02-08 03:00:15 -05:00
+								      scm_t_wchar chr;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								      chr = scm_getc (port);
 								      switch (chr)
 									{
 									case SCM_WHITE_SPACES:
 									case SCM_LINE_INCREMENTORS:
 									  break;
 									case ';':
 									  (void) scm_read_semicolon_comment (chr, port);
 									  break;
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								        case '{':
 								          if (opts->curly_infix_p)
 								            {
 								              if (opts->neoteric_p)
 								                return scm_read_sexp (chr, port, opts);
 								              else
 								                {
 								                  SCM expr;
 								                  /* Enable neoteric expressions within curly braces */
 								                  opts->neoteric_p = 1;
 								                  expr = scm_read_sexp (chr, port, opts);
 								                  opts->neoteric_p = 0;
 								                  return expr;
 								                }
 								            }
 								          else
 								            return scm_read_mixed_case_symbol (chr, port, opts);
-												add reader option for parsing [] as ().

* libguile/private-options.h:
* libguile/read.c (scm_read_opts, SCM_SQUARE_BRACKETS_P): Add an option
  for treating [ and ] as parentheses, on by default. Note that this
  makes them delimiters also, so [ and ] cannot appear in a symbol name,
  with this read option on.
  (scm_read_sexp): If we start with [, we end with ].
  (scm_read_expression): Add case for [.

											
										
										
											2010-01-15 22:24:31 +01:00
+									case '[':
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								          if (opts->square_brackets_p)
 								            return scm_read_sexp (chr, port, opts);
 								          else if (opts->curly_infix_p)
 								            {
 								              /* The syntax of neoteric expressions requires that '[' be
 								                 a delimiter when curly-infix is enabled, so it cannot
 								                 be part of an unescaped symbol.  We might as well do
 								                 something useful with it, so we adopt Kawa's convention:
 								                 [...] => ($bracket-list$ ...) */
 								              long line = SCM_LINUM (port);
 								              int column = SCM_COL (port) - 1;
 								              return maybe_annotate_source
 								                (scm_cons (sym_bracket_list, scm_read_sexp (chr, port, opts)),
 								                 port, opts, line, column);
 								            }
 								          else
 								            return scm_read_mixed_case_symbol (chr, port, opts);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									case '(':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+									  return (scm_read_sexp (chr, port, opts));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									case '"':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+									  return (scm_read_string (chr, port, opts));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									case '\'':
 									case '`':
 									case ',':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+									  return (scm_read_quote (chr, port, opts));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									case '#':
 									  {
-												Add source properties to many more types of data

* libguile/read.c (scm_read_array): New internal helper that
  calls scm_i_read_array and sets its source property if the
  'positions' reader option is set.

  (scm_read_string): Set source properties on strings if the 'positions'
  reader option is set.

  (scm_read_vector, scm_read_srfi4_vector, scm_read_bytevector,
  scm_read_guile_bitvector, scm_read_sharp): Add new arguments for the
  'line' and 'column' of the first character of the datum being read.
  Set source properties if the 'positions' reader option is set.

  (scm_read_expression): Pass 'line' and 'column' to scm_read_sharp.

* doc/ref/api-debug.texi (Source Properties): Update manual.

											
										
										
											2012-02-08 15:51:38 -05:00
+								            long line  = SCM_LINUM (port);
 								            int column = SCM_COL (port) - 1;
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+									    SCM result = scm_read_sharp (chr, port, opts, line, column);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									    if (scm_is_eq (result, SCM_UNSPECIFIED))
 									      /* We read a comment or some such.  */
 									      break;
 									    else
 									      return result;
 									  }
 									case ')':
 									  scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
 									  break;
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								        case '}':
 								          if (opts->curly_infix_p)
 								            scm_i_input_error (FUNC_NAME, port, "unexpected \"}\"", SCM_EOL);
 								          else
 								            return scm_read_mixed_case_symbol (chr, port, opts);
-												need read error for extra closing square brackets

* libguile/read.c (scm_read_expression): add test

											
										
										
											2010-11-04 22:07:50 -07:00
+									case ']':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								          if (opts->square_brackets_p)
-												need read error for extra closing square brackets

* libguile/read.c (scm_read_expression): add test

											
										
										
											2010-11-04 22:07:50 -07:00
+								            scm_i_input_error (FUNC_NAME, port, "unexpected \"]\"", SCM_EOL);
 								          /* otherwise fall through */
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									case EOF:
 									  return SCM_EOF_VAL;
 									case ':':
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+									  if (opts->keyword_style == KEYWORD_STYLE_PREFIX)
 									    return scm_symbol_to_keyword (scm_read_expression (port, opts));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									  /* Fall through.  */
 									default:
 									  {
 									    if (((chr >= '0') && (chr <= '9'))
 										|| (strchr ("+-.", chr)))
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+									      return (scm_read_number (chr, port, opts));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									    else
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+									      return (scm_read_mixed_case_symbol (chr, port, opts));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									  }
 									}
 								    }
 								}
 								#undef FUNC_NAME
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								/* Read one datum from PORT according to OPTS.
 								   When OPTS->neoteric_p is unset this simply defers to
 								   'read_inner_expression'.  Otherwise the datum may be followed,
 								   with no whitespace in between, by any number of '(', '[' or '{'
 								   groups, each of which is folded into the result, e.g.
 								   f{n - 1}(x) => ((f (- n 1)) x).
 								   Returns SCM_EOF_VAL when positions are being recorded and only
 								   whitespace remains on PORT.  */
 								static SCM
 								scm_read_expression (SCM port, scm_t_read_opts *opts)
 								#define FUNC_NAME "scm_read_expression"
 								{
 								  long start_line = 0;
 								  int start_column = 0;
 								  SCM result;
 								  /* Plain (non-neoteric) reading needs none of the machinery below.  */
 								  if (!opts->neoteric_p)
 								    return read_inner_expression (port, opts);
 								  if (opts->record_positions_p)
 								    {
 								      /* Remember where the first non-whitespace character sits.  In
 								         'f(x)' the leading 'f' cannot carry source properties, so the
 								         position captured here is what gets attached to the list
 								         (f x) built further down.  */
 								      int first = flush_ws (port, opts, (char *) NULL);
 								      if (first == EOF)
 								        return SCM_EOF_VAL;
 								      scm_ungetc (first, port);
 								      start_line = SCM_LINUM (port);
 								      start_column = SCM_COL (port);
 								    }
 								  /* Head of the (possibly) neoteric expression.  */
 								  result = read_inner_expression (port, opts);
 								  /* Keep absorbing directly adjacent bracketed groups until some
 								     other character (or EOF) shows up.  */
 								  for (;;)
 								    {
 								      int next = scm_getc (port);
 								      SCM body;
 								      switch (next)
 								        {
 								        case '(':
 								          /* e(...) => (e ...) */
 								          body = scm_read_sexp (next, port, opts);
 								          result = scm_cons (result, body);
 								          break;
 								        case '[':
 								          /* e[...] => ($bracket-apply$ e ...) */
 								          body = scm_read_sexp (next, port, opts);
 								          result = scm_cons (sym_bracket_apply, scm_cons (result, body));
 								          break;
 								        case '{':
 								          /* e{} => (e), e{...} => (e {...}) */
 								          body = scm_read_sexp (next, port, opts);
 								          result = scm_is_null (body)
 								            ? scm_list_1 (result)
 								            : scm_list_2 (result, body);
 								          break;
 								        default:
 								          /* Not part of this expression: hand the character back
 								             (EOF cannot be pushed back) and finish.  */
 								          if (next != EOF)
 								            scm_ungetc (next, port);
 								          return result;
 								        }
 								      maybe_annotate_source (result, port, opts, start_line, start_column);
 								    }
 								}
 								#undef FUNC_NAME
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								/* Actual reader.  */
-												Implement per-port read options.

* libguile/read.c (scm_t_read_opts): Update comment to mention the
  per-port read options.

  (sym_port_read_options): New variable.

  (set_port_read_option): New function.

  (init_read_options): Add new 'port' parameter, and consult the
  per-port read option overrides when initializing the 'scm_t_read_opts'
  struct.  Move to bottom of file.

  (scm_read): Pass 'port' parameter to init_read_options.

											
										
										
											2012-10-23 17:28:43 -04:00
+								static void init_read_options (SCM port, scm_t_read_opts *opts);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								SCM_DEFINE (scm_read, "read", 0, 1, 0,
 								            (SCM port),
 									    "Read an s-expression from the input port @var{port}, or from\n"
 									    "the current input port if @var{port} is not specified.\n"
 									    "Any whitespace before the next token is discarded.")
 								#define FUNC_NAME s_scm_read
 								{
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  scm_t_read_opts opts;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  int c;
 								  if (SCM_UNBNDP (port))
 								    port = scm_current_input_port ();
 								  SCM_VALIDATE_OPINPORT (1, port);
-												Implement per-port read options.

* libguile/read.c (scm_t_read_opts): Update comment to mention the
  per-port read options.

  (sym_port_read_options): New variable.

  (set_port_read_option): New function.

  (init_read_options): Add new 'port' parameter, and consult the
  per-port read option overrides when initializing the 'scm_t_read_opts'
  struct.  Move to bottom of file.

  (scm_read): Pass 'port' parameter to init_read_options.

											
										
										
											2012-10-23 17:28:43 -04:00
+								  init_read_options (port, &opts);
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
 								  c = flush_ws (port, &opts, (char *) NULL);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  if (EOF == c)
 								    return SCM_EOF_VAL;
 								  scm_ungetc (c, port);
-												Change reader to pass read options to helpers via explicit parameter.

* libguile/read.c (enum t_keyword_style, struct t_read_opts,
  scm_t_read_opts): New types.
  (init_read_options): New function.

  (CHAR_IS_DELIMITER): Look up square-brackets option via local 'opts'.

  (scm_read): Call 'init_read_options', and pass 'opts' to helpers.

  (flush_ws, maybe_annotate_source, read_complete_token, read_token,
  scm_read_bytevector, scm_read_character,
  scm_read_commented_expression, scm_read_expression,
  scm_read_guile_bit_vector, scm_read_keyword,
  scm_read_mixed_case_symbol, scm_read_nil, scm_read_number,
  scm_read_number_and_radix, scm_read_quote, scm_read_sexp,
  scm_read_sharp, scm_read_sharp_extension, scm_read_shebang,
  scm_read_srfi4_vector, scm_read_string, scm_read_syntax,
  scm_read_vector, scm_read_array): Add 'opts' as an additional
  parameter, and use it to look up read options.  Previously the global
  read options were consulted directly.

											
										
										
											2012-10-23 17:11:41 -04:00
+								  return (scm_read_expression (port, &opts));
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								}
-												* Eliminate some calls to scm_wta.

											
										
										
											2001-03-04 17:09:34 +00:00
+								#undef FUNC_NAME
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+								/* Manipulate the read-hash-procedures alist.  This could be written in
 								   Scheme, but maybe it will also be used by C code during initialisation.  */
-												*.[ch]: Replace GUILE_PROC w/ SCM_DEFINE.

											
										
										
											2000-01-05 19:05:23 +00:00
+								SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but it was useful to keep the old versions around while migrating),
which have docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								            (SCM chr, SCM proc),
-												(scm_read_options, scm_read, scm_read_hash_extend): Added docstrings.

											
										
										
											2001-02-16 15:17:20 +00:00
+									    "Install the procedure @var{proc} for reading expressions\n"
 									    "starting with the character sequence @code{#} and @var{chr}.\n"
 									    "@var{proc} will be called with two arguments:  the character\n"
 									    "@var{chr} and the port to read further data from. The object\n"
-												(s_scm_read_hash_extend): document #f argument to
read-hash-extend.

											
										
										
											2007-01-06 18:20:35 +00:00
+									    "returned will be the return value of @code{read}. \n"
 									    "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
 									    )
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but it was useful to keep the old versions around while migrating),
which have docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#define FUNC_NAME s_scm_read_hash_extend
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
+								{
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+								  SCM this;
 								  SCM prev;
-												* Deprecated scm_makfromstr and added scm_mem2string as a replacement.
* Eliminated some potential gc problems.
* Eliminated some signedness problems.
* Minor changes.

											
										
										
											2001-06-26 10:59:34 +00:00
+								  SCM_VALIDATE_CHAR (1, chr);
-												* deprecated.h, boolean.h (SCM_FALSEP, SCM_NFALSEP, SCM_BOOL,
SCM_NEGATE_BOOL, SCM_BOOLP): Deprecated by moving into "deprecated.h".
Replaced all uses with scm_is_false, scm_is_true, scm_from_bool, and
scm_is_bool, respectively.

											
										
										
											2004-07-06 10:59:25 +00:00
+								  SCM_ASSERT (scm_is_false (proc)
-												* tags.h, deprecated.h (SCM_EQ_P): Deprecated by moving it into
deprecated.h.  Replaced all uses with scm_is_eq.

											
										
										
											2004-07-27 15:41:49 +00:00
+									      || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
-												* Deprecated scm_makfromstr and added scm_mem2string as a replacement.
* Eliminated some potential gc problems.
* Eliminated some signedness problems.
* Minor changes.

											
										
										
											2001-06-26 10:59:34 +00:00
+									      proc, SCM_ARG2, FUNC_NAME);
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+								  /* Check if chr is already in the alist.  */
-												Use a fluid for the list of the reader's "hash procedures"

This allows customizing the reader behavior for a dynamic extent more easily.

* libguile/read.c (scm_read_hash_procedures): Renamed to
  `scm_i_read_hash_procedures'.
  (scm_i_read_hash_procedures_ref, scm_i_read_hash_procedures_set_x):
  New (internal) accessor functions for the fluid.
  (scm_read_hash_extend, scm_get_hash_procedure): Use these accessor
  functions.
  (scm_init_read): Create the fluid, named `%read-hash-procedures' instead of
  the previous plain list `read-hash-procedures'.

* test-suite/tests/reader.test: Adapt the "R6RS/SRFI-30 block comment
  syntax overridden" test to make use of the fluid.

* module/ice-9/deprecated.scm (read-hash-procedures):
  New identifier macro -- backward-compatibility shim.

Signed-off-by: Ludovic Courtès <ludo@gnu.org>

											
										
										
											2010-11-03 00:09:57 +01:00
+								  this = scm_i_read_hash_procedures_ref ();
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+								  prev = SCM_BOOL_F;
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+								  while (1)
 								    {
-												*** empty log message ***

											
										
										
											2004-09-22 17:41:37 +00:00
+								      if (scm_is_null (this))
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+									{
 									  /* not found, so add it to the beginning.  */
-												* deprecated.h, boolean.h (SCM_FALSEP, SCM_NFALSEP, SCM_BOOL,
SCM_NEGATE_BOOL, SCM_BOOLP): Deprecated by moving into "deprecated.h".
Replaced all uses with scm_is_false, scm_is_true, scm_from_bool, and
scm_is_bool, respectively.

											
										
										
											2004-07-06 10:59:25 +00:00
+									  if (scm_is_true (proc))
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+									    {
-												Use a fluid for the list of the reader's "hash procedures"

This allows customizing the reader behavior for a dynamic extent more easily.

* libguile/read.c (scm_read_hash_procedures): Renamed to
  `scm_i_read_hash_procedures'.
  (scm_i_read_hash_procedures_ref, scm_i_read_hash_procedures_set_x):
  New (internal) accessor functions for the fluid.
  (scm_read_hash_extend, scm_get_hash_procedure): Use these accessor
  functions.
  (scm_init_read): Create the fluid, named `%read-hash-procedures' instead of
  the previous plain list `read-hash-procedures'.

* test-suite/tests/reader.test: Adapt the "R6RS/SRFI-30 block comment
  syntax overridden" test to make use of the fluid.

* module/ice-9/deprecated.scm (read-hash-procedures):
  New identifier macro -- backward-compatibility shim.

Signed-off-by: Ludovic Courtès <ludo@gnu.org>

											
										
										
											2010-11-03 00:09:57 +01:00
+								              SCM new = scm_cons (scm_cons (chr, proc),
 								                                  scm_i_read_hash_procedures_ref ());
 									      scm_i_read_hash_procedures_set_x (new);
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+									    }
 									  break;
 									}
-												* tags.h, deprecated.h (SCM_EQ_P): Deprecated by moving it into
deprecated.h.  Replaced all uses with scm_is_eq.

											
										
										
											2004-07-27 15:41:49 +00:00
+								      if (scm_is_eq (chr, SCM_CAAR (this)))
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+									{
 									  /* already in the alist.  */
-												* deprecated.h, boolean.h (SCM_FALSEP, SCM_NFALSEP, SCM_BOOL,
SCM_NEGATE_BOOL, SCM_BOOLP): Deprecated by moving into "deprecated.h".
Replaced all uses with scm_is_false, scm_is_true, scm_from_bool, and
scm_is_bool, respectively.

											
										
										
											2004-07-06 10:59:25 +00:00
+									  if (scm_is_false (proc))
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+									    {
 									      /* remove it.  */
-												* deprecated.h, boolean.h (SCM_FALSEP, SCM_NFALSEP, SCM_BOOL,
SCM_NEGATE_BOOL, SCM_BOOLP): Deprecated by moving into "deprecated.h".
Replaced all uses with scm_is_false, scm_is_true, scm_from_bool, and
scm_is_bool, respectively.

											
										
										
											2004-07-06 10:59:25 +00:00
+									      if (scm_is_false (prev))
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+										{
-												Use a fluid for the list of the reader's "hash procedures"

This allows customizing the reader behavior for a dynamic extent more easily.

* libguile/read.c (scm_read_hash_procedures): Renamed to
  `scm_i_read_hash_procedures'.
  (scm_i_read_hash_procedures_ref, scm_i_read_hash_procedures_set_x):
  New (internal) accessor functions for the fluid.
  (scm_read_hash_extend, scm_get_hash_procedure): Use these accessor
  functions.
  (scm_init_read): Create the fluid, named `%read-hash-procedures' instead of
  the previous plain list `read-hash-procedures'.

* test-suite/tests/reader.test: Adapt the "R6RS/SRFI-30 block comment
  syntax overridden" test to make use of the fluid.

* module/ice-9/deprecated.scm (read-hash-procedures):
  New identifier macro -- backward-compatibility shim.

Signed-off-by: Ludovic Courtès <ludo@gnu.org>

											
										
										
											2010-11-03 00:09:57 +01:00
+								                  SCM rest = SCM_CDR (scm_i_read_hash_procedures_ref ());
 										  scm_i_read_hash_procedures_set_x (rest);
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+										}
 									      else
 										scm_set_cdr_x (prev, SCM_CDR (this));
 									    }
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+									  else
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+									    {
 									      /* replace it.  */
 									      scm_set_cdr_x (SCM_CAR (this), proc);
 									    }
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+									  break;
 									}
 								      prev = this;
 								      this = SCM_CDR (this);
 								    }
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
 								  return SCM_UNSPECIFIED;
 								}
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrating)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#undef FUNC_NAME
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
+								/* Recover the read-hash procedure corresponding to char c.  */
 								static SCM
-												* *.c: Finish replacing K&R style prototypes with ANSI C
prototypes.

* eval.c: Make scm_m_mody's 3rd argument be a const char *, not a
char *.  ANSI prototypes caught this.

* strorder.c: Use GUILE_PROC1 for the couple SCM_PROC1 expansions
that I missed.

* scm_validate.h: Use SCM_BOOLP for validating bools.  Do not
expand macros if SCM_DOCSTRING_SNARF.

											
										
										
											1999-12-12 20:35:02 +00:00
+								scm_get_hash_procedure (int c)
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
+								{
-												Use a fluid for the list of the reader's "hash procedures"

This allows customizing the reader behavior for a dynamic extent more easily.

* libguile/read.c (scm_read_hash_procedures): Renamed to
  `scm_i_read_hash_procedures'.
  (scm_i_read_hash_procedures_ref, scm_i_read_hash_procedures_set_x):
  New (internal) accessor functions for the fluid.
  (scm_read_hash_extend, scm_get_hash_procedure): Use these accessor
  functions.
  (scm_init_read): Create the fluid, named `%read-hash-procedures' instead of
  the previous plain list `read-hash-procedures'.

* test-suite/tests/reader.test: Adapt the "R6RS/SRFI-30 block comment
  syntax overridden" test to make use of the fluid.

* module/ice-9/deprecated.scm (read-hash-procedures):
  New identifier macro -- backward-compatibility shim.

Signed-off-by: Ludovic Courtès <ludo@gnu.org>

											
										
										
											2010-11-03 00:09:57 +01:00
+								  SCM rest = scm_i_read_hash_procedures_ref ();
-													* read.c (scm_init_read): intitialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
+								  while (1)
 								    {
-												*** empty log message ***

											
										
										
											2004-09-22 17:41:37 +00:00
+								      if (scm_is_null (rest))
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
+									return SCM_BOOL_F;
-												* list.c: Moved append docs to append! Thanks Dirk Hermann.  Also,
added append docs from R4RS.

* strings.c: Docstring typo fix, + eliminate unneeded IMP tests.
Thanks Dirk Hermann!

* chars.h: Provide SCM_CHARP, SCM_CHAR, SCM_MAKE_CHAR and
deprecate SCM_ICHRP, SCM_ICHR, SCM_MAKICHR.  Thanks Dirk Hermann!

* *.h, *.c: Use SCM_CHARP, SCM_CHAR, SCM_MAKE_CHAR throughout.
Drop use of SCM_P for function prototypes... assume an ANSI C
compiler.  Thanks Dirk Hermann!

											
										
										
											2000-03-02 20:54:43 +00:00
+								      if (SCM_CHAR (SCM_CAAR (rest)) == c)
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
+									return SCM_CDAR (rest);
 								      rest = SCM_CDR (rest);
 								    }
 								}
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								#define SCM_ENCODING_SEARCH_SIZE (500)
-												Have `scm_scan_for_encoding ()' use GC-managed memory.

* libguile/read.c (scm_scan_for_encoding): Rename to ...
  (scm_i_scan_for_encoding): ... this; update callers.  Use
  `scm_gc_strndup ()' instead of `scm_malloc ()'.

* libguile/read.h: Update accordingly.

* libguile/load.c (scm_primitive_load): Don't call free(3) on the value
  returned by `scm_i_scan_for_encoding ()'.

											
										
										
											2009-11-14 16:27:28 +01:00
+								/* Search the first few hundred characters of a file for an Emacs-like coding
 								   declaration.  Returns either NULL or a string whose storage has been
 								   allocated with `scm_gc_malloc ()'.  */
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								char *
-												Have `scm_scan_for_encoding ()' use GC-managed memory.

* libguile/read.c (scm_scan_for_encoding): Rename to ...
  (scm_i_scan_for_encoding): ... this; update callers.  Use
  `scm_gc_strndup ()' instead of `scm_malloc ()'.

* libguile/read.h: Update accordingly.

* libguile/load.c (scm_primitive_load): Don't call free(3) on the value
  returned by `scm_i_scan_for_encoding ()'.

											
										
										
											2009-11-14 16:27:28 +01:00
+								scm_i_scan_for_encoding (SCM port)
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								{
-												fix encoding scanning for non-seekable ports

* libguile/read.c (scm_i_scan_for_encoding): If possible, just use the
  read buffer for the encoding scan, and avoid seeking.  Fixes
  `(open-input-file "/dev/urandom")', because /dev/urandom can't be
  seeked backwards.

											
										
										
											2011-03-03 12:46:49 +01:00
+								  scm_t_port *pt;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  char header[SCM_ENCODING_SEARCH_SIZE+1];
-												More explicit variable names in scm_i_scan_for_encoding

Note especially that the variable 'i' has two different uses in this
function, and they get confused.

* libguile/read.c (scm_i_scan_for_encoding): cleanup

											
										
										
											2010-07-16 05:39:52 -07:00
+								  size_t bytes_read, encoding_length, i;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  char *encoding = NULL;
-												Revert "detect and consume byte-order marks for textual ports"

This reverts commit b2cb557d75e4daf8c7c8cd43313f4cc51d9a3f1b, which was
pushed accidentally.

											
										
										
											2013-01-30 15:30:31 +01:00
+								  int utf8_bom = 0;
-												More explicit variable names in scm_i_scan_for_encoding

Note especially that the variable 'i' has two different uses in this
function, and they get confused.

* libguile/read.c (scm_i_scan_for_encoding): cleanup

											
										
										
											2010-07-16 05:39:52 -07:00
+								  char *pos, *encoding_start;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  int in_comment;
-												fix encoding scanning for non-seekable ports

* libguile/read.c (scm_i_scan_for_encoding): If possible, just use the
  read buffer for the encoding scan, and avoid seeking.  Fixes
  `(open-input-file "/dev/urandom")', because /dev/urandom can't be
  seeked backwards.

											
										
										
											2011-03-03 12:46:49 +01:00
+								  pt = SCM_PTAB_ENTRY (port);
-												Disable encoding scanning on non-seekable file ports.

* libguile/read.c (scm_i_scan_for_encoding): Don't attempt to scan
  non-seekable file ports.

											
										
										
											2009-11-27 17:00:51 +01:00
-												fix encoding scanning for non-seekable ports

* libguile/read.c (scm_i_scan_for_encoding): If possible, just use the
  read buffer for the encoding scan, and avoid seeking.  Fixes
  `(open-input-file "/dev/urandom")', because /dev/urandom can't be
  seeked backwards.

											
										
										
											2011-03-03 12:46:49 +01:00
+								  if (pt->rw_active == SCM_PORT_WRITE)
 								    scm_flush (port);
-												Disable encoding scanning on non-seekable file ports.

* libguile/read.c (scm_i_scan_for_encoding): Don't attempt to scan
  non-seekable file ports.

											
										
										
											2009-11-27 17:00:51 +01:00
-												fix encoding scanning for non-seekable ports

* libguile/read.c (scm_i_scan_for_encoding): If possible, just use the
  read buffer for the encoding scan, and avoid seeking.  Fixes
  `(open-input-file "/dev/urandom")', because /dev/urandom can't be
  seeked backwards.

											
										
										
											2011-03-03 12:46:49 +01:00
+								  if (pt->rw_random)
 								    pt->rw_active = SCM_PORT_READ;
 								  if (pt->read_pos == pt->read_end)
 								    {
 								      /* We can use the read buffer, and thus avoid a seek. */
 								      if (scm_fill_input (port) == EOF)
 								        return NULL;
 								      bytes_read = pt->read_end - pt->read_pos;
 								      if (bytes_read > SCM_ENCODING_SEARCH_SIZE)
 								        bytes_read = SCM_ENCODING_SEARCH_SIZE;
 								      if (bytes_read <= 1)
 								        /* An unbuffered port -- don't scan.  */
 								        return NULL;
 								      memcpy (header, pt->read_pos, bytes_read);
 								      header[bytes_read] = '\0';
 								    }
 								  else
 								    {
 								      /* Try to read some bytes and then seek back.  Not all ports
 								         support seeking back; and indeed some file ports (like
 								         /dev/urandom) will succeed on an lseek (fd, 0, SEEK_CUR)---the
 								         check performed by SCM_FPORT_FDES---but fail to seek
 								         backwards.  Hence this block comes second.  We prefer to use
 								         the read buffer in-place.  */
 								      if (SCM_FPORTP (port) && !SCM_FDES_RANDOM_P (SCM_FPORT_FDES (port)))
 								        return NULL;
 								      bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
 								      header[bytes_read] = '\0';
-												Revert "detect and consume byte-order marks for textual ports"

This reverts commit b2cb557d75e4daf8c7c8cd43313f4cc51d9a3f1b, which was
pushed accidentally.

											
										
										
											2013-01-30 15:30:31 +01:00
+								      scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
-												fix encoding scanning for non-seekable ports

* libguile/read.c (scm_i_scan_for_encoding): If possible, just use the
  read buffer for the encoding scan, and avoid seeking.  Fixes
  `(open-input-file "/dev/urandom")', because /dev/urandom can't be
  seeked backwards.

											
										
										
											2011-03-03 12:46:49 +01:00
+								    }
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
-												Revert "detect and consume byte-order marks for textual ports"

This reverts commit b2cb557d75e4daf8c7c8cd43313f4cc51d9a3f1b, which was
pushed accidentally.

											
										
										
											2013-01-30 15:30:31 +01:00
+								  if (bytes_read > 3
 								      && header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
 								    utf8_bom = 1;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  /* search past "coding[:=]" */
 								  pos = header;
 								  while (1)
 								    {
 								      if ((pos = strstr(pos, "coding")) == NULL)
 								        return NULL;
 								      pos += strlen("coding");
 								      if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
 								          (*pos == ':' || *pos == '='))
 								        {
 								          pos ++;
 								          break;
 								        }
 								    }
 								  /* skip spaces */
 								  while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
 									 (*pos == ' ' || *pos == '\t'))
 								    pos ++;
 								  /* grab the next token */
-												More explicit variable names in scm_i_scan_for_encoding

Note especially that the variable 'i' has two different uses in this
function, and they get confused.

* libguile/read.c (scm_i_scan_for_encoding): cleanup

											
										
										
											2010-07-16 05:39:52 -07:00
+								  encoding_start = pos;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  i = 0;
-												More explicit variable names in scm_i_scan_for_encoding

Note especially that the variable 'i' has two different uses in this
function, and they get confused.

* libguile/read.c (scm_i_scan_for_encoding): cleanup

											
										
										
											2010-07-16 05:39:52 -07:00
+								  while (encoding_start + i - header <= SCM_ENCODING_SEARCH_SIZE
 								         && encoding_start + i - header < bytes_read
 									 && (isalnum ((int) encoding_start[i])
 									     || strchr ("_-.:/,+=()", encoding_start[i]) != NULL))
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								    i++;
-												More explicit variable names in scm_i_scan_for_encoding

Note especially that the variable 'i' has two different uses in this
function, and they get confused.

* libguile/read.c (scm_i_scan_for_encoding): cleanup

											
										
										
											2010-07-16 05:39:52 -07:00
+								  encoding_length = i;
 								  if (encoding_length == 0)
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								    return NULL;
-												More explicit variable names in scm_i_scan_for_encoding

Note especially that the variable 'i' has two different uses in this
function, and they get confused.

* libguile/read.c (scm_i_scan_for_encoding): cleanup

											
										
										
											2010-07-16 05:39:52 -07:00
+								  encoding = scm_gc_strndup (encoding_start, encoding_length, "encoding");
 								  for (i = 0; i < encoding_length; i++)
-												Always cast input to toupper as int

* libguile/read.c (scm_scan_for_encoding): add cast to int

											
										
										
											2009-08-27 07:35:39 -07:00
+								    encoding[i] = toupper ((int) encoding[i]);
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
 								  /* push backwards to make sure we were in a comment */
 								  in_comment = 0;
-												More explicit variable names in scm_i_scan_for_encoding

Note especially that the variable 'i' has two different uses in this
function, and they get confused.

* libguile/read.c (scm_i_scan_for_encoding): cleanup

											
										
										
											2010-07-16 05:39:52 -07:00
+								  pos = encoding_start;
 								  while (pos >= header)
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								    {
-												fix problems detecting coding: in block comments

* libguile/read.c (scm_i_scan_for_encoding): Fix for coding on first
  line #! and for !# immediately following the coding.

* test-suite/Makefile.am:
* test-suite/tests/coding.test: Add tests.

											
										
										
											2011-03-31 14:46:21 +02:00
+								      if (*pos == ';')
 									{
 									  in_comment = 1;
 									  break;
 									}
 								      else if (*pos == '\n' || pos == header)
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									{
 									  /* This wasn't in a semicolon comment. Check for a
 									   hash-bang comment. */
 									  char *beg = strstr (header, "#!");
 									  char *end = strstr (header, "!#");
-												fix problems detecting coding: in block comments

* libguile/read.c (scm_i_scan_for_encoding): Fix for coding on first
  line #! and for !# immediately following the coding.

* test-suite/Makefile.am:
* test-suite/tests/coding.test: Add tests.

											
										
										
											2011-03-31 14:46:21 +02:00
+									  if (beg < encoding_start && encoding_start + encoding_length <= end)
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									    in_comment = 1;
 									  break;
 									}
-												fix problems detecting coding: in block comments

* libguile/read.c (scm_i_scan_for_encoding): Fix for coding on first
  line #! and for !# immediately following the coding.

* test-suite/Makefile.am:
* test-suite/tests/coding.test: Add tests.

											
										
										
											2011-03-31 14:46:21 +02:00
+								      else
 								        {
 								          pos --;
 								          continue;
 								        }
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								    }
 								  if (!in_comment)
-												Have `scm_scan_for_encoding ()' use GC-managed memory.

* libguile/read.c (scm_scan_for_encoding): Rename to ...
  (scm_i_scan_for_encoding): ... this; update callers.  Use
  `scm_gc_strndup ()' instead of `scm_malloc ()'.

* libguile/read.h: Update accordingly.

* libguile/load.c (scm_primitive_load): Don't call free(3) on the value
  returned by `scm_i_scan_for_encoding ()'.

											
										
										
											2009-11-14 16:27:28 +01:00
+								    /* This wasn't in a comment */
 								    return NULL;
-												Revert "detect and consume byte-order marks for textual ports"

This reverts commit b2cb557d75e4daf8c7c8cd43313f4cc51d9a3f1b, which was
pushed accidentally.

											
										
										
											2013-01-30 15:30:31 +01:00
+								  if (utf8_bom && strcmp(encoding, "UTF-8"))
 								    scm_misc_error (NULL,
 										    "the port input declares the encoding ~s but is encoded as UTF-8",
 										    scm_list_1 (scm_from_locale_string (encoding)));
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  return encoding;
 								}
 								SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
 								            (SCM port),
-												Correct manual wrt. encoding names.

* doc/ref/api-evaluation.texi (Character Encoding of Source Files):
  Don't suggest `latin1' as a good encoding name since Emacs cannot deal
  with it.

* libguile/read.c (scm_file_encoding): Fix "Emacs" spelling.

											
										
										
											2009-11-23 18:51:25 +01:00
+								            "Scans the port for an Emacs-like character coding declaration\n"
-												fix typos in the manual bits generated from source comments.

* libguile/bitvectors.c, libguile/chars.c,
libguile/deprecated.c, libguile/numbers.c, libguile/random.c,
libguile/read.c, libguile/root.c, libguile/srfi-1.c,
libguile/srfi-13.c, libguile/srfi-14.c, libguile/uniform.c:
Fix typos, add missing newlines.

											
										
										
											2011-02-07 00:29:51 +01:00
+								            "near the top of the contents of a port with random-accessible contents.\n"
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								            "The coding declaration is of the form\n"
 								            "@code{coding: XXXXX} and must appear in a scheme comment.\n"
 								            "\n"
 								            "Returns a string containing the character encoding of the file\n"
 								            "if a declaration was found, or @code{#f} otherwise.\n")
 								#define FUNC_NAME s_scm_file_encoding
 								{
 								  char *enc;
 								  SCM s_enc;
-												Have `scm_scan_for_encoding ()' use GC-managed memory.

* libguile/read.c (scm_scan_for_encoding): Rename to ...
  (scm_i_scan_for_encoding): ... this; update callers.  Use
  `scm_gc_strndup ()' instead of `scm_malloc ()'.

* libguile/read.h: Update accordingly.

* libguile/load.c (scm_primitive_load): Don't call free(3) on the value
  returned by `scm_i_scan_for_encoding ()'.

											
										
										
											2009-11-14 16:27:28 +01:00
-												fix encoding scanning for non-seekable ports

* libguile/read.c (scm_i_scan_for_encoding): If possible, just use the
  read buffer for the encoding scan, and avoid seeking.  Fixes
  `(open-input-file "/dev/urandom")', because /dev/urandom can't be
  seeked backwards.

											
										
										
											2011-03-03 12:46:49 +01:00
+								  SCM_VALIDATE_OPINPORT (SCM_ARG1, port);
-												Have `scm_scan_for_encoding ()' use GC-managed memory.

* libguile/read.c (scm_scan_for_encoding): Rename to ...
  (scm_i_scan_for_encoding): ... this; update callers.  Use
  `scm_gc_strndup ()' instead of `scm_malloc ()'.

* libguile/read.h: Update accordingly.

* libguile/load.c (scm_primitive_load): Don't call free(3) on the value
  returned by `scm_i_scan_for_encoding ()'.

											
										
										
											2009-11-14 16:27:28 +01:00
+								  enc = scm_i_scan_for_encoding (port);
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  if (enc == NULL)
 								    return SCM_BOOL_F;
 								  else
 								    {
 								      s_enc = scm_from_locale_string (enc);
 								      return s_enc;
 								    }
-												Have `scm_scan_for_encoding ()' use GC-managed memory.

* libguile/read.c (scm_scan_for_encoding): Rename to ...
  (scm_i_scan_for_encoding): ... this; update callers.  Use
  `scm_gc_strndup ()' instead of `scm_malloc ()'.

* libguile/read.h: Update accordingly.

* libguile/load.c (scm_primitive_load): Don't call free(3) on the value
  returned by `scm_i_scan_for_encoding ()'.

											
										
										
											2009-11-14 16:27:28 +01:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  return SCM_BOOL_F;
 								}
 								#undef FUNC_NAME
-												Implement per-port read options.

* libguile/read.c (scm_t_read_opts): Update comment to mention the
  per-port read options.

  (sym_port_read_options): New variable.

  (set_port_read_option): New function.

  (init_read_options): Add new 'port' parameter, and consult the
  per-port read option overrides when initializing the 'scm_t_read_opts'
  struct.  Move to bottom of file.

  (scm_read): Pass 'port' parameter to init_read_options.

											
										
										
											2012-10-23 17:28:43 -04:00
 								/* Per-port read options.
 								
 								   We store per-port read options in the 'port-read-options' key of the
 								   port's alist, which is stored in 'scm_i_port_weak_hash'.  The value
 								   stored in the alist is a single integer that contains a two-bit field
 								   for each read option.
 								
 								   If a bit field contains READ_OPTION_INHERIT (3), that indicates that
 								   the applicable value should be inherited from the corresponding
 								   global read option.  Otherwise, the bit field contains the value of
 								   the read option.  For boolean read options that have been set
 								   per-port, the possible values are 0 or 1.  If the 'keyword_style'
 								   read option has been set per-port, its possible values are those in
 								   'enum t_keyword_style'. */
 								/* Key to read options in per-port alists. */
 								SCM_SYMBOL (sym_port_read_options, "port-read-options");
 								/* Offsets of bit fields for each per-port override.  Each value is
 								   the shift count, in bits, of that option's two-bit field inside the
 								   packed integer, which is why consecutive offsets differ by 2. */
 								#define READ_OPTION_COPY_SOURCE_P          0
 								#define READ_OPTION_RECORD_POSITIONS_P     2
 								#define READ_OPTION_CASE_INSENSITIVE_P     4
 								#define READ_OPTION_KEYWORD_STYLE          6
 								#define READ_OPTION_R6RS_ESCAPES_P         8
 								#define READ_OPTION_SQUARE_BRACKETS_P     10
 								#define READ_OPTION_HUNGRY_EOL_ESCAPES_P  12
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								#define READ_OPTION_CURLY_INFIX_P         14
-												Implement per-port read options.

* libguile/read.c (scm_t_read_opts): Update comment to mention the
  per-port read options.

  (sym_port_read_options): New variable.

  (set_port_read_option): New function.

  (init_read_options): Add new 'port' parameter, and consult the
  per-port read option overrides when initializing the 'scm_t_read_opts'
  struct.  Move to bottom of file.

  (scm_read): Pass 'port' parameter to init_read_options.

											
										
										
											2012-10-23 17:28:43 -04:00
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								/* The total width in bits of the per-port overrides */
 								#define READ_OPTIONS_NUM_BITS             16
-												Implement per-port read options.

* libguile/read.c (scm_t_read_opts): Update comment to mention the
  per-port read options.

  (sym_port_read_options): New variable.

  (set_port_read_option): New function.

  (init_read_options): Add new 'port' parameter, and consult the
  per-port read option overrides when initializing the 'scm_t_read_opts'
  struct.  Move to bottom of file.

  (scm_read): Pass 'port' parameter to init_read_options.

											
										
										
											2012-10-23 17:28:43 -04:00
 								#define READ_OPTIONS_INHERIT_ALL  ((1UL << READ_OPTIONS_NUM_BITS) - 1)
 								#define READ_OPTIONS_MAX_VALUE    READ_OPTIONS_INHERIT_ALL
 								#define READ_OPTION_MASK     3
 								#define READ_OPTION_INHERIT  3
 								/* Store NEW_VALUE in the two-bit per-port override field located at bit
 								   offset OPTION (one of the READ_OPTION_* offsets) for PORT.  The packed
 								   options word lives under 'sym_port_read_options' in PORT's alist in
 								   'scm_i_port_weak_hash'; the whole read-modify-write is performed while
 								   holding 'scm_i_port_table_mutex'. */
 								static void
 								set_port_read_option (SCM port, int option, int new_value)
 								{
 								  SCM port_alist, packed;
 								  unsigned int bits;
 								
 								  /* Only the low two bits of NEW_VALUE fit in an option field. */
 								  new_value = new_value & READ_OPTION_MASK;
 								
 								  scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
 								
 								  port_alist = scm_hashq_ref (scm_i_port_weak_hash, port, SCM_BOOL_F);
 								  packed = scm_assq_ref (port_alist, sym_port_read_options);
 								
 								  /* Start from "inherit everything" and replace it with the stored
 								     word only when that word is an integer in the valid range. */
 								  bits = READ_OPTIONS_INHERIT_ALL;
 								  if (scm_is_unsigned_integer (packed, 0, READ_OPTIONS_MAX_VALUE))
 								    bits = scm_to_uint (packed);
 								
 								  /* Clear the target field, then splice in the new value. */
 								  bits = (bits & ~(READ_OPTION_MASK << option)) | (new_value << option);
 								
 								  packed = scm_from_uint (bits);
 								  port_alist = scm_assq_set_x (port_alist, sym_port_read_options, packed);
 								  scm_hashq_set_x (scm_i_port_weak_hash, port, port_alist);
 								
 								  scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
 								}
-												Implement #!fold-case and #!no-fold-case reader directives.

* libguile/read.c (set_port_case_insensitive_p): New function.

  (scm_read_shebang): Handle #!fold-case and #!no-fold-case.

* doc/ref/api-evaluation.texi (Case Sensitivity, Scheme Read): Document
  the #!fold-case and #!no-fold-case reader directives.

* test-suite/tests/reader.test ("per-port-read-options"): Add tests.

											
										
										
											2012-10-24 14:37:36 -04:00
+								/* Set OPTS and PORT's case-insensitivity according to VALUE. */
 								static void
 								set_port_case_insensitive_p (SCM port, scm_t_read_opts *opts, int value)
 								{
 								  /* Collapse any non-zero VALUE to 1 so that it is a valid boolean
 								     field value. */
 								  const int flag = (value != 0);
 								
 								  /* Update the in-flight reader state first, then record the same
 								     setting as a per-port override. */
 								  opts->case_insensitive_p = flag;
 								  set_port_read_option (port, READ_OPTION_CASE_INSENSITIVE_P, flag);
 								}
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								/* Set OPTS and PORT's square_brackets_p option according to VALUE. */
 								static void
 								set_port_square_brackets_p (SCM port, scm_t_read_opts *opts, int value)
 								{
 								  /* Collapse any non-zero VALUE to 1 so that it is a valid boolean
 								     field value. */
 								  const int flag = (value != 0);
 								
 								  /* Update the in-flight reader state first, then record the same
 								     setting as a per-port override. */
 								  opts->square_brackets_p = flag;
 								  set_port_read_option (port, READ_OPTION_SQUARE_BRACKETS_P, flag);
 								}
 								/* Enable (VALUE non-zero) or disable (VALUE zero) curly-infix reading,
 								   both in OPTS and in PORT's per-port read options. */
 								static void
 								set_port_curly_infix_p (SCM port, scm_t_read_opts *opts, int value)
 								{
 								  /* Collapse any non-zero VALUE to 1 so that it is a valid boolean
 								     field value. */
 								  const int flag = (value != 0);
 								
 								  /* Update the in-flight reader state first, then record the same
 								     setting as a per-port override. */
 								  opts->curly_infix_p = flag;
 								  set_port_read_option (port, READ_OPTION_CURLY_INFIX_P, flag);
 								}
-												Implement per-port read options.

* libguile/read.c (scm_t_read_opts): Update comment to mention the
  per-port read options.

  (sym_port_read_options): New variable.

  (set_port_read_option): New function.

  (init_read_options): Add new 'port' parameter, and consult the
  per-port read option overrides when initializing the 'scm_t_read_opts'
  struct.  Move to bottom of file.

  (scm_read): Pass 'port' parameter to init_read_options.

											
										
										
											2012-10-23 17:28:43 -04:00
+								/* Initialize OPTS based on PORT's read options and the global read
 								   options. */
 								static void
 								init_read_options (SCM port, scm_t_read_opts *opts)
 								{
 								  SCM alist, val, scm_read_options;
 								  unsigned int read_options, x;
 								  /* Fetch PORT's packed per-port options word; the port table mutex is
 								     held only for the two lookups. */
 								  scm_i_scm_pthread_mutex_lock (&scm_i_port_table_mutex);
 								  alist = scm_hashq_ref (scm_i_port_weak_hash, port, SCM_BOOL_F);
 								  scm_read_options = scm_assq_ref (alist, sym_port_read_options);
 								  scm_i_pthread_mutex_unlock (&scm_i_port_table_mutex);
 								  /* Anything that is not an integer in the valid range is treated as
 								     "inherit every option from the global settings". */
 								  if (scm_is_unsigned_integer (scm_read_options, 0, READ_OPTIONS_MAX_VALUE))
 								    read_options = scm_to_uint (scm_read_options);
 								  else
 								    read_options = READ_OPTIONS_INHERIT_ALL;
 								  /* Keyword style is not a boolean: when inherited, translate the
 								     global SCM_KEYWORD_STYLE symbol into the matching KEYWORD_STYLE_*
 								     value, defaulting to KEYWORD_STYLE_HASH_PREFIX for anything other
 								     than 'prefix' or 'postfix'. */
 								  x = READ_OPTION_MASK & (read_options >> READ_OPTION_KEYWORD_STYLE);
 								  if (x == READ_OPTION_INHERIT)
 								    {
 								      val = SCM_PACK (SCM_KEYWORD_STYLE);
 								      if (scm_is_eq (val, scm_keyword_prefix))
 								        x = KEYWORD_STYLE_PREFIX;
 								      else if (scm_is_eq (val, scm_keyword_postfix))
 								        x = KEYWORD_STYLE_POSTFIX;
 								      else
 								        x = KEYWORD_STYLE_HASH_PREFIX;
 								    }
 								  opts->keyword_style = x;
 								/* Resolve one boolean option: extract the two-bit field at offset
 								   READ_OPTION_<NAME>; if it holds READ_OPTION_INHERIT, substitute the
 								   global SCM_<NAME> value normalized to 0 or 1.  The assignment to
 								   opts->name is unconditional -- despite its indentation it is not
 								   part of the 'if' body. */
 								#define RESOLVE_BOOLEAN_OPTION(NAME, name)                              \
 								  do                                                                    \
 								    {                                                                   \
 								      x = READ_OPTION_MASK & (read_options >> READ_OPTION_ ## NAME);    \
 								      if (x == READ_OPTION_INHERIT)                                     \
 								        x = !!SCM_ ## NAME;                                             \
 								          opts->name = x;                                               \
 								    }                                                                   \
 								  while (0)
 								  RESOLVE_BOOLEAN_OPTION (COPY_SOURCE_P,        copy_source_p);
 								  RESOLVE_BOOLEAN_OPTION (RECORD_POSITIONS_P,   record_positions_p);
 								  RESOLVE_BOOLEAN_OPTION (CASE_INSENSITIVE_P,   case_insensitive_p);
 								  RESOLVE_BOOLEAN_OPTION (R6RS_ESCAPES_P,       r6rs_escapes_p);
 								  RESOLVE_BOOLEAN_OPTION (SQUARE_BRACKETS_P,    square_brackets_p);
 								  RESOLVE_BOOLEAN_OPTION (HUNGRY_EOL_ESCAPES_P, hungry_eol_escapes_p);
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
+								  RESOLVE_BOOLEAN_OPTION (CURLY_INFIX_P,        curly_infix_p);
-												Implement per-port read options.

* libguile/read.c (scm_t_read_opts): Update comment to mention the
  per-port read options.

  (sym_port_read_options): New variable.

  (set_port_read_option): New function.

  (init_read_options): Add new 'port' parameter, and consult the
  per-port read option overrides when initializing the 'scm_t_read_opts'
  struct.  Move to bottom of file.

  (scm_read): Pass 'port' parameter to init_read_options.

											
										
										
											2012-10-23 17:28:43 -04:00
 								#undef RESOLVE_BOOLEAN_OPTION
-												Implement SRFI-105 curly infix expressions.

* libguile/private-options.h: Add SCM_CURLY_INFIX_P macro, and increment
  SCM_N_READ_OPTIONS.

* libguile/read.c (sym_nfx, sym_bracket_list, sym_bracket_apply): New
  variables.
  (scm_read_opts): Add curly-infix reader option.  Reformat to comply
  with GNU coding standards.
  (scm_t_read_opts): Add curly_infix_p and neoteric_p fields.
  (init_read_options): Initialize new fields.
  (CHAR_IS_DELIMITER): Add '{', '}', '[', and ']' as delimiters if
  curly_infix_p is set.

  (set_port_square_brackets_p, set_port_curly_infix_p): New functions.

  (read_inner_expression): New function which contains the code that was
  previously in 'scm_read_expression'.  Handle curly braces when
  curly_infix_p is set.  If curly_infix_p is set and square_brackets_p
  is unset, follow the Kawa convention: [...] => ($bracket-list$ ...)

  (scm_read_expression): New function body to handle neoteric
  expressions where appropriate.

  (scm_read_shebang): Handle the new reader directives: '#!curly-infix'
  and the non-standard '#!curly-infix-and-bracket-lists'.

  (scm_read_sexp): Handle curly infix lists.

* module/ice-9/boot-9.scm (%cond-expand-features): Add srfi-105 feature
  identifier.

* doc/ref/srfi-modules.texi (SRFI-105): Add stub doc for SRFI-105.

* doc/ref/api-evaluation.texi (Scheme Read): Add documentation for the
  'curly-infix' read option, and the '#!curly-infix' and
  '#!curly-infix-and-bracket-lists' reader directives.

* doc/ref/api-options.texi (Runtime Options): Add 'curly-infix' to the
  list of read options.

* test-suite/Makefile.am: Add tests/srfi-105.test.

* test-suite/tests/srfi-105.test: New file.

											
										
										
											2012-10-26 17:20:16 -04:00
 								  opts->neoteric_p = 0;
-												Implement per-port read options.

* libguile/read.c (scm_t_read_opts): Update comment to mention the
  per-port read options.

  (sym_port_read_options): New variable.

  (set_port_read_option): New function.

  (init_read_options): Add new 'port' parameter, and consult the
  per-port read option overrides when initializing the 'scm_t_read_opts'
  struct.  Move to bottom of file.

  (scm_read): Pass 'port' parameter to init_read_options.

											
										
										
											2012-10-23 17:28:43 -04:00
+								}
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								void
 								scm_init_read ()
 								{
-												Use a fluid for the list of the reader's "hash procedures"

This allows customizing the reader behavior for a dynamic extent more easily.

* libguile/read.c (scm_read_hash_procedures): Renamed to
  `scm_i_read_hash_procedures'.
  (scm_i_read_hash_procedures_ref, scm_i_read_hash_procedures_set_x):
  New (internal) accessor functions for the fluid.
  (scm_read_hash_extend, scm_get_hash_procedure): Use these accessor
  functions.
  (scm_init_read): Create the fluid, named `%read-hash-procedures' instead of
  the previous plain list `read-hash-procedures'.

* test-suite/tests/reader.test: Adapt the "R6RS/SRFI-30 block comment
  syntax overridden" test to make use of the fluid.

* module/ice-9/deprecated.scm (read-hash-procedures):
  New identifier macro -- backward-compatibility shim.

Signed-off-by: Ludovic Courtès <ludo@gnu.org>

											
										
										
											2010-11-03 00:09:57 +01:00
+								  SCM read_hash_procs;
-												use new scm_make_fluid_with_default

* libguile/load.c (scm_init_load):
* libguile/ports.c (scm_init_ports):
* libguile/read.c (scm_init_read): Use scm_make_fluid_with_default.

											
										
										
											2011-11-23 12:21:22 +01:00
+								  read_hash_procs = scm_make_fluid_with_default (SCM_EOL);
-												Use a fluid for the list of the reader's "hash procedures"

This allows customizing the reader behavior for a dynamic extent more easily.

* libguile/read.c (scm_read_hash_procedures): Renamed to
  `scm_i_read_hash_procedures'.
  (scm_i_read_hash_procedures_ref, scm_i_read_hash_procedures_set_x):
  New (internal) accessor functions for the fluid.
  (scm_read_hash_extend, scm_get_hash_procedure): Use these accessor
  functions.
  (scm_init_read): Create the fluid, named `%read-hash-procedures' instead of
  the previous plain list `read-hash-procedures'.

* test-suite/tests/reader.test: Adapt the "R6RS/SRFI-30 block comment
  syntax overridden" test to make use of the fluid.

* module/ice-9/deprecated.scm (read-hash-procedures):
  New identifier macro -- backward-compatibility shim.

Signed-off-by: Ludovic Courtès <ludo@gnu.org>

											
										
										
											2010-11-03 00:09:57 +01:00
 								  scm_i_read_hash_procedures =
 								    SCM_VARIABLE_LOC (scm_c_define ("%read-hash-procedures", read_hash_procs));
-													* read.c (scm_init_read): intitialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
-												* readline.c: terminate option list with NULL.

* read.c: idem.

* print.c: idem.

* eval.c: terminate option lists with 0.

* options.c: remove n (for length) from scm_option_X
functions. Detect option list length by looking for NULL name.

											
										
										
											2007-01-19 19:26:36 +00:00
+								  scm_init_opts (scm_read_options, scm_read_opts);
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/read.x"
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-													* *.[hc]: add Emacs magic at the end of file, to ensure GNU
 	indentation style.

											
										
										
											2000-03-19 19:01:16 +00:00
 								/*
 								  Local Variables:
 								  c-file-style: "gnu"
 								  End:
 								*/