guile/libguile/read.c

/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009 Free Software
 * Foundation, Inc.
 * 
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 3 of
 * the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301 USA
 */


#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <unistd.h>
#include <unicase.h>

#include "libguile/_scm.h"
#include "libguile/bytevectors.h"
#include "libguile/chars.h"
#include "libguile/eval.h"
#include "libguile/arrays.h"
#include "libguile/bitvectors.h"
#include "libguile/keywords.h"
#include "libguile/alist.h"
#include "libguile/srcprop.h"
#include "libguile/hashtab.h"
#include "libguile/hash.h"
#include "libguile/ports.h"
#include "libguile/root.h"
#include "libguile/strings.h"
#include "libguile/strports.h"
#include "libguile/vectors.h"
#include "libguile/validate.h"
#include "libguile/srfi-4.h"
#include "libguile/srfi-13.h"

#include "libguile/read.h"
#include "libguile/private-options.h"


/* Symbols the reader compares against: the lone dot that introduces the
   tail of a dotted pair, and the two symbols that the keyword style is
   compared with (`prefix' and `postfix').  */
SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
SCM_SYMBOL (scm_keyword_prefix, "prefix");
SCM_SYMBOL (scm_keyword_postfix, "postfix");

/* Table of reader options, handed to `scm_options' by
   `read-options-interface'.  Each entry gives the option type, its name,
   its initial value and a one-line description.  The table ends with an
   all-zero entry.  */
scm_t_option scm_read_opts[] = {
  { SCM_OPTION_BOOLEAN, "copy", 0,
    "Copy source code expressions." },
  { SCM_OPTION_BOOLEAN, "positions", 0,
    "Record positions of source code expressions." },
  { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
    "Convert symbols to lower case."},
  { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
    "Style of keyword recognition: #f, 'prefix or 'postfix."},
#if SCM_ENABLE_ELISP
  { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
    "Support Elisp vector syntax, namely `[...]'."},
  { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
    "Support `\\(' and `\\)' in strings."},
#endif
  { 0, },
};

/*
  Give meaningful error messages for errors

  We use the format

  FILE:LINE:COL: MESSAGE
  This happened in ....

  This is not standard GNU format, but the test-suite likes the real
  message to be in front.

 */


/* Signal a `read-error' for a problem found while reading from PORT.

   FUNCTION, when non-NULL, names the C routine reporting the problem and
   is passed on as the subr; otherwise #f is passed.  MESSAGE describes
   the problem and ARG is forwarded unchanged to `scm_error_scm' as the
   arguments that go with the message.  The text given to `scm_error_scm'
   is "FILE:LINE:COL: MESSAGE", where LINE and COL are the port's line and
   column plus one, and FILE is "#<unknown port>" when the port's file
   name is not a string.  */
void
scm_i_input_error (char const *function,
		   SCM port, const char *message, SCM arg)
{
  SCM where, out, text, subr;

  if (scm_is_string (SCM_FILENAME (port)))
    where = SCM_FILENAME (port);
  else
    where = scm_from_locale_string ("#<unknown port>");

  /* Build the location-prefixed message in a temporary string port.  */
  out = scm_open_output_string ();
  scm_simple_format (out,
                     scm_from_locale_string ("~A:~S:~S: ~A"),
                     scm_list_4 (where,
                                 scm_from_long (SCM_LINUM (port) + 1),
                                 scm_from_int (SCM_COL (port) + 1),
                                 scm_from_locale_string (message)));
  text = scm_get_output_string (out);
  scm_close_output_port (out);

  if (function)
    subr = scm_from_locale_string (function);
  else
    subr = SCM_BOOL_F;

  scm_error_scm (scm_from_locale_symbol ("read-error"),
                 subr, text, arg, SCM_BOOL_F);
}


SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0, 
            (SCM setting),
	    "Option interface for the read options. Instead of using\n"
	    "this procedure directly, use the procedures @code{read-enable},\n"
	    "@code{read-disable}, @code{read-set!} and @code{read-options}.")
#define FUNC_NAME s_scm_read_options
{
  SCM result;

  /* Let the generic option machinery apply SETTING to the reader's
     option table.  */
  result = scm_options (setting, scm_read_opts, FUNC_NAME);

  /* Whenever source copying is on, position recording is switched on
     as well.  */
  if (SCM_COPY_SOURCE_P)
    SCM_RECORD_POSITIONS_P = 1;

  return result;
}
#undef FUNC_NAME

/* An association list mapping extra hash characters to procedures.
   Entries are (CHAR . PROC) pairs; the list is edited by
   `read-hash-extend' and searched by `scm_get_hash_procedure'.  This is a
   pointer to the location holding the list; it is not set in this part of
   the file.  */
static SCM *scm_read_hash_procedures;


/* Token readers.  */


/* Size of the C buffer used to read symbols and numbers.  */
#define READER_BUFFER_SIZE            128

/* Size of the C buffer used to read strings.  */
#define READER_STRING_BUFFER_SIZE     512

/* The maximum size of Scheme character names.  */
#define READER_CHAR_NAME_MAX_SIZE      50


/* `isblank' is only in C99.  Note that, unlike `isblank', this also
   accepts newline, form feed and carriage return.  */
#define CHAR_IS_BLANK_(_chr)					\
  (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n')	\
   || ((_chr) == '\f') || ((_chr) == '\r'))

#ifdef MSDOS
/* On MS-DOS, character 26 (Control-Z) is treated as blank too.  The
   macro argument must be forwarded as `_chr'; naming a bare `chr' here
   would silently capture whatever variable of that name is in scope at
   the point of use.  */
# define CHAR_IS_BLANK(_chr)			\
  ((CHAR_IS_BLANK_ (_chr)) || ((_chr) == 26))
#else
# define CHAR_IS_BLANK CHAR_IS_BLANK_
#endif


/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
   structure'').  The argument is parenthesized at every use so that
   expression arguments (e.g. a conditional expression) are compared as a
   whole.  */
#define CHAR_IS_R5RS_DELIMITER(c)				\
  (CHAR_IS_BLANK (c)						\
   || ((c) == ')') || ((c) == '(') || ((c) == ';') || ((c) == '"'))

#define CHAR_IS_DELIMITER  CHAR_IS_R5RS_DELIMITER

/* Exponent markers, as defined in section 7.1.1 of R5RS, ``Lexical
   Structure''.  */
#define CHAR_IS_EXPONENT_MARKER(_chr)				\
  (((_chr) == 'e') || ((_chr) == 's') || ((_chr) == 'f')	\
   || ((_chr) == 'd') || ((_chr) == 'l'))

/* Forward declarations of the two comment readers that `flush_ws' calls
   before their definitions appear.  */

/* Read an SCSH block comment.  */
static inline SCM scm_read_scsh_block_comment (int chr, SCM port);
/* Read and discard the expression that follows a `#;' marker.  */
static SCM scm_read_commented_expression (int chr, SCM port);

/* Fill BUF with characters read from PORT, stopping at the first
   delimiter (which is pushed back onto PORT) or at end of file.  The
   number of characters stored is written to *READ.  Characters are
   lower-cased first when the reader is case-insensitive.

   Return zero when the token ended before BUF was full, and non-zero when
   BUF filled up, in which case more of the token may remain on PORT.  */
static inline int
read_token (SCM port, SCM buf, size_t *read)
{
  int buffer_full = 1;

  *read = 0;
  buf = scm_i_string_start_writing (buf);

  while (*read < scm_i_string_length (buf))
    {
      scm_t_wchar ch = scm_getc (port);

      if (ch == EOF)
        {
          buffer_full = 0;
          break;
        }

      if (SCM_CASE_INSENSITIVE_P)
        ch = uc_tolower (ch);

      if (CHAR_IS_DELIMITER (ch))
        {
          /* The delimiter belongs to whatever comes next.  */
          scm_i_string_stop_writing ();
          scm_ungetc (ch, port);
          return 0;
        }

      scm_i_string_set_x (buf, *read, ch);
      ++*read;
    }

  scm_i_string_stop_writing ();
  return buffer_full;
}

/* Read one whole token from PORT, however long, and return it as a
   string.  The token's length in characters is stored in *READ.  The
   token is collected in chunks of READER_BUFFER_SIZE characters.  */
static SCM
read_complete_token (SCM port, size_t *read)
{
  SCM chunk = scm_i_make_string (READER_BUFFER_SIZE, NULL);
  SCM token;
  size_t chunk_len;
  int more;

  /* Common case: the token fits in a single chunk.  */
  if (!read_token (port, chunk, read))
    return scm_i_substring (chunk, 0, *read);

  /* Otherwise keep appending chunks until one is not completely filled.
     Every chunk except the last is full, so trimming the result to *READ
     characters drops exactly the unused part of the final chunk.  */
  token = scm_string_copy (chunk);
  do
    {
      more = read_token (port, chunk, &chunk_len);
      token = scm_string_append (scm_list_2 (token, chunk));
      *read += chunk_len;
    }
  while (more);

  return scm_i_substring (token, 0, *read);
}

/* Skip whitespace from PORT and return the first non-whitespace character
   read.

   Besides blanks, this also skips `;' line comments, `#! ... !#' block
   comments and `#;' commented-out expressions.  If `#' is followed by any
   other character, that character is pushed back and '#' is returned.

   On end of file: when EOFERR is non-NULL it is used as the function name
   for a read error; when it is NULL the EOF value is returned to the
   caller.  A `#' immediately followed by end of file is always reported
   as an error (under the name "read_sharp").  */
static int
flush_ws (SCM port, const char *eoferr)
{
  register scm_t_wchar c;
  while (1)
    switch (c = scm_getc (port))
      {
      case EOF:
      goteof:
	/* Also reached by `goto' from the comment and `#' cases below.  */
	if (eoferr)
	  {
	    scm_i_input_error (eoferr,
			       port,
			       "end of file",
			       SCM_EOL);
	  }
	return c;

      case ';':
      lp:
	/* Line comment: discard characters up to and including the next
	   line terminator.  */
	switch (c = scm_getc (port))
	  {
	  case EOF:
	    goto goteof;
	  default:
	    goto lp;
	  case SCM_LINE_INCREMENTORS:
	    break;
	  }
	break;

      case '#':
	switch (c = scm_getc (port))
	  {
	  case EOF:
	    /* Force an error even if the caller tolerates EOF.  */
	    eoferr = "read_sharp";
	    goto goteof;
	  case '!':
	    scm_read_scsh_block_comment (c, port);
	    break;
	  case ';':
	    scm_read_commented_expression (c, port);
	    break;
	  default:
	    /* Some other `#' syntax: leave it for the caller to read.  */
	    scm_ungetc (c, port);
	    return '#';
	  }
	break;

      case SCM_LINE_INCREMENTORS:
      case SCM_SINGLE_SPACES:
      case '\t':
	break;

      default:
	return c;
      }

  /* Not reached: the loop above only exits through `return'.  */
  return 0;
}


/* Token readers.  */

/* Forward declarations of routines used before their definitions.  */

/* Read one complete datum from PORT.  */
static SCM scm_read_expression (SCM port);
/* Dispatch on the character following a `#'.  */
static SCM scm_read_sharp (int chr, SCM port);
/* Look up the `read-hash-extend' procedure registered for character C.  */
static SCM scm_get_hash_procedure (int c);
/* Record source properties for an object built by a `#' extension.  */
static SCM recsexpr (SCM obj, long line, int column, SCM filename);


/* Read the elements of a list from PORT up to the closing parenthesis and
   return the list.  CHR is not used; the caller has already consumed the
   opening character.

   Comments and whitespace between elements are skipped with `flush_ws';
   end of file inside the list is reported as a read error by `flush_ws'.
   A lone dot introduces the tail of a dotted pair.  As written, a dot
   appearing first, as in `(. X)', makes the function return X itself.

   When positions are recorded, the resulting list is entered into
   `scm_source_whash' with the line and column captured on entry.  When
   source copying is on, a parallel list is built in which every element
   that is a pair is replaced by COPY, which is never changed from #f in
   this function.  */
static SCM
scm_read_sexp (scm_t_wchar chr, SCM port)
#define FUNC_NAME "scm_i_lreadparen"
{
  register int c;
  register SCM tmp;
  register SCM tl, ans = SCM_EOL;
  SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;
  static const int terminating_char = ')';

  /* Need to capture line and column numbers here. */
  long line = SCM_LINUM (port);
  /* NOTE(review): the `- 1' presumably points the column back at the
     opening parenthesis, which was already consumed -- confirm.  */
  int column = SCM_COL (port) - 1;


  /* Empty list: nothing but whitespace/comments before the closer.  */
  c = flush_ws (port, FUNC_NAME);
  if (terminating_char == c)
    return SCM_EOL;

  scm_ungetc (c, port);
  if (scm_is_eq (scm_sym_dot,
		 (tmp = scm_read_expression (port))))
    {
      /* A dot as the very first element: return the datum after it.  */
      ans = scm_read_expression (port);
      if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
	scm_i_input_error (FUNC_NAME, port, "missing close paren",
			   SCM_EOL);
      return ans;
    }

  /* Build the head of the list structure. */
  ans = tl = scm_cons (tmp, SCM_EOL);

  if (SCM_COPY_SOURCE_P)
    ans2 = tl2 = scm_cons (scm_is_pair (tmp)
			   ? copy
			   : tmp,
			   SCM_EOL);

  /* Read the remaining elements, appending each one at the tail TL (and
     at TL2 for the parallel copy).  */
  while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
    {
      SCM new_tail;

      scm_ungetc (c, port);
      if (scm_is_eq (scm_sym_dot,
		     (tmp = scm_read_expression (port))))
	{
	  /* Dotted tail: the next datum becomes the final cdr.  */
	  SCM_SETCDR (tl, tmp = scm_read_expression (port));

	  /* NOTE(review): the copy gets the tail wrapped in a fresh cons,
	     i.e. as one more list element rather than as the cdr itself;
	     confirm whether that asymmetry is intended.  */
	  if (SCM_COPY_SOURCE_P)
	    SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
				       SCM_EOL));

	  c = flush_ws (port, FUNC_NAME);
	  if (terminating_char != c)
	    scm_i_input_error (FUNC_NAME, port,
			       "in pair: missing close paren", SCM_EOL);
	  goto exit;
	}

      new_tail = scm_cons (tmp, SCM_EOL);
      SCM_SETCDR (tl, new_tail);
      tl = new_tail;

      if (SCM_COPY_SOURCE_P)
	{
	  SCM new_tail2 = scm_cons (scm_is_pair (tmp)
				    ? copy
				    : tmp, SCM_EOL);
	  SCM_SETCDR (tl2, new_tail2);
	  tl2 = new_tail2;
	}
    }

 exit:
  /* Attach source properties to the head pair of the list just read.  */
  if (SCM_RECORD_POSITIONS_P)
    scm_whash_insert (scm_source_whash,
		      ans,
		      scm_make_srcprops (line, column,
					 SCM_FILENAME (port),
					 SCM_COPY_SOURCE_P
					 ? ans2
					 : SCM_UNDEFINED,
					 SCM_EOL));
  return ans;
}
#undef FUNC_NAME

static SCM
scm_read_string (int chr, SCM port)
#define FUNC_NAME "scm_lreadr"
{
  /* For strings smaller than C_STR, this function creates only one Scheme
     object (the string returned).  */

  SCM str = SCM_BOOL_F;
  unsigned c_str_len = 0;
  scm_t_wchar c;

  str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);
  while ('"' != (c = scm_getc (port)))
    {
      if (c == EOF)
        {
        str_eof:
          scm_i_input_error (FUNC_NAME, port,
                             "end of file in string constant", SCM_EOL);
        }

      if (c_str_len + 1 >= scm_i_string_length (str))
        {
          SCM addy = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);

          str = scm_string_append (scm_list_2 (str, addy));
        }

      if (c == '\\')
        {
          switch (c = scm_getc (port))
            {
            case EOF:
              goto str_eof;
            case '"':
            case '\\':
              break;
#if SCM_ENABLE_ELISP
            case '(':
            case ')':
              if (SCM_ESCAPED_PARENS_P)
                break;
              goto bad_escaped;
#endif
            case '\n':
              continue;
            case '0':
              c = '\0';
              break;
            case 'f':
              c = '\f';
              break;
            case 'n':
              c = '\n';
              break;
            case 'r':
              c = '\r';
              break;
            case 't':
              c = '\t';
              break;
            case 'a':
              c = '\007';
              break;
            case 'v':
              c = '\v';
              break;
            case 'x':
              {
                scm_t_wchar a, b;
                a = scm_getc (port);
                if (a == EOF)
                  goto str_eof;
                b = scm_getc (port);
                if (b == EOF)
                  goto str_eof;
                if ('0' <= a && a <= '9')
                  a -= '0';
                else if ('A' <= a && a <= 'F')
                  a = a - 'A' + 10;
                else if ('a' <= a && a <= 'f')
                  a = a - 'a' + 10;
                else
                  {
                    c = a;
                    goto bad_escaped;
                  }
                if ('0' <= b && b <= '9')
                  b -= '0';
                else if ('A' <= b && b <= 'F')
                  b = b - 'A' + 10;
                else if ('a' <= b && b <= 'f')
                  b = b - 'a' + 10;
                else
                  {
                    c = b;
                    goto bad_escaped;
                  }
                c = a * 16 + b;
                break;
              }
            case 'u':
              {
                scm_t_wchar a;
                int i;
                c = 0;
                for (i = 0; i < 4; i++)
                  {
                    a = scm_getc (port);
                    if (a == EOF)
                      goto str_eof;
                    if ('0' <= a && a <= '9')
                      a -= '0';
                    else if ('A' <= a && a <= 'F')
                      a = a - 'A' + 10;
                    else if ('a' <= a && a <= 'f')
                      a = a - 'a' + 10;
                    else
                      {
                        c = a;
                        goto bad_escaped;
                      }
                    c = c * 16 + a;
                  }
                break;
              }
            case 'U':
              {
                scm_t_wchar a;
                int i;
                c = 0;
                for (i = 0; i < 6; i++)
                  {
                    a = scm_getc (port);
                    if (a == EOF)
                      goto str_eof;
                    if ('0' <= a && a <= '9')
                      a -= '0';
                    else if ('A' <= a && a <= 'F')
                      a = a - 'A' + 10;
                    else if ('a' <= a && a <= 'f')
                      a = a - 'a' + 10;
                    else
                      {
                        c = a;
                        goto bad_escaped;
                      }
                    c = c * 16 + a;
                  }
                break;
              }
            default:
            bad_escaped:
              scm_i_input_error (FUNC_NAME, port,
                                 "illegal character in escape sequence: ~S",
                                 scm_list_1 (SCM_MAKE_CHAR (c)));
            }
        }
      str = scm_i_string_start_writing (str);
      scm_i_string_set_x (str, c_str_len++, c);
      scm_i_string_stop_writing ();
    }

  if (c_str_len > 0)
    {
      return scm_i_substring_copy (str, 0, c_str_len);
    }
  
  return scm_nullstr;
}
#undef FUNC_NAME


/* Read a token starting with CHR from PORT and return it as a number, or
   as a symbol when the token does not parse as a number.  */
static SCM
scm_read_number (scm_t_wchar chr, SCM port)
{
  SCM token, number;
  size_t read;

  /* Put CHR back so that the whole token is read in one go.  */
  scm_ungetc (chr, port);
  token = read_complete_token (port, &read);

  number = scm_string_to_number (token, SCM_UNDEFINED);
  if (scm_is_true (number))
    return number;

  /* Not a number after all: return a symbol instead.  */
  return scm_string_to_symbol (token);
}

/* Read a token starting with CHR from PORT and return it as a symbol.
   When the keyword style is `postfix' and the token is at least two
   characters long and ends with a colon, return instead the keyword named
   by the token without its final colon.  */
static SCM
scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port)
{
  size_t read = 0;
  SCM token;
  int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix);

  scm_ungetc (chr, port);
  token = read_complete_token (port, &read);

  if (postfix
      && (read > 1)
      && scm_i_string_ref (token, read - 1) == ':')
    {
      SCM name = scm_i_substring (token, 0, read - 1);

      return scm_symbol_to_keyword (scm_string_to_symbol (name));
    }

  return scm_string_to_symbol (token);
}

/* Read a number introduced by `#' and CHR from PORT.

   When CHR is a radix prefix (b, o, d or x, in either case) the token
   that follows is parsed in that radix.  For any other CHR (the caller
   also routes the exactness prefixes i and e here), CHR and the `#' are
   pushed back so that they are read as part of the token, which is then
   parsed in radix 10.

   Return the number; signal a read error when the token is not a valid
   number.  */
static SCM
scm_read_number_and_radix (scm_t_wchar chr, SCM port)
#define FUNC_NAME "scm_lreadr"
{
  SCM token, number;
  size_t read;
  unsigned int radix;

  switch (chr)
    {
    case 'B':
    case 'b':
      radix = 2;
      break;

    case 'o':
    case 'O':
      radix = 8;
      break;

    case 'd':
    case 'D':
      radix = 10;
      break;

    case 'x':
    case 'X':
      radix = 16;
      break;

    default:
      /* Not a radix prefix: hand "#CHR" back to the port so that it
         becomes part of the token passed to the number parser.  */
      scm_ungetc (chr, port);
      scm_ungetc ('#', port);
      radix = 10;
    }

  /* `read_complete_token' allocates its own buffer, so none is created
     here.  */
  token = read_complete_token (port, &read);
  number = scm_string_to_number (token, scm_from_uint (radix));

  if (scm_is_true (number))
    return number;

  scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);

  return SCM_BOOL_F;
}
#undef FUNC_NAME

/* Read the datum following a quote character CHR from PORT and return it
   wrapped in the matching form: (quote X) for ', (quasiquote X) for `,
   and (unquote X) or (unquote-splicing X) for , and ,@ respectively.
   Any other CHR is a programming error and aborts the process.

   When positions are recorded, the new form is entered into
   `scm_source_whash' with the line and column captured on entry.  */
static SCM
scm_read_quote (int chr, SCM port)
{
  SCM head, form;
  long line = SCM_LINUM (port);
  int column = SCM_COL (port) - 1;

  if (chr == '`')
    head = scm_sym_quasiquote;
  else if (chr == '\'')
    head = scm_sym_quote;
  else if (chr == ',')
    {
      /* `,@' is unquote-splicing; a lone `,' is unquote.  */
      scm_t_wchar next = scm_getc (port);

      if (next == '@')
        head = scm_sym_uq_splicing;
      else
        {
          scm_ungetc (next, port);
          head = scm_sym_unquote;
        }
    }
  else
    {
      fprintf (stderr, "%s: unhandled quote character (%i)\n",
               "scm_read_quote", chr);
      abort ();
    }

  form = scm_cons2 (head, scm_read_expression (port), SCM_EOL);

  if (SCM_RECORD_POSITIONS_P)
    {
      SCM source_copy;

      if (SCM_COPY_SOURCE_P)
        source_copy = scm_cons2 (SCM_CAR (form),
                                 SCM_CAR (SCM_CDR (form)),
                                 SCM_EOL);
      else
        source_copy = SCM_UNDEFINED;

      scm_whash_insert (scm_source_whash, form,
                        scm_make_srcprops (line, column,
                                           SCM_FILENAME (port),
                                           source_copy,
                                           SCM_EOL));
    }

  return form;
}

/* Symbols that `scm_read_syntax' puts at the head of the forms it builds
   for #', #`, #, and #,@ respectively.  */
SCM_SYMBOL (sym_syntax, "syntax");
SCM_SYMBOL (sym_quasisyntax, "quasisyntax");
SCM_SYMBOL (sym_unsyntax, "unsyntax");
SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");

/* Counterpart of `scm_read_quote' for the `#' variants: read the datum
   following CHR from PORT and return it wrapped as (syntax X) for ',
   (quasisyntax X) for `, and (unsyntax X) or (unsyntax-splicing X) for ,
   and ,@ respectively.  Any other CHR aborts the process.

   When positions are recorded, the new form is entered into
   `scm_source_whash' with the line and column captured on entry.  */
static SCM
scm_read_syntax (int chr, SCM port)
{
  SCM head, form;
  long line = SCM_LINUM (port);
  int column = SCM_COL (port) - 1;

  if (chr == '`')
    head = sym_quasisyntax;
  else if (chr == '\'')
    head = sym_syntax;
  else if (chr == ',')
    {
      /* `,@' selects the splicing variant.  */
      int next = scm_getc (port);

      if (next == '@')
        head = sym_unsyntax_splicing;
      else
        {
          scm_ungetc (next, port);
          head = sym_unsyntax;
        }
    }
  else
    {
      fprintf (stderr, "%s: unhandled syntax character (%i)\n",
               "scm_read_syntax", chr);
      abort ();
    }

  form = scm_cons2 (head, scm_read_expression (port), SCM_EOL);

  if (SCM_RECORD_POSITIONS_P)
    {
      SCM source_copy;

      if (SCM_COPY_SOURCE_P)
        source_copy = scm_cons2 (SCM_CAR (form),
                                 SCM_CAR (SCM_CDR (form)),
                                 SCM_EOL);
      else
        source_copy = SCM_UNDEFINED;

      scm_whash_insert (scm_source_whash, form,
                        scm_make_srcprops (line, column,
                                           SCM_FILENAME (port),
                                           source_copy,
                                           SCM_EOL));
    }

  return form;
}

/* Skip the rest of a `;' comment on PORT, up to and including the next
   newline or up to end of file.  CHR is not used.  Always return
   SCM_UNSPECIFIED, which `scm_read_expression' ignores.

   Every character is read with `scm_getc', the same primitive `flush_ws'
   uses to skip `;' comments.  Reading the first unit as a raw byte and
   the rest as characters could split a multibyte character that
   immediately follows the semicolon, leaving the following `scm_getc'
   to start decoding in the middle of it.  */
static inline SCM
scm_read_semicolon_comment (int chr, SCM port)
{
  int c;

  do
    c = scm_getc (port);
  while ((c != EOF) && (c != '\n'));

  return SCM_UNSPECIFIED;
}


/* Sharp readers, i.e. readers called after a `#' sign has been read.  */

/* Return the boolean denoted by CHR, the character after `#': #t for `t'
   or `T', #f for `f' or `F'.  Any other character yields
   SCM_UNSPECIFIED.  PORT is not used.  */
static SCM
scm_read_boolean (int chr, SCM port)
{
  if (chr == 't' || chr == 'T')
    return SCM_BOOL_T;

  if (chr == 'f' || chr == 'F')
    return SCM_BOOL_F;

  return SCM_UNSPECIFIED;
}

/* Read a character literal from PORT; the caller has consumed `#\'.  The
   incoming value of CHR is not used.

   The token after `#\' is read with `read_token' into a buffer of
   READER_CHAR_NAME_MAX_SIZE characters and interpreted as follows:
     - empty token: the next character on the port (a delimiter) is the
       character itself;
     - one character: that character;
     - a token starting with an octal digit: if it parses as a small
       integer in base 8 and is a valid Unicode code point, that
       character;
     - otherwise: a character name looked up with
       `scm_i_charname_to_char'.
   A token that overflows the buffer, or that matches none of the above,
   is reported as an unknown character name.  */
static SCM
scm_read_character (scm_t_wchar chr, SCM port)
#define FUNC_NAME "scm_lreadr"
{
  SCM charname = scm_i_make_string (READER_CHAR_NAME_MAX_SIZE, NULL);
  size_t charname_len;
  scm_t_wchar cp;
  int overflow;

  overflow = read_token (port, charname, &charname_len);
  /* Trim the buffer to what was actually read; done before the overflow
     check so that the error message shows the token.  */
  charname = scm_c_substring (charname, 0, charname_len);

  if (overflow)
    goto char_error;

  if (charname_len == 0)
    {
      /* `read_token' stopped immediately, so the character being named
	 is either a delimiter or end of file.  */
      chr = scm_getc (port);
      if (chr == EOF)
	scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
			   "while reading character", SCM_EOL);

      /* CHR must be a token delimiter, like a whitespace.  */
      return (SCM_MAKE_CHAR (chr));
    }

  if (charname_len == 1)
    return SCM_MAKE_CHAR (scm_i_string_ref (charname, 0));

  cp = scm_i_string_ref (charname, 0);
  if (cp >= '0' && cp < '8')
    {
      /* Dirk:FIXME::  This type of character syntax is not R5RS
       * compliant.  Further, it should be verified that the constant
       * does only consist of octal digits.  */
      SCM p = scm_string_to_number (charname, scm_from_uint (8));
      /* If the token is not a small integer in base 8, fall through to
	 the character-name lookup below.  */
      if (SCM_I_INUMP (p))
        {
          scm_t_wchar c = SCM_I_INUM (p);
          if (SCM_IS_UNICODE_CHAR (c))
            return SCM_MAKE_CHAR (c);
          else
            scm_i_input_error (FUNC_NAME, port, 
                               "out-of-range octal character escape: ~a",
                               scm_list_1 (charname));
        }
    }

  /* The names of characters should never have non-Latin1
     characters.  */
  if (scm_i_is_narrow_string (charname)
      || scm_i_try_narrow_string (charname))
    { SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
                                       charname_len);
      if (scm_is_true (ch))
        return ch;
    }

 char_error:
  scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
		     scm_list_1 (charname));

  return SCM_UNSPECIFIED;
}
#undef FUNC_NAME

/* Read the datum following a keyword prefix (CHR, used only in the error
   message) from PORT and return the keyword it names.  Signal a read
   error when that datum is not a symbol.

   The datum is read with the general expression reader rather than a
   dedicated symbol reader, so that it follows whatever delimiters are
   currently valid for symbols.

   XXX: This implementation allows sloppy syntaxes like `#:  key'.  */
static inline SCM
scm_read_keyword (int chr, SCM port)
{
  SCM symbol = scm_read_expression (port);

  if (scm_is_symbol (symbol))
    return scm_symbol_to_keyword (symbol);

  scm_i_input_error ("scm_read_keyword", port,
                     "keyword prefix `~a' not followed by a symbol: ~s",
                     scm_list_2 (SCM_MAKE_CHAR (chr), symbol));

  return scm_symbol_to_keyword (symbol);
}

/* Read the elements of a `#(...)' vector from PORT and return the
   vector.  The elements are read as a list and then converted.  */
static inline SCM
scm_read_vector (int chr, SCM port)
{
  /* `scm_read_sexp ()' is called directly, rather than through a generic
     reader, so that the list syntax used for the elements is guaranteed;
     that is an implementation detail of this function.  */
  SCM elements = scm_read_sexp (chr, port);

  return scm_vector (elements);
}

/* Read a `#' object whose first character is CHR by delegating to the
   array reader; note that the array reader takes the port first.  */
static inline SCM
scm_read_srfi4_vector (int chr, SCM port)
{
  SCM array = scm_i_read_array (port, chr);

  return array;
}

/* Read a bytevector literal from PORT.  The caller has consumed `#v';
   this expects `u8(' to follow, reads the elements as a list and
   converts that list to a bytevector.  Any other prefix, including end
   of file, is reported as a read error.  */
static SCM
scm_read_bytevector (scm_t_wchar chr, SCM port)
{
  chr = scm_getc (port);
  if (chr != 'u')
    goto syntax;

  chr = scm_getc (port);
  if (chr != '8')
    goto syntax;

  chr = scm_getc (port);
  if (chr != '(')
    goto syntax;

  return scm_u8_list_to_bytevector (scm_read_sexp (chr, port));

 syntax:
  /* The last parameter of `scm_i_input_error' is the list of arguments
     for the message.  The message has no format directives, so the list
     is empty.  (The offending CHR may be EOF and is therefore not turned
     into a Scheme character here.)  */
  scm_i_input_error ("read_bytevector", port,
		     "invalid bytevector prefix",
		     SCM_EOL);
  return SCM_UNSPECIFIED;
}

/* Read the digits of a `#*10101'-style bit vector from PORT and return
   the bit vector.  Reading stops at the first character that is neither
   `0' nor `1'; that character is pushed back unless it is end of file.
   The incoming value of CHR is not used.  */
static SCM
scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
{
  /* Bits are consed up in reverse and flipped at the end.  This is
     terribly inefficient but who cares?  */
  SCM bits = SCM_EOL;

  while (1)
    {
      chr = scm_getc (port);

      if (chr == '0')
        bits = scm_cons (SCM_BOOL_F, bits);
      else if (chr == '1')
        bits = scm_cons (SCM_BOOL_T, bits);
      else
        break;
    }

  if (chr != EOF)
    scm_ungetc (chr, port);

  return scm_bitvector (scm_reverse_x (bits, SCM_EOL));
}

/* Skip a `#! ... !#' block comment on PORT; the caller has consumed the
   opening `#!'.  CHR is not used.  Signal a read error if end of file is
   reached before the closing `!#'.  Always return SCM_UNSPECIFIED.  */
static inline SCM
scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
{
  /* Non-zero when the previous byte was `!'.  */
  int prev_was_bang = 0;
  int c;

  /* Bytes rather than characters are read: the comment's text is thrown
     away, so decoding it is unnecessary.  This presumes that `#' and `!'
     are encoded as themselves whatever the source encoding is.  */
  while (1)
    {
      c = scm_get_byte_or_eof (port);

      if (c == EOF)
        scm_i_input_error ("skip_block_comment", port,
                           "unterminated `#! ... !#' comment", SCM_EOL);

      if (c == '#' && prev_was_bang)
        break;

      prev_was_bang = (c == '!');
    }

  return SCM_UNSPECIFIED;
}

/* Handle a `#;' datum comment: read the next expression from PORT and
   throw it away.  CHR is not used.  Signal a read error when end of file
   comes before any expression.  Always return SCM_UNSPECIFIED.  */
static SCM
scm_read_commented_expression (scm_t_wchar chr, SCM port)
{
  /* A NULL function name makes `flush_ws' hand EOF back instead of
     raising its own error.  */
  scm_t_wchar next = flush_ws (port, (char *) NULL);

  if (next == EOF)
    scm_i_input_error ("read_commented_expression", port,
                       "no expression after #; comment", SCM_EOL);

  scm_ungetc (next, port);
  (void) scm_read_expression (port);

  return SCM_UNSPECIFIED;
}

/* Read a symbol written in Guile's extended syntax,

       #{This is all a symbol name}#

   from PORT and return it.  The caller has consumed `#{'; the incoming
   value of CHR (expected to be `{') is not used.  Everything up to the
   terminating `}#' becomes the symbol's name.  If end of file is reached
   first, the characters read so far are used.  */
static SCM
scm_read_extended_symbol (scm_t_wchar chr, SCM port)
{
  /* SAW_BRACE is set while a `}' has been read but not yet stored,
     because it might be the first half of the `}#' terminator.  LEN is
     the number of characters stored in BUF so far.  */
  int saw_brace = 0;
  size_t len = 0;
  SCM buf = scm_i_make_string (1024, NULL);

  buf = scm_i_string_start_writing (buf);

  while ((chr = scm_getc (port)) != EOF)
    {
      if (saw_brace)
	{
	  if (chr == '#')
	    break;

	  /* The pending `}' was not part of the terminator: store it
	     together with the current character.  */
	  saw_brace = 0;
	  scm_i_string_set_x (buf, len++, '}');
	  scm_i_string_set_x (buf, len++, chr);
	}
      else if (chr == '}')
	saw_brace = 1;
      else
	scm_i_string_set_x (buf, len++, chr);

      /* Up to two characters are stored per iteration, so grow the
	 buffer while at least two free slots remain.  LEN keeps counting
	 across the growth: `scm_string_append' carries over what has
	 been stored so far, and resetting LEN would overwrite it.  */
      if (len >= scm_i_string_length (buf) - 2)
	{
	  SCM addy;

	  scm_i_string_stop_writing ();
	  addy = scm_i_make_string (1024, NULL);
	  buf = scm_string_append (scm_list_2 (buf, addy));
	  buf = scm_i_string_start_writing (buf);
	}
    }
  scm_i_string_stop_writing ();

  return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
}


/* Top-level token readers, i.e., dispatchers.  */

/* Give the `read-hash-extend' procedure registered for CHR, if any, a
   chance to read the `#CHR' object from PORT.

   Return what that procedure returns.  When positions are recorded the
   value is first passed through `recsexpr', with the line captured
   before the call and that column minus two.  Return SCM_UNSPECIFIED when no
   procedure is registered for CHR or when the procedure itself returns
   SCM_UNSPECIFIED.  */
static SCM
scm_read_sharp_extension (int chr, SCM port)
{
  SCM proc = scm_get_hash_procedure (chr);
  SCM got;
  long line;
  int column;

  if (!scm_is_true (scm_procedure_p (proc)))
    return SCM_UNSPECIFIED;

  /* Capture the position before the procedure consumes any input.  */
  line = SCM_LINUM (port);
  column = SCM_COL (port) - 2;

  got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
  if (scm_is_eq (got, SCM_UNSPECIFIED))
    return SCM_UNSPECIFIED;

  if (SCM_RECORD_POSITIONS_P)
    return recsexpr (got, line, column, SCM_FILENAME (port));

  return got;
}

/* The reader for the sharp `#' character.  It basically dispatches reads
   among the above token readers.

   The character after `#' is read from PORT (the incoming value of CHR
   is not used).  A procedure installed with `read-hash-extend' for that
   character is tried first; if there is none, or if it returns
   SCM_UNSPECIFIED, the built-in syntaxes below are used.  An unknown
   character is reported as a read error.

   The result is SCM_UNSPECIFIED when what was read produced no datum
   (e.g., a comment).  */
static SCM
scm_read_sharp (scm_t_wchar chr, SCM port)
#define FUNC_NAME "scm_lreadr"
{
  SCM result;

  chr = scm_getc (port);

  result = scm_read_sharp_extension (chr, port);
  if (!scm_is_eq (result, SCM_UNSPECIFIED))
    return result;

  switch (chr)
    {
    case '\\':
      return (scm_read_character (chr, port));
    case '(':
      return (scm_read_vector (chr, port));
    case 's':
    case 'u':
    case 'f':
      /* This one may return either a boolean or an SRFI-4 vector.  */
      return (scm_read_srfi4_vector (chr, port));
    case 'v':
      return (scm_read_bytevector (chr, port));
    case '*':
      return (scm_read_guile_bit_vector (chr, port));
    case 't':
    case 'T':
    case 'F':
      /* These can only be booleans; a lower-case `f' is handled above.  */
      return (scm_read_boolean (chr, port));
    case ':':
      return (scm_read_keyword (chr, port));
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case '@':
#if SCM_ENABLE_DEPRECATED
      /* See below for 'i' and 'e'. */
    case 'a':
    case 'c':
    case 'y':
    case 'h':
    case 'l':
#endif
      return (scm_i_read_array (port, chr));

    case 'i':
    case 'e':
#if SCM_ENABLE_DEPRECATED
      {
	/* When next char is '(', it really is an old-style
	   uniform array. */
	scm_t_wchar next_c = scm_getc (port);
	if (next_c != EOF)
	  scm_ungetc (next_c, port);
	if (next_c == '(')
	  return scm_i_read_array (port, chr);
	/* Fall through. */
      }
#endif
    case 'b':
    case 'B':
    case 'o':
    case 'O':
    case 'd':
    case 'D':
    case 'x':
    case 'X':
    case 'I':
    case 'E':
      return (scm_read_number_and_radix (chr, port));
    case '{':
      return (scm_read_extended_symbol (chr, port));
    case '!':
      return (scm_read_scsh_block_comment (chr, port));
    case ';':
      return (scm_read_commented_expression (chr, port));
    case '`':
    case '\'':
    case ',':
      return (scm_read_syntax (chr, port));
    default:
      /* Any `read-hash-extend' procedure for CHR was already given its
	 chance above and declined.  It is not called a second time, as
	 that could make it consume more input before the error is
	 raised.  */
      scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
			 scm_list_1 (SCM_MAKE_CHAR (chr)));
    }

  return SCM_UNSPECIFIED;
}
#undef FUNC_NAME

/* Read one datum from PORT and return it, dispatching on its first
   character.  Whitespace, `;' comments and `#' syntaxes that produce no
   datum (signalled by SCM_UNSPECIFIED) are skipped.  Return SCM_EOF_VAL
   at end of file; a stray `)' is reported as a read error.  */
static SCM
scm_read_expression (SCM port)
#define FUNC_NAME "scm_read_expression"
{
  while (1)
    {
      register scm_t_wchar chr;

      chr = scm_getc (port);

      switch (chr)
	{
	case SCM_WHITE_SPACES:
	case SCM_LINE_INCREMENTORS:
	  break;
	case ';':
	  (void) scm_read_semicolon_comment (chr, port);
	  break;
	case '(':
	  return (scm_read_sexp (chr, port));
	case '"':
	  return (scm_read_string (chr, port));
	case '\'':
	case '`':
	case ',':
	  return (scm_read_quote (chr, port));
	case '#':
	  {
	    SCM result;
	    result = scm_read_sharp (chr, port);
	    if (scm_is_eq (result, SCM_UNSPECIFIED))
	      /* We read a comment or some such.  */
	      break;
	    else
	      return result;
	  }
	case ')':
	  scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
	  break;
	case EOF:
	  return SCM_EOF_VAL;
	case ':':
	  if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
	    return scm_symbol_to_keyword (scm_read_expression (port));
	  /* Fall through.  */

	default:
	  {
	    /* A token that starts with a digit, a sign or a dot may be a
	       number.  CHR is compared directly rather than looked up
	       with `strchr': `strchr' converts its argument to `char',
	       so a wide character whose low byte equals one of these
	       characters would match, and so would NUL (the string's
	       terminator).  */
	    if (((chr >= '0') && (chr <= '9'))
		|| (chr == '+') || (chr == '-') || (chr == '.'))
	      return (scm_read_number (chr, port));
	    else
	      return (scm_read_mixed_case_symbol (chr, port));
	  }
	}
    }
}
#undef FUNC_NAME


/* Actual reader.  */

SCM_DEFINE (scm_read, "read", 0, 1, 0, 
            (SCM port),
	    "Read an s-expression from the input port @var{port}, or from\n"
	    "the current input port if @var{port} is not specified.\n"
	    "Any whitespace before the next token is discarded.")
#define FUNC_NAME s_scm_read
{
  int first;

  if (SCM_UNBNDP (port))
    port = scm_current_input_port ();
  SCM_VALIDATE_OPINPORT (1, port);

  /* Skip leading whitespace and comments.  Passing a NULL function name
     makes `flush_ws' return EOF instead of signalling an error, so that
     end of input can be reported as the EOF object.  */
  first = flush_ws (port, (char *) NULL);
  if (first != EOF)
    {
      scm_ungetc (first, port);
      return (scm_read_expression (port));
    }

  return SCM_EOF_VAL;
}
#undef FUNC_NAME


/* Used when recording expressions constructed by `scm_read_sharp ()'.

   Walk OBJ and enter every pair reachable through its list structure
   that has no entry in `scm_source_whash' yet, giving each the position
   LINE, COLUMN and FILENAME.  When source copying is on, a copy of the
   list is built and stored in the source properties as well.  OBJ itself
   is returned in all cases; non-pairs are returned untouched.  */
static SCM
recsexpr (SCM obj, long line, int column, SCM filename)
{
  SCM tmp, copy;

  if (!scm_is_pair (obj))
    return obj;

  /* If this sexpr is visible in the read:sharp source, we want to
     keep that information, so only record non-constant cons cells
     which haven't previously been read by the reader. */
  if (!scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
    return obj;

  tmp = obj;
  if (SCM_COPY_SOURCE_P)
    {
      /* COPY stays on the first pair of the copy, which is what gets
	 recorded below; TAIL walks to the end as pairs are appended.  */
      SCM tail;

      copy = tail = scm_cons (recsexpr (SCM_CAR (obj), line, column,
					filename),
			      SCM_UNDEFINED);
      while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
	{
	  SCM_SETCDR (tail, scm_cons (recsexpr (SCM_CAR (tmp),
						line,
						column,
						filename),
				      SCM_UNDEFINED));
	  tail = SCM_CDR (tail);
	}
      /* Terminate the copy with the original's final cdr (the empty
	 list, or the tail of an improper list).  */
      SCM_SETCDR (tail, tmp);
    }
  else
    {
      recsexpr (SCM_CAR (obj), line, column, filename);
      while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
	recsexpr (SCM_CAR (tmp), line, column, filename);
      copy = SCM_UNDEFINED;
    }

  scm_whash_insert (scm_source_whash,
		    obj,
		    scm_make_srcprops (line,
				       column,
				       filename,
				       copy,
				       SCM_EOL));
  return obj;
}

/* Add, replace or remove an entry of the read-hash-procedures alist.
   This could be written in Scheme, but maybe it will also be used by C
   code during initialisation.  */
SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
            (SCM chr, SCM proc),
	    "Install the procedure @var{proc} for reading expressions\n"
	    "starting with the character sequence @code{#} and @var{chr}.\n"
	    "@var{proc} will be called with two arguments:  the character\n"
	    "@var{chr} and the port to read further data from. The object\n"
	    "returned will be the return value of @code{read}. \n"
	    "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
	    )
#define FUNC_NAME s_scm_read_hash_extend
{
  SCM this;
  SCM prev;

  SCM_VALIDATE_CHAR (1, chr);
  SCM_ASSERT (scm_is_false (proc)
	      || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
	      proc, SCM_ARG2, FUNC_NAME);

  /* Look for an existing entry for CHR.  PREV trails one pair behind
     THIS, and is #f while THIS is the first pair.  */
  for (prev = SCM_BOOL_F, this = *scm_read_hash_procedures;
       !scm_is_null (this);
       prev = this, this = SCM_CDR (this))
    {
      if (!scm_is_eq (chr, SCM_CAAR (this)))
        continue;

      if (scm_is_true (proc))
        /* Replace the procedure of the existing entry.  */
        scm_set_cdr_x (SCM_CAR (this), proc);
      else if (scm_is_false (prev))
        /* Remove the first entry.  */
        *scm_read_hash_procedures = SCM_CDR (*scm_read_hash_procedures);
      else
        /* Remove an entry further down by unlinking it.  */
        scm_set_cdr_x (prev, SCM_CDR (this));

      return SCM_UNSPECIFIED;
    }

  /* No entry for CHR yet: add one at the front, unless the request was
     to remove it.  */
  if (scm_is_true (proc))
    *scm_read_hash_procedures =
      scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);

  return SCM_UNSPECIFIED;
}
#undef FUNC_NAME

/* Look up character C in the read-hash-procedures alist.  Returns the
   cdr of the first entry whose key character equals C, or SCM_BOOL_F
   when the alist has no such entry.  */
static SCM
scm_get_hash_procedure (int c)
{
  SCM entries;

  for (entries = *scm_read_hash_procedures;
       !scm_is_null (entries);
       entries = SCM_CDR (entries))
    {
      if (SCM_CHAR (SCM_CAAR (entries)) == c)
        return SCM_CDAR (entries);
    }

  return SCM_BOOL_F;
}

#define SCM_ENCODING_SEARCH_SIZE (500)

/* Search the first few hundred characters of a file for
   an emacs-like coding declaration.

   Up to SCM_ENCODING_SEARCH_SIZE bytes are read from PORT, after which
   PORT is seeked back to offset 0.  The declaration looked for is the
   word "coding" immediately followed by ':' or '=', optional blanks,
   and a name made of alphanumerics, '_', '-' and '.'.  It only counts
   if it lies in a comment: either a ';' precedes it on the same line,
   or it sits between the first "#!" and the first "!#" of the header.

   Returns the encoding name converted to upper case in a buffer
   obtained from scm_malloc, which the caller must release with free,
   or NULL when no acceptable declaration was found.  Signals a misc
   error when the data starts with a UTF-8 byte order mark but declares
   an encoding other than UTF-8.  */
char *
scm_scan_for_encoding (SCM port)
{
  char header[SCM_ENCODING_SEARCH_SIZE+1];
  size_t bytes_read;
  char *encoding = NULL;
  int utf8_bom = 0;
  const char *pos;
  const char *token;
  const char *scan;
  size_t len;
  size_t k;
  int in_comment;

  bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
  scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));

  /* The string functions used below need a terminated buffer; HEADER
     has one spare byte for exactly this purpose.  With the terminator
     in place, every forward scan stops at the end of the data read.  */
  header[bytes_read] = '\0';

  if (bytes_read >= 3
      && header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
    utf8_bom = 1;

  /* search past "coding[:=]" */
  pos = header;
  for (;;)
    {
      pos = strstr (pos, "coding");
      if (pos == NULL)
        return NULL;

      pos += strlen ("coding");
      if (*pos == ':' || *pos == '=')
        {
          pos++;
          break;
        }
    }

  /* skip spaces */
  while (*pos == ' ' || *pos == '\t')
    pos++;

  /* grab the next token; the <ctype.h> functions require a value
     representable as unsigned char, hence the casts.  */
  token = pos;
  len = 0;
  while (isalnum ((unsigned char) token[len]) || token[len] == '_'
         || token[len] == '-' || token[len] == '.')
    len++;

  if (len == 0)
    return NULL;

  /* push backwards to make sure we were in a comment: walk from the
     character just before the token down to and including header[0],
     stopping at the start of the current line.  */
  in_comment = 0;
  for (scan = token; scan > header; )
    {
      scan--;
      if (*scan == ';')
        {
          in_comment = 1;
          break;
        }
      if (*scan == '\n')
        break;
    }

  if (!in_comment)
    {
      /* This wasn't in a semicolon comment. Check for a
         hash-bang comment.  Both delimiters must be present before
         their positions can be compared with the token's.  */
      const char *beg = strstr (header, "#!");
      const char *end = strstr (header, "!#");

      if (beg != NULL && end != NULL && beg < token && token < end)
        in_comment = 1;
    }

  if (!in_comment)
    /* This wasn't in a comment */
    return NULL;

  encoding = scm_malloc (len + 1);
  for (k = 0; k < len; k++)
    encoding[k] = (char) toupper ((unsigned char) token[k]);
  encoding[len] = '\0';

  if (utf8_bom && strcmp (encoding, "UTF-8") != 0)
    {
      /* scm_misc_error does not return, so move the name into a Scheme
         string and release the C buffer before signalling.  */
      SCM declared = scm_from_locale_string (encoding);

      free (encoding);
      scm_misc_error (NULL,
                      "the port input declares the encoding ~s but is encoded as UTF-8",
                      scm_list_1 (declared));
    }

  return encoding;
}

/* Scheme-level wrapper around scm_scan_for_encoding: returns the
   declared encoding of PORT as a Scheme string, or #f when
   scm_scan_for_encoding finds no declaration.  */
SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
            (SCM port),
            "Scans the port for an EMACS-like character coding declaration\n"
            "near the top of the contents of a port with random-accessible contents.\n"
            "The coding declaration is of the form\n"
            "@code{coding: XXXXX} and must appear in a scheme comment.\n"
            "\n"
            "Returns a string containing the character encoding of the file\n"
            "if a declaration was found, or @code{#f} otherwise.\n")
#define FUNC_NAME s_scm_file_encoding
{
  char *enc;
  SCM s_enc;

  enc = scm_scan_for_encoding (port);
  if (enc == NULL)
    return SCM_BOOL_F;

  /* ENC is a C buffer owned by us; convert it to a Scheme string and
     release it before returning.  */
  s_enc = scm_from_locale_string (enc);
  free (enc);
  return s_enc;
}
#undef FUNC_NAME

/* Initialise the reader: define the Scheme variable
   `read-hash-procedures' with the empty list as its initial value and
   remember the location of its value in scm_read_hash_procedures,
   register the reader options, then pull in "libguile/read.x"
   (presumably the generated registration code for this file's
   SCM_DEFINE and SCM_SYMBOL forms -- confirm against the build).  */
void
scm_init_read (void)
{
  scm_read_hash_procedures =
    SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));

  scm_init_opts (scm_read_options, scm_read_opts);
#include "libguile/read.x"
}

/*
  Local Variables:
  c-file-style: "gnu"
  End:
*/
-												Implement R6RS bytevector read syntax.

* libguile/read.c (scm_read_bytevector): New function.
  (scm_read_sharp): Add `v' case for bytevectors.

* test-suite/lib.scm (exception:read-error): New variable.

* test-suite/tests/bytevectors.test ("Datum Syntax"): New test set.

											
										
										
											2009-06-19 00:47:11 +02:00
+								/* Copyright (C) 1995,1996,1997,1999,2000,2001,2003, 2004, 2006, 2007, 2008, 2009 Free Software
-												(scm_input_error): Pass arg list parameter to scm_error_scm,
rather than SCM_EOL.  Needed by "Unknown # object" case in scm_lreadr.

											
										
										
											2003-06-04 16:36:03 +00:00
+								 * Foundation, Inc.
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								 *
-												Changed license terms to the plain LGPL thru-out.

											
										
										
											2003-04-05 19:15:35 +00:00
+								 * This library is free software; you can redistribute it and/or
-												Change Guile license to LGPLv3+

(Not quite finished, the following will be done tomorrow.
   module/srfi/*.scm
   module/rnrs/*.scm
   module/scripts/*.scm
   testsuite/*.scm
   guile-readline/*
)

											
										
										
											2009-06-17 00:22:09 +01:00
+								 * modify it under the terms of the GNU Lesser General Public License
 								 * as published by the Free Software Foundation; either version 3 of
 								 * the License, or (at your option) any later version.
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								 *
-												Change Guile license to LGPLv3+

(Not quite finished, the following will be done tomorrow.
   module/srfi/*.scm
   module/rnrs/*.scm
   module/scripts/*.scm
   testsuite/*.scm
   guile-readline/*
)

											
										
										
											2009-06-17 00:22:09 +01:00
+								 * This library is distributed in the hope that it will be useful, but
 								 * WITHOUT ANY WARRANTY; without even the implied warranty of
-												Changed license terms to the plain LGPL thru-out.

											
										
										
											2003-04-05 19:15:35 +00:00
+								 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 								 * Lesser General Public License for more details.
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								 *
-												Changed license terms to the plain LGPL thru-out.

											
										
										
											2003-04-05 19:15:35 +00:00
+								 * You should have received a copy of the GNU Lesser General Public
 								 * License along with this library; if not, write to the Free Software
-												Change Guile license to LGPLv3+

(Not quite finished, the following will be done tomorrow.
   module/srfi/*.scm
   module/rnrs/*.scm
   module/scripts/*.scm
   testsuite/*.scm
   guile-readline/*
)

											
										
										
											2009-06-17 00:22:09 +01:00
+								 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 								 * 02110-1301 USA
-												Changed license terms to the plain LGPL thru-out.

											
										
										
											2003-04-05 19:15:35 +00:00
+								 */
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#ifdef HAVE_CONFIG_H
 								# include <config.h>
 								#endif
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								#include <stdio.h>
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#include <ctype.h>
 								#include <string.h>
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								#include <unistd.h>
 								#include <unicase.h>
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/_scm.h"
-												Implement R6RS bytevector read syntax.

* libguile/read.c (scm_read_bytevector): New function.
  (scm_read_sharp): Add `v' case for bytevectors.

* test-suite/lib.scm (exception:read-error): New variable.

* test-suite/tests/bytevectors.test ("Datum Syntax"): New test set.

											
										
										
											2009-06-19 00:47:11 +02:00
+								#include "libguile/bytevectors.h"
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/chars.h"
 								#include "libguile/eval.h"
-												rename unif.[ch] to arrays.[ch]

* libguile/Makefile.am:
* libguile/unif.c:
* libguile/unif.h:
* libguile/arrays.c:
* libguile/arrays.h: Rename unif.[ch] to arrays.[ch].

* libguile.h:
* libguile/array-handle.c:
* libguile/array-map.c:
* libguile/bitvectors.c:
* libguile/bytevectors.c:
* libguile/eq.c:
* libguile/gc-card.c:
* libguile/gc-malloc.c:
* libguile/gc-mark.c:
* libguile/gc.c:
* libguile/init.c:
* libguile/inline.h:
* libguile/print.c:
* libguile/random.c:
* libguile/read.c:
* libguile/socket.c:
* libguile/sort.c:
* libguile/srfi-4.c:
* libguile/srfi-4.h:
* libguile/strports.c:
* libguile/vectors.c:
* libguile/vectors.h: Update includers.

											
										
										
											2009-07-17 01:08:35 +02:00
+								#include "libguile/arrays.h"
-												bitvector exodus from unif.[ch]

* libguile/Makefile.am:
* libguile/unif.c:
* libguile/unif.h:
* libguile/bitvectors.c:
* libguile/bitvectors.h: Move bitvector functionality out of unif.[ch].

* libguile/array-handle.c:
* libguile/array-map.c:
* libguile/init.c:
* libguile/read.c:
* libguile/srfi-4.c:
* libguile/vectors.c: Oh, what a tangled web we weave...

											
										
										
											2009-07-17 00:58:32 +02:00
+								#include "libguile/bitvectors.h"
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/keywords.h"
 								#include "libguile/alist.h"
 								#include "libguile/srcprop.h"
 								#include "libguile/hashtab.h"
 								#include "libguile/hash.h"
 								#include "libguile/ports.h"
 								#include "libguile/root.h"
 								#include "libguile/strings.h"
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
+								#include "libguile/strports.h"
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/vectors.h"
 								#include "libguile/validate.h"
-												* read.c (scm_lreadr): Call scm_i_read_homogenous_vector for '#f',
'#u', and '#s'.

* read.h, read.c (scm_i_input_error): Renamed from scm_input_error
and made non-static.  Changed all uses.

											
										
										
											2004-10-26 17:00:13 +00:00
+								#include "libguile/srfi-4.h"
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#include "libguile/srfi-13.h"
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/read.h"
-												* backtrace.c, debug.c, debug.h, deprecation.c, eq.c, eval.c
eval.h, gsubr.c, init.c, macros.c, print.c, print.h, read.c,
read.h, stacks.c, symbols.c, throw.c: use private-options.h

* private-options.h: new file: contain hardcoded option
definitions.

											
										
										
											2007-01-22 15:14:40 +00:00
+								#include "libguile/private-options.h"
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-													* eval.c, eval.h, read.c, read.h (scm_sym_dot): Moved from eval to
	read.  This will allow to make the definition in read.c static.

											
										
										
											2003-05-06 20:17:26 +00:00
+								SCM_GLOBAL_SYMBOL (scm_sym_dot, ".");
-													* read.h (SCM_N_READ_OPTIONS): increase SCM_N_READ_OPTIONS to 4.
	(SCM_KEYWORD_STYLE): defined.

	* read.c (scm_read_opts): add a keywords option.  This isn't a
	boolean option, in case someone wants to add support for DSSSL
	keywords too.
	Setup scm_keyword_prefix symbol.
	(scm_lreadr): Only process keywords if SCM_KEYWORD_STYLE is
	set to 'prefix.
*	I've left keyword support disabled by default, since it doesn't
	seem to break the module system and it gives R4RS standard behaviour.
	It can be reactivated with (read-set! keywords 'prefix).

											
										
										
											1997-03-10 06:49:15 +00:00
+								SCM_SYMBOL (scm_keyword_prefix, "prefix");
-												Add support for SRFI-88-like postfix keyword read syntax.

											
										
										
											2008-04-15 19:52:43 +02:00
+								SCM_SYMBOL (scm_keyword_postfix, "postfix");
-													* read.h (SCM_N_READ_OPTIONS): increase SCM_N_READ_OPTIONS to 4.
	(SCM_KEYWORD_STYLE): defined.

	* read.c (scm_read_opts): add a keywords option.  This isn't a
	boolean option, in case someone wants to add support for DSSSL
	keywords too.
	Setup scm_keyword_prefix symbol.
	(scm_lreadr): Only process keywords if SCM_KEYWORD_STYLE is
	set to 'prefix.
*	I've left keyword support disabled by default, since it doesn't
	seem to break the module system and it gives R4RS standard behaviour.
	It can be reactivated with (read-set! keywords 'prefix).

											
										
										
											1997-03-10 06:49:15 +00:00
-												replace "scm_*_t" with "scm_t_*".

											
										
										
											2001-06-14 19:50:43 +00:00
+								scm_t_option scm_read_opts[] = {
-												* eval.c, print.h, print.c, read.h, read.c: Modifications to
run-time options.

											
										
										
											1996-08-23 01:20:34 +00:00
+								  { SCM_OPTION_BOOLEAN, "copy", 0,
 								    "Copy source code expressions." },
-												*	* read.c: Recording of positions disabled by default.

											
										
										
											1996-11-02 20:54:03 +00:00
+								  { SCM_OPTION_BOOLEAN, "positions", 0,
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
+								    "Record positions of source code expressions." },
 								  { SCM_OPTION_BOOLEAN, "case-insensitive", 0,
-													* read.h (SCM_N_READ_OPTIONS): increase SCM_N_READ_OPTIONS to 4.
	(SCM_KEYWORD_STYLE): defined.

	* read.c (scm_read_opts): add a keywords option.  This isn't a
	boolean option, in case someone wants to add support for DSSSL
	keywords too.
	Setup scm_keyword_prefix symbol.
	(scm_lreadr): Only process keywords if SCM_KEYWORD_STYLE is
	set to 'prefix.
*	I've left keyword support disabled by default, since it doesn't
	seem to break the module system and it gives R4RS standard behaviour.
	It can be reactivated with (read-set! keywords 'prefix).

											
										
										
											1997-03-10 06:49:15 +00:00
+								    "Convert symbols to lower case."},
-												*** empty log message ***

											
										
										
											2000-03-12 18:30:33 +00:00
+								  { SCM_OPTION_SCM, "keywords", SCM_UNPACK (SCM_BOOL_F),
-												Revert "Fix typo in `read.c'."

This reverts commit 6ddb3ca825b1ee8b746eb688e98aa3b37ab55c3e.

											
										
										
											2008-04-15 20:14:44 +02:00
+								    "Style of keyword recognition: #f, 'prefix or 'postfix."},
-													* read.c (scm_read_opts): New opts `elisp-vectors' and
	`escaped-parens'.
	(s_vector): New.
	(scm_lreadr): Use scm_lreadparen1 instead of scm_lreadparen.  Make
	handling of elisp vector syntax dependent on SCM_ENABLE_ELISP and
	`elisp-vectors' option instead of SCM_ELISP_READ_EXTENSIONS.
	Allow "\(" and "\)" in strings when SCM_ENABLE_ELISP defined and
	`escaped-parens' option set.
	(scm_read_token): If elisp vector syntax active, disallow [ and ]
	in tokens.
	(scm_lreadparen): Rewrite as interface to scm_lreadparen1.
	(scm_lreadparen1): New.

	* read.h: Remove conditionally compiled last arg to
	scm_lreadparen.
	(SCM_ELISP_VECTORS_P, SCM_ESCAPED_PARENS_P): New.

											
										
										
											2004-02-08 19:14:18 +00:00
+								#if SCM_ENABLE_ELISP
 								  { SCM_OPTION_BOOLEAN, "elisp-vectors", 0,
 								    "Support Elisp vector syntax, namely `[...]'."},
-													* read.c (scm_read_opts): Change `escaped-parens' to
	`elisp-strings'.

											
										
										
											2004-02-10 19:44:10 +00:00
+								  { SCM_OPTION_BOOLEAN, "elisp-strings", 0,
-												* readline.c: terminate option list with NULL.

* read.c: idem.

* print.c: idem.

* eval.c: terminate option lists with 0.

* options.c: remove n (for length) from scm_option_X
functions. Detect option list length by looking for NULL name.

											
										
										
											2007-01-19 19:26:36 +00:00
+								    "Support `\\(' and `\\)' in strings."},
-													* read.c (scm_read_opts): New opts `elisp-vectors' and
	`escaped-parens'.
	(s_vector): New.
	(scm_lreadr): Use scm_lreadparen1 instead of scm_lreadparen.  Make
	handling of elisp vector syntax dependent on SCM_ENABLE_ELISP and
	`elisp-vectors' option instead of SCM_ELISP_READ_EXTENSIONS.
	Allow "\(" and "\)" in strings when SCM_ENABLE_ELISP defined and
	`escaped-parens' option set.
	(scm_read_token): If elisp vector syntax active, disallow [ and ]
	in tokens.
	(scm_lreadparen): Rewrite as interface to scm_lreadparen1.
	(scm_lreadparen1): New.

	* read.h: Remove conditionally compiled last arg to
	scm_lreadparen.
	(SCM_ELISP_VECTORS_P, SCM_ESCAPED_PARENS_P): New.

											
										
										
											2004-02-08 19:14:18 +00:00
+								#endif
-												* readline.c: terminate option list with NULL.

* read.c: idem.

* print.c: idem.

* eval.c: terminate option lists with 0.

* options.c: remove n (for length) from scm_option_X
functions. Detect option list length by looking for NULL name.

											
										
										
											2007-01-19 19:26:36 +00:00
+								  { 0, },
-												*	* read.c: New procedure: scm_read_options

											
										
										
											1996-08-20 17:11:25 +00:00
+								};
-												(INPUT_ERROR): Prepare for file:line:column error
messages for errors in scm_lreadr() and friends.

											
										
										
											2002-08-04 23:33:28 +00:00
+								/*
 								  Give meaningful error messages for errors
 								  We use the format
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
+								  FILE:LINE:COL: MESSAGE
-												(INPUT_ERROR): Prepare for file:line:column error
messages for errors in scm_lreadr() and friends.

											
										
										
											2002-08-04 23:33:28 +00:00
+								  This happened in ....
 								  This is not standard GNU format, but the test-suite likes the real
 								  message to be in front.
 								 */
-												* read.c (scm_lreadr): Call scm_i_read_homogenous_vector for '#f',
'#u', and '#s'.

* read.h, read.c (scm_i_input_error): Renamed from scm_input_error
and made non-static.  Changed all uses.

											
										
										
											2004-10-26 17:00:13 +00:00
+								void
 								scm_i_input_error (char const *function,
 										   SCM port, const char *message, SCM arg)
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
+								{
-												(scm_input_error): Use a SCM value for 'fn', not a C string.  This
avoids a conversion round-trip.

											
										
										
											2004-08-10 13:54:01 +00:00
+								  SCM fn = (scm_is_string (SCM_FILENAME(port))
 									    ? SCM_FILENAME(port)
 									    : scm_from_locale_string ("#<unknown port>"));
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
-												(scm_input_error): Use a SCM value for 'fn', not a C string.  This
avoids a conversion round-trip.

											
										
										
											2004-08-10 13:54:01 +00:00
+								  SCM string_port = scm_open_output_string ();
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
+								  SCM string = SCM_EOL;
 								  scm_simple_format (string_port,
-												(scm_i_casei_streq): New, for counted strings.

* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 17:17:43 +00:00
+										     scm_from_locale_string ("~A:~S:~S: ~A"),
-												(scm_input_error): Use a SCM value for 'fn', not a C string.  This
avoids a conversion round-trip.

											
										
										
											2004-08-10 13:54:01 +00:00
+										     scm_list_4 (fn,
-												merge from 1.8 branch

											
										
										
											2007-03-07 23:35:55 +00:00
+												 scm_from_long (SCM_LINUM (port) + 1),
-												* numbers.h, numbers.c, discouraged.h, discouraged.c (scm_short2num,
scm_ushort2num, scm_int2num, scm_uint2num, scm_long2num,
scm_ulong2num, scm_size2num, scm_ptrdiff2num, scm_num2short,
scm_num2ushort, scm_num2int, scm_num2uint, scm_num2long,
scm_num2ulong, scm_num2size, scm_num2ptrdiff, scm_long_long2num,
scm_ulong_long2num, scm_num2long_long, scm_num2ulong_long):
Discouraged by moving to discouraged.h and discouraged.c and
reimplementing in terms of scm_from_* and scm_to_*.  Changed all uses
to the new scm_from_* and scm_to_* functions.

											
										
										
											2004-08-02 16:14:04 +00:00
+												 scm_from_int (SCM_COL (port) + 1),
-												(scm_i_casei_streq): New, for counted strings.

* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 17:17:43 +00:00
+												 scm_from_locale_string (message)));
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
 								  string = scm_get_output_string (string_port);
 								  scm_close_output_port (string_port);
-												(scm_i_casei_streq): New, for counted strings.

* strings.h, strings.c: (scm_i_string_chars, scm_i_string_length,
scm_i_string_writable_chars, scm_i_string_stop_writing): New, to
replace SCM_I_STRING_CHARS and SCM_I_STRING_LENGTH.  Updated all
uses.
(scm_i_make_string, scm_c_make_string): New, to replace
scm_allocate_string.  Updated all uses.
(SCM_STRINGP, SCM_STRING_CHARS, SCM_STRING_UCHARS,
SCM_STRING_LENGTH): Deprecated.
(scm_allocate_string, scm_take_str, scm_take0str, scm_mem2string,
scm_str2string, scm_makfrom0str, scm_makfrom0str_opt):
Discouraged.  Replaced all uses with scm_from_locale_string or
similar, as appropriate.
(scm_c_string_length, scm_c_string_ref, scm_c_string_set_x,
scm_c_substring, scm_c_substring_shared, scm_c_substring_copy,
scm_substring_shared, scm_substring_copy): New.

* symbols.c, symbols.h (SCM_SYMBOLP, SCM_SYMBOL_FUNC,
SCM_SET_SYMBOL_FUNC, SCM_SYMBOL_PROPS, SCM_SET_SYMBOL_PROPS,
SCM_SYMBOL_HASH, SCM_SYMBOL_INTERNED_P, scm_mem2symbol,
scm_str2symbol, scm_mem2uninterned_symbol): Discouraged.
(SCM_SYMBOL_LENGTH, SCM_SYMBOL_CHARS, scm_c_symbol2str):
Deprecated.
(SCM_MAKE_SYMBOL_TAG, SCM_SET_SYMBOL_LENGTH, SCM_SET_SYMBOL_CHARS,
SCM_PROP_SLOTS, SCM_SET_PROP_SLOTS): Removed.
(scm_is_symbol, scm_from_locale_symbol, scm_from_locale_symboln):
New, to replace scm_str2symbol and scm_mem2symbol, respectively.
Updated all uses.
(scm_gensym): Generate only the number suffix in the buffer, just
string-append the prefix.

											
										
										
											2004-08-19 17:17:43 +00:00
+								  scm_error_scm (scm_from_locale_symbol ("read-error"),
-												* read.c (scm_lreadr): Call scm_i_read_homogenous_vector for '#f',
'#u', and '#s'.

* read.h, read.c (scm_i_input_error): Renamed from scm_input_error
and made non-static.  Changed all uses.

											
										
										
											2004-10-26 17:00:13 +00:00
+										 function? scm_from_locale_string (function) : SCM_BOOL_F,
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
+										 string,
-												(scm_input_error): Pass arg list parameter to scm_error_scm,
rather than SCM_EOL.  Needed by "Unknown # object" case in scm_lreadr.

											
										
										
											2003-06-04 16:36:03 +00:00
+										 arg,
-												* tests/reader.test: change misc-error in read-error.

* read.c (scm_input_error): new function: give meaningful error
messages, and throw read-error

* gc-malloc.c (scm_calloc): add scm_calloc.

* scheme-memory.texi (Memory Blocks): add scm_calloc, scm_gc_calloc.
correct typos.

											
										
										
											2002-08-05 23:04:44 +00:00
+										 SCM_BOOL_F);
 								}
-												(INPUT_ERROR): Prepare for file:line:column error
messages for errors in scm_lreadr() and friends.

											
										
										
											2002-08-04 23:33:28 +00:00
-												*.[ch]: Replace GUILE_PROC w/ SCM_DEFINE.

											
										
										
											2000-01-05 19:05:23 +00:00
+								SCM_DEFINE (scm_read_options, "read-options-interface", 0, 1, 0,
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								            (SCM setting),
-												(scm_read_options, scm_read, scm_read_hash_extend): Added docstrings.

											
										
										
											2001-02-16 15:17:20 +00:00
+									    "Option interface for the read options. Instead of using\n"
 									    "this procedure directly, use the procedures @code{read-enable},\n"
-												Change 3 instances of @var to @code in docstrings.

											
										
										
											2002-03-15 09:40:57 +00:00
+									    "@code{read-disable}, @code{read-set!} and @code{read-options}.")
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#define FUNC_NAME s_scm_read_options
-												*	* read.c: New procedure: scm_read_options

											
										
										
											1996-08-20 17:11:25 +00:00
+								{
-												* eval.c, print.h, print.c, read.h, read.c: Modifications to
run-time options.

											
										
										
											1996-08-23 01:20:34 +00:00
+								  SCM ans = scm_options (setting,
 											 scm_read_opts,
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+											 FUNC_NAME);
-												* eval.c, print.h, print.c, read.h, read.c: Modifications to
run-time options.

											
										
										
											1996-08-23 01:20:34 +00:00
+								  if (SCM_COPY_SOURCE_P)
 								    SCM_RECORD_POSITIONS_P = 1;
-												*	* read.c: New procedure: scm_read_options

											
										
										
											1996-08-20 17:11:25 +00:00
+								  return ans;
 								}
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#undef FUNC_NAME
-												*	* read.c: New procedure: scm_read_options

											
										
										
											1996-08-20 17:11:25 +00:00
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+								/* An association list mapping extra hash characters to procedures.  */
 								static SCM *scm_read_hash_procedures;
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								/* Token readers.  */
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								/* Size of the C buffer used to read symbols and numbers.  */
 								#define READER_BUFFER_SIZE            128
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								/* Size of the C buffer used to read strings.  */
 								#define READER_STRING_BUFFER_SIZE     512
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								/* The maximum size of Scheme character names.  */
 								#define READER_CHAR_NAME_MAX_SIZE      50
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												* Don't call scm_vector_set_length_x for non-vector arguments.

											
										
										
											2000-10-06 16:51:08 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								/* `isblank' is only in C99.  */
 								#define CHAR_IS_BLANK_(_chr)					\
 								  (((_chr) == ' ') || ((_chr) == '\t') || ((_chr) == '\n')	\
-												Changes from arch/CVS synchronization

											
										
										
											2007-10-17 21:56:10 +00:00
+								   || ((_chr) == '\f') || ((_chr) == '\r'))
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								#ifdef MSDOS
 								/* Also treat character code 26 (Ctrl-Z in ASCII) as blank.  The
 								   macro parameter is `_chr'; it must be forwarded under that exact
 								   name rather than picking up some `chr' from the caller's scope.  */
 								# define CHAR_IS_BLANK(_chr)			\
 								  ((CHAR_IS_BLANK_ (_chr)) || ((_chr) == 26))
 								#else
 								# define CHAR_IS_BLANK CHAR_IS_BLANK_
 								#endif
 								/* R5RS one-character delimiters (see section 7.1.1, ``Lexical
 								   structure'').  The argument is parenthesized at every use so that
 								   an expression argument keeps its intended precedence; note that it
 								   is evaluated more than once.  */
 								#define CHAR_IS_R5RS_DELIMITER(c)				\
 								  (CHAR_IS_BLANK (c)						\
 								   || ((c) == ')') || ((c) == '(') || ((c) == ';') || ((c) == '"'))
 								#define CHAR_IS_DELIMITER  CHAR_IS_R5RS_DELIMITER
 								/* True when _C is one of the exponent marker letters `e', `s', `f',
 								   `d' or `l' (R5RS section 7.1.1, ``Lexical Structure'').  Only the
 								   lowercase letters are matched.  */
 								#define CHAR_IS_EXPONENT_MARKER(_c)				\
 								  (((_c) == 'e') || ((_c) == 's')				\
 								   || ((_c) == 'f') || ((_c) == 'd')				\
 								   || ((_c) == 'l'))
-												Changes from arch/CVS synchronization

											
										
										
											2007-09-03 16:58:20 +00:00
+								/* Read an SCSH block comment.  */
 								static inline SCM scm_read_scsh_block_comment (int chr, SCM port);
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								static SCM scm_read_commented_expression (int chr, SCM port);
-												Changes from arch/CVS synchronization

											
										
										
											2007-09-03 16:58:20 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-10-17 21:56:10 +00:00
+								/* Read from PORT until a delimiter (e.g., a whitespace) is read.  Return
 								   zero if the whole token fits in BUF, non-zero otherwise.  */
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static inline int
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								read_token (SCM port, SCM buf, size_t *read)
-												(scm_flush_ws): Detect "#!"-style comments here.
(scm_lreadr): Abort on seeing "#!", which should no longer happen.
(skip_scsh_block_comment): Use scm_input_error instead of
scm_misc_error in case of EOF.

											
										
										
											2004-09-07 09:18:59 +00:00
+								{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  scm_t_wchar chr;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  *read = 0;
-												(scm_flush_ws): Detect "#!"-style comments here.
(scm_lreadr): Abort on seeing "#!", which should no longer happen.
(skip_scsh_block_comment): Use scm_input_error instead of
scm_misc_error in case of EOF.

											
										
										
											2004-09-07 09:18:59 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  buf = scm_i_string_start_writing (buf);
 								  while (*read < scm_i_string_length (buf))
-												(scm_flush_ws): Detect "#!"-style comments here.
(scm_lreadr): Abort on seeing "#!", which should no longer happen.
(skip_scsh_block_comment): Use scm_input_error instead of
scm_misc_error in case of EOF.

											
										
										
											2004-09-07 09:18:59 +00:00
+								    {
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      chr = scm_getc (port);
 								      if (chr == EOF)
 									{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									  scm_i_string_stop_writing ();
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									  return 0;
 									}
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
 								      chr = (SCM_CASE_INSENSITIVE_P ? uc_tolower (chr) : chr);
 								      if (CHAR_IS_DELIMITER (chr))
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									  scm_i_string_stop_writing ();
 									  scm_ungetc (chr, port);
 									  return 0;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									}
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
 								      scm_i_string_set_x (buf, *read, chr);
 								      (*read)++;
-												(scm_flush_ws): Detect "#!"-style comments here.
(scm_lreadr): Abort on seeing "#!", which should no longer happen.
(skip_scsh_block_comment): Use scm_input_error instead of
scm_misc_error in case of EOF.

											
										
										
											2004-09-07 09:18:59 +00:00
+								    }
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  scm_i_string_stop_writing ();
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  return 1;
-												(scm_flush_ws): Detect "#!"-style comments here.
(scm_lreadr): Abort on seeing "#!", which should no longer happen.
(skip_scsh_block_comment): Use scm_input_error instead of
scm_misc_error in case of EOF.

											
										
										
											2004-09-07 09:18:59 +00:00
+								}
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								static SCM
 								read_complete_token (SCM port, size_t *read)
 								{
 								  /* Read one token from PORT, however long, and return it as a
 								     string; the total number of characters read is stored in *READ.
 								     The token is read in chunks through a scratch string of
 								     READER_BUFFER_SIZE characters.
 								     NOTE(review): read_token is defined outside this view; its
 								     return value is treated here as an "overflow" flag, presumably
 								     meaning the token did not fit in BUFFER -- confirm.  */
 								  SCM buffer, str = SCM_EOL;
 								  size_t len;
 								  int overflow;
 								  buffer = scm_i_make_string (READER_BUFFER_SIZE, NULL);
 								  overflow = read_token (port, buffer, read);
 								  /* Common case: the first chunk was enough, so return just the
 								     first *READ characters of BUFFER.  */
 								  if (!overflow)
 								    return scm_i_substring (buffer, 0, *read);
 								  /* Otherwise copy the first chunk, since BUFFER is reused for the
 								     following reads.  */
 								  str = scm_string_copy (buffer);
 								  do
 								    {
 								      overflow = read_token (port, buffer, &len);
 								      /* The whole of BUFFER is appended, not only its first LEN
 								         characters; the final substring below trims the result
 								         back to the accumulated count in *READ.  */
 								      str = scm_string_append (scm_list_2 (str, buffer));
 								      *read += len;
 								    }
 								  while (overflow);
 								  return scm_i_substring (str, 0, *read);
 								}
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								/* Skip whitespace from PORT and return the first non-whitespace character
 								   read.  Raise an error on end-of-file.  */
 								static int
 								flush_ws (SCM port, const char *eoferr)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  register scm_t_wchar c;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								  while (1)
-													* Makefile.in: Rebuilt.
	* Makefile.am (libguile_la_SOURCES): Removed extchrs.c,
	mbstrings.c.
	(modinclude_HEADERS): Removed extchrs.h, mbstrings.h.
	* unif.c (scm_vector_set_length_x): Don't handle multibyte
	strings.
	* tag.c (scm_utag_mb_string, scm_utag_mb_substring): Removed.
	(scm_tag): Don't handle multibyte strings.
	* read.c: Don't include mbstrings.h.
	(scm_lreadr): Don't handle multibyte ports.
	* kw.c: Don't include mbstrings.h.
	* init.c: Don't include mbstrings.h.
	(scm_boot_guile_1): Don't init mbstrings module.
	* hash.c (scm_hasher): Don't handle mbstrings.
	* gscm.c (gscm_run_scm): Don't init mbstrings module.
	* gc.c (scm_gc_mark): Don't handle mbstrings.
	(scm_gc_sweep): Likewise.
	* eval.c (SCM_CEVAL): Don't handle mbstrings.
	* eq.c (scm_equal_p): Use SCM_TYP7S, not SCM_TYP7SD.
	* tags.h (SCM_TYP7SD): Removed.
	(SCM_TYP7D): Removed.
	(scm_tc7_mb_string): Removed.
	(scm_tc7_mb_substring): Removed.
	* print.c (scm_iprin1): Handle char printing directly.  Don't
	handle mbstrings.
	Don't include "mbstrings.h".
	* symbols.c (scm_intern_obarray_soft, scm_string_to_symbol,
	scm_string_to_obarray_symbol, msymbolize): Don't set symbol's
	multi-byte flag.
	Don't include "mbstrings.h".
	* symbols.h (SCM_SYMBOL_MULTI_BYTE_STRINGP): Removed.
	(SCM_SYMBOL_SLOTS): Define as 4.
	(SCM_ROSTRINGP): Use SCM_TYP7S, not SCM_TYP7SD.
	* arbiters.c, backtrace.c, debug.c, dynl.c, eval.c, fluids.c,
	gc.c, gsubr.c, ioext.c, kw.c, mallocs.c, numbers.c, ports.c,
	print.c, read.c, regex-posix.c, root.c, srcprop.c, stackchk.c,
	struct.c, threads.c, throw.c, unif.c, variable.c: Use new
	("gen"-less) I/O function names.
	* ports.c (scm_add_to_port_table): Don't set port's
	representation.
	* ports.h (scm_port_representation_type): Removed.
	(scm_string_representation_type): Removed.
	(struct scm_port_table ): Removed representation field.
	(SCM_PORT_REPRESENTATION): Removed.
	(SCM_SET_PORT_REPRESENTATION): Removed.
	* genio.h: Use new function names.
	* genio.c: Don't include "extchrs.h".
	(scm_gen_putc, scm_gen_puts, scm_gen_write, scm_get_getc):
	Removed.
	(scm_putc, scm_puts, scm_lfwrite): No longer static.
	(scm_getc): No longer static; handle line and column changes.
	(scm_ungetc): Renamed from scm_gen_ungetc.
	(scm_do_read_line): Renamed from scm_gen_read_line.
	* libguile.h: Don't include "extchrs.h" or "mbstrings.h"
	* extchrs.h, extchrs.c, mbstrings.h, mbstrings.c: Removed.

											
										
										
											1997-10-15 17:18:32 +00:00
+								    switch (c = scm_getc (port))
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								      {
 								      case EOF:
 								      goteof:
 									if (eoferr)
-												* read.c (scm_flush_ws): Include filename in error message when it
is not `#f'.

											
										
										
											2000-08-06 22:04:11 +00:00
+									  {
-												* read.c (scm_lreadr): Call scm_i_read_homogenous_vector for '#f',
'#u', and '#s'.

* read.h, read.c (scm_i_input_error): Renamed from scm_input_error
and made non-static.  Changed all uses.

											
										
										
											2004-10-26 17:00:13 +00:00
+									    scm_i_input_error (eoferr,
 											       port,
 											       "end of file",
 											       SCM_EOL);
-												* read.c (scm_flush_ws): Include filename in error message when it
is not `#f'.

											
										
										
											2000-08-06 22:04:11 +00:00
+									  }
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+									return c;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								      case ';':
 								      lp:
-													* Makefile.in: Rebuilt.
	* Makefile.am (libguile_la_SOURCES): Removed extchrs.c,
	mbstrings.c.
	(modinclude_HEADERS): Removed extchrs.h, mbstrings.h.
	* unif.c (scm_vector_set_length_x): Don't handle multibyte
	strings.
	* tag.c (scm_utag_mb_string, scm_utag_mb_substring): Removed.
	(scm_tag): Don't handle multibyte strings.
	* read.c: Don't include mbstrings.h.
	(scm_lreadr): Don't handle multibyte ports.
	* kw.c: Don't include mbstrings.h.
	* init.c: Don't include mbstrings.h.
	(scm_boot_guile_1): Don't init mbstrings module.
	* hash.c (scm_hasher): Don't handle mbstrings.
	* gscm.c (gscm_run_scm): Don't init mbstrings module.
	* gc.c (scm_gc_mark): Don't handle mbstrings.
	(scm_gc_sweep): Likewise.
	* eval.c (SCM_CEVAL): Don't handle mbstrings.
	* eq.c (scm_equal_p): Use SCM_TYP7S, not SCM_TYP7SD.
	* tags.h (SCM_TYP7SD): Removed.
	(SCM_TYP7D): Removed.
	(scm_tc7_mb_string): Removed.
	(scm_tc7_mb_substring): Removed.
	* print.c (scm_iprin1): Handle char printing directly.  Don't
	handle mbstrings.
	Don't include "mbstrings.h".
	* symbols.c (scm_intern_obarray_soft, scm_string_to_symbol,
	scm_string_to_obarray_symbol, msymbolize): Don't set symbol's
	multi-byte flag.
	Don't include "mbstrings.h".
	* symbols.h (SCM_SYMBOL_MULTI_BYTE_STRINGP): Removed.
	(SCM_SYMBOL_SLOTS): Define as 4.
	(SCM_ROSTRINGP): Use SCM_TYP7S, not SCM_TYP7SD.
	* arbiters.c, backtrace.c, debug.c, dynl.c, eval.c, fluids.c,
	gc.c, gsubr.c, ioext.c, kw.c, mallocs.c, numbers.c, ports.c,
	print.c, read.c, regex-posix.c, root.c, srcprop.c, stackchk.c,
	struct.c, threads.c, throw.c, unif.c, variable.c: Use new
	("gen"-less) I/O function names.
	* ports.c (scm_add_to_port_table): Don't set port's
	representation.
	* ports.h (scm_port_representation_type): Removed.
	(scm_string_representation_type): Removed.
	(struct scm_port_table ): Removed representation field.
	(SCM_PORT_REPRESENTATION): Removed.
	(SCM_SET_PORT_REPRESENTATION): Removed.
	* genio.h: Use new function names.
	* genio.c: Don't include "extchrs.h".
	(scm_gen_putc, scm_gen_puts, scm_gen_write, scm_get_getc):
	Removed.
	(scm_putc, scm_puts, scm_lfwrite): No longer static.
	(scm_getc): No longer static; handle line and column changes.
	(scm_ungetc): Renamed from scm_gen_ungetc.
	(scm_do_read_line): Renamed from scm_gen_read_line.
	* libguile.h: Don't include "extchrs.h" or "mbstrings.h"
	* extchrs.h, extchrs.c, mbstrings.h, mbstrings.c: Removed.

											
										
										
											1997-10-15 17:18:32 +00:00
+									switch (c = scm_getc (port))
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+									  {
 									  case EOF:
 									    goto goteof;
 									  default:
 									    goto lp;
 									  case SCM_LINE_INCREMENTORS:
 									    break;
 									  }
 									break;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-09-03 16:58:20 +00:00
+								      case '#':
 									switch (c = scm_getc (port))
 									  {
 									  case EOF:
 									    eoferr = "read_sharp";
 									    goto goteof;
 									  case '!':
 									    scm_read_scsh_block_comment (c, port);
 									    break;
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+									  case ';':
 									    scm_read_commented_expression (c, port);
 									    break;
-												Changes from arch/CVS synchronization

											
										
										
											2007-09-03 16:58:20 +00:00
+									  default:
 									    scm_ungetc (c, port);
 									    return '#';
 									  }
 									break;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								      case SCM_LINE_INCREMENTORS:
 								      case SCM_SINGLE_SPACES:
 								      case '\t':
 									break;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								      default:
 									return c;
 								      }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  return 0;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								/* Token readers.  */
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static SCM scm_read_expression (SCM port);
 								static SCM scm_read_sharp (int chr, SCM port);
 								static SCM scm_get_hash_procedure (int c);
 								static SCM recsexpr (SCM obj, long line, int column, SCM filename);
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								static SCM
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								scm_read_sexp (scm_t_wchar chr, SCM port)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#define FUNC_NAME "scm_i_lreadparen"
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								{
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  register int c;
 								  register SCM tmp;
 								  register SCM tl, ans = SCM_EOL;
-												Remove extraneous semi-colon in `read.c'.

											
										
										
											2008-04-26 21:39:27 +02:00
+								  SCM tl2 = SCM_EOL, ans2 = SCM_EOL, copy = SCM_BOOL_F;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  static const int terminating_char = ')';
 								  /* Need to capture line and column numbers here. */
 								  long line = SCM_LINUM (port);
 								  int column = SCM_COL (port) - 1;
-												* read.c (scm_lreadr): Recognize SCSH-style block comments; text
between `#!' and `!#' is ignored.
(skip_scsh_block_comment): New function.

											
										
										
											1996-10-25 08:30:26 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  c = flush_ws (port, FUNC_NAME);
 								  if (terminating_char == c)
 								    return SCM_EOL;
-												* read.c (scm_lreadr): Recognize SCSH-style block comments; text
between `#!' and `!#' is ignored.
(skip_scsh_block_comment): New function.

											
										
										
											1996-10-25 08:30:26 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  scm_ungetc (c, port);
 								  if (scm_is_eq (scm_sym_dot,
 										 (tmp = scm_read_expression (port))))
 								    {
 								      ans = scm_read_expression (port);
 								      if (terminating_char != (c = flush_ws (port, FUNC_NAME)))
 									scm_i_input_error (FUNC_NAME, port, "missing close paren",
 											   SCM_EOL);
 								      return ans;
 								    }
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  /* Build the head of the list structure. */
 								  ans = tl = scm_cons (tmp, SCM_EOL);
 								  if (SCM_COPY_SOURCE_P)
 								    ans2 = tl2 = scm_cons (scm_is_pair (tmp)
 											   ? copy
 											   : tmp,
 											   SCM_EOL);
 								  while (terminating_char != (c = flush_ws (port, FUNC_NAME)))
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    {
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      SCM new_tail;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      scm_ungetc (c, port);
 								      if (scm_is_eq (scm_sym_dot,
 										     (tmp = scm_read_expression (port))))
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+									{
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									  SCM_SETCDR (tl, tmp = scm_read_expression (port));
 									  if (SCM_COPY_SOURCE_P)
 									    SCM_SETCDR (tl2, scm_cons (scm_is_pair (tmp) ? copy : tmp,
 												       SCM_EOL));
 									  c = flush_ws (port, FUNC_NAME);
 									  if (terminating_char != c)
 									    scm_i_input_error (FUNC_NAME, port,
 											       "in pair: missing close paren", SCM_EOL);
 									  goto exit;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+									}
-													* read.c (scm_lreadr): When reading a hash token, check for a
	user-defined hash procedure first, so that overriding the builtin
	hash characters is possible (this was needed for implementing
	SRFI-4's read syntax `f32(...)').

	* num2integral.i.c: Use scm_t_signed_bits instead of scm_t_bits,
	because the latter is unsigned now and breaks comparisons like
	(n < (scm_t_signed_bits)MIN_VALUE).

											
										
										
											2001-06-27 13:15:20 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      new_tail = scm_cons (tmp, SCM_EOL);
 								      SCM_SETCDR (tl, new_tail);
 								      tl = new_tail;
 								      if (SCM_COPY_SOURCE_P)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+									{
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									  SCM new_tail2 = scm_cons (scm_is_pair (tmp)
 												    ? copy
 												    : tmp, SCM_EOL);
 									  SCM_SETCDR (tl2, new_tail2);
 									  tl2 = new_tail2;
 									}
 								    }
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								 exit:
 								  if (SCM_RECORD_POSITIONS_P)
 								    scm_whash_insert (scm_source_whash,
 										      ans,
 										      scm_make_srcprops (line, column,
 													 SCM_FILENAME (port),
 													 SCM_COPY_SOURCE_P
 													 ? ans2
 													 : SCM_UNDEFINED,
 													 SCM_EOL));
 								  return ans;
 								}
 								#undef FUNC_NAME
-												* read.c (scm_lreadr): Call scm_i_read_homogenous_vector for '#f',
'#u', and '#s'.

* read.h, read.c (scm_i_input_error): Renamed from scm_input_error
and made non-static.  Changed all uses.

											
										
										
											2004-10-26 17:00:13 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static SCM
 								scm_read_string (int chr, SCM port)
 								#define FUNC_NAME "scm_lreadr"
 								{
 								  /* For strings smaller than C_STR, this function creates only one Scheme
 								     object (the string returned).  */
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  SCM str = SCM_BOOL_F;
 								  unsigned c_str_len = 0;
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  scm_t_wchar c;
-												(scm_lreadr): Call scm_i_read_array for all characters following '#'
that can start an array.  Explicitly disambiguate 'i' and 'e' between
introducing numbers and uniform vectors.  Do not call
scm_i_read_homogenous_vector, since that is also handled by
scm_i_read_array now.

											
										
										
											2004-10-29 14:45:19 +00:00
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								  str = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  while ('"' != (c = scm_getc (port)))
 								    {
 								      if (c == EOF)
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								        {
 								        str_eof:
 								          scm_i_input_error (FUNC_NAME, port,
 								                             "end of file in string constant", SCM_EOL);
 								        }
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								      if (c_str_len + 1 >= scm_i_string_length (str))
 								        {
 								          SCM addy = scm_i_make_string (READER_STRING_BUFFER_SIZE, NULL);
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								          str = scm_string_append (scm_list_2 (str, addy));
 								        }
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      if (c == '\\')
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								        {
 								          switch (c = scm_getc (port))
 								            {
 								            case EOF:
 								              goto str_eof;
 								            case '"':
 								            case '\\':
 								              break;
-													* read.c (scm_read_opts): New opts `elisp-vectors' and
	`escaped-parens'.
	(s_vector): New.
	(scm_lreadr): Use scm_lreadparen1 instead of scm_lreadparen.  Make
	handling of elisp vector syntax dependent on SCM_ENABLE_ELISP and
	`elisp-vectors' option instead of SCM_ELISP_READ_EXTENSIONS.
	Allow "\(" and "\)" in strings when SCM_ENABLE_ELISP defined and
	`escaped-parens' option set.
	(scm_read_token): If elisp vector syntax active, disallow [ and ]
	in tokens.
	(scm_lreadparen): Rewrite as interface to scm_lreadparen1.
	(scm_lreadparen1): New.

	* read.h: Remove conditionally compiled last arg to
	scm_lreadparen.
	(SCM_ELISP_VECTORS_P, SCM_ESCAPED_PARENS_P): New.

											
										
										
											2004-02-08 19:14:18 +00:00
+								#if SCM_ENABLE_ELISP
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								            case '(':
 								            case ')':
 								              if (SCM_ESCAPED_PARENS_P)
 								                break;
 								              goto bad_escaped;
-													* read.c (scm_read_opts): New opts `elisp-vectors' and
	`escaped-parens'.
	(s_vector): New.
	(scm_lreadr): Use scm_lreadparen1 instead of scm_lreadparen.  Make
	handling of elisp vector syntax dependent on SCM_ENABLE_ELISP and
	`elisp-vectors' option instead of SCM_ELISP_READ_EXTENSIONS.
	Allow "\(" and "\)" in strings when SCM_ENABLE_ELISP defined and
	`escaped-parens' option set.
	(scm_read_token): If elisp vector syntax active, disallow [ and ]
	in tokens.
	(scm_lreadparen): Rewrite as interface to scm_lreadparen1.
	(scm_lreadparen1): New.

	* read.h: Remove conditionally compiled last arg to
	scm_lreadparen.
	(SCM_ELISP_VECTORS_P, SCM_ESCAPED_PARENS_P): New.

											
										
										
											2004-02-08 19:14:18 +00:00
+								#endif
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								            case '\n':
 								              continue;
 								            case '0':
 								              c = '\0';
 								              break;
 								            case 'f':
 								              c = '\f';
 								              break;
 								            case 'n':
 								              c = '\n';
 								              break;
 								            case 'r':
 								              c = '\r';
 								              break;
 								            case 't':
 								              c = '\t';
 								              break;
 								            case 'a':
 								              c = '\007';
 								              break;
 								            case 'v':
 								              c = '\v';
 								              break;
 								            case 'x':
 								              {
 								                scm_t_wchar a, b;
 								                a = scm_getc (port);
 								                if (a == EOF)
 								                  goto str_eof;
 								                b = scm_getc (port);
 								                if (b == EOF)
 								                  goto str_eof;
 								                if ('0' <= a && a <= '9')
 								                  a -= '0';
 								                else if ('A' <= a && a <= 'F')
 								                  a = a - 'A' + 10;
 								                else if ('a' <= a && a <= 'f')
 								                  a = a - 'a' + 10;
 								                else
 								                  {
 								                    c = a;
 								                    goto bad_escaped;
 								                  }
 								                if ('0' <= b && b <= '9')
 								                  b -= '0';
 								                else if ('A' <= b && b <= 'F')
 								                  b = b - 'A' + 10;
 								                else if ('a' <= b && b <= 'f')
 								                  b = b - 'a' + 10;
 								                else
 								                  {
 								                    c = b;
 								                    goto bad_escaped;
 								                  }
 								                c = a * 16 + b;
 								                break;
 								              }
 								            case 'u':
 								              {
 								                scm_t_wchar a;
 								                int i;
 								                c = 0;
 								                for (i = 0; i < 4; i++)
 								                  {
 								                    a = scm_getc (port);
 								                    if (a == EOF)
 								                      goto str_eof;
 								                    if ('0' <= a && a <= '9')
 								                      a -= '0';
 								                    else if ('A' <= a && a <= 'F')
 								                      a = a - 'A' + 10;
 								                    else if ('a' <= a && a <= 'f')
 								                      a = a - 'a' + 10;
 								                    else
 								                      {
 								                        c = a;
 								                        goto bad_escaped;
 								                      }
 								                    c = c * 16 + a;
 								                  }
 								                break;
 								              }
 								            case 'U':
 								              {
 								                scm_t_wchar a;
 								                int i;
 								                c = 0;
 								                for (i = 0; i < 6; i++)
 								                  {
 								                    a = scm_getc (port);
 								                    if (a == EOF)
 								                      goto str_eof;
 								                    if ('0' <= a && a <= '9')
 								                      a -= '0';
 								                    else if ('A' <= a && a <= 'F')
 								                      a = a - 'A' + 10;
 								                    else if ('a' <= a && a <= 'f')
 								                      a = a - 'a' + 10;
 								                    else
 								                      {
 								                        c = a;
 								                        goto bad_escaped;
 								                      }
 								                    c = c * 16 + a;
 								                  }
 								                break;
 								              }
 								            default:
 								            bad_escaped:
 								              scm_i_input_error (FUNC_NAME, port,
 								                                 "illegal character in escape sequence: ~S",
 								                                 scm_list_1 (SCM_MAKE_CHAR (c)));
 								            }
 								        }
 								      str = scm_i_string_start_writing (str);
 								      scm_i_string_set_x (str, c_str_len++, c);
 								      scm_i_string_stop_writing ();
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    }
-												(scm_lreadr): Simply do (symbol->keyword (read)) after
reading '#:' or ':'.  See NEWS for consequences.

											
										
										
											2004-10-04 18:03:18 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  if (c_str_len > 0)
 								    {
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
+								      return scm_i_substring_copy (str, 0, c_str_len);
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    }
-												Add Unicode strings and symbols

This adds full Unicode strings as a datatype, and it adds some
minimal functionality.  The terminal and port encoding is assumed
to be ISO-8859-1.  Non-ISO-8859-1 characters are written or
input as string character escapes.

The string character escapes now have 3 forms: \xXX \uXXXX and
\UXXXXXX, for unprintable characters that have 2, 4 or 6 hex digits.

The process for writing to strings has been modified.  There is now a
function scm_i_string_start_writing that does the copy-on-write
conversion if necessary.

To compile strings that may be wide, the VM storage of strings and
string-likes has changed.

Most string-using functions have not yet been updated and may break
when used with wide strings.


        * module/language/assembly/compile-bytecode.scm (write-bytecode):
        use variable width string bytecode format

        * module/language/assembly.scm (byte-length): use variable width
        bytecode format

        * libguile/vm-i-loader.c (load-string, load-symbol):
        (load-keyword, define): use variable-width bytecode format

        * libguile/vm-engine.h (FETCH_WIDTH): new macro

        * libguile/strings.h: new declarations

        * libguile/strings.c (make_wide_stringbuf): new function
        (widen_stringbuf): new function
        (scm_i_make_wide_string): new function
        (scm_i_is_narrow_string): new function
        (scm_i_string_wide_chars): new function
        (scm_i_string_start_writing): new function
        (scm_i_string_ref): new function
        (scm_i_string_set_x): new function
        (scm_i_is_narrow_symbol): new function
        (scm_i_symbol_wide_chars, scm_i_symbol_ref): new function
        (scm_string_width): new function
        (unistring_escapes_to_guile_escapes): new function
        (scm_to_stringn): new function
        (scm_i_stringbuf_free): modify for wide strings
        (scm_i_substring_copy): modify for wide strings
        (scm_i_string_chars, scm_string_append): modify for wide strings
        (scm_i_make_symbol, scm_to_locale_stringn): modify for wide strings
        (scm_string_dump, scm_symbol_dump, scm_to_locale_stringbuf):
        (scm_string, scm_i_deprecated_string_chars): modify for wide strings
        (scm_from_locale_string, scm_from_locale_stringn): add null test

        * libguile/srfi-13.c: add calls for scm_i_string_start_writing for
        each call of scm_i_string_stop_writing
        (scm_string_for_each): modify for wide strings

        * libguile/socket.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/rw.c: add calls for scm_i_string_start_writing for each
        call of scm_i_string_stop_writing

        * libguile/read.c (scm_read_string): allow reading of wide strings

        * libguile/print.h: add declaration for scm_charprint

        * libguile/print.c (iprin1): print wide strings and add new string
        escapes
        (scm_charprint): new function

        * libguile/ports.h: new declarations for scm_lfwrite_substr and
        scm_lfwrite_str

        * libguile/ports.c (update_port_lf): new function
        (scm_lfwrite): use update_port_lf
        (scm_lfwrite_substr): new function
        (scm_lfwrite_str): new function

        * test-suite/tests/asm-to-bytecode.test ("compiler"): add string
        width byte to string-like asm tests

											
										
										
											2009-08-08 02:35:00 -07:00
 								  return scm_nullstr;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												* Eliminate some calls to scm_wta.

											
										
										
											2001-03-04 17:09:34 +00:00
+								#undef FUNC_NAME
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static SCM
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								scm_read_number (scm_t_wchar chr, SCM port)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  SCM result;
 								  SCM buffer;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  size_t read;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  scm_ungetc (chr, port);
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  buffer = read_complete_token (port, &read);
 								  result = scm_string_to_number (buffer, SCM_UNDEFINED);
 								  if (!scm_is_true (result))
 								    /* Return a symbol instead of a number.  */
 								    result = scm_string_to_symbol (buffer);
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  return result;
 								}
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static SCM
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								scm_read_mixed_case_symbol (scm_t_wchar chr, SCM port)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  SCM result;
 								  int ends_with_colon = 0;
 								  SCM buffer;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  size_t read = 0;
-												Add support for SRFI-88-like postfix keyword read syntax.

											
										
										
											2008-04-15 19:52:43 +02:00
+								  int postfix = scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_postfix);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  scm_ungetc (chr, port);
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  buffer = read_complete_token (port, &read);
 								  if (read > 0)
 								    ends_with_colon = scm_i_string_ref (buffer, read - 1) == ':';
-												Add support for SRFI-88-like postfix keyword read syntax.

											
										
										
											2008-04-15 19:52:43 +02:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  if (postfix && ends_with_colon && (read > 1))
 								    result = scm_symbol_to_keyword (scm_string_to_symbol (scm_i_substring (buffer, 0, read - 1)));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  else
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								    result = scm_string_to_symbol (buffer);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  return result;
 								}
 								static SCM
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								scm_read_number_and_radix (scm_t_wchar chr, SCM port)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#define FUNC_NAME "scm_lreadr"
 								{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  SCM result;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  size_t read;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  SCM buffer = scm_i_make_string (READER_BUFFER_SIZE, NULL);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  unsigned int radix;
 								  switch (chr)
 								    {
 								    case 'B':
 								    case 'b':
 								      radix = 2;
 								      break;
 								    case 'o':
 								    case 'O':
 								      radix = 8;
 								      break;
 								    case 'd':
 								    case 'D':
 								      radix = 10;
 								      break;
 								    case 'x':
 								    case 'X':
 								      radix = 16;
 								      break;
 								    default:
 								      scm_ungetc (chr, port);
 								      scm_ungetc ('#', port);
 								      radix = 10;
 								    }
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  buffer = read_complete_token (port, &read);
 								  result = scm_string_to_number (buffer, scm_from_uint (radix));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  if (scm_is_true (result))
 								    return result;
 								  scm_i_input_error (FUNC_NAME, port, "unknown # object", SCM_EOL);
 								  return SCM_BOOL_F;
 								}
 								#undef FUNC_NAME
 								static SCM
 								scm_read_quote (int chr, SCM port)
 								{
 								  SCM p;
-												Changes from arch/CVS synchronization

											
										
										
											2007-08-23 21:17:24 +00:00
+								  long line = SCM_LINUM (port);
 								  int column = SCM_COL (port) - 1;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  switch (chr)
 								    {
 								    case '`':
 								      p = scm_sym_quasiquote;
 								      break;
 								    case '\'':
 								      p = scm_sym_quote;
 								      break;
 								    case ',':
 								      {
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									scm_t_wchar c;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 									c = scm_getc (port);
 									if ('@' == c)
 									  p = scm_sym_uq_splicing;
 									else
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+									  {
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									    scm_ungetc (c, port);
 									    p = scm_sym_unquote;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+									  }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									break;
 								      }
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    default:
 								      fprintf (stderr, "%s: unhandled quote character (%i)\n",
-												More compilation fixes with Sun CC (bug #21378).

											
										
										
											2008-02-07 09:54:47 +00:00
+									       "scm_read_quote", chr);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      abort ();
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    }
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  p = scm_cons2 (p, scm_read_expression (port), SCM_EOL);
-												Changes from arch/CVS synchronization

											
										
										
											2007-08-23 21:17:24 +00:00
+								  if (SCM_RECORD_POSITIONS_P)
 								    scm_whash_insert (scm_source_whash, p,
 										      scm_make_srcprops (line, column,
 													 SCM_FILENAME (port),
 													 SCM_COPY_SOURCE_P
 													 ? (scm_cons2 (SCM_CAR (p),
 														       SCM_CAR (SCM_CDR (p)),
 														       SCM_EOL))
 													 : SCM_UNDEFINED,
 													 SCM_EOL));
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  return p;
 								}
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								SCM_SYMBOL (sym_syntax, "syntax");	/* form head scm_read_syntax uses for chr '\'' */
 								SCM_SYMBOL (sym_quasisyntax, "quasisyntax");	/* form head for chr '`' */
 								SCM_SYMBOL (sym_unsyntax, "unsyntax");	/* form head for chr ',' */
 								SCM_SYMBOL (sym_unsyntax_splicing, "unsyntax-splicing");	/* form head for chr ',' followed by '@' */
 								/* Build the two-element list form for a syntax abbreviation.
 								   CHR selects the head symbol:
 								     '\''               -> (syntax DATUM)
 								     '`'                -> (quasisyntax DATUM)
 								     ','                -> (unsyntax DATUM)
 								     ',' followed by '@' -> (unsyntax-splicing DATUM)
 								   For ',' one further character is read from PORT and pushed back
 								   when it is not '@'.  DATUM comes from scm_read_expression.  Any
 								   other CHR is reported on stderr and aborts the process.  When
 								   SCM_RECORD_POSITIONS_P is set, the new form is entered in
 								   scm_source_whash with the line and (column - 1) sampled from
 								   PORT on entry.  Returns the new form.  */
 								static SCM
 								scm_read_syntax (int chr, SCM port)
 								{
 								  /* Sample the position before anything else is read from PORT.  */
 								  long line = SCM_LINUM (port);
 								  int column = SCM_COL (port) - 1;
 								  SCM head;
 								  SCM form;
 								  if (chr == '`')
 								    head = sym_quasisyntax;
 								  else if (chr == '\'')
 								    head = sym_syntax;
 								  else if (chr == ',')
 								    {
 								      /* One character of lookahead tells `,@' apart from `,'.  */
 								      int next = scm_getc (port);
 								      if (next == '@')
 								        head = sym_unsyntax_splicing;
 								      else
 								        {
 								          scm_ungetc (next, port);
 								          head = sym_unsyntax;
 								        }
 								    }
 								  else
 								    {
 								      fprintf (stderr, "%s: unhandled syntax character (%i)\n",
 								               "scm_read_syntax", chr);
 								      abort ();
 								    }
 								  form = scm_cons2 (head, scm_read_expression (port), SCM_EOL);
 								  if (SCM_RECORD_POSITIONS_P)
 								    {
 								      /* With source copying enabled, store a fresh two-element list
 								         built from the same head and datum instead of FORM itself.  */
 								      SCM copy = SCM_COPY_SOURCE_P
 								        ? scm_cons2 (SCM_CAR (form), SCM_CAR (SCM_CDR (form)), SCM_EOL)
 								        : SCM_UNDEFINED;
 								      scm_whash_insert (scm_source_whash, form,
 								                        scm_make_srcprops (line, column,
 								                                           SCM_FILENAME (port),
 								                                           copy,
 								                                           SCM_EOL));
 								    }
 								  return form;
 								}
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static inline SCM
 								scm_read_semicolon_comment (int chr, SCM port)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								{
 								  int c;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorportaing the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being use in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  /* Skip the remainder of a comment: consume bytes from PORT up to
 								     and including the next newline, or until EOF, then return
 								     SCM_UNSPECIFIED.  CHR is not used.
 								     We use the get_byte here because there is no need to get the
 								     locale correct with comment input. This presumes that newline
 								     always represents itself no matter what the encoding is.
 								     Both the first read and every later read go through
 								     scm_get_byte_or_eof, so no byte of the comment is decoded with
 								     the port's character encoding.  */
 								  for (c = scm_get_byte_or_eof (port);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								       (c != EOF) && (c != '\n');
 								       c = scm_get_byte_or_eof (port));
 								  return SCM_UNSPECIFIED;
 								}
 								/* Sharp readers, i.e. readers called after a `#' sign has been read.  */
 								/* Map the character CHR to a boolean object: 't' or 'T' gives
 								   SCM_BOOL_T, 'f' or 'F' gives SCM_BOOL_F.  Any other CHR gives
 								   SCM_UNSPECIFIED.  PORT is not used.  */
 								static SCM
 								scm_read_boolean (int chr, SCM port)
 								{
 								  switch (chr)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    {
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    case 't':
 								    case 'T':
 								      return SCM_BOOL_T;
 								    case 'f':
 								    case 'F':
 								      return SCM_BOOL_F;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  /* CHR was none of t, T, f, F.  */
 								  return SCM_UNSPECIFIED;
 								}
 								static SCM
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								scm_read_character (scm_t_wchar chr, SCM port)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#define FUNC_NAME "scm_lreadr"
 								{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  SCM charname = scm_i_make_string (READER_CHAR_NAME_MAX_SIZE, NULL);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  size_t charname_len;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  scm_t_wchar cp;
 								  int overflow;
 								  overflow = read_token (port, charname, &charname_len);
 								  charname = scm_c_substring (charname, 0, charname_len);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  if (overflow)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    goto char_error;
 								  if (charname_len == 0)
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    {
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      chr = scm_getc (port);
 								      if (chr == EOF)
 									scm_i_input_error (FUNC_NAME, port, "unexpected end of file "
 											   "while reading character", SCM_EOL);
 								      /* CHR must be a token delimiter, like a whitespace.  */
 								      return (SCM_MAKE_CHAR (chr));
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								    }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  if (charname_len == 1)
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								    return SCM_MAKE_CHAR (scm_i_string_ref (charname, 0));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  cp = scm_i_string_ref (charname, 0);
 								  if (cp >= '0' && cp < '8')
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    {
 								      /* Dirk:FIXME::  This type of character syntax is not R5RS
 								       * compliant.  Further, it should be verified that the constant
-												Range check octal-escaped characters

* libguile/read.c (scm_read_character): range check octal escapes

											
										
										
											2009-08-29 07:14:49 -07:00
+								       * does only consist of octal digits.  */
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								      SCM p = scm_string_to_number (charname, scm_from_uint (8));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      if (SCM_I_INUMP (p))
-												Range check octal-escaped characters

* libguile/read.c (scm_read_character): range check octal escapes

											
										
										
											2009-08-29 07:14:49 -07:00
+								        {
 								          scm_t_wchar c = SCM_I_INUM (p);
 								          if (SCM_IS_UNICODE_CHAR (c))
 								            return SCM_MAKE_CHAR (c);
 								          else
 								            scm_i_input_error (FUNC_NAME, port,
 								                               "out-of-range octal character escape: ~a",
 								                               scm_list_1 (charname));
 								        }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    }
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  /* The names of characters should never have non-Latin1
 								     characters.  */
 								  if (scm_i_is_narrow_string (charname)
 								      || scm_i_try_narrow_string (charname))
-												fix uninitialized variable in scm_read_character

* libguile/read.c (scm_read_character): Fix uninitialized variable.

											
										
										
											2009-08-26 13:15:07 +02:00
+								    { SCM ch = scm_i_charname_to_char (scm_i_string_chars (charname),
 								                                       charname_len);
 								      if (scm_is_true (ch))
 								        return ch;
 								    }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								 char_error:
 								  scm_i_input_error (FUNC_NAME, port, "unknown character name ~a",
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+										     scm_list_1 (charname));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  return SCM_UNSPECIFIED;
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-												* Eliminate some calls to scm_wta.

											
										
										
											2001-03-04 17:09:34 +00:00
+								#undef FUNC_NAME
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static inline SCM
 								scm_read_keyword (int chr, SCM port)
 								{
 								  SCM symbol;
 								  /* Read the symbol that comprises the keyword.  Doing this instead of
 								     invoking a specific symbol reader function allows `scm_read_keyword ()'
 								     to adapt to the delimiters currently valid of symbols.
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								     XXX: This implementation allows sloppy syntaxes like `#:  key'.  */
 								  symbol = scm_read_expression (port);
 								  if (!scm_is_symbol (symbol))
-												More compilation fixes with Sun CC (bug #21378).

											
										
										
											2008-02-07 09:54:47 +00:00
+								    scm_i_input_error ("scm_read_keyword", port,
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+										       "keyword prefix `~a' not followed by a symbol: ~s",
 										       scm_list_2 (SCM_MAKE_CHAR (chr), symbol));
 								  return (scm_symbol_to_keyword (symbol));
 								}
 								static inline SCM
 								scm_read_vector (int chr, SCM port)
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								{
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  /* Note: We call `scm_read_sexp ()' rather than READER here in order to
 								     guarantee that it's going to do what we want.  After all, this is an
 								     implementation detail of `scm_read_vector ()', not a desirable
 								     property.  */
 								  return (scm_vector (scm_read_sexp (chr, port)));
 								}
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static inline SCM
 								scm_read_srfi4_vector (int chr, SCM port)
 								{
 								  /* Thin wrapper: hand PORT and CHR straight to `scm_i_read_array ()'
 								     and return whatever it produces.  Nothing is consumed from PORT
 								     in this function itself.  */
 								  return scm_i_read_array (port, chr);
 								}
-												Implement R6RS bytevector read syntax.

* libguile/read.c (scm_read_bytevector): New function.
  (scm_read_sharp): Add `v' case for bytevectors.

* test-suite/lib.scm (exception:read-error): New variable.

* test-suite/tests/bytevectors.test ("Datum Syntax"): New test set.

											
										
										
											2009-06-19 00:47:11 +02:00
+								static SCM
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								scm_read_bytevector (scm_t_wchar chr, SCM port)
-												Implement R6RS bytevector read syntax.

* libguile/read.c (scm_read_bytevector): New function.
  (scm_read_sharp): Add `v' case for bytevectors.

* test-suite/lib.scm (exception:read-error): New variable.

* test-suite/tests/bytevectors.test ("Datum Syntax"): New test set.

											
										
										
											2009-06-19 00:47:11 +02:00
+								{
 								  /* CHR as passed in is not examined: it is overwritten by each read
 								     below.  The next three characters from PORT must be `u', `8' and
 								     `(', in that order; anything else (including EOF, which matches
 								     none of them) is reported as a syntax error.  */
 								  chr = scm_getc (port);
 								  if (chr != 'u')
 								    goto syntax;
 								  chr = scm_getc (port);
 								  if (chr != '8')
 								    goto syntax;
 								  chr = scm_getc (port);
 								  if (chr != '(')
 								    goto syntax;
 								  /* CHR is `(' here.  The list returned by `scm_read_sexp ()' is
 								     converted with `scm_u8_list_to_bytevector ()'.  */
 								  return scm_u8_list_to_bytevector (scm_read_sexp (chr, port));
 								 syntax:
 								  /* The message contains no format directives, so pass an empty
 								     argument list, as the other `scm_i_input_error ()' callers in this
 								     file do when they have nothing to format.  A bare character object
 								     is not a list, and CHR may be EOF at this point anyway.  */
 								  scm_i_input_error ("read_bytevector", port,
 										     "invalid bytevector prefix",
 										     SCM_EOL);
 								  /* NOTE(review): presumably not reached, since the call above is
 								     expected to signal an error -- kept so every path returns.  */
 								  return SCM_UNSPECIFIED;
 								}
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static SCM
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								scm_read_guile_bit_vector (scm_t_wchar chr, SCM port)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								{
 								  /* Read the `#*10101'-style read syntax for bit vectors in Guile.  This is
 								     terribly inefficient but who cares?  */
 								  SCM s_bits = SCM_EOL;
 								  for (chr = scm_getc (port);
 								       (chr != EOF) && ((chr == '0') || (chr == '1'));
 								       chr = scm_getc (port))
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								    {
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      s_bits = scm_cons ((chr == '0') ? SCM_BOOL_F : SCM_BOOL_T, s_bits);
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								    }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  if (chr != EOF)
 								    scm_ungetc (chr, port);
 								  return scm_bitvector (scm_reverse_x (s_bits, SCM_EOL));
 								}
 								static inline SCM
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								scm_read_scsh_block_comment (scm_t_wchar chr, SCM port)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								{
 								  int bang_seen = 0;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  /* We can use the get_byte here because there is no need to get the
 								     locale correct when reading comments. This presumes that
 								     hash and exclamation points always represent themselves no
 								     matter what the source encoding is.*/
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								  for (;;)
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								    {
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								      int c = scm_get_byte_or_eof (port);
-												* Don't use return value from SCM_SETCDR or SCM_WHASHSET.

											
										
										
											2000-07-18 16:09:09 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      if (c == EOF)
 									scm_i_input_error ("skip_block_comment", port,
 											   "unterminated `#! ... !#' comment", SCM_EOL);
 								      if (c == '!')
 									bang_seen = 1;
 								      else if (c == '#' && bang_seen)
 									break;
 								      else
 									bang_seen = 0;
 								    }
 								  return SCM_UNSPECIFIED;
 								}
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								static SCM
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								scm_read_commented_expression (scm_t_wchar chr, SCM port)
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  scm_t_wchar c;
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
 								  c = flush_ws (port, (char *) NULL);
 								  if (EOF == c)
 								    scm_i_input_error ("read_commented_expression", port,
 								                       "no expression after #; comment", SCM_EOL);
 								  scm_ungetc (c, port);
 								  scm_read_expression (port);
 								  return SCM_UNSPECIFIED;
 								}
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								static SCM
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								scm_read_extended_symbol (scm_t_wchar chr, SCM port)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								{
 								  /* Guile's extended symbol read syntax looks like this:
 								       #{This is all a symbol name}#
 								     So here, CHR is expected to be `{'.  */
 								  int saw_brace = 0, finished = 0;
 								  size_t len = 0;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  SCM buf = scm_i_make_string (1024, NULL);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  buf = scm_i_string_start_writing (buf);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  while ((chr = scm_getc (port)) != EOF)
 								    {
 								      if (saw_brace)
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+									{
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									  if (chr == '#')
 									    {
 									      finished = 1;
 									      break;
 									    }
 									  else
 									    {
 									      saw_brace = 0;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									      scm_i_string_set_x (buf, len++, '}');
 									      scm_i_string_set_x (buf, len++, chr);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									    }
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+									}
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      else if (chr == '}')
 									saw_brace = 1;
 								      else
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									scm_i_string_set_x (buf, len++, chr);
-												* Don't use return value from SCM_SETCDR or SCM_WHASHSET.

											
										
										
											2000-07-18 16:09:09 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								      if (len >= scm_i_string_length (buf) - 2)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									{
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									  scm_i_string_stop_writing ();
 									  SCM addy = scm_i_make_string (1024, NULL);
 									  buf = scm_string_append (scm_list_2 (buf, addy));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									  len = 0;
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									  buf = scm_i_string_start_writing (buf);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									}
-												* Don't use return value from SCM_SETCDR or SCM_WHASHSET.

											
										
										
											2000-07-18 16:09:09 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								      if (finished)
 									break;
 								    }
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  scm_i_string_stop_writing ();
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								  return (scm_string_to_symbol (scm_c_substring (buf, 0, len)));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								}
 								/* Top-level token readers, i.e., dispatchers.  */
 								static SCM
 								scm_read_sharp_extension (int chr, SCM port)
 								{
 								  SCM proc;
 								  proc = scm_get_hash_procedure (chr);
 								  if (scm_is_true (scm_procedure_p (proc)))
 								    {
 								      long line = SCM_LINUM (port);
 								      int column = SCM_COL (port) - 2;
 								      SCM got;
 								      got = scm_call_2 (proc, SCM_MAKE_CHAR (chr), port);
 								      if (!scm_is_eq (got, SCM_UNSPECIFIED))
-												* Don't use return value from SCM_SETCDR or SCM_WHASHSET.

											
										
										
											2000-07-18 16:09:09 +00:00
+									{
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									  if (SCM_RECORD_POSITIONS_P)
 									    return (recsexpr (got, line, column,
 											      SCM_FILENAME (port)));
 									  else
 									    return got;
-												* Don't use return value from SCM_SETCDR or SCM_WHASHSET.

											
										
										
											2000-07-18 16:09:09 +00:00
+									}
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								    }
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								  return SCM_UNSPECIFIED;
 								}
 								/* The reader for the sharp `#' character.  It basically dispatches reads
 								   among the above token readers.   */
 								static SCM
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								scm_read_sharp (scm_t_wchar chr, SCM port)
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								#define FUNC_NAME "scm_lreadr"
 								{
 								  SCM result;
 								  chr = scm_getc (port);
 								  result = scm_read_sharp_extension (chr, port);
 								  if (!scm_is_eq (result, SCM_UNSPECIFIED))
 								    return result;
 								  switch (chr)
 								    {
 								    case '\\':
 								      return (scm_read_character (chr, port));
 								    case '(':
 								      return (scm_read_vector (chr, port));
 								    case 's':
 								    case 'u':
 								    case 'f':
 								      /* This one may return either a boolean or an SRFI-4 vector.  */
 								      return (scm_read_srfi4_vector (chr, port));
-												Implement R6RS bytevector read syntax.

* libguile/read.c (scm_read_bytevector): New function.
  (scm_read_sharp): Add `v' case for bytevectors.

* test-suite/lib.scm (exception:read-error): New variable.

* test-suite/tests/bytevectors.test ("Datum Syntax"): New test set.

											
										
										
											2009-06-19 00:47:11 +02:00
+								    case 'v':
 								      return (scm_read_bytevector (chr, port));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    case '*':
 								      return (scm_read_guile_bit_vector (chr, port));
 								    case 't':
 								    case 'T':
 								    case 'F':
 								      /* This one may return either a boolean or an SRFI-4 vector.  */
 								      return (scm_read_boolean (chr, port));
 								    case ':':
 								      return (scm_read_keyword (chr, port));
 								    case '0': case '1': case '2': case '3': case '4':
 								    case '5': case '6': case '7': case '8': case '9':
 								    case '@':
 								#if SCM_ENABLE_DEPRECATED
 								      /* See below for 'i' and 'e'. */
 								    case 'a':
 								    case 'c':
 								    case 'y':
 								    case 'h':
 								    case 'l':
 								#endif
 								      return (scm_i_read_array (port, chr));
 								    case 'i':
 								    case 'e':
 								#if SCM_ENABLE_DEPRECATED
 								      {
 									/* When next char is '(', it really is an old-style
 									   uniform array. */
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+									scm_t_wchar next_c = scm_getc (port);
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+									if (next_c != EOF)
 									  scm_ungetc (next_c, port);
 									if (next_c == '(')
 									  return scm_i_read_array (port, chr);
 									/* Fall through. */
 								      }
 								#endif
 								    case 'b':
 								    case 'B':
 								    case 'o':
 								    case 'O':
 								    case 'd':
 								    case 'D':
 								    case 'x':
 								    case 'X':
 								    case 'I':
 								    case 'E':
 								      return (scm_read_number_and_radix (chr, port));
 								    case '{':
 								      return (scm_read_extended_symbol (chr, port));
 								    case '!':
 								      return (scm_read_scsh_block_comment (chr, port));
-												add reader support for #; #` #' #, and #,@. fix bug in compile-and-load.

* libguile/read.c (flush_ws, scm_read_commented_expression)
  (scm_read_sharp): Add support for commenting out expressions with #;.
  (scm_read_syntax, scm_read_sharp): Add support for #', #`, #, and #,@.

* module/ice-9/boot-9.scm: Remove #' read-hash extension, which actually
  didn't do anything at all. It's been there since 1997, but no Guile
  code I've ever seen uses it, and it conflicts with #'x => (syntax x)
  from modern Scheme.

* module/system/base/compile.scm (compile-and-load): Whoops, fix a number
  of bugs here.

											
										
										
											2009-05-28 14:49:33 +02:00
+								    case ';':
 								      return (scm_read_commented_expression (chr, port));
 								    case '`':
 								    case '\'':
 								    case ',':
 								      return (scm_read_syntax (chr, port));
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								    default:
 								      result = scm_read_sharp_extension (chr, port);
 								      if (scm_is_eq (result, SCM_UNSPECIFIED))
 									scm_i_input_error (FUNC_NAME, port, "Unknown # object: ~S",
 											   scm_list_1 (SCM_MAKE_CHAR (chr)));
 								      else
 									return result;
 								    }
 								  return SCM_UNSPECIFIED;
 								}
 								#undef FUNC_NAME
 								static SCM
 								scm_read_expression (SCM port)
 								#define FUNC_NAME "scm_read_expression"
 								{
 								  while (1)
 								    {
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								      register scm_t_wchar chr;
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
 								      chr = scm_getc (port);
 								      switch (chr)
 									{
 									case SCM_WHITE_SPACES:
 									case SCM_LINE_INCREMENTORS:
 									  break;
 									case ';':
 									  (void) scm_read_semicolon_comment (chr, port);
 									  break;
 									case '(':
 									  return (scm_read_sexp (chr, port));
 									case '"':
 									  return (scm_read_string (chr, port));
 									case '\'':
 									case '`':
 									case ',':
 									  return (scm_read_quote (chr, port));
 									case '#':
 									  {
 									    SCM result;
 									    result = scm_read_sharp (chr, port);
 									    if (scm_is_eq (result, SCM_UNSPECIFIED))
 									      /* We read a comment or some such.  */
 									      break;
 									    else
 									      return result;
 									  }
 									case ')':
 									  scm_i_input_error (FUNC_NAME, port, "unexpected \")\"", SCM_EOL);
 									  break;
 									case EOF:
 									  return SCM_EOF_VAL;
 									case ':':
 									  if (scm_is_eq (SCM_PACK (SCM_KEYWORD_STYLE), scm_keyword_prefix))
 									    return scm_symbol_to_keyword (scm_read_expression (port));
 									  /* Fall through.  */
 									default:
 									  {
 									    if (((chr >= '0') && (chr <= '9'))
 										|| (strchr ("+-.", chr)))
 									      return (scm_read_number (chr, port));
 									    else
 									      return (scm_read_mixed_case_symbol (chr, port));
 									  }
 									}
 								    }
 								}
 								#undef FUNC_NAME
 								/* Actual reader.  */
 								SCM_DEFINE (scm_read, "read", 0, 1, 0,
 								            (SCM port),
 									    "Read an s-expression from the input port @var{port}, or from\n"
 									    "the current input port if @var{port} is not specified.\n"
 									    "Any whitespace before the next token is discarded.")
 								#define FUNC_NAME s_scm_read
 								{
 								  int c;
 								  if (SCM_UNBNDP (port))
 								    port = scm_current_input_port ();
 								  SCM_VALIDATE_OPINPORT (1, port);
 								  c = flush_ws (port, (char *) NULL);
 								  if (EOF == c)
 								    return SCM_EOF_VAL;
 								  scm_ungetc (c, port);
 								  return (scm_read_expression (port));
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
+								}
-												* Eliminate some calls to scm_wta.

											
										
										
											2001-03-04 17:09:34 +00:00
+								#undef FUNC_NAME
-												* read.c: Added code for recording of positions of source code
expressions; New functions: recsexpr, scm_lreadrecparen;
_scm_make_srcprops --> scm_make_srcprops
(scm_flush_ws): Removed updating of positions counters.  This work
is already done by scm_gen_getc

											
										
										
											1996-09-18 19:35:48 +00:00
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-												Changes from arch/CVS synchronization

											
										
										
											2007-07-22 16:30:13 +00:00
+								/* Used when recording expressions constructed by `scm_read_sharp ()'.  */
 								/* Record source properties for OBJ, a datum handed back by a `#' reader
 								   extension.  Non-pair objects are returned untouched.  For a pair that
 								   has no entry in `scm_source_whash' yet, the elements are visited
 								   recursively and an entry built from LINE, COLUMN and FILENAME is
 								   inserted for OBJ.  When SCM_COPY_SOURCE_P is set, that entry also
 								   carries a copy of the list whose elements are the results of the
 								   recursive calls; otherwise the copy slot is SCM_UNDEFINED.
 								   Always returns OBJ itself.  */
 								static SCM
 								recsexpr (SCM obj, long line, int column, SCM filename)
 								{
 								  if (!scm_is_pair(obj)) {
 								    return obj;
 								  } else {
 								    SCM tmp = obj, copy;
 								    /* If this sexpr is visible in the read:sharp source, we want to
 								       keep that information, so only record non-constant cons cells
 								       which haven't previously been read by the reader. */
 								    if (scm_is_false (scm_whash_lookup (scm_source_whash, obj)))
 								      {
 									if (SCM_COPY_SOURCE_P)
 									  {
 									    /* COPY must keep pointing at the first cell of the copied
 									       list, since it is what gets stored in the source
 									       properties below; TAIL is the cursor used to append.  */
 									    SCM tail;
 									    copy = scm_cons (recsexpr (SCM_CAR (obj), line, column, filename),
 											     SCM_UNDEFINED);
 									    tail = copy;
 									    while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
 									      {
 										SCM_SETCDR (tail, scm_cons (recsexpr (SCM_CAR (tmp),
 														      line,
 														      column,
 														      filename),
 													    SCM_UNDEFINED));
 										tail = SCM_CDR (tail);
 									      }
 									    /* Terminate the copy with the original's final cdr so that
 									       improper lists keep their shape.  */
 									    SCM_SETCDR (tail, tmp);
 									  }
 									else
 									  {
 									    /* No copy requested: still walk the elements so nested
 									       pairs get their own source properties.  */
 									    recsexpr (SCM_CAR (obj), line, column, filename);
 									    while ((tmp = SCM_CDR (tmp)) && scm_is_pair (tmp))
 									      recsexpr (SCM_CAR (tmp), line, column, filename);
 									    copy = SCM_UNDEFINED;
 									  }
 									scm_whash_insert (scm_source_whash,
 											  obj,
 											  scm_make_srcprops (line,
 													     column,
 													     filename,
 													     copy,
 													     SCM_EOL));
 								      }
 								    return obj;
 								  }
 								}
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+								/* Manipulate the read-hash-procedures alist.  This could be written in
 								   Scheme, but maybe it will also be used by C code during initialisation.  */
-												*.[ch]: Replace GUILE_PROC w/ SCM_DEFINE.

											
										
										
											2000-01-05 19:05:23 +00:00
+								SCM_DEFINE (scm_read_hash_extend, "read-hash-extend", 2, 0, 0,
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								            (SCM chr, SCM proc),
-												(scm_read_options, scm_read, scm_read_hash_extend): Added docstrings.

											
										
										
											2001-02-16 15:17:20 +00:00
+									    "Install the procedure @var{proc} for reading expressions\n"
 									    "starting with the character sequence @code{#} and @var{chr}.\n"
 									    "@var{proc} will be called with two arguments:  the character\n"
 									    "@var{chr} and the port to read further data from. The object\n"
-												(s_scm_read_hash_extend): document #f argument to
read-hash-extend.

											
										
										
											2007-01-06 18:20:35 +00:00
+									    "returned will be the return value of @code{read}. \n"
 									    "Passing @code{#f} for @var{proc} will remove a previous setting. \n"
 									    )
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#define FUNC_NAME s_scm_read_hash_extend
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
+								{
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+								  SCM this;
 								  SCM prev;
-												* Deprecated scm_makfromstr and added scm_mem2string as a replacement.
* Eliminated some potential gc problems.
* Eliminated some signedness problems.
* Minor changes.

											
										
										
											2001-06-26 10:59:34 +00:00
+								  SCM_VALIDATE_CHAR (1, chr);
-												* deprecated.h, boolean.h (SCM_FALSEP, SCM_NFALSEP, SCM_BOOL,
SCM_NEGATE_BOOL, SCM_BOOLP): Deprecated by moving into "deprecated.h".
Replaced all uses with scm_is_false, scm_is_true, scm_from_bool, and
scm_is_bool, respectively.

											
										
										
											2004-07-06 10:59:25 +00:00
+								  SCM_ASSERT (scm_is_false (proc)
-												* tags.h, deprecated.h (SCM_EQ_P): Deprecated by moving it into
deprecated.h.  Replaced all uses with scm_is_eq.

											
										
										
											2004-07-27 15:41:49 +00:00
+									      || scm_is_eq (scm_procedure_p (proc), SCM_BOOL_T),
-												* Deprecated scm_makfromstr and added scm_mem2string as a replacement.
* Eliminated some potential gc problems.
* Eliminated some signedness problems.
* Minor changes.

											
										
										
											2001-06-26 10:59:34 +00:00
+									      proc, SCM_ARG2, FUNC_NAME);
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+								  /* Check if chr is already in the alist.  */
 								  this = *scm_read_hash_procedures;
 								  prev = SCM_BOOL_F;
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+								  while (1)
 								    {
-												*** empty log message ***

											
										
										
											2004-09-22 17:41:37 +00:00
+								      if (scm_is_null (this))
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+									{
 									  /* not found, so add it to the beginning.  */
-												* deprecated.h, boolean.h (SCM_FALSEP, SCM_NFALSEP, SCM_BOOL,
SCM_NEGATE_BOOL, SCM_BOOLP): Deprecated by moving into "deprecated.h".
Replaced all uses with scm_is_false, scm_is_true, scm_from_bool, and
scm_is_bool, respectively.

											
										
										
											2004-07-06 10:59:25 +00:00
+									  if (scm_is_true (proc))
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+									    {
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+									      *scm_read_hash_procedures =
 										scm_cons (scm_cons (chr, proc), *scm_read_hash_procedures);
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+									    }
 									  break;
 									}
-												* tags.h, deprecated.h (SCM_EQ_P): Deprecated by moving it into
deprecated.h.  Replaced all uses with scm_is_eq.

											
										
										
											2004-07-27 15:41:49 +00:00
+								      if (scm_is_eq (chr, SCM_CAAR (this)))
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+									{
 									  /* already in the alist.  */
-												* deprecated.h, boolean.h (SCM_FALSEP, SCM_NFALSEP, SCM_BOOL,
SCM_NEGATE_BOOL, SCM_BOOLP): Deprecated by moving into "deprecated.h".
Replaced all uses with scm_is_false, scm_is_true, scm_from_bool, and
scm_is_bool, respectively.

											
										
										
											2004-07-06 10:59:25 +00:00
+									  if (scm_is_false (proc))
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+									    {
 									      /* remove it.  */
-												* deprecated.h, boolean.h (SCM_FALSEP, SCM_NFALSEP, SCM_BOOL,
SCM_NEGATE_BOOL, SCM_BOOLP): Deprecated by moving into "deprecated.h".
Replaced all uses with scm_is_false, scm_is_true, scm_from_bool, and
scm_is_bool, respectively.

											
										
										
											2004-07-06 10:59:25 +00:00
+									      if (scm_is_false (prev))
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+										{
 										  *scm_read_hash_procedures =
 										    SCM_CDR (*scm_read_hash_procedures);
 										}
 									      else
 										scm_set_cdr_x (prev, SCM_CDR (this));
 									    }
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+									  else
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+									    {
 									      /* replace it.  */
 									      scm_set_cdr_x (SCM_CAR (this), proc);
 									    }
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
+									  break;
 									}
 								      prev = this;
 								      this = SCM_CDR (this);
 								    }
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
 								  return SCM_UNSPECIFIED;
 								}
-												* *.c: Pervasive software-engineering-motivated rewrite of
function headers and argument checking.  Switched SCM_PROC,
SCM_PROC1 macros to be GUILE_PROC, GUILE_PROC1 (may change names
later, but was useful to keep old versions around while migrate)
that has docstrings and argument lists embedded in the GUILE_PROC
macro invocations that expand into a function header.  Use lots of
new SCM_VALIDATE_* macros to simplify error checking and reduce
tons of redundancy.  This is very similar to what I did for Scwm.

Note that none of the extraction of the docstrings, nor software
engineering checks of Scwm is yet added to Guile.  I'll work on
that tomorrow, I expect.

* Makefile.am: Added scm_validate.h to modinclude_HEADERS.

* chars.c: Added docstrings for the primitives defined in here.

* snarf.h:  Added GUILE_PROC, GUILE_PROC1.  Added
SCM_REGISTER_PROC to be like old SCM_PROC, though old SCM_PROC
still remains for now.  Changed naming convention for the s_foo
string name of the primitive to be s_scm_foo for ease of use with
the macro.

* scm_validate.h: Lots of new SCM_VALIDATE macros to simplify
argument checking through guile.  Maybe some of these should be
folded into the header file for the types they check, but for now
it was easiest to just stick them all in one place.

											
										
										
											1999-12-12 02:36:16 +00:00
+								#undef FUNC_NAME
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
+								/* Recover the read-hash procedure corresponding to char c.  */
 								static SCM
-												* *.c: Finish replacing K&R style prototypes with ANSI C
prototypes.

* eval.c: Make scm_m_mody's 3rd argument be a const char *, not a
char *.  ANSI prototypes caught this.

* strorder.c: Use GUILE_PROC1 for the couple SCM_PROC1 expansions
that I missed.

* scm_validate.h: Use SCM_BOOLP for validating bools.  Do not
expand macros if SCM_DOCSTRING_SNARF.

											
										
										
											1999-12-12 20:35:02 +00:00
+								scm_get_hash_procedure (int c)
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
+								{
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+								  SCM rest = *scm_read_hash_procedures;
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
+								  while (1)
 								    {
-												*** empty log message ***

											
										
										
											2004-09-22 17:41:37 +00:00
+								      if (scm_is_null (rest))
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
+									return SCM_BOOL_F;
-												* list.c: Moved append docs to append! Thanks Dirk Hermann.  Also,
added append docs from R4RS.

* strings.c: Docstring typo fix, + eliminate unneeded IMP tests.
Thanks Dirk Hermann!

* chars.h: Provide SCM_CHARP, SCM_CHAR, SCM_MAKE_CHAR and
deprecate SCM_ICHRP, SCM_ICHR, SCM_MAKICHR.  Thanks Dirk Hermann!

* *.h, *.c: Use SCM_CHARP, SCM_CHAR, SCM_MAKE_CHAR throughout.
Drop use of SCM_P for function prototypes... assume an ANSI C
compiler.  Thanks Dirk Hermann!

											
										
										
											2000-03-02 20:54:43 +00:00
+								      if (SCM_CHAR (SCM_CAAR (rest)) == c)
-													* strports.c (scm_read_0str, scm_eval_0str): update scm_read usage.

	* gdbint.c (gdb_read): update scm_lreadr usage.

	* load.h: update prototypes.

	* load.c (scm_primitive_load, scm_read_and_eval_x,
	scm_primitive_load_path): remove case_insensitive_p, sharp arguments.

	* read.h: add prototype for scm_read_hash_extend.  Change args for
	other prototypes.

	* read.c (scm_read_hash_procedures): new variable.
	(scm_read_hash_extend): new procedure.
	(scm_get_hash_procedure): new procedure.
*	(scm_lreadr): use scm_get_hash_procedure instead of an argument
	for extended # processing.
	(scm_read, scm_lreadr, scm_lreadrecparen, scm_lreadparen,
	scm_read_token): remove case_i, sharp arguments.  Change callers.

	* read.h (SCM_N_READ_OPTIONS): increase to 3.
	(SCM_CASE_INSENSITIVE_P): define.

	* read.c: add case-insensitive option to scm_read_opts.
*	(scm_read_token): use SCM_CASE_INSENSITIVE_P instead of an argument
	to determine whether to convert symbol case.
	(default_case_i): definition removed.
	* read.c (scm_read_token): if case_i, downcase ic before doing
	anything with it.

											
										
										
											1997-03-08 18:58:24 +00:00
+									return SCM_CDAR (rest);
 								      rest = SCM_CDR (rest);
 								    }
 								}
-												* __scm.h, alist.c, alist.h, append.c, append.h, appinit.c,
arbiters.c, arbiters.h, async.c, async.h, boolean.c, boolean.h,
chars.c, chars.h, continuations.c, continuations.h, debug.c,
debug.h, dynwind.c, dynwind.h, eq.c, eq.h, error.c, eval.c,
eval.h, extchrs.c, extchrs.h, fdsocket.c, fdsocket.h, filesys.c,
filesys.h, fports.c, fports.h, gc.c, gdb_interface.h, gdbint.c,
gdbint.h, genio.c, genio.h, gscm.c, gscm.h, gsubr.c, gsubr.h,
hash.c, hash.h, hashtab.c, hashtab.h, init.c, ioext.c, ioext.h,
kw.c, kw.h, libguile.h, mallocs.c, mallocs.h, markers.c,
markers.h, mbstrings.c, mbstrings.h, numbers.c, numbers.h,
objprop.c, objprop.h, options.c, options.h, pairs.c, pairs.h,
ports.c, ports.h, posix.c, posix.h, print.c, print.h, procprop.c,
procprop.h, procs.c, procs.h, ramap.c, ramap.h, read.c, read.h,
root.c, scmsigs.c, scmsigs.h, sequences.c, sequences.h, simpos.c,
simpos.h, smob.c, socket.c, socket.h, srcprop.c, srcprop.h,
stackchk.c, stackchk.h, stime.c, stime.h, strings.c, strings.h,
strop.c, strop.h, strorder.c, strorder.h, strports.c, strports.h,
struct.c, struct.h, symbols.c, symbols.h, tag.c, tag.h, unif.c,
unif.h, variable.c, variable.h, vectors.c, vectors.h, version.c,
version.h, vports.c, vports.h, weaks.c, weaks.h: Use SCM_P to
declare functions with prototypes.  (Patch thanks to Marius
Vollmer.)

											
										
										
											1996-10-14 01:33:50 +00:00
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_sharp)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								#define SCM_ENCODING_SEARCH_SIZE (500)
 								/* Search the first few hundred characters of a file for
 								   an emacs-like coding declaration.  */
 								char *
 								scm_scan_for_encoding (SCM port)
 								{
 								  char header[SCM_ENCODING_SEARCH_SIZE+1];
 								  size_t bytes_read;
 								  char *encoding = NULL;
 								  int utf8_bom = 0;
 								  char *pos;
 								  int i;
 								  int in_comment;
 								  bytes_read = scm_c_read (port, header, SCM_ENCODING_SEARCH_SIZE);
 								  scm_seek (port, scm_from_int (0), scm_from_int (SEEK_SET));
 								  if (bytes_read > 3
 								      && header[0] == '\xef' && header[1] == '\xbb' && header[2] == '\xbf')
 								    utf8_bom = 1;
 								  /* search past "coding[:=]" */
 								  pos = header;
 								  while (1)
 								    {
 								      if ((pos = strstr(pos, "coding")) == NULL)
 								        return NULL;
 								      pos += strlen("coding");
 								      if (pos - header >= SCM_ENCODING_SEARCH_SIZE ||
 								          (*pos == ':' || *pos == '='))
 								        {
 								          pos ++;
 								          break;
 								        }
 								    }
 								  /* skip spaces */
 								  while (pos - header <= SCM_ENCODING_SEARCH_SIZE &&
 									 (*pos == ' ' || *pos == '\t'))
 								    pos ++;
 								  /* grab the next token */
 								  i = 0;
 								  while (pos + i - header <= SCM_ENCODING_SEARCH_SIZE
-												Cast the input to isalpha et al to integer

* libguile/gc_os_dep.c (GC_linux_stack_base) [LINUX_STACKBOTTOM]: cast
  input of ctype functions to int

* libguile/inet_aton.c (inet_aton): cast input of ctype functions to int

* libguile/read.c (scm_scan_for_encoding): cast input of isalnum to int

* libguile/win32-socket.c (scm_i_socket_uncomment): cast input of isspace
  to int

											
										
										
											2009-08-28 21:19:05 -07:00
+									 && (isalnum((int) pos[i]) || pos[i] == '_' || pos[i] == '-'
 								             || pos[i] == '.'))
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
+								    i++;
 								  if (i == 0)
 								    return NULL;
 								  encoding = scm_malloc (i+1);
 								  memcpy (encoding, pos, i);
 								  encoding[i] ='\0';
-												Always cast input to toupper as int

* libguile/read.c (scm_scan_for_encoding): add cast to int

											
										
										
											2009-08-27 07:35:39 -07:00
+								  for (i = 0; i < strlen (encoding); i++)
 								    encoding[i] = toupper ((int) encoding[i]);
-												Add full Unicode capability to ports and the default reader

Ports are given two additional properties: a character encoding and
a conversion failure strategy.  These properties have getters and setters.
The new properties are used to convert any locale text to/from the
internal representation of strings.

If unspecified, ports use a default value. The default value of these
properties is held in a fluid.  The default character encoding can be
modified by calling setlocale.

ISO-8859-1 is treated specially.  Since it is a native encoding of
strings, it can be processed more quickly.  Source code is assumed to be
ISO-8859-1 unless otherwise specified.  The encoding of a source code
file can be given as 'coding: XXXXX' in a magic comment at the top of a
file.

The C functions that deal with encoding often use a null pointer
as shorthand for the native Latin-1 encoding, for efficiency's sake.

* test-suite/tests/encoding-iso88591.test: new tests
* test-suite/tests/encoding-iso88597.test: new tests
* test-suite/tests/encoding-utf8.test: new tests
* test-suite/tests/encoding-escapes.test: new tests
* test-suite/tests/numbers.test: declare 'binary' encoding
* test-suite/tests/ports.test: declare 'binary' encoding
* test-suite/tests/r6rs-ports.test: declare 'binary' encoding

* module/system/base/compile.scm (compile-file): use source-code
  file's self-declared encoding when compiling files

* libguile/strports.c: store string ports in locale encoding
  (scm_strport_to_locale_u8vector, scm_call_with_output_locale_u8vector)
  (scm_open_input_locale_u8vector, scm_get_output_locale_u8vector):
  new functions

* libguile/strings.h: new declaration for scm_i_string_contains_char

* libguile/strings.c (scm_i_string_contains_char): new function
  (scm_from_stringn, scm_to_stringn):  use NULL for Latin-1
  (scm_from_locale_stringn, scm_to_locale_stringn): respect character
  encoding of input and output ports

* libguile/read.h: declaration for scm_scan_for_encoding

* libguile/read.c:
  (read_token): now takes scheme string instead of C string/length
  (read_complete_token): new function
  (scm_read_sexp, scm_read_number, scm_read_mixed_case_symbol)
  (scm_read_number_and_radix, scm_read_quote, scm_read_semicolon_comment)
  (scm_read_srfi4_vector, scm_read_bytevector, scm_read_guile_bit_vector)
  (scm_read_scsh_block_comment, scm_read_commented_expression)
  (scm_read_extended_symbol, scm_read_sharp_extension, scm_read_shart)
  (scm_read_expression): use scm_t_wchar for char type, use read_complete_token
  (scm_scan_for_encoding): new function to find a file's character encoding
  (scm_file_encoding): new function to find a port's character encoding

* libguile/rdelim.c: don't unpack strings

* libguile/print.h: declaration for modified function
  scm_i_charprint

* libguile/print.c: use locale when printing characters and
  strings
  (scm_i_charprint): input parameter is now scm_t_wchar
  (scm_simple_format): don't unpack strings

* libguile/posix.h: new declaration for scm_setbinary.

* libguile/posix.c (scm_setlocale): set default and stdio port
  encodings based on the locale's character encoding
  (scm_setbinary): new function

* libguile/ports.h (scm_t_port): add encoding and failed
  conversion handler to port type.  Declarations for new or modified
  functions scm_getc, scm_unget_byte, scm_ungetc,
  scm_i_get_port_encoding, scm_i_set_port_encoding_x,
  scm_port_encoding, scm_set_port_encoding_x,
  scm_i_get_conversion_strategy, scm_i_set_conversion_strategy_x,
  scm_port_conversion_strategy, scm_set_port_conversion_strategy_x.

* libguile/ports.c: assign the current ports to zero on startup so
  we can see if they've been set.
  (scm_current_input_port, scm_current_output_port,
  scm_current_error_port): return #f if the port is not yet
  initialized
  (scm_new_port_table_entry): set up a new port's encoding and
  illegal sequence handler based on the thread's current defaults
  (scm_i_remove_port): free port encoding name when port is removed
  (scm_i_mode_bits_n): now takes a scheme string instead of a c
  string and length.  All callers changed.
  (SCM_MBCHAR_BUF_SIZE): new const
  (scm_getc): new function, since the scm_getc in inline.h is now
  scm_get_byte_or_eof.  This pulls one codepoint from a port.
  (scm_lfwrite_substr, scm_lfwrite_str): now uses port's encoding
  (scm_unget_byte): new function, incorporating the low-level functionality
  of scm_ungetc
  (scm_ungetc): uses scm_unget_byte

* libguile/numbers.h (scm_t_wchar): compilation order problem with
  scm_t_wchar being used in functions in multiple headers.  Forward
  declare scm_t_wchar.

* libguile/load.c (scm_primitive_load): scan for file encoding at
  top of file and use it to set the load port's encoding

* libguile/inline.h (scm_get_byte_or_eof): new function
  incorporating most of the functionality of scm_getc.

* libguile/fports.c (fport_fill_input): now returns scm_t_wchar

* libguile/chars.h (scm_t_wchar): avoid compilation order problem
  with declaration of scm_t_wchar

											
										
										
											2009-08-25 07:54:37 -07:00
 								  /* push backwards to make sure we were in a comment */
 								  in_comment = 0;
 								  while (pos - i - header > 0)
 								    {
 								      if (*(pos - i) == '\n')
 									{
 									  /* This wasn't in a semicolon comment. Check for a
 									   hash-bang comment. */
 									  char *beg = strstr (header, "#!");
 									  char *end = strstr (header, "!#");
 									  if (beg < pos && pos < end)
 									    in_comment = 1;
 									  break;
 									}
 								      if (*(pos - i) == ';')
 									{
 									  in_comment = 1;
 									  break;
 									}
 								      i ++;
 								    }
 								  if (!in_comment)
 								    {
 								      /* This wasn't in a comment */
 								      free (encoding);
 								      return NULL;
 								    }
 								  if (utf8_bom && strcmp(encoding, "UTF-8"))
 								    scm_misc_error (NULL,
 										    "the port input declares the encoding ~s but is encoded as UTF-8",
 										    scm_list_1 (scm_from_locale_string (encoding)));
 								  return encoding;
 								}
 								/* Scheme-visible wrapper around scm_scan_for_encoding.  The scanner
 								   hands back either NULL (no usable coding declaration) or a
 								   malloc'ed, upper-cased C string that this function owns and must
 								   release once it has been copied into a Scheme string.  */
 								SCM_DEFINE (scm_file_encoding, "file-encoding", 1, 0, 0,
 								            (SCM port),
 								            "Scans the port for an EMACS-like character coding declaration\n"
 								            "near the top of the contents of a port with random-acessible contents.\n"
 								            "The coding declaration is of the form\n"
 								            "@code{coding: XXXXX} and must appear in a scheme comment.\n"
 								            "\n"
 								            "Returns a string containing the character encoding of the file\n"
 								            "if a declaration was found, or @code{#f} otherwise.\n")
 								#define FUNC_NAME s_scm_file_encoding
 								{
 								  char *enc;
 								  SCM s_enc;
 								
 								  enc = scm_scan_for_encoding (port);
 								
 								  /* No declaration found: nothing was allocated, so nothing to free.  */
 								  if (enc == NULL)
 								    return SCM_BOOL_F;
 								
 								  /* Copy the name into a Scheme string, then drop the C buffer that
 								     the scanner allocated for us.  */
 								  s_enc = scm_from_locale_string (enc);
 								  free (enc);
 								  return s_enc;
 								}
 								#undef FUNC_NAME
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								void
 								scm_init_read ()
 								{
-												* read.c (scm_read_hash_extend): make scm_read_hash_procedures a
pointer to the Scheme variable read-hash-procedures and intern it
in scm_init_read. Modify scm_read_hash_extend and
scm_get_hash_procedure to use the pointer.

											
										
										
											1997-03-11 03:57:04 +00:00
+								  scm_read_hash_procedures =
-												Merge from mvo-vcell-cleanup-1-branch.

											
										
										
											2001-05-15 14:57:22 +00:00
+								    SCM_VARIABLE_LOC (scm_c_define ("read-hash-procedures", SCM_EOL));
-													* read.c (scm_init_read): initialise scm_read_hash_procedures
	(idea from Mikael: make it a pair so scm_permanent object only
	called once.)
	(scm_read_hash_extend): don't call scm_permanent_object.
	(ideas from Mikael): if chr is already in the list, replace its
	procedure instead of appending it again.  If chr is #f, remove
	it from the list.
	(scm_get_hash_procedure): take CDR of scm_read_hash_procedures.

											
										
										
											1997-03-08 22:52:56 +00:00
-												* readline.c: terminate option list with NULL.

* read.c: idem.

* print.c: idem.

* eval.c: terminate option lists with 0.

* options.c: remove n (for length) from scm_option_X
functions. Detect option list length by looking for NULL name.

											
										
										
											2007-01-19 19:26:36 +00:00
+								  scm_init_opts (scm_read_options, scm_read_opts);
-												* Makefile.am (DEFS): Added.  automake adds -I options to DEFS,
and we don't want that.
(INCLUDES): Removed all -I options except for the root source
directory and the root build directory.

* *.*: Change includes so that they always use the "prefixes"
libguile/, qt/, guile-readline/, or libltdl/.
(Thanks to Tim Mooney.)

											
										
										
											2000-04-21 14:16:44 +00:00
+								#include "libguile/read.x"
-												maintainer changed: was lord, now jimb; first import

											
										
										
											1996-07-25 22:56:11 +00:00
+								}
-													* *.[hc]: add Emacs magic at the end of file, to ensure GNU
 	indentation style.

											
										
										
											2000-03-19 19:01:16 +00:00
 								/*
 								  Local Variables:
 								  c-file-style: "gnu"
 								  End:
 								*/