Port encodings are case-insensitive, but normalized to upper-case.

* libguile/ports.c (ascii_toupper, encoding_matches)
  (canonicalize_encoding): New helpers.

  (scm_c_make_port_with_encoding):
  (scm_i_set_default_port_encoding):
  (scm_i_set_port_encoding_x): Use the new helpers to be
  case-insensitive and also to canonicalize the internal representation
  to upper-case ASCII names.

  (scm_i_default_port_encoding): Never return NULL.
  (scm_port_encoding): The encoding is always a string.

* libguile/read.c (scm_i_scan_for_encoding): Use a locale-independent
  check instead of isalnum.  Don't upcase the result: the port code will
  handle that.

* test-suite/tests/web-response.test ("example-1"): Adapt test to expect
  normalized (upper-case) encoding for the response port.
This commit is contained in:
Andy Wingo 2013-01-15 14:31:49 +01:00
commit 93c4fa2174
3 changed files with 95 additions and 51 deletions

View file

@ -25,7 +25,6 @@
#endif
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <unistd.h>
#include <unicase.h>
@ -1949,6 +1948,15 @@ scm_get_hash_procedure (int c)
#define SCM_ENCODING_SEARCH_SIZE (500)
/* Return 1 if C may appear in the name of a character encoding, and 0
   otherwise.

   Accepted characters are the ASCII letters and digits plus the
   punctuation "_-.:/,+=()".  The ranges are compared directly instead
   of going through <ctype.h>, so the result does not depend on the
   current locale.  */
static int
is_encoding_char (char c)
{
  if (c >= 'a' && c <= 'z') return 1;
  if (c >= 'A' && c <= 'Z') return 1;
  if (c >= '0' && c <= '9') return 1;
  /* strchr also matches the terminating NUL of the string it searches,
     so '\0' must be rejected explicitly: a NUL byte is never part of an
     encoding name.  */
  return c != '\0' && strchr ("_-.:/,+=()", c) != NULL;
}
/* Search the first few hundred characters of a file for an Emacs-like coding
declaration. Returns either NULL or a string whose storage has been
allocated with `scm_gc_malloc ()'. */
@ -2034,8 +2042,7 @@ scm_i_scan_for_encoding (SCM port)
i = 0;
while (encoding_start + i - header <= SCM_ENCODING_SEARCH_SIZE
&& encoding_start + i - header < bytes_read
&& (isalnum ((int) encoding_start[i])
|| strchr ("_-.:/,+=()", encoding_start[i]) != NULL))
&& is_encoding_char (encoding_start[i]))
i++;
encoding_length = i;
@ -2043,8 +2050,6 @@ scm_i_scan_for_encoding (SCM port)
return NULL;
encoding = scm_gc_strndup (encoding_start, encoding_length, "encoding");
for (i = 0; i < encoding_length; i++)
encoding[i] = toupper ((int) encoding[i]);
/* push backwards to make sure we were in a comment */
in_comment = 0;
@ -2076,7 +2081,7 @@ scm_i_scan_for_encoding (SCM port)
/* This wasn't in a comment */
return NULL;
if (utf8_bom && strcmp(encoding, "UTF-8"))
if (utf8_bom && strcasecmp (encoding, "UTF-8"))
scm_misc_error (NULL,
"the port input declares the encoding ~s but is encoded as UTF-8",
scm_list_1 (scm_from_locale_string (encoding)));