fix scm_to_utf8_stringn for wide strings to NUL-append the strings
* libguile/strings.c (u32_u8_strlen, scm_to_utf8_stringn): Sigh, fix the u32-to-u8 optimization to ensure that the utf8 string is NUL-terminated.
This commit is contained in:
parent
a8004dcb4d
commit
2c1b79513b
1 changed files with 66 additions and 3 deletions
|
|
@ -1874,6 +1874,39 @@ latin1_to_u8 (const scm_t_uint8 *str, size_t latin_len,
|
|||
return u8_result;
|
||||
}
|
||||
|
||||
/* From RFC 3629:
|
||||
|
||||
Char. number range | UTF-8 octet sequence
|
||||
(hexadecimal) | (binary)
|
||||
--------------------+---------------------------------------------
|
||||
0000 0000-0000 007F | 0xxxxxxx
|
||||
0000 0080-0000 07FF | 110xxxxx 10xxxxxx
|
||||
0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
|
||||
0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
|
||||
*/
|
||||
|
||||
static size_t
|
||||
u32_u8_strlen (const scm_t_uint32 *str, size_t len)
|
||||
{
|
||||
size_t ret, i;
|
||||
|
||||
for (i = 0, ret = 0; i < len; i++)
|
||||
{
|
||||
scm_t_uint32 c = str[i];
|
||||
|
||||
if (c <= 0x7f)
|
||||
ret += 1;
|
||||
else if (c <= 0x7ff)
|
||||
ret += 2;
|
||||
else if (c <= 0xffff)
|
||||
ret += 3;
|
||||
else
|
||||
ret += 4;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
char *
|
||||
scm_to_utf8_stringn (SCM str, size_t *lenp)
|
||||
{
|
||||
|
|
@ -1882,9 +1915,39 @@ scm_to_utf8_stringn (SCM str, size_t *lenp)
|
|||
scm_i_string_length (str),
|
||||
NULL, lenp);
|
||||
else
|
||||
return (char *) u32_to_u8 ((scm_t_uint32*)scm_i_string_wide_chars (str),
|
||||
scm_i_string_length (str),
|
||||
NULL, lenp);
|
||||
{
|
||||
scm_t_uint8 *buf, *ret;
|
||||
size_t len, allocated;
|
||||
|
||||
len = u32_u8_strlen ((scm_t_uint32*)scm_i_string_wide_chars (str),
|
||||
scm_i_string_length (str));
|
||||
allocated = len + 1;
|
||||
buf = scm_malloc (allocated);
|
||||
|
||||
ret = u32_to_u8 ((scm_t_uint32*)scm_i_string_wide_chars (str),
|
||||
scm_i_string_length (str), buf, &len);
|
||||
|
||||
if (ret == buf && len + 1 == allocated)
|
||||
{
|
||||
ret[len] = 0;
|
||||
return (char *) ret;
|
||||
}
|
||||
|
||||
/* An error: a bad codepoint. */
|
||||
{
|
||||
int saved_errno = errno;
|
||||
|
||||
free (buf);
|
||||
if (!saved_errno)
|
||||
abort ();
|
||||
|
||||
scm_decoding_error ("scm_to_utf8_stringn", errno,
|
||||
"invalid codepoint in string", str);
|
||||
|
||||
/* Not reached. */
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
scm_t_wchar *
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue