;;;; guile/test-suite/tests/encoding-escapes.test

;;;; encoding-escapes.test --- test suite for Guile's string encodings -*- mode: scheme; coding: utf-8 -*-
;;;;
;;;; Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;;; Lesser General Public License for more details.
;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

(define-module (test-strings)
  #:use-module (test-suite lib)
  #:use-module (srfi srfi-1))

;; Build a string whose characters have the given integer code points,
;; in order, e.g. (string-ints 65) => "A".  With no arguments the result
;; is the empty string.
(define (string-ints . args)
  (list->string (map integer->char args)))

;; Sample strings containing non-ASCII characters; shared by the tests below.
(define s1 "última")    ; first character is U+00FA
(define s2 "cédula")    ; second character is U+00E9
(define s3 "años")      ; second character is U+00F1
(define s4 "羅生門")    ; U+7F85 U+751F U+9580, no ASCII characters at all

;; Each sample string must compare equal to the string built from its
;; expected sequence of code points.
(with-test-prefix "internal encoding"

  (pass-if "ultima"
    (let ((expected (string-ints #xfa #x6c #x74 #x69 #x6d #x61)))
      (string=? s1 expected)))

  (pass-if "cedula"
    (let ((expected (string-ints #x63 #xe9 #x64 #x75 #x6c #x61)))
      (string=? s2 expected)))

  (pass-if "anos"
    (let ((expected (string-ints #x61 #xf1 #x6f #x73)))
      (string=? s3 expected)))

  (pass-if "Rashomon"
    (let ((expected (string-ints #x7f85 #x751f #x9580)))
      (string=? s4 expected))))

;; Each sample string, split into characters, must match the expected
;; list element by element.  Non-ASCII characters are spelled with the
;; numeric (octal) character syntax.
(with-test-prefix "chars"

  (pass-if "ultima"
    (let ((actual   (string->list s1))
          (expected (list #\372 #\l #\t #\i #\m #\a)))
      (list= eqv? actual expected)))

  (pass-if "cedula"
    (let ((actual   (string->list s2))
          (expected (list #\c #\351 #\d #\u #\l #\a)))
      (list= eqv? actual expected)))

  (pass-if "anos"
    (let ((actual   (string->list s3))
          (expected (list #\a #\361 #\o #\s)))
      (list= eqv? actual expected)))

  (pass-if "Rashomon"
    (let ((actual   (string->list s4))
          (expected (list #\77605 #\72437 #\112600)))
      (list= eqv? actual expected))))


;; Check that an error is flagged on display output when the output
;; error strategy is 'error.

;; Display STR on a fresh string port whose encoding is ASCII and whose
;; conversion strategy is 'error.  Return true only if an
;; `encoding-error' is thrown that reports that same port, reports
;; BAD-CHAR as the offending character, and leaves exactly WRITTEN in
;; the port's output.  Return #f if no error is thrown at all.
(define (display-fails-at? str bad-char written)
  (let ((port (open-output-string)))
    (set-port-encoding! port "ASCII")
    (set-port-conversion-strategy! port 'error)
    (catch 'encoding-error
      (lambda ()
        (display str port)
        ;; Reaching this point means no error was raised: fail.
        #f)
      (lambda (key subr message errno failed-port failed-char)
        (and (eq? failed-port port)
             (char=? failed-char bad-char)
             (string=? written (get-output-string port)))))))

(with-test-prefix "display output errors"

  ;; The very first character cannot be encoded, so nothing is written.
  (pass-if "ultima"
    (display-fails-at? s1 (string-ref s1 0) ""))

  (pass-if "Rashomon"
    (display-fails-at? s4 (string-ref s4 0) ""))

  (pass-if "tekniko"
    ;; This time encoding should fail on the 3rd character.
    (display-fails-at? "teĥniko" #\ĥ "te")))

;; Check that a question mark appears in place of each character that
;; cannot be encoded when the conversion strategy is 'substitute.
(with-test-prefix "display output substitutions"

  ;; Display STR on a fresh ASCII string port using the 'substitute
  ;; strategy and return what ended up in the port.
  (let ((displayed-as-ascii
         (lambda (str)
           (let ((port (open-output-string)))
             (set-port-encoding! port "ASCII")
             (set-port-conversion-strategy! port 'substitute)
             (display str port)
             (get-output-string port)))))

    (pass-if "ultima"
      (string=? "?ltima" (displayed-as-ascii s1)))

    (pass-if "Rashomon"
      (string=? "???" (displayed-as-ascii s4)))))


;; Check that characters which cannot be encoded are displayed as hex
;; escapes, and that no error is thrown, when the conversion strategy
;; is 'escape.
(with-test-prefix "display output escapes"

  ;; Display STR on a fresh ASCII string port using the 'escape
  ;; strategy and return what ended up in the port.
  (let ((escaped-display
         (lambda (str)
           (let ((port (open-output-string)))
             (set-port-encoding! port "ASCII")
             (set-port-conversion-strategy! port 'escape)
             (display str port)
             (get-output-string port)))))

    (pass-if "ultima"
      (string=? "\\xfaltima" (escaped-display s1)))

    (pass-if "Rashomon"
      (string=? "\\u7f85\\u751f\\u9580" (escaped-display s4)))

    (pass-if "fake escape"
      ;; The input string below contains something that looks like
      ;; an escape in libunistring syntax, but which should be left
      ;; as is in the output.  See
      ;; <http://lists.gnu.org/archive/html/bug-libunistring/2010-09/msg00004.html>
      ;; for background info.
      (string=? "\\u03bb -- \\u0012" (escaped-display "λ -- \\u0012")))))

;; Check that `read' turns \x and \u escapes inside a string literal
;; into the corresponding characters.
(with-test-prefix "input escapes"

  (pass-if "última"
    (with-locale "en_US.utf8"
      (let ((datum (with-input-from-string "\"\\xfaltima\"" read)))
        (string=? "última" datum))))

  (pass-if "羅生門"
    (with-locale "en_US.utf8"
      (let ((datum (with-input-from-string "\"\\u7F85\\u751F\\u9580\"" read)))
        (string=? "羅生門" datum)))))