From 8f2e9c6b94b0e40cd995e20d174992ee361ed632 Mon Sep 17 00:00:00 2001 From: Markus Triska Date: Sun, 11 Feb 2024 22:19:19 +0100 Subject: [PATCH] FIXED: char_type/2 for unbound first argument. Surrogate code points (0xD800 to 0xDFFF) form a gap in valid character codes, see: https://github.com/mthom/scryer-prolog/issues/2326#issuecomment-1937864665 Many thanks to @Skgland for the pointer, and to @librarianmage for the question that spawned this! This addresses #2326. --- src/lib/charsio.pl | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/lib/charsio.pl b/src/lib/charsio.pl index afe745a2..2204d80d 100644 --- a/src/lib/charsio.pl +++ b/src/lib/charsio.pl @@ -143,22 +143,17 @@ char_type(Char, Type) :- ctype(Type), '$char_type'(Char, Type) ; ground(Type) -> - max_char_code(Max), - between(0, Max, Code), + ccode(Code), char_code(Char, Code), '$char_type'(Char, Type) ; must_be(character, Char) ). -max_char_code(Max) :- - catch((length(_, Code), - catch(char_code(_Char, Code), - error(representation_error(_),_), - throw(max_char_code(Code))), - false), - max_char_code(Code), - Max is Code - 1). +% 0xD800 to 0xDFFF are surrogate code points used by UTF-16. + +ccode(Code) :- between(0, 0xD7FF, Code). +ccode(Code) :- between(0xE000, 0x10FFFF, Code). ctype(alnum). ctype(alpha). -- 2.54.0