From 40f3d0030e6e98bcb02d6523e5ee48497dec49a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Wed, 7 Aug 2019 09:32:48 +0200 Subject: New upstream version 6.9.3 --- src/utf16_le.c | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'src/utf16_le.c') diff --git a/src/utf16_le.c b/src/utf16_le.c index 4b231c6..cdc74b0 100644 --- a/src/utf16_le.c +++ b/src/utf16_le.c @@ -2,7 +2,7 @@ utf16_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2018 K.Kosako + * Copyright (c) 2002-2019 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -95,7 +95,15 @@ static const int EncLen_UTF16[] = { static int utf16le_code_to_mbclen(OnigCodePoint code) { - return (code > 0xffff ? 4 : 2); + if (code > 0xffff) { + if (code > 0x10ffff) + return ONIGERR_INVALID_CODE_POINT_VALUE; + else + return 4; + } + else { + return 2; + } } static int @@ -110,7 +118,16 @@ is_valid_mbc_string(const UChar* p, const UChar* end) const UChar* end1 = end - 1; while (p < end1) { - p += utf16le_mbc_enc_len(p); + int len = utf16le_mbc_enc_len(p); + if (len == 4) { + if (p + 3 < end && ! UTF16_IS_SURROGATE_SECOND(*(p + 3))) + return FALSE; + } + else + if (UTF16_IS_SURROGATE_SECOND(*(p + 1))) + return FALSE; + + p += len; } if (p != end) @@ -252,7 +269,8 @@ utf16le_left_adjust_char_head(const UChar* start, const UChar* s) s--; } - if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1) + if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1 && + UTF16_IS_SURROGATE_FIRST(*(s-1))) s -= 2; return (UChar* )s; -- cgit v1.2.3 From 4216de6a3336cbc6dddb572cb7e6ab6193bf3729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Fri, 29 Nov 2019 11:26:35 +0100 Subject: New upstream version 6.9.4 --- src/utf16_le.c | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) (limited to 'src/utf16_le.c') diff --git a/src/utf16_le.c b/src/utf16_le.c index cdc74b0..c6edd94 100644 --- a/src/utf16_le.c +++ b/src/utf16_le.c @@ -2,7 +2,7 @@ utf16_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako + * Copyright (c) 2002-2019 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -227,39 +227,6 @@ utf16le_mbc_case_fold(OnigCaseFoldType flag, fold); } -#if 0 -static int -utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, - const UChar* end) -{ - const UChar* p = *pp; - - (*pp) += EncLen_UTF16[*(p+1)]; - - if (*(p+1) == 0) { - int c, v; - - if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - return TRUE; - } - - c = *p; - v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, - (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER)); - if ((v | BIT_CTYPE_LOWER) != 0) { - /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */ - if (c >= 0xaa && c <= 0xba) - return FALSE; - else - return TRUE; - } - return (v != 0 ? TRUE : FALSE); - } - - return FALSE; -} -#endif - static UChar* utf16le_left_adjust_char_head(const UChar* start, const UChar* s) { -- cgit v1.2.3