From 6b986090d954dbac91bbb3c43ce7c3328c91a780 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Mon, 20 Apr 2020 20:33:51 +0200 Subject: New upstream version 6.9.5 --- src/regenc.c | 66 +++++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 41 insertions(+), 25 deletions(-) (limited to 'src/regenc.c') diff --git a/src/regenc.c b/src/regenc.c index 16ac313..dbfbc89 100644 --- a/src/regenc.c +++ b/src/regenc.c @@ -29,6 +29,9 @@ #include "regint.h" +#define LARGE_S 0x53 +#define SMALL_S 0x73 + OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; #define INITED_LIST_SIZE 20 @@ -549,7 +552,7 @@ static int ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, OnigApplyAllCaseFoldFunc f, void* arg) { - static OnigCodePoint ss[] = { 0x73, 0x73 }; + static OnigCodePoint ss[] = { SMALL_S, SMALL_S }; return (*f)((OnigCodePoint )0xdf, ss, 2, arg); } @@ -588,35 +591,48 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size, int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]) { - if (0x41 <= *p && *p <= 0x5a) { + int i, j, n; + static OnigUChar sa[] = { LARGE_S, SMALL_S }; + + if (0x41 <= *p && *p <= 0x5a) { /* A - Z */ + if (*p == LARGE_S && ess_tsett_flag != 0 && end > p + 1 + && (*(p+1) == LARGE_S || *(p+1) == SMALL_S)) { /* SS */ + ss_combination: + items[0].byte_len = 2; + items[0].code_len = 1; + items[0].code[0] = (OnigCodePoint )0xdf; + + n = 1; + for (i = 0; i < 2; i++) { + for (j = 0; j < 2; j++) { + if (sa[i] == *p && sa[j] == *(p+1)) + continue; + + items[n].byte_len = 2; + items[n].code_len = 2; + items[n].code[0] = (OnigCodePoint )sa[i]; + items[n].code[1] = (OnigCodePoint )sa[j]; + n++; + } + } + return 4; + } + items[0].byte_len = 1; items[0].code_len = 1; items[0].code[0] = (OnigCodePoint )(*p + 0x20); - if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 0x53 || *(p+1) == 0x73)) { - /* SS */ - items[1].byte_len = 2; - items[1].code_len = 1; - items[1].code[0] = (OnigCodePoint )0xdf; - return 2; - } - else - return 1; + return 1; } - else if (0x61 <= *p && *p <= 0x7a) { + else if (0x61 <= *p && *p <= 0x7a) { /* a - z */ + if (*p == SMALL_S && ess_tsett_flag != 0 && end > p + 1 + && (*(p+1) == SMALL_S || *(p+1) == LARGE_S)) { + goto ss_combination; + } + items[0].byte_len = 1; items[0].code_len = 1; items[0].code[0] = (OnigCodePoint )(*p - 0x20); - if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1 - && (*(p+1) == 0x73 || *(p+1) == 0x53)) { - /* ss */ - items[1].byte_len = 2; - items[1].code_len = 1; - items[1].code[0] = (OnigCodePoint )0xdf; - return 2; - } - else - return 1; + return 1; } else if (*p == 0xdf && ess_tsett_flag != 0) { items[0].byte_len = 1; @@ -676,7 +692,7 @@ extern int onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end) { if (p < end) { - if (*p == 0x0a) return 1; + if (*p == NEWLINE_CODE) return 1; } return 0; } @@ -887,7 +903,7 @@ onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end) { OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, s, end); - if (code > 127) return 0; + if (code > ASCII_LIMIT) return 0; return ONIGENC_IS_ASCII_CODE_WORD(code); } -- cgit v1.2.3