diff options
author | Jörg Frings-Fürst <debian@jff.email> | 2020-04-20 20:34:10 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff.email> | 2020-04-20 20:34:10 +0200 |
commit | f3d6e46ce3762b6f51a166119d3982fd3715507a (patch) | |
tree | 0935fb6da7f1d9728b42ddf08395a0e977e1c228 /src/regenc.c | |
parent | 043fff5b6f2461aeccb1c62cb771826cfe301832 (diff) | |
parent | 73c6133c32cddae59813cbadf655cb50a3a7356a (diff) |
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'src/regenc.c')
-rw-r--r-- | src/regenc.c | 66 |
1 file changed, 41 insertions, 25 deletions
```diff
diff --git a/src/regenc.c b/src/regenc.c
index 16ac313..dbfbc89 100644
--- a/src/regenc.c
+++ b/src/regenc.c
@@ -29,6 +29,9 @@
 
 #include "regint.h"
 
+#define LARGE_S 0x53
+#define SMALL_S 0x73
+
 OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT;
 
 #define INITED_LIST_SIZE 20
@@ -549,7 +552,7 @@ static int
 ss_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED,
                        OnigApplyAllCaseFoldFunc f, void* arg)
 {
-  static OnigCodePoint ss[] = { 0x73, 0x73 };
+  static OnigCodePoint ss[] = { SMALL_S, SMALL_S };
 
   return (*f)((OnigCodePoint )0xdf, ss, 2, arg);
 }
@@ -588,35 +591,48 @@ onigenc_get_case_fold_codes_by_str_with_map(int map_size,
     int ess_tsett_flag, OnigCaseFoldType flag ARG_UNUSED,
     const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])
 {
-  if (0x41 <= *p && *p <= 0x5a) {
+  int i, j, n;
+  static OnigUChar sa[] = { LARGE_S, SMALL_S };
+
+  if (0x41 <= *p && *p <= 0x5a) { /* A - Z */
+    if (*p == LARGE_S && ess_tsett_flag != 0 && end > p + 1
+        && (*(p+1) == LARGE_S || *(p+1) == SMALL_S)) { /* SS */
+    ss_combination:
+      items[0].byte_len = 2;
+      items[0].code_len = 1;
+      items[0].code[0] = (OnigCodePoint )0xdf;
+
+      n = 1;
+      for (i = 0; i < 2; i++) {
+        for (j = 0; j < 2; j++) {
+          if (sa[i] == *p && sa[j] == *(p+1))
+            continue;
+
+          items[n].byte_len = 2;
+          items[n].code_len = 2;
+          items[n].code[0] = (OnigCodePoint )sa[i];
+          items[n].code[1] = (OnigCodePoint )sa[j];
+          n++;
+        }
+      }
+      return 4;
+    }
+
     items[0].byte_len = 1;
     items[0].code_len = 1;
     items[0].code[0] = (OnigCodePoint )(*p + 0x20);
-    if (*p == 0x53 && ess_tsett_flag != 0 && end > p + 1
-        && (*(p+1) == 0x53 || *(p+1) == 0x73)) {
-      /* SS */
-      items[1].byte_len = 2;
-      items[1].code_len = 1;
-      items[1].code[0] = (OnigCodePoint )0xdf;
-      return 2;
-    }
-    else
-      return 1;
+    return 1;
   }
-  else if (0x61 <= *p && *p <= 0x7a) {
+  else if (0x61 <= *p && *p <= 0x7a) { /* a - z */
+    if (*p == SMALL_S && ess_tsett_flag != 0 && end > p + 1
+        && (*(p+1) == SMALL_S || *(p+1) == LARGE_S)) {
+      goto ss_combination;
+    }
+
     items[0].byte_len = 1;
     items[0].code_len = 1;
     items[0].code[0] = (OnigCodePoint )(*p - 0x20);
-    if (*p == 0x73 && ess_tsett_flag != 0 && end > p + 1
-        && (*(p+1) == 0x73 || *(p+1) == 0x53)) {
-      /* ss */
-      items[1].byte_len = 2;
-      items[1].code_len = 1;
-      items[1].code[0] = (OnigCodePoint )0xdf;
-      return 2;
-    }
-    else
-      return 1;
+    return 1;
   }
   else if (*p == 0xdf && ess_tsett_flag != 0) {
     items[0].byte_len = 1;
@@ -676,7 +692,7 @@ extern int
 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end)
 {
   if (p < end) {
-    if (*p == 0x0a) return 1;
+    if (*p == NEWLINE_CODE) return 1;
   }
   return 0;
 }
@@ -887,7 +903,7 @@ onigenc_is_mbc_word_ascii(OnigEncoding enc, UChar* s, const UChar* end)
 {
   OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, s, end);
 
-  if (code > 127) return 0;
+  if (code > ASCII_LIMIT) return 0;
 
   return ONIGENC_IS_ASCII_CODE_WORD(code);
 }
```