diff options
Diffstat (limited to 'src/gb18030.c')
-rw-r--r-- | src/gb18030.c | 34 |
1 files changed, 20 insertions, 14 deletions
diff --git a/src/gb18030.c b/src/gb18030.c
index 7654432..50898eb 100644
--- a/src/gb18030.c
+++ b/src/gb18030.c
@@ -2,8 +2,8 @@
   gb18030.c - Oniguruma (regular expression library)
 **********************************************************************/
 /*-
- * Copyright (c) 2005-2018 KUBO Takehiro <kubo AT jiubao DOT org>
- * K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2005-2019 KUBO Takehiro <kubo AT jiubao DOT org>
+ * K.Kosako
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -33,6 +33,7 @@
 #if 1
 #define DEBUG_GB18030(arg)
 #else
+#include <stdio.h>
 #define DEBUG_GB18030(arg) printf arg
 #endif
 
@@ -67,15 +68,29 @@ gb18030_mbc_enc_len(const UChar* p)
 {
   if (GB18030_MAP[*p] != CM)
     return 1;
+
   p++;
   if (GB18030_MAP[*p] == C4)
     return 4;
-  if (GB18030_MAP[*p] == C1)
-    return 1; /* illegal sequence */
+
   return 2;
 }
 
 static int
+gb18030_code_to_mbclen(OnigCodePoint code)
+{
+  if ((code & 0xff000000) != 0) return 4;
+  else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
+  else if ((code & 0xff00) != 0) return 2;
+  else {
+    if (GB18030_MAP[(int )(code & 0xff)] == CM)
+      return ONIGERR_INVALID_CODE_POINT_VALUE;
+
+    return 1;
+  }
+}
+
+static int
 is_valid_mbc_string(const UChar* p, const UChar* end)
 {
   while (p < end) {
@@ -135,15 +150,6 @@ gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
                                    pp, end, lower);
 }
 
-#if 0
-static int
-gb18030_is_mbc_ambiguous(OnigCaseFoldType flag,
-                         const UChar** pp, const UChar* end)
-{
-  return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end);
-}
-#endif
-
 static int
 gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype)
 {
@@ -522,7 +528,7 @@ OnigEncodingType OnigEncodingGB18030 = {
   1, /* min enc length */
   onigenc_is_mbc_newline_0x0a,
   gb18030_mbc_to_code,
-  onigenc_mb4_code_to_mbclen,
+  gb18030_code_to_mbclen,
   gb18030_code_to_mbc,
   gb18030_mbc_case_fold,
   onigenc_ascii_apply_all_case_fold,