diff options
Diffstat (limited to 'src/regexec.c')
-rw-r--r-- | src/regexec.c | 134 |
1 files changed, 98 insertions, 36 deletions
diff --git a/src/regexec.c b/src/regexec.c index e7dfb96..9dbef70 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -31,6 +31,9 @@ #define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \ + ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end)) + #ifdef USE_CRNL_AS_LINE_TERMINATOR #define ONIGENC_IS_MBC_CRNL(enc,p,end) \ (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \ @@ -2002,6 +2005,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, MOP_OUT; break; +#ifdef USE_OP_CCLASS_NODE case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); { OnigCodePoint code; @@ -2020,6 +2024,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } MOP_OUT; break; +#endif case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); DATA_ENSURE(1); @@ -2152,7 +2157,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, MOP_OUT; break; - case OP_NOT_WORD: MOP_IN(OP_NOT_WORD); + case OP_WORD_ASCII: MOP_IN(OP_WORD_ASCII); + DATA_ENSURE(1); + if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) + goto fail; + + s += enclen(encode, s); + MOP_OUT; + break; + + case OP_NO_WORD: MOP_IN(OP_NO_WORD); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; @@ -2161,38 +2175,57 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, MOP_OUT; break; - case OP_WORD_BOUND: MOP_IN(OP_WORD_BOUND); - if (ON_STR_BEGIN(s)) { - DATA_ENSURE(1); - if (! ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - } - else if (ON_STR_END(s)) { - if (! ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - == ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; + case OP_NO_WORD_ASCII: MOP_IN(OP_NO_WORD_ASCII); + DATA_ENSURE(1); + if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) + goto fail; + + s += enclen(encode, s); + MOP_OUT; + break; + + case OP_WORD_BOUNDARY: MOP_IN(OP_WORD_BOUNDARY); + { + ModeType mode; + GET_MODE_INC(mode, p); // ascii_mode + + if (ON_STR_BEGIN(s)) { + DATA_ENSURE(1); + if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) + goto fail; + } + else if (ON_STR_END(s)) { + if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } + else { + if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) + == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } } MOP_OUT; continue; break; - case OP_NOT_WORD_BOUND: MOP_IN(OP_NOT_WORD_BOUND); - if (ON_STR_BEGIN(s)) { - if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) - goto fail; - } - else if (ON_STR_END(s)) { - if (ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; - } - else { - if (ONIGENC_IS_MBC_WORD(encode, s, end) - != ONIGENC_IS_MBC_WORD(encode, sprev, end)) - goto fail; + case OP_NO_WORD_BOUNDARY: MOP_IN(OP_NO_WORD_BOUNDARY); + { + ModeType mode; + GET_MODE_INC(mode, p); // ascii_mode + + if (ON_STR_BEGIN(s)) { + if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) + goto fail; + } + else if (ON_STR_END(s)) { + if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } + else { + if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) + != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } } MOP_OUT; continue; @@ -2200,26 +2233,55 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_WORD_BEGIN_END case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); - if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) { - if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - MOP_OUT; - continue; + { + ModeType mode; + GET_MODE_INC(mode, p); // ascii_mode + + if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { + if (ON_STR_BEGIN(s) || + ! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { + MOP_OUT; + continue; + } } } goto fail; break; case OP_WORD_END: MOP_IN(OP_WORD_END); - if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) { - if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) { - MOP_OUT; - continue; + { + ModeType mode; + GET_MODE_INC(mode, p); // ascii_mode + + if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { + if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { + MOP_OUT; + continue; + } } } goto fail; break; #endif + case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: + MOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) { + MOP_OUT; + continue; + } + goto fail; + break; + + case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: + MOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) + goto fail; + + MOP_OUT; + continue; + break; + case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); if (! ON_STR_BEGIN(s)) goto fail; |