diff options
Diffstat (limited to 'src/regexec.c')
-rw-r--r-- | src/regexec.c | 1032 |
1 files changed, 497 insertions, 535 deletions
diff --git a/src/regexec.c b/src/regexec.c index 6c76d85..fa61839 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -782,13 +782,13 @@ static int onig_region_resize_clear(OnigRegion* region, int n) { int r; - + r = onig_region_resize(region, n); if (r != 0) return r; onig_region_clear(region); return 0; } - + extern int onig_region_set(OnigRegion* region, int at, int beg, int end) { @@ -798,7 +798,7 @@ onig_region_set(OnigRegion* region, int at, int beg, int end) int r = onig_region_resize(region, at + 1); if (r < 0) return r; } - + region->beg[at] = beg; region->end[at] = end; return 0; @@ -1225,7 +1225,7 @@ onig_initialize_match_param(OnigMatchParam* mp) static int adjust_match_param(regex_t* reg, OnigMatchParam* mp) { - RegexExt* ext = REG_EXTP(reg); + RegexExt* ext = reg->extp; mp->match_at_call_counter = 0; @@ -2337,6 +2337,79 @@ typedef struct { regoff_t rm_eo; } posix_regmatch_t; + +#ifdef __GNUC__ +#define USE_THREADED_CODE +#endif + +#ifdef USE_THREADED_CODE + +#define BYTECODE_INTERPRETER_START JUMP_OP; +#define BYTECODE_INTERPRETER_END +#define CASE_OP(x) L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(1) +#define DEFAULT_OP /* L_DEFAULT: */ +#define NEXT_OP sprev = sbegin; JUMP_OP +#define JUMP_OP goto *opcode_to_label[*p++] +#define BREAK_OP /* Nothing */ + +#else + +#define BYTECODE_INTERPRETER_START \ + while (1) {\ + MATCH_DEBUG_OUT(0)\ + sbegin = s;\ + switch (*p++) { +#define BYTECODE_INTERPRETER_END } sprev = sbegin; } +#define CASE_OP(x) case OP_##x: SOP_IN(OP_##x); +#define DEFAULT_OP default: +#define NEXT_OP break +#define JUMP_OP continue; break +#define BREAK_OP break + +#endif /* USE_THREADED_CODE */ + +#define NEXT_OUT SOP_OUT; NEXT_OP +#define JUMP_OUT SOP_OUT; JUMP_OP +#define BREAK_OUT SOP_OUT; BREAK_OP +#define CHECK_INTERRUPT_JUMP_OUT SOP_OUT; CHECK_INTERRUPT_IN_MATCH; JUMP_OP + + +#ifdef ONIG_DEBUG_MATCH +#define MATCH_DEBUG_OUT(offset) do {\ + UChar *xp, *q, *bp, buf[50];\ + int len, spos;\ + spos = IS_NOT_NULL(s) ? (int )(s - str) : -1;\ + xp = p - (offset);\ + fprintf(stderr, "%7u: %7ld: %4d> \"",\ + counter, GET_STACK_INDEX(stk), spos);\ + counter++;\ + bp = buf;\ + if (IS_NOT_NULL(s)) {\ + for (i = 0, q = s; i < 7 && q < end; i++) {\ + len = enclen(encode, q);\ + while (len-- > 0) *bp++ = *q++;\ + }\ + if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }\ + else { xmemcpy(bp, "\"", 1); bp += 1; }\ + }\ + else {\ + xmemcpy(bp, "\"", 1); bp += 1;\ + }\ + *bp = 0;\ + fputs((char* )buf, stderr);\ + for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);\ + if (xp == FinishCode)\ + fprintf(stderr, "----: ");\ + else\ + fprintf(stderr, "%4d: ", (int )(xp - reg->p));\ + onig_print_compiled_byte_code(stderr, xp, NULL, reg->p, encode);\ + fprintf(stderr, "\n");\ + } while(0); +#else +#define MATCH_DEBUG_OUT(offset) +#endif + + /* match data(str - end) from position (sstart). */ /* if sstart == str then set sprev to NULL. */ static int @@ -2346,6 +2419,107 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, { static UChar FinishCode[] = { OP_FINISH }; +#ifdef USE_THREADED_CODE + static const void *opcode_to_label[] = { + &&L_FINISH, + &&L_END, + &&L_EXACT1, + &&L_EXACT2, + &&L_EXACT3, + &&L_EXACT4, + &&L_EXACT5, + &&L_EXACTN, + &&L_EXACTMB2N1, + &&L_EXACTMB2N2, + &&L_EXACTMB2N3, + &&L_EXACTMB2N, + &&L_EXACTMB3N, + &&L_EXACTMBN, + &&L_EXACT1_IC, + &&L_EXACTN_IC, + &&L_CCLASS, + &&L_CCLASS_MB, + &&L_CCLASS_MIX, + &&L_CCLASS_NOT, + &&L_CCLASS_MB_NOT, + &&L_CCLASS_MIX_NOT, +#ifdef USE_OP_CCLASS_NODE + &&L_CCLASS_NODE, +#endif + &&L_ANYCHAR, + &&L_ANYCHAR_ML, + &&L_ANYCHAR_STAR, + &&L_ANYCHAR_ML_STAR, + &&L_ANYCHAR_STAR_PEEK_NEXT, + &&L_ANYCHAR_ML_STAR_PEEK_NEXT, + &&L_WORD, + &&L_WORD_ASCII, + &&L_NO_WORD, + &&L_NO_WORD_ASCII, + &&L_WORD_BOUNDARY, + &&L_NO_WORD_BOUNDARY, + &&L_WORD_BEGIN, + &&L_WORD_END, + &&L_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, + &&L_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, + &&L_BEGIN_BUF, + &&L_END_BUF, + &&L_BEGIN_LINE, + &&L_END_LINE, + &&L_SEMI_END_BUF, + &&L_BEGIN_POSITION, + &&L_BACKREF1, + &&L_BACKREF2, + &&L_BACKREF_N, + &&L_BACKREF_N_IC, + &&L_BACKREF_MULTI, + &&L_BACKREF_MULTI_IC, + &&L_BACKREF_WITH_LEVEL, + &&L_BACKREF_CHECK, + &&L_BACKREF_CHECK_WITH_LEVEL, + &&L_MEMORY_START, + &&L_MEMORY_START_PUSH, + &&L_MEMORY_END_PUSH, + &&L_MEMORY_END_PUSH_REC, + &&L_MEMORY_END, + &&L_MEMORY_END_REC, + &&L_FAIL, + &&L_JUMP, + &&L_PUSH, + &&L_PUSH_SUPER, + &&L_POP_OUT, + &&L_PUSH_OR_JUMP_EXACT1, + &&L_PUSH_IF_PEEK_NEXT, + &&L_REPEAT, + &&L_REPEAT_NG, + &&L_REPEAT_INC, + &&L_REPEAT_INC_NG, + &&L_REPEAT_INC_SG, + &&L_REPEAT_INC_NG_SG, + &&L_EMPTY_CHECK_START, + &&L_EMPTY_CHECK_END, + &&L_EMPTY_CHECK_END_MEMST, + &&L_EMPTY_CHECK_END_MEMST_PUSH, + &&L_PREC_READ_START, + &&L_PREC_READ_END, + &&L_PREC_READ_NOT_START, + &&L_PREC_READ_NOT_END, + &&L_ATOMIC_START, + &&L_ATOMIC_END, + &&L_LOOK_BEHIND, + &&L_LOOK_BEHIND_NOT_START, + &&L_LOOK_BEHIND_NOT_END, + &&L_CALL, + &&L_RETURN, + &&L_PUSH_SAVE_VAL, + &&L_UPDATE_VAR, +#ifdef USE_CALLOUT + &&L_CALLOUT_CONTENTS, + &&L_CALLOUT_NAME, +#endif + }; +#endif + int i, n, num_mem, best_len, pop_level; LengthType tlen, tlen2; MemNumType mem; @@ -2374,6 +2548,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, OnigEncoding encode = reg->enc; OnigCaseFoldType case_fold_flag = reg->case_fold_flag; +#ifdef ONIG_DEBUG_MATCH + static unsigned int counter = 1; +#endif + #ifdef USE_CALLOUT msa->mp->match_at_call_counter++; #endif @@ -2406,40 +2584,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, retry_in_match_counter = 0; #endif - while (1) { -#ifdef ONIG_DEBUG_MATCH - { - static unsigned int counter = 1; - - UChar *q, *bp, buf[50]; - int len; - fprintf(stderr, "%7u: %7ld: %4d> \"", - counter, GET_STACK_INDEX(stk), (int )(s - str)); - counter++; - - bp = buf; - for (i = 0, q = s; i < 7 && q < end; i++) { - len = enclen(encode, q); - while (len-- > 0) *bp++ = *q++; - } - if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; } - else { xmemcpy(bp, "\"", 1); bp += 1; } - *bp = 0; - fputs((char* )buf, stderr); - - for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); - if (p == FinishCode) - fprintf(stderr, "----: "); - else - fprintf(stderr, "%4d: ", (int )(p - reg->p)); - onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode); - fprintf(stderr, "\n"); - } -#endif - - sbegin = s; - switch (*p++) { - case OP_END: SOP_IN(OP_END); + BYTECODE_INTERPRETER_START { + CASE_OP(END) n = (int )(s - sstart); if (n > best_len) { OnigRegion* region; @@ -2551,16 +2697,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, /* default behavior: return first-matching result. */ goto finish; - break; - case OP_EXACT1: SOP_IN(OP_EXACT1); + CASE_OP(EXACT1) DATA_ENSURE(1); if (*p != *s) goto fail; p++; s++; - SOP_OUT; - break; + NEXT_OUT; - case OP_EXACT1_IC: SOP_IN(OP_EXACT1_IC); + CASE_OP(EXACT1_IC) { int len; UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -2579,21 +2723,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; q++; } } - SOP_OUT; - break; + NEXT_OUT; - case OP_EXACT2: SOP_IN(OP_EXACT2); + CASE_OP(EXACT2) DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; sprev = s; p++; s++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACT3: SOP_IN(OP_EXACT3); + CASE_OP(EXACT3) DATA_ENSURE(3); if (*p != *s) goto fail; p++; s++; @@ -2602,11 +2743,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACT4: SOP_IN(OP_EXACT4); + CASE_OP(EXACT4) DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -2617,11 +2756,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACT5: SOP_IN(OP_EXACT5); + CASE_OP(EXACT5) DATA_ENSURE(5); if (*p != *s) goto fail; p++; s++; @@ -2634,22 +2771,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTN: SOP_IN(OP_EXACTN); + CASE_OP(EXACTN) GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen); while (tlen-- > 0) { if (*p++ != *s++) goto fail; } sprev = s - 1; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTN_IC: SOP_IN(OP_EXACTN_IC); + CASE_OP(EXACTN_IC) { int len; UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -2673,20 +2806,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTMB2N1: SOP_IN(OP_EXACTMB2N1); + CASE_OP(EXACTMB2N1) DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; - SOP_OUT; - break; + NEXT_OUT; - case OP_EXACTMB2N2: SOP_IN(OP_EXACTMB2N2); + CASE_OP(EXACTMB2N2) DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -2697,11 +2827,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTMB2N3: SOP_IN(OP_EXACTMB2N3); + CASE_OP(EXACTMB2N3) DATA_ENSURE(6); if (*p != *s) goto fail; p++; s++; @@ -2716,11 +2844,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTMB2N: SOP_IN(OP_EXACTMB2N); + CASE_OP(EXACTMB2N) GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 2); while (tlen-- > 0) { @@ -2730,11 +2856,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - 2; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTMB3N: SOP_IN(OP_EXACTMB3N); + CASE_OP(EXACTMB3N) GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 3); while (tlen-- > 0) { @@ -2746,11 +2870,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - 3; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EXACTMBN: SOP_IN(OP_EXACTMBN); + CASE_OP(EXACTMBN) GET_LENGTH_INC(tlen, p); /* mb-len */ GET_LENGTH_INC(tlen2, p); /* string len */ tlen2 *= tlen; @@ -2760,19 +2882,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - tlen; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_CCLASS: SOP_IN(OP_CCLASS); + CASE_OP(CCLASS) DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ - SOP_OUT; - break; + NEXT_OUT; - case OP_CCLASS_MB: SOP_IN(OP_CCLASS_MB); + CASE_OP(CCLASS_MB) if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; cclass_mb: @@ -2798,10 +2917,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif } p += tlen; - SOP_OUT; - break; + NEXT_OUT; - case OP_CCLASS_MIX: SOP_IN(OP_CCLASS_MIX); + CASE_OP(CCLASS_MIX) DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -2816,18 +2934,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; s++; } - SOP_OUT; - break; + NEXT_OUT; - case OP_CCLASS_NOT: SOP_IN(OP_CCLASS_NOT); + CASE_OP(CCLASS_NOT) DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; s += enclen(encode, s); - SOP_OUT; - break; + NEXT_OUT; - case OP_CCLASS_MB_NOT: SOP_IN(OP_CCLASS_MB_NOT); + CASE_OP(CCLASS_MB_NOT) DATA_ENSURE(1); if (! ONIGENC_IS_MBC_HEAD(encode, s)) { s++; @@ -2865,10 +2981,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; cc_mb_not_success: - SOP_OUT; - break; + NEXT_OUT; - case OP_CCLASS_MIX_NOT: SOP_IN(OP_CCLASS_MIX_NOT); + CASE_OP(CCLASS_MIX_NOT) DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -2883,11 +2998,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; s++; } - SOP_OUT; - break; + NEXT_OUT; #ifdef USE_OP_CCLASS_NODE - case OP_CCLASS_NODE: SOP_IN(OP_CCLASS_NODE); + CASE_OP(CCLASS_NODE) { OnigCodePoint code; void *node; @@ -2903,28 +3017,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, code = ONIGENC_MBC_TO_CODE(encode, ss, s); if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; } - SOP_OUT; - break; + NEXT_OUT; #endif - case OP_ANYCHAR: SOP_IN(OP_ANYCHAR); + CASE_OP(ANYCHAR) DATA_ENSURE(1); n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; - SOP_OUT; - break; + NEXT_OUT; - case OP_ANYCHAR_ML: SOP_IN(OP_ANYCHAR_ML); + CASE_OP(ANYCHAR_ML) DATA_ENSURE(1); n = enclen(encode, s); DATA_ENSURE(n); s += n; - SOP_OUT; - break; + NEXT_OUT; - case OP_ANYCHAR_STAR: SOP_IN(OP_ANYCHAR_STAR); + CASE_OP(ANYCHAR_STAR) while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); n = enclen(encode, s); @@ -2933,11 +3044,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sprev = s; s += n; } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_ANYCHAR_ML_STAR: SOP_IN(OP_ANYCHAR_ML_STAR); + CASE_OP(ANYCHAR_ML_STAR) while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); n = enclen(encode, s); @@ -2951,11 +3060,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s++; } } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_ANYCHAR_STAR_PEEK_NEXT: SOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + CASE_OP(ANYCHAR_STAR_PEEK_NEXT) while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); @@ -2967,10 +3074,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += n; } p++; - SOP_OUT; - break; + NEXT_OUT; - case OP_ANYCHAR_ML_STAR_PEEK_NEXT:SOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + CASE_OP(ANYCHAR_ML_STAR_PEEK_NEXT) while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); @@ -2987,46 +3093,41 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } p++; - SOP_OUT; - break; + NEXT_OUT; - case OP_WORD: SOP_IN(OP_WORD); + CASE_OP(WORD) DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enclen(encode, s); - SOP_OUT; - break; + NEXT_OUT; - case OP_WORD_ASCII: SOP_IN(OP_WORD_ASCII); + CASE_OP(WORD_ASCII) DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) goto fail; s += enclen(encode, s); - SOP_OUT; - break; + NEXT_OUT; - case OP_NO_WORD: SOP_IN(OP_NO_WORD); + CASE_OP(NO_WORD) DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enclen(encode, s); - SOP_OUT; - break; + NEXT_OUT; - case OP_NO_WORD_ASCII: SOP_IN(OP_NO_WORD_ASCII); + CASE_OP(NO_WORD_ASCII) DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) goto fail; s += enclen(encode, s); - SOP_OUT; - break; + NEXT_OUT; - case OP_WORD_BOUNDARY: SOP_IN(OP_WORD_BOUNDARY); + CASE_OP(WORD_BOUNDARY) { ModeType mode; GET_MODE_INC(mode, p); /* ascii_mode */ @@ -3046,11 +3147,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; } } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_NO_WORD_BOUNDARY: SOP_IN(OP_NO_WORD_BOUNDARY); + CASE_OP(NO_WORD_BOUNDARY) { ModeType mode; GET_MODE_INC(mode, p); /* ascii_mode */ @@ -3069,189 +3168,150 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; } } - SOP_OUT; - continue; - break; + JUMP_OUT; #ifdef USE_WORD_BEGIN_END - case OP_WORD_BEGIN: SOP_IN(OP_WORD_BEGIN); + CASE_OP(WORD_BEGIN) { ModeType mode; GET_MODE_INC(mode, p); /* ascii_mode */ if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { - SOP_OUT; - continue; + JUMP_OUT; } } } goto fail; - break; - case OP_WORD_END: SOP_IN(OP_WORD_END); + CASE_OP(WORD_END) { ModeType mode; GET_MODE_INC(mode, p); /* ascii_mode */ if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { - SOP_OUT; - continue; + JUMP_OUT; } } } goto fail; - break; #endif - case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: - SOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + CASE_OP(EXTENDED_GRAPHEME_CLUSTER_BOUNDARY) if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) { - SOP_OUT; - continue; + JUMP_OUT; } goto fail; - break; - case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: - SOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + CASE_OP(NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY) if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) goto fail; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_BEGIN_BUF: SOP_IN(OP_BEGIN_BUF); + CASE_OP(BEGIN_BUF) if (! ON_STR_BEGIN(s)) goto fail; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_END_BUF: SOP_IN(OP_END_BUF); + CASE_OP(END_BUF) if (! ON_STR_END(s)) goto fail; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_BEGIN_LINE: SOP_IN(OP_BEGIN_LINE); + CASE_OP(BEGIN_LINE) if (ON_STR_BEGIN(s)) { if (IS_NOTBOL(msa->options)) goto fail; - SOP_OUT; - continue; + JUMP_OUT; } else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - SOP_OUT; - continue; + JUMP_OUT; } goto fail; - break; - case OP_END_LINE: SOP_IN(OP_END_LINE); + CASE_OP(END_LINE) if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; - SOP_OUT; - continue; + JUMP_OUT; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - SOP_OUT; - continue; + JUMP_OUT; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - SOP_OUT; - continue; + JUMP_OUT; } #endif goto fail; - break; - case OP_SEMI_END_BUF: SOP_IN(OP_SEMI_END_BUF); + CASE_OP(SEMI_END_BUF) if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; - SOP_OUT; - continue; + JUMP_OUT; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && ON_STR_END(s + enclen(encode, s))) { - SOP_OUT; - continue; + JUMP_OUT; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { UChar* ss = s + enclen(encode, s); ss += enclen(encode, ss); if (ON_STR_END(ss)) { - SOP_OUT; - continue; + JUMP_OUT; } } #endif goto fail; - break; - case OP_BEGIN_POSITION: SOP_IN(OP_BEGIN_POSITION); + CASE_OP(BEGIN_POSITION) if (s != msa->start) goto fail; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_MEMORY_START_PUSH: SOP_IN(OP_MEMORY_START_PUSH); + CASE_OP(MEMORY_START_PUSH) GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_START(mem, s); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_MEMORY_START: SOP_IN(OP_MEMORY_START); + CASE_OP(MEMORY_START) GET_MEMNUM_INC(mem, p); mem_start_stk[mem] = (StackIndex )((void* )s); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_MEMORY_END_PUSH: SOP_IN(OP_MEMORY_END_PUSH); + CASE_OP(MEMORY_END_PUSH) GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_END(mem, s); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_MEMORY_END: SOP_IN(OP_MEMORY_END); + CASE_OP(MEMORY_END) GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (StackIndex )((void* )s); - SOP_OUT; - continue; - break; + JUMP_OUT; #ifdef USE_CALL - case OP_MEMORY_END_PUSH_REC: SOP_IN(OP_MEMORY_END_PUSH_REC); + CASE_OP(MEMORY_END_PUSH_REC) GET_MEMNUM_INC(mem, p); STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ STACK_PUSH_MEM_END(mem, s); mem_start_stk[mem] = GET_STACK_INDEX(stkp); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_MEMORY_END_REC: SOP_IN(OP_MEMORY_END_REC); + CASE_OP(MEMORY_END_REC) GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (StackIndex )((void* )s); STACK_GET_MEM_START(mem, stkp); @@ -3262,22 +3322,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); STACK_PUSH_MEM_END_MARK(mem); - SOP_OUT; - continue; - break; + JUMP_OUT; #endif - case OP_BACKREF1: SOP_IN(OP_BACKREF1); + CASE_OP(BACKREF1) mem = 1; goto backref; - break; - case OP_BACKREF2: SOP_IN(OP_BACKREF2); + CASE_OP(BACKREF2) mem = 2; goto backref; - break; - case OP_BACKREF_N: SOP_IN(OP_BACKREF_N); + CASE_OP(BACKREF_N) GET_MEMNUM_INC(mem, p); backref: { @@ -3301,13 +3357,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STRING_CMP(pstart, s, n); while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - - SOP_OUT; - continue; } - break; + JUMP_OUT; - case OP_BACKREF_N_IC: SOP_IN(OP_BACKREF_N_IC); + CASE_OP(BACKREF_N_IC) GET_MEMNUM_INC(mem, p); { int len; @@ -3330,13 +3383,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STRING_CMP_IC(case_fold_flag, pstart, &s, n); while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - - SOP_OUT; - continue; } - break; + JUMP_OUT; - case OP_BACKREF_MULTI: SOP_IN(OP_BACKREF_MULTI); + CASE_OP(BACKREF_MULTI) { int len, is_fail; UChar *pstart, *pend, *swork; @@ -3370,12 +3420,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - SOP_OUT; - continue; } - break; + JUMP_OUT; - case OP_BACKREF_MULTI_IC: SOP_IN(OP_BACKREF_MULTI_IC); + CASE_OP(BACKREF_MULTI_IC) { int len, is_fail; UChar *pstart, *pend, *swork; @@ -3409,13 +3457,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - SOP_OUT; - continue; } - break; + JUMP_OUT; #ifdef USE_BACKREF_WITH_LEVEL - case OP_BACKREF_WITH_LEVEL: + CASE_OP(BACKREF_WITH_LEVEL) { int len; OnigOptionType ic; @@ -3436,14 +3482,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else goto fail; - - SOP_OUT; - continue; } - break; + JUMP_OUT; #endif - case OP_BACKREF_CHECK: SOP_IN(OP_BACKREF_CHECK); + CASE_OP(BACKREF_CHECK) { GET_LENGTH_INC(tlen, p); for (i = 0; i < tlen; i++) { @@ -3456,13 +3499,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - SOP_OUT; - continue; } - break; + JUMP_OUT; #ifdef USE_BACKREF_WITH_LEVEL - case OP_BACKREF_CHECK_WITH_LEVEL: + CASE_OP(BACKREF_CHECK_WITH_LEVEL) { LengthType level; @@ -3475,21 +3516,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else goto fail; - - SOP_OUT; - continue; } - break; + JUMP_OUT; #endif - case OP_EMPTY_CHECK_START: SOP_IN(OP_EMPTY_CHECK_START); + CASE_OP(EMPTY_CHECK_START) GET_MEMNUM_INC(mem, p); /* mem: null check id */ STACK_PUSH_EMPTY_CHECK_START(mem, s); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_EMPTY_CHECK_END: SOP_IN(OP_EMPTY_CHECK_END); + CASE_OP(EMPTY_CHECK_END) { int is_empty; @@ -3518,12 +3554,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } } - SOP_OUT; - continue; - break; + JUMP_OUT; #ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT - case OP_EMPTY_CHECK_END_MEMST: SOP_IN(OP_EMPTY_CHECK_END_MEMST); + CASE_OP(EMPTY_CHECK_END_MEMST) { int is_empty; @@ -3537,14 +3571,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto empty_check_found; } } - SOP_OUT; - continue; - break; + JUMP_OUT; #endif #ifdef USE_CALL - case OP_EMPTY_CHECK_END_MEMST_PUSH: - SOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH); + CASE_OP(EMPTY_CHECK_END_MEMST_PUSH) { int is_empty; @@ -3566,68 +3597,51 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_EMPTY_CHECK_END(mem); } } - SOP_OUT; - continue; - break; + JUMP_OUT; #endif - case OP_JUMP: SOP_IN(OP_JUMP); + CASE_OP(JUMP) GET_RELADDR_INC(addr, p); p += addr; - SOP_OUT; - CHECK_INTERRUPT_IN_MATCH; - continue; - break; + CHECK_INTERRUPT_JUMP_OUT; - case OP_PUSH: SOP_IN(OP_PUSH); + CASE_OP(PUSH) GET_RELADDR_INC(addr, p); STACK_PUSH_ALT(p + addr, s, sprev); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_PUSH_SUPER: SOP_IN(OP_PUSH_SUPER); + CASE_OP(PUSH_SUPER) GET_RELADDR_INC(addr, p); STACK_PUSH_SUPER_ALT(p + addr, s, sprev); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_POP_OUT: SOP_IN(OP_POP_OUT); + CASE_OP(POP_OUT) STACK_POP_ONE; /* for stop backtrack */ /* CHECK_RETRY_LIMIT_IN_MATCH; */ - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_PUSH_OR_JUMP_EXACT1: SOP_IN(OP_PUSH_OR_JUMP_EXACT1); + CASE_OP(PUSH_OR_JUMP_EXACT1) GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - SOP_OUT; - continue; + JUMP_OUT; } p += (addr + 1); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_PUSH_IF_PEEK_NEXT: SOP_IN(OP_PUSH_IF_PEEK_NEXT); + CASE_OP(PUSH_IF_PEEK_NEXT) GET_RELADDR_INC(addr, p); if (*p == *s) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - SOP_OUT; - continue; + JUMP_OUT; } p++; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_REPEAT: SOP_IN(OP_REPEAT); + CASE_OP(REPEAT) { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -3640,11 +3654,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_ALT(p + addr, s, sprev); } } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_REPEAT_NG: SOP_IN(OP_REPEAT_NG); + CASE_OP(REPEAT_NG) { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -3658,11 +3670,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += addr; } } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_REPEAT_INC: SOP_IN(OP_REPEAT_INC); + CASE_OP(REPEAT_INC) GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -3680,19 +3690,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p = stkp->u.repeat.pcode; } STACK_PUSH_REPEAT_INC(si); - SOP_OUT; - CHECK_INTERRUPT_IN_MATCH; - continue; - break; + CHECK_INTERRUPT_JUMP_OUT; - case OP_REPEAT_INC_SG: SOP_IN(OP_REPEAT_INC_SG); + CASE_OP(REPEAT_INC_SG) GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc; - break; - case OP_REPEAT_INC_NG: SOP_IN(OP_REPEAT_INC_NG); + CASE_OP(REPEAT_INC_NG) GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -3714,68 +3720,51 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { STACK_PUSH_REPEAT_INC(si); } - SOP_OUT; - CHECK_INTERRUPT_IN_MATCH; - continue; - break; + CHECK_INTERRUPT_JUMP_OUT; - case OP_REPEAT_INC_NG_SG: SOP_IN(OP_REPEAT_INC_NG_SG); + CASE_OP(REPEAT_INC_NG_SG) GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc_ng; - break; - case OP_PREC_READ_START: SOP_IN(OP_PREC_READ_START); + CASE_OP(PREC_READ_START) STACK_PUSH_POS(s, sprev); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_PREC_READ_END: SOP_IN(OP_PREC_READ_END); + CASE_OP(PREC_READ_END) { STACK_EXEC_TO_VOID(stkp); s = stkp->u.state.pstr; sprev = stkp->u.state.pstr_prev; } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_PREC_READ_NOT_START: SOP_IN(OP_PREC_READ_NOT_START); + CASE_OP(PREC_READ_NOT_START) GET_RELADDR_INC(addr, p); STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_PREC_READ_NOT_END: SOP_IN(OP_PREC_READ_NOT_END); + CASE_OP(PREC_READ_NOT_END) STACK_POP_TIL_ALT_PREC_READ_NOT; goto fail; - break; - case OP_ATOMIC_START: SOP_IN(OP_ATOMIC_START); + CASE_OP(ATOMIC_START) STACK_PUSH_TO_VOID_START; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_ATOMIC_END: SOP_IN(OP_ATOMIC_END); + CASE_OP(ATOMIC_END) STACK_EXEC_TO_VOID(stkp); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_LOOK_BEHIND: SOP_IN(OP_LOOK_BEHIND); + CASE_OP(LOOK_BEHIND) GET_LENGTH_INC(tlen, p); s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_LOOK_BEHIND_NOT_START: SOP_IN(OP_LOOK_BEHIND_NOT_START); + CASE_OP(LOOK_BEHIND_NOT_START) GET_RELADDR_INC(addr, p); GET_LENGTH_INC(tlen, p); q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); @@ -3790,33 +3779,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s = q; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_LOOK_BEHIND_NOT_END: SOP_IN(OP_LOOK_BEHIND_NOT_END); + CASE_OP(LOOK_BEHIND_NOT_END) STACK_POP_TIL_ALT_LOOK_BEHIND_NOT; goto fail; - break; #ifdef USE_CALL - case OP_CALL: SOP_IN(OP_CALL); + CASE_OP(CALL) GET_ABSADDR_INC(addr, p); STACK_PUSH_CALL_FRAME(p); p = reg->p + addr; - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_RETURN: SOP_IN(OP_RETURN); + CASE_OP(RETURN) STACK_RETURN(p); STACK_PUSH_RETURN; - SOP_OUT; - continue; - break; + JUMP_OUT; #endif - case OP_PUSH_SAVE_VAL: SOP_IN(OP_PUSH_SAVE_VAL); + CASE_OP(PUSH_SAVE_VAL) { SaveType type; GET_SAVE_TYPE_INC(type, p); @@ -3835,11 +3817,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; } } - SOP_OUT; - continue; - break; + JUMP_OUT; - case OP_UPDATE_VAR: SOP_IN(OP_UPDATE_VAR); + CASE_OP(UPDATE_VAR) { UpdateVarType type; enum SaveType save_type; @@ -3867,20 +3847,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; } } - SOP_OUT; - continue; - break; + JUMP_OUT; #ifdef USE_CALLOUT - case OP_CALLOUT_CONTENTS: SOP_IN(OP_CALLOUT_CONTENTS); + CASE_OP(CALLOUT_CONTENTS) of = ONIG_CALLOUT_OF_CONTENTS; goto callout_common_entry; + BREAK_OUT; - SOP_OUT; - continue; - break; - - case OP_CALLOUT_NAME: SOP_IN(OP_CALLOUT_NAME); + CASE_OP(CALLOUT_NAME) { int call_result; int name_id; @@ -3941,34 +3916,34 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } } - SOP_OUT; - continue; - break; + JUMP_OUT; #endif - case OP_FINISH: + CASE_OP(FINISH) goto finish; - break; +#ifdef ONIG_DEBUG_STATISTICS fail: SOP_OUT; - /* fall */ - case OP_FAIL: SOP_IN(OP_FAIL); + goto fail2; +#endif + CASE_OP(FAIL) +#ifdef ONIG_DEBUG_STATISTICS + fail2: +#else + fail: +#endif STACK_POP; p = stk->u.state.pcode; s = stk->u.state.pstr; sprev = stk->u.state.pstr_prev; CHECK_RETRY_LIMIT_IN_MATCH; - SOP_OUT; - continue; - break; + JUMP_OUT; - default: + DEFAULT_OP goto bytecode_error; - } /* end of switch */ - sprev = sbegin; - } /* end of while(1) */ + } BYTECODE_INTERPRETER_END; finish: STACK_SAVE; @@ -4130,150 +4105,143 @@ slow_search_backward_ic(OnigEncoding enc, int case_fold_flag, return (UChar* )NULL; } + static UChar* -bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, - const UChar* text_range) +sunday_quick_search_step_forward(regex_t* reg, + const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) { const UChar *s, *se, *t, *p, *end; const UChar *tail; int skip, tlen1; + int map_offset; + OnigEncoding enc; #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_notrev: text: %p, text_end: %p, text_range: %p\n", - text, text_end, text_range); + fprintf(stderr, + "sunday_quick_search_step_forward: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range); #endif + enc = reg->enc; + tail = target_end - 1; tlen1 = (int )(tail - target); end = text_range; if (end + tlen1 > text_end) end = text_end - tlen1; + map_offset = reg->map_offset; s = text; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->map[*se]; - t = s; - do { - s += enclen(reg->enc, s); - } while ((s - t) < skip && s < end); - } - } - else { - while (s < end) { - p = se = s + tlen1; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )s; - p--; t--; - } - skip = reg->int_map[*se]; - t = s; - do { - s += enclen(reg->enc, s); - } while ((s - t) < skip && s < end); + while (s < end) { + p = se = s + tlen1; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )s; + p--; t--; } + if (se + map_offset >= text_end) break; + skip = reg->map[*(se + map_offset)]; +#if 0 + t = s; + do { + s += enclen(enc, s); + } while ((s - t) < skip && s < end); +#else + s += skip; + if (s < end) + s = onigenc_get_right_adjust_char_head(enc, text, s); +#endif } return (UChar* )NULL; } static UChar* -bm_search(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* text_end, const UChar* text_range) +sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) { const UChar *s, *t, *p, *end; const UChar *tail; + int map_offset; - end = text_range + (target_end - target) - 1; + end = text_range + (target_end - target); if (end > text_end) end = text_end; + map_offset = reg->map_offset; tail = target_end - 1; - s = text + (target_end - target) - 1; - if (IS_NULL(reg->int_map)) { - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->map[*s]; - } - } - else { /* see int_map[] */ - while (s < end) { - p = s; - t = tail; - while (*p == *t) { - if (t == target) return (UChar* )p; - p--; t--; - } - s += reg->int_map[*s]; + s = text + (tail - target); + + while (s < end) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; } + if (s + map_offset >= text_end) break; + s += reg->map[*(s + map_offset)]; } + return (UChar* )NULL; } -#ifdef USE_INT_MAP_BACKWARD -static int -set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, int** skip) +static UChar* +sunday_quick_search_case_fold(regex_t* reg, + const UChar* target, const UChar* target_end, + const UChar* text, const UChar* text_end, + const UChar* text_range) { - int i, len; - - if (IS_NULL(*skip)) { - *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE); - if (IS_NULL(*skip)) return ONIGERR_MEMORY; - } - - len = end - s; - for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) - (*skip)[i] = len; + const UChar *s, *se, *end; + const UChar *tail; + int skip, tlen1; + int map_offset; + int case_fold_flag; + OnigEncoding enc; - for (i = len - 1; i > 0; i--) - (*skip)[s[i]] = i; +#ifdef ONIG_DEBUG_SEARCH + fprintf(stderr, + "sunday_quick_search_case_fold: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range); +#endif - return 0; -} + enc = reg->enc; + case_fold_flag = reg->case_fold_flag; -static UChar* -bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end, - const UChar* text, const UChar* adjust_text, - const UChar* text_end, const UChar* text_start) -{ - const UChar *s, *t, *p; + tail = target_end - 1; + tlen1 = (int )(tail - target); + end = text_range; + if (end + tlen1 > text_end) + end = text_end - tlen1; - s = text_end - (target_end - target); - if (text_start < s) - s = text_start; - else - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); + map_offset = reg->map_offset; + s = text; - while (s >= text) { - p = s; - t = target; - while (t < target_end && *p == *t) { - p++; t++; - } - if (t == target_end) + while (s < end) { + if (str_lower_case_match(enc, case_fold_flag, target, target_end, + s, text_end)) return (UChar* )s; - s -= reg->int_map_backward[*s]; - s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s); + se = s + tlen1; + if (se + map_offset >= text_end) break; + skip = reg->map[*(se + map_offset)]; +#if 0 + p = s; + do { + s += enclen(enc, s); + } while ((s - p) < skip && s < end); +#else + /* This is faster than prev code for long text. ex: /(?i)Twain/ */ + s += skip; + if (s < end) + s = onigenc_get_right_adjust_char_head(enc, text, s); +#endif } return (UChar* )NULL; } -#endif static UChar* map_search(OnigEncoding enc, UChar map[], @@ -4380,20 +4348,26 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, retry: switch (reg->optimize) { - case OPTIMIZE_EXACT: + case OPTIMIZE_STR: p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); break; - case OPTIMIZE_EXACT_IC: + case OPTIMIZE_STR_CASE_FOLD: p = slow_search_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, p, end, range); break; - case OPTIMIZE_EXACT_BM: - p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); + case OPTIMIZE_STR_CASE_FOLD_FAST: + p = sunday_quick_search_case_fold(reg, reg->exact, reg->exact_end, p, end, + range); + break; + + case OPTIMIZE_STR_FAST: + p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range); break; - case OPTIMIZE_EXACT_BM_NO_REV: - p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); + case OPTIMIZE_STR_FAST_STEP_FORWARD: + p = sunday_quick_search_step_forward(reg, reg->exact, reg->exact_end, + p, end, range); break; case OPTIMIZE_MAP: @@ -4413,7 +4387,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, UChar* prev; switch (reg->sub_anchor) { - case ANCHOR_BEGIN_LINE: + case ANCR_BEGIN_LINE: if (!ON_STR_BEGIN(p)) { prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p); @@ -4422,7 +4396,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } break; - case ANCHOR_END_LINE: + case ANCR_END_LINE: if (ON_STR_END(p)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE prev = (UChar* )onigenc_get_prev_char_head(reg->enc, @@ -4490,8 +4464,6 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } -#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100 - static int backward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, const UChar* range, UChar* adjrange, @@ -4499,41 +4471,29 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, { UChar *p; + if (range == 0) goto fail; + range += reg->dmin; p = s; retry: switch (reg->optimize) { - case OPTIMIZE_EXACT: + case OPTIMIZE_STR: exact_method: p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, range, adjrange, end, p); break; - case OPTIMIZE_EXACT_IC: + case OPTIMIZE_STR_CASE_FOLD: + case OPTIMIZE_STR_CASE_FOLD_FAST: p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, range, adjrange, end, p); break; - case OPTIMIZE_EXACT_BM: - case OPTIMIZE_EXACT_BM_NO_REV: -#ifdef USE_INT_MAP_BACKWARD - if (IS_NULL(reg->int_map_backward)) { - int r; - - if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD) - goto exact_method; - - r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, - &(reg->int_map_backward)); - if (r != 0) return r; - } - p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, - end, p); -#else + case OPTIMIZE_STR_FAST: + case OPTIMIZE_STR_FAST_STEP_FORWARD: goto exact_method; -#endif break; case OPTIMIZE_MAP: @@ -4546,17 +4506,17 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* prev; switch (reg->sub_anchor) { - case ANCHOR_BEGIN_LINE: + case ANCR_BEGIN_LINE: if (!ON_STR_BEGIN(p)) { prev = onigenc_get_prev_char_head(reg->enc, str, p); - if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { + if (IS_NOT_NULL(prev) && !ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end)) { p = prev; goto retry; } } break; - case ANCHOR_END_LINE: + case ANCR_END_LINE: if (ON_STR_END(p)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE prev = onigenc_get_prev_char_head(reg->enc, adjrange, p); @@ -4682,7 +4642,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, if (reg->anchor != 0 && str < end) { UChar *min_semi_end, *max_semi_end; - if (reg->anchor & ANCHOR_BEGIN_POSITION) { + if (reg->anchor & ANCR_BEGIN_POSITION) { /* search start-position only */ begin_position: if (range > start) @@ -4690,7 +4650,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, else range = start; } - else if (reg->anchor & ANCHOR_BEGIN_BUF) { + else if (reg->anchor & ANCR_BEGIN_BUF) { /* search str-position only */ if (range > start) { if (start != str) goto mismatch_no_msa; @@ -4705,7 +4665,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, goto mismatch_no_msa; } } - else if (reg->anchor & ANCHOR_END_BUF) { + else if (reg->anchor & ANCR_END_BUF) { min_semi_end = max_semi_end = (UChar* )end; end_buf: @@ -4737,7 +4697,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, if (range > start) goto mismatch_no_msa; } } - else if (reg->anchor & ANCHOR_SEMI_END_BUF) { + else if (reg->anchor & ANCR_SEMI_END_BUF) { UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, 1); max_semi_end = (UChar* )end; @@ -4760,7 +4720,7 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, goto end_buf; } } - else if ((reg->anchor & ANCHOR_ANYCHAR_INF_ML)) { + else if ((reg->anchor & ANCR_ANYCHAR_INF_ML)) { goto begin_position; } } @@ -4833,13 +4793,13 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, if (! forward_search_range(reg, str, end, s, sch_range, &low, &high, (UChar** )NULL)) goto mismatch; - if ((reg->anchor & ANCHOR_ANYCHAR_INF) != 0) { + if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) { do { MATCH_AND_RETURN_CHECK(orig_range); prev = s; s += enclen(reg->enc, s); - if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { + if ((reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) { while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { prev = s; s += enclen(reg->enc, s); @@ -4862,6 +4822,8 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, } } else { /* backward search */ + if (range < str) goto mismatch; + if (orig_start < end) orig_start += enclen(reg->enc, orig_start); /* is upper range */ |