From fc9ba4264eafbb5a6ec0f3cc4cd2e1964c9b8fcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Sun, 8 Nov 2020 10:58:30 +0100 Subject: New upstream version 6.9.6 --- src/regexec.c | 592 +++++++++++++++++++++++++++++++--------------------------- 1 file changed, 320 insertions(+), 272 deletions(-) (limited to 'src/regexec.c') diff --git a/src/regexec.c b/src/regexec.c index 1b6895d..bb6b474 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -46,15 +46,15 @@ #define CHECK_INTERRUPT_IN_MATCH -#define STACK_MEM_START(reg, i) \ - (MEM_STATUS_AT((reg)->push_mem_start, (i)) != 0 ? \ - STACK_AT(mem_start_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_start_stk[i]))) +#define STACK_MEM_START(reg, idx) \ + (MEM_STATUS_AT((reg)->push_mem_start, (idx)) != 0 ? \ + STACK_AT(mem_start_stk[idx].i)->u.mem.pstr : mem_start_stk[idx].s) -#define STACK_MEM_END(reg, i) \ - (MEM_STATUS_AT((reg)->push_mem_end, (i)) != 0 ? \ - STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_end_stk[i]))) +#define STACK_MEM_END(reg, idx) \ + (MEM_STATUS_AT((reg)->push_mem_end, (idx)) != 0 ? \ + STACK_AT(mem_end_stk[idx].i)->u.mem.pstr : mem_end_stk[idx].s) -static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high, UChar** low_prev); +static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high); static int search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp); @@ -170,6 +170,9 @@ typedef struct { int best_len; /* for ONIG_OPTION_FIND_LONGEST */ UChar* best_s; #endif +#ifdef USE_CALL + unsigned long subexp_call_in_search_counter; +#endif } MatchArg; @@ -1057,8 +1060,6 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) /** stack **/ -#define INVALID_STACK_INDEX -1 - #define STK_ALT_FLAG 0x0001 /* stack type */ @@ -1099,7 +1100,15 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #define STK_MASK_TO_VOID_TARGET 0x100e #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ -typedef intptr_t StackIndex; +typedef ptrdiff_t StackIndex; + +#define INVALID_STACK_INDEX ((StackIndex )-1) + +typedef union { + StackIndex i; + UChar* s; +} StkPtrType; + typedef struct _StackType { unsigned int type; @@ -1108,7 +1117,6 @@ typedef struct _StackType { struct { Operation* pcode; /* byte code position */ UChar* pstr; /* string position */ - UChar* pstr_prev; /* previous char position of pstr */ } state; struct { int count; @@ -1119,8 +1127,8 @@ typedef struct _StackType { struct { UChar *pstr; /* start/end position */ /* Following information is set, if this stack type is MEM-START */ - StackIndex prev_start; /* prev. info (for backtrack "(...)*" ) */ - StackIndex prev_end; /* prev. info (for backtrack "(...)*" ) */ + StkPtrType prev_start; /* prev. info (for backtrack "(...)*" ) */ + StkPtrType prev_end; /* prev. info (for backtrack "(...)*" ) */ } mem; struct { UChar *pstr; /* start position */ @@ -1166,8 +1174,8 @@ struct OnigCalloutArgsStruct { MatchArg* msa; StackType* stk_base; StackType* stk; - StackIndex* mem_start_stk; - StackIndex* mem_end_stk; + StkPtrType* mem_start_stk; + StkPtrType* mem_end_stk; }; #endif @@ -1178,7 +1186,7 @@ struct OnigCalloutArgsStruct { #define UPDATE_FOR_STACK_REALLOC do{\ repeat_stk = (StackIndex* )alloc_base;\ empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\ - mem_start_stk = (StackIndex* )(empty_check_stk + reg->num_empty_check);\ + mem_start_stk = (StkPtrType* )(empty_check_stk + reg->num_empty_check);\ mem_end_stk = mem_start_stk + num_mem + 1;\ } while(0) @@ -1194,7 +1202,7 @@ struct OnigCalloutArgsStruct { #define PTR_NUM_SIZE(reg) (((reg)->num_mem + 1) * 2) #define UPDATE_FOR_STACK_REALLOC do{\ - mem_start_stk = (StackIndex* )alloc_base;\ + mem_start_stk = (StkPtrType* )alloc_base;\ mem_end_stk = mem_start_stk + num_mem + 1;\ } while(0) @@ -1218,8 +1226,12 @@ struct OnigCalloutArgsStruct { #endif #if defined(USE_CALL) +#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv) \ + (msa).subexp_call_in_search_counter = 0; + #define POP_CALL else if (stk->type == STK_RETURN) {subexp_call_nest_counter++;} else if (stk->type == STK_CALL_FRAME) {subexp_call_nest_counter--;} #else +#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv) #define POP_CALL #endif @@ -1231,6 +1243,7 @@ struct OnigCalloutArgsStruct { (msa).start = (arg_start);\ (msa).match_stack_limit = (mpv)->match_stack_limit;\ RETRY_IN_MATCH_ARG_INIT(msa,mpv)\ + SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\ (msa).mp = mpv;\ (msa).best_len = ONIG_MISMATCH;\ (msa).ptr_num = PTR_NUM_SIZE(reg);\ @@ -1243,6 +1256,7 @@ struct OnigCalloutArgsStruct { (msa).start = (arg_start);\ (msa).match_stack_limit = (mpv)->match_stack_limit;\ RETRY_IN_MATCH_ARG_INIT(msa,mpv)\ + SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\ (msa).mp = mpv;\ (msa).ptr_num = PTR_NUM_SIZE(reg);\ } while(0) @@ -1258,27 +1272,27 @@ struct OnigCalloutArgsStruct { is_alloca = 0;\ alloc_base = msa->stack_p;\ stk_base = (StackType* )(alloc_base\ - + (sizeof(StackIndex) * msa->ptr_num));\ + + (sizeof(StkPtrType) * msa->ptr_num));\ stk = stk_base;\ stk_end = stk_base + msa->stack_n;\ }\ else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\ is_alloca = 0;\ - alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\ + alloc_base = (char* )xmalloc(sizeof(StkPtrType) * msa->ptr_num\ + sizeof(StackType) * (stack_num));\ CHECK_NULL_RETURN_MEMERR(alloc_base);\ stk_base = (StackType* )(alloc_base\ - + (sizeof(StackIndex) * msa->ptr_num));\ + + (sizeof(StkPtrType) * msa->ptr_num));\ stk = stk_base;\ stk_end = stk_base + (stack_num);\ }\ else {\ is_alloca = 1;\ - alloc_base = (char* )xalloca(sizeof(StackIndex) * msa->ptr_num\ + alloc_base = (char* )xalloca(sizeof(StkPtrType) * msa->ptr_num\ + sizeof(StackType) * (stack_num));\ CHECK_NULL_RETURN_MEMERR(alloc_base);\ stk_base = (StackType* )(alloc_base\ - + (sizeof(StackIndex) * msa->ptr_num));\ + + (sizeof(StkPtrType) * msa->ptr_num));\ stk = stk_base;\ stk_end = stk_base + (stack_num);\ }\ @@ -1288,7 +1302,7 @@ struct OnigCalloutArgsStruct { #define STACK_SAVE(msa,is_alloca,alloc_base) do{\ (msa)->stack_n = (int )(stk_end - stk_base);\ if ((is_alloca) != 0) {\ - size_t size = sizeof(StackIndex) * (msa)->ptr_num\ + size_t size = sizeof(StkPtrType) * (msa)->ptr_num\ + sizeof(StackType) * (msa)->stack_n;\ (msa)->stack_p = xmalloc(size);\ CHECK_NULL_RETURN_MEMERR((msa)->stack_p);\ @@ -1373,6 +1387,24 @@ onig_set_retry_limit_in_search(unsigned long n) #endif } +#ifdef USE_CALL +static unsigned long SubexpCallLimitInSearch = DEFAULT_SUBEXP_CALL_LIMIT_IN_SEARCH; + +extern unsigned long +onig_get_subexp_call_limit_in_search(void) +{ + return SubexpCallLimitInSearch; +} + +extern int +onig_set_subexp_call_limit_in_search(unsigned long n) +{ + SubexpCallLimitInSearch = n; + return 0; +} + +#endif + #ifdef USE_CALLOUT static OnigCalloutFunc DefaultProgressCallout; static OnigCalloutFunc DefaultRetractionCallout; @@ -1637,9 +1669,9 @@ stack_double(int* is_alloca, char** arg_alloc_base, stk = *arg_stk; n = (unsigned int )(stk_end - stk_base); - size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; + size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n; n *= 2; - new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; + new_size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n; if (*is_alloca != 0) { new_alloc_base = (char* )xmalloc(new_size); if (IS_NULL(new_alloc_base)) { @@ -1669,7 +1701,7 @@ stack_double(int* is_alloca, char** arg_alloc_base, used = (int )(stk - stk_base); *arg_alloc_base = alloc_base; *arg_stk_base = (StackType* )(alloc_base - + (sizeof(StackIndex) * msa->ptr_num)); + + (sizeof(StkPtrType) * msa->ptr_num)); *arg_stk = *arg_stk_base + used; *arg_stk_end = *arg_stk_base + n; return 0; @@ -1694,22 +1726,20 @@ stack_double(int* is_alloca, char** arg_alloc_base, #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ +#define STACK_PUSH(stack_type,pat,s) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ STACK_INC;\ } while(0) -#define STACK_PUSH_WITH_ZID(stack_type,pat,s,sprev,id) do {\ +#define STACK_PUSH_WITH_ZID(stack_type,pat,s,id) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ stk->zid = (int )(id);\ stk->u.state.pcode = (pat);\ stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ STACK_INC;\ } while(0) @@ -1724,7 +1754,6 @@ stack_double(int* is_alloca, char** arg_alloc_base, stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ stk->u.state.pstr = s;\ - stk->u.state.pstr_prev = sprev;\ STACK_INC;\ } while (0) #else @@ -1735,10 +1764,9 @@ stack_double(int* is_alloca, char** arg_alloc_base, } while (0) #endif -#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) -#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev) -#define STACK_PUSH_ALT_WITH_ZID(pat,s,sprev,id) \ - STACK_PUSH_WITH_ZID(STK_ALT,pat,s,sprev,id) +#define STACK_PUSH_ALT(pat,s) STACK_PUSH(STK_ALT,pat,s) +#define STACK_PUSH_SUPER_ALT(pat,s) STACK_PUSH(STK_SUPER_ALT,pat,s) +#define STACK_PUSH_ALT_WITH_ZID(pat,s,id) STACK_PUSH_WITH_ZID(STK_ALT,pat,s,id) #if 0 #define STACK_PUSH_REPEAT(sid, pat) do {\ @@ -1767,8 +1795,8 @@ stack_double(int* is_alloca, char** arg_alloc_base, stk->u.mem.pstr = (s);\ stk->u.mem.prev_start = mem_start_stk[mnum];\ stk->u.mem.prev_end = mem_end_stk[mnum];\ - mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ - mem_end_stk[mnum] = INVALID_STACK_INDEX;\ + mem_start_stk[mnum].i = GET_STACK_INDEX(stk);\ + mem_end_stk[mnum].i = INVALID_STACK_INDEX;\ STACK_INC;\ } while(0) @@ -1779,7 +1807,7 @@ stack_double(int* is_alloca, char** arg_alloc_base, stk->u.mem.pstr = (s);\ stk->u.mem.prev_start = mem_start_stk[mnum];\ stk->u.mem.prev_end = mem_end_stk[mnum];\ - mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ + mem_end_stk[mnum].i = GET_STACK_INDEX(stk);\ STACK_INC;\ } while(0) @@ -1861,12 +1889,11 @@ stack_double(int* is_alloca, char** arg_alloc_base, STACK_INC;\ } while(0) -#define STACK_PUSH_MARK_WITH_POS(sid, s, sprev) do {\ +#define STACK_PUSH_MARK_WITH_POS(sid, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MARK;\ stk->zid = (sid);\ stk->u.val.v = (UChar* )(s);\ - stk->u.val.v2 = (sprev);\ STACK_INC;\ } while(0) @@ -1885,7 +1912,6 @@ stack_double(int* is_alloca, char** arg_alloc_base, stk->zid = (sid);\ stk->u.val.type = (stype);\ stk->u.val.v = (UChar* )(sval);\ - stk->u.val.v2 = sprev;\ STACK_INC;\ } while(0) @@ -1932,7 +1958,6 @@ stack_double(int* is_alloca, char** arg_alloc_base, && k->zid == (sid)) {\ if (level == 0) {\ (sval) = k->u.val.v;\ - sprev = k->u.val.v2;\ break;\ }\ }\ @@ -2135,14 +2160,14 @@ stack_double(int* is_alloca, char** arg_alloc_base, } while(0) #define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\ - if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\ + if (k->u.mem.prev_end.i == INVALID_STACK_INDEX) {\ (addr) = 0;\ }\ else {\ if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\ - (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\ + (addr) = STACK_AT(k->u.mem.prev_end.i)->u.mem.pstr;\ else\ - (addr) = (UChar* )k->u.mem.prev_end;\ + (addr) = k->u.mem.prev_end.s;\ }\ } while (0) @@ -2163,7 +2188,7 @@ stack_double(int* is_alloca, char** arg_alloc_base, if (endp == 0) {\ (isnull) = 0; break;\ }\ - else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\ + else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) {\ (isnull) = 0; break;\ }\ else if (endp != s) {\ @@ -2199,7 +2224,7 @@ stack_double(int* is_alloca, char** arg_alloc_base, if (endp == 0) {\ (isnull) = 0; break;\ }\ - else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \ + else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) { \ (isnull) = 0; break;\ }\ else if (endp != s) {\ @@ -2362,6 +2387,10 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, p1++; p2++; } + if (s2 >= end2) { + if (s1 < end1) return 0; + else break; + } } *ps2 = s2; @@ -2390,7 +2419,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, #define ON_STR_END(s) ((s) == end) #define DATA_ENSURE_CHECK1 (s < right_range) #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) -#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail +#define DATA_ENSURE(n) if (right_range - s < (n)) goto fail #define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range @@ -2632,9 +2661,9 @@ typedef struct { #define BYTECODE_INTERPRETER_START GOTO_OP; #define BYTECODE_INTERPRETER_END -#define CASE_OP(x) L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(0) +#define CASE_OP(x) L_##x: SOP_IN(OP_##x); MATCH_DEBUG_OUT(0) #define DEFAULT_OP /* L_DEFAULT: */ -#define NEXT_OP sprev = sbegin; JUMP_OP +#define NEXT_OP JUMP_OP #define JUMP_OP GOTO_OP #ifdef USE_DIRECT_THREADED_CODE #define GOTO_OP goto *(p->opaddr) @@ -2648,9 +2677,8 @@ typedef struct { #define BYTECODE_INTERPRETER_START \ while (1) {\ MATCH_DEBUG_OUT(0)\ - sbegin = s;\ switch (p->opcode) { -#define BYTECODE_INTERPRETER_END } sprev = sbegin; } +#define BYTECODE_INTERPRETER_END } } #define CASE_OP(x) case OP_##x: SOP_IN(OP_##x); #define DEFAULT_OP default: #define NEXT_OP break @@ -2718,12 +2746,22 @@ typedef struct { best_len = err_code; goto match_at_end;\ } while(0) +#define MATCH_COUNTER_OUT(title) do {\ + int i;\ + fprintf(DBGFP, "%s (%ld): retry limit: %8lu, subexp_call: %8lu\n", (title), (sstart - str), retry_in_match_counter, msa->subexp_call_in_search_counter); \ + fprintf(DBGFP, " ");\ + for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) {\ + fprintf(DBGFP, " %6lu", subexp_call_counters[i]);\ + }\ + fprintf(DBGFP, "\n");\ + fflush(DBGFP);\ +} while (0) + /* match data(str - end) from position (sstart). */ -/* if sstart == str then set sprev to NULL. */ static int match_at(regex_t* reg, const UChar* str, const UChar* end, - const UChar* in_right_range, const UChar* sstart, UChar* sprev, + const UChar* in_right_range, const UChar* sstart, MatchArg* msa) { @@ -2782,10 +2820,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, &&L_BACKREF_N_IC, &&L_BACKREF_MULTI, &&L_BACKREF_MULTI_IC, +#ifdef USE_BACKREF_WITH_LEVEL &&L_BACKREF_WITH_LEVEL, &&L_BACKREF_WITH_LEVEL_IC, +#endif &&L_BACKREF_CHECK, +#ifdef USE_BACKREF_WITH_LEVEL &&L_BACKREF_CHECK_WITH_LEVEL, +#endif &&L_MEM_START, &&L_MEM_START_PUSH, &&L_MEM_END_PUSH, @@ -2838,13 +2880,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, LengthType tlen, tlen2; MemNumType mem; RelAddrType addr; - UChar *s, *ps, *sbegin; + UChar *s, *ps; UChar *right_range; int is_alloca; char *alloc_base; StackType *stk_base, *stk, *stk_end; StackType *stkp; /* used as any purpose. */ - StackIndex *mem_start_stk, *mem_end_stk; + StkPtrType *mem_start_stk, *mem_end_stk; UChar* keep; #ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR @@ -2858,6 +2900,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_CALLOUT int of; #endif +#ifdef ONIG_DEBUG_MATCH_COUNTER +#define MAX_SUBEXP_CALL_COUNTERS 9 + unsigned long subexp_call_counters[MAX_SUBEXP_CALL_COUNTERS]; +#endif Operation* p = reg->ops; OnigOptionType option = reg->options; @@ -2872,6 +2918,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, static unsigned int counter = 1; #endif +#ifdef ONIG_DEBUG_MATCH_COUNTER + for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) { + subexp_call_counters[i] = 0; + } +#endif + #ifdef USE_DIRECT_THREADED_CODE if (IS_NULL(msa)) { for (i = 0; i < reg->ops_used; i++) { @@ -2903,12 +2955,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_INIT(INIT_MATCH_STACK_SIZE); UPDATE_FOR_STACK_REALLOC; for (i = 1; i <= num_mem; i++) { - mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX; + mem_start_stk[i].i = mem_end_stk[i].i = INVALID_STACK_INDEX; } #ifdef ONIG_DEBUG_MATCH - fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p, sprev: %p\n", - str, end, sstart, sprev); + fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p\n", str, end, sstart); fprintf(DBGFP, "size: %d, start offset: %d\n", (int )(end - str), (int )(sstart - str)); #endif @@ -2932,24 +2983,27 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (n > msa->best_len) { msa->best_len = n; msa->best_s = (UChar* )sstart; + goto set_region; } else goto end_best_len; } #endif best_len = n; + + set_region: region = msa->region; if (region) { if (keep > s) keep = s; -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API if (OPTON_POSIX_REGION(msa->options)) { posix_regmatch_t* rmt = (posix_regmatch_t* )region; rmt[0].rm_so = (regoff_t )(keep - str); rmt[0].rm_eo = (regoff_t )(s - str); for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (mem_end_stk[i].i != INVALID_STACK_INDEX) { rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str); rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i) - str); } @@ -2959,11 +3013,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } else { -#endif /* USE_POSIX_API_REGION_OPTION */ +#endif /* USE_POSIX_API */ region->beg[0] = (int )(keep - str); region->end[0] = (int )(s - str); for (i = 1; i <= num_mem; i++) { - if (mem_end_stk[i] != INVALID_STACK_INDEX) { + if (mem_end_stk[i].i != INVALID_STACK_INDEX) { region->beg[i] = (int )(STACK_MEM_START(reg, i) - str); region->end[i] = (int )(STACK_MEM_END(reg, i) - str); } @@ -2996,7 +3050,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (r < 0) MATCH_AT_ERROR_RETURN(r); } #endif /* USE_CAPTURE_HISTORY */ -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API } /* else OPTON_POSIX_REGION() */ #endif } /* if (region) */ @@ -3012,8 +3066,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, best_len = ONIG_MISMATCH; goto fail; /* for retry */ } - if (OPTON_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) { - goto fail; /* for retry */ + if (OPTON_FIND_LONGEST(option)) { + if (s >= in_right_range && msa->best_s == sstart) + best_len = msa->best_len; + else + goto fail; /* for retry */ } } @@ -3034,7 +3091,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; - sprev = s; s++; INC_OP; JUMP_OUT; @@ -3047,7 +3103,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; - sprev = s; s++; INC_OP; JUMP_OUT; @@ -3062,7 +3117,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; - sprev = s; s++; INC_OP; JUMP_OUT; @@ -3079,7 +3133,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; - sprev = s; s++; INC_OP; JUMP_OUT; @@ -3091,7 +3144,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (tlen-- > 0) { if (*ps++ != *s++) goto fail; } - sprev = s - 1; INC_OP; JUMP_OUT; @@ -3112,7 +3164,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, ps++; s++; if (*ps != *s) goto fail; ps++; s++; - sprev = s; if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; @@ -3131,7 +3182,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, ps++; s++; if (*ps != *s) goto fail; ps++; s++; - sprev = s; if (*ps != *s) goto fail; ps++; s++; if (*ps != *s) goto fail; @@ -3149,7 +3199,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; } - sprev = s - 2; INC_OP; JUMP_OUT; @@ -3165,7 +3214,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; } - sprev = s - 3; INC_OP; JUMP_OUT; @@ -3179,7 +3227,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*ps != *s) goto fail; ps++; s++; } - sprev = s - tlen; INC_OP; JUMP_OUT; @@ -3295,11 +3342,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(ANYCHAR_STAR) INC_OP; while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; s += n; } JUMP_OUT; @@ -3307,15 +3353,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(ANYCHAR_ML_STAR) INC_OP; while (DATA_ENSURE_CHECK1) { - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); n = enclen(encode, s); if (n > 1) { DATA_ENSURE(n); - sprev = s; s += n; } else { - sprev = s; s++; } } @@ -3329,12 +3373,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; while (DATA_ENSURE_CHECK1) { if (c == *s) { - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); } n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; s += n; } } @@ -3348,16 +3391,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; while (DATA_ENSURE_CHECK1) { if (c == *s) { - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); } n = enclen(encode, s); if (n > 1) { DATA_ENSURE(n); - sprev = s; s += n; } else { - sprev = s; s++; } } @@ -3410,14 +3451,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) goto fail; } - else if (ON_STR_END(s)) { - if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) - goto fail; - } else { - if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) - == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) - goto fail; + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (ON_STR_END(s)) { + if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } + else { + if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) + == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } } } INC_OP; @@ -3432,14 +3476,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) goto fail; } - else if (ON_STR_END(s)) { - if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) - goto fail; - } else { - if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) - != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) - goto fail; + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (ON_STR_END(s)) { + if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } + else { + if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode) + != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) + goto fail; + } } } INC_OP; @@ -3452,7 +3499,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mode = p->word_boundary.mode; if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { - if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { + UChar* sprev; + if (ON_STR_BEGIN(s)) { + INC_OP; + JUMP_OUT; + } + sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { INC_OP; JUMP_OUT; } @@ -3465,10 +3518,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, ModeType mode; mode = p->word_boundary.mode; - if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { - if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { - INC_OP; - JUMP_OUT; + if (! ON_STR_BEGIN(s)) { + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { + if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { + INC_OP; + JUMP_OUT; + } } } } @@ -3478,6 +3534,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(TEXT_SEGMENT_BOUNDARY) { int is_break; + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); switch (p->text_segment_boundary.type) { case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: @@ -3507,12 +3564,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(BEGIN_BUF) if (! ON_STR_BEGIN(s)) goto fail; + if (OPTON_NOTBOL(msa->options)) goto fail; + if (OPTON_NOT_BEGIN_STRING(msa->options)) goto fail; INC_OP; JUMP_OUT; CASE_OP(END_BUF) if (! ON_STR_END(s)) goto fail; + if (OPTON_NOTEOL(msa->options)) goto fail; + if (OPTON_NOT_END_STRING(msa->options)) goto fail; INC_OP; JUMP_OUT; @@ -3523,15 +3584,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, INC_OP; JUMP_OUT; } - else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - INC_OP; - JUMP_OUT; + else if (! ON_STR_END(s)) { + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); + if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { + INC_OP; + JUMP_OUT; + } } goto fail; CASE_OP(END_LINE) if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (OPTON_NOTEOL(msa->options)) goto fail; @@ -3556,9 +3621,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(SEMI_END_BUF) if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE + UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (OPTON_NOTEOL(msa->options)) goto fail; + if (OPTON_NOT_END_STRING(msa->options)) goto fail; INC_OP; JUMP_OUT; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE @@ -3567,6 +3634,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && ON_STR_END(s + enclen(encode, s))) { + if (OPTON_NOTEOL(msa->options)) goto fail; + if (OPTON_NOT_END_STRING(msa->options)) goto fail; INC_OP; JUMP_OUT; } @@ -3575,6 +3644,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, UChar* ss = s + enclen(encode, s); ss += enclen(encode, ss); if (ON_STR_END(ss)) { + if (OPTON_NOTEOL(msa->options)) goto fail; + if (OPTON_NOT_END_STRING(msa->options)) goto fail; INC_OP; JUMP_OUT; } @@ -3586,6 +3657,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, switch (p->check_position.type) { case CHECK_POSITION_SEARCH_START: if (s != msa->start) goto fail; + if (OPTON_NOT_BEGIN_POSITION(msa->options)) goto fail; break; case CHECK_POSITION_CURRENT_RIGHT_RANGE: if (s != right_range) goto fail; @@ -3604,7 +3676,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(MEM_START) mem = p->memory_start.num; - mem_start_stk[mem] = (StackIndex )((void* )s); + mem_start_stk[mem].s = s; INC_OP; JUMP_OUT; @@ -3616,7 +3688,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(MEM_END) mem = p->memory_end.num; - mem_end_stk[mem] = (StackIndex )((void* )s); + mem_end_stk[mem].s = s; INC_OP; JUMP_OUT; @@ -3629,20 +3701,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ si = GET_STACK_INDEX(stkp); STACK_PUSH_MEM_END(mem, s); - mem_start_stk[mem] = si; + mem_start_stk[mem].i = si; INC_OP; JUMP_OUT; } CASE_OP(MEM_END_REC) mem = p->memory_end.num; - mem_end_stk[mem] = (StackIndex )((void* )s); + mem_end_stk[mem].s = s; STACK_GET_MEM_START(mem, stkp); if (MEM_STATUS_AT(reg->push_mem_start, mem)) - mem_start_stk[mem] = GET_STACK_INDEX(stkp); + mem_start_stk[mem].i = GET_STACK_INDEX(stkp); else - mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); + mem_start_stk[mem].s = stkp->u.mem.pstr; STACK_PUSH_MEM_END_MARK(mem); INC_OP; @@ -3661,21 +3733,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem = p->backref_n.n1; backref: { - int len; UChar *pstart, *pend; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_end_stk[mem].i == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail; pstart = STACK_MEM_START(reg, mem); pend = STACK_MEM_END(reg, mem); n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); - sprev = s; STRING_CMP(s, pstart, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; } } INC_OP; @@ -3684,21 +3752,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(BACKREF_N_IC) mem = p->backref_n.n1; { - int len; UChar *pstart, *pend; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; + if (mem_end_stk[mem].i == INVALID_STACK_INDEX) goto fail; + if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail; pstart = STACK_MEM_START(reg, mem); pend = STACK_MEM_END(reg, mem); n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); - sprev = s; STRING_CMP_IC(case_fold_flag, pstart, &s, n); - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; } } INC_OP; @@ -3706,28 +3770,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(BACKREF_MULTI) { - int len, is_fail; + int is_fail; UChar *pstart, *pend, *swork; tlen = p->backref_general.num; for (i = 0; i < tlen; i++) { mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i]; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue; pstart = STACK_MEM_START(reg, mem); pend = STACK_MEM_END(reg, mem); n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); - sprev = s; swork = s; STRING_CMP_VALUE(swork, pstart, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; } break; /* success */ } @@ -3738,28 +3799,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(BACKREF_MULTI_IC) { - int len, is_fail; + int is_fail; UChar *pstart, *pend, *swork; tlen = p->backref_general.num; for (i = 0; i < tlen; i++) { mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i]; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue; pstart = STACK_MEM_START(reg, mem); pend = STACK_MEM_END(reg, mem); n = (int )(pend - pstart); if (n != 0) { DATA_ENSURE(n); - sprev = s; swork = s; STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail); if (is_fail) continue; s = swork; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; } break; /* success */ } @@ -3774,10 +3832,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto backref_with_level; CASE_OP(BACKREF_WITH_LEVEL) { - int len; int level; MemNumType* mems; - UChar* ssave; n = 0; backref_with_level: @@ -3785,17 +3841,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, tlen = p->backref_general.num; mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns; - ssave = s; - if (backref_match_at_nested_level(reg, stk, stk_base, n, - case_fold_flag, level, (int )tlen, mems, &s, end)) { - if (ssave != s) { - sprev = ssave; - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - } - } - else + if (! backref_match_at_nested_level(reg, stk, stk_base, n, + case_fold_flag, level, (int )tlen, mems, &s, end)) { goto fail; + } } INC_OP; JUMP_OUT; @@ -3810,8 +3859,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, for (i = 0; i < tlen; i++) { mem = mems[i]; - if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; - if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue; break; /* success */ } if (i == tlen) goto fail; @@ -3928,13 +3977,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(PUSH) addr = p->push.addr; - STACK_PUSH_ALT(p + addr, s, sprev); + STACK_PUSH_ALT(p + addr, s); INC_OP; JUMP_OUT; CASE_OP(PUSH_SUPER) addr = p->push.addr; - STACK_PUSH_SUPER_ALT(p + addr, s, sprev); + STACK_PUSH_SUPER_ALT(p + addr, s); INC_OP; JUMP_OUT; @@ -3956,7 +4005,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, addr = p->push_or_jump_exact1.addr; c = p->push_or_jump_exact1.c; if (DATA_ENSURE_CHECK1 && c == *s) { - STACK_PUSH_ALT(p + addr, s, sprev); + STACK_PUSH_ALT(p + addr, s); INC_OP; JUMP_OUT; } @@ -3972,9 +4021,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, addr = p->push_if_peek_next.addr; c = p->push_if_peek_next.c; if (DATA_ENSURE_CHECK1 && c == *s) { - STACK_PUSH_ALT(p + addr, s, sprev); - INC_OP; - JUMP_OUT; + STACK_PUSH_ALT(p + addr, s); } } INC_OP; @@ -3986,7 +4033,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT_INC(mem, 0); if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p + addr, s, sprev); + STACK_PUSH_ALT(p + addr, s); } INC_OP; JUMP_OUT; @@ -3997,7 +4044,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_REPEAT_INC(mem, 0); if (reg->repeat_range[mem].lower == 0) { - STACK_PUSH_ALT(p + 1, s, sprev); + STACK_PUSH_ALT(p + 1, s); p += addr; } else @@ -4014,7 +4061,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (n >= reg->repeat_range[mem].lower) { INC_OP; - STACK_PUSH_ALT(p, s, sprev); + STACK_PUSH_ALT(p, s); p = reg->repeat_range[mem].u.pcode; } else { @@ -4033,7 +4080,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else { if (n >= reg->repeat_range[mem].lower) { - STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev); + STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s); INC_OP; } else { @@ -4047,6 +4094,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (subexp_call_nest_counter == SubexpCallMaxNestLevel) goto fail; subexp_call_nest_counter++; + + if (SubexpCallLimitInSearch != 0) { + msa->subexp_call_in_search_counter++; +#ifdef ONIG_DEBUG_MATCH_COUNTER + if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS) + subexp_call_counters[p->call.called_mem]++; + if (msa->subexp_call_in_search_counter % 1000 == 0) + MATCH_COUNTER_OUT("CALL"); +#endif + if (msa->subexp_call_in_search_counter > + SubexpCallLimitInSearch) { + MATCH_AT_ERROR_RETURN(ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER); + } + } + addr = p->call.addr; INC_OP; STACK_PUSH_CALL_FRAME(p); p = reg->ops + addr; @@ -4070,7 +4132,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, for (tlen = p->move.n; tlen > 0; tlen--) { len = enclen(encode, s); - sprev = s; s += len; if (s > end) goto fail; if (s == end) { @@ -4079,7 +4140,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } } - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); INC_OP; JUMP_OUT; @@ -4088,10 +4148,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (tlen != 0) { s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } if (p->step_back_start.remaining != 0) { - STACK_PUSH_ALT_WITH_ZID(p + 1, s, sprev, p->step_back_start.remaining); + STACK_PUSH_ALT_WITH_ZID(p + 1, s, p->step_back_start.remaining); p += p->step_back_start.addr; } else @@ -4103,9 +4162,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (tlen != INFINITE_LEN) tlen--; s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, 1); if (IS_NULL(s)) goto fail; - sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); if (tlen != 0) { - STACK_PUSH_ALT_WITH_ZID(p, s, sprev, (int )tlen); + STACK_PUSH_ALT_WITH_ZID(p, s, (int )tlen); } INC_OP; JUMP_OUT; @@ -4114,8 +4172,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem = p->cut_to_mark.id; /* mem: mark id */ STACK_TO_VOID_TO_MARK(stkp, mem); if (p->cut_to_mark.restore_pos != 0) { - s = stkp->u.val.v; - sprev = stkp->u.val.v2; + s = stkp->u.val.v; } INC_OP; JUMP_OUT; @@ -4123,7 +4180,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, CASE_OP(MARK) mem = p->mark.id; /* mem: mark id */ if (p->mark.save_pos != 0) - STACK_PUSH_MARK_WITH_POS(mem, s, sprev); + STACK_PUSH_MARK_WITH_POS(mem, s); else STACK_PUSH_MARK(mem); @@ -4275,9 +4332,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, fail: #endif STACK_POP; - p = stk->u.state.pcode; - s = stk->u.state.pstr; - sprev = stk->u.state.pstr_prev; + p = stk->u.state.pcode; + s = stk->u.state.pstr; CHECK_RETRY_LIMIT_IN_MATCH; JUMP_OUT; @@ -4290,6 +4346,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (msa->retry_limit_in_search != 0) { msa->retry_limit_in_search_counter += retry_in_match_counter; } + +#ifdef ONIG_DEBUG_MATCH_COUNTER + MATCH_COUNTER_OUT("END"); +#endif + STACK_SAVE(msa, is_alloca, alloc_base); return best_len; } @@ -4324,12 +4385,11 @@ typedef struct { int state; /* value of enum SearchRangeStatus */ UChar* low; UChar* high; - UChar* low_prev; UChar* sch_range; } SearchRange; #define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, msas + i); \ + r = match_at(reg, str, end, (upper_range), s, msas + i); \ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ goto match;\ @@ -4345,8 +4405,8 @@ regset_search_body_position_lead(OnigRegSet* set, OnigOptionType option, MatchArg* msas, int* rmatch_pos) { int r, n, i; - UChar *s, *prev; - UChar *low, *high, *low_prev; + UChar *s; + UChar *low, *high; UChar* sch_range; regex_t* reg; OnigEncoding enc; @@ -4354,12 +4414,7 @@ regset_search_body_position_lead(OnigRegSet* set, n = set->n; enc = set->enc; - s = (UChar* )start; - if (s > str) - prev = onigenc_get_prev_char_head(enc, str, s); - else - prev = (UChar* )NULL; sr = (SearchRange* )xmalloc(sizeof(*sr) * n); CHECK_NULL_RETURN_MEMERR(sr); @@ -4375,18 +4430,16 @@ regset_search_body_position_lead(OnigRegSet* set, else sch_range = (UChar* )end; - if (forward_search(reg, str, end, s, sch_range, &low, &high, &low_prev)) { + if (forward_search(reg, str, end, s, sch_range, &low, &high)) { sr[i].state = SRS_LOW_HIGH; sr[i].low = low; sr[i].high = high; - sr[i].low_prev = low_prev; sr[i].sch_range = sch_range; } } else { sch_range = (UChar* )end; - if (forward_search(reg, str, end, s, sch_range, - &low, &high, (UChar** )NULL)) { + if (forward_search(reg, str, end, s, sch_range, &low, &high)) { goto total_active; } } @@ -4396,7 +4449,6 @@ regset_search_body_position_lead(OnigRegSet* set, sr[i].state = SRS_ALL_RANGE; sr[i].low = s; sr[i].high = (UChar* )range; - sr[i].low_prev = prev; } } @@ -4412,10 +4464,9 @@ regset_search_body_position_lead(OnigRegSet* set, if (s < sr[i].low) continue; if (s >= sr[i].high) { if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range, - &low, &high, &low_prev) != 0) { + &low, &high) != 0) { sr[i].low = low; sr[i].high = high; - sr[i].low_prev = low_prev; if (s < low) continue; } else { @@ -4436,16 +4487,13 @@ regset_search_body_position_lead(OnigRegSet* set, for (i = 0; i < n; i++) { if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) { low = sr[i].low; - low_prev = sr[i].low_prev; } } if (low == range) break; s = low; - prev = low_prev; } else { - prev = s; s += enclen(enc, s); } } while (1); @@ -4459,10 +4507,9 @@ regset_search_body_position_lead(OnigRegSet* set, if (s < sr[i].low) continue; if (s >= sr[i].high) { if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range, - &low, &high, &low_prev) != 0) { + &low, &high) != 0) { sr[i].low = low; sr[i].high = high; - /* sr[i].low_prev = low_prev; */ if (s < low) continue; } else { @@ -4483,7 +4530,6 @@ regset_search_body_position_lead(OnigRegSet* set, if (set->anychar_inf != 0) prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end); - prev = s; s += enclen(enc, s); } while (1); } @@ -4552,7 +4598,7 @@ onig_regset_search_with_param(OnigRegSet* set, { int r; int i; - UChar *s, *prev; + UChar *s; regex_t* reg; OnigEncoding enc; OnigRegion* region; @@ -4654,7 +4700,6 @@ onig_regset_search_with_param(OnigRegSet* set, else if (str == end) { /* empty string */ start = end = str; s = (UChar* )start; - prev = (UChar* )NULL; msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n); CHECK_NULL_RETURN_MEMERR(msas); @@ -4669,7 +4714,7 @@ onig_regset_search_with_param(OnigRegSet* set, /* Can't use REGSET_MATCH_AND_RETURN_CHECK() because r must be set regex index (i) */ - r = match_at(reg, str, end, end, s, prev, msas + i); + r = match_at(reg, str, end, end, s, msas + i); if (r != ONIG_MISMATCH) { if (r >= 0) { r = i; @@ -4814,7 +4859,7 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end, else s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s); - while (s >= text) { + while (PTR_GE(s, text)) { if (*s == *target) { p = s + 1; t = target + 1; @@ -4855,7 +4900,7 @@ sunday_quick_search_step_forward(regex_t* reg, tail = target_end - 1; tlen1 = (int )(tail - target); end = text_range; - if (end + tlen1 > text_end) + if (tlen1 > text_end - end) end = text_end - tlen1; map_offset = reg->map_offset; @@ -4893,15 +4938,38 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end, const UChar *s, *t, *p, *end; const UChar *tail; int map_offset; - - end = text_range + (target_end - target); - if (end > text_end) - end = text_end; + ptrdiff_t target_len; map_offset = reg->map_offset; tail = target_end - 1; - s = text + (tail - target); + target_len = target_end - target; + if (target_len > text_end - text_range) { + end = text_end; + if (target_len > text_end - text) + return (UChar* )NULL; + } + else { + end = text_range + target_len; + } + + s = text + target_len - 1; + +#ifdef USE_STRICT_POINTER_ADDRESS + if (s < end) { + while (TRUE) { + p = s; + t = tail; + while (*p == *t) { + if (t == target) return (UChar* )p; + p--; t--; + } + if (text_end - s <= map_offset) break; + if (reg->map[*(s + map_offset)] >= end - s) break; + s += reg->map[*(s + map_offset)]; + } + } +#else while (s < end) { p = s; t = tail; @@ -4909,9 +4977,10 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end, if (t == target) return (UChar* )p; p--; t--; } - if (s + map_offset >= text_end) break; + if (text_end - s <= map_offset) break; s += reg->map[*(s + map_offset)]; } +#endif return (UChar* )NULL; } @@ -4937,7 +5006,7 @@ map_search_backward(OnigEncoding enc, UChar map[], { const UChar *s = text_start; - while (s >= text) { + while (PTR_GE(s, text)) { if (map[*s]) return (UChar* )s; s = onigenc_get_prev_char_head(enc, adjust_text, s); @@ -4963,13 +5032,16 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, OnigMatchParam* mp) { int r; - UChar *prev; MatchArg msa; +#ifndef USE_POSIX_API + if (OPTON_POSIX_REGION(option)) return ONIGERR_INVALID_ARGUMENT; +#endif + ADJUST_MATCH_PARAM(reg, mp); MATCH_ARG_INIT(msa, reg, option, region, at, mp); if (region -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API && !OPTON_POSIX_REGION(option) #endif ) { @@ -4986,8 +5058,14 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, } } - prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); - r = match_at(reg, str, end, end, at, prev, &msa); + r = match_at(reg, str, end, end, at, &msa); +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + if (OPTON_FIND_LONGEST(option) && r == ONIG_MISMATCH) { + if (msa.best_len >= 0) { + r = msa.best_len; + } + } +#endif } end: @@ -4997,7 +5075,7 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, - UChar* range, UChar** low, UChar** high, UChar** low_prev) + UChar* range, UChar** low, UChar** high) { UChar *p, *pprev = (UChar* )NULL; @@ -5081,33 +5159,18 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, } if (reg->dist_max == 0) { - *low = p; - if (low_prev) { - if (*low > start) - *low_prev = onigenc_get_prev_char_head(reg->enc, start, p); - else - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), p); - } + *low = p; *high = p; } else { if (reg->dist_max != INFINITE_LEN) { if (p - str < reg->dist_max) { *low = (UChar* )str; - if (low_prev) - *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low); } else { *low = p - reg->dist_max; if (*low > start) { - *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, start, - *low, (const UChar** )low_prev); - } - else { - if (low_prev) - *low_prev = onigenc_get_prev_char_head(reg->enc, - (pprev ? pprev : str), *low); + *low = onigenc_get_right_adjust_char_head(reg->enc, start, *low); } } } @@ -5263,7 +5326,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, OnigOptionType option, OnigMatchParam* mp) { int r; - UChar *s, *prev; + UChar *s; MatchArg msa; const UChar *orig_start = start; @@ -5275,8 +5338,15 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, ADJUST_MATCH_PARAM(reg, mp); +#ifndef USE_POSIX_API + if (OPTON_POSIX_REGION(option)) { + r = ONIGERR_INVALID_ARGUMENT; + goto finish_no_msa; + } +#endif + if (region -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API && ! OPTON_POSIX_REGION(option) #endif ) { @@ -5294,27 +5364,14 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, } -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE #define MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! OPTON_FIND_LONGEST(reg->options)) {\ - goto match;\ - }\ - }\ - else goto finish; /* error */ \ - } -#else -#define MATCH_AND_RETURN_CHECK(upper_range) \ - r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ + r = match_at(reg, str, end, (upper_range), s, &msa);\ if (r != ONIG_MISMATCH) {\ if (r >= 0) {\ goto match;\ }\ else goto finish; /* error */ \ } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ /* anchor optimize: resume search range */ @@ -5422,7 +5479,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, if (reg->threshold_len == 0) { start = end = str = address_for_empty_string; s = (UChar* )start; - prev = (UChar* )NULL; MATCH_ARG_INIT(msa, reg, option, region, start, mp); MATCH_AND_RETURN_CHECK(end); @@ -5440,13 +5496,8 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, s = (UChar* )start; if (range > start) { /* forward search */ - if (s > str) - prev = onigenc_get_prev_char_head(reg->enc, str, s); - else - prev = (UChar* )NULL; - if (reg->optimize != OPTIMIZE_NONE) { - UChar *sch_range, *low, *high, *low_prev; + UChar *sch_range, *low, *high; if (reg->dist_max != 0) { if (reg->dist_max == INFINITE_LEN) @@ -5467,27 +5518,27 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, if (reg->dist_max != INFINITE_LEN) { do { - if (! forward_search(reg, str, end, s, sch_range, &low, &high, - &low_prev)) goto mismatch; + if (! forward_search(reg, str, end, s, sch_range, &low, &high)) + goto mismatch; if (s < low) { s = low; - prev = low_prev; } while (s <= high) { MATCH_AND_RETURN_CHECK(data_range); - prev = s; s += enclen(reg->enc, s); } } while (s < range); goto mismatch; } else { /* check only. */ - if (! forward_search(reg, str, end, s, sch_range, &low, &high, - (UChar** )NULL)) goto mismatch; + if (! forward_search(reg, str, end, s, sch_range, &low, &high)) + goto mismatch; if ((reg->anchor & ANCR_ANYCHAR_INF) != 0 && (reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) { do { + UChar* prev; + MATCH_AND_RETURN_CHECK(data_range); prev = s; s += enclen(reg->enc, s); @@ -5504,7 +5555,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, do { MATCH_AND_RETURN_CHECK(data_range); - prev = s; s += enclen(reg->enc, s); } while (s < range); @@ -5549,12 +5599,11 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, if (s > high) s = high; - while (s >= low) { - prev = onigenc_get_prev_char_head(reg->enc, str, s); + while (PTR_GE(s, low)) { MATCH_AND_RETURN_CHECK(orig_start); - s = prev; + s = onigenc_get_prev_char_head(reg->enc, str, s); } - } while (s >= range); + } while (PTR_GE(s, range)); goto mismatch; } else { /* check only. */ @@ -5566,10 +5615,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, } do { - prev = onigenc_get_prev_char_head(reg->enc, str, s); MATCH_AND_RETURN_CHECK(orig_start); - s = prev; - } while (s >= range); + s = onigenc_get_prev_char_head(reg->enc, str, s); + } while (PTR_GE(s, range)); } mismatch: @@ -5589,7 +5637,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end, /* If result is mismatch and no FIND_NOT_EMPTY option, then the region is not set in match_at(). */ if (OPTON_FIND_NOT_EMPTY(reg->options) && region -#ifdef USE_POSIX_API_REGION_OPTION +#ifdef USE_POSIX_API && !OPTON_POSIX_REGION(option) #endif ) { @@ -5952,7 +6000,7 @@ extern int onig_init_for_match_at(regex_t* reg) { return match_at(reg, (const UChar* )NULL, (const UChar* )NULL, - (const UChar* )NULL, (const UChar* )NULL, (UChar* )NULL, + (const UChar* )NULL, (const UChar* )NULL, (MatchArg* )NULL); } #endif @@ -6139,8 +6187,8 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i const UChar* str; StackType* stk_base; int i; - StackIndex* mem_start_stk; - StackIndex* mem_end_stk; + StkPtrType* mem_start_stk; + StkPtrType* mem_end_stk; i = mem_num; reg = a->regex; @@ -6150,7 +6198,7 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i mem_end_stk = a->mem_end_stk; if (i > 0) { - if (a->mem_end_stk[i] != INVALID_STACK_INDEX) { + if (a->mem_end_stk[i].i != INVALID_STACK_INDEX) { *begin = (int )(STACK_MEM_START(reg, i) - str); *end = (int )(STACK_MEM_END(reg, i) - str); } -- cgit v1.2.3