summary | refs | log | tree | commit | diff
path: root/src/regexec.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regexec.c')
-rw-r--r-- src/regexec.c 1911
1 files changed, 1299 insertions, 612 deletions
diff --git a/src/regexec.c b/src/regexec.c
index 6618996..ce498c6 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -39,6 +39,20 @@
#define CHECK_INTERRUPT_IN_MATCH
+#define STACK_MEM_START(reg, i) \
+ (MEM_STATUS_AT((reg)->push_mem_start, (i)) != 0 ? \
+ STACK_AT(mem_start_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_start_stk[i])))
+
+#define STACK_MEM_END(reg, i) \
+ (MEM_STATUS_AT((reg)->push_mem_end, (i)) != 0 ? \
+ STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_end_stk[i])))
+
+static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high, UChar** low_prev);
+
+static int
+search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp);
+
+
#ifdef USE_CALLOUT
typedef struct {
int last_match_at_call_counter;
@@ -129,7 +143,7 @@ typedef struct {
} MatchArg;
-#ifdef ONIG_DEBUG
+#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
/* arguments type */
typedef enum {
@@ -149,102 +163,108 @@ typedef struct {
} OpInfoType;
static OpInfoType OpInfo[] = {
- { OP_FINISH, "finish" },
- { OP_END, "end" },
- { OP_EXACT1, "exact1" },
- { OP_EXACT2, "exact2" },
- { OP_EXACT3, "exact3" },
- { OP_EXACT4, "exact4" },
- { OP_EXACT5, "exact5" },
- { OP_EXACTN, "exactn" },
- { OP_EXACTMB2N1, "exactmb2-n1" },
- { OP_EXACTMB2N2, "exactmb2-n2" },
- { OP_EXACTMB2N3, "exactmb2-n3" },
- { OP_EXACTMB2N, "exactmb2-n" },
- { OP_EXACTMB3N, "exactmb3n" },
- { OP_EXACTMBN, "exactmbn" },
- { OP_EXACT1_IC, "exact1-ic" },
- { OP_EXACTN_IC, "exactn-ic" },
- { OP_CCLASS, "cclass" },
- { OP_CCLASS_MB, "cclass-mb" },
- { OP_CCLASS_MIX, "cclass-mix" },
- { OP_CCLASS_NOT, "cclass-not" },
- { OP_CCLASS_MB_NOT, "cclass-mb-not" },
- { OP_CCLASS_MIX_NOT, "cclass-mix-not" },
- { OP_ANYCHAR, "anychar" },
- { OP_ANYCHAR_ML, "anychar-ml" },
- { OP_ANYCHAR_STAR, "anychar*" },
- { OP_ANYCHAR_ML_STAR, "anychar-ml*" },
- { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next" },
- { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next" },
- { OP_WORD, "word" },
- { OP_WORD_ASCII, "word-ascii" },
- { OP_NO_WORD, "not-word" },
- { OP_NO_WORD_ASCII, "not-word-ascii" },
- { OP_WORD_BOUNDARY, "word-boundary" },
- { OP_NO_WORD_BOUNDARY, "not-word-boundary" },
- { OP_WORD_BEGIN, "word-begin" },
- { OP_WORD_END, "word-end" },
- { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary" },
- { OP_BEGIN_BUF, "begin-buf" },
- { OP_END_BUF, "end-buf" },
- { OP_BEGIN_LINE, "begin-line" },
- { OP_END_LINE, "end-line" },
- { OP_SEMI_END_BUF, "semi-end-buf" },
- { OP_BEGIN_POSITION, "begin-position" },
- { OP_BACKREF1, "backref1" },
- { OP_BACKREF2, "backref2" },
- { OP_BACKREF_N, "backref-n" },
- { OP_BACKREF_N_IC, "backref-n-ic" },
- { OP_BACKREF_MULTI, "backref_multi" },
- { OP_BACKREF_MULTI_IC, "backref_multi-ic" },
- { OP_BACKREF_WITH_LEVEL, "backref_with_level" },
- { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c" },
- { OP_BACKREF_CHECK, "backref_check" },
- { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level" },
- { OP_MEMORY_START_PUSH, "mem-start-push" },
- { OP_MEMORY_START, "mem-start" },
- { OP_MEMORY_END_PUSH, "mem-end-push" },
- { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec" },
- { OP_MEMORY_END, "mem-end" },
- { OP_MEMORY_END_REC, "mem-end-rec" },
- { OP_FAIL, "fail" },
- { OP_JUMP, "jump" },
- { OP_PUSH, "push" },
- { OP_PUSH_SUPER, "push-super" },
- { OP_POP_OUT, "pop-out" },
+ { OP_FINISH, "finish"},
+ { OP_END, "end"},
+ { OP_STR_1, "str_1"},
+ { OP_STR_2, "str_2"},
+ { OP_STR_3, "str_3"},
+ { OP_STR_4, "str_4"},
+ { OP_STR_5, "str_5"},
+ { OP_STR_N, "str_n"},
+ { OP_STR_MB2N1, "str_mb2-n1"},
+ { OP_STR_MB2N2, "str_mb2-n2"},
+ { OP_STR_MB2N3, "str_mb2-n3"},
+ { OP_STR_MB2N, "str_mb2-n"},
+ { OP_STR_MB3N, "str_mb3n"},
+ { OP_STR_MBN, "str_mbn"},
+ { OP_STR_1_IC, "str_1-ic"},
+ { OP_STR_N_IC, "str_n-ic"},
+ { OP_CCLASS, "cclass"},
+ { OP_CCLASS_MB, "cclass-mb"},
+ { OP_CCLASS_MIX, "cclass-mix"},
+ { OP_CCLASS_NOT, "cclass-not"},
+ { OP_CCLASS_MB_NOT, "cclass-mb-not"},
+ { OP_CCLASS_MIX_NOT, "cclass-mix-not"},
+ { OP_ANYCHAR, "anychar"},
+ { OP_ANYCHAR_ML, "anychar-ml"},
+ { OP_ANYCHAR_STAR, "anychar*"},
+ { OP_ANYCHAR_ML_STAR, "anychar-ml*"},
+ { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next"},
+ { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next"},
+ { OP_WORD, "word"},
+ { OP_WORD_ASCII, "word-ascii"},
+ { OP_NO_WORD, "not-word"},
+ { OP_NO_WORD_ASCII, "not-word-ascii"},
+ { OP_WORD_BOUNDARY, "word-boundary"},
+ { OP_NO_WORD_BOUNDARY, "not-word-boundary"},
+ { OP_WORD_BEGIN, "word-begin"},
+ { OP_WORD_END, "word-end"},
+ { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary"},
+ { OP_BEGIN_BUF, "begin-buf"},
+ { OP_END_BUF, "end-buf"},
+ { OP_BEGIN_LINE, "begin-line"},
+ { OP_END_LINE, "end-line"},
+ { OP_SEMI_END_BUF, "semi-end-buf"},
+ { OP_BEGIN_POSITION, "begin-position"},
+ { OP_BACKREF1, "backref1"},
+ { OP_BACKREF2, "backref2"},
+ { OP_BACKREF_N, "backref-n"},
+ { OP_BACKREF_N_IC, "backref-n-ic"},
+ { OP_BACKREF_MULTI, "backref_multi"},
+ { OP_BACKREF_MULTI_IC, "backref_multi-ic"},
+ { OP_BACKREF_WITH_LEVEL, "backref_with_level"},
+ { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c"},
+ { OP_BACKREF_CHECK, "backref_check"},
+ { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level"},
+ { OP_MEM_START_PUSH, "mem-start-push"},
+ { OP_MEM_START, "mem-start"},
+ { OP_MEM_END_PUSH, "mem-end-push"},
+#ifdef USE_CALL
+ { OP_MEM_END_PUSH_REC, "mem-end-push-rec"},
+#endif
+ { OP_MEM_END, "mem-end"},
+#ifdef USE_CALL
+ { OP_MEM_END_REC, "mem-end-rec"},
+#endif
+ { OP_FAIL, "fail"},
+ { OP_JUMP, "jump"},
+ { OP_PUSH, "push"},
+ { OP_PUSH_SUPER, "push-super"},
+ { OP_POP_OUT, "pop-out"},
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1" },
+ { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1"},
+#endif
+ { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next"},
+ { OP_REPEAT, "repeat"},
+ { OP_REPEAT_NG, "repeat-ng"},
+ { OP_REPEAT_INC, "repeat-inc"},
+ { OP_REPEAT_INC_NG, "repeat-inc-ng"},
+ { OP_EMPTY_CHECK_START, "empty-check-start"},
+ { OP_EMPTY_CHECK_END, "empty-check-end"},
+ { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst"},
+#ifdef USE_CALL
+ { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push"},
+#endif
+ { OP_PREC_READ_START, "push-pos"},
+ { OP_PREC_READ_END, "pop-pos"},
+ { OP_PREC_READ_NOT_START, "prec-read-not-start"},
+ { OP_PREC_READ_NOT_END, "prec-read-not-end"},
+ { OP_ATOMIC_START, "atomic-start"},
+ { OP_ATOMIC_END, "atomic-end"},
+ { OP_LOOK_BEHIND, "look-behind"},
+ { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start"},
+ { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end"},
+ { OP_PUSH_SAVE_VAL, "push-save-val"},
+ { OP_UPDATE_VAR, "update-var"},
+#ifdef USE_CALL
+ { OP_CALL, "call"},
+ { OP_RETURN, "return"},
#endif
- { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next" },
- { OP_REPEAT, "repeat" },
- { OP_REPEAT_NG, "repeat-ng" },
- { OP_REPEAT_INC, "repeat-inc" },
- { OP_REPEAT_INC_NG, "repeat-inc-ng" },
- { OP_REPEAT_INC_SG, "repeat-inc-sg" },
- { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg" },
- { OP_EMPTY_CHECK_START, "empty-check-start" },
- { OP_EMPTY_CHECK_END, "empty-check-end" },
- { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst" },
- { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push" },
- { OP_PREC_READ_START, "push-pos" },
- { OP_PREC_READ_END, "pop-pos" },
- { OP_PREC_READ_NOT_START, "prec-read-not-start" },
- { OP_PREC_READ_NOT_END, "prec-read-not-end" },
- { OP_ATOMIC_START, "atomic-start" },
- { OP_ATOMIC_END, "atomic-end" },
- { OP_LOOK_BEHIND, "look-behind" },
- { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start" },
- { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end" },
- { OP_CALL, "call" },
- { OP_RETURN, "return" },
- { OP_PUSH_SAVE_VAL, "push-save-val" },
- { OP_UPDATE_VAR, "update-var" },
#ifdef USE_CALLOUT
- { OP_CALLOUT_CONTENTS, "callout-contents" },
- { OP_CALLOUT_NAME, "callout-name" },
+ { OP_CALLOUT_CONTENTS, "callout-contents"},
+ { OP_CALLOUT_NAME, "callout-name"},
#endif
- { -1, "" }
+ { -1, ""}
};
static char*
@@ -320,32 +340,32 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
fprintf(f, "%s", op2name(opcode));
switch (opcode) {
- case OP_EXACT1:
+ case OP_STR_1:
p_string(f, 1, p->exact.s); break;
- case OP_EXACT2:
+ case OP_STR_2:
p_string(f, 2, p->exact.s); break;
- case OP_EXACT3:
+ case OP_STR_3:
p_string(f, 3, p->exact.s); break;
- case OP_EXACT4:
+ case OP_STR_4:
p_string(f, 4, p->exact.s); break;
- case OP_EXACT5:
+ case OP_STR_5:
p_string(f, 5, p->exact.s); break;
- case OP_EXACTN:
+ case OP_STR_N:
len = p->exact_n.n;
p_string(f, len, p->exact_n.s); break;
- case OP_EXACTMB2N1:
+ case OP_STR_MB2N1:
p_string(f, 2, p->exact.s); break;
- case OP_EXACTMB2N2:
+ case OP_STR_MB2N2:
p_string(f, 4, p->exact.s); break;
- case OP_EXACTMB2N3:
+ case OP_STR_MB2N3:
p_string(f, 3, p->exact.s); break;
- case OP_EXACTMB2N:
+ case OP_STR_MB2N:
len = p->exact_n.n;
p_len_string(f, len, 2, p->exact_n.s); break;
- case OP_EXACTMB3N:
+ case OP_STR_MB3N:
len = p->exact_n.n;
p_len_string(f, len, 3, p->exact_n.s); break;
- case OP_EXACTMBN:
+ case OP_STR_MBN:
{
int mb_len;
@@ -357,11 +377,11 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
while (n-- > 0) { fputc(*q++, f); }
}
break;
- case OP_EXACT1_IC:
+ case OP_STR_1_IC:
len = enclen(enc, p->exact.s);
p_string(f, len, p->exact.s);
break;
- case OP_EXACTN_IC:
+ case OP_STR_N_IC:
len = p->exact_n.n;
p_len_string(f, len, 1, p->exact_n.s);
break;
@@ -375,13 +395,13 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
case OP_CCLASS_MB_NOT:
{
OnigCodePoint ncode;
- OnigCodePoint* codes;
+ OnigCodePoint* codes;
codes = (OnigCodePoint* )p->cclass_mb.mb;
GET_CODE_POINT(ncode, codes);
codes++;
GET_CODE_POINT(code, codes);
- fprintf(f, ":%u:%u", code, ncode);
+ fprintf(f, ":%d:0x%x", ncode, code);
}
break;
case OP_CCLASS_MIX:
@@ -447,15 +467,18 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
}
break;
- case OP_MEMORY_START:
- case OP_MEMORY_START_PUSH:
+ case OP_MEM_START:
+ case OP_MEM_START_PUSH:
mem = p->memory_start.num;
fprintf(f, ":%d", mem);
break;
- case OP_MEMORY_END_PUSH:
- case OP_MEMORY_END_PUSH_REC:
- case OP_MEMORY_END:
- case OP_MEMORY_END_REC:
+
+ case OP_MEM_END:
+ case OP_MEM_END_PUSH:
+#ifdef USE_CALL
+ case OP_MEM_END_REC:
+ case OP_MEM_END_PUSH_REC:
+#endif
mem = p->memory_end.num;
fprintf(f, ":%d", mem);
break;
@@ -499,8 +522,6 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
case OP_REPEAT_INC:
case OP_REPEAT_INC_NG:
- case OP_REPEAT_INC_SG:
- case OP_REPEAT_INC_NG_SG:
mem = p->repeat.id;
fprintf(f, ":%d", mem);
break;
@@ -511,7 +532,9 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
break;
case OP_EMPTY_CHECK_END:
case OP_EMPTY_CHECK_END_MEMST:
+#ifdef USE_CALL
case OP_EMPTY_CHECK_END_MEMST_PUSH:
+#endif
mem = p->empty_check_end.mem;
fprintf(f, ":%d", mem);
break;
@@ -534,10 +557,12 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
p_rel_addr(f, addr, p, start);
break;
+#ifdef USE_CALL
case OP_CALL:
addr = p->call.addr;
fprintf(f, ":{/%d}", addr);
break;
+#endif
case OP_PUSH_SAVE_VAL:
{
@@ -607,7 +632,9 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
case OP_ATOMIC_START:
case OP_ATOMIC_END:
case OP_LOOK_BEHIND_NOT_END:
+#ifdef USE_CALL
case OP_RETURN:
+#endif
break;
default:
@@ -615,7 +642,7 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
break;
}
}
-#endif /* ONIG_DEBUG */
+#endif /* defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) */
#ifdef ONIG_DEBUG_COMPILE
extern void
@@ -625,8 +652,8 @@ onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)
Operation* start = reg->ops;
Operation* end = reg->ops + reg->ops_used;
- fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n",
- reg->bt_mem_start, reg->bt_mem_end);
+ fprintf(f, "push_mem_start: 0x%x, push_mem_end: 0x%x\n",
+ reg->push_mem_start, reg->push_mem_end);
fprintf(f, "code-length: %d\n", reg->ops_used);
bp = start;
@@ -943,7 +970,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
result = ONIGERR_INVALID_ARGUMENT;\
}\
best_len = result;\
- goto finish;\
+ goto match_at_end;\
break;\
}\
} while(0)
@@ -965,21 +992,31 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
/* handled by normal-POP */
#define STK_MEM_START 0x0010
#define STK_MEM_END 0x8030
-#define STK_REPEAT_INC 0x0050
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+#define STK_REPEAT_INC (0x0040 | STK_MASK_POP_HANDLED)
+#else
+#define STK_REPEAT_INC 0x0040
+#endif
#ifdef USE_CALLOUT
#define STK_CALLOUT 0x0070
#endif
/* avoided by normal-POP */
#define STK_VOID 0x0000 /* for fill a blank */
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+#define STK_EMPTY_CHECK_START (0x3000 | STK_MASK_POP_HANDLED)
+#else
#define STK_EMPTY_CHECK_START 0x3000
+#endif
#define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */
#define STK_MEM_END_MARK 0x8100
#define STK_TO_VOID_START 0x1200 /* mark for "(?>...)" */
-#define STK_REPEAT 0x0300
+/* #define STK_REPEAT 0x0300 */
#define STK_CALL_FRAME 0x0400
#define STK_RETURN 0x0500
#define STK_SAVE_VAL 0x0600
+#define STK_PREC_READ_START 0x0700
+#define STK_PREC_READ_END 0x0800
/* stack type check mask */
#define STK_MASK_POP_USED STK_ALT_FLAG
@@ -1000,11 +1037,10 @@ typedef struct _StackType {
UChar* pstr_prev; /* previous char position of pstr */
} state;
struct {
- int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
- Operation* pcode; /* byte code position (head of repeated target) */
- } repeat;
- struct {
- StackIndex si; /* index of stack */
+ int count;
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+ StackIndex prev_index; /* index of stack */
+#endif
} repeat_inc;
struct {
UChar *pstr; /* start/end position */
@@ -1013,7 +1049,10 @@ typedef struct _StackType {
StackIndex prev_end; /* prev. info (for backtrack "(...)*" ) */
} mem;
struct {
- UChar *pstr; /* start position */
+ UChar *pstr; /* start position */
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+ StackIndex prev_index; /* index of stack */
+#endif
} empty_check;
#ifdef USE_CALL
struct {
@@ -1059,29 +1098,64 @@ struct OnigCalloutArgsStruct {
#endif
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+
+#define PTR_NUM_SIZE(reg) ((reg)->num_repeat + (reg)->num_empty_check + ((reg)->num_mem + 1) * 2)
+#define UPDATE_FOR_STACK_REALLOC do{\
+ repeat_stk = (StackIndex* )alloc_base;\
+ empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
+ mem_start_stk = (StackIndex* )(empty_check_stk + reg->num_empty_check);\
+ mem_end_stk = mem_start_stk + num_mem + 1;\
+} while(0)
+
+#define SAVE_REPEAT_STK_VAR(sid) stk->u.repeat_inc.prev_index = repeat_stk[sid]
+#define LOAD_TO_REPEAT_STK_VAR(sid) repeat_stk[sid] = GET_STACK_INDEX(stk)
+#define POP_REPEAT_INC else if (stk->type == STK_REPEAT_INC) {repeat_stk[stk->zid] = stk->u.repeat_inc.prev_index;}
+
+#define SAVE_EMPTY_CHECK_STK_VAR(sid) stk->u.empty_check.prev_index = empty_check_stk[sid]
+#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid) empty_check_stk[sid] = GET_STACK_INDEX(stk)
+#define POP_EMPTY_CHECK_START else if (stk->type == STK_EMPTY_CHECK_START) {empty_check_stk[stk->zid] = stk->u.empty_check.prev_index;}
+
+#else
+
+#define PTR_NUM_SIZE(reg) (((reg)->num_mem + 1) * 2)
+#define UPDATE_FOR_STACK_REALLOC do{\
+ mem_start_stk = (StackIndex* )alloc_base;\
+ mem_end_stk = mem_start_stk + num_mem + 1;\
+} while(0)
+
+#define SAVE_REPEAT_STK_VAR(sid)
+#define LOAD_TO_REPEAT_STK_VAR(sid)
+#define POP_REPEAT_INC
+
+#define SAVE_EMPTY_CHECK_STK_VAR(sid)
+#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)
+#define POP_EMPTY_CHECK_START
+
+#endif /* USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
+#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option);\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
- (msa).match_stack_limit = (mp)->match_stack_limit;\
- (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
- (msa).mp = mp;\
+ (msa).match_stack_limit = (mpv)->match_stack_limit;\
+ (msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\
+ (msa).mp = mpv;\
(msa).best_len = ONIG_MISMATCH;\
- (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
+ (msa).ptr_num = PTR_NUM_SIZE(reg);\
} while(0)
#else
-#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
+#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option);\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
- (msa).match_stack_limit = (mp)->match_stack_limit;\
- (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
- (msa).mp = mp;\
- (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
+ (msa).match_stack_limit = (mpv)->match_stack_limit;\
+ (msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\
+ (msa).mp = mpv;\
+ (msa).ptr_num = PTR_NUM_SIZE(reg);\
} while(0)
#endif
@@ -1136,12 +1210,6 @@ struct OnigCalloutArgsStruct {
};\
} while(0)
-#define UPDATE_FOR_STACK_REALLOC do{\
- repeat_stk = (StackIndex* )alloc_base;\
- mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
- mem_end_stk = mem_start_stk + num_mem + 1;\
-} while(0)
-
static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
extern unsigned int
@@ -1162,7 +1230,9 @@ onig_set_match_stack_limit_size(unsigned int size)
static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH;
#define CHECK_RETRY_LIMIT_IN_MATCH do {\
- if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\
+ if (retry_in_match_counter++ > retry_limit_in_match) {\
+ MATCH_AT_ERROR_RETURN(ONIGERR_RETRY_LIMIT_IN_MATCH_OVER);\
+ }\
} while (0)
#else
@@ -1544,27 +1614,31 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
-#define STACK_PUSH_POS(s,sprev) \
- STACK_PUSH(STK_TO_VOID_START,(Operation* )0,s,sprev)
+#define STACK_PUSH_PREC_READ_START(s,sprev) \
+ STACK_PUSH(STK_PREC_READ_START,(Operation* )0,s,sprev)
#define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \
STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev)
#define STACK_PUSH_TO_VOID_START STACK_PUSH_TYPE(STK_TO_VOID_START)
#define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \
STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev)
+#if 0
#define STACK_PUSH_REPEAT(sid, pat) do {\
STACK_ENSURE(1);\
stk->type = STK_REPEAT;\
stk->zid = (sid);\
- stk->u.repeat.pcode = (pat);\
- stk->u.repeat.count = 0;\
+ stk->u.repeat.pcode = (pat);\
STACK_INC;\
} while(0)
+#endif
-#define STACK_PUSH_REPEAT_INC(sindex) do {\
+#define STACK_PUSH_REPEAT_INC(sid, ct) do {\
STACK_ENSURE(1);\
stk->type = STK_REPEAT_INC;\
- stk->u.repeat_inc.si = (sindex);\
+ stk->zid = (sid);\
+ stk->u.repeat_inc.count = (ct);\
+ SAVE_REPEAT_STK_VAR(sid);\
+ LOAD_TO_REPEAT_STK_VAR(sid);\
STACK_INC;\
} while(0)
@@ -1637,6 +1711,8 @@ stack_double(int is_alloca, char** arg_alloc_base,
stk->type = STK_EMPTY_CHECK_START;\
stk->zid = (cnum);\
stk->u.empty_check.pstr = (s);\
+ SAVE_EMPTY_CHECK_STK_VAR(cnum);\
+ LOAD_TO_EMPTY_CHECK_STK_VAR(cnum);\
STACK_INC;\
} while(0)
@@ -1774,7 +1850,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_BASE_CHECK(p, at) \
if ((p) < stk_base) {\
fprintf(stderr, "at %s\n", at);\
- goto stack_error;\
+ MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\
}
#else
#define STACK_BASE_CHECK(p, at)
@@ -1825,13 +1901,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
else if (stk->type == STK_MEM_END) {\
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
+ POP_REPEAT_INC \
+ POP_EMPTY_CHECK_START \
POP_CALLOUT_CASE\
}\
}\
@@ -1850,13 +1925,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
else if (stk->type == STK_MEM_END) {\
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
+ POP_REPEAT_INC \
+ POP_EMPTY_CHECK_START \
/* Don't call callout here because negation of total success by (?!..) (?<!..) */\
}\
}\
@@ -1887,65 +1961,99 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while(0)
-#define STACK_EMPTY_CHECK(isnull,sid,s) do {\
- StackType* k = stk;\
+#define STACK_GET_PREC_READ_START(k) do {\
+ int level = 0;\
+ k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \
- if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->zid == (sid)) {\
- (isnull) = (k->u.empty_check.pstr == (s));\
+ STACK_BASE_CHECK(k, "STACK_GET_PREC_READ_START");\
+ if (IS_TO_VOID_TARGET(k)) {\
+ k->type = STK_VOID;\
+ }\
+ else if (k->type == STK_PREC_READ_START) {\
+ if (level == 0) {\
break;\
}\
+ level--;\
+ }\
+ else if (k->type == STK_PREC_READ_END) {\
+ level++;\
}\
}\
} while(0)
+
+#define EMPTY_CHECK_START_SEARCH(sid, k) do {\
+ k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "EMPTY_CHECK_START_SEARCH"); \
+ if (k->type == STK_EMPTY_CHECK_START) {\
+ if (k->zid == (sid)) break;\
+ }\
+ }\
+} while(0)
+
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+
+#define GET_EMPTY_CHECK_START(sid, k) do {\
+ if (reg->num_call == 0) {\
+ k = STACK_AT(empty_check_stk[sid]);\
+ }\
+ else {\
+ EMPTY_CHECK_START_SEARCH(sid, k);\
+ }\
+} while(0)
+#else
+
+#define GET_EMPTY_CHECK_START(sid, k) EMPTY_CHECK_START_SEARCH(sid, k)
+
+#endif
+
+
+#define STACK_EMPTY_CHECK(isnull, sid, s) do {\
+ StackType* k;\
+ GET_EMPTY_CHECK_START(sid, k);\
+ (isnull) = (k->u.empty_check.pstr == (s));\
+} while(0)
+
#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
(addr) = 0;\
}\
else {\
- if (MEM_STATUS_AT((reg)->bt_mem_end, k->zid))\
+ if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\
(addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\
else\
(addr) = (UChar* )k->u.mem.prev_end;\
}\
} while (0)
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
-#define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\
- StackType* k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM"); \
- if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->zid == (sid)) {\
- if (k->u.empty_check.pstr != (s)) {\
- (isnull) = 0;\
- break;\
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
+#define STACK_EMPTY_CHECK_MEM(isnull, sid, s, reg) do {\
+ StackType* k;\
+ GET_EMPTY_CHECK_START(sid, k);\
+ if (k->u.empty_check.pstr != (s)) {\
+ (isnull) = 0;\
+ }\
+ else {\
+ UChar* endp;\
+ (isnull) = 1;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START &&\
+ MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid)) {\
+ STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
+ if (endp == 0) {\
+ (isnull) = 0; break;\
}\
- else {\
- UChar* endp;\
- (isnull) = 1;\
- while (k < stk) {\
- if (k->type == STK_MEM_START) {\
- STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
- if (endp == 0) {\
- (isnull) = 0; break;\
- }\
- else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
- (isnull) = 0; break;\
- }\
- else if (endp != s) {\
- (isnull) = -1; /* empty, but position changed */ \
- }\
- }\
- k++;\
- }\
- break;\
+ else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
+ (isnull) = 0; break;\
+ }\
+ else if (endp != s) {\
+ (isnull) = -1; /* empty, but position changed */ \
}\
}\
+ k++;\
}\
}\
} while(0)
@@ -1968,7 +2076,8 @@ stack_double(int is_alloca, char** arg_alloc_base,
(isnull) = 1;\
while (k < stk) {\
if (k->type == STK_MEM_START) {\
- if (level == 0) {\
+ if (level == 0 && \
+ MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid) !=0) {\
STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
if (endp == 0) {\
(isnull) = 0; break;\
@@ -2023,26 +2132,47 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
}\
} while(0)
-#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
+#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
-#define STACK_GET_REPEAT(sid, k) do {\
- int level = 0;\
- k = stk;\
+#define STACK_GET_REPEAT_COUNT_SEARCH(sid, c) do {\
+ StackType* k = stk;\
while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
- if (k->type == STK_REPEAT) {\
- if (level == 0) {\
- if (k->zid == (sid)) {\
- break;\
+ (k)--;\
+ STACK_BASE_CHECK(k, "STACK_GET_REPEAT_COUNT_SEARCH");\
+ if ((k)->type == STK_REPEAT_INC) {\
+ if ((k)->zid == (sid)) {\
+ (c) = (k)->u.repeat_inc.count;\
+ break;\
+ }\
+ }\
+ else if ((k)->type == STK_RETURN) {\
+ int level = -1;\
+ while (1) {\
+ (k)--;\
+ if ((k)->type == STK_CALL_FRAME) {\
+ level++;\
+ if (level == 0) break;\
}\
+ else if ((k)->type == STK_RETURN) level--;\
}\
}\
- else if (k->type == STK_CALL_FRAME) level--;\
- else if (k->type == STK_RETURN) level++;\
}\
} while(0)
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+
+#define STACK_GET_REPEAT_COUNT(sid, c) do {\
+ if (reg->num_call == 0) {\
+ (c) = (STACK_AT(repeat_stk[sid]))->u.repeat_inc.count;\
+ }\
+ else {\
+ STACK_GET_REPEAT_COUNT_SEARCH(sid, c);\
+ }\
+} while(0)
+#else
+#define STACK_GET_REPEAT_COUNT(sid, c) STACK_GET_REPEAT_COUNT_SEARCH(sid, c)
+#endif
+
#define STACK_RETURN(addr) do {\
int level = 0;\
StackType* k = stk;\
@@ -2444,6 +2574,8 @@ typedef struct {
#define MATCH_DEBUG_OUT(offset)
#endif
+#define MATCH_AT_ERROR_RETURN(err_code) do { best_len = (err_code); goto match_at_end; } while(0) /* store err_code in best_len and jump to match_at_end; do/while(0) keeps it ONE statement so `if (c) MATCH_AT_ERROR_RETURN(e);` does not jump unconditionally */
+
/* match data(str - end) from position (sstart). */
/* if sstart == str then set sprev to NULL. */
@@ -2463,20 +2595,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
static const void *opcode_to_label[] = {
&&L_FINISH,
&&L_END,
- &&L_EXACT1,
- &&L_EXACT2,
- &&L_EXACT3,
- &&L_EXACT4,
- &&L_EXACT5,
- &&L_EXACTN,
- &&L_EXACTMB2N1,
- &&L_EXACTMB2N2,
- &&L_EXACTMB2N3,
- &&L_EXACTMB2N,
- &&L_EXACTMB3N,
- &&L_EXACTMBN,
- &&L_EXACT1_IC,
- &&L_EXACTN_IC,
+ &&L_STR_1,
+ &&L_STR_2,
+ &&L_STR_3,
+ &&L_STR_4,
+ &&L_STR_5,
+ &&L_STR_N,
+ &&L_STR_MB2N1,
+ &&L_STR_MB2N2,
+ &&L_STR_MB2N3,
+ &&L_STR_MB2N,
+ &&L_STR_MB3N,
+ &&L_STR_MBN,
+ &&L_STR_1_IC,
+ &&L_STR_N_IC,
&&L_CCLASS,
&&L_CCLASS_MB,
&&L_CCLASS_MIX,
@@ -2514,12 +2646,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_BACKREF_WITH_LEVEL_IC,
&&L_BACKREF_CHECK,
&&L_BACKREF_CHECK_WITH_LEVEL,
- &&L_MEMORY_START,
- &&L_MEMORY_START_PUSH,
- &&L_MEMORY_END_PUSH,
- &&L_MEMORY_END_PUSH_REC,
- &&L_MEMORY_END,
- &&L_MEMORY_END_REC,
+ &&L_MEM_START,
+ &&L_MEM_START_PUSH,
+ &&L_MEM_END_PUSH,
+#ifdef USE_CALL
+ &&L_MEM_END_PUSH_REC,
+#endif
+ &&L_MEM_END,
+#ifdef USE_CALL
+ &&L_MEM_END_REC,
+#endif
&&L_FAIL,
&&L_JUMP,
&&L_PUSH,
@@ -2533,12 +2669,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_REPEAT_NG,
&&L_REPEAT_INC,
&&L_REPEAT_INC_NG,
- &&L_REPEAT_INC_SG,
- &&L_REPEAT_INC_NG_SG,
&&L_EMPTY_CHECK_START,
&&L_EMPTY_CHECK_END,
&&L_EMPTY_CHECK_END_MEMST,
+#ifdef USE_CALL
&&L_EMPTY_CHECK_END_MEMST_PUSH,
+#endif
&&L_PREC_READ_START,
&&L_PREC_READ_END,
&&L_PREC_READ_NOT_START,
@@ -2548,10 +2684,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_LOOK_BEHIND,
&&L_LOOK_BEHIND_NOT_START,
&&L_LOOK_BEHIND_NOT_END,
- &&L_CALL,
- &&L_RETURN,
&&L_PUSH_SAVE_VAL,
&&L_UPDATE_VAR,
+#ifdef USE_CALL
+ &&L_CALL,
+ &&L_RETURN,
+#endif
#ifdef USE_CALLOUT
&&L_CALLOUT_CONTENTS,
&&L_CALLOUT_NAME,
@@ -2569,15 +2707,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
char *alloc_base;
StackType *stk_base, *stk, *stk_end;
StackType *stkp; /* used as any purpose. */
- StackIndex si;
- StackIndex *repeat_stk;
StackIndex *mem_start_stk, *mem_end_stk;
UChar* keep;
+
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+ StackIndex *repeat_stk;
+ StackIndex *empty_check_stk;
+#endif
#ifdef USE_RETRY_LIMIT_IN_MATCH
unsigned long retry_limit_in_match;
unsigned long retry_in_match_counter;
#endif
-
#ifdef USE_CALLOUT
int of;
#endif
@@ -2663,15 +2803,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
rmt[0].rm_eo = (regoff_t )(s - str);
for (i = 1; i <= num_mem; i++) {
if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (MEM_STATUS_AT(reg->bt_mem_start, i))
- rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
- else
- rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str);
-
- rmt[i].rm_eo = (regoff_t )((MEM_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[i]))
- - str);
+ rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str);
+ rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i) - str);
}
else {
rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
@@ -2684,14 +2817,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
region->end[0] = (int )(s - str);
for (i = 1; i <= num_mem; i++) {
if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (MEM_STATUS_AT(reg->bt_mem_start, i))
- region->beg[i] = (int )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
- else
- region->beg[i] = (int )((UChar* )((void* )mem_start_stk[i]) - str);
-
- region->end[i] = (int )((MEM_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[i])) - str);
+ region->beg[i] = (int )(STACK_MEM_START(reg, i) - str);
+ region->end[i] = (int )(STACK_MEM_END(reg, i) - str);
}
else {
region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
@@ -2719,10 +2846,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
stkp = stk_base;
r = make_capture_history_tree(region->history_root, &stkp,
stk, (UChar* )str, reg);
- if (r < 0) {
- best_len = r; /* error code */
- goto finish;
- }
+ if (r < 0) MATCH_AT_ERROR_RETURN(r);
}
#endif /* USE_CAPTURE_HISTORY */
#ifdef USE_POSIX_API_REGION_OPTION
@@ -2747,9 +2871,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
/* default behavior: return first-matching result. */
- goto finish;
+ goto match_at_end;
- CASE_OP(EXACT1)
+ CASE_OP(STR_1)
DATA_ENSURE(1);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2757,7 +2881,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
NEXT_OUT;
- CASE_OP(EXACT1_IC)
+ CASE_OP(STR_1_IC)
{
int len;
UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
@@ -2778,7 +2902,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
NEXT_OUT;
- CASE_OP(EXACT2)
+ CASE_OP(STR_2)
DATA_ENSURE(2);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2789,7 +2913,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACT3)
+ CASE_OP(STR_3)
DATA_ENSURE(3);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2802,7 +2926,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACT4)
+ CASE_OP(STR_4)
DATA_ENSURE(4);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2817,7 +2941,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACT5)
+ CASE_OP(STR_5)
DATA_ENSURE(5);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2834,7 +2958,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTN)
+ CASE_OP(STR_N)
tlen = p->exact_n.n;
DATA_ENSURE(tlen);
ps = p->exact_n.s;
@@ -2845,7 +2969,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTN_IC)
+ CASE_OP(STR_N_IC)
{
int len;
UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
@@ -2863,6 +2987,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(0);
q = lowbuf;
while (len-- > 0) {
+ if (ps >= endp) goto fail;
if (*ps != *q) goto fail;
ps++; q++;
}
@@ -2872,7 +2997,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTMB2N1)
+ CASE_OP(STR_MB2N1)
DATA_ENSURE(2);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2882,7 +3007,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
NEXT_OUT;
- CASE_OP(EXACTMB2N2)
+ CASE_OP(STR_MB2N2)
DATA_ENSURE(4);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2897,7 +3022,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTMB2N3)
+ CASE_OP(STR_MB2N3)
DATA_ENSURE(6);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2916,7 +3041,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTMB2N)
+ CASE_OP(STR_MB2N)
tlen = p->exact_n.n;
DATA_ENSURE(tlen * 2);
ps = p->exact_n.s;
@@ -2930,7 +3055,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTMB3N)
+ CASE_OP(STR_MB3N)
tlen = p->exact_n.n;
DATA_ENSURE(tlen * 3);
ps = p->exact_n.s;
@@ -2946,7 +3071,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTMBN)
+ CASE_OP(STR_MBN)
tlen = p->exact_len_n.len; /* mb byte len */
tlen2 = p->exact_len_n.n; /* number of chars */
tlen2 *= tlen;
@@ -2968,6 +3093,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
NEXT_OUT;
CASE_OP(CCLASS_MB)
+ DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
cclass_mb:
@@ -2976,7 +3102,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
UChar *ss;
int mb_len;
- DATA_ENSURE(1);
mb_len = enclen(encode, s);
DATA_ENSURE(mb_len);
ss = s;
@@ -3265,7 +3390,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
#endif
default:
- goto bytecode_error;
+ MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
break;
}
@@ -3365,46 +3490,50 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(MEMORY_START_PUSH)
+ CASE_OP(MEM_START_PUSH)
mem = p->memory_start.num;
STACK_PUSH_MEM_START(mem, s);
INC_OP;
JUMP_OUT;
- CASE_OP(MEMORY_START)
+ CASE_OP(MEM_START)
mem = p->memory_start.num;
mem_start_stk[mem] = (StackIndex )((void* )s);
INC_OP;
JUMP_OUT;
- CASE_OP(MEMORY_END_PUSH)
+ CASE_OP(MEM_END_PUSH)
mem = p->memory_end.num;
STACK_PUSH_MEM_END(mem, s);
INC_OP;
JUMP_OUT;
- CASE_OP(MEMORY_END)
+ CASE_OP(MEM_END)
mem = p->memory_end.num;
mem_end_stk[mem] = (StackIndex )((void* )s);
INC_OP;
JUMP_OUT;
#ifdef USE_CALL
- CASE_OP(MEMORY_END_PUSH_REC)
- mem = p->memory_end.num;
- STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
- si = GET_STACK_INDEX(stkp);
- STACK_PUSH_MEM_END(mem, s);
- mem_start_stk[mem] = si;
- INC_OP;
- JUMP_OUT;
+ CASE_OP(MEM_END_PUSH_REC)
+ {
+ StackIndex si;
+
+ mem = p->memory_end.num;
+ STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
+ si = GET_STACK_INDEX(stkp);
+ STACK_PUSH_MEM_END(mem, s);
+ mem_start_stk[mem] = si;
+ INC_OP;
+ JUMP_OUT;
+ }
- CASE_OP(MEMORY_END_REC)
+ CASE_OP(MEM_END_REC)
mem = p->memory_end.num;
mem_end_stk[mem] = (StackIndex )((void* )s);
STACK_GET_MEM_START(mem, stkp);
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
+ if (MEM_STATUS_AT(reg->push_mem_start, mem))
mem_start_stk[mem] = GET_STACK_INDEX(stkp);
else
mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
@@ -3432,20 +3561,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
+ pstart = STACK_MEM_START(reg, mem);
+ pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP(pstart, s, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP(s, pstart, n);
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
}
INC_OP;
JUMP_OUT;
@@ -3459,20 +3584,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
+ pstart = STACK_MEM_START(reg, mem);
+ pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP_IC(case_fold_flag, pstart, &s, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP_IC(case_fold_flag, pstart, &s, n);
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
}
INC_OP;
JUMP_OUT;
@@ -3489,24 +3610,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
+ pstart = STACK_MEM_START(reg, mem);
+ pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE(pstart, swork, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
-
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE(swork, pstart, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
break; /* success */
}
if (i == tlen) goto fail;
@@ -3526,24 +3642,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
+ pstart = STACK_MEM_START(reg, mem);
+ pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
-
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
break; /* success */
}
if (i == tlen) goto fail;
@@ -3560,6 +3671,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int len;
int level;
MemNumType* mems;
+ UChar* ssave;
n = 0;
backref_with_level:
@@ -3567,10 +3679,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
tlen = p->backref_general.num;
mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
- sprev = s;
+ ssave = s;
if (backref_match_at_nested_level(reg, stk, stk_base, n,
case_fold_flag, level, (int )tlen, mems, &s, end)) {
- if (sprev < end) {
+ if (ssave != s) {
+ sprev = ssave;
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
}
@@ -3643,12 +3756,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_PUSH:
case OP_REPEAT_INC:
case OP_REPEAT_INC_NG:
- case OP_REPEAT_INC_SG:
- case OP_REPEAT_INC_NG_SG:
INC_OP;
break;
default:
- goto unexpected_bytecode_error;
+ MATCH_AT_ERROR_RETURN(ONIGERR_UNEXPECTED_BYTECODE);
break;
}
#else
@@ -3658,7 +3769,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
JUMP_OUT;
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
CASE_OP(EMPTY_CHECK_END_MEMST)
{
int is_empty;
@@ -3683,7 +3794,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int is_empty;
mem = p->empty_check_end.mem; /* mem: null check id */
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg);
#else
STACK_EMPTY_CHECK_REC(is_empty, mem, s);
@@ -3751,7 +3862,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
addr = p->push_if_peek_next.addr;
c = p->push_if_peek_next.c;
- if (c == *s) {
+ if (DATA_ENSURE_CHECK1 && c == *s) {
STACK_PUSH_ALT(p + addr, s, sprev);
INC_OP;
JUMP_OUT;
@@ -3764,10 +3875,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem = p->repeat.id; /* mem: OP_REPEAT ID */
addr = p->repeat.addr;
- STACK_ENSURE(1);
- repeat_stk[mem] = GET_STACK_INDEX(stk);
- STACK_PUSH_REPEAT(mem, p + 1);
-
+ STACK_PUSH_REPEAT_INC(mem, 0);
if (reg->repeat_range[mem].lower == 0) {
STACK_PUSH_ALT(p + addr, s, sprev);
}
@@ -3778,10 +3886,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem = p->repeat.id; /* mem: OP_REPEAT ID */
addr = p->repeat.addr;
- STACK_ENSURE(1);
- repeat_stk[mem] = GET_STACK_INDEX(stk);
- STACK_PUSH_REPEAT(mem, p + 1);
-
+ STACK_PUSH_REPEAT_INC(mem, 0);
if (reg->repeat_range[mem].lower == 0) {
STACK_PUSH_ALT(p + 1, s, sprev);
p += addr;
@@ -3792,73 +3897,52 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(REPEAT_INC)
mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
-
- repeat_inc:
- stkp->u.repeat.count++;
- if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
+ STACK_GET_REPEAT_COUNT(mem, n);
+ n++;
+ if (n >= reg->repeat_range[mem].upper) {
/* end of repeat. Nothing to do. */
INC_OP;
}
- else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ else if (n >= reg->repeat_range[mem].lower) {
INC_OP;
STACK_PUSH_ALT(p, s, sprev);
- p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
+ p = reg->repeat_range[mem].u.pcode;
}
else {
- p = stkp->u.repeat.pcode;
+ p = reg->repeat_range[mem].u.pcode;
}
- STACK_PUSH_REPEAT_INC(si);
+ STACK_PUSH_REPEAT_INC(mem, n);
CHECK_INTERRUPT_JUMP_OUT;
- CASE_OP(REPEAT_INC_SG)
- mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
- STACK_GET_REPEAT(mem, stkp);
- si = GET_STACK_INDEX(stkp);
- goto repeat_inc;
-
CASE_OP(REPEAT_INC_NG)
mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
-
- repeat_inc_ng:
- stkp->u.repeat.count++;
- if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
- if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- Operation* pcode = stkp->u.repeat.pcode;
-
- STACK_PUSH_REPEAT_INC(si);
- STACK_PUSH_ALT(pcode, s, sprev);
+ STACK_GET_REPEAT_COUNT(mem, n);
+ n++;
+ STACK_PUSH_REPEAT_INC(mem, n);
+ if (n == reg->repeat_range[mem].upper) {
+ INC_OP;
+ }
+ else {
+ if (n >= reg->repeat_range[mem].lower) {
+ STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev);
INC_OP;
}
else {
- p = stkp->u.repeat.pcode;
- STACK_PUSH_REPEAT_INC(si);
+ p = reg->repeat_range[mem].u.pcode;
}
}
- else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
- STACK_PUSH_REPEAT_INC(si);
- INC_OP;
- }
CHECK_INTERRUPT_JUMP_OUT;
- CASE_OP(REPEAT_INC_NG_SG)
- mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
- STACK_GET_REPEAT(mem, stkp);
- si = GET_STACK_INDEX(stkp);
- goto repeat_inc_ng;
-
CASE_OP(PREC_READ_START)
- STACK_PUSH_POS(s, sprev);
+ STACK_PUSH_PREC_READ_START(s, sprev);
INC_OP;
JUMP_OUT;
CASE_OP(PREC_READ_END)
- STACK_EXEC_TO_VOID(stkp);
+ STACK_GET_PREC_READ_START(stkp);
s = stkp->u.state.pstr;
sprev = stkp->u.state.pstr_prev;
+ STACK_PUSH(STK_PREC_READ_END,0,0,0);
INC_OP;
JUMP_OUT;
@@ -3997,14 +4081,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
OnigCalloutFunc func;
OnigCalloutArgs args;
- of = ONIG_CALLOUT_OF_NAME;
- name_id = p->callout_name.id;
- mem = p->callout_name.num;
+ of = ONIG_CALLOUT_OF_NAME;
+ mem = p->callout_name.num;
callout_common_entry:
e = onig_reg_callout_list_at(reg, mem);
in = e->in;
if (of == ONIG_CALLOUT_OF_NAME) {
+ name_id = p->callout_name.id;
func = onig_get_callout_start_func(reg, mem);
}
else {
@@ -4027,7 +4111,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
call_result = ONIGERR_INVALID_ARGUMENT;
}
best_len = call_result;
- goto finish;
+ goto match_at_end;
break;
}
}
@@ -4053,7 +4137,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
CASE_OP(FINISH)
- goto finish;
+ goto match_at_end;
#ifdef ONIG_DEBUG_STATISTICS
fail:
@@ -4074,37 +4158,472 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
JUMP_OUT;
DEFAULT_OP
- goto bytecode_error;
+ MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
} BYTECODE_INTERPRETER_END;
- finish:
+ match_at_end:
STACK_SAVE;
return best_len;
+}
-#ifdef ONIG_DEBUG
- stack_error:
- STACK_SAVE;
- return ONIGERR_STACK_BUG;
-#endif
+/* One member of a regex set: a regex and the region stored alongside it.
+ region may be NULL; users in this file test it with IS_NOT_NULL(). */
+typedef struct {
+ regex_t* reg;
+ OnigRegion* region;
+} RR;
+
+/* A set of regexes that are searched together.
+ rs[0..n-1] are the members; the remaining fields are consulted by the
+ regset search functions to narrow the range and pick a search loop. */
+struct OnigRegSetStruct {
+ RR* rs;
+ int n; /* number of members in rs */
+ int alloc; /* presumably the allocated capacity of rs -- TODO confirm */
+ OnigEncoding enc;
+ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
+ OnigLen anc_dmin; /* (SEMI_)END_BUF anchor distance */
+ OnigLen anc_dmax; /* (SEMI_)END_BUF anchor distance */
+ int all_low_high; /* non-zero: the low/high-only search loop may be used */
+ int anychar_inf; /* non-zero: track "previous char is newline" while scanning */
+};
- bytecode_error:
- STACK_SAVE;
- return ONIGERR_UNDEFINED_BYTECODE;
+/* Per-regex state kept in SearchRange.state during a position-lead search. */
+enum SearchRangeStatus {
+ SRS_DEAD = 0, /* forward_search() failed: the regex is no longer tried */
+ SRS_LOW_HIGH = 1, /* low/high window came from forward_search() */
+ SRS_ALL_RANGE = 2 /* no window: tried at every position up to range */
+};
-#if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
- unexpected_bytecode_error:
- STACK_SAVE;
- return ONIGERR_UNEXPECTED_BYTECODE;
-#endif
+/* Cached forward_search() result for one regex of the set. */
+typedef struct {
+ int state; /* value of enum SearchRangeStatus */
+ UChar* low; /* positions before low are skipped for this regex */
+ UChar* high; /* reaching high triggers a new forward_search() (SRS_LOW_HIGH) */
+ UChar* low_prev; /* previous-character pointer recorded together with low */
+ UChar* sch_range; /* range passed to forward_search(); set only for SRS_LOW_HIGH */
+} SearchRange;
+
+/* Try regex `reg` at position s with match_at().
+ Expects r, reg, str, end, s, prev, msas and i to be in scope, and the
+ labels `match` and `finish` to exist in the enclosing function.
+ On a match (r >= 0) jumps to `match`; on an error other than
+ ONIG_MISMATCH jumps to `finish`; on mismatch falls through. */
+#define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \
+ r = match_at(reg, str, end, (upper_range), s, prev, msas + i); \
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ goto match;\
+ }\
+ else goto finish; /* error */ \
+ }
-#ifdef USE_RETRY_LIMIT_IN_MATCH
- retry_limit_in_match_over:
- STACK_SAVE;
- return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER;
+/*
+ * Position-lead search over a regex set: walks candidate positions s from
+ * start towards range and, at each position, tries the regexes in set
+ * order with match_at().
+ *
+ * msas is indexed per regex (msas + i is handed to match_at()).
+ * option is not referenced in this body.
+ *
+ * Returns the index of the regex that matched and stores the match start
+ * offset (s - str) in *rmatch_pos; ONIG_MISMATCH when no position matches;
+ * a negative error code from match_at(); or whatever
+ * CHECK_NULL_RETURN_MEMERR returns when the work array cannot be allocated.
+ */
+static inline int
+regset_search_body_position_lead(OnigRegSet* set,
+ const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range, /* match start range */
+ const UChar* orig_range, /* data range */
+ OnigOptionType option, MatchArg* msas, int* rmatch_pos)
+{
+ int r, n, i;
+ UChar *s, *prev;
+ UChar *low, *high, *low_prev;
+ UChar* sch_range;
+ regex_t* reg;
+ OnigEncoding enc;
+ SearchRange* sr;
+
+ n = set->n;
+ enc = set->enc;
+
+ s = (UChar* )start;
+ if (s > str)
+ prev = onigenc_get_prev_char_head(enc, str, s);
+ else
+ prev = (UChar* )NULL;
+
+ /* one SearchRange per regex; freed on every exit path below */
+ sr = (SearchRange* )xmalloc(sizeof(*sr) * n);
+ CHECK_NULL_RETURN_MEMERR(sr);
+
+ /* Initial classification of every regex: DEAD, LOW_HIGH or ALL_RANGE. */
+ for (i = 0; i < n; i++) {
+ reg = set->rs[i].reg;
+
+ sr[i].state = SRS_DEAD;
+ if (reg->optimize != OPTIMIZE_NONE) {
+ if (reg->dist_max != INFINITE_LEN) {
+ if (end - range > reg->dist_max)
+ sch_range = (UChar* )range + reg->dist_max;
+ else
+ sch_range = (UChar* )end;
+
+ if (forward_search(reg, str, end, s, sch_range, &low, &high, &low_prev)) {
+ sr[i].state = SRS_LOW_HIGH;
+ sr[i].low = low;
+ sr[i].high = high;
+ sr[i].low_prev = low_prev;
+ sr[i].sch_range = sch_range;
+ }
+ }
+ else {
+ /* unbounded distance: forward_search() only decides dead vs. alive */
+ sch_range = (UChar* )end;
+ if (forward_search(reg, str, end, s, sch_range,
+ &low, &high, (UChar** )NULL)) {
+ goto total_active;
+ }
+ }
+ }
+ else {
+ /* also entered by the goto above */
+ total_active:
+ sr[i].state = SRS_ALL_RANGE;
+ sr[i].low = s;
+ sr[i].high = (UChar* )range;
+ sr[i].low_prev = prev;
+ }
+ }
+
+#define ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN 500
+
+ /* Loop 1: used when set->all_low_high is set and the range is long enough.
+ It can jump s directly to the smallest pending low when nothing was
+ tried at the current position. */
+ if (set->all_low_high != 0
+ && range - start > ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN) {
+ do {
+ int try_count = 0;
+ for (i = 0; i < n; i++) {
+ if (sr[i].state == SRS_DEAD) continue;
+
+ if (s < sr[i].low) continue;
+ if (s >= sr[i].high) {
+ /* window exhausted: look for the next one from s */
+ if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
+ &low, &high, &low_prev) != 0) {
+ sr[i].low = low;
+ sr[i].high = high;
+ sr[i].low_prev = low_prev;
+ if (s < low) continue;
+ }
+ else {
+ sr[i].state = SRS_DEAD;
+ continue;
+ }
+ }
+
+ reg = set->rs[i].reg;
+ REGSET_MATCH_AND_RETURN_CHECK(orig_range);
+ try_count++;
+ } /* for (i) */
+
+ if (s >= range) break;
+
+ if (try_count == 0) {
+ /* no regex was tried at s: skip ahead to the smallest low */
+ low = (UChar* )range;
+ for (i = 0; i < n; i++) {
+ if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) {
+ low = sr[i].low;
+ low_prev = sr[i].low_prev;
+ }
+ }
+ if (low == range) break;
+
+ s = low;
+ prev = low_prev;
+ }
+ else {
+ prev = s;
+ s += enclen(enc, s);
+ }
+ } while (1);
+ }
+ else {
+ /* Loop 2: advances one character at a time. Regexes whose anchor has
+ ANCR_ANYCHAR_INF are tried only while prev_is_newline is non-zero. */
+ int prev_is_newline = 1;
+ do {
+ for (i = 0; i < n; i++) {
+ if (sr[i].state == SRS_DEAD) continue;
+ if (sr[i].state == SRS_LOW_HIGH) {
+ if (s < sr[i].low) continue;
+ if (s >= sr[i].high) {
+ if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
+ &low, &high, &low_prev) != 0) {
+ sr[i].low = low;
+ sr[i].high = high;
+ /* sr[i].low_prev = low_prev; */
+ if (s < low) continue;
+ }
+ else {
+ sr[i].state = SRS_DEAD;
+ continue;
+ }
+ }
+ }
+
+ reg = set->rs[i].reg;
+ if ((reg->anchor & ANCR_ANYCHAR_INF) == 0 || prev_is_newline != 0) {
+ REGSET_MATCH_AND_RETURN_CHECK(orig_range);
+ }
+ }
+
+ if (s >= range) break;
+
+ /* remember whether the character being stepped over is a newline */
+ if (set->anychar_inf != 0)
+ prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end);
+
+ prev = s;
+ s += enclen(enc, s);
+ } while (1);
+ }
+
+ xfree(sr);
+ return ONIG_MISMATCH;
+
+ /* error exit: r holds the negative code from match_at() */
+ finish:
+ xfree(sr);
+ return r;
+
+ /* success exit: i is the index of the regex that matched at s */
+ match:
+ xfree(sr);
+ *rmatch_pos = (int )(s - str);
+ return i;
+}
+
+/*
+ * Regex-lead search over a regex set: runs search_in_range() once per
+ * regex, in set order, and keeps the best result.
+ *
+ * A match at offset 0 ends the search immediately. With
+ * ONIG_REGSET_PRIORITY_TO_REGEX_ORDER the first regex that matches wins;
+ * otherwise each match shrinks the search range (ep) so that later regexes
+ * can only win by matching at an earlier position.
+ *
+ * Returns the index of the winning regex and stores its match offset in
+ * *rmatch_pos; ONIG_MISMATCH when no regex matches; or the negative error
+ * code of the first search_in_range() call that fails with something other
+ * than ONIG_MISMATCH.
+ */
+static inline int
+regset_search_body_regex_lead(OnigRegSet* set,
+ const UChar* str, const UChar* end,
+ const UChar* start, const UChar* orig_range, OnigRegSetLead lead,
+ OnigOptionType option, OnigMatchParam* mps[], int* rmatch_pos)
+{
+ int r;
+ int i;
+ int n;
+ int match_index;
+ const UChar* ep;
+ regex_t* reg;
+ OnigRegion* region;
+
+ n = set->n;
+
+ match_index = ONIG_MISMATCH;
+ ep = orig_range;
+ for (i = 0; i < n; i++) {
+ reg = set->rs[i].reg;
+ region = set->rs[i].region;
+ r = search_in_range(reg, str, end, start, ep, orig_range, region, option, mps[i]);
+ if (r > 0) {
+ if (str + r < ep) {
+ match_index = i;
+ *rmatch_pos = r;
+ if (lead == ONIG_REGSET_PRIORITY_TO_REGEX_ORDER)
+ break;
+
+ /* later regexes must match strictly before this position */
+ ep = str + r;
+ }
+ }
+ else if (r == 0) {
+ /* nothing can beat a match at the very beginning */
+ match_index = i;
+ *rmatch_pos = r;
+ break;
+ }
+ else if (r != ONIG_MISMATCH) {
+ /* a real error, not a plain mismatch: report it instead of letting it
+ look like "no match" (the caller treats r < 0 as failure) */
+ return r;
+ }
+ }
+
+ return match_index;
+}
+
+/*
+ * Searches str..end with every regex of set, using caller-supplied match
+ * parameters (mps[i] is used for set->rs[i]).
+ *
+ * start/range bound the match start positions. Only forward search is
+ * accepted: on a non-empty string, range <= start yields
+ * ONIGERR_INVALID_ARGUMENT, as does a POSIX-region option.
+ * lead == ONIG_REGSET_POSITION_LEAD selects the position-lead body; any
+ * other value selects the regex-lead body.
+ *
+ * Returns the index of the matching regex and stores the match start
+ * offset in *rmatch_pos; ONIG_MISMATCH when nothing matches or the set is
+ * empty; otherwise a negative error code.
+ */
+extern int
+onig_regset_search_with_param(OnigRegSet* set,
+ const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range,
+ OnigRegSetLead lead, OnigOptionType option, OnigMatchParam* mps[],
+ int* rmatch_pos)
+{
+ int r;
+ int i;
+ UChar *s, *prev;
+ regex_t* reg;
+ OnigEncoding enc;
+ OnigRegion* region;
+ MatchArg* msas;
+ const UChar *orig_start = start;
+ const UChar *orig_range = range;
+
+ if (set->n == 0)
+ return ONIG_MISMATCH;
+
+ if (IS_POSIX_REGION(option))
+ return ONIGERR_INVALID_ARGUMENT;
+
+ r = 0;
+ enc = set->enc;
+ msas = (MatchArg* )NULL;
+
+ /* prepare every non-NULL region before any matching is attempted */
+ for (i = 0; i < set->n; i++) {
+ reg = set->rs[i].reg;
+ region = set->rs[i].region;
+ ADJUST_MATCH_PARAM(reg, mps[i]);
+ if (IS_NOT_NULL(region)) {
+ r = onig_region_resize_clear(region, reg->num_mem + 1);
+ if (r != 0) goto finish_no_msa;
+ }
+ }
+
+ if (start > end || start < str) goto mismatch_no_msa;
+ if (str < end) {
+ /* forward search only */
+ if (range <= start)
+ return ONIGERR_INVALID_ARGUMENT;
+ }
+
+ if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
+ if (! ONIGENC_IS_VALID_MBC_STRING(enc, str, end)) {
+ r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
+ goto finish_no_msa;
+ }
+ }
+
+ /* use the set-wide anchor information to narrow start/range */
+ if (set->anchor != OPTIMIZE_NONE && str < end) {
+ UChar *min_semi_end, *max_semi_end;
+
+ if ((set->anchor & ANCR_BEGIN_POSITION) != 0) {
+ /* search start-position only */
+ begin_position:
+ range = start + 1;
+ }
+ else if ((set->anchor & ANCR_BEGIN_BUF) != 0) {
+ /* search str-position only */
+ if (start != str) goto mismatch_no_msa;
+ range = str + 1;
+ }
+ else if ((set->anchor & ANCR_END_BUF) != 0) {
+ min_semi_end = max_semi_end = (UChar* )end;
+
+ end_buf:
+ if ((OnigLen )(max_semi_end - str) < set->anc_dmin)
+ goto mismatch_no_msa;
+
+ if ((OnigLen )(min_semi_end - start) > set->anc_dmax) {
+ start = min_semi_end - set->anc_dmax;
+ if (start < end)
+ start = onigenc_get_right_adjust_char_head(enc, str, start);
+ }
+ if ((OnigLen )(max_semi_end - (range - 1)) < set->anc_dmin) {
+ range = max_semi_end - set->anc_dmin + 1;
+ }
+ if (start > range) goto mismatch_no_msa;
+ }
+ else if ((set->anchor & ANCR_SEMI_END_BUF) != 0) {
+ UChar* pre_end = ONIGENC_STEP_BACK(enc, str, end, 1);
+
+ max_semi_end = (UChar* )end;
+ if (ONIGENC_IS_MBC_NEWLINE(enc, pre_end, end)) {
+ min_semi_end = pre_end;
+
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ pre_end = ONIGENC_STEP_BACK(enc, str, pre_end, 1);
+ if (IS_NOT_NULL(pre_end) &&
+ ONIGENC_IS_MBC_CRNL(enc, pre_end, end)) {
+ min_semi_end = pre_end;
+ }
#endif
+ if (min_semi_end > str && start <= min_semi_end) {
+ goto end_buf;
+ }
+ }
+ else {
+ min_semi_end = (UChar* )end;
+ goto end_buf;
+ }
+ }
+ else if ((set->anchor & ANCR_ANYCHAR_INF_ML) != 0) {
+ goto begin_position;
+ }
+ }
+ else if (str == end) { /* empty string */
+ start = end = str;
+ s = (UChar* )start;
+ prev = (UChar* )NULL;
+
+ msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
+ CHECK_NULL_RETURN_MEMERR(msas);
+ for (i = 0; i < set->n; i++) {
+ reg = set->rs[i].reg;
+ MATCH_ARG_INIT(msas[i], reg, option, set->rs[i].region, start, mps[i]);
+ }
+ /* only regexes with threshold_len == 0 are tried on the empty string */
+ for (i = 0; i < set->n; i++) {
+ reg = set->rs[i].reg;
+ if (reg->threshold_len == 0) {
+ REGSET_MATCH_AND_RETURN_CHECK(end);
+ }
+ }
+
+ goto mismatch;
+ }
+
+ if (lead == ONIG_REGSET_POSITION_LEAD) {
+ msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
+ CHECK_NULL_RETURN_MEMERR(msas);
+
+ for (i = 0; i < set->n; i++) {
+ MATCH_ARG_INIT(msas[i], set->rs[i].reg, option, set->rs[i].region,
+ orig_start, mps[i]);
+ }
+
+ r = regset_search_body_position_lead(set, str, end, start, range,
+ orig_range, option, msas, rmatch_pos);
+ }
+ else {
+ r = regset_search_body_regex_lead(set, str, end, start, orig_range,
+ lead, option, mps, rmatch_pos);
+ }
+ if (r < 0) goto finish;
+ else goto match2;
+
+ mismatch:
+ r = ONIG_MISMATCH;
+ finish:
+ for (i = 0; i < set->n; i++) {
+ if (IS_NOT_NULL(msas))
+ MATCH_ARG_FREE(msas[i]);
+ if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
+ IS_NOT_NULL(set->rs[i].region)) {
+ onig_region_clear(set->rs[i].region);
+ }
+ }
+ if (IS_NOT_NULL(msas)) xfree(msas);
+ return r;
+
+ mismatch_no_msa:
+ r = ONIG_MISMATCH;
+ finish_no_msa:
+ return r;
+
+ match:
+ /* Reached only through REGSET_MATCH_AND_RETURN_CHECK, where r holds the
+ match_at() result rather than a regex index. Record the index of the
+ regex that matched before the cleanup loop below reuses i. */
+ r = i;
+ *rmatch_pos = (int )(s - str);
+ match2:
+ /* NOTE(review): this also clears the region of the regex that matched
+ when it has FIND_NOT_EMPTY set -- confirm that this is intended. */
+ for (i = 0; i < set->n; i++) {
+ if (IS_NOT_NULL(msas))
+ MATCH_ARG_FREE(msas[i]);
+ if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
+ IS_NOT_NULL(set->rs[i].region)) {
+ onig_region_clear(set->rs[i].region);
+ }
+ }
+ if (IS_NOT_NULL(msas)) xfree(msas);
+ return r; /* regex index */
}
+/*
+ * Convenience wrapper around onig_regset_search_with_param() that supplies
+ * a freshly initialised OnigMatchParam for every regex of the set.
+ *
+ * The pointer table and the OnigMatchParam objects share one allocation:
+ * set->n pointers followed by set->n parameter objects.
+ *
+ * Returns whatever onig_regset_search_with_param() returns, ONIG_MISMATCH
+ * for an empty set, or the result of CHECK_NULL_RETURN_MEMERR when the
+ * allocation fails.
+ */
+extern int
+onig_regset_search(OnigRegSet* set, const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range,
+ OnigRegSetLead lead, OnigOptionType option, int* rmatch_pos)
+{
+ int r;
+ int i;
+ OnigMatchParam* mp;
+ OnigMatchParam** mps;
+
+ /* An empty set can never match. Answer here, as the _with_param variant
+ does, instead of requesting a zero-byte allocation whose NULL result
+ would be reported as a memory error. */
+ if (set->n == 0)
+ return ONIG_MISMATCH;
+
+ mps = (OnigMatchParam** )xmalloc((sizeof(OnigMatchParam*) + sizeof(OnigMatchParam)) * set->n);
+ CHECK_NULL_RETURN_MEMERR(mps);
+
+ /* the parameter objects live right behind the pointer table */
+ mp = (OnigMatchParam* )(mps + set->n);
+
+ for (i = 0; i < set->n; i++) {
+ onig_initialize_match_param(mp + i);
+ mps[i] = mp + i;
+ }
+
+ r = onig_regset_search_with_param(set, str, end, start, range, lead, option, mps,
+ rmatch_pos);
+ for (i = 0; i < set->n; i++)
+ onig_free_match_param_content(mp + i);
+
+ xfree(mps);
+
+ return r;
+}
static UChar*
slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
@@ -4146,9 +4665,11 @@ str_lower_case_match(OnigEncoding enc, int case_fold_flag,
UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
while (t < tend) {
+ if (p >= end) return 0;
lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
q = lowbuf;
while (lowlen > 0) {
+ if (t >= tend) return 0;
if (*t++ != *q++) return 0;
lowlen--;
}
@@ -4162,16 +4683,11 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag,
UChar* target, UChar* target_end,
const UChar* text, const UChar* text_end, UChar* text_range)
{
- UChar *s, *end;
-
- end = (UChar* )text_end;
- end -= target_end - target - 1;
- if (end > text_range)
- end = text_range;
+ UChar *s;
s = (UChar* )text;
- while (s < end) {
+ while (s < text_range) {
if (str_lower_case_match(enc, case_fold_flag, target, target_end,
s, text_end))
return s;
@@ -4325,60 +4841,6 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
}
static UChar*
-sunday_quick_search_case_fold(regex_t* reg,
- const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
-{
- const UChar *s, *se, *end;
- const UChar *tail;
- int skip, tlen1;
- int map_offset;
- int case_fold_flag;
- OnigEncoding enc;
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr,
- "sunday_quick_search_case_fold: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
-#endif
-
- enc = reg->enc;
- case_fold_flag = reg->case_fold_flag;
-
- tail = target_end - 1;
- tlen1 = (int )(tail - target);
- end = text_range;
- if (end + tlen1 > text_end)
- end = text_end - tlen1;
-
- map_offset = reg->map_offset;
- s = text;
-
- while (s < end) {
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, text_end))
- return (UChar* )s;
-
- se = s + tlen1;
- if (se + map_offset >= text_end) break;
- skip = reg->map[*(se + map_offset)];
-#if 0
- p = s;
- do {
- s += enclen(enc, s);
- } while ((s - p) < skip && s < end);
-#else
- /* This is faster than prev code for long text. ex: /(?i)Twain/ */
- s += skip;
- if (s < end)
- s = onigenc_get_right_adjust_char_head(enc, text, s);
-#endif
- }
-
- return (UChar* )NULL;
-}
-
-static UChar*
map_search(OnigEncoding enc, UChar map[],
const UChar* text, const UChar* text_range)
{
@@ -4458,25 +4920,26 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
}
static int
-forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
- UChar* range, UChar** low, UChar** high, UChar** low_prev)
+forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
+ UChar* range, UChar** low, UChar** high, UChar** low_prev)
{
UChar *p, *pprev = (UChar* )NULL;
#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "forward_search_range: str: %p, end: %p, s: %p, range: %p\n",
- str, end, s, range);
+ fprintf(stderr, "forward_search: str: %p, end: %p, start: %p, range: %p\n",
+ str, end, start, range);
#endif
- p = s;
- if (reg->dmin > 0) {
+ p = start;
+ if (reg->dist_min != 0) {
+ if (end - p <= reg->dist_min)
+ return 0; /* fail */
+
if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
- p += reg->dmin;
+ p += reg->dist_min;
}
else {
- UChar *q = p + reg->dmin;
-
- if (q >= end) return 0; /* fail */
+ UChar *q = p + reg->dist_min;
while (p < q) p += enclen(reg->enc, p);
}
}
@@ -4491,11 +4954,6 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
reg->exact, reg->exact_end, p, end, range);
break;
- case OPTIMIZE_STR_CASE_FOLD_FAST:
- p = sunday_quick_search_case_fold(reg, reg->exact, reg->exact_end, p, end,
- range);
- break;
-
case OPTIMIZE_STR_FAST:
p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
break;
@@ -4511,7 +4969,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
}
if (p && p < range) {
- if (p - reg->dmin < s) {
+ if (p - start < reg->dist_min) {
retry_gate:
pprev = p;
p += enclen(reg->enc, p);
@@ -4524,8 +4982,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
switch (reg->sub_anchor) {
case ANCR_BEGIN_LINE:
if (!ON_STR_BEGIN(p)) {
- prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p);
+ prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p);
if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
goto retry_gate;
}
@@ -4546,35 +5003,34 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
#endif
)
goto retry_gate;
+
break;
}
}
- if (reg->dmax == 0) {
+ if (reg->dist_max == 0) {
*low = p;
if (low_prev) {
- if (*low > s)
- *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
+ if (*low > start)
+ *low_prev = onigenc_get_prev_char_head(reg->enc, start, p);
else
*low_prev = onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : str), p);
}
+ *high = p;
}
else {
- if (reg->dmax != INFINITE_LEN) {
- if (p - str < reg->dmax) {
+ if (reg->dist_max != INFINITE_LEN) {
+ if (p - str < reg->dist_max) {
*low = (UChar* )str;
if (low_prev)
*low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
}
else {
- *low = p - reg->dmax;
- if (*low > s) {
- *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
+ *low = p - reg->dist_max;
+ if (*low > start) {
+ *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, start,
*low, (const UChar** )low_prev);
- if (low_prev && IS_NULL(*low_prev))
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : s), *low);
}
else {
if (low_prev)
@@ -4583,14 +5039,18 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
}
}
}
+ /* no needs to adjust *high, *high is used as range check only */
+ if (p - str < reg->dist_min)
+ *high = (UChar* )str;
+ else
+ *high = p - reg->dist_min;
}
- /* no needs to adjust *high, *high is used as range check only */
- *high = p - reg->dmin;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
- "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
- (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
+ "forward_search success: low: %d, high: %d, dmin: %u, dmax: %u\n",
+ (int )(*low - str), (int )(*high - str),
+ reg->dist_min, reg->dist_max);
#endif
return 1; /* success */
}
@@ -4600,15 +5060,11 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
static int
-backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
- UChar* s, const UChar* range, UChar* adjrange,
- UChar** low, UChar** high)
+backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
+ const UChar* range, UChar* adjrange, UChar** low, UChar** high)
{
UChar *p;
- if (range == 0) goto fail;
-
- range += reg->dmin;
p = s;
retry:
@@ -4620,7 +5076,6 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
break;
case OPTIMIZE_STR_CASE_FOLD:
- case OPTIMIZE_STR_CASE_FOLD_FAST:
p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end,
range, adjrange, end, p);
@@ -4675,15 +5130,27 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
}
}
- /* no needs to adjust *high, *high is used as range check only */
- if (reg->dmax != INFINITE_LEN) {
- *low = p - reg->dmax;
- *high = p - reg->dmin;
+ if (reg->dist_max != INFINITE_LEN) {
+ if (p - str < reg->dist_max)
+ *low = (UChar* )str;
+ else
+ *low = p - reg->dist_max;
+
+ if (reg->dist_min != 0) {
+ if (p - str < reg->dist_min)
+ *high = (UChar* )str;
+ else
+ *high = p - reg->dist_min;
+ }
+ else {
+ *high = p;
+ }
+
*high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
}
#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
+ fprintf(stderr, "backward_search: low: %d, high: %d\n",
(int )(*low - str), (int )(*high - str));
#endif
return 1; /* success */
@@ -4691,7 +5158,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
fail:
#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "backward_search_range: fail.\n");
+ fprintf(stderr, "backward_search: fail.\n");
#endif
return 0; /* fail */
}
@@ -4704,24 +5171,35 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
{
int r;
OnigMatchParam mp;
+ const UChar* data_range;
onig_initialize_match_param(&mp);
- r = onig_search_with_param(reg, str, end, start, range, region, option, &mp);
+
+ /* The following is the inlined body of onig_search_with_param() */
+ if (range > start)
+ data_range = range;
+ else
+ data_range = end;
+
+ r = search_in_range(reg, str, end, start, range, data_range, region,
+ option, &mp);
+
onig_free_match_param_content(&mp);
return r;
}
-extern int
-onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
- const UChar* start, const UChar* range, OnigRegion* region,
- OnigOptionType option, OnigMatchParam* mp)
+static int
+search_in_range(regex_t* reg, const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range, /* match start range */
+ const UChar* data_range, /* subject string range */
+ OnigRegion* region,
+ OnigOptionType option, OnigMatchParam* mp)
{
int r;
UChar *s, *prev;
MatchArg msa;
const UChar *orig_start = start;
- const UChar *orig_range = range;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
@@ -4804,17 +5282,21 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
min_semi_end = max_semi_end = (UChar* )end;
end_buf:
- if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin)
+ if ((OnigLen )(max_semi_end - str) < reg->anc_dist_min)
goto mismatch_no_msa;
if (range > start) {
- if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) {
- start = min_semi_end - reg->anchor_dmax;
+ if (reg->anc_dist_max != INFINITE_LEN &&
+ min_semi_end - start > reg->anc_dist_max) {
+ start = min_semi_end - reg->anc_dist_max;
if (start < end)
start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
}
- if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
- range = max_semi_end - reg->anchor_dmin + 1;
+ if (max_semi_end - (range - 1) < reg->anc_dist_min) {
+ if (max_semi_end - str + 1 < reg->anc_dist_min)
+ goto mismatch_no_msa;
+ else
+ range = max_semi_end - reg->anc_dist_min + 1;
}
if (start > range) goto mismatch_no_msa;
@@ -4822,12 +5304,17 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
Backward search is used. */
}
else {
- if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) {
- range = min_semi_end - reg->anchor_dmax;
+ if (reg->anc_dist_max != INFINITE_LEN &&
+ min_semi_end - range > reg->anc_dist_max) {
+ range = min_semi_end - reg->anc_dist_max;
}
- if ((OnigLen )(max_semi_end - start) < reg->anchor_dmin) {
- start = max_semi_end - reg->anchor_dmin;
- start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
+ if (max_semi_end - start < reg->anc_dist_min) {
+ if (max_semi_end - str < reg->anc_dist_min)
+ goto mismatch_no_msa;
+ else {
+ start = max_semi_end - reg->anc_dist_min;
+ start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
+ }
}
if (range > start) goto mismatch_no_msa;
}
@@ -4895,29 +5382,33 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
if (reg->optimize != OPTIMIZE_NONE) {
UChar *sch_range, *low, *high, *low_prev;
- sch_range = (UChar* )range;
- if (reg->dmax != 0) {
- if (reg->dmax == INFINITE_LEN)
+ if (reg->dist_max != 0) {
+ if (reg->dist_max == INFINITE_LEN)
sch_range = (UChar* )end;
else {
- sch_range += reg->dmax;
- if (sch_range > end) sch_range = (UChar* )end;
+ if ((end - range) < reg->dist_max)
+ sch_range = (UChar* )end;
+ else {
+ sch_range = (UChar* )range + reg->dist_max;
+ }
}
}
+ else
+ sch_range = (UChar* )range;
if ((end - start) < reg->threshold_len)
goto mismatch;
- if (reg->dmax != INFINITE_LEN) {
+ if (reg->dist_max != INFINITE_LEN) {
do {
- if (! forward_search_range(reg, str, end, s, sch_range,
- &low, &high, &low_prev)) goto mismatch;
+ if (! forward_search(reg, str, end, s, sch_range, &low, &high,
+ &low_prev)) goto mismatch;
if (s < low) {
s = low;
prev = low_prev;
}
while (s <= high) {
- MATCH_AND_RETURN_CHECK(orig_range);
+ MATCH_AND_RETURN_CHECK(data_range);
prev = s;
s += enclen(reg->enc, s);
}
@@ -4925,12 +5416,12 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
goto mismatch;
}
else { /* check only. */
- if (! forward_search_range(reg, str, end, s, sch_range,
- &low, &high, (UChar** )NULL)) goto mismatch;
+ if (! forward_search(reg, str, end, s, sch_range, &low, &high,
+ (UChar** )NULL)) goto mismatch;
if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) {
do {
- MATCH_AND_RETURN_CHECK(orig_range);
+ MATCH_AND_RETURN_CHECK(data_range);
prev = s;
s += enclen(reg->enc, s);
@@ -4947,13 +5438,13 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
}
do {
- MATCH_AND_RETURN_CHECK(orig_range);
+ MATCH_AND_RETURN_CHECK(data_range);
prev = s;
s += enclen(reg->enc, s);
} while (s < range);
if (s == range) { /* because empty match with /$/. */
- MATCH_AND_RETURN_CHECK(orig_range);
+ MATCH_AND_RETURN_CHECK(data_range);
}
}
else { /* backward search */
@@ -4964,19 +5455,30 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
if (reg->optimize != OPTIMIZE_NONE) {
UChar *low, *high, *adjrange, *sch_start;
+ const UChar *min_range;
+
+ if ((end - range) < reg->threshold_len) goto mismatch;
if (range < end)
adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
else
adjrange = (UChar* )end;
- if (reg->dmax != INFINITE_LEN &&
- (end - range) >= reg->threshold_len) {
+ if (end - range > reg->dist_min)
+ min_range = range + reg->dist_min;
+ else
+ min_range = end;
+
+ if (reg->dist_max != INFINITE_LEN) {
do {
- sch_start = s + reg->dmax;
- if (sch_start > end) sch_start = (UChar* )end;
- if (backward_search_range(reg, str, end, sch_start, range, adjrange,
- &low, &high) <= 0)
+ if (end - s > reg->dist_max)
+ sch_start = s + reg->dist_max;
+ else {
+ sch_start = onigenc_get_prev_char_head(reg->enc, str, end);
+ }
+
+ if (backward_search(reg, str, end, sch_start, min_range, adjrange,
+ &low, &high) <= 0)
goto mismatch;
if (s > high)
@@ -4991,22 +5493,10 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
goto mismatch;
}
else { /* check only. */
- if ((end - range) < reg->threshold_len) goto mismatch;
+ sch_start = onigenc_get_prev_char_head(reg->enc, str, end);
- sch_start = s;
- if (reg->dmax != 0) {
- if (reg->dmax == INFINITE_LEN)
- sch_start = (UChar* )end;
- else {
- sch_start += reg->dmax;
- if (sch_start > end) sch_start = (UChar* )end;
- else
- sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
- start, sch_start);
- }
- }
- if (backward_search_range(reg, str, end, sch_start, range, adjrange,
- &low, &high) <= 0) goto mismatch;
+ if (backward_search(reg, str, end, sch_start, min_range, adjrange,
+ &low, &high) <= 0) goto mismatch;
}
}
@@ -5062,6 +5552,22 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
}
+/* Search with caller-supplied match parameters.
+ * Picks the subject-string limit handed to search_in_range(): `range` when
+ * range > start, otherwise `end`. The result of search_in_range() is
+ * returned unchanged. */
extern int
+onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
+                       const UChar* start, const UChar* range,
+                       OnigRegion* region, OnigOptionType option,
+                       OnigMatchParam* mp)
+{
+  const UChar* data_range = (range > start) ? range : end;
+
+  return search_in_range(reg, str, end, start, range, data_range, region,
+                         option, mp);
+}
+
+extern int
onig_scan(regex_t* reg, const UChar* str, const UChar* end,
OnigRegion* region, OnigOptionType option,
int (*scan_callback)(int, int, OnigRegion*, void*),
@@ -5163,6 +5669,202 @@ onig_copy_encoding(OnigEncoding to, OnigEncoding from)
*to = *from;
}
+/* Create a regset containing the `n` regexes of `regs` and store it in *rset.
+ * *rset is cleared on entry and only assigned on success.
+ * Returns 0 on success, a memory error when an allocation fails, or the
+ * status of the first onig_regset_add() call that is rejected. On failure
+ * the regions created so far, the slot array and the set are released; the
+ * regexes themselves are not freed here. */
+extern int
+onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[])
+{
+#define REGSET_INITIAL_ALLOC_SIZE 10
+
+  int idx;
+  int status;
+  int capacity;
+  OnigRegSet* new_set;
+  RR* slots;
+
+  *rset = 0;
+
+  new_set = (OnigRegSet* )xmalloc(sizeof(*new_set));
+  CHECK_NULL_RETURN_MEMERR(new_set);
+
+  /* Reserve room for at least the initial size, or for all `n` entries. */
+  capacity = REGSET_INITIAL_ALLOC_SIZE;
+  if (n > capacity) capacity = n;
+
+  slots = (RR* )xmalloc(sizeof(new_set->rs[0]) * capacity);
+  if (IS_NULL(slots)) {
+    xfree(new_set);
+    return ONIGERR_MEMORY;
+  }
+
+  new_set->rs    = slots;
+  new_set->n     = 0;
+  new_set->alloc = capacity;
+
+  for (idx = 0; idx < n; idx++) {
+    status = onig_regset_add(new_set, regs[idx]);
+    if (status == 0) continue;
+
+    /* Roll back: drop every region created so far, then the set itself. */
+    for (idx = 0; idx < new_set->n; idx++) {
+      OnigRegion* region = new_set->rs[idx].region;
+      if (IS_NOT_NULL(region))
+        onig_region_free(region, 1);
+    }
+    xfree(new_set->rs);
+    xfree(new_set);
+    return status;
+  }
+
+  *rset = new_set;
+  return 0;
+}
+
+/* Fold the properties of `reg` into the aggregate fields of `set`.
+ * When the set holds exactly one regex the fields are copied from `reg`;
+ * otherwise they are merged with the values already stored in `set`. */
+static void
+update_regset_by_reg(OnigRegSet* set, regex_t* reg)
+{
+  int no_low_high;
+  int has_anychar_inf;
+  int common_anchor;
+
+  no_low_high = (reg->optimize == OPTIMIZE_NONE ||
+                 reg->dist_max == INFINITE_LEN);
+  has_anychar_inf = ((reg->anchor & ANCR_ANYCHAR_INF) != 0);
+
+  if (set->n == 1) {
+    set->enc          = reg->enc;
+    set->anchor       = reg->anchor;
+    set->anc_dmin     = reg->anc_dist_min;
+    set->anc_dmax     = reg->anc_dist_max;
+    set->all_low_high = no_low_high ? 0 : 1;
+    set->anychar_inf  = has_anychar_inf ? 1 : 0;
+    return;
+  }
+
+  /* Keep only the anchor bits present in both the set and `reg`. */
+  common_anchor = set->anchor & reg->anchor;
+  if (common_anchor != 0) {
+    /* Widen the stored [anc_dmin, anc_dmax] interval to cover `reg`. */
+    OnigLen lo = set->anc_dmin;
+    OnigLen hi = set->anc_dmax;
+
+    if (reg->anc_dist_min < lo) lo = reg->anc_dist_min;
+    if (reg->anc_dist_max > hi) hi = reg->anc_dist_max;
+    set->anc_dmin = lo;
+    set->anc_dmax = hi;
+  }
+  set->anchor = common_anchor;
+
+  if (no_low_high)
+    set->all_low_high = 0;
+
+  if (has_anychar_inf)
+    set->anychar_inf = 1;
+}
+
+/* Append `reg` to `set` and allocate a match region for it.
+ *
+ * Returns 0 on success.
+ * Returns ONIGERR_INVALID_ARGUMENT when `set` or `reg` is NULL, when `reg`
+ * has the find-longest option set, or when the set is non-empty and `reg`
+ * uses a different encoding than the set.
+ * Returns a memory error when the slot array cannot grow or the region
+ * cannot be allocated; the number of stored regexes is unchanged then.
+ */
+extern int
+onig_regset_add(OnigRegSet* set, regex_t* reg)
+{
+  OnigRegion* region;
+
+  /* Reject NULL arguments instead of dereferencing them below. */
+  if (IS_NULL(set) || IS_NULL(reg))
+    return ONIGERR_INVALID_ARGUMENT;
+
+  if (IS_FIND_LONGEST(reg->options))
+    return ONIGERR_INVALID_ARGUMENT;
+
+  if (set->n != 0 && reg->enc != set->enc)
+    return ONIGERR_INVALID_ARGUMENT;
+
+  if (set->n >= set->alloc) {
+    RR* nrs;
+    int new_alloc;
+
+    /* Double the capacity; set->rs is only replaced once realloc succeeded. */
+    new_alloc = set->alloc * 2;
+    nrs = (RR* )xrealloc(set->rs, sizeof(set->rs[0]) * new_alloc);
+    CHECK_NULL_RETURN_MEMERR(nrs);
+
+    set->rs    = nrs;
+    set->alloc = new_alloc;
+  }
+
+  region = onig_region_new();
+  CHECK_NULL_RETURN_MEMERR(region);
+
+  set->rs[set->n].reg    = reg;
+  set->rs[set->n].region = region;
+  set->n++;
+
+  /* Must run after n was incremented: it keys off set->n == 1. */
+  update_regset_by_reg(set, reg);
+  return 0;
+}
+
+/* Replace the regex at index `at` with `reg`, or remove that entry when
+ * `reg` is NULL (later entries shift down by one and the entry's region is
+ * freed). The regex previously stored at `at` is not freed here.
+ *
+ * Returns 0 on success, or ONIGERR_INVALID_ARGUMENT when `at` is out of
+ * range, when `reg` has the find-longest option set, or when the set holds
+ * more than one regex and `reg` uses a different encoding than the set.
+ */
+extern int
+onig_regset_replace(OnigRegSet* set, int at, regex_t* reg)
+{
+  int i;
+  int n;
+
+  if (at < 0 || at >= set->n)
+    return ONIGERR_INVALID_ARGUMENT;
+
+  if (IS_NULL(reg)) {
+    onig_region_free(set->rs[at].region, 1);
+    for (i = at; i < set->n - 1; i++) {
+      set->rs[i].reg    = set->rs[i+1].reg;
+      set->rs[i].region = set->rs[i+1].region;
+    }
+    set->n--;
+  }
+  else {
+    if (IS_FIND_LONGEST(reg->options))
+      return ONIGERR_INVALID_ARGUMENT;
+
+    if (set->n > 1 && reg->enc != set->enc)
+      return ONIGERR_INVALID_ARGUMENT;
+
+    set->rs[at].reg = reg;
+  }
+
+  /* Rebuild the aggregate fields from scratch. update_regset_by_reg() only
+   * re-initialises them when set->n == 1, so replay the entries as if they
+   * were added one by one; otherwise the values contributed by the replaced
+   * or removed regex would stay merged into the result. */
+  n = set->n;
+  for (i = 0; i < n; i++) {
+    set->n = i + 1;
+    update_regset_by_reg(set, set->rs[i].reg);
+  }
+  set->n = n;
+
+  return 0;
+}
+
+/* Release `set` together with everything it holds: each stored regex is
+ * passed to onig_free(), each region to onig_region_free(), and finally the
+ * slot array and the set itself are freed. Callers must therefore not free
+ * the contained regexes again. A NULL `set` is accepted and ignored. */
+extern void
+onig_regset_free(OnigRegSet* set)
+{
+  int i;
+
+  /* Like free(), tolerate NULL: onig_regset_new() leaves *rset == 0 on
+   * failure, so cleanup paths can call this unconditionally. */
+  if (IS_NULL(set)) return;
+
+  for (i = 0; i < set->n; i++) {
+    regex_t* reg;
+    OnigRegion* region;
+
+    reg    = set->rs[i].reg;
+    region = set->rs[i].region;
+    onig_free(reg);
+    if (IS_NOT_NULL(region))
+      onig_region_free(region, 1);
+  }
+
+  xfree(set->rs);
+  xfree(set);
+}
+
+/* Return the number of regexes currently stored in `set`. */
+extern int
+onig_regset_number_of_regex(OnigRegSet* set)
+{
+  int count = set->n;
+
+  return count;
+}
+
+/* Return the regex stored at index `at`, or a null pointer when `at` is
+ * outside [0, set->n). */
+extern regex_t*
+onig_regset_get_regex(OnigRegSet* set, int at)
+{
+  regex_t* found = (regex_t* )0;
+
+  if (at >= 0 && at < set->n)
+    found = set->rs[at].reg;
+
+  return found;
+}
+
+/* Return the region stored at index `at`, or a null pointer when `at` is
+ * outside [0, set->n). */
+extern OnigRegion*
+onig_regset_get_region(OnigRegSet* set, int at)
+{
+  OnigRegion* found = (OnigRegion* )0;
+
+  if (at >= 0 && at < set->n)
+    found = set->rs[at].region;
+
+  return found;
+}
+
+
#ifdef USE_DIRECT_THREADED_CODE
extern int
onig_init_for_match_at(regex_t* reg)
@@ -5355,35 +6057,25 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i
const UChar* str;
StackType* stk_base;
int i;
+ StackIndex* mem_start_stk;
+ StackIndex* mem_end_stk;
i = mem_num;
reg = a->regex;
str = a->string;
stk_base = a->stk_base;
+ mem_start_stk = a->mem_start_stk;
+ mem_end_stk = a->mem_end_stk;
if (i > 0) {
if (a->mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (MEM_STATUS_AT(reg->bt_mem_start, i))
- *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str);
- else
- *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str);
-
- *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(a->mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )a->mem_end_stk[i])) - str);
+ *begin = (int )(STACK_MEM_START(reg, i) - str);
+ *end = (int )(STACK_MEM_END(reg, i) - str);
}
else {
*begin = *end = ONIG_REGION_NOTPOS;
}
}
- else if (i == 0) {
-#if 0
- *begin = a->start - str;
- *end = a->current - str;
-#else
- return ONIGERR_INVALID_ARGUMENT;
-#endif
- }
else
return ONIGERR_INVALID_ARGUMENT;
@@ -5421,14 +6113,6 @@ onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUS
return ONIG_MISMATCH;
}
-#if 0
-extern int
-onig_builtin_success(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
-{
- return ONIG_CALLOUT_SUCCESS;
-}
-#endif
-
extern int
onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
{
@@ -5443,6 +6127,9 @@ onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
if (n >= 0) {
n = ONIGERR_INVALID_CALLOUT_BODY;
}
+ else if (onig_is_error_code_needs_param(n)) {
+ n = ONIGERR_INVALID_CALLOUT_BODY;
+ }
return n;
}