diff options
Diffstat (limited to 'src/regcomp.c')
-rw-r--r-- | src/regcomp.c | 156 |
1 files changed, 93 insertions, 63 deletions
diff --git a/src/regcomp.c b/src/regcomp.c index c2c04a4..b96c793 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -599,12 +599,34 @@ select_str_opcode(int mb_len, int str_len, int ignore_case) } static int -compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env) +is_strict_real_node(Node* node) +{ + switch (NODE_TYPE(node)) { + case NODE_STRING: + { + StrNode* sn = STR_(node); + return (sn->end != sn->s); + } + break; + + case NODE_CCLASS: + case NODE_CTYPE: + return 1; + break; + + default: + return 0; + break; + } +} + +static int +compile_tree_empty_check(Node* node, regex_t* reg, int emptiness, ScanEnv* env) { int r; int saved_num_null_check = reg->num_null_check; - if (empty_info != BODY_IS_NOT_EMPTY) { + if (emptiness != BODY_IS_NOT_EMPTY) { r = add_op(reg, OP_EMPTY_CHECK_START); if (r != 0) return r; COP(reg)->empty_check_start.mem = reg->num_null_check; /* NULL CHECK ID */ @@ -614,12 +636,12 @@ compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env) r = compile_tree(node, reg, env); if (r != 0) return r; - if (empty_info != BODY_IS_NOT_EMPTY) { - if (empty_info == BODY_IS_EMPTY) + if (emptiness != BODY_IS_NOT_EMPTY) { + if (emptiness == BODY_IS_EMPTY_POSSIBILITY) r = add_op(reg, OP_EMPTY_CHECK_END); - else if (empty_info == BODY_IS_EMPTY_MEM) + else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM) r = add_op(reg, OP_EMPTY_CHECK_END_MEMST); - else if (empty_info == BODY_IS_EMPTY_REC) + else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_REC) r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH); if (r != 0) return r; @@ -895,12 +917,12 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) } p[id].lower = lower; - p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper); + p[id].upper = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper); return 0; } static int -compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info, +compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness, regex_t* reg, ScanEnv* env) { int r; @@ -915,7 +937,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info, r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); if (r != 0) return r; - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); + r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); if (r != 0) return r; if ( @@ -937,7 +959,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info, static int is_anychar_infinite_greedy(QuantNode* qn) { - if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && + if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) && NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) return 1; else @@ -951,8 +973,8 @@ static int compile_length_quantifier_node(QuantNode* qn, regex_t* reg) { int len, mod_tlen; - int infinite = IS_REPEAT_INFINITE(qn->upper); - enum BodyEmpty empty_info = qn->empty_info; + int infinite = IS_INFINITE_REPEAT(qn->upper); + enum BodyEmptyType emptiness = qn->emptiness; int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (tlen < 0) return tlen; @@ -969,10 +991,9 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg) } } - if (empty_info == BODY_IS_NOT_EMPTY) - mod_tlen = tlen; - else - mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); + mod_tlen = tlen; + if (emptiness != BODY_IS_NOT_EMPTY) + mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END; if (infinite && (qn->lower <= 1 || @@ -1026,8 +1047,8 @@ static int compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) { int i, r, mod_tlen; - int infinite = IS_REPEAT_INFINITE(qn->upper); - enum BodyEmpty empty_info = qn->empty_info; + int infinite = IS_INFINITE_REPEAT(qn->upper); + enum BodyEmptyType emptiness = qn->emptiness; int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (tlen < 0) return tlen; @@ -1055,10 +1076,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) } } - if (empty_info == BODY_IS_NOT_EMPTY) - mod_tlen = tlen; - else - mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); + mod_tlen = tlen; + if (emptiness != BODY_IS_NOT_EMPTY) + mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END; if (infinite && (qn->lower <= 1 || @@ -1096,7 +1116,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) COP(reg)->push_or_jump_exact1.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP; COP(reg)->push_or_jump_exact1.c = STR_(qn->head_exact)->s[0]; - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); + r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); if (r != 0) return r; addr = -(mod_tlen + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1); @@ -1109,7 +1129,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) COP(reg)->push_if_peek_next.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP; COP(reg)->push_if_peek_next.c = STR_(qn->next_head_exact)->s[0]; - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); + r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); if (r != 0) return r; addr = -(mod_tlen + (int )SIZE_OP_PUSH_IF_PEEK_NEXT); @@ -1119,7 +1139,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) if (r != 0) return r; COP(reg)->push.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP; - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); + r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); if (r != 0) return r; addr = -(mod_tlen + (int )SIZE_OP_PUSH); @@ -1134,7 +1154,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) if (r != 0) return r; COP(reg)->jump.addr = mod_tlen + SIZE_INC_OP; - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); + r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env); if (r != 0) return r; r = add_op(reg, OP_PUSH); @@ -1188,7 +1208,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) r = compile_tree(NODE_QUANT_BODY(qn), reg, env); } else { - r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env); + r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env); } return r; } @@ -1273,7 +1293,7 @@ compile_length_bag_node(BagNode* node, regex_t* reg) break; case BAG_STOP_BACKTRACK: - if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) { + if (NODE_IS_STRICT_REAL_REPEAT(node)) { int v; QuantNode* qn; @@ -1307,8 +1327,9 @@ compile_length_bag_node(BagNode* node, regex_t* reg) len += tlen; } + len += SIZE_OP_JUMP + SIZE_OP_ATOMIC_END; + if (IS_NOT_NULL(Else)) { - len += SIZE_OP_JUMP; tlen = compile_length_tree(Else, reg); if (tlen < 0) return tlen; len += tlen; @@ -1423,7 +1444,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env) break; case BAG_STOP_BACKTRACK: - if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) { + if (NODE_IS_STRICT_REAL_REPEAT(node)) { QuantNode* qn = QUANT_(NODE_BAG_BODY(node)); r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); if (r != 0) return r; @@ -1455,7 +1476,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env) case BAG_IF_ELSE: { - int cond_len, then_len, jump_len; + int cond_len, then_len, else_len, jump_len; Node* cond = NODE_BAG_BODY(node); Node* Then = node->te.Then; Node* Else = node->te.Else; @@ -1472,8 +1493,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env) else then_len = 0; - jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END; - if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP; + jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END + SIZE_OP_JUMP; r = add_op(reg, OP_PUSH); if (r != 0) return r; @@ -1490,11 +1510,20 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env) } if (IS_NOT_NULL(Else)) { - int else_len = compile_length_tree(Else, reg); - r = add_op(reg, OP_JUMP); - if (r != 0) return r; - COP(reg)->jump.addr = else_len + SIZE_INC_OP; + else_len = compile_length_tree(Else, reg); + if (else_len < 0) return else_len; + } + else + else_len = 0; + + r = add_op(reg, OP_JUMP); + if (r != 0) return r; + COP(reg)->jump.addr = SIZE_OP_ATOMIC_END + else_len + SIZE_INC_OP; + r = add_op(reg, OP_ATOMIC_END); + if (r != 0) return r; + + if (IS_NOT_NULL(Else)) { r = compile_tree(Else, reg, env); } } @@ -3035,7 +3064,7 @@ tree_max_len(Node* node, ScanEnv* env) if (qn->upper != 0) { len = tree_max_len(NODE_BODY(node), env); if (len != 0) { - if (! IS_REPEAT_INFINITE(qn->upper)) + if (! IS_INFINITE_REPEAT(qn->upper)) len = distance_multiply(len, qn->upper); else len = INFINITE_LEN; @@ -3581,7 +3610,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg) type = NODE_TYPE(node); if (type == NODE_QUANT) { QuantNode* qn = QUANT_(node); - if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { + if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) { #ifdef USE_QUANT_PEEK_NEXT Node* n = get_head_value_node(next_node, 1, reg); /* '\0': for UTF-16BE etc... */ @@ -3591,7 +3620,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg) #endif /* automatic posseivation a*b ==> (?>a*)b */ if (qn->lower <= 1) { - if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) { + if (is_strict_real_node(NODE_BODY(node))) { Node *x, *y; x = get_head_value_node(NODE_BODY(node), 0, reg); if (IS_NOT_NULL(x)) { @@ -3599,7 +3628,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg) if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) { Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK); CHECK_NULL_RETURN_MEMERR(en); - NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT); + NODE_STATUS_ADD(en, STRICT_REAL_REPEAT); swap_node(node, en); NODE_BODY(node) = en; } @@ -4001,11 +4030,11 @@ expand_case_fold_string(Node* node, regex_t* reg, int state) return r; } -#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT -static enum BodyEmpty +#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT +static enum BodyEmptyType quantifiers_memory_node_info(Node* node) { - int r = BODY_IS_EMPTY; + int r = BODY_IS_EMPTY_POSSIBILITY; switch (NODE_TYPE(node)) { case NODE_LIST: @@ -4022,7 +4051,7 @@ quantifiers_memory_node_info(Node* node) #ifdef USE_CALL case NODE_CALL: if (NODE_IS_RECURSION(node)) { - return BODY_IS_EMPTY_REC; /* tiny version */ + return BODY_IS_EMPTY_POSSIBILITY_REC; /* tiny version */ } else r = quantifiers_memory_node_info(NODE_BODY(node)); @@ -4044,9 +4073,9 @@ quantifiers_memory_node_info(Node* node) switch (en->type) { case BAG_MEMORY: if (NODE_IS_RECURSION(node)) { - return BODY_IS_EMPTY_REC; + return BODY_IS_EMPTY_POSSIBILITY_REC; } - return BODY_IS_EMPTY_MEM; + return BODY_IS_EMPTY_POSSIBILITY_MEM; break; case BAG_OPTION: @@ -4083,7 +4112,7 @@ quantifiers_memory_node_info(Node* node) return r; } -#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */ +#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */ #ifdef USE_CALL @@ -4351,7 +4380,7 @@ setup_called_state_call(Node* node, int state) { QuantNode* qn = QUANT_(node); - if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2) + if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2) state |= IN_REAL_REPEAT; if (qn->lower != qn->upper) state |= IN_VAR_REPEAT; @@ -4468,7 +4497,7 @@ setup_called_state(Node* node, int state) { QuantNode* qn = QUANT_(node); - if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2) + if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2) state |= IN_REAL_REPEAT; if (qn->lower != qn->upper) state |= IN_VAR_REPEAT; @@ -4600,24 +4629,24 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) NODE_STATUS_ADD(node, IN_MULTI_ENTRY); } - if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { + if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 1) { d = tree_min_len(body, env); if (d == 0) { -#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT - qn->empty_info = quantifiers_memory_node_info(body); - if (qn->empty_info == BODY_IS_EMPTY_REC) { +#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT + qn->emptiness = quantifiers_memory_node_info(body); + if (qn->emptiness == BODY_IS_EMPTY_POSSIBILITY_REC) { if (NODE_TYPE(body) == NODE_BAG && BAG_(body)->type == BAG_MEMORY) { MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum); } } #else - qn->empty_info = BODY_IS_EMPTY; + qn->emptiness = BODY_IS_EMPTY_POSSIBILITY; #endif } } - if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2) + if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2) state |= IN_REAL_REPEAT; if (qn->lower != qn->upper) state |= IN_VAR_REPEAT; @@ -4628,7 +4657,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) /* expand string */ #define EXPAND_STRING_MAX_LENGTH 100 if (NODE_TYPE(body) == NODE_STRING) { - if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && + if (!IS_INFINITE_REPEAT(qn->lower) && qn->lower == qn->upper && qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { int len = NODE_STRING_LEN(body); StrNode* sn = STR_(body); @@ -4646,7 +4675,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) } } - if (qn->greedy && (qn->empty_info == BODY_IS_NOT_EMPTY)) { + if (qn->greedy && (qn->emptiness == BODY_IS_NOT_EMPTY)) { if (NODE_TYPE(body) == NODE_QUANT) { QuantNode* tqn = QUANT_(body); if (IS_NOT_NULL(tqn->head_exact)) { @@ -4663,7 +4692,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) } /* setup_tree does the following work. - 1. check empty loop. (set qn->empty_info) + 1. check empty loop. (set qn->emptiness) 2. expand ignore-case in char class. 3. set memory status bit flags. (reg->mem_stats) 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. @@ -4752,10 +4781,10 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) r = setup_tree(target, reg, state, env); if (NODE_TYPE(target) == NODE_QUANT) { QuantNode* tqn = QUANT_(target); - if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && + if (IS_INFINITE_REPEAT(tqn->upper) && tqn->lower <= 1 && tqn->greedy != 0) { /* (?>a*), a*+ etc... */ - if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target))) - NODE_STATUS_ADD(node, STOP_BT_SIMPLE_REPEAT); + if (is_strict_real_node(NODE_BODY(target))) + NODE_STATUS_ADD(node, STRICT_REAL_REPEAT); } } } @@ -5752,7 +5781,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env) opt->sm.reach_end = 0; } - if (IS_REPEAT_INFINITE(qn->upper)) { + if (IS_INFINITE_REPEAT(qn->upper)) { if (env->mmd.max == 0 && NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) { if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env))) @@ -6672,6 +6701,7 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) } else { len = ONIGENC_CODE_TO_MBCLEN(enc, code); + if (len < 0) return 0; } return onig_is_code_in_cc_len(len, code, cc); } |