diff options
Diffstat (limited to 'src/regcomp.c')
-rw-r--r-- | src/regcomp.c | 1102 |
1 files changed, 834 insertions, 268 deletions
diff --git a/src/regcomp.c b/src/regcomp.c index db83739..47023cb 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -147,7 +147,7 @@ swap_node(Node* a, Node* b) Node c; c = *a; *a = *b; *b = c; - if (NODE_TYPE(a) == NODE_STR) { + if (NODE_TYPE(a) == NODE_STRING) { StrNode* sn = STR_(a); if (sn->capa == 0) { int len = sn->end - sn->s; @@ -156,7 +156,7 @@ swap_node(Node* a, Node* b) } } - if (NODE_TYPE(b) == NODE_STR) { + if (NODE_TYPE(b) == NODE_STRING) { StrNode* sn = STR_(b); if (sn->capa == 0) { int len = sn->end - sn->s; @@ -169,11 +169,11 @@ swap_node(Node* a, Node* b) static OnigLen distance_add(OnigLen d1, OnigLen d2) { - if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) - return ONIG_INFINITE_DISTANCE; + if (d1 == INFINITE_LEN || d2 == INFINITE_LEN) + return INFINITE_LEN; else { - if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; - else return ONIG_INFINITE_DISTANCE; + if (d1 <= INFINITE_LEN - d2) return d1 + d2; + else return INFINITE_LEN; } } @@ -182,10 +182,10 @@ distance_multiply(OnigLen d, int m) { if (m == 0) return 0; - if (d < ONIG_INFINITE_DISTANCE / m) + if (d < INFINITE_LEN / m) return d * m; else - return ONIG_INFINITE_DISTANCE; + return INFINITE_LEN; } static int @@ -230,7 +230,7 @@ onig_bbuf_init(BBuf* buf, int size) } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL static int unset_addr_list_init(UnsetAddrList* list, int size) @@ -271,7 +271,7 @@ unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node) list->num++; return 0; } -#endif /* USE_SUBEXP_CALL */ +#endif /* USE_CALL */ static int @@ -347,6 +347,24 @@ add_option(regex_t* reg, OnigOptionType option) } static int +add_save_type(regex_t* reg, enum SaveType type) +{ + SaveType t = (SaveType )type; + + BBUF_ADD(reg, &t, SIZE_SAVE_TYPE); + return 0; +} + +static int +add_update_var_type(regex_t* reg, enum UpdateVarType type) +{ + UpdateVarType t = (UpdateVarType )type; + + BBUF_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE); + return 0; +} + +static int add_opcode_rel_addr(regex_t* reg, int opcode, int addr) { int r; @@ -466,7 +484,7 @@ compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env) return r; } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL static int compile_call(CallNode* node, regex_t* reg, ScanEnv* env) { @@ -545,7 +563,7 @@ compile_length_string_node(Node* node, regex_t* reg) if (sn->end <= sn->s) return 0; - ambig = NSTRING_IS_AMBIG(node); + ambig = NODE_STRING_IS_AMBIG(node); p = prev = sn->s; prev_len = enclen(enc, p); @@ -594,7 +612,7 @@ compile_string_node(Node* node, regex_t* reg) return 0; end = sn->end; - ambig = NSTRING_IS_AMBIG(node); + ambig = NODE_STRING_IS_AMBIG(node); p = prev = sn->s; prev_len = enclen(enc, p); @@ -767,7 +785,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info, if (r != 0) return r; if ( -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL NODE_IS_IN_MULTI_ENTRY(qn) || #endif NODE_IS_IN_REAL_REPEAT(qn)) { @@ -893,7 +911,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); if (r != 0) return r; if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { - if (IS_MULTILINE(reg->options)) + if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); else r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); @@ -906,7 +924,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) return add_bytes(reg, STR_(qn->next_head_exact)->s, 1); } else { - if (IS_MULTILINE(reg->options)) { + if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) { r = add_opcode(reg, (CKN_ON ? OP_STATE_CHECK_ANYCHAR_ML_STAR : OP_ANYCHAR_ML_STAR)); @@ -1109,7 +1127,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); if (r != 0) return r; if (IS_NOT_NULL(qn->next_head_exact)) { - if (IS_MULTILINE(reg->options)) + if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); else r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); @@ -1117,7 +1135,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) return add_bytes(reg, STR_(qn->next_head_exact)->s, 1); } else { - if (IS_MULTILINE(reg->options)) + if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) return add_opcode(reg, OP_ANYCHAR_ML_STAR); else return add_opcode(reg, OP_ANYCHAR_STAR); @@ -1229,7 +1247,7 @@ compile_length_option_node(EnclosureNode* node, regex_t* reg) int tlen; OnigOptionType prev = reg->options; - reg->options = node->o.option; + reg->options = node->o.options; tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); reg->options = prev; @@ -1249,8 +1267,8 @@ compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) int r; OnigOptionType prev = reg->options; - if (IS_DYNAMIC_OPTION(prev ^ node->o.option)) { - r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->o.option); + if (IS_DYNAMIC_OPTION(prev ^ node->o.options)) { + r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->o.options); if (r != 0) return r; r = add_opcode_option(reg, OP_SET_OPTION, prev); if (r != 0) return r; @@ -1258,11 +1276,11 @@ compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) if (r != 0) return r; } - reg->options = node->o.option; + reg->options = node->o.options; r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); reg->options = prev; - if (IS_DYNAMIC_OPTION(prev ^ node->o.option)) { + if (IS_DYNAMIC_OPTION(prev ^ node->o.options)) { if (r != 0) return r; r = add_opcode_option(reg, OP_SET_OPTION, prev); } @@ -1287,7 +1305,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) switch (node->type) { case ENCLOSURE_MEMORY: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (node->m.regnum == 0 && NODE_IS_CALLED(node)) { len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; @@ -1336,6 +1354,32 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) } break; + case ENCLOSURE_IF_ELSE: + { + Node* cond = NODE_ENCLOSURE_BODY(node); + Node* Then = node->te.Then; + Node* Else = node->te.Else; + + len = compile_length_tree(cond, reg); + if (len < 0) return len; + len += SIZE_OP_PUSH; + len += SIZE_OP_PUSH_STOP_BT + SIZE_OP_POP_STOP_BT; + + if (IS_NOT_NULL(Then)) { + tlen = compile_length_tree(Then, reg); + if (tlen < 0) return tlen; + len += tlen; + } + + if (IS_NOT_NULL(Else)) { + len += SIZE_OP_JUMP; + tlen = compile_length_tree(Else, reg); + if (tlen < 0) return tlen; + len += tlen; + } + } + break; + default: return ONIGERR_TYPE_BUG; break; @@ -1352,7 +1396,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) int r; int len; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (node->m.regnum == 0 && NODE_IS_CALLED(node)) { r = add_opcode(reg, OP_CALL); if (r != 0) return r; @@ -1370,9 +1414,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) r = add_opcode(reg, OP_RETURN); return r; } -#endif -#ifdef USE_SUBEXP_CALL if (NODE_IS_CALLED(node)) { r = add_opcode(reg, OP_CALL); if (r != 0) return r; @@ -1404,7 +1446,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); if (r != 0) return r; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) r = add_opcode(reg, (NODE_IS_RECURSION(node) ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); @@ -1434,14 +1476,15 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) { int r, len; - if (node->type == ENCLOSURE_OPTION) - return compile_option_node(node, reg, env); - switch (node->type) { case ENCLOSURE_MEMORY: r = compile_enclosure_memory_node(node, reg, env); break; + case ENCLOSURE_OPTION: + r = compile_option_node(node, reg, env); + break; + case ENCLOSURE_STOP_BACKTRACK: if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) { QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node)); @@ -1469,6 +1512,49 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) } break; + case ENCLOSURE_IF_ELSE: + { + int cond_len, then_len, jump_len; + Node* cond = NODE_ENCLOSURE_BODY(node); + Node* Then = node->te.Then; + Node* Else = node->te.Else; + + r = add_opcode(reg, OP_PUSH_STOP_BT); + if (r != 0) return r; + + cond_len = compile_length_tree(cond, reg); + if (cond_len < 0) return cond_len; + if (IS_NOT_NULL(Then)) { + then_len = compile_length_tree(Then, reg); + if (then_len < 0) return then_len; + } + else + then_len = 0; + + jump_len = cond_len + then_len + SIZE_OP_POP_STOP_BT; + if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP; + + r = add_opcode_rel_addr(reg, OP_PUSH, jump_len); + if (r != 0) return r; + r = compile_tree(cond, reg, env); + if (r != 0) return r; + r = add_opcode(reg, OP_POP_STOP_BT); + if (r != 0) return r; + + if (IS_NOT_NULL(Then)) { + r = compile_tree(Then, reg, env); + if (r != 0) return r; + } + + if (IS_NOT_NULL(Else)) { + int else_len = compile_length_tree(Else, reg); + r = add_opcode_rel_addr(reg, OP_JUMP, else_len); + if (r != 0) return r; + r = compile_tree(Else, reg, env); + } + } + break; + default: return ONIGERR_TYPE_BUG; break; @@ -1490,10 +1576,10 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg) switch (node->type) { case ANCHOR_PREC_READ: - len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS; + len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END; break; case ANCHOR_PREC_READ_NOT: - len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; + len = SIZE_OP_PUSH_PREC_READ_NOT + tlen + SIZE_OP_FAIL_PREC_READ_NOT; break; case ANCHOR_LOOK_BEHIND: len = SIZE_OP_LOOK_BEHIND + tlen; @@ -1531,21 +1617,21 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) #endif case ANCHOR_PREC_READ: - r = add_opcode(reg, OP_PUSH_POS); + r = add_opcode(reg, OP_PREC_READ_START); if (r != 0) return r; r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_POP_POS); + r = add_opcode(reg, OP_PREC_READ_END); break; case ANCHOR_PREC_READ_NOT: len = compile_length_tree(NODE_ANCHOR_BODY(node), reg); if (len < 0) return len; - r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); + r = add_opcode_rel_addr(reg, OP_PUSH_PREC_READ_NOT, len + SIZE_OP_FAIL_PREC_READ_NOT); if (r != 0) return r; r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_FAIL_POS); + r = add_opcode(reg, OP_FAIL_PREC_READ_NOT); break; case ANCHOR_LOOK_BEHIND: @@ -1596,6 +1682,67 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) } static int +compile_gimmick_node(GimmickNode* node, regex_t* reg) +{ + int r; + + switch (node->type) { + case GIMMICK_FAIL: + r = add_opcode(reg, OP_FAIL); + break; + + case GIMMICK_KEEP: + r = add_opcode(reg, OP_PUSH_SAVE_VAL); + if (r != 0) return r; + r = add_save_type(reg, SAVE_KEEP); + if (r != 0) return r; + r = add_mem_num(reg, node->id); + break; + + case GIMMICK_SAVE: + r = add_opcode(reg, OP_PUSH_SAVE_VAL); + if (r != 0) return r; + r = add_save_type(reg, node->detail_type); + if (r != 0) return r; + r = add_mem_num(reg, node->id); + break; + + case GIMMICK_UPDATE_VAR: + r = add_opcode(reg, OP_UPDATE_VAR); + if (r != 0) return r; + r = add_update_var_type(reg, node->detail_type); + if (r != 0) return r; + r = add_mem_num(reg, node->id); + break; + } + + return r; +} + +static int +compile_length_gimmick_node(GimmickNode* node, regex_t* reg) +{ + int len; + + switch (node->type) { + case GIMMICK_FAIL: + len = SIZE_OP_FAIL; + break; + + case GIMMICK_KEEP: + case GIMMICK_SAVE: + len = SIZE_OP_PUSH_SAVE_VAL; + break; + + case GIMMICK_UPDATE_VAR: + len = SIZE_OP_UPDATE_VAR; + break; + } + + return len; +} + +static int compile_length_tree(Node* node, regex_t* reg) { int len, r; @@ -1624,8 +1771,8 @@ compile_length_tree(Node* node, regex_t* reg) } break; - case NODE_STR: - if (NSTRING_IS_RAW(node)) + case NODE_STRING: + if (NODE_STRING_IS_RAW(node)) r = compile_length_string_raw_node(STR_(node), reg); else r = compile_length_string_node(node, reg); @@ -1639,28 +1786,39 @@ compile_length_tree(Node* node, regex_t* reg) r = SIZE_OPCODE; break; - case NODE_BREF: + case NODE_BACKREF: { - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); + if (NODE_IS_CHECKER(node)) { #ifdef USE_BACKREF_WITH_LEVEL - if (NODE_IS_NEST_LEVEL(node)) { - r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + - SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); - } - else + if (NODE_IS_NEST_LEVEL(node)) { + r = SIZE_OPCODE + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + else #endif - if (br->back_num == 1) { - r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) - ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); + r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); } else { - r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); +#ifdef USE_BACKREF_WITH_LEVEL + if (NODE_IS_NEST_LEVEL(node)) { + r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + else +#endif + if (br->back_num == 1) { + r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) + ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); + } + else { + r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } } } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: r = SIZE_OP_CALL; break; @@ -1678,6 +1836,10 @@ compile_length_tree(Node* node, regex_t* reg) r = compile_length_anchor_node(ANCHOR_(node), reg); break; + case NODE_GIMMICK: + r = compile_length_gimmick_node(GIMMICK_(node), reg); + break; + default: return ONIGERR_TYPE_BUG; break; @@ -1713,7 +1875,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) do { len = compile_length_tree(NODE_CAR(node), reg); if (IS_NOT_NULL(NODE_CDR(node))) { - r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); + enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH; + r = add_opcode_rel_addr(reg, push, len + SIZE_OP_JUMP); if (r != 0) break; } r = compile_tree(NODE_CAR(node), reg, env); @@ -1727,8 +1890,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) } break; - case NODE_STR: - if (NSTRING_IS_RAW(node)) + case NODE_STRING: + if (NODE_STRING_IS_RAW(node)) r = compile_string_raw_node(STR_(node), reg); else r = compile_string_node(node, reg); @@ -1744,7 +1907,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) switch (CTYPE_(node)->ctype) { case CTYPE_ANYCHAR: - if (IS_MULTILINE(reg->options)) + if (IS_MULTILINE(CTYPE_OPTION(node, reg))) r = add_opcode(reg, OP_ANYCHAR_ML); else r = add_opcode(reg, OP_ANYCHAR); @@ -1764,69 +1927,86 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) } break; - case NODE_BREF: + case NODE_BACKREF: { - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); + if (NODE_IS_CHECKER(node)) { #ifdef USE_BACKREF_WITH_LEVEL - if (NODE_IS_NEST_LEVEL(node)) { - r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); - if (r != 0) return r; - r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); - if (r != 0) return r; - r = add_length(reg, br->nest_level); - if (r != 0) return r; + if (NODE_IS_NEST_LEVEL(node)) { + r = add_opcode(reg, OP_BACKREF_CHECK_WITH_LEVEL); + if (r != 0) return r; + r = add_length(reg, br->nest_level); + if (r != 0) return r; + } + else +#endif + { + r = add_opcode(reg, OP_BACKREF_CHECK); + if (r != 0) return r; + } goto add_bacref_mems; } - else -#endif - if (br->back_num == 1) { - n = br->back_static[0]; - if (IS_IGNORECASE(reg->options)) { - r = add_opcode(reg, OP_BACKREFN_IC); + else { +#ifdef USE_BACKREF_WITH_LEVEL + if (NODE_IS_NEST_LEVEL(node)) { + r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); + if (r != 0) return r; + r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); + if (r != 0) return r; + r = add_length(reg, br->nest_level); if (r != 0) return r; - r = add_mem_num(reg, n); + + goto add_bacref_mems; } - else { - switch (n) { - case 1: r = add_opcode(reg, OP_BACKREF1); break; - case 2: r = add_opcode(reg, OP_BACKREF2); break; - default: - r = add_opcode(reg, OP_BACKREFN); + else +#endif + if (br->back_num == 1) { + n = br->back_static[0]; + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREF_N_IC); if (r != 0) return r; r = add_mem_num(reg, n); - break; } - } - } - else { - int i; - int* p; - - if (IS_IGNORECASE(reg->options)) { - r = add_opcode(reg, OP_BACKREF_MULTI_IC); + else { + switch (n) { + case 1: r = add_opcode(reg, OP_BACKREF1); break; + case 2: r = add_opcode(reg, OP_BACKREF2); break; + default: + r = add_opcode(reg, OP_BACKREF_N); + if (r != 0) return r; + r = add_mem_num(reg, n); + break; + } + } } else { - r = add_opcode(reg, OP_BACKREF_MULTI); - } - if (r != 0) return r; + int i; + int* p; -#ifdef USE_BACKREF_WITH_LEVEL - add_bacref_mems: -#endif - r = add_length(reg, br->back_num); - if (r != 0) return r; - p = BACKREFS_P(br); - for (i = br->back_num - 1; i >= 0; i--) { - r = add_mem_num(reg, p[i]); + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREF_MULTI_IC); + } + else { + r = add_opcode(reg, OP_BACKREF_MULTI); + } if (r != 0) return r; + + add_bacref_mems: + r = add_length(reg, br->back_num); + if (r != 0) return r; + p = BACKREFS_P(br); + for (i = br->back_num - 1; i >= 0; i--) { + r = add_mem_num(reg, p[i]); + if (r != 0) return r; + } } } } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: r = compile_call(CALL_(node), reg, env); break; @@ -1844,6 +2024,10 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) r = compile_anchor_node(ANCHOR_(node), reg, env); break; + case NODE_GIMMICK: + r = compile_gimmick_node(GIMMICK_(node), reg); + break; + default: #ifdef ONIG_DEBUG fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node)); @@ -1898,6 +2082,18 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) r = noname_disable_map(plink, map, counter); } } + else if (en->type == ENCLOSURE_IF_ELSE) { + r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter); + if (r != 0) return r; + if (IS_NOT_NULL(en->te.Then)) { + r = noname_disable_map(&(en->te.Then), map, counter); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) { + r = noname_disable_map(&(en->te.Else), map, counter); + if (r != 0) return r; + } + } else r = noname_disable_map(&(NODE_BODY(node)), map, counter); } @@ -1920,7 +2116,7 @@ renumber_node_backref(Node* node, GroupNumRemap* map) { int i, pos, n, old_num; int *backs; - BRefNode* bn = BREF_(node); + BackRefNode* bn = BACKREF_(node); if (! NODE_IS_BY_NAME(node)) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; @@ -1957,11 +2153,29 @@ renumber_by_map(Node* node, GroupNumRemap* map) break; case NODE_QUANT: - case NODE_ENCLOSURE: r = renumber_by_map(NODE_BODY(node), map); break; - case NODE_BREF: + case NODE_ENCLOSURE: + { + EnclosureNode* en = ENCLOSURE_(node); + r = renumber_by_map(NODE_BODY(node), map); + if (r != 0) return r; + + if (en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = renumber_by_map(en->te.Then, map); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) { + r = renumber_by_map(en->te.Else, map); + if (r != 0) return r; + } + } + } + break; + + case NODE_BACKREF: r = renumber_node_backref(node, map); break; @@ -1995,11 +2209,30 @@ numbered_ref_check(Node* node) break; /* fall */ case NODE_QUANT: - case NODE_ENCLOSURE: r = numbered_ref_check(NODE_BODY(node)); break; - case NODE_BREF: + case NODE_ENCLOSURE: + { + EnclosureNode* en = ENCLOSURE_(node); + r = numbered_ref_check(NODE_BODY(node)); + if (r != 0) return r; + + if (en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = numbered_ref_check(en->te.Then); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) { + r = numbered_ref_check(en->te.Else); + if (r != 0) return r; + } + } + } + + break; + + case NODE_BACKREF: if (! NODE_IS_BY_NAME(node)) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; break; @@ -2052,7 +2285,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) } #endif /* USE_NAMED_GROUP */ -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL static int unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) { @@ -2061,9 +2294,11 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) AbsAddrType addr; for (i = 0; i < uslist->num; i++) { + if (! NODE_IS_ADDR_FIXED(uslist->us[i].target)) + return ONIGERR_PARSER_BUG; + en = ENCLOSURE_(uslist->us[i].target); - if (! NODE_IS_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; - addr = en->m.called_addr; + addr = en->m.called_addr; offset = uslist->us[i].offset; BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); @@ -2120,7 +2355,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case NODE_STR: + case NODE_STRING: { StrNode* sn = STR_(node); UChar *s = sn->s; @@ -2135,16 +2370,21 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) { QuantNode* qn = QUANT_(node); if (qn->lower == qn->upper) { - r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level); - if (r == 0) - *len = distance_multiply(tlen, qn->lower); + if (qn->upper == 0) { + *len = 0; + } + else { + r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level); + if (r == 0) + *len = distance_multiply(tlen, qn->lower); + } } else r = GET_CHAR_LEN_VARLEN; } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: if (! NODE_IS_RECURSION(node)) r = get_char_length_tree1(NODE_BODY(node), reg, len, level); @@ -2166,7 +2406,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) EnclosureNode* en = ENCLOSURE_(node); switch (en->type) { case ENCLOSURE_MEMORY: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (NODE_IS_CLEN_FIXED(node)) *len = en->char_len; else { @@ -2182,6 +2422,31 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) case ENCLOSURE_STOP_BACKTRACK: r = get_char_length_tree1(NODE_BODY(node), reg, len, level); break; + case ENCLOSURE_IF_ELSE: + { + int clen, elen; + r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level); + if (r == 0) { + if (IS_NOT_NULL(en->te.Then)) { + r = get_char_length_tree1(en->te.Then, reg, &tlen, level); + if (r != 0) break; + } + else tlen = 0; + if (IS_NOT_NULL(en->te.Else)) { + r = get_char_length_tree1(en->te.Else, reg, &elen, level); + if (r != 0) break; + } + else elen = 0; + + if (clen + tlen != elen) { + r = GET_CHAR_LEN_VARLEN; + } + else { + *len = elen; + } + } + } + break; default: break; } @@ -2189,8 +2454,13 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) break; case NODE_ANCHOR: + case NODE_GIMMICK: break; + case NODE_BACKREF: + if (NODE_IS_CHECKER(node)) + break; + /* fall */ default: r = GET_CHAR_LEN_VARLEN; break; @@ -2241,7 +2511,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) } break; - case NODE_STR: + case NODE_STRING: goto swap; break; @@ -2318,7 +2588,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) } break; - case NODE_STR: + case NODE_STRING: goto swap; break; @@ -2328,10 +2598,10 @@ is_exclusive(Node* x, Node* y, regex_t* reg) } break; - case NODE_STR: + case NODE_STRING: { StrNode* xs = STR_(x); - if (NSTRING_LEN(x) == 0) + if (NODE_STRING_LEN(x) == 0) break; //c = *(xs->s); @@ -2362,13 +2632,13 @@ is_exclusive(Node* x, Node* y, regex_t* reg) } break; - case NODE_STR: + case NODE_STRING: { UChar *q; StrNode* ys = STR_(y); - len = NSTRING_LEN(x); - if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); - if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { + len = NODE_STRING_LEN(x); + if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y); + if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) { /* tiny version */ return 0; } @@ -2399,9 +2669,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg) Node* n = NULL_NODE; switch (NODE_TYPE(node)) { - case NODE_BREF: + case NODE_BACKREF: case NODE_ALT: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: #endif break; @@ -2420,7 +2690,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) n = get_head_value_node(NODE_CAR(node), exact, reg); break; - case NODE_STR: + case NODE_STRING: { StrNode* sn = STR_(node); @@ -2428,7 +2698,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) break; if (exact != 0 && - !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { + !NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { } else { n = node; @@ -2456,7 +2726,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) { OnigOptionType options = reg->options; - reg->options = ENCLOSURE_(node)->o.option; + reg->options = ENCLOSURE_(node)->o.options; n = get_head_value_node(NODE_BODY(node), exact, reg); reg->options = options; } @@ -2464,6 +2734,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) case ENCLOSURE_MEMORY: case ENCLOSURE_STOP_BACKTRACK: + case ENCLOSURE_IF_ELSE: n = get_head_value_node(NODE_BODY(node), exact, reg); break; } @@ -2475,6 +2746,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) n = get_head_value_node(NODE_BODY(node), exact, reg); break; + case NODE_GIMMICK: default: break; } @@ -2512,6 +2784,15 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask) return 1; r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); + if (r == 0 && en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask); + if (r != 0) break; + } + if (IS_NOT_NULL(en->te.Else)) { + r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask); + } + } } break; @@ -2524,6 +2805,7 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask) r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); break; + case NODE_GIMMICK: default: break; } @@ -2531,31 +2813,31 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask) } static OnigLen -get_min_len(Node* node, ScanEnv* env) +tree_min_len(Node* node, ScanEnv* env) { OnigLen len; OnigLen tmin; len = 0; switch (NODE_TYPE(node)) { - case NODE_BREF: - { + case NODE_BACKREF: + if (! NODE_IS_CHECKER(node)) { int i; int* backs; MemEnv* mem_env = SCANENV_MEMENV(env); - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); if (NODE_IS_RECURSION(node)) break; backs = BACKREFS_P(br); - len = get_min_len(mem_env[backs[0]].node, env); + len = tree_min_len(mem_env[backs[0]].node, env); for (i = 1; i < br->back_num; i++) { - tmin = get_min_len(mem_env[backs[i]].node, env); + tmin = tree_min_len(mem_env[backs[i]].node, env); if (len > tmin) len = tmin; } } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: { Node* t = NODE_BODY(node); @@ -2564,15 +2846,15 @@ get_min_len(Node* node, ScanEnv* env) len = ENCLOSURE_(t)->min_len; } else - len = get_min_len(t, env); + len = tree_min_len(t, env); } break; #endif case NODE_LIST: do { - tmin = get_min_len(NODE_CAR(node), env); - len += tmin; + tmin = tree_min_len(NODE_CAR(node), env); + len = distance_add(len, tmin); } while (IS_NOT_NULL(node = NODE_CDR(node))); break; @@ -2582,14 +2864,14 @@ get_min_len(Node* node, ScanEnv* env) y = node; do { x = NODE_CAR(y); - tmin = get_min_len(x, env); + tmin = tree_min_len(x, env); if (y == node) len = tmin; else if (len > tmin) len = tmin; } while (IS_NOT_NULL(y = NODE_CDR(y))); } break; - case NODE_STR: + case NODE_STRING: { StrNode* sn = STR_(node); len = sn->end - sn->s; @@ -2598,7 +2880,7 @@ get_min_len(Node* node, ScanEnv* env) case NODE_CTYPE: case NODE_CCLASS: - len = 1; + len = ONIGENC_MBC_MINLEN(env->enc); break; case NODE_QUANT: @@ -2606,7 +2888,7 @@ get_min_len(Node* node, ScanEnv* env) QuantNode* qn = QUANT_(node); if (qn->lower > 0) { - len = get_min_len(NODE_BODY(node), env); + len = tree_min_len(NODE_BODY(node), env); len = distance_multiply(len, qn->lower); } } @@ -2624,7 +2906,7 @@ get_min_len(Node* node, ScanEnv* env) len = 0; // recursive else { NODE_STATUS_ADD(node, NST_MARK1); - len = get_min_len(NODE_BODY(node), env); + len = tree_min_len(NODE_BODY(node), env); NODE_STATUS_REMOVE(node, NST_MARK1); en->min_len = len; @@ -2635,12 +2917,34 @@ get_min_len(Node* node, ScanEnv* env) case ENCLOSURE_OPTION: case ENCLOSURE_STOP_BACKTRACK: - len = get_min_len(NODE_BODY(node), env); + len = tree_min_len(NODE_BODY(node), env); + break; + case ENCLOSURE_IF_ELSE: + { + int elen; + len = tree_min_len(NODE_BODY(node), env); + if (IS_NOT_NULL(en->te.Then)) + len += tree_min_len(en->te.Then, env); + if (IS_NOT_NULL(en->te.Else)) + elen = tree_min_len(en->te.Else, env); + else elen = 0; + + if (elen < len) len = elen; + } break; } } break; + case NODE_GIMMICK: + { + GimmickNode* g = GIMMICK_(node); + if (g->type == GIMMICK_FAIL) { + len = INFINITE_LEN; + break; + } + } + /* fall */ case NODE_ANCHOR: default: break; @@ -2650,7 +2954,7 @@ get_min_len(Node* node, ScanEnv* env) } static OnigLen -get_max_len(Node* node, ScanEnv* env) +tree_max_len(Node* node, ScanEnv* env) { OnigLen len; OnigLen tmax; @@ -2659,19 +2963,19 @@ get_max_len(Node* node, ScanEnv* env) switch (NODE_TYPE(node)) { case NODE_LIST: do { - tmax = get_max_len(NODE_CAR(node), env); + tmax = tree_max_len(NODE_CAR(node), env); len = distance_add(len, tmax); } while (IS_NOT_NULL(node = NODE_CDR(node))); break; case NODE_ALT: do { - tmax = get_max_len(NODE_CAR(node), env); + tmax = tree_max_len(NODE_CAR(node), env); if (len < tmax) len = tmax; } while (IS_NOT_NULL(node = NODE_CDR(node))); break; - case NODE_STR: + case NODE_STRING: { StrNode* sn = STR_(node); len = sn->end - sn->s; @@ -2683,30 +2987,30 @@ get_max_len(Node* node, ScanEnv* env) len = ONIGENC_MBC_MAXLEN_DIST(env->enc); break; - case NODE_BREF: - { + case NODE_BACKREF: + if (! NODE_IS_CHECKER(node)) { int i; int* backs; MemEnv* mem_env = SCANENV_MEMENV(env); - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); if (NODE_IS_RECURSION(node)) { - len = ONIG_INFINITE_DISTANCE; + len = INFINITE_LEN; break; } backs = BACKREFS_P(br); for (i = 0; i < br->back_num; i++) { - tmax = get_max_len(mem_env[backs[i]].node, env); + tmax = tree_max_len(mem_env[backs[i]].node, env); if (len < tmax) len = tmax; } } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: if (! NODE_IS_RECURSION(node)) - len = get_max_len(NODE_BODY(node), env); + len = tree_max_len(NODE_BODY(node), env); else - len = ONIG_INFINITE_DISTANCE; + len = INFINITE_LEN; break; #endif @@ -2715,12 +3019,12 @@ get_max_len(Node* node, ScanEnv* env) QuantNode* qn = QUANT_(node); if (qn->upper != 0) { - len = get_max_len(NODE_BODY(node), env); + len = tree_max_len(NODE_BODY(node), env); if (len != 0) { if (! IS_REPEAT_INFINITE(qn->upper)) len = distance_multiply(len, qn->upper); else - len = ONIG_INFINITE_DISTANCE; + len = INFINITE_LEN; } } } @@ -2735,10 +3039,10 @@ get_max_len(Node* node, ScanEnv* env) len = en->max_len; else { if (NODE_IS_MARK1(node)) - len = ONIG_INFINITE_DISTANCE; + len = INFINITE_LEN; else { NODE_STATUS_ADD(node, NST_MARK1); - len = get_max_len(NODE_BODY(node), env); + len = tree_max_len(NODE_BODY(node), env); NODE_STATUS_REMOVE(node, NST_MARK1); en->max_len = len; @@ -2749,13 +3053,29 @@ get_max_len(Node* node, ScanEnv* env) case ENCLOSURE_OPTION: case ENCLOSURE_STOP_BACKTRACK: - len = get_max_len(NODE_BODY(node), env); + len = tree_max_len(NODE_BODY(node), env); + break; + case ENCLOSURE_IF_ELSE: + { + int tlen, elen; + len = tree_max_len(NODE_BODY(node), env); + if (IS_NOT_NULL(en->te.Then)) { + tlen = tree_max_len(en->te.Then, env); + len = distance_add(len, tlen); + } + if (IS_NOT_NULL(en->te.Else)) + elen = tree_max_len(en->te.Else, env); + else elen = 0; + + if (elen > len) len = elen; + } break; } } break; case NODE_ANCHOR: + case NODE_GIMMICK: default: break; } @@ -2783,14 +3103,31 @@ check_backrefs(Node* node, ScanEnv* env) } /* fall */ case NODE_QUANT: + r = check_backrefs(NODE_BODY(node), env); + break; + case NODE_ENCLOSURE: r = check_backrefs(NODE_BODY(node), env); + { + EnclosureNode* en = ENCLOSURE_(node); + + if (en->type == ENCLOSURE_IF_ELSE) { + if (r != 0) return r; + if (IS_NOT_NULL(en->te.Then)) { + r = check_backrefs(en->te.Then, env); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) { + r = check_backrefs(en->te.Else, env); + } + } + } break; - case NODE_BREF: + case NODE_BACKREF: { int i; - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); int* backs = BACKREFS_P(br); MemEnv* mem_env = SCANENV_MEMENV(env); @@ -2813,7 +3150,7 @@ check_backrefs(Node* node, ScanEnv* env) } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL #define RECURSION_EXIST (1<<0) #define RECURSION_MUST (1<<1) @@ -2822,6 +3159,7 @@ check_backrefs(Node* node, ScanEnv* env) static int infinite_recursive_call_check(Node* node, ScanEnv* env, int head) { + int ret; int r = 0; switch (NODE_TYPE(node)) { @@ -2829,15 +3167,14 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head) { Node *x; OnigLen min; - int ret; x = node; do { ret = infinite_recursive_call_check(NODE_CAR(x), env, head); if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret; r |= ret; - if (head) { - min = get_min_len(NODE_CAR(x), env); + if (head != 0) { + min = tree_min_len(NODE_CAR(x), env); if (min != 0) head = 0; } } while (IS_NOT_NULL(x = NODE_CDR(x))); @@ -2846,7 +3183,6 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head) case NODE_ALT: { - int ret; int must; must = RECURSION_MUST; @@ -2894,6 +3230,31 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head) NODE_STATUS_REMOVE(node, NST_MARK2); } } + else if (en->type == ENCLOSURE_IF_ELSE) { + int eret; + + ret = infinite_recursive_call_check(NODE_BODY(node), env, head); + if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret; + r |= ret; + if (IS_NOT_NULL(en->te.Then)) { + OnigLen min; + if (head != 0) { + min = tree_min_len(NODE_BODY(node), env); + } + else min = 0; + + ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head); + if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret; + r |= ret; + } + if (IS_NOT_NULL(en->te.Else)) { + eret = infinite_recursive_call_check(en->te.Else, env, head); + if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret; + r |= (eret & RECURSION_EXIST); + if ((eret & RECURSION_MUST) == 0) + r &= ~RECURSION_MUST; + } + } else { r = infinite_recursive_call_check(NODE_BODY(node), env, head); } @@ -2948,6 +3309,16 @@ infinite_recursive_call_check_trav(Node* node, ScanEnv* env) NODE_STATUS_REMOVE(node, NST_MARK1); } } + else if (en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = infinite_recursive_call_check_trav(en->te.Then, env); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) { + r = infinite_recursive_call_check_trav(en->te.Else, env); + if (r != 0) return r; + } + } } r = infinite_recursive_call_check_trav(NODE_BODY(node), env); @@ -2987,7 +3358,10 @@ recursive_call_check(Node* node) case NODE_CALL: r = recursive_call_check(NODE_BODY(node)); - if (r != 0) NODE_STATUS_ADD(node, NST_RECURSION); + if (r != 0) { + if (NODE_IS_MARK1(NODE_BODY(node))) + NODE_STATUS_ADD(node, NST_RECURSION); + } break; case NODE_ENCLOSURE: @@ -3005,6 +3379,16 @@ recursive_call_check(Node* node) NODE_STATUS_REMOVE(node, NST_MARK2); } } + else if (en->type == ENCLOSURE_IF_ELSE) { + r = 0; + if (IS_NOT_NULL(en->te.Then)) { + r |= recursive_call_check(en->te.Then); + } + if (IS_NOT_NULL(en->te.Else)) { + r |= recursive_call_check(en->te.Else); + } + r |= recursive_call_check(NODE_BODY(node)); + } else { r = recursive_call_check(NODE_BODY(node)); } @@ -3058,6 +3442,8 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state) case NODE_ENCLOSURE: { + int ret; + int state1; EnclosureNode* en = ENCLOSURE_(node); if (en->type == ENCLOSURE_MEMORY) { @@ -3075,16 +3461,25 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state) } } - { - int ret; - int state1 = state; + state1 = state; + if (NODE_IS_RECURSION(node)) + state1 |= IN_RECURSION; - if (NODE_IS_RECURSION(node)) - state1 |= IN_RECURSION; + ret = recursive_call_check_trav(NODE_BODY(node), env, state1); + if (ret == FOUND_CALLED_NODE) + r = FOUND_CALLED_NODE; - ret = recursive_call_check_trav(NODE_BODY(node), env, state1); - if (ret == FOUND_CALLED_NODE) - r = FOUND_CALLED_NODE; + if (en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + ret = recursive_call_check_trav(en->te.Then, env, state1); + if (ret == FOUND_CALLED_NODE) + r = FOUND_CALLED_NODE; + } + if (IS_NOT_NULL(en->te.Else)) { + ret = recursive_call_check_trav(en->te.Else, env, state1); + if (ret == FOUND_CALLED_NODE) + r = FOUND_CALLED_NODE; + } } } break; @@ -3126,7 +3521,7 @@ divide_look_behind_alternatives(Node* node) if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { np = node; do { - SET_NODE_TYPE(np, NODE_LIST); /* alt -> list */ + NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */ } while (IS_NOT_NULL(np = NODE_CDR(np))); } return 0; @@ -3257,8 +3652,8 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, return r; } - NSTRING_SET_AMBIG(node); - NSTRING_SET_DONT_GET_OPT_INFO(node); + NODE_STRING_SET_AMBIG(node); + NODE_STRING_SET_DONT_GET_OPT_INFO(node); *rnode = node; return 0; } @@ -3386,7 +3781,7 @@ expand_case_fold_string(Node* node, regex_t* reg) OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; StrNode* sn = STR_(node); - if (NSTRING_IS_AMBIG(node)) return 0; + if (NODE_STRING_IS_AMBIG(node)) return 0; start = sn->s; end = sn->end; @@ -3529,10 +3924,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) switch (NODE_TYPE(node)) { case NODE_LIST: { - Node* prev = NULL_NODE; do { r = setup_comb_exp_check(NODE_CAR(node), r, env); - prev = NODE_CAR(node); } while (r >= 0 && IS_NOT_NULL(node = NODE_CDR(node))); } break; @@ -3619,8 +4012,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) switch (en->type) { case ENCLOSURE_MEMORY: { - if (env->curr_max_regnum < en->regnum) - env->curr_max_regnum = en->regnum; + if (env->curr_max_regnum < en->m.regnum) + env->curr_max_regnum = en->m.regnum; r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env); } @@ -3633,7 +4026,7 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: if (NODE_IS_RECURSION(node)) env->has_recursion = 1; @@ -3668,7 +4061,7 @@ quantifiers_memory_node_info(Node* node) } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: if (NODE_IS_RECURSION(node)) { return QUANT_BODY_IS_EMPTY_REC; /* tiny version */ @@ -3702,17 +4095,32 @@ quantifiers_memory_node_info(Node* node) case ENCLOSURE_STOP_BACKTRACK: r = quantifiers_memory_node_info(NODE_BODY(node)); break; + case ENCLOSURE_IF_ELSE: + { + int v; + r = quantifiers_memory_node_info(NODE_BODY(node)); + if (IS_NOT_NULL(en->te.Then)) { + v = quantifiers_memory_node_info(en->te.Then); + if (v > r) r = v; + } + if (IS_NOT_NULL(en->te.Else)) { + v = quantifiers_memory_node_info(en->te.Else); + if (v > r) r = v; + } + } + break; default: break; } } break; - case NODE_BREF: - case NODE_STR: + case NODE_BACKREF: + case NODE_STRING: case NODE_CTYPE: case NODE_CCLASS: case NODE_ANCHOR: + case NODE_GIMMICK: default: break; } @@ -3729,7 +4137,7 @@ quantifiers_memory_node_info(Node* node) #define IN_ZERO_REPEAT (1<<4) #define IN_MULTI_ENTRY (1<<5) -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL #ifdef __GNUC__ __inline @@ -3745,7 +4153,7 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state) #ifdef USE_NAMED_GROUP if (env->num_named > 0 && IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { + !ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) { return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; } #endif @@ -3811,10 +4219,26 @@ setup_call2_call(Node* node) break; case NODE_ENCLOSURE: - if (! NODE_IS_MARK1(node)) { - NODE_STATUS_ADD(node, NST_MARK1); - setup_call2_call(NODE_BODY(node)); - NODE_STATUS_REMOVE(node, NST_MARK1); + { + EnclosureNode* en = ENCLOSURE_(node); + + if (en->type == ENCLOSURE_MEMORY) { + if (! NODE_IS_MARK1(node)) { + NODE_STATUS_ADD(node, NST_MARK1); + setup_call2_call(NODE_BODY(node)); + NODE_STATUS_REMOVE(node, NST_MARK1); + } + } + else if (en->type == ENCLOSURE_IF_ELSE) { + setup_call2_call(NODE_BODY(node)); + if (IS_NOT_NULL(en->te.Then)) + setup_call2_call(en->te.Then); + if (IS_NOT_NULL(en->te.Else)) + setup_call2_call(en->te.Else); + } + else { + setup_call2_call(NODE_BODY(node)); + } } break; @@ -3868,11 +4292,29 @@ setup_call(Node* node, ScanEnv* env, int state) break; case NODE_ENCLOSURE: - if ((state & IN_ZERO_REPEAT) != 0) { - NODE_STATUS_ADD(node, NST_IN_ZERO_REPEAT); - ENCLOSURE_(node)->m.entry_count--; + { + EnclosureNode* en = ENCLOSURE_(node); + + if (en->type == ENCLOSURE_MEMORY) { + if ((state & IN_ZERO_REPEAT) != 0) { + NODE_STATUS_ADD(node, NST_IN_ZERO_REPEAT); + ENCLOSURE_(node)->m.entry_count--; + } + r = setup_call(NODE_BODY(node), env, state); + } + else if (en->type == ENCLOSURE_IF_ELSE) { + r = setup_call(NODE_BODY(node), env, state); + if (r != 0) return r; + if (IS_NOT_NULL(en->te.Then)) { + r = setup_call(en->te.Then, env, state); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) + r = setup_call(en->te.Else, env, state); + } + else + r = setup_call(NODE_BODY(node), env, state); } - r = setup_call(NODE_BODY(node), env, state); break; case NODE_CALL: @@ -3918,6 +4360,20 @@ setup_call2(Node* node) case NODE_ENCLOSURE: if (! NODE_IS_IN_ZERO_REPEAT(node)) r = setup_call2(NODE_BODY(node)); + + { + EnclosureNode* en = ENCLOSURE_(node); + + if (r != 0) return r; + if (en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = setup_call2(en->te.Then); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) + r = setup_call2(en->te.Else); + } + } break; case NODE_CALL: @@ -3997,6 +4453,13 @@ setup_called_state_call(Node* node, int state) NODE_STATUS_REMOVE(node, NST_MARK1); } } + else if (en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + setup_called_state_call(en->te.Then, state); + } + if (IS_NOT_NULL(en->te.Else)) + setup_called_state_call(en->te.Else, state); + } else { setup_called_state_call(NODE_BODY(node), state); } @@ -4025,7 +4488,7 @@ setup_called_state(Node* node, int state) } while (IS_NOT_NULL(node = NODE_CDR(node))); break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: setup_called_state_call(node, state); break; @@ -4046,6 +4509,13 @@ setup_called_state(Node* node, int state) case ENCLOSURE_STOP_BACKTRACK: setup_called_state(NODE_BODY(node), state); break; + case ENCLOSURE_IF_ELSE: + setup_called_state(NODE_BODY(node), state); + if (IS_NOT_NULL(en->te.Then)) + setup_called_state(en->te.Then, state); + if (IS_NOT_NULL(en->te.Else)) + setup_called_state(en->te.Else, state); + break; } } break; @@ -4082,16 +4552,17 @@ setup_called_state(Node* node, int state) } break; - case NODE_BREF: - case NODE_STR: + case NODE_BACKREF: + case NODE_STRING: case NODE_CTYPE: case NODE_CCLASS: + case NODE_GIMMICK: default: break; } } -#endif /* USE_SUBEXP_CALL */ +#endif /* USE_CALL */ static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env); @@ -4104,8 +4575,9 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env) { /* allowed node types in look-behind */ #define ALLOWED_TYPE_IN_LB \ - ( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE \ - | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT | BIT_NODE_CALL ) + ( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STRING | BIT_NODE_CCLASS \ + | BIT_NODE_CTYPE | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT \ + | BIT_NODE_CALL ) #define ALLOWED_ENCLOSURE_IN_LB ( ENCLOSURE_MEMORY | ENCLOSURE_OPTION ) #define ALLOWED_ENCLOSURE_IN_LB_NOT ENCLOSURE_OPTION @@ -4182,7 +4654,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) } if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { - d = get_min_len(body, env); + d = tree_min_len(body, env); if (d == 0) { #ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT qn->body_empty_info = quantifiers_memory_node_info(body); @@ -4208,10 +4680,10 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) /* expand string */ #define EXPAND_STRING_MAX_LENGTH 100 - if (NODE_TYPE(body) == NODE_STR) { + if (NODE_TYPE(body) == NODE_STRING) { if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { - int len = NSTRING_LEN(body); + int len = NODE_STRING_LEN(body); StrNode* sn = STR_(body); if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { @@ -4278,17 +4750,17 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; - case NODE_STR: - if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { + case NODE_STRING: + if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) { r = expand_case_fold_string(node, reg); } break; - case NODE_BREF: + case NODE_BACKREF: { int i; int* p; - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); p = BACKREFS_P(br); for (i = 0; i < br->back_num; i++) { if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; @@ -4311,14 +4783,14 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case ENCLOSURE_OPTION: { OnigOptionType options = reg->options; - reg->options = ENCLOSURE_(node)->o.option; + reg->options = ENCLOSURE_(node)->o.options; r = setup_tree(NODE_BODY(node), reg, state, env); reg->options = options; } break; case ENCLOSURE_MEMORY: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL state |= en->m.called_state; #endif @@ -4343,6 +4815,17 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } } break; + + case ENCLOSURE_IF_ELSE: + r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env); + if (r != 0) return r; + if (IS_NOT_NULL(en->te.Then)) { + r = setup_tree(en->te.Then, reg, (state | IN_ALT), env); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) + r = setup_tree(en->te.Else, reg, (state | IN_ALT), env); + break; } } break; @@ -4355,11 +4838,12 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) r = setup_anchor(node, reg, state, env); break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: #endif case NODE_CTYPE: case NODE_CCLASS: + case NODE_GIMMICK: default: break; } @@ -4487,7 +4971,7 @@ distance_value(MinMaxLen* mm) OnigLen d; - if (mm->max == ONIG_INFINITE_DISTANCE) return 0; + if (mm->max == INFINITE_LEN) return 0; d = mm->max - mm->min; if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0]))) @@ -5048,15 +5532,15 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case NODE_STR: + case NODE_STRING: { StrNode* sn = STR_(node); int slen = sn->end - sn->s; - int is_raw = NSTRING_IS_RAW(node); + int is_raw = NODE_STRING_IS_RAW(node); - if (! NSTRING_IS_AMBIG(node)) { + if (! NODE_STRING_IS_AMBIG(node)) { concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - NSTRING_IS_RAW(node), env->enc); + NODE_STRING_IS_RAW(node), env->enc); if (slen > 0) { add_char_opt_map_info(&opt->map, *(sn->s), env->enc); } @@ -5065,7 +5549,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) else { int max; - if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { + if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) { int n = onigenc_strlen(env->enc, sn->s, sn->end); max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; } @@ -5191,24 +5675,24 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case NODE_BREF: - { + case NODE_BACKREF: + if (! NODE_IS_CHECKER(node)) { int i; int* backs; OnigLen min, max, tmin, tmax; MemEnv* mem_env = SCANENV_MEMENV(env->scan_env); - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); if (NODE_IS_RECURSION(node)) { - set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + set_mml(&opt->len, 0, INFINITE_LEN); break; } backs = BACKREFS_P(br); - min = get_min_len(mem_env[backs[0]].node, env->scan_env); - max = get_max_len(mem_env[backs[0]].node, env->scan_env); + min = tree_min_len(mem_env[backs[0]].node, env->scan_env); + max = tree_max_len(mem_env[backs[0]].node, env->scan_env); for (i = 1; i < br->back_num; i++) { - tmin = get_min_len(mem_env[backs[i]].node, env->scan_env); - tmax = get_max_len(mem_env[backs[i]].node, env->scan_env); + tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env); + tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env); if (min > tmin) min = tmin; if (max < tmax) max = tmax; } @@ -5216,13 +5700,13 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: if (NODE_IS_RECURSION(node)) - set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + set_mml(&opt->len, 0, INFINITE_LEN); else { OnigOptionType save = env->options; - env->options = ENCLOSURE_(NODE_BODY(node))->o.option; + env->options = ENCLOSURE_(NODE_BODY(node))->o.options; r = optimize_node_left(NODE_BODY(node), opt, env); env->options = save; } @@ -5242,7 +5726,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { if (env->mmd.max == 0 && NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) { - if (IS_MULTILINE(env->options)) + if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env))) add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); else add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); @@ -5274,7 +5758,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) min = distance_multiply(nopt.len.min, qn->lower); if (IS_REPEAT_INFINITE(qn->upper)) - max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); + max = (nopt.len.max > 0 ? INFINITE_LEN : 0); else max = distance_multiply(nopt.len.max, qn->upper); @@ -5291,20 +5775,20 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) { OnigOptionType save = env->options; - env->options = en->o.option; + env->options = en->o.options; r = optimize_node_left(NODE_BODY(node), opt, env); env->options = save; } break; case ENCLOSURE_MEMORY: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL en->opt_count++; if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { OnigLen min, max; min = 0; - max = ONIG_INFINITE_DISTANCE; + max = INFINITE_LEN; if (NODE_IS_MIN_FIXED(node)) min = en->min_len; if (NODE_IS_MAX_FIXED(node)) max = en->max_len; set_mml(&opt->len, min, max); @@ -5324,10 +5808,39 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case ENCLOSURE_STOP_BACKTRACK: r = optimize_node_left(NODE_BODY(node), opt, env); break; + + case ENCLOSURE_IF_ELSE: + { + OptEnv nenv; + NodeOptInfo nopt; + + copy_opt_env(&nenv, env); + r = optimize_node_left(NODE_ENCLOSURE_BODY(en), &nopt, &nenv); + if (r == 0) { + add_mml(&nenv.mmd, &nopt.len); + concat_left_node_opt_info(env->enc, opt, &nopt); + if (IS_NOT_NULL(en->te.Then)) { + r = optimize_node_left(en->te.Then, &nopt, &nenv); + if (r == 0) { + concat_left_node_opt_info(env->enc, opt, &nopt); + } + } + + if (IS_NOT_NULL(en->te.Else)) { + r = optimize_node_left(en->te.Else, &nopt, env); + if (r == 0) + alt_merge_node_opt_info(opt, &nopt, env); + } + } + } + break; } } break; + case NODE_GIMMICK: + break; + default: #ifdef ONIG_DEBUG fprintf(stderr, "optimize_node_left: undefined node type %d\n", NODE_TYPE(node)); @@ -5379,7 +5892,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) reg->dmin = e->mmd.min; reg->dmax = e->mmd.max; - if (reg->dmin != ONIG_INFINITE_DISTANCE) { + if (reg->dmin != INFINITE_LEN) { reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact); } @@ -5398,7 +5911,7 @@ set_optimize_map_info(regex_t* reg, OptMapInfo* m) reg->dmin = m->mmd.min; reg->dmax = m->mmd.max; - if (reg->dmin != ONIG_INFINITE_DISTANCE) { + if (reg->dmin != INFINITE_LEN) { reg->threshold_len = reg->dmin + 1; } } @@ -5531,14 +6044,14 @@ static void print_enc_string(FILE* fp, OnigEncoding enc, static void print_distance_range(FILE* f, OnigLen a, OnigLen b) { - if (a == ONIG_INFINITE_DISTANCE) + if (a == INFINITE_LEN) fputs("inf", f); else fprintf(f, "(%u)", a); fputs("-", f); - if (b == ONIG_INFINITE_DISTANCE) + if (b == INFINITE_LEN) fputs("inf", f); else fprintf(f, "(%u)", b); @@ -5656,7 +6169,7 @@ onig_free_body(regex_t* reg) if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); - if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); + if (IS_NOT_NULL(REG_EXTP(reg))) xfree(REG_EXTP(reg)); #ifdef USE_NAMED_GROUP onig_names_free(reg); @@ -5702,7 +6215,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, int r, init_size; Node* root; ScanEnv scan_env; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL UnsetAddrList uslist; #endif @@ -5751,7 +6264,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, r = check_backrefs(root, &scan_env); if (r != 0) goto err; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (scan_env.num_call > 0) { r = unset_addr_list_init(&uslist, scan_env.num_call); if (r != 0) goto err; @@ -5791,12 +6304,12 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, #ifdef USE_COMBINATION_EXPLOSION_CHECK if (scan_env.backrefed_mem == 0 -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL || scan_env.num_call == 0 #endif ) { setup_comb_exp_check(root, 0, &scan_env); -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (scan_env.has_recursion != 0) { scan_env.num_comb_exp_check = 0; } @@ -5829,8 +6342,17 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, r = compile_tree(root, reg, &scan_env); if (r == 0) { + if (scan_env.keep_num > 0) { + r = add_opcode(reg, OP_UPDATE_VAR); + if (r != 0) goto err; + r = add_update_var_type(reg, UPDATE_VAR_KEEP_FROM_STACK_LAST); + if (r != 0) goto err; + r = add_mem_num(reg, 0 /* not used */); + if (r != 0) goto err; + } + r = add_opcode(reg, OP_END); -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (scan_env.num_call > 0) { r = unset_addr_list_fix(&uslist, reg); unset_addr_list_end(&uslist); @@ -5847,7 +6369,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, reg->stack_pop_level = STACK_POP_LEVEL_FREE; } } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL else if (scan_env.num_call > 0) { unset_addr_list_end(&uslist); } @@ -5865,7 +6387,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, return r; err_unset: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (scan_env.num_call > 0) { unset_addr_list_end(&uslist); } @@ -5890,8 +6412,8 @@ static int onig_inited = 0; extern int onig_reg_init(regex_t* reg, OnigOptionType option, - OnigCaseFoldType case_fold_flag, - OnigEncoding enc, OnigSyntaxType* syntax) + OnigCaseFoldType case_fold_flag, + OnigEncoding enc, OnigSyntaxType* syntax) { int r; @@ -5938,7 +6460,7 @@ onig_reg_init(regex_t* reg, OnigOptionType option, (reg)->exact = (UChar* )NULL; (reg)->int_map = (int* )NULL; (reg)->int_map_backward = (int* )NULL; - (reg)->chain = (regex_t* )NULL; + REG_EXTPL(reg) = NULL; (reg)->p = (UChar* )NULL; (reg)->alloc = 0; @@ -6165,11 +6687,13 @@ OnigOpInfoType OnigOpInfo[] = { { OP_BEGIN_POSITION, "begin-position", ARG_NON }, { OP_BACKREF1, "backref1", ARG_NON }, { OP_BACKREF2, "backref2", ARG_NON }, - { OP_BACKREFN, "backrefn", ARG_MEMNUM }, - { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, + { OP_BACKREF_N, "backref-n", ARG_MEMNUM }, + { OP_BACKREF_N_IC, "backref-n-ic", ARG_SPECIAL }, { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, - { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, + { OP_BACKREF_WITH_LEVEL, "backref_with_level", ARG_SPECIAL }, + { OP_BACKREF_CHECK, "backref_check", ARG_SPECIAL }, + { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL }, { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, @@ -6181,6 +6705,7 @@ OnigOpInfoType OnigOpInfo[] = { { OP_FAIL, "fail", ARG_NON }, { OP_JUMP, "jump", ARG_RELADDR }, { OP_PUSH, "push", ARG_RELADDR }, + { OP_PUSH_SUPER, "push_SUPER", ARG_RELADDR }, { OP_POP, "pop", ARG_NON }, { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, @@ -6194,10 +6719,10 @@ OnigOpInfoType OnigOpInfo[] = { { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM }, { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM }, { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM }, - { OP_PUSH_POS, "push-pos", ARG_NON }, - { OP_POP_POS, "pop-pos", ARG_NON }, - { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, - { OP_FAIL_POS, "fail-pos", ARG_NON }, + { OP_PREC_READ_START, "push-pos", ARG_NON }, + { OP_PREC_READ_END, "pop-pos", ARG_NON }, + { OP_PUSH_PREC_READ_NOT, "push-prec-read-not", ARG_RELADDR }, + { OP_FAIL_PREC_READ_NOT, "fail-prec-read-not", ARG_NON }, { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, @@ -6205,6 +6730,8 @@ OnigOpInfoType OnigOpInfo[] = { { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, { OP_CALL, "call", ARG_ABSADDR }, { OP_RETURN, "return", ARG_NON }, + { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL }, + { OP_UPDATE_VAR, "update-var", ARG_SPECIAL }, { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, @@ -6272,6 +6799,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, MemNumType mem; StateCheckNumType scn; OnigCodePoint code; + OnigOptionType option; UChar *q; fprintf(f, "%s", op2name(*bp)); @@ -6421,7 +6949,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, } break; - case OP_BACKREFN_IC: + case OP_BACKREF_N_IC: mem = *((MemNumType* )bp); bp += SIZE_MEMNUM; fprintf(f, ":%d", mem); @@ -6429,6 +6957,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, case OP_BACKREF_MULTI_IC: case OP_BACKREF_MULTI: + case OP_BACKREF_CHECK: fputs(" ", f); GET_LENGTH_INC(len, bp); for (i = 0; i < len; i++) { @@ -6439,12 +6968,13 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, break; case OP_BACKREF_WITH_LEVEL: + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + /* fall */ + case OP_BACKREF_CHECK_WITH_LEVEL: { - OnigOptionType option; LengthType level; - GET_OPTION_INC(option, bp); - fprintf(f, ":%d", option); GET_LENGTH_INC(level, bp); fprintf(f, ":%d", level); @@ -6501,6 +7031,24 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, p_rel_addr(f, addr, bp, start); break; + case OP_PUSH_SAVE_VAL: + { + SaveType type; + GET_SAVE_TYPE_INC(type, bp); + GET_MEMNUM_INC(mem, bp); + fprintf(f, ":%d:%d", type, mem); + } + break; + + case OP_UPDATE_VAR: + { + UpdateVarType type; + GET_UPDATE_VAR_TYPE_INC(type, bp); + GET_MEMNUM_INC(mem, bp); + fprintf(f, ":%d:%d", type, mem); + } + break; + default: fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); @@ -6576,9 +7124,9 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case NODE_STR: + case NODE_STRING: fprintf(f, "<string%s:%p>", - (NSTRING_IS_RAW(node) ? "-raw" : ""), node); + (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node); for (p = STR_(node)->s; p < STR_(node)->end; p++) { if (*p >= 0x20 && *p < 0x7f) fputc(*p, f); @@ -6659,12 +7207,12 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case NODE_BREF: + case NODE_BACKREF: { int* p; - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); p = BACKREFS_P(br); - fprintf(f, "<backref:%p>", node); + fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node); for (i = 0; i < br->back_num; i++) { if (i > 0) fputs(", ", f); fprintf(f, "%d", p[i]); @@ -6672,7 +7220,7 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: { CallNode* cn = CALL_(node); @@ -6693,10 +7241,10 @@ print_indent_tree(FILE* f, Node* node, int indent) fprintf(f, "<enclosure:%p> ", node); switch (ENCLOSURE_(node)->type) { case ENCLOSURE_OPTION: - fprintf(f, "option:%d", ENCLOSURE_(node)->option); + fprintf(f, "option:%d", ENCLOSURE_(node)->o.options); break; case ENCLOSURE_MEMORY: - fprintf(f, "memory:%d", ENCLOSURE_(node)->regnum); + fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum); break; case ENCLOSURE_STOP_BACKTRACK: fprintf(f, "stop-bt"); @@ -6709,6 +7257,24 @@ print_indent_tree(FILE* f, Node* node, int indent) print_indent_tree(f, NODE_BODY(node), indent + add); break; + case NODE_GIMMICK: + fprintf(f, "<gimmick:%p> ", node); + switch (GIMMICK_(node)->type) { + case GIMMICK_FAIL: + fprintf(f, "fail"); + break; + case GIMMICK_KEEP: + fprintf(f, "keep:%d", GIMMICK_(node)->id); + break; + case GIMMICK_SAVE: + fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id); + break; + case GIMMICK_UPDATE_VAR: + fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id); + break; + } + break; + default: fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node)); break; |