From 98ab313fe496ae7c792db29c80bf6b23347484ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Thu, 14 Dec 2017 16:55:10 +0100 Subject: New upstream version 6.7.0 --- src/regcomp.c | 535 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 263 insertions(+), 272 deletions(-) (limited to 'src/regcomp.c') diff --git a/src/regcomp.c b/src/regcomp.c index ab5701c..63df18b 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -29,6 +29,8 @@ #include "regparse.h" +OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; + #if 0 typedef struct { int n; @@ -106,8 +108,6 @@ int_stack_pop(int_stack* s) } #endif -OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; - extern OnigCaseFoldType onig_get_default_case_fold_flag(void) { @@ -129,7 +129,7 @@ static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; static UChar* str_dup(UChar* s, UChar* end) { - int len = end - s; + int len = (int )(end - s); if (len > 0) { UChar* r = (UChar* )xmalloc(len + 1); @@ -145,12 +145,13 @@ static void swap_node(Node* a, Node* b) { Node c; + c = *a; *a = *b; *b = c; if (NODE_TYPE(a) == NODE_STRING) { StrNode* sn = STR_(a); if (sn->capa == 0) { - int len = sn->end - sn->s; + int len = (int )(sn->end - sn->s); sn->s = sn->buf; sn->end = sn->s + len; } @@ -159,7 +160,7 @@ swap_node(Node* a, Node* b) if (NODE_TYPE(b) == NODE_STRING) { StrNode* sn = STR_(b); if (sn->capa == 0) { - int len = sn->end - sn->s; + int len = (int )(sn->end - sn->s); sn->s = sn->buf; sn->end = sn->s + len; } @@ -192,6 +193,7 @@ static int bitset_is_empty(BitSetRef bs) { int i; + for (i = 0; i < (int )BITSET_SIZE; i++) { if (bs[i] != 0) return 0; } @@ -235,10 +237,9 @@ onig_bbuf_init(BBuf* buf, int size) static int unset_addr_list_init(UnsetAddrList* list, int size) { - UnsetAddr* p; - - p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); + UnsetAddr* p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); CHECK_NULL_RETURN_MEMERR(p); + list->num = 0; list->alloc = size; list->us = p; @@ -562,9 +563,9 @@ static int compile_length_string_node(Node* node, regex_t* reg) { int rlen, r, len, prev_len, slen, ambig; - OnigEncoding enc = reg->enc; UChar *p, *prev; StrNode* sn; + OnigEncoding enc = reg->enc; sn = STR_(node); if (sn->end <= sn->s) @@ -592,6 +593,7 @@ compile_length_string_node(Node* node, regex_t* reg) } p += len; } + r = add_compile_string_length(prev, prev_len, slen, reg, ambig); rlen += r; return rlen; @@ -603,16 +605,17 @@ compile_length_string_raw_node(StrNode* sn, regex_t* reg) if (sn->end <= sn->s) return 0; - return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); + return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s), + reg, 0); } static int compile_string_node(Node* node, regex_t* reg) { int r, len, prev_len, slen, ambig; - OnigEncoding enc = reg->enc; UChar *p, *prev, *end; StrNode* sn; + OnigEncoding enc = reg->enc; sn = STR_(node); if (sn->end <= sn->s) @@ -642,6 +645,7 @@ compile_string_node(Node* node, regex_t* reg) p += len; } + return add_compile_string(prev, prev_len, slen, reg, ambig); } @@ -651,7 +655,7 @@ compile_string_raw_node(StrNode* sn, regex_t* reg) if (sn->end <= sn->s) return 0; - return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0); + return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0); } static int @@ -892,8 +896,7 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg) len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; } else { - len = SIZE_OP_REPEAT_INC - + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; + len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; if (CKN_ON) len += SIZE_OP_STATE_CHECK; } @@ -1066,8 +1069,8 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg) if (tlen < 0) return tlen; /* anychar repeat */ - if (NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) { - if (qn->greedy && infinite) { + if (is_anychar_star_quantifier(qn)) { + if (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE) { if (IS_NOT_NULL(qn->next_head_exact)) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; else @@ -1130,7 +1133,8 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) if (tlen < 0) return tlen; - if (is_anychar_star_quantifier(qn)) { + if (is_anychar_star_quantifier(qn) && + (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); if (r != 0) return r; if (IS_NOT_NULL(qn->next_head_exact)) { @@ -1184,7 +1188,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); } else if (IS_NOT_NULL(qn->next_head_exact)) { r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, @@ -1194,7 +1198,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); + -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); } else { r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); @@ -1218,7 +1222,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) if (r != 0) return r; r = compile_tree(NODE_QUANT_BODY(qn), reg, env); } - else if (!infinite && qn->greedy && + else if (! infinite && qn->greedy && (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { int n = qn->upper - qn->lower; @@ -1228,13 +1232,13 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) for (i = 0; i < n; i++) { r = add_opcode_rel_addr(reg, OP_PUSH, - (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); + (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); if (r != 0) return r; r = compile_tree(NODE_QUANT_BODY(qn), reg, env); if (r != 0) return r; } } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ + else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, tlen); @@ -1343,7 +1347,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) len = SIZE_OP_MEMORY_START; len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum) - ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); + ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); } break; @@ -1357,7 +1361,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; } else { - len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT; + len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END; } break; @@ -1370,7 +1374,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) len = compile_length_tree(cond, reg); if (len < 0) return len; len += SIZE_OP_PUSH; - len += SIZE_OP_PUSH_STOP_BT + SIZE_OP_POP_STOP_BT; + len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END; if (IS_NOT_NULL(Then)) { tlen = compile_length_tree(Then, reg); @@ -1511,11 +1515,11 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); } else { - r = add_opcode(reg, OP_PUSH_STOP_BT); + r = add_opcode(reg, OP_ATOMIC_START); if (r != 0) return r; r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_POP_STOP_BT); + r = add_opcode(reg, OP_ATOMIC_END); } break; @@ -1526,7 +1530,7 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) Node* Then = node->te.Then; Node* Else = node->te.Else; - r = add_opcode(reg, OP_PUSH_STOP_BT); + r = add_opcode(reg, OP_ATOMIC_START); if (r != 0) return r; cond_len = compile_length_tree(cond, reg); @@ -1538,14 +1542,14 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) else then_len = 0; - jump_len = cond_len + then_len + SIZE_OP_POP_STOP_BT; + jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END; if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP; r = add_opcode_rel_addr(reg, OP_PUSH, jump_len); if (r != 0) return r; r = compile_tree(cond, reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_POP_STOP_BT); + r = add_opcode(reg, OP_ATOMIC_END); if (r != 0) return r; if (IS_NOT_NULL(Then)) { @@ -1586,13 +1590,13 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg) len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END; break; case ANCHOR_PREC_READ_NOT: - len = SIZE_OP_PUSH_PREC_READ_NOT + tlen + SIZE_OP_FAIL_PREC_READ_NOT; + len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END; break; case ANCHOR_LOOK_BEHIND: len = SIZE_OP_LOOK_BEHIND + tlen; break; case ANCHOR_LOOK_BEHIND_NOT: - len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT; + len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END; break; case ANCHOR_WORD_BOUNDARY: @@ -1670,11 +1674,11 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) case ANCHOR_PREC_READ_NOT: len = compile_length_tree(NODE_ANCHOR_BODY(node), reg); if (len < 0) return len; - r = add_opcode_rel_addr(reg, OP_PUSH_PREC_READ_NOT, len + SIZE_OP_FAIL_PREC_READ_NOT); + r = add_opcode_rel_addr(reg, OP_PREC_READ_NOT_START, len + SIZE_OP_PREC_READ_NOT_END); if (r != 0) return r; r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_FAIL_PREC_READ_NOT); + r = add_opcode(reg, OP_PREC_READ_NOT_END); break; case ANCHOR_LOOK_BEHIND: @@ -1698,9 +1702,10 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) case ANCHOR_LOOK_BEHIND_NOT: { int n; + len = compile_length_tree(NODE_ANCHOR_BODY(node), reg); - r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, - len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); + r = add_opcode_rel_addr(reg, OP_LOOK_BEHIND_NOT_START, + len + SIZE_OP_LOOK_BEHIND_NOT_END); if (r != 0) return r; if (node->char_len < 0) { r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n); @@ -1712,7 +1717,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) if (r != 0) return r; r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); + r = add_opcode(reg, OP_LOOK_BEHIND_NOT_END); } break; @@ -2203,6 +2208,7 @@ renumber_by_map(Node* node, GroupNumRemap* map) case NODE_ENCLOSURE: { EnclosureNode* en = ENCLOSURE_(node); + r = renumber_by_map(NODE_BODY(node), map); if (r != 0) return r; @@ -2259,6 +2265,7 @@ numbered_ref_check(Node* node) case NODE_ENCLOSURE: { EnclosureNode* en = ENCLOSURE_(node); + r = numbered_ref_check(NODE_BODY(node)); if (r != 0) return r; @@ -2402,6 +2409,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) { StrNode* sn = STR_(node); UChar *s = sn->s; + while (s < sn->end) { s += enclen(reg->enc, s); (*len)++; @@ -2412,6 +2420,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) case NODE_QUANT: { QuantNode* qn = QUANT_(node); + if (qn->lower == qn->upper) { if (qn->upper == 0) { *len = 0; @@ -2444,6 +2453,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) case NODE_ENCLOSURE: { EnclosureNode* en = ENCLOSURE_(node); + switch (en->type) { case ENCLOSURE_MEMORY: #ifdef USE_CALL @@ -2465,6 +2475,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) case ENCLOSURE_IF_ELSE: { int clen, elen; + r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level); if (r == 0) { if (IS_NOT_NULL(en->te.Then)) { @@ -2487,6 +2498,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } } break; + default: break; } @@ -2566,6 +2578,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) { int range; CClassNode* xc = CCLASS_(x); + switch (ytype) { case NODE_CTYPE: switch (CTYPE_(y)->ctype) { @@ -2616,8 +2629,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) for (i = 0; i < SINGLE_BYTE_SIZE; i++) { v = BITSET_AT(xc->bs, i); - if ((v != 0 && !IS_NCCLASS_NOT(xc)) || - (v == 0 && IS_NCCLASS_NOT(xc))) { + if ((v != 0 && !IS_NCCLASS_NOT(xc)) || (v == 0 && IS_NCCLASS_NOT(xc))) { v = BITSET_AT(yc->bs, i); if ((v != 0 && !IS_NCCLASS_NOT(yc)) || (v == 0 && IS_NCCLASS_NOT(yc))) @@ -2644,6 +2656,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) case NODE_STRING: { StrNode* xs = STR_(x); + if (NODE_STRING_LEN(x) == 0) break; @@ -2687,6 +2700,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) { UChar *q; StrNode* ys = STR_(y); + len = NODE_STRING_LEN(x); if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y); if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) { @@ -2700,7 +2714,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) } } break; - + default: break; } @@ -2925,7 +2939,7 @@ tree_min_len(Node* node, ScanEnv* env) case NODE_STRING: { StrNode* sn = STR_(node); - len = sn->end - sn->s; + len = (int )(sn->end - sn->s); } break; @@ -2972,7 +2986,8 @@ tree_min_len(Node* node, ScanEnv* env) break; case ENCLOSURE_IF_ELSE: { - int elen; + OnigLen elen; + len = tree_min_len(NODE_BODY(node), env); if (IS_NOT_NULL(en->te.Then)) len += tree_min_len(en->te.Then, env); @@ -3029,7 +3044,7 @@ tree_max_len(Node* node, ScanEnv* env) case NODE_STRING: { StrNode* sn = STR_(node); - len = sn->end - sn->s; + len = (OnigLen )(sn->end - sn->s); } break; @@ -3108,7 +3123,8 @@ tree_max_len(Node* node, ScanEnv* env) break; case ENCLOSURE_IF_ELSE: { - int tlen, elen; + OnigLen tlen, elen; + len = tree_max_len(NODE_BODY(node), env); if (IS_NOT_NULL(en->te.Then)) { tlen = tree_max_len(en->te.Then, env); @@ -3655,7 +3671,7 @@ update_string_node_case_fold(regex_t* reg, Node *node) StrNode* sn = STR_(node); end = sn->end; - sbuf_size = (end - sn->s) * 2; + sbuf_size = (int )(end - sn->s) * 2; sbuf = (UChar* )xmalloc(sbuf_size); CHECK_NULL_RETURN_MEMERR(sbuf); ebuf = sbuf + sbuf_size; @@ -3688,8 +3704,7 @@ update_string_node_case_fold(regex_t* reg, Node *node) } static int -expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, - regex_t* reg) +expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg) { int r; Node *node; @@ -3711,8 +3726,8 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, static int expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], - UChar *p, int slen, UChar *end, - regex_t* reg, Node **rnode) + UChar *p, int slen, UChar *end, regex_t* reg, + Node **rnode) { int r, i, j, len, varlen; Node *anode, *var_anode, *snode, *xnode, *an; @@ -3843,8 +3858,8 @@ expand_case_fold_string(Node* node, regex_t* reg) alt_num = 1; p = start; while (p < end) { - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, - p, end, items); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag, p, end, + items); if (n < 0) { r = n; goto err; @@ -3993,11 +4008,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) case NODE_QUANT: { + int var_num; int child_state = state; int add_state = 0; QuantNode* qn = QUANT_(node); Node* target = NODE_QUANT_BODY(qn); - int var_num; if (! IS_REPEAT_INFINITE(qn->upper)) { if (qn->upper > 1) { @@ -4203,7 +4218,7 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state) if (env->num_named > 0 && IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) { + ! ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) { return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; } @@ -4904,11 +4919,11 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) /* set skip map for Boyer-Moore search */ static int set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, - UChar skip[], int** int_skip) + UChar skip[], int** int_skip) { int i, len; - len = end - s; + len = (int )(end - s); if (len < ONIG_CHAR_TABLE_SIZE) { for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = len; @@ -4944,38 +4959,34 @@ typedef struct { } OptEnv; typedef struct { - int left_anchor; - int right_anchor; -} OptAncInfo; + int left; + int right; +} OptAnc; typedef struct { - MinMaxLen mmd; /* info position */ - OptAncInfo anc; - + MinMaxLen mmd; /* info position */ + OptAnc anc; int reach_end; int ignore_case; int len; UChar s[OPT_EXACT_MAXLEN]; -} OptExactInfo; +} OptExact; typedef struct { - MinMaxLen mmd; /* info position */ - OptAncInfo anc; - + MinMaxLen mmd; /* info position */ + OptAnc anc; int value; /* weighted value */ UChar map[ONIG_CHAR_TABLE_SIZE]; -} OptMapInfo; +} OptMap; typedef struct { - MinMaxLen len; - - OptAncInfo anc; - OptExactInfo exb; /* boundary */ - OptExactInfo exm; /* middle */ - OptExactInfo expr; /* prec read (?=...) */ - - OptMapInfo map; /* boundary */ -} NodeOptInfo; + MinMaxLen len; + OptAnc anc; + OptExact exb; /* boundary */ + OptExact exm; /* middle */ + OptExact expr; /* prec read (?=...) */ + OptMap map; /* boundary */ +} NodeOpt; static int @@ -5054,7 +5065,6 @@ is_equal_mml(MinMaxLen* a, MinMaxLen* b) return (a->min == b->min && a->max == b->max) ? 1 : 0; } - static void set_mml(MinMaxLen* mml, OnigLen min, OnigLen max) { @@ -5082,15 +5092,6 @@ add_mml(MinMaxLen* to, MinMaxLen* from) to->max = distance_add(to->max, from->max); } -#if 0 -static void -add_len_mml(MinMaxLen* to, OnigLen len) -{ - to->min = distance_add(to->min, len); - to->max = distance_add(to->max, len); -} -#endif - static void alt_merge_mml(MinMaxLen* to, MinMaxLen* from) { @@ -5105,42 +5106,42 @@ copy_opt_env(OptEnv* to, OptEnv* from) } static void -clear_opt_anc_info(OptAncInfo* anc) +clear_opt_anc_info(OptAnc* anc) { - anc->left_anchor = 0; - anc->right_anchor = 0; + anc->left = 0; + anc->right = 0; } static void -copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from) +copy_opt_anc_info(OptAnc* to, OptAnc* from) { *to = *from; } static void -concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, - OnigLen left_len, OnigLen right_len) +concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right, + OnigLen left_len, OnigLen right_len) { clear_opt_anc_info(to); - to->left_anchor = left->left_anchor; + to->left = left->left; if (left_len == 0) { - to->left_anchor |= right->left_anchor; + to->left |= right->left; } - to->right_anchor = right->right_anchor; + to->right = right->right; if (right_len == 0) { - to->right_anchor |= left->right_anchor; + to->right |= left->right; } else { - to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT); + to->right |= (left->right & ANCHOR_PREC_READ_NOT); } } static int -is_left_anchor(int anc) +is_left(int anc) { - if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || + if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || anc == ANCHOR_PREC_READ_NOT) return 0; @@ -5149,46 +5150,46 @@ is_left_anchor(int anc) } static int -is_set_opt_anc_info(OptAncInfo* to, int anc) +is_set_opt_anc_info(OptAnc* to, int anc) { - if ((to->left_anchor & anc) != 0) return 1; + if ((to->left & anc) != 0) return 1; - return ((to->right_anchor & anc) != 0 ? 1 : 0); + return ((to->right & anc) != 0 ? 1 : 0); } static void -add_opt_anc_info(OptAncInfo* to, int anc) +add_opt_anc_info(OptAnc* to, int anc) { - if (is_left_anchor(anc)) - to->left_anchor |= anc; + if (is_left(anc)) + to->left |= anc; else - to->right_anchor |= anc; + to->right |= anc; } static void -remove_opt_anc_info(OptAncInfo* to, int anc) +remove_opt_anc_info(OptAnc* to, int anc) { - if (is_left_anchor(anc)) - to->left_anchor &= ~anc; + if (is_left(anc)) + to->left &= ~anc; else - to->right_anchor &= ~anc; + to->right &= ~anc; } static void -alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add) +alt_merge_opt_anc_info(OptAnc* to, OptAnc* add) { - to->left_anchor &= add->left_anchor; - to->right_anchor &= add->right_anchor; + to->left &= add->left; + to->right &= add->right; } static int -is_full_opt_exact_info(OptExactInfo* ex) +is_full_opt_exact(OptExact* ex) { return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); } static void -clear_opt_exact_info(OptExactInfo* ex) +clear_opt_exact(OptExact* ex) { clear_mml(&ex->mmd); clear_opt_anc_info(&ex->anc); @@ -5199,17 +5200,17 @@ clear_opt_exact_info(OptExactInfo* ex) } static void -copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from) +copy_opt_exact(OptExact* to, OptExact* from) { *to = *from; } static void -concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) +concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc) { int i, j, len; UChar *p, *end; - OptAncInfo tanc; + OptAnc tanc; if (! to->ignore_case && add->ignore_case) { if (to->len >= add->len) return ; /* avoid */ @@ -5230,13 +5231,13 @@ concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc) to->reach_end = (p == end ? add->reach_end : 0); concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); - if (! to->reach_end) tanc.right_anchor = 0; + if (! to->reach_end) tanc.right = 0; copy_opt_anc_info(&to->anc, &tanc); } static void -concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, - int raw ARG_UNUSED, OnigEncoding enc) +concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, + int raw ARG_UNUSED, OnigEncoding enc) { int i, j, len; UChar *p; @@ -5252,17 +5253,17 @@ concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end, } static void -alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) +alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env) { int i, j, len; if (add->len == 0 || to->len == 0) { - clear_opt_exact_info(to); + clear_opt_exact(to); return ; } if (! is_equal_mml(&to->mmd, &add->mmd)) { - clear_opt_exact_info(to); + clear_opt_exact(to); return ; } @@ -5284,11 +5285,11 @@ alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env) to->ignore_case |= add->ignore_case; alt_merge_opt_anc_info(&to->anc, &add->anc); - if (! to->reach_end) to->anc.right_anchor = 0; + if (! to->reach_end) to->anc.right = 0; } static void -select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) +select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt) { int v1, v2; @@ -5299,7 +5300,7 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) return ; } else if (v1 == 0) { - copy_opt_exact_info(now, alt); + copy_opt_exact(now, alt); return ; } else if (v1 <= 2 && v2 <= 2) { @@ -5315,13 +5316,13 @@ select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt) if (alt->ignore_case == 0) v2 *= 2; if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) - copy_opt_exact_info(now, alt); + copy_opt_exact(now, alt); } static void -clear_opt_map_info(OptMapInfo* map) +clear_opt_map(OptMap* map) { - static const OptMapInfo clean_info = { + static const OptMap clean_info = { {0, 0}, {0, 0}, 0, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -5343,17 +5344,17 @@ clear_opt_map_info(OptMapInfo* map) } }; - xmemcpy(map, &clean_info, sizeof(OptMapInfo)); + xmemcpy(map, &clean_info, sizeof(OptMap)); } static void -copy_opt_map_info(OptMapInfo* to, OptMapInfo* from) +copy_opt_map(OptMap* to, OptMap* from) { *to = *from; } static void -add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) +add_char_opt_map(OptMap* map, UChar c, OnigEncoding enc) { if (map->map[c] == 0) { map->map[c] = 1; @@ -5362,14 +5363,14 @@ add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc) } static int -add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, - OnigEncoding enc, OnigCaseFoldType case_fold_flag) +add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end, + OnigEncoding enc, OnigCaseFoldType case_fold_flag) { OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; int i, n; - add_char_opt_map_info(map, p[0], enc); + add_char_opt_map(map, p[0], enc); case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); @@ -5377,14 +5378,14 @@ add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end, for (i = 0; i < n; i++) { ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf); - add_char_opt_map_info(map, buf[0], enc); + add_char_opt_map(map, buf[0], enc); } return 0; } static void -select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) +select_opt_map(OptMap* now, OptMap* alt) { static int z = 1<<15; /* 32768: something big value */ @@ -5392,18 +5393,18 @@ select_opt_map_info(OptMapInfo* now, OptMapInfo* alt) if (alt->value == 0) return ; if (now->value == 0) { - copy_opt_map_info(now, alt); + copy_opt_map(now, alt); return ; } v1 = z / now->value; v2 = z / alt->value; if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) - copy_opt_map_info(now, alt); + copy_opt_map(now, alt); } static int -comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) +comp_opt_exact_or_map(OptExact* e, OptMap* m) { #define COMP_EM_BASE 20 int ve, vm; @@ -5416,14 +5417,14 @@ comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m) } static void -alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) +alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add) { int i, val; /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */ if (to->value == 0) return ; if (add->value == 0 || to->mmd.max < add->mmd.min) { - clear_opt_map_info(to); + clear_opt_map(to); return ; } @@ -5443,7 +5444,7 @@ alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add) } static void -set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) +set_bound_node_opt_info(NodeOpt* opt, MinMaxLen* mmd) { copy_mml(&(opt->exb.mmd), mmd); copy_mml(&(opt->expr.mmd), mmd); @@ -5451,40 +5452,39 @@ set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd) } static void -clear_node_opt_info(NodeOptInfo* opt) +clear_node_opt_info(NodeOpt* opt) { clear_mml(&opt->len); clear_opt_anc_info(&opt->anc); - clear_opt_exact_info(&opt->exb); - clear_opt_exact_info(&opt->exm); - clear_opt_exact_info(&opt->expr); - clear_opt_map_info(&opt->map); + clear_opt_exact(&opt->exb); + clear_opt_exact(&opt->exm); + clear_opt_exact(&opt->expr); + clear_opt_map(&opt->map); } static void -copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from) +copy_node_opt_info(NodeOpt* to, NodeOpt* from) { *to = *from; } static void -concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) +concat_left_node_opt_info(OnigEncoding enc, NodeOpt* to, NodeOpt* add) { int exb_reach, exm_reach; - OptAncInfo tanc; + OptAnc tanc; concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max); copy_opt_anc_info(&to->anc, &tanc); if (add->exb.len > 0 && to->len.max == 0) { - concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, - to->len.max, add->len.max); + concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc, to->len.max, add->len.max); copy_opt_anc_info(&add->exb.anc, &tanc); } if (add->map.value > 0 && to->len.max == 0) { if (add->map.mmd.max == 0) - add->map.anc.left_anchor |= to->anc.left_anchor; + add->map.anc.left |= to->anc.left; } exb_reach = to->exb.reach_end; @@ -5495,16 +5495,16 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) if (add->exb.len > 0) { if (exb_reach) { - concat_opt_exact_info(&to->exb, &add->exb, enc); - clear_opt_exact_info(&add->exb); + concat_opt_exact(&to->exb, &add->exb, enc); + clear_opt_exact(&add->exb); } else if (exm_reach) { - concat_opt_exact_info(&to->exm, &add->exb, enc); - clear_opt_exact_info(&add->exb); + concat_opt_exact(&to->exm, &add->exb, enc); + clear_opt_exact(&add->exb); } } - select_opt_exact_info(enc, &to->exm, &add->exb); - select_opt_exact_info(enc, &to->exm, &add->exm); + select_opt_exact(enc, &to->exm, &add->exb); + select_opt_exact(enc, &to->exm, &add->exm); if (to->expr.len > 0) { if (add->len.max > 0) { @@ -5512,28 +5512,27 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add) to->expr.len = add->len.max; if (to->expr.mmd.max == 0) - select_opt_exact_info(enc, &to->exb, &to->expr); + select_opt_exact(enc, &to->exb, &to->expr); else - select_opt_exact_info(enc, &to->exm, &to->expr); + select_opt_exact(enc, &to->exm, &to->expr); } } else if (add->expr.len > 0) { - copy_opt_exact_info(&to->expr, &add->expr); + copy_opt_exact(&to->expr, &add->expr); } - select_opt_map_info(&to->map, &add->map); - + select_opt_map(&to->map, &add->map); add_mml(&to->len, &add->len); } static void -alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) +alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env) { - alt_merge_opt_anc_info (&to->anc, &add->anc); - alt_merge_opt_exact_info(&to->exb, &add->exb, env); - alt_merge_opt_exact_info(&to->exm, &add->exm, env); - alt_merge_opt_exact_info(&to->expr, &add->expr, env); - alt_merge_opt_map_info(env->enc, &to->map, &add->map); + alt_merge_opt_anc_info(&to->anc, &add->anc); + alt_merge_opt_exact(&to->exb, &add->exb, env); + alt_merge_opt_exact(&to->exm, &add->exm, env); + alt_merge_opt_exact(&to->expr, &add->expr, env); + alt_merge_opt_map(env->enc, &to->map, &add->map); alt_merge_mml(&to->len, &add->len); } @@ -5542,10 +5541,13 @@ alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) #define MAX_NODE_OPT_INFO_REF_COUNT 5 static int -optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) +optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) { + OnigEncoding enc; + int i; int r = 0; + enc = env->enc; clear_node_opt_info(opt); set_bound_node_opt_info(opt, &env->mmd); @@ -5553,15 +5555,15 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_LIST: { OptEnv nenv; - NodeOptInfo nopt; + NodeOpt nopt; Node* nd = node; copy_opt_env(&nenv, env); do { - r = optimize_node_left(NODE_CAR(nd), &nopt, &nenv); + r = optimize_nodes(NODE_CAR(nd), &nopt, &nenv); if (r == 0) { add_mml(&nenv.mmd, &nopt.len); - concat_left_node_opt_info(env->enc, opt, &nopt); + concat_left_node_opt_info(enc, opt, &nopt); } } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd))); } @@ -5569,11 +5571,11 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_ALT: { - NodeOptInfo nopt; + NodeOpt nopt; Node* nd = node; do { - r = optimize_node_left(NODE_CAR(nd), &nopt, env); + r = optimize_nodes(NODE_CAR(nd), &nopt, env); if (r == 0) { if (nd == node) copy_node_opt_info(opt, &nopt); else alt_merge_node_opt_info(opt, &nopt, env); @@ -5585,14 +5587,14 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_STRING: { StrNode* sn = STR_(node); - int slen = sn->end - sn->s; + int slen = (int )(sn->end - sn->s); int is_raw = NODE_STRING_IS_RAW(node); if (! NODE_STRING_IS_AMBIG(node)) { - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - NODE_STRING_IS_RAW(node), env->enc); + concat_opt_exact_str(&opt->exb, sn->s, sn->end, + NODE_STRING_IS_RAW(node), enc); if (slen > 0) { - add_char_opt_map_info(&opt->map, *(sn->s), env->enc); + add_char_opt_map(&opt->map, *(sn->s), enc); } set_mml(&opt->len, slen, slen); } @@ -5600,17 +5602,16 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) int max; if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) { - int n = onigenc_strlen(env->enc, sn->s, sn->end); - max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; + int n = onigenc_strlen(enc, sn->s, sn->end); + max = ONIGENC_MBC_MAXLEN_DIST(enc) * n; } else { - concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - is_raw, env->enc); + concat_opt_exact_str(&opt->exb, sn->s, sn->end, is_raw, enc); opt->exb.ignore_case = 1; if (slen > 0) { - r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end, - env->enc, env->case_fold_flag); + r = add_char_amb_opt_map(&opt->map, sn->s, sn->end, + enc, env->case_fold_flag); if (r != 0) break; } @@ -5627,22 +5628,22 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_CCLASS: { - int i, z; + int z; CClassNode* cc = CCLASS_(node); /* no need to check ignore case. (set in setup_tree()) */ if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) { - OnigLen min = ONIGENC_MBC_MINLEN(env->enc); - OnigLen max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + OnigLen min = ONIGENC_MBC_MINLEN(enc); + OnigLen max = ONIGENC_MBC_MAXLEN_DIST(enc); set_mml(&opt->len, min, max); } else { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { z = BITSET_AT(cc->bs, i); - if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + if ((z && ! IS_NCCLASS_NOT(cc)) || (! z && IS_NCCLASS_NOT(cc))) { + add_char_opt_map(&opt->map, (UChar )i, enc); } } set_mml(&opt->len, 1, 1); @@ -5652,10 +5653,10 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_CTYPE: { - int i, min, max; + int min, max; int range; - max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + max = ONIGENC_MBC_MAXLEN_DIST(enc); if (max == 1) { min = 1; @@ -5668,18 +5669,18 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) range = CTYPE_(node)->ascii_mode != 0 ? 128 : SINGLE_BYTE_SIZE; if (CTYPE_(node)->not != 0) { for (i = 0; i < range; i++) { - if (! ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + if (! ONIGENC_IS_CODE_WORD(enc, i)) { + add_char_opt_map(&opt->map, (UChar )i, enc); } } for (i = range; i < SINGLE_BYTE_SIZE; i++) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + add_char_opt_map(&opt->map, (UChar )i, enc); } } else { for (i = 0; i < range; i++) { - if (ONIGENC_IS_CODE_WORD(env->enc, i)) { - add_char_opt_map_info(&opt->map, (UChar )i, env->enc); + if (ONIGENC_IS_CODE_WORD(enc, i)) { + add_char_opt_map(&opt->map, (UChar )i, enc); } } } @@ -5687,7 +5688,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } } else { - min = ONIGENC_MBC_MINLEN(env->enc); + min = ONIGENC_MBC_MINLEN(enc); } set_mml(&opt->len, min, max); } @@ -5708,19 +5709,19 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case ANCHOR_PREC_READ: { - NodeOptInfo nopt; + NodeOpt nopt; - r = optimize_node_left(NODE_BODY(node), &nopt, env); + r = optimize_nodes(NODE_BODY(node), &nopt, env); if (r == 0) { if (nopt.exb.len > 0) - copy_opt_exact_info(&opt->expr, &nopt.exb); + copy_opt_exact(&opt->expr, &nopt.exb); else if (nopt.exm.len > 0) - copy_opt_exact_info(&opt->expr, &nopt.exm); + copy_opt_exact(&opt->expr, &nopt.exm); opt->expr.reach_end = 0; if (nopt.map.value > 0) - copy_opt_map_info(&opt->map, &nopt.map); + copy_opt_map(&opt->map, &nopt.map); } } break; @@ -5732,7 +5733,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_BACKREF: if (! NODE_IS_CHECKER(node)) { - int i; int* backs; OnigLen min, max, tmin, tmax; MemEnv* mem_env = SCANENV_MEMENV(env->scan_env); @@ -5762,7 +5762,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) else { OnigOptionType save = env->options; env->options = ENCLOSURE_(NODE_BODY(node))->o.options; - r = optimize_node_left(NODE_BODY(node), opt, env); + r = optimize_nodes(NODE_BODY(node), opt, env); env->options = save; } break; @@ -5770,12 +5770,11 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case NODE_QUANT: { - int i; OnigLen min, max; - NodeOptInfo nopt; + NodeOpt nopt; QuantNode* qn = QUANT_(node); - r = optimize_node_left(NODE_BODY(node), &nopt, env); + r = optimize_nodes(NODE_BODY(node), &nopt, env); if (r != 0) break; if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { @@ -5792,13 +5791,10 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) copy_node_opt_info(opt, &nopt); if (nopt.exb.len > 0) { if (nopt.exb.reach_end) { - for (i = 2; i <= qn->lower && - ! is_full_opt_exact_info(&opt->exb); i++) { - concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc); - } - if (i < qn->lower) { - opt->exb.reach_end = 0; + for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) { + concat_opt_exact(&opt->exb, &nopt.exb, enc); } + if (i < qn->lower) opt->exb.reach_end = 0; } } @@ -5831,7 +5827,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) OnigOptionType save = env->options; env->options = en->o.options; - r = optimize_node_left(NODE_BODY(node), opt, env); + r = optimize_nodes(NODE_BODY(node), opt, env); env->options = save; } break; @@ -5851,8 +5847,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) else #endif { - r = optimize_node_left(NODE_BODY(node), opt, env); - + r = optimize_nodes(NODE_BODY(node), opt, env); if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum)) remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); @@ -5861,28 +5856,28 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) break; case ENCLOSURE_STOP_BACKTRACK: - r = optimize_node_left(NODE_BODY(node), opt, env); + r = optimize_nodes(NODE_BODY(node), opt, env); break; case ENCLOSURE_IF_ELSE: { OptEnv nenv; - NodeOptInfo nopt; + NodeOpt nopt; copy_opt_env(&nenv, env); - r = optimize_node_left(NODE_ENCLOSURE_BODY(en), &nopt, &nenv); + r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &nopt, &nenv); if (r == 0) { add_mml(&nenv.mmd, &nopt.len); - concat_left_node_opt_info(env->enc, opt, &nopt); + concat_left_node_opt_info(enc, opt, &nopt); if (IS_NOT_NULL(en->te.Then)) { - r = optimize_node_left(en->te.Then, &nopt, &nenv); + r = optimize_nodes(en->te.Then, &nopt, &nenv); if (r == 0) { - concat_left_node_opt_info(env->enc, opt, &nopt); + concat_left_node_opt_info(enc, opt, &nopt); } } if (IS_NOT_NULL(en->te.Else)) { - r = optimize_node_left(en->te.Else, &nopt, env); + r = optimize_nodes(en->te.Else, &nopt, env); if (r == 0) alt_merge_node_opt_info(opt, &nopt, env); } @@ -5898,7 +5893,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) default: #ifdef ONIG_DEBUG - fprintf(stderr, "optimize_node_left: undefined node type %d\n", NODE_TYPE(node)); + fprintf(stderr, "optimize_nodes: undefined node type %d\n", NODE_TYPE(node)); #endif r = ONIGERR_TYPE_BUG; break; @@ -5908,7 +5903,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } static int -set_optimize_exact_info(regex_t* reg, OptExactInfo* e) +set_optimize_exact(regex_t* reg, OptExact* e) { int r; @@ -5933,11 +5928,11 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, - reg->map, &(reg->int_map)); + reg->map, &(reg->int_map)); if (r != 0) return r; reg->optimize = (allow_reverse != 0 - ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); + ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); } else { reg->optimize = ONIG_OPTIMIZE_EXACT; @@ -5948,14 +5943,14 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) reg->dmax = e->mmd.max; if (reg->dmin != INFINITE_LEN) { - reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact); + reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact); } return 0; } static void -set_optimize_map_info(regex_t* reg, OptMapInfo* m) +set_optimize_map(regex_t* reg, OptMap* m) { int i; @@ -5972,10 +5967,10 @@ set_optimize_map_info(regex_t* reg, OptMapInfo* m) } static void -set_sub_anchor(regex_t* reg, OptAncInfo* anc) +set_sub_anchor(regex_t* reg, OptAnc* anc) { - reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE; - reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; + reg->sub_anchor |= anc->left & ANCHOR_BEGIN_LINE; + reg->sub_anchor |= anc->right & ANCHOR_END_LINE; } #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) @@ -5985,28 +5980,27 @@ static void print_optimize_info(FILE* f, regex_t* reg); static int set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) { - int r; - NodeOptInfo opt; + NodeOpt opt; OptEnv env; env.enc = reg->enc; env.options = reg->options; env.case_fold_flag = reg->case_fold_flag; - env.scan_env = scan_env; + env.scan_env = scan_env; clear_mml(&env.mmd); - r = optimize_node_left(node, &opt, &env); + r = optimize_nodes(node, &opt, &env); if (r != 0) return r; - reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | + reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML | ANCHOR_LOOK_BEHIND); - if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) + if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML; - reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF | + reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF | ANCHOR_PREC_READ_NOT); if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { @@ -6015,25 +6009,24 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) } if (opt.exb.len > 0 || opt.exm.len > 0) { - select_opt_exact_info(reg->enc, &opt.exb, &opt.exm); - if (opt.map.value > 0 && - comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) { + select_opt_exact(reg->enc, &opt.exb, &opt.exm); + if (opt.map.value > 0 && comp_opt_exact_or_map(&opt.exb, &opt.map) > 0) { goto set_map; } else { - r = set_optimize_exact_info(reg, &opt.exb); + r = set_optimize_exact(reg, &opt.exb); set_sub_anchor(reg, &opt.exb.anc); } } else if (opt.map.value > 0) { set_map: - set_optimize_map_info(reg, &opt.map); + set_optimize_map(reg, &opt.map); set_sub_anchor(reg, &opt.map.anc); } else { - reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE; + reg->sub_anchor |= opt.anc.left & ANCHOR_BEGIN_LINE; if (opt.len.max == 0) - reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE; + reg->sub_anchor |= opt.anc.right & ANCHOR_END_LINE; } #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) @@ -6061,7 +6054,7 @@ clear_optimize_info(regex_t* reg) #ifdef ONIG_DEBUG static void print_enc_string(FILE* fp, OnigEncoding enc, - const UChar *s, const UChar *end) + const UChar *s, const UChar *end) { fprintf(fp, "\nPATTERN: /"); @@ -6261,7 +6254,7 @@ static void print_tree P_((FILE* f, Node* node)); extern int onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, - OnigErrorInfo* einfo) + OnigErrorInfo* einfo) { #define COMPILE_INIT_SIZE 20 @@ -6280,7 +6273,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, #endif if (reg->alloc == 0) { - init_size = (pattern_end - pattern) * 2; + init_size = (int )(pattern_end - pattern) * 2; if (init_size <= 0) init_size = COMPILE_INIT_SIZE; r = BB_INIT(reg, init_size); if (r != 0) goto end; @@ -6460,8 +6453,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, static int onig_inited = 0; extern int -onig_reg_init(regex_t* reg, OnigOptionType option, - OnigCaseFoldType case_fold_flag, +onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax) { int r; @@ -6521,9 +6513,10 @@ onig_reg_init(regex_t* reg, OnigOptionType option, } extern int -onig_new_without_alloc(regex_t* reg, const UChar* pattern, - const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, - OnigSyntaxType* syntax, OnigErrorInfo* einfo) +onig_new_without_alloc(regex_t* reg, + const UChar* pattern, const UChar* pattern_end, + OnigOptionType option, OnigEncoding enc, + OnigSyntaxType* syntax, OnigErrorInfo* einfo) { int r; @@ -6536,8 +6529,8 @@ onig_new_without_alloc(regex_t* reg, const UChar* pattern, extern int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, - OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, - OnigErrorInfo* einfo) + OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, + OnigErrorInfo* einfo) { int r; @@ -6775,13 +6768,13 @@ OnigOpInfoType OnigOpInfo[] = { { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM }, { OP_PREC_READ_START, "push-pos", ARG_NON }, { OP_PREC_READ_END, "pop-pos", ARG_NON }, - { OP_PUSH_PREC_READ_NOT, "push-prec-read-not", ARG_RELADDR }, - { OP_FAIL_PREC_READ_NOT, "fail-prec-read-not", ARG_NON }, - { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, - { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, + { OP_PREC_READ_NOT_START, "prec-read-not-start", ARG_RELADDR }, + { OP_PREC_READ_NOT_END, "prec-read-not-end", ARG_NON }, + { OP_ATOMIC_START, "atomic-start", ARG_NON }, + { OP_ATOMIC_END, "atomic-end", ARG_NON }, { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, - { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL }, - { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, + { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL }, + { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end", ARG_NON }, { OP_CALL, "call", ARG_ABSADDR }, { OP_RETURN, "return", ARG_NON }, { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL }, @@ -7077,7 +7070,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, fprintf(f, ":%d", len); break; - case OP_PUSH_LOOK_BEHIND_NOT: + case OP_LOOK_BEHIND_NOT_START: GET_RELADDR_INC(addr, bp); GET_LENGTH_INC(len, bp); fprintf(f, ":%d:", len); @@ -7113,8 +7106,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, break; default: - fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", - *--bp); + fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); } } if (nextp) *nextp = bp; @@ -7188,8 +7180,7 @@ print_indent_tree(FILE* f, Node* node, int indent) break; case NODE_STRING: - fprintf(f, "", - (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node); + fprintf(f, "", (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node); for (p = STR_(node)->s; p < STR_(node)->end; p++) { if (*p >= 0x20 && *p < 0x7f) fputc(*p, f); @@ -7303,8 +7294,8 @@ print_indent_tree(FILE* f, Node* node, int indent) case NODE_QUANT: fprintf(f, "{%d,%d}%s\n", node, - QUANT_(node)->lower, QUANT_(node)->upper, - (QUANT_(node)->greedy ? "" : "?")); + QUANT_(node)->lower, QUANT_(node)->upper, + (QUANT_(node)->greedy ? "" : "?")); print_indent_tree(f, NODE_BODY(node), indent + add); break; -- cgit v1.2.3