diff options
author | Jörg Frings-Fürst <debian@jff.email> | 2018-12-21 13:52:15 +0100 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff.email> | 2018-12-21 13:52:15 +0100 |
commit | b746c25a6800aabc468eee653d8bca8e7810f633 (patch) | |
tree | ae87806ec6d6b65e9d3ae1729ccf253e12324e96 /src/regparse.c | |
parent | eb5b295d37e9150e169cc95cbbc39f6ab7b88b2f (diff) | |
parent | 8d7d4edacab0298f96a3826819c01b4e8f6cbcfb (diff) |
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'src/regparse.c')
-rw-r--r-- | src/regparse.c | 348 |
1 file changed, 213 insertions, 135 deletions
diff --git a/src/regparse.c b/src/regparse.c index fcc05cf..9e42e71 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -71,7 +71,7 @@ OnigSyntaxType OnigSyntaxOniguruma = { ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 ) - , ( SYN_GNU_REGEX_BV | + , ( SYN_GNU_REGEX_BV | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | @@ -113,7 +113,7 @@ OnigSyntaxType OnigSyntaxRuby = { ONIG_SYN_OP2_CCLASS_SET_OP | ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL | ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | ONIG_SYN_OP2_ESC_H_XDIGIT | ONIG_SYN_OP2_ESC_U_HEX4 ) - , ( SYN_GNU_REGEX_BV | + , ( SYN_GNU_REGEX_BV | ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | @@ -198,17 +198,6 @@ onig_set_parse_depth_limit(unsigned int depth) return 0; } -static int -positive_int_multiply(int x, int y) -{ - if (x == 0 || y == 0) return 0; - - if (x < INT_MAX / y) - return x * y; - else - return -1; -} - static void bbuf_free(BBuf* bbuf) { @@ -966,6 +955,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) #ifdef USE_ST_LIBRARY if (IS_NULL(t)) { t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM); + CHECK_NULL_RETURN_MEMERR(t); reg->name_table = (void* )t; } e = (NameEntry* )xmalloc(sizeof(NameEntry)); @@ -1372,6 +1362,7 @@ callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc, #ifdef USE_ST_LIBRARY if (IS_NULL(t)) { t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM); + CHECK_NULL_RETURN_MEMERR(t); GlobalCalloutNameTable = t; } e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry)); @@ -1571,6 +1562,7 @@ onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type, } for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) { if 
(fe->arg_types[i] == ONIG_TYPE_STRING) { + if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT; OnigValue* val = opt_defaults + j; UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end); CHECK_NULL_RETURN_MEMERR(ds); @@ -1616,6 +1608,7 @@ onig_get_callout_start_func(regex_t* reg, int callout_num) CalloutListEntry* e; e = onig_reg_callout_list_at(reg, callout_num); + CHECK_NULL_RETURN(e); return e->start_func; } @@ -1623,6 +1616,7 @@ extern const UChar* onig_get_callout_tag_start(regex_t* reg, int callout_num) { CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num); + CHECK_NULL_RETURN(e); return e->tag_start; } @@ -1630,6 +1624,7 @@ extern const UChar* onig_get_callout_tag_end(regex_t* reg, int callout_num) { CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num); + CHECK_NULL_RETURN(e); return e->tag_end; } @@ -1736,7 +1731,7 @@ setup_ext_callout_list_values(regex_t* reg) int i, j; RegexExt* ext; - ext = REG_EXTP(reg); + ext = reg->extp; if (IS_NOT_NULL(ext->tag_table)) { onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set, (st_data_t )ext); @@ -1766,13 +1761,13 @@ setup_ext_callout_list_values(regex_t* reg) extern int onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num) { - RegexExt* ext = REG_EXTP(reg); + RegexExt* ext = reg->extp; if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0; if (callout_num > ext->callout_num) return 0; return (ext->callout_list[callout_num].flag & - CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 
1 : 0; + CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0; } static int @@ -1814,7 +1809,7 @@ onig_get_callout_num_by_tag(regex_t* reg, RegexExt* ext; CalloutTagVal e; - ext = REG_EXTP(reg); + ext = reg->extp; if (IS_NULL(ext) || IS_NULL(ext->tag_table)) return ONIGERR_INVALID_CALLOUT_TAG_NAME; @@ -1901,9 +1896,11 @@ callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end, if (r != ONIG_NORMAL) return r; ext = onig_get_regex_ext(reg); + CHECK_NULL_RETURN_MEMERR(ext); r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val); e = onig_reg_callout_list_at(reg, (int )entry_val); + CHECK_NULL_RETURN_MEMERR(e); e->tag_start = name; e->tag_end = name_end; @@ -2008,7 +2005,7 @@ onig_node_free(Node* node) switch (NODE_TYPE(node)) { case NODE_STRING: - if (STR_(node)->capa != 0 && + if (STR_(node)->capacity != 0 && IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) { xfree(STR_(node)->s); } @@ -2040,13 +2037,13 @@ onig_node_free(Node* node) xfree(BACKREF_(node)->back_dynamic); break; - case NODE_ENCLOSURE: + case NODE_BAG: if (NODE_BODY(node)) onig_node_free(NODE_BODY(node)); { - EnclosureNode* en = ENCLOSURE_(node); - if (en->type == ENCLOSURE_IF_ELSE) { + BagNode* en = BAG_(node); + if (en->type == BAG_IF_ELSE) { onig_node_free(en->te.Then); onig_node_free(en->te.Else); } @@ -2082,6 +2079,7 @@ node_new(void) Node* node; node = (Node* )xmalloc(sizeof(Node)); + CHECK_NULL_RETURN(node); xmemset(node, 0, sizeof(*node)); #ifdef DEBUG_NODE_FREE @@ -2138,6 +2136,8 @@ node_new_anychar_with_fixed_option(OnigOptionType option) Node* node; node = node_new_anychar(); + CHECK_NULL_RETURN(node); + ct = CTYPE_(node); ct->options = option; NODE_STATUS_ADD(node, FIXED_OPTION); @@ -2381,62 +2381,62 @@ node_new_quantifier(int lower, int upper, int by_number) } static Node* -node_new_enclosure(enum EnclosureType type) +node_new_bag(enum BagType type) { Node* node = node_new(); CHECK_NULL_RETURN(node); - NODE_SET_TYPE(node, NODE_ENCLOSURE); - ENCLOSURE_(node)->type = type; + 
NODE_SET_TYPE(node, NODE_BAG); + BAG_(node)->type = type; switch (type) { - case ENCLOSURE_MEMORY: - ENCLOSURE_(node)->m.regnum = 0; - ENCLOSURE_(node)->m.called_addr = -1; - ENCLOSURE_(node)->m.entry_count = 1; - ENCLOSURE_(node)->m.called_state = 0; + case BAG_MEMORY: + BAG_(node)->m.regnum = 0; + BAG_(node)->m.called_addr = -1; + BAG_(node)->m.entry_count = 1; + BAG_(node)->m.called_state = 0; break; - case ENCLOSURE_OPTION: - ENCLOSURE_(node)->o.options = 0; + case BAG_OPTION: + BAG_(node)->o.options = 0; break; - case ENCLOSURE_STOP_BACKTRACK: + case BAG_STOP_BACKTRACK: break; - case ENCLOSURE_IF_ELSE: - ENCLOSURE_(node)->te.Then = 0; - ENCLOSURE_(node)->te.Else = 0; + case BAG_IF_ELSE: + BAG_(node)->te.Then = 0; + BAG_(node)->te.Else = 0; break; } - ENCLOSURE_(node)->opt_count = 0; + BAG_(node)->opt_count = 0; return node; } extern Node* -onig_node_new_enclosure(int type) +onig_node_new_bag(enum BagType type) { - return node_new_enclosure(type); + return node_new_bag(type); } static Node* -node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else) +node_new_bag_if_else(Node* cond, Node* Then, Node* Else) { Node* n; - n = node_new_enclosure(ENCLOSURE_IF_ELSE); + n = node_new_bag(BAG_IF_ELSE); CHECK_NULL_RETURN(n); NODE_BODY(n) = cond; - ENCLOSURE_(n)->te.Then = Then; - ENCLOSURE_(n)->te.Else = Else; + BAG_(n)->te.Then = Then; + BAG_(n)->te.Else = Else; return n; } static Node* node_new_memory(int is_named) { - Node* node = node_new_enclosure(ENCLOSURE_MEMORY); + Node* node = node_new_bag(BAG_MEMORY); CHECK_NULL_RETURN(node); if (is_named != 0) NODE_STATUS_ADD(node, NAMED_GROUP); @@ -2447,12 +2447,37 @@ node_new_memory(int is_named) static Node* node_new_option(OnigOptionType option) { - Node* node = node_new_enclosure(ENCLOSURE_OPTION); + Node* node = node_new_bag(BAG_OPTION); + CHECK_NULL_RETURN(node); + BAG_(node)->o.options = option; + return node; +} + +static Node* +node_new_group(Node* content) +{ + Node* node; + + node = node_new(); 
CHECK_NULL_RETURN(node); - ENCLOSURE_(node)->o.options = option; + NODE_SET_TYPE(node, NODE_LIST); + NODE_CAR(node) = content; + NODE_CDR(node) = NULL_NODE; + return node; } +static Node* +node_drop_group(Node* group) +{ + Node* content; + + content = NODE_CAR(group); + NODE_CAR(group) = NULL_NODE; + onig_node_free(group); + return content; +} + static int node_new_fail(Node** node, ScanEnv* env) { @@ -2543,7 +2568,7 @@ onig_free_reg_callout_list(int n, CalloutListEntry* list) extern CalloutListEntry* onig_reg_callout_list_at(regex_t* reg, int num) { - RegexExt* ext = REG_EXTP(reg); + RegexExt* ext = reg->extp; CHECK_NULL_RETURN(ext); if (num <= 0 || num > ext->callout_num) @@ -2634,7 +2659,7 @@ make_extended_grapheme_cluster(Node** node, ScanEnv* env) ns[1] = NULL_NODE; r = ONIGERR_MEMORY; - ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0); + ns[0] = onig_node_new_anchor(ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0); if (IS_NULL(ns[0])) goto err; r = node_new_true_anychar(&ns[1], env); @@ -2661,7 +2686,7 @@ make_extended_grapheme_cluster(Node** node, ScanEnv* env) ns[0] = x; ns[1] = NULL_NODE; - x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); + x = node_new_bag(BAG_STOP_BACKTRACK); if (IS_NULL(x)) goto err; NODE_BODY(x) = ns[0]; @@ -2721,7 +2746,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent, ns[0] = x; if (possessive != 0) { - x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); + x = node_new_bag(BAG_STOP_BACKTRACK); if (IS_NULL(x)) goto err0; NODE_BODY(x) = ns[0]; @@ -2873,11 +2898,11 @@ is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody, quant = node; } else { - if (NODE_TYPE(node) == NODE_ENCLOSURE) { - EnclosureNode* en = ENCLOSURE_(node); - if (en->type == ENCLOSURE_STOP_BACKTRACK) { + if (NODE_TYPE(node) == NODE_BAG) { + BagNode* en = BAG_(node); + if (en->type == BAG_STOP_BACKTRACK) { *is_possessive = 1; - quant = NODE_ENCLOSURE_BODY(en); + quant = NODE_BAG_BODY(en); if 
(NODE_TYPE(quant) != NODE_QUANT) return 0; } @@ -3054,7 +3079,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter, else { r = make_absent_tail(&ns[5], &ns[6], id1, env); if (r != 0) goto err; - + x = make_list(7, ns); if (IS_NULL(x)) goto err0; } @@ -3066,7 +3091,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter, r = ONIGERR_MEMORY; err: for (i = 0; i < 7; i++) onig_node_free(ns[i]); - return r; + return r; } extern int @@ -3077,11 +3102,11 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end) if (addlen > 0) { int len = (int )(STR_(node)->end - STR_(node)->s); - if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) { + if (STR_(node)->capacity > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) { UChar* p; int capa = len + addlen + NODE_STRING_MARGIN; - if (capa <= STR_(node)->capa) { + if (capa <= STR_(node)->capacity) { onig_strcpy(STR_(node)->s + len, s, end); } else { @@ -3092,8 +3117,8 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end) p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa); CHECK_NULL_RETURN_MEMERR(p); - STR_(node)->s = p; - STR_(node)->capa = capa; + STR_(node)->s = p; + STR_(node)->capacity = capa; } } else { @@ -3125,24 +3150,24 @@ extern void onig_node_conv_to_str_node(Node* node, int flag) { NODE_SET_TYPE(node, NODE_STRING); - STR_(node)->flag = flag; - STR_(node)->capa = 0; - STR_(node)->s = STR_(node)->buf; - STR_(node)->end = STR_(node)->buf; + STR_(node)->flag = flag; + STR_(node)->capacity = 0; + STR_(node)->s = STR_(node)->buf; + STR_(node)->end = STR_(node)->buf; } extern void onig_node_str_clear(Node* node) { - if (STR_(node)->capa != 0 && + if (STR_(node)->capacity != 0 && IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) { xfree(STR_(node)->s); } - STR_(node)->capa = 0; - STR_(node)->flag = 0; - STR_(node)->s = STR_(node)->buf; - STR_(node)->end = STR_(node)->buf; + STR_(node)->capacity = 0; + 
STR_(node)->flag = 0; + STR_(node)->s = STR_(node)->buf; + STR_(node)->end = STR_(node)->buf; } static Node* @@ -3152,10 +3177,10 @@ node_new_str(const UChar* s, const UChar* end) CHECK_NULL_RETURN(node); NODE_SET_TYPE(node, NODE_STRING); - STR_(node)->capa = 0; - STR_(node)->flag = 0; - STR_(node)->s = STR_(node)->buf; - STR_(node)->end = STR_(node)->buf; + STR_(node)->capacity = 0; + STR_(node)->flag = 0; + STR_(node)->s = STR_(node)->buf; + STR_(node)->end = STR_(node)->buf; if (onig_node_str_cat(node, s, end)) { onig_node_free(node); return NULL; @@ -3173,6 +3198,7 @@ static Node* node_new_str_raw(UChar* s, UChar* end) { Node* node = node_new_str(s, end); + CHECK_NULL_RETURN(node); NODE_STRING_SET_RAW(node); return node; } @@ -3205,6 +3231,7 @@ str_node_split_last_char(Node* node, OnigEncoding enc) p = onigenc_get_prev_char_head(enc, sn->s, sn->end); if (p && p > sn->s) { /* can be split. */ rn = node_new_str(p, sn->end); + CHECK_NULL_RETURN(rn); if (NODE_STRING_IS_RAW(node)) NODE_STRING_SET_RAW(rn); @@ -3795,7 +3822,7 @@ is_invalid_quantifier_target(Node* node) return 1; break; - case NODE_ENCLOSURE: + case NODE_BAG: /* allow enclosed elements */ /* return is_invalid_quantifier_target(NODE_BODY(node)); */ break; @@ -3877,7 +3904,7 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode) if (pnum < 0 || cnum < 0) { if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) { if ((c->lower == c->upper) && ! 
IS_REPEAT_INFINITE(c->upper)) { - int n = positive_int_multiply(p->lower, c->lower); + int n = onig_positive_int_multiply(p->lower, c->lower); if (n >= 0) { p->lower = p->upper = n; NODE_BODY(pnode) = NODE_BODY(cnode); @@ -3972,7 +3999,7 @@ node_new_general_newline(Node** node, ScanEnv* env) if (r != 0) goto err1; } - x = node_new_enclosure_if_else(crnl, 0, ncc); + x = node_new_bag_if_else(crnl, 0, ncc); if (IS_NULL(x)) goto err1; *node = x; @@ -4552,7 +4579,7 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, OnigCodePoint x; UChar *q; UChar *p = from; - + while (p < to) { x = ONIGENC_MBC_TO_CODE(enc, p, to); q = p + enclen(enc, p); @@ -4701,12 +4728,12 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { PINC; tok->type = TK_CHAR_PROPERTY; - tok->u.prop.not = (c == 'P' ? 1 : 0); + tok->u.prop.not = c == 'P'; if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { PFETCH(c2); if (c2 == '^') { - tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); + tok->u.prop.not = tok->u.prop.not == 0; } else PUNFETCH; @@ -4986,38 +5013,38 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case 'b': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_BOUNDARY; + tok->u.anchor = ANCR_WORD_BOUNDARY; break; case 'B': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY; + tok->u.anchor = ANCR_NO_WORD_BOUNDARY; break; case 'y': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY; + tok->u.anchor = ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY; break; case 'Y': if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY; + tok->u.anchor = ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY; break; #ifdef USE_WORD_BEGIN_END case '<': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_BEGIN; + tok->u.anchor = ANCR_WORD_BEGIN; break; case '>': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_END; + tok->u.anchor = ANCR_WORD_END; break; #endif @@ -5092,26 +5119,26 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; begin_buf: tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_BEGIN_BUF; + tok->u.subtype = ANCR_BEGIN_BUF; break; case 'Z': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_SEMI_END_BUF; + tok->u.subtype = ANCR_SEMI_END_BUF; break; case 'z': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; end_buf: tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_END_BUF; + tok->u.subtype = ANCR_END_BUF; break; case 'G': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR)) break; tok->type = TK_ANCHOR; - tok->u.subtype = ANCHOR_BEGIN_POSITION; + tok->u.subtype = ANCR_BEGIN_POSITION; break; case '`': @@ -5214,7 +5241,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) goto skip_backref; } - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && + if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && (num <= env->num_mem || num <= 9)) { /* This spec. 
from GNU regex */ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node)) @@ -5382,13 +5409,13 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) { PINC; tok->type = TK_CHAR_PROPERTY; - tok->u.prop.not = (c == 'P' ? 1 : 0); + tok->u.prop.not = c == 'P'; if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { PFETCH(c); if (c == '^') { - tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0); + tok->u.prop.not = tok->u.prop.not == 0; } else PUNFETCH; @@ -5606,14 +5633,14 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = (IS_SINGLELINE(env->options) - ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); + ? ANCR_BEGIN_BUF : ANCR_BEGIN_LINE); break; case '$': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; tok->u.subtype = (IS_SINGLELINE(env->options) - ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); + ? 
ANCR_SEMI_END_BUF : ANCR_END_LINE); break; case '[': @@ -6509,7 +6536,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* en } static int parse_subexp(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env); + UChar** src, UChar* end, ScanEnv* env, int group_head); #ifdef USE_CALLOUT @@ -6605,6 +6632,7 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv if (r != 0) return r; ext = onig_get_regex_ext(env->reg); + CHECK_NULL_RETURN_MEMERR(ext); if (IS_NULL(ext->pattern)) { r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end); if (r != ONIG_NORMAL) return r; @@ -6625,6 +6653,11 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv } e = onig_reg_callout_list_at(env->reg, num); + if (IS_NULL(e)) { + xfree(contents); + return ONIGERR_MEMORY; + } + e->of = ONIG_CALLOUT_OF_CONTENTS; e->in = in; e->name_id = ONIG_NON_NAME_ID; @@ -6920,6 +6953,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en if (r != 0) return r; ext = onig_get_regex_ext(env->reg); + CHECK_NULL_RETURN_MEMERR(ext); if (IS_NULL(ext->pattern)) { r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end); if (r != ONIG_NORMAL) return r; @@ -6934,6 +6968,8 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en if (r != ONIG_NORMAL) return r; e = onig_reg_callout_list_at(env->reg, num); + CHECK_NULL_RETURN_MEMERR(e); + e->of = ONIG_CALLOUT_OF_NAME; e->in = in; e->name_id = name_id; @@ -6957,8 +6993,8 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en #endif static int -parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) +parse_bag(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env) { int r, num; Node *target; @@ -6985,20 +7021,20 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, 
group: r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = parse_subexp(np, tok, term, &p, end, env); + r = parse_subexp(np, tok, term, &p, end, env, 0); if (r < 0) return r; *src = p; return 1; /* group */ break; case '=': - *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0); + *np = onig_node_new_anchor(ANCR_PREC_READ, 0); break; case '!': /* preceding read */ - *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0); + *np = onig_node_new_anchor(ANCR_PREC_READ_NOT, 0); break; case '>': /* (?>...) stop backtrack */ - *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); + *np = node_new_bag(BAG_STOP_BACKTRACK); break; case '\'': @@ -7013,9 +7049,9 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; PFETCH(c); if (c == '=') - *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0); + *np = onig_node_new_anchor(ANCR_LOOK_BEHIND, 0); else if (c == '!') - *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0); + *np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, 0); else { if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { UChar *name; @@ -7043,7 +7079,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (r != 0) return r; *np = node_new_memory(1); CHECK_NULL_RETURN_MEMERR(*np); - ENCLOSURE_(*np)->m.regnum = num; + BAG_(*np)->m.regnum = num; if (list_capture != 0) MEM_STATUS_ON_SIMPLE(env->capture_history, num); env->num_named++; @@ -7080,7 +7116,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = parse_subexp(&absent, tok, term, &p, end, env); + r = parse_subexp(&absent, tok, term, &p, end, env, 1); if (r < 0) { onig_node_free(absent); return r; @@ -7258,7 +7294,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, condition_is_checker = 0; r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = 
parse_subexp(&condition, tok, term, &p, end, env); + r = parse_subexp(&condition, tok, term, &p, end, env, 0); if (r < 0) { onig_node_free(condition); return r; @@ -7299,7 +7335,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, onig_node_free(condition); return r; } - r = parse_subexp(&target, tok, term, &p, end, env); + r = parse_subexp(&target, tok, term, &p, end, env, 1); if (r < 0) { onig_node_free(condition); onig_node_free(target); @@ -7327,7 +7363,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } } - *np = node_new_enclosure_if_else(condition, Then, Else); + *np = node_new_bag_if_else(condition, Then, Else); if (IS_NULL(*np)) { onig_node_free(condition); onig_node_free(Then); @@ -7362,7 +7398,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, else if (num >= (int )MEM_STATUS_BITS_NUM) { return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; } - ENCLOSURE_(*np)->m.regnum = num; + BAG_(*np)->m.regnum = num; MEM_STATUS_ON_SIMPLE(env->capture_history, num); } else { @@ -7431,7 +7467,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, env->options = option; r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = parse_subexp(&target, tok, term, &p, end, env); + r = parse_subexp(&target, tok, term, &p, end, env, 0); env->options = prev; if (r < 0) { onig_node_free(target); @@ -7472,13 +7508,13 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) return num; - ENCLOSURE_(*np)->m.regnum = num; + BAG_(*np)->m.regnum = num; } CHECK_NULL_RETURN_MEMERR(*np); r = fetch_token(tok, &p, end, env); if (r < 0) return r; - r = parse_subexp(&target, tok, term, &p, end, env); + r = parse_subexp(&target, tok, term, &p, end, env, 0); if (r < 0) { onig_node_free(target); return r; @@ -7486,10 +7522,10 @@ parse_enclosure(Node** np, OnigToken* 
tok, int term, UChar** src, UChar* end, NODE_BODY(*np) = target; - if (NODE_TYPE(*np) == NODE_ENCLOSURE) { - if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) { + if (NODE_TYPE(*np) == NODE_BAG) { + if (BAG_(*np)->type == BAG_MEMORY) { /* Don't move this to previous of parse_subexp() */ - r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np); + r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np); if (r != 0) return r; } } @@ -7518,7 +7554,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) switch (NODE_TYPE(target)) { case NODE_STRING: - if (! group) { + if (group == 0) { if (str_node_can_be_split(target, env->enc)) { Node* n = str_node_split_last_char(target, env->enc); if (IS_NOT_NULL(n)) { @@ -7710,7 +7746,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg) static int parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) + ScanEnv* env, int group_head) { int r, len, group = 0; Node* qn; @@ -7724,22 +7760,35 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, case TK_ALT: case TK_EOT: end_of_token: - *np = node_new_empty(); - return tok->type; + *np = node_new_empty(); + CHECK_NULL_RETURN_MEMERR(*np); + return tok->type; break; case TK_SUBEXP_OPEN: - r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env); + r = parse_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env); if (r < 0) return r; - if (r == 1) group = 1; + if (r == 1) { /* group */ + if (group_head == 0) + group = 1; + else { + Node* target = *np; + *np = node_new_group(target); + if (IS_NULL(*np)) { + onig_node_free(target); + return ONIGERR_MEMORY; + } + group = 2; + } + } else if (r == 2) { /* option only */ Node* target; OnigOptionType prev = env->options; - env->options = ENCLOSURE_(*np)->o.options; + env->options = BAG_(*np)->o.options; r = fetch_token(tok, src, end, env); if (r < 0) return r; - r = parse_subexp(&target, tok, term, src, end, env); + r = parse_subexp(&target, 
tok, term, src, end, env, 0); env->options = prev; if (r < 0) { onig_node_free(target); @@ -7968,6 +8017,7 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, int ascii_mode = IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0; *np = onig_node_new_anchor(tok->u.anchor, ascii_mode); + CHECK_NULL_RETURN_MEMERR(*np); } break; @@ -7976,8 +8026,10 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) { if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS)) return ONIGERR_TARGET_OF_REPEAT_OPERATOR_NOT_SPECIFIED; - else + else { *np = node_new_empty(); + CHECK_NULL_RETURN_MEMERR(*np); + } } else { goto tk_byte; @@ -8023,14 +8075,23 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, repeat: if (r == TK_OP_REPEAT || r == TK_INTERVAL) { + Node* target; + if (is_invalid_quantifier_target(*targetp)) return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID; qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, - (r == TK_INTERVAL ? 
1 : 0)); + r == TK_INTERVAL); CHECK_NULL_RETURN_MEMERR(qn); QUANT_(qn)->greedy = tok->u.repeat.greedy; - r = set_quantifier(qn, *targetp, group, env); + if (group == 2) { + target = node_drop_group(*np); + *np = NULL_NODE; + } + else { + target = *targetp; + } + r = set_quantifier(qn, target, group, env); if (r < 0) { onig_node_free(qn); return r; @@ -8038,7 +8099,7 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (tok->u.repeat.possessive != 0) { Node* en; - en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); + en = node_new_bag(BAG_STOP_BACKTRACK); if (IS_NULL(en)) { onig_node_free(qn); return ONIGERR_MEMORY; @@ -8077,13 +8138,13 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, static int parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) + ScanEnv* env, int group_head) { int r; Node *node, **headp; *top = NULL; - r = parse_exp(&node, tok, term, src, end, env); + r = parse_exp(&node, tok, term, src, end, env, group_head); if (r < 0) { onig_node_free(node); return r; @@ -8094,9 +8155,14 @@ parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, } else { *top = node_new_list(node, NULL); + if (IS_NULL(*top)) { + onig_node_free(node); + return ONIGERR_MEMORY; + } + headp = &(NODE_CDR(*top)); while (r != TK_EOT && r != term && r != TK_ALT) { - r = parse_exp(&node, tok, term, src, end, env); + r = parse_exp(&node, tok, term, src, end, env, 0); if (r < 0) { onig_node_free(node); return r; @@ -8120,7 +8186,7 @@ parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ static int parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) + ScanEnv* env, int group_head) { int r; Node *node, **headp; @@ -8129,7 +8195,8 @@ parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, env->parse_depth++; if (env->parse_depth > ParseDepthLimit) return 
ONIGERR_PARSE_DEPTH_LIMIT_OVER; - r = parse_branch(&node, tok, term, src, end, env); + + r = parse_branch(&node, tok, term, src, end, env, group_head); if (r < 0) { onig_node_free(node); return r; @@ -8140,16 +8207,27 @@ parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, } else if (r == TK_ALT) { *top = onig_node_new_alt(node, NULL); + if (IS_NULL(*top)) { + onig_node_free(node); + return ONIGERR_MEMORY; + } + headp = &(NODE_CDR(*top)); while (r == TK_ALT) { r = fetch_token(tok, src, end, env); if (r < 0) return r; - r = parse_branch(&node, tok, term, src, end, env); + r = parse_branch(&node, tok, term, src, end, env, 0); if (r < 0) { onig_node_free(node); return r; } *headp = onig_node_new_alt(node, NULL); + if (IS_NULL(*headp)) { + onig_node_free(node); + onig_node_free(*top); + return ONIGERR_MEMORY; + } + headp = &(NODE_CDR(*headp)); } @@ -8177,7 +8255,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) r = fetch_token(&tok, src, end, env); if (r < 0) return r; - r = parse_subexp(top, &tok, TK_EOT, src, end, env); + r = parse_subexp(top, &tok, TK_EOT, src, end, env, 0); if (r < 0) return r; return 0; @@ -8193,7 +8271,7 @@ make_call_zero_body(Node* node, ScanEnv* env, Node** rnode) CHECK_NULL_RETURN_MEMERR(x); NODE_BODY(x) = node; - ENCLOSURE_(x)->m.regnum = 0; + BAG_(x)->m.regnum = 0; r = scan_env_set_mem_node(env, 0, x); if (r != 0) { onig_node_free(x); @@ -8249,7 +8327,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, reg->num_mem = env->num_mem; #ifdef USE_CALLOUT - ext = REG_EXTP(reg); + ext = reg->extp; if (IS_NOT_NULL(ext) && ext->callout_num > 0) { r = setup_ext_callout_list_values(reg); } |