From 995dfd20e78ad16cec678df25422ce032650e3aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Sun, 23 Jul 2017 10:18:42 +0200 Subject: New upstream version 6.4.0 --- src/regparse.c | 791 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 464 insertions(+), 327 deletions(-) (limited to 'src/regparse.c') diff --git a/src/regparse.c b/src/regparse.c index 8153513..a5f8e5b 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -159,14 +159,25 @@ bbuf_clone(BBuf** rto, BBuf* from) *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); CHECK_NULL_RETURN_MEMERR(to); r = BBUF_INIT(to, from->alloc); - if (r != 0) return r; + if (r != 0) { + xfree(to->p); + *rto = 0; + return r; + } to->used = from->used; xmemcpy(to->p, from->p, from->used); return 0; } -#define BACKREF_REL_TO_ABS(rel_no, env) \ - ((env)->num_mem + 1 + (rel_no)) +static int backref_rel_to_abs(int rel_no, ScanEnv* env) +{ + if (rel_no > 0) { + return env->num_mem + rel_no; + } + else { + return env->num_mem + 1 + rel_no; + } +} #define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) @@ -179,7 +190,7 @@ bbuf_clone(BBuf** rto, BBuf* from) #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ if (! ONIGENC_IS_SINGLEBYTE(enc)) {\ r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\ - if (r) return r;\ + if (r != 0) return r;\ }\ } while (0) @@ -422,6 +433,8 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key, int result; key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key)); + CHECK_NULL_RETURN_MEMERR(key); + key->s = (UChar* )str_key; key->end = (UChar* )end_key; result = onig_st_insert(table, (st_data_t )key, value); @@ -519,7 +532,7 @@ onig_names_free(regex_t* reg) NameTable* t; r = names_clear(reg); - if (r) return r; + if (r != 0) return r; t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) onig_st_free_table(t); @@ -700,7 +713,7 @@ onig_names_free(regex_t* reg) NameTable* t; r = names_clear(reg); - if (r) return r; + if (r != 0) return r; t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) xfree(t); @@ -762,6 +775,7 @@ onig_number_of_names(regex_t* reg) static int name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) { + int r; int alloc; NameEntry* e; NameTable* t = (NameTable* )reg->name_table; @@ -783,8 +797,9 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) if (IS_NULL(e->name)) { xfree(e); return ONIGERR_MEMORY; } - onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), - (HashDataType )e); + r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), + (HashDataType )e); + if (r < 0) return r; e->name_len = name_end - name; e->back_num = 0; @@ -918,14 +933,14 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name, extern int onig_name_to_group_numbers(regex_t* reg, const UChar* name, - const UChar* name_end, int** nums) + const UChar* name_end, int** nums) { return ONIG_NO_SUPPORT_CONFIG; } extern int onig_name_to_backref_number(regex_t* reg, const UChar* name, - const UChar* name_end, OnigRegion* region) + const UChar* name_end, OnigRegion* region) { return ONIG_NO_SUPPORT_CONFIG; } @@ -962,29 +977,32 @@ onig_noname_group_capture_is_active(regex_t* reg) } -#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 +#define INIT_SCANENV_MEMENV_ALLOC_SIZE 16 static void scan_env_clear(ScanEnv* env) { - int i; - - BIT_STATUS_CLEAR(env->capture_history); - BIT_STATUS_CLEAR(env->bt_mem_start); - BIT_STATUS_CLEAR(env->bt_mem_end); - BIT_STATUS_CLEAR(env->backrefed_mem); + MEM_STATUS_CLEAR(env->capture_history); + MEM_STATUS_CLEAR(env->bt_mem_start); + MEM_STATUS_CLEAR(env->bt_mem_end); + MEM_STATUS_CLEAR(env->backrefed_mem); env->error = (UChar* )NULL; env->error_end = (UChar* )NULL; env->num_call = 0; + +#ifdef USE_SUBEXP_CALL + env->unset_addr_list = NULL; + env->has_call_zero = 0; +#endif + env->num_mem = 0; #ifdef USE_NAMED_GROUP env->num_named = 0; #endif - env->mem_alloc = 0; - env->mem_nodes_dynamic = (Node** )NULL; + env->mem_alloc = 0; + env->mem_env_dynamic = (MemEnv* )NULL; - for (i = 0; i < SCANENV_MEMNODES_SIZE; i++) - env->mem_nodes_static[i] = NULL_NODE; + xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static)); #ifdef USE_COMBINATION_EXPLOSION_CHECK env->num_comb_exp_check = 0; @@ -999,30 +1017,35 @@ static int scan_env_add_mem_entry(ScanEnv* env) { int i, need, alloc; - Node** p; + MemEnv* p; need = env->num_mem + 1; if (need > MaxCaptureNum && MaxCaptureNum != 0) return ONIGERR_TOO_MANY_CAPTURES; - if (need >= SCANENV_MEMNODES_SIZE) { + if (need >= SCANENV_MEMENV_SIZE) { if (env->mem_alloc <= need) { - if (IS_NULL(env->mem_nodes_dynamic)) { - alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE; - p = (Node** )xmalloc(sizeof(Node*) * alloc); - xmemcpy(p, env->mem_nodes_static, - sizeof(Node*) * SCANENV_MEMNODES_SIZE); + if (IS_NULL(env->mem_env_dynamic)) { + alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE; + p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc); + CHECK_NULL_RETURN_MEMERR(p); + xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static)); } else { alloc = env->mem_alloc * 2; - p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); + p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc); + CHECK_NULL_RETURN_MEMERR(p); } - CHECK_NULL_RETURN_MEMERR(p); - for (i = env->num_mem + 1; i < alloc; i++) - p[i] = NULL_NODE; + for (i = env->num_mem + 1; i < alloc; i++) { + p[i].node = NULL_NODE; +#if 0 + p[i].in = 0; + p[i].recursion = 0; +#endif + } - env->mem_nodes_dynamic = p; + env->mem_env_dynamic = p; env->mem_alloc = alloc; } } @@ -1035,7 +1058,7 @@ static int scan_env_set_mem_node(ScanEnv* env, int num, Node* node) { if (env->num_mem >= num) - SCANENV_MEM_NODES(env)[num] = node; + SCANENV_MEMENV(env)[num].node = node; else return ONIGERR_PARSER_BUG; return 0; @@ -1051,19 +1074,19 @@ onig_node_free(Node* node) fprintf(stderr, "onig_node_free: %p\n", node); #endif - switch (NTYPE(node)) { - case NT_STR: - if (NSTR(node)->capa != 0 && - IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { - xfree(NSTR(node)->s); + switch (NODE_TYPE(node)) { + case NODE_STR: + if (STR_(node)->capa != 0 && + IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) { + xfree(STR_(node)->s); } break; - case NT_LIST: - case NT_ALT: - onig_node_free(NCAR(node)); + case NODE_LIST: + case NODE_ALT: + onig_node_free(NODE_CAR(node)); { - Node* next_node = NCDR(node); + Node* next_node = NODE_CDR(node); xfree(node); node = next_node; @@ -1071,34 +1094,29 @@ onig_node_free(Node* node) } break; - case NT_CCLASS: + case NODE_CCLASS: { - CClassNode* cc = NCCLASS(node); + CClassNode* cc = CCLASS_(node); - if (IS_NCCLASS_SHARE(cc)) return ; if (cc->mbuf) bbuf_free(cc->mbuf); } break; - case NT_QTFR: - if (NQTFR(node)->target) - onig_node_free(NQTFR(node)->target); - break; - - case NT_ENCLOSE: - if (NENCLOSE(node)->target) - onig_node_free(NENCLOSE(node)->target); + case NODE_BREF: + if (IS_NOT_NULL(BREF_(node)->back_dynamic)) + xfree(BREF_(node)->back_dynamic); break; - case NT_BREF: - if (IS_NOT_NULL(NBREF(node)->back_dynamic)) - xfree(NBREF(node)->back_dynamic); + case NODE_QUANT: + case NODE_ENCLOSURE: + case NODE_ANCHOR: + if (NODE_BODY(node)) + onig_node_free(NODE_BODY(node)); break; - case NT_ANCHOR: - if (NANCHOR(node)->target) - onig_node_free(NANCHOR(node)->target); + case NODE_CTYPE: + case NODE_CALL: break; } @@ -1111,7 +1129,9 @@ node_new(void) Node* node; node = (Node* )xmalloc(sizeof(Node)); - /* xmemset(node, 0, sizeof(Node)); */ + //xmemset(node, 0, sizeof(node->u.base)); + xmemset(node, 0, sizeof(*node)); + #ifdef DEBUG_NODE_FREE fprintf(stderr, "node_new: %p\n", node); #endif @@ -1134,8 +1154,8 @@ node_new_cclass(void) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_CCLASS); - initialize_cclass(NCCLASS(node)); + SET_NODE_TYPE(node, NODE_CCLASS); + initialize_cclass(CCLASS_(node)); return node; } @@ -1145,19 +1165,9 @@ node_new_ctype(int type, int not) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_CTYPE); - NCTYPE(node)->ctype = type; - NCTYPE(node)->not = not; - return node; -} - -static Node* -node_new_anychar(void) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CANY); + SET_NODE_TYPE(node, NODE_CTYPE); + CTYPE_(node)->ctype = type; + CTYPE_(node)->not = not; return node; } @@ -1167,9 +1177,9 @@ node_new_list(Node* left, Node* right) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_LIST); - NCAR(node) = left; - NCDR(node) = right; + SET_NODE_TYPE(node, NODE_LIST); + NODE_CAR(node) = left; + NODE_CDR(node) = right; return node; } @@ -1188,10 +1198,10 @@ onig_node_list_add(Node* list, Node* x) if (IS_NULL(n)) return NULL_NODE; if (IS_NOT_NULL(list)) { - while (IS_NOT_NULL(NCDR(list))) - list = NCDR(list); + while (IS_NOT_NULL(NODE_CDR(list))) + list = NODE_CDR(list); - NCDR(list) = n; + NODE_CDR(list) = n; } return n; @@ -1203,9 +1213,9 @@ onig_node_new_alt(Node* left, Node* right) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_ALT); - NCAR(node) = left; - NCDR(node) = right; + SET_NODE_TYPE(node, NODE_ALT); + NODE_CAR(node) = left; + NODE_CDR(node) = right; return node; } @@ -1215,10 +1225,9 @@ onig_node_new_anchor(int type) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_ANCHOR); - NANCHOR(node)->type = type; - NANCHOR(node)->target = NULL; - NANCHOR(node)->char_len = -1; + SET_NODE_TYPE(node, NODE_ANCHOR); + ANCHOR_(node)->type = type; + ANCHOR_(node)->char_len = -1; return node; } @@ -1234,31 +1243,30 @@ node_new_backref(int back_num, int* backrefs, int by_name, CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_BREF); - NBREF(node)->state = 0; - NBREF(node)->back_num = back_num; - NBREF(node)->back_dynamic = (int* )NULL; + SET_NODE_TYPE(node, NODE_BREF); + BREF_(node)->back_num = back_num; + BREF_(node)->back_dynamic = (int* )NULL; if (by_name != 0) - NBREF(node)->state |= NST_NAME_REF; + NODE_STATUS_ADD(node, NST_BY_NAME); #ifdef USE_BACKREF_WITH_LEVEL if (exist_level != 0) { - NBREF(node)->state |= NST_NEST_LEVEL; - NBREF(node)->nest_level = nest_level; + NODE_STATUS_ADD(node, NST_NEST_LEVEL); + BREF_(node)->nest_level = nest_level; } #endif for (i = 0; i < back_num; i++) { if (backrefs[i] <= env->num_mem && - IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { - NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */ + IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) { + NODE_STATUS_ADD(node, NST_RECURSION); /* /...(\1).../ */ break; } } if (back_num <= NODE_BACKREFS_SIZE) { for (i = 0; i < back_num; i++) - NBREF(node)->back_static[i] = backrefs[i]; + BREF_(node)->back_static[i] = backrefs[i]; } else { int* p = (int* )xmalloc(sizeof(int) * back_num); @@ -1266,7 +1274,7 @@ node_new_backref(int back_num, int* backrefs, int by_name, onig_node_free(node); return NULL; } - NBREF(node)->back_dynamic = p; + BREF_(node)->back_dynamic = p; for (i = 0; i < back_num; i++) p[i] = backrefs[i]; } @@ -1275,17 +1283,17 @@ node_new_backref(int back_num, int* backrefs, int by_name, #ifdef USE_SUBEXP_CALL static Node* -node_new_call(UChar* name, UChar* name_end, int gnum) +node_new_call(UChar* name, UChar* name_end, int gnum, int by_number) { Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_CALL); - NCALL(node)->state = 0; - NCALL(node)->target = NULL_NODE; - NCALL(node)->name = name; - NCALL(node)->name_end = name_end; - NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */ + SET_NODE_TYPE(node, NODE_CALL); + CALL_(node)->by_number = by_number; + CALL_(node)->name = name; + CALL_(node)->name_end = name_end; + CALL_(node)->group_num = gnum; + CALL_(node)->entry_count = 1; return node; } #endif @@ -1296,69 +1304,76 @@ node_new_quantifier(int lower, int upper, int by_number) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_QTFR); - NQTFR(node)->state = 0; - NQTFR(node)->target = NULL; - NQTFR(node)->lower = lower; - NQTFR(node)->upper = upper; - NQTFR(node)->greedy = 1; - NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY; - NQTFR(node)->head_exact = NULL_NODE; - NQTFR(node)->next_head_exact = NULL_NODE; - NQTFR(node)->is_refered = 0; + SET_NODE_TYPE(node, NODE_QUANT); + QUANT_(node)->lower = lower; + QUANT_(node)->upper = upper; + QUANT_(node)->greedy = 1; + QUANT_(node)->body_empty_info = QUANT_BODY_IS_NOT_EMPTY; + QUANT_(node)->head_exact = NULL_NODE; + QUANT_(node)->next_head_exact = NULL_NODE; + QUANT_(node)->is_refered = 0; if (by_number != 0) - NQTFR(node)->state |= NST_BY_NUMBER; + NODE_STATUS_ADD(node, NST_BY_NUMBER); #ifdef USE_COMBINATION_EXPLOSION_CHECK - NQTFR(node)->comb_exp_check_num = 0; + QUANT_(node)->comb_exp_check_num = 0; #endif return node; } static Node* -node_new_enclose(int type) +node_new_enclosure(int type) { Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_ENCLOSE); - NENCLOSE(node)->type = type; - NENCLOSE(node)->state = 0; - NENCLOSE(node)->regnum = 0; - NENCLOSE(node)->option = 0; - NENCLOSE(node)->target = NULL; - NENCLOSE(node)->call_addr = -1; - NENCLOSE(node)->opt_count = 0; + SET_NODE_TYPE(node, NODE_ENCLOSURE); + ENCLOSURE_(node)->type = type; + + switch (type) { + case ENCLOSURE_MEMORY: + ENCLOSURE_(node)->m.regnum = 0; + ENCLOSURE_(node)->m.called_addr = -1; + ENCLOSURE_(node)->m.entry_count = 1; + ENCLOSURE_(node)->m.called_state = 0; + break; + + case ENCLOSURE_OPTION: + ENCLOSURE_(node)->o.option = 0; + break; + + case ENCLOSURE_STOP_BACKTRACK: + break; + } + + ENCLOSURE_(node)->opt_count = 0; return node; } extern Node* -onig_node_new_enclose(int type) +onig_node_new_enclosure(int type) { - return node_new_enclose(type); + return node_new_enclosure(type); } static Node* -node_new_enclose_memory(OnigOptionType option, int is_named) +node_new_enclosure_memory(int is_named) { - Node* node = node_new_enclose(ENCLOSE_MEMORY); + Node* node = node_new_enclosure(ENCLOSURE_MEMORY); CHECK_NULL_RETURN(node); if (is_named != 0) - SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP); + NODE_STATUS_ADD(node, NST_NAMED_GROUP); -#ifdef USE_SUBEXP_CALL - NENCLOSE(node)->option = option; -#endif return node; } static Node* node_new_option(OnigOptionType option) { - Node* node = node_new_enclose(ENCLOSE_OPTION); + Node* node = node_new_enclosure(ENCLOSURE_OPTION); CHECK_NULL_RETURN(node); - NENCLOSE(node)->option = option; + ENCLOSURE_(node)->o.option = option; return node; } @@ -1368,31 +1383,31 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end) int addlen = end - s; if (addlen > 0) { - int len = NSTR(node)->end - NSTR(node)->s; + int len = STR_(node)->end - STR_(node)->s; - if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { + if (STR_(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { UChar* p; int capa = len + addlen + NODE_STR_MARGIN; - if (capa <= NSTR(node)->capa) { - onig_strcpy(NSTR(node)->s + len, s, end); + if (capa <= STR_(node)->capa) { + onig_strcpy(STR_(node)->s + len, s, end); } else { - if (NSTR(node)->s == NSTR(node)->buf) - p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end, + if (STR_(node)->s == STR_(node)->buf) + p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end, s, end, capa); else - p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa); + p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa); CHECK_NULL_RETURN_MEMERR(p); - NSTR(node)->s = p; - NSTR(node)->capa = capa; + STR_(node)->s = p; + STR_(node)->capa = capa; } } else { - onig_strcpy(NSTR(node)->s + len, s, end); + onig_strcpy(STR_(node)->s + len, s, end); } - NSTR(node)->end = NSTR(node)->s + len + addlen; + STR_(node)->end = STR_(node)->s + len + addlen; } return 0; @@ -1417,25 +1432,25 @@ node_str_cat_char(Node* node, UChar c) extern void onig_node_conv_to_str_node(Node* node, int flag) { - SET_NTYPE(node, NT_STR); - NSTR(node)->flag = flag; - NSTR(node)->capa = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; + SET_NODE_TYPE(node, NODE_STR); + STR_(node)->flag = flag; + STR_(node)->capa = 0; + STR_(node)->s = STR_(node)->buf; + STR_(node)->end = STR_(node)->buf; } extern void onig_node_str_clear(Node* node) { - if (NSTR(node)->capa != 0 && - IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { - xfree(NSTR(node)->s); + if (STR_(node)->capa != 0 && + IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) { + xfree(STR_(node)->s); } - NSTR(node)->capa = 0; - NSTR(node)->flag = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; + STR_(node)->capa = 0; + STR_(node)->flag = 0; + STR_(node)->s = STR_(node)->buf; + STR_(node)->end = STR_(node)->buf; } static Node* @@ -1444,11 +1459,11 @@ node_new_str(const UChar* s, const UChar* end) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_STR); - NSTR(node)->capa = 0; - NSTR(node)->flag = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; + SET_NODE_TYPE(node, NODE_STR); + STR_(node)->capa = 0; + STR_(node)->flag = 0; + STR_(node)->s = STR_(node)->buf; + STR_(node)->end = STR_(node)->buf; if (onig_node_str_cat(node, s, end)) { onig_node_free(node); return NULL; @@ -1495,7 +1510,7 @@ str_node_split_last_char(StrNode* sn, OnigEncoding enc) p = onigenc_get_prev_char_head(enc, sn->s, sn->end); if (p && p > sn->s) { /* can be split. */ n = node_new_str(p, sn->end); - if ((sn->flag & NSTR_RAW) != 0) + if ((sn->flag & STRING_RAW) != 0) NSTRING_SET_RAW(n); sn->end = (UChar* )p; @@ -1540,7 +1555,7 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) PFETCH_READY; num = 0; - while (!PEND) { + while (! PEND) { PFETCH(c); if (ONIGENC_IS_CODE_DIGIT(enc, c)) { val = (unsigned int )DIGITVAL(c); @@ -1596,7 +1611,7 @@ scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, PFETCH_READY; num = 0; - while (!PEND && maxlen-- != 0) { + while (! PEND && maxlen-- != 0) { PFETCH(c); if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { val = ODIGITVAL(c); @@ -1631,9 +1646,13 @@ new_code_range(BBuf** pbuf) BBuf* bbuf; bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_MEMERR(*pbuf); - r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE); - if (r) return r; + CHECK_NULL_RETURN_MEMERR(bbuf); + r = BBUF_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE); + if (r != 0) { + xfree(bbuf); + *pbuf = 0; + return r; + } n = 0; BBUF_WRITE_CODE_POINT(bbuf, 0, n); @@ -1654,7 +1673,7 @@ add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) if (IS_NULL(*pbuf)) { r = new_code_range(pbuf); - if (r) return r; + if (r != 0) return r; bbuf = *pbuf; n = 0; } @@ -2069,27 +2088,27 @@ conv_backslash_value(OnigCodePoint c, ScanEnv* env) static int is_invalid_quantifier_target(Node* node) { - switch (NTYPE(node)) { - case NT_ANCHOR: + switch (NODE_TYPE(node)) { + case NODE_ANCHOR: return 1; break; - case NT_ENCLOSE: + case NODE_ENCLOSURE: /* allow enclosed elements */ - /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */ + /* return is_invalid_quantifier_target(NODE_BODY(node)); */ break; - case NT_LIST: + case NODE_LIST: do { - if (! is_invalid_quantifier_target(NCAR(node))) return 0; - } while (IS_NOT_NULL(node = NCDR(node))); + if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0; + } while (IS_NOT_NULL(node = NODE_CDR(node))); return 0; break; - case NT_ALT: + case NODE_ALT: do { - if (is_invalid_quantifier_target(NCAR(node))) return 1; - } while (IS_NOT_NULL(node = NCDR(node))); + if (is_invalid_quantifier_target(NODE_CAR(node))) return 1; + } while (IS_NOT_NULL(node = NODE_CDR(node))); break; default: @@ -2100,7 +2119,7 @@ is_invalid_quantifier_target(Node* node) /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ static int -popular_quantifier_num(QtfrNode* q) +popular_quantifier_num(QuantNode* q) { if (q->greedy) { if (q->lower == 0) { @@ -2147,10 +2166,10 @@ extern void onig_reduce_nested_quantifier(Node* pnode, Node* cnode) { int pnum, cnum; - QtfrNode *p, *c; + QuantNode *p, *c; - p = NQTFR(pnode); - c = NQTFR(cnode); + p = QUANT_(pnode); + c = QUANT_(cnode); pnum = popular_quantifier_num(p); cnum = popular_quantifier_num(c); if (pnum < 0 || cnum < 0) return ; @@ -2160,36 +2179,36 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode) *pnode = *cnode; break; case RQ_A: - p->target = c->target; + NODE_BODY(pnode) = NODE_BODY(cnode); p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1; break; case RQ_AQ: - p->target = c->target; + NODE_BODY(pnode) = NODE_BODY(cnode); p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0; break; case RQ_QQ: - p->target = c->target; + NODE_BODY(pnode) = NODE_BODY(cnode); p->lower = 0; p->upper = 1; p->greedy = 0; break; case RQ_P_QQ: - p->target = cnode; + NODE_BODY(pnode) = cnode; p->lower = 0; p->upper = 1; p->greedy = 0; c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1; return ; break; case RQ_PQ_Q: - p->target = cnode; + NODE_BODY(pnode) = cnode; p->lower = 0; p->upper = 1; p->greedy = 1; c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0; return ; break; case RQ_ASIS: - p->target = cnode; + NODE_BODY(pnode) = cnode; return ; break; } - c->target = NULL_NODE; + NODE_BODY(cnode) = NULL_NODE; onig_node_free(cnode); } @@ -2253,6 +2272,7 @@ typedef struct { UChar* name; UChar* name_end; int gnum; + int by_number; } call; struct { int ctype; @@ -2441,19 +2461,27 @@ get_name_end_code_point(OnigCodePoint start) return (OnigCodePoint )0; } +enum REF_NUM { + IS_NOT_NUM = 0, + IS_ABS_NUM = 1, + IS_REL_NUM = 2 +}; + #ifdef USE_NAMED_GROUP #ifdef USE_BACKREF_WITH_LEVEL /* \k, \k \k, \k \k<-num+n>, \k<-num-n> + \k<+num+n>, \k<+num-n> */ static int fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, - int* rback_num, int* rlevel) + UChar** rname_end, ScanEnv* env, + int* rback_num, int* rlevel, enum REF_NUM* num_type) { - int r, sign, is_num, exist_level; + int r, sign, exist_level; + int digit_count; OnigCodePoint end_code; OnigCodePoint c = 0; OnigEncoding enc = env->enc; @@ -2463,12 +2491,14 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, PFETCH_READY; *rback_num = 0; - is_num = exist_level = 0; + exist_level = 0; + *num_type = IS_NOT_NUM; sign = 1; pnum_head = *src; end_code = get_name_end_code_point(start_code); + digit_count = 0; name_end = end; r = 0; if (PEND) { @@ -2480,13 +2510,19 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, return ONIGERR_EMPTY_GROUP_NAME; if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; + *num_type = IS_ABS_NUM; + digit_count++; } else if (c == '-') { - is_num = 2; + *num_type = IS_REL_NUM; sign = -1; pnum_head = p; } + else if (c == '+') { + *num_type = IS_REL_NUM; + sign = 1; + pnum_head = p; + } else if (!ONIGENC_IS_CODE_WORD(enc, c)) { r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } @@ -2496,17 +2532,18 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, name_end = p; PFETCH(c); if (c == end_code || c == ')' || c == '+' || c == '-') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + if (*num_type != IS_NOT_NUM && digit_count == 0) + r = ONIGERR_INVALID_GROUP_NAME; break; } - if (is_num != 0) { + if (*num_type != IS_NOT_NUM) { if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; + digit_count++; } else { r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; + *num_type = IS_NOT_NUM; } } else if (!ONIGENC_IS_CODE_WORD(enc, c)) { @@ -2539,16 +2576,20 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, } err: - r = ONIGERR_INVALID_GROUP_NAME; name_end = end; + err2: + r = ONIGERR_INVALID_GROUP_NAME; } end: if (r == 0) { - if (is_num != 0) { + if (*num_type != IS_NOT_NUM) { *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; - else if (*rback_num == 0) goto err; + else if (*rback_num == 0) { + if (*num_type == IS_REL_NUM) + goto err2; + } *rback_num *= sign; } @@ -2570,9 +2611,11 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, */ static int fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* rback_num, int ref) + UChar** rname_end, ScanEnv* env, int* rback_num, + enum REF_NUM* num_type, int ref) { - int r, is_num, sign; + int r, sign; + int digit_count; OnigCodePoint end_code; OnigCodePoint c = 0; OnigEncoding enc = env->enc; @@ -2584,10 +2627,11 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, end_code = get_name_end_code_point(start_code); + digit_count = 0; name_end = end; pnum_head = *src; r = 0; - is_num = 0; + *num_type = IS_NOT_NUM; sign = 1; if (PEND) { return ONIGERR_EMPTY_GROUP_NAME; @@ -2599,21 +2643,30 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, if (ONIGENC_IS_CODE_DIGIT(enc, c)) { if (ref == 1) - is_num = 1; + *num_type = IS_ABS_NUM; else { r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; } + digit_count++; } else if (c == '-') { if (ref == 1) { - is_num = 2; + *num_type = IS_REL_NUM; sign = -1; pnum_head = p; } else { r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; + } + } + else if (c == '+') { + if (ref == 1) { + *num_type = IS_REL_NUM; + sign = 1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; } } else if (!ONIGENC_IS_CODE_WORD(enc, c)) { @@ -2626,20 +2679,22 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, name_end = p; PFETCH_S(c); if (c == end_code || c == ')') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + if (*num_type != IS_NOT_NUM && digit_count == 0) + r = ONIGERR_INVALID_GROUP_NAME; break; } - if (is_num != 0) { + if (*num_type != IS_NOT_NUM) { if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; + digit_count++; } else { if (!ONIGENC_IS_CODE_WORD(enc, c)) r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; else r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; + + *num_type = IS_NOT_NUM; } } else { @@ -2654,12 +2709,14 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, name_end = end; } - if (is_num != 0) { + if (*num_type != IS_NOT_NUM) { *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; else if (*rback_num == 0) { - r = ONIGERR_INVALID_GROUP_NAME; - goto err; + if (*num_type == IS_REL_NUM) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } } *rback_num *= sign; @@ -2687,9 +2744,11 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, #else static int fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* rback_num, int ref) + UChar** rname_end, ScanEnv* env, int* rback_num, + enum REF_NUM* num_type, int ref) { - int r, is_num, sign; + int r, sign; + int digit_count; OnigCodePoint end_code; OnigCodePoint c = 0; UChar *name_end; @@ -2702,10 +2761,11 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, end_code = get_name_end_code_point(start_code); + digit_count = 0; *rname_end = name_end = end; r = 0; pnum_head = *src; - is_num = 0; + *num_type = IS_ABS_NUM; sign = 1; if (PEND) { @@ -2717,37 +2777,61 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, return ONIGERR_EMPTY_GROUP_NAME; if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; + *num_type = IS_ABS_NUM; + digit_count++; } else if (c == '-') { - is_num = 2; - sign = -1; - pnum_head = p; + if (ref == 1) { + *num_type = IS_REL_NUM; + sign = -1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + } + } + else if (c == '+') { + if (ref == 1) { + *num_type = IS_REL_NUM; + sign = 1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + } } else { r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } } - while (!PEND) { + while (! PEND) { name_end = p; PFETCH(c); if (c == end_code || c == ')') break; - if (! ONIGENC_IS_CODE_DIGIT(enc, c)) + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) + digit_count++; + else r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } if (r == 0 && c != end_code) { r = ONIGERR_INVALID_GROUP_NAME; name_end = end; } + if (r == 0 && digit_count == 0) { + r = ONIGERR_INVALID_GROUP_NAME; + } if (r == 0) { *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; else if (*rback_num == 0) { - r = ONIGERR_INVALID_GROUP_NAME; - goto err; + if (*num_type == IS_REL_NUM) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } } *rback_num *= sign; @@ -3418,7 +3502,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num])) + if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node)) return ONIGERR_INVALID_BACKREF; } @@ -3466,30 +3550,31 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) UChar* name_end; int* backs; int back_num; + enum REF_NUM num_type; prev = p; #ifdef USE_BACKREF_WITH_LEVEL name_end = NULL_UCHARP; /* no need. escape gcc warning. */ r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end, - env, &back_num, &tok->u.backref.level); + env, &back_num, &tok->u.backref.level, &num_type); if (r == 1) tok->u.backref.exist_level = 1; else tok->u.backref.exist_level = 0; #else - r = fetch_name(&p, end, &name_end, env, &back_num, 1); + r = fetch_name(&p, end, &name_end, env, &back_num, &num_type, 1); #endif if (r < 0) return r; - if (back_num != 0) { - if (back_num < 0) { - back_num = BACKREF_REL_TO_ABS(back_num, env); - if (back_num <= 0) - return ONIGERR_INVALID_BACKREF; + if (num_type != IS_NOT_NUM) { + if (num_type == IS_REL_NUM) { + back_num = backref_rel_to_abs(back_num, env); } + if (back_num <= 0) + return ONIGERR_INVALID_BACKREF; if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { if (back_num > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[back_num])) + IS_NULL(SCANENV_MEMENV(env)[back_num].node)) return ONIGERR_INVALID_BACKREF; } tok->type = TK_BACKREF; @@ -3508,7 +3593,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) int i; for (i = 0; i < num; i++) { if (backs[i] > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) + IS_NULL(SCANENV_MEMENV(env)[backs[i]].node)) return ONIGERR_INVALID_BACKREF; } } @@ -3538,15 +3623,30 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (c == '<' || c == '\'') { int gnum; UChar* name_end; + enum REF_NUM num_type; prev = p; - r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1); + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, + &gnum, &num_type, 1); if (r < 0) return r; + if (num_type != IS_NOT_NUM) { + if (num_type == IS_REL_NUM) { + gnum = backref_rel_to_abs(gnum, env); + if (gnum < 0) + return ONIGERR_UNDEFINED_GROUP_REFERENCE; + } + tok->u.call.by_number = 1; + tok->u.call.gnum = gnum; + } + else { + tok->u.call.by_number = 0; + tok->u.call.gnum = 0; + } + tok->type = TK_CALL; tok->u.call.name = prev; tok->u.call.name_end = name_end; - tok->u.call.gnum = gnum; } else PUNFETCH; @@ -4040,7 +4140,7 @@ parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, *np = node_new_cclass(); CHECK_NULL_RETURN_MEMERR(*np); - cc = NCCLASS(*np); + cc = CCLASS_(*np); r = add_ctype_to_cc(cc, ctype, 0, env); if (r != 0) return r; if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); @@ -4080,9 +4180,7 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, } } - if (*state != CCS_START) - *state = CCS_VALUE; - + *state = CCS_VALUE; *type = CCV_CLASS; return 0; } @@ -4222,7 +4320,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, *np = node = node_new_cclass(); CHECK_NULL_RETURN_MEMERR(node); - cc = NCCLASS(node); + cc = CCLASS_(node); and_start = 0; state = CCS_START; @@ -4335,7 +4433,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, case TK_CHAR_TYPE: r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env); - if (r != 0) return r; + if (r != 0) goto err; next_class: r = next_state_class(cc, &vs, &val_type, &state, env); @@ -4344,12 +4442,13 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, case TK_CHAR_PROPERTY: { - int ctype; - - ctype = fetch_char_property_to_ctype(&p, end, env); - if (ctype < 0) return ctype; + int ctype = fetch_char_property_to_ctype(&p, end, env); + if (ctype < 0) { + r = ctype; + goto err; + } r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env); - if (r != 0) return r; + if (r != 0) goto err; goto next_class; } break; @@ -4369,6 +4468,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, CC_ESC_WARN(env, (UChar* )"-"); goto range_end_val; } + + if (val_type == CCV_CLASS) { + r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; + goto err; + } + state = CCS_RANGE; } else if (state == CCS_START) { @@ -4418,10 +4523,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, onig_node_free(anode); goto cc_open_err; } - acc = NCCLASS(anode); + acc = CCLASS_(anode); r = or_cclass(cc, acc, env->enc); - onig_node_free(anode); + cc_open_err: if (r != 0) goto err; } @@ -4488,9 +4593,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, NCCLASS_CLEAR_NOT(cc); if (IS_NCCLASS_NOT(cc) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { - int is_empty; - - is_empty = (IS_NULL(cc->mbuf) ? 1 : 0); + int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0); if (is_empty != 0) BITSET_IS_EMPTY(cc->bs, is_empty); @@ -4510,7 +4613,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, return 0; err: - if (cc != NCCLASS(*np)) + if (cc != CCLASS_(*np)) bbuf_free(cc->mbuf); return r; } @@ -4519,18 +4622,17 @@ static int parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env); static int -parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, +parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env) { int r, num; Node *target; OnigOptionType option; OnigCodePoint c; - OnigEncoding enc = env->enc; - #ifdef USE_NAMED_GROUP int list_capture; #endif + OnigEncoding enc = env->enc; UChar* p = *src; PFETCH_READY; @@ -4563,7 +4665,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); break; case '>': /* (?>...) stop backtrack */ - *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); + *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); break; #ifdef USE_NAMED_GROUP @@ -4588,6 +4690,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { UChar *name; UChar *name_end; + enum REF_NUM num_type; PUNFETCH; c = '<'; @@ -4597,21 +4700,22 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, named_group2: name = p; - r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0); + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, + &num_type, 0); if (r < 0) return r; num = scan_env_add_mem_entry(env); if (num < 0) return num; - if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM) + if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM) return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; r = name_add(env->reg, name, name_end, num, env); if (r != 0) return r; - *np = node_new_enclose_memory(env->option, 1); + *np = node_new_enclosure_memory(1); CHECK_NULL_RETURN_MEMERR(*np); - NENCLOSE(*np)->regnum = num; + ENCLOSURE_(*np)->m.regnum = num; if (list_capture != 0) - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + MEM_STATUS_ON_SIMPLE(env->capture_history, num); env->num_named++; } else { @@ -4637,17 +4741,17 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, PUNFETCH; } #endif - *np = node_new_enclose_memory(env->option, 0); + *np = node_new_enclosure_memory(0); CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) { return num; } - else if (num >= (int )BIT_STATUS_BITS_NUM) { + else if (num >= (int )MEM_STATUS_BITS_NUM) { return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; } - NENCLOSE(*np)->regnum = num; - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + ENCLOSURE_(*np)->m.regnum = num; + MEM_STATUS_ON_SIMPLE(env->capture_history, num); } else { return ONIGERR_UNDEFINED_GROUP_OPTION; @@ -4717,7 +4821,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } *np = node_new_option(option); CHECK_NULL_RETURN_MEMERR(*np); - NENCLOSE(*np)->target = target; + NODE_BODY(*np) = target; *src = p; return 0; } @@ -4736,11 +4840,11 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) goto group; - *np = node_new_enclose_memory(env->option, 0); + *np = node_new_enclosure_memory(0); CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) return num; - NENCLOSE(*np)->regnum = num; + ENCLOSURE_(*np)->m.regnum = num; } CHECK_NULL_RETURN_MEMERR(*np); @@ -4752,13 +4856,12 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, return r; } - if (NTYPE(*np) == NT_ANCHOR) - NANCHOR(*np)->target = target; - else { - NENCLOSE(*np)->target = target; - if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) { + NODE_BODY(*np) = target; + + if (NODE_TYPE(*np) == NODE_ENCLOSURE) { + if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) { /* Don't move this to previous of parse_subexp() */ - r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np); + r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np); if (r != 0) return r; } } @@ -4778,36 +4881,35 @@ static const char* ReduceQStr[] = { static int set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) { - QtfrNode* qn; + QuantNode* qn; - qn = NQTFR(qnode); - if (qn->lower == 1 && qn->upper == 1) { + qn = QUANT_(qnode); + if (qn->lower == 1 && qn->upper == 1) return 1; - } - switch (NTYPE(target)) { - case NT_STR: + switch (NODE_TYPE(target)) { + case NODE_STR: if (! group) { - StrNode* sn = NSTR(target); + StrNode* sn = STR_(target); if (str_node_can_be_split(sn, env->enc)) { Node* n = str_node_split_last_char(sn, env->enc); if (IS_NOT_NULL(n)) { - qn->target = n; + NODE_BODY(qnode) = n; return 2; } } } break; - case NT_QTFR: + case NODE_QUANT: { /* check redundant double repeat. */ /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ - QtfrNode* qnt = NQTFR(target); + QuantNode* qnt = QUANT_(target); int nestq_num = popular_quantifier_num(qn); int targetq_num = popular_quantifier_num(qnt); #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR - if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) && + if (! NODE_IS_BY_NUMBER(qnode) && ! NODE_IS_BY_NUMBER(target) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { UChar buf[WARN_BUFSIZE]; @@ -4860,7 +4962,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) break; } - qn->target = target; + NODE_BODY(qnode) = target; q_exit: return 0; } @@ -4971,7 +5073,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE); CHECK_NULL_RETURN_MEMERR(*(iarg->ptail)); - iarg->ptail = &(NCDR((*(iarg->ptail)))); + iarg->ptail = &(NODE_CDR((*(iarg->ptail)))); } } @@ -4999,14 +5101,14 @@ parse_exp(Node** np, OnigToken* tok, int term, break; case TK_SUBEXP_OPEN: - r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env); + r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env); if (r < 0) return r; if (r == 1) group = 1; else if (r == 2) { /* option only */ Node* target; OnigOptionType prev = env->option; - env->option = NENCLOSE(*np)->option; + env->option = ENCLOSURE_(*np)->o.option; r = fetch_token(tok, src, end, env); if (r < 0) return r; r = parse_subexp(&target, tok, term, src, end, env); @@ -5015,7 +5117,7 @@ parse_exp(Node** np, OnigToken* tok, int term, onig_node_free(target); return r; } - NENCLOSE(*np)->target = target; + NODE_BODY(*np) = target; return tok->type; } break; @@ -5057,7 +5159,7 @@ parse_exp(Node** np, OnigToken* tok, int term, len = 1; while (1) { if (len >= ONIGENC_MBC_MINLEN(env->enc)) { - if (len == enclen(env->enc, NSTR(*np)->s)) {//should not enclen_end() + if (len == enclen(env->enc, STR_(*np)->s)) {//should not enclen_end() r = fetch_token(tok, src, end, env); NSTRING_CLEAR_RAW(*np); goto string_end; @@ -5072,8 +5174,8 @@ parse_exp(Node** np, OnigToken* tok, int term, int rem; if (len < ONIGENC_MBC_MINLEN(env->enc)) { rem = ONIGENC_MBC_MINLEN(env->enc) - len; - (void )node_str_head_pad(NSTR(*np), rem, (UChar )0); - if (len + rem == enclen(env->enc, NSTR(*np)->s)) { + (void )node_str_head_pad(STR_(*np), rem, (UChar )0); + if (len + rem == enclen(env->enc, STR_(*np)->s)) { NSTRING_CLEAR_RAW(*np); goto string_end; } @@ -5138,7 +5240,7 @@ parse_exp(Node** np, OnigToken* tok, int term, *np = node_new_cclass(); CHECK_NULL_RETURN_MEMERR(*np); - cc = NCCLASS(*np); + cc = CCLASS_(*np); add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env); if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); } @@ -5163,7 +5265,7 @@ parse_exp(Node** np, OnigToken* tok, int term, r = parse_char_class(np, tok, src, end, env); if (r != 0) return r; - cc = NCCLASS(*np); + cc = CCLASS_(*np); if (IS_IGNORECASE(env->option)) { IApplyCaseFoldArg iarg; @@ -5191,16 +5293,16 @@ parse_exp(Node** np, OnigToken* tok, int term, break; case TK_ANYCHAR: - *np = node_new_anychar(); + *np = node_new_ctype(CTYPE_ANYCHAR, 0); CHECK_NULL_RETURN_MEMERR(*np); break; case TK_ANYCHAR_ANYTIME: - *np = node_new_anychar(); + *np = node_new_ctype(CTYPE_ANYCHAR, 0); CHECK_NULL_RETURN_MEMERR(*np); qn = node_new_quantifier(0, REPEAT_INFINITE, 0); CHECK_NULL_RETURN_MEMERR(qn); - NQTFR(qn)->target = *np; + NODE_BODY(qn) = *np; *np = qn; break; @@ -5222,14 +5324,13 @@ parse_exp(Node** np, OnigToken* tok, int term, { int gnum = tok->u.call.gnum; - if (gnum < 0) { - gnum = BACKREF_REL_TO_ABS(gnum, env); - if (gnum <= 0) - return ONIGERR_INVALID_BACKREF; - } - *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum); + *np = node_new_call(tok->u.call.name, tok->u.call.name_end, + gnum, tok->u.call.by_number); CHECK_NULL_RETURN_MEMERR(*np); env->num_call++; + if (tok->u.call.by_number != 0 && gnum == 0) { + env->has_call_zero = 1; + } } break; #endif @@ -5271,7 +5372,7 @@ parse_exp(Node** np, OnigToken* tok, int term, qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, (r == TK_INTERVAL ? 1 : 0)); CHECK_NULL_RETURN_MEMERR(qn); - NQTFR(qn)->greedy = tok->u.repeat.greedy; + QUANT_(qn)->greedy = tok->u.repeat.greedy; r = set_quantifier(qn, *targetp, group, env); if (r < 0) { onig_node_free(qn); @@ -5280,12 +5381,12 @@ parse_exp(Node** np, OnigToken* tok, int term, if (tok->u.repeat.possessive != 0) { Node* en; - en = node_new_enclose(ENCLOSE_STOP_BACKTRACK); + en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); if (IS_NULL(en)) { onig_node_free(qn); return ONIGERR_MEMORY; } - NENCLOSE(en)->target = qn; + NODE_BODY(en) = qn; qn = en; } @@ -5303,12 +5404,12 @@ parse_exp(Node** np, OnigToken* tok, int term, onig_node_free(qn); return ONIGERR_MEMORY; } - tmp = NCDR(*targetp) = node_new_list(qn, NULL); + tmp = NODE_CDR(*targetp) = node_new_list(qn, NULL); if (IS_NULL(tmp)) { onig_node_free(qn); return ONIGERR_MEMORY; } - targetp = &(NCAR(tmp)); + targetp = &(NODE_CAR(tmp)); } goto re_entry; } @@ -5336,7 +5437,7 @@ parse_branch(Node** top, OnigToken* tok, int term, } else { *top = node_new_list(node, NULL); - headp = &(NCDR(*top)); + headp = &(NODE_CDR(*top)); while (r != TK_EOT && r != term && r != TK_ALT) { r = parse_exp(&node, tok, term, src, end, env); if (r < 0) { @@ -5344,14 +5445,14 @@ parse_branch(Node** top, OnigToken* tok, int term, return r; } - if (NTYPE(node) == NT_LIST) { + if (NODE_TYPE(node) == NODE_LIST) { *headp = node; - while (IS_NOT_NULL(NCDR(node))) node = NCDR(node); - headp = &(NCDR(node)); + while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node); + headp = &(NODE_CDR(node)); } else { *headp = node_new_list(node, NULL); - headp = &(NCDR(*headp)); + headp = &(NODE_CDR(*headp)); } } } @@ -5382,7 +5483,7 @@ parse_subexp(Node** top, OnigToken* tok, int term, } else if (r == TK_ALT) { *top = onig_node_new_alt(node, NULL); - headp = &(NCDR(*top)); + headp = &(NODE_CDR(*top)); while (r == TK_ALT) { r = fetch_token(tok, src, end, env); if (r < 0) return r; @@ -5392,7 +5493,7 @@ parse_subexp(Node** top, OnigToken* tok, int term, return r; } *headp = onig_node_new_alt(node, NULL); - headp = &(NCDR(*headp)); + headp = &(NODE_CDR(*headp)); } if (tok->type != (enum TokenSyms )term) @@ -5421,12 +5522,35 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) if (r < 0) return r; r = parse_subexp(top, &tok, TK_EOT, src, end, env); if (r < 0) return r; + return 0; } +#ifdef USE_SUBEXP_CALL +static int +make_call_zero_body(Node* node, ScanEnv* env, Node** rnode) +{ + int r; + + Node* x = node_new_enclosure_memory(0 /* 0: is not named */); + CHECK_NULL_RETURN_MEMERR(x); + + NODE_BODY(x) = node; + ENCLOSURE_(x)->m.regnum = 0; + r = scan_env_set_mem_node(env, 0, x); + if (r != 0) { + onig_node_free(x); + return r; + } + + *rnode = x; + return 0; +} +#endif + extern int -onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, - regex_t* reg, ScanEnv* env) +onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, + regex_t* reg, ScanEnv* env) { int r; UChar* p; @@ -5451,6 +5575,19 @@ onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, p = (UChar* )pattern; r = parse_regexp(root, &p, (UChar* )end, env); + +#ifdef USE_SUBEXP_CALL + if (r != 0) return r; + + if (env->has_call_zero != 0) { + Node* zero_node; + r = make_call_zero_body(*root, env, &zero_node); + if (r != 0) return r; + + *root = zero_node; + } +#endif + reg->num_mem = env->num_mem; return r; } -- cgit v1.2.3