summaryrefslogtreecommitdiff
path: root/src/regcomp.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regcomp.c')
-rw-r--r--src/regcomp.c1955
1 files changed, 1207 insertions, 748 deletions
diff --git a/src/regcomp.c b/src/regcomp.c
index c2c04a4..69d4b95 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -2,7 +2,7 @@
regcomp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -224,17 +224,17 @@ ops_free(regex_t* reg)
#endif
switch (opcode) {
- case OP_EXACTMBN:
+ case OP_STR_MBN:
if (! is_in_string_pool(reg, op->exact_len_n.s))
xfree(op->exact_len_n.s);
break;
- case OP_EXACTN: case OP_EXACTMB2N: case OP_EXACTMB3N: case OP_EXACTN_IC:
+ case OP_STR_N: case OP_STR_MB2N: case OP_STR_MB3N: case OP_STR_N_IC:
if (! is_in_string_pool(reg, op->exact_n.s))
xfree(op->exact_n.s);
break;
- case OP_EXACT1: case OP_EXACT2: case OP_EXACT3: case OP_EXACT4:
- case OP_EXACT5: case OP_EXACTMB2N1: case OP_EXACTMB2N2:
- case OP_EXACTMB2N3: case OP_EXACT1_IC:
+ case OP_STR_1: case OP_STR_2: case OP_STR_3: case OP_STR_4:
+ case OP_STR_5: case OP_STR_MB2N1: case OP_STR_MB2N2:
+ case OP_STR_MB2N3: case OP_STR_1_IC:
break;
case OP_CCLASS_NOT: case OP_CCLASS:
@@ -298,17 +298,17 @@ ops_calc_size_of_string_pool(regex_t* reg)
#endif
switch (opcode) {
- case OP_EXACTMBN:
+ case OP_STR_MBN:
total += op->exact_len_n.len * op->exact_len_n.n;
break;
- case OP_EXACTN:
- case OP_EXACTN_IC:
+ case OP_STR_N:
+ case OP_STR_N_IC:
total += op->exact_n.n;
break;
- case OP_EXACTMB2N:
+ case OP_STR_MB2N:
total += op->exact_n.n * 2;
break;
- case OP_EXACTMB3N:
+ case OP_STR_MB3N:
total += op->exact_n.n * 3;
break;
@@ -349,15 +349,15 @@ ops_make_string_pool(regex_t* reg)
#endif
switch (opcode) {
- case OP_EXACTMBN:
+ case OP_STR_MBN:
len = op->exact_len_n.len * op->exact_len_n.n;
xmemcpy(curr, op->exact_len_n.s, len);
xfree(op->exact_len_n.s);
op->exact_len_n.s = curr;
curr += len;
break;
- case OP_EXACTN:
- case OP_EXACTN_IC:
+ case OP_STR_N:
+ case OP_STR_N_IC:
len = op->exact_n.n;
copy:
xmemcpy(curr, op->exact_n.s, len);
@@ -365,11 +365,11 @@ ops_make_string_pool(regex_t* reg)
op->exact_n.s = curr;
curr += len;
break;
- case OP_EXACTMB2N:
+ case OP_STR_MB2N:
len = op->exact_n.n * 2;
goto copy;
break;
- case OP_EXACTMB3N:
+ case OP_STR_MB3N:
len = op->exact_n.n * 3;
goto copy;
break;
@@ -427,7 +427,7 @@ onig_positive_int_multiply(int x, int y)
static void
-swap_node(Node* a, Node* b)
+node_swap(Node* a, Node* b)
{
Node c;
@@ -452,6 +452,81 @@ swap_node(Node* a, Node* b)
}
}
+static int
+node_list_len(Node* list)
+{
+ int len;
+
+ len = 1;
+ while (IS_NOT_NULL(NODE_CDR(list))) {
+ list = NODE_CDR(list);
+ len++;
+ }
+
+ return len;
+}
+
+static Node*
+node_list_add(Node* list, Node* x)
+{
+ Node *n;
+
+ n = onig_node_new_list(x, NULL);
+ if (IS_NULL(n)) return NULL_NODE;
+
+ if (IS_NOT_NULL(list)) {
+ while (IS_NOT_NULL(NODE_CDR(list)))
+ list = NODE_CDR(list);
+
+ NODE_CDR(list) = n;
+ }
+
+ return n;
+}
+
+static int
+node_str_node_cat(Node* node, Node* add)
+{
+ int r;
+
+ if (STR_(node)->flag != STR_(add)->flag)
+ return ONIGERR_TYPE_BUG;
+
+ r = onig_node_str_cat(node, STR_(add)->s, STR_(add)->end);
+ if (r != 0) return r;
+
+ if (NODE_STRING_IS_CASE_FOLD_MATCH(node))
+ STR_(node)->case_min_len += STR_(add)->case_min_len;
+
+ return 0;
+}
+
+static int
+node_str_cat_case_fold(Node* node, const UChar* s, const UChar* end, int case_min_len)
+{
+ int r;
+
+ if (! NODE_STRING_IS_CASE_FOLD_MATCH(node))
+ return ONIGERR_TYPE_BUG;
+
+ r = onig_node_str_cat(node, s, end);
+ if (r != 0) return r;
+
+ STR_(node)->case_min_len += case_min_len;
+ return 0;
+}
+
+static void
+node_conv_to_str_node(Node* node, int flag)
+{
+ NODE_SET_TYPE(node, NODE_STRING);
+ STR_(node)->flag = flag;
+ STR_(node)->s = STR_(node)->buf;
+ STR_(node)->end = STR_(node)->buf;
+ STR_(node)->capacity = 0;
+ STR_(node)->case_min_len = 0;
+}
+
static OnigLen
distance_add(OnigLen d1, OnigLen d2)
{
@@ -549,81 +624,108 @@ static int compile_length_tree(Node* node, regex_t* reg);
static int compile_tree(Node* node, regex_t* reg, ScanEnv* env);
-#define IS_NEED_STR_LEN_OP_EXACT(op) \
- ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
- (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
+#define IS_NEED_STR_LEN_OP(op) \
+ ((op) == OP_STR_N || (op) == OP_STR_MB2N ||\
+ (op) == OP_STR_MB3N || (op) == OP_STR_MBN || (op) == OP_STR_N_IC)
static int
-select_str_opcode(int mb_len, int str_len, int ignore_case)
+select_str_opcode(int mb_len, int str_len)
{
int op;
- if (ignore_case) {
+ switch (mb_len) {
+ case 1:
switch (str_len) {
- case 1: op = OP_EXACT1_IC; break;
- default: op = OP_EXACTN_IC; break;
+ case 1: op = OP_STR_1; break;
+ case 2: op = OP_STR_2; break;
+ case 3: op = OP_STR_3; break;
+ case 4: op = OP_STR_4; break;
+ case 5: op = OP_STR_5; break;
+ default: op = OP_STR_N; break;
}
- }
- else {
- switch (mb_len) {
- case 1:
- switch (str_len) {
- case 1: op = OP_EXACT1; break;
- case 2: op = OP_EXACT2; break;
- case 3: op = OP_EXACT3; break;
- case 4: op = OP_EXACT4; break;
- case 5: op = OP_EXACT5; break;
- default: op = OP_EXACTN; break;
- }
- break;
+ break;
- case 2:
- switch (str_len) {
- case 1: op = OP_EXACTMB2N1; break;
- case 2: op = OP_EXACTMB2N2; break;
- case 3: op = OP_EXACTMB2N3; break;
- default: op = OP_EXACTMB2N; break;
- }
- break;
+ case 2:
+ switch (str_len) {
+ case 1: op = OP_STR_MB2N1; break;
+ case 2: op = OP_STR_MB2N2; break;
+ case 3: op = OP_STR_MB2N3; break;
+ default: op = OP_STR_MB2N; break;
+ }
+ break;
- case 3:
- op = OP_EXACTMB3N;
- break;
+ case 3:
+ op = OP_STR_MB3N;
+ break;
- default:
- op = OP_EXACTMBN;
- break;
- }
+ default:
+ op = OP_STR_MBN;
+ break;
}
+
return op;
}
static int
-compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)
+is_strict_real_node(Node* node)
+{
+ switch (NODE_TYPE(node)) {
+ case NODE_STRING:
+ {
+ StrNode* sn = STR_(node);
+ return (sn->end != sn->s);
+ }
+ break;
+
+ case NODE_CCLASS:
+ case NODE_CTYPE:
+ return 1;
+ break;
+
+ default:
+ return 0;
+ break;
+ }
+}
+
+static int
+compile_quant_body_with_empty_check(QuantNode* qn, regex_t* reg, ScanEnv* env)
{
int r;
- int saved_num_null_check = reg->num_null_check;
+ int saved_num_empty_check;
+ int emptiness;
+ Node* body;
- if (empty_info != BODY_IS_NOT_EMPTY) {
+ body = NODE_BODY((Node* )qn);
+ emptiness = qn->emptiness;
+ saved_num_empty_check = reg->num_empty_check;
+
+ if (emptiness != BODY_IS_NOT_EMPTY) {
r = add_op(reg, OP_EMPTY_CHECK_START);
if (r != 0) return r;
- COP(reg)->empty_check_start.mem = reg->num_null_check; /* NULL CHECK ID */
- reg->num_null_check++;
+ COP(reg)->empty_check_start.mem = reg->num_empty_check; /* NULL CHECK ID */
+ reg->num_empty_check++;
}
- r = compile_tree(node, reg, env);
+ r = compile_tree(body, reg, env);
if (r != 0) return r;
- if (empty_info != BODY_IS_NOT_EMPTY) {
- if (empty_info == BODY_IS_EMPTY)
+ if (emptiness != BODY_IS_NOT_EMPTY) {
+ if (emptiness == BODY_IS_EMPTY_POSSIBILITY)
r = add_op(reg, OP_EMPTY_CHECK_END);
- else if (empty_info == BODY_IS_EMPTY_MEM)
- r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);
- else if (empty_info == BODY_IS_EMPTY_REC)
+ else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM) {
+ if (NODE_IS_EMPTY_STATUS_CHECK(qn) != 0)
+ r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);
+ else
+ r = add_op(reg, OP_EMPTY_CHECK_END);
+ }
+#ifdef USE_CALL
+ else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_REC)
r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
+#endif
if (r != 0) return r;
- COP(reg)->empty_check_end.mem = saved_num_null_check; /* NULL CHECK ID */
+ COP(reg)->empty_check_end.mem = saved_num_empty_check; /* NULL CHECK ID */
}
return r;
}
@@ -660,14 +762,13 @@ compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)
static int
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
- regex_t* reg ARG_UNUSED, int ignore_case)
+ regex_t* reg ARG_UNUSED)
{
return 1;
}
static int
-add_compile_string(UChar* s, int mb_len, int str_len,
- regex_t* reg, int ignore_case)
+add_compile_string(UChar* s, int mb_len, int str_len, regex_t* reg)
{
int op;
int r;
@@ -675,14 +776,14 @@ add_compile_string(UChar* s, int mb_len, int str_len,
UChar* p;
UChar* end;
- op = select_str_opcode(mb_len, str_len, ignore_case);
+ op = select_str_opcode(mb_len, str_len);
r = add_op(reg, op);
if (r != 0) return r;
byte_len = mb_len * str_len;
end = s + byte_len;
- if (op == OP_EXACTMBN) {
+ if (op == OP_STR_MBN) {
p = onigenc_strdup(reg->enc, s, end);
CHECK_NULL_RETURN_MEMERR(p);
@@ -690,11 +791,11 @@ add_compile_string(UChar* s, int mb_len, int str_len,
COP(reg)->exact_len_n.n = str_len;
COP(reg)->exact_len_n.s = p;
}
- else if (IS_NEED_STR_LEN_OP_EXACT(op)) {
+ else if (IS_NEED_STR_LEN_OP(op)) {
p = onigenc_strdup(reg->enc, s, end);
CHECK_NULL_RETURN_MEMERR(p);
- if (op == OP_EXACTN_IC)
+ if (op == OP_STR_N_IC)
COP(reg)->exact_n.n = byte_len;
else
COP(reg)->exact_n.n = str_len;
@@ -702,8 +803,8 @@ add_compile_string(UChar* s, int mb_len, int str_len,
COP(reg)->exact_n.s = p;
}
else {
+ xmemset(COP(reg)->exact.s, 0, sizeof(COP(reg)->exact.s));
xmemcpy(COP(reg)->exact.s, s, (size_t )byte_len);
- COP(reg)->exact.s[byte_len] = '\0';
}
return 0;
@@ -712,7 +813,7 @@ add_compile_string(UChar* s, int mb_len, int str_len,
static int
compile_length_string_node(Node* node, regex_t* reg)
{
- int rlen, r, len, prev_len, slen, ambig;
+ int rlen, r, len, prev_len, slen;
UChar *p, *prev;
StrNode* sn;
OnigEncoding enc = reg->enc;
@@ -721,7 +822,7 @@ compile_length_string_node(Node* node, regex_t* reg)
if (sn->end <= sn->s)
return 0;
- ambig = NODE_STRING_IS_AMBIG(node);
+ if (NODE_STRING_IS_CASE_FOLD_MATCH(node) != 0) return 1;
p = prev = sn->s;
prev_len = enclen(enc, p);
@@ -735,7 +836,7 @@ compile_length_string_node(Node* node, regex_t* reg)
slen++;
}
else {
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string_length(prev, prev_len, slen, reg);
rlen += r;
prev = p;
slen = 1;
@@ -744,25 +845,59 @@ compile_length_string_node(Node* node, regex_t* reg)
p += len;
}
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string_length(prev, prev_len, slen, reg);
rlen += r;
return rlen;
}
static int
-compile_length_string_raw_node(StrNode* sn, regex_t* reg)
+compile_length_string_crude_node(StrNode* sn, regex_t* reg)
{
if (sn->end <= sn->s)
return 0;
return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),
- reg, 0);
+ reg);
+}
+
+static int
+compile_ambig_string_node(Node* node, regex_t* reg)
+{
+ int r;
+ int len;
+ int byte_len;
+ UChar* p;
+ StrNode* sn;
+ OnigEncoding enc = reg->enc;
+
+ sn = STR_(node);
+ len = enclen(enc, sn->s);
+ byte_len = (int )(sn->end - sn->s);
+ if (len == byte_len) {
+ r = add_op(reg, OP_STR_1_IC);
+ if (r != 0) return r;
+
+ xmemset(COP(reg)->exact.s, 0, sizeof(COP(reg)->exact.s));
+ xmemcpy(COP(reg)->exact.s, sn->s, (size_t )byte_len);
+ }
+ else {
+ r = add_op(reg, OP_STR_N_IC);
+ if (r != 0) return r;
+
+ p = onigenc_strdup(enc, sn->s, sn->end);
+ CHECK_NULL_RETURN_MEMERR(p);
+
+ COP(reg)->exact_n.s = p;
+ COP(reg)->exact_n.n = byte_len;
+ }
+
+ return 0;
}
static int
compile_string_node(Node* node, regex_t* reg)
{
- int r, len, prev_len, slen, ambig;
+ int r, len, prev_len, slen;
UChar *p, *prev, *end;
StrNode* sn;
OnigEncoding enc = reg->enc;
@@ -772,7 +907,9 @@ compile_string_node(Node* node, regex_t* reg)
return 0;
end = sn->end;
- ambig = NODE_STRING_IS_AMBIG(node);
+ if (NODE_STRING_IS_CASE_FOLD_MATCH(node) != 0) {
+ return compile_ambig_string_node(node, reg);
+ }
p = prev = sn->s;
prev_len = enclen(enc, p);
@@ -785,7 +922,7 @@ compile_string_node(Node* node, regex_t* reg)
slen++;
}
else {
- r = add_compile_string(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string(prev, prev_len, slen, reg);
if (r != 0) return r;
prev = p;
@@ -796,16 +933,16 @@ compile_string_node(Node* node, regex_t* reg)
p += len;
}
- return add_compile_string(prev, prev_len, slen, reg, ambig);
+ return add_compile_string(prev, prev_len, slen, reg);
}
static int
-compile_string_raw_node(StrNode* sn, regex_t* reg)
+compile_string_crude_node(StrNode* sn, regex_t* reg)
{
if (sn->end <= sn->s)
return 0;
- return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);
+ return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg);
}
static void*
@@ -869,15 +1006,27 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
return 0;
}
+static void
+set_addr_in_repeat_range(regex_t* reg)
+{
+ int i;
+
+ for (i = 0; i < reg->num_repeat; i++) {
+ RepeatRange* p = reg->repeat_range + i;
+ int offset = p->u.offset;
+ p->u.pcode = reg->ops + offset;
+ }
+}
+
static int
-entry_repeat_range(regex_t* reg, int id, int lower, int upper)
+entry_repeat_range(regex_t* reg, int id, int lower, int upper, int ops_index)
{
#define REPEAT_RANGE_ALLOC 4
- OnigRepeatRange* p;
+ RepeatRange* p;
if (reg->repeat_range_alloc == 0) {
- p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
+ p = (RepeatRange* )xmalloc(sizeof(RepeatRange) * REPEAT_RANGE_ALLOC);
CHECK_NULL_RETURN_MEMERR(p);
reg->repeat_range = p;
reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
@@ -885,7 +1034,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
else if (reg->repeat_range_alloc <= id) {
int n;
n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
- p = (OnigRepeatRange* )xrealloc(reg->repeat_range, sizeof(OnigRepeatRange) * n);
+ p = (RepeatRange* )xrealloc(reg->repeat_range, sizeof(RepeatRange) * n);
CHECK_NULL_RETURN_MEMERR(p);
reg->repeat_range = p;
reg->repeat_range_alloc = n;
@@ -894,13 +1043,14 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
p = reg->repeat_range;
}
- p[id].lower = lower;
- p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
+ p[id].lower = lower;
+ p[id].upper = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper);
+ p[id].u.offset = ops_index;
return 0;
}
static int
-compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
+compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,
regex_t* reg, ScanEnv* env)
{
int r;
@@ -910,24 +1060,16 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
if (r != 0) return r;
COP(reg)->repeat.id = num_repeat;
- COP(reg)->repeat.addr = SIZE_INC_OP + target_len + SIZE_OP_REPEAT_INC;
+ COP(reg)->repeat.addr = SIZE_INC + target_len + OPSIZE_REPEAT_INC;
- r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
+ r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper,
+ COP_CURR_OFFSET(reg) + OPSIZE_REPEAT);
if (r != 0) return r;
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_quant_body_with_empty_check(qn, reg, env);
if (r != 0) return r;
- if (
-#ifdef USE_CALL
- NODE_IS_IN_MULTI_ENTRY(qn) ||
-#endif
- NODE_IS_IN_REAL_REPEAT(qn)) {
- r = add_op(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
- }
- else {
- r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
- }
+ r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
if (r != 0) return r;
COP(reg)->repeat_inc.id = num_repeat;
@@ -937,7 +1079,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
static int
is_anychar_infinite_greedy(QuantNode* qn)
{
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
+ if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) &&
NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))
return 1;
else
@@ -951,8 +1093,8 @@ static int
compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
{
int len, mod_tlen;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- enum BodyEmpty empty_info = qn->empty_info;
+ int infinite = IS_INFINITE_REPEAT(qn->upper);
+ enum BodyEmptyType emptiness = qn->emptiness;
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
@@ -963,22 +1105,21 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
if (qn->lower <= 1 ||
int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {
if (IS_NOT_NULL(qn->next_head_exact))
- return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
+ return OPSIZE_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
else
- return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
+ return OPSIZE_ANYCHAR_STAR + tlen * qn->lower;
}
}
- if (empty_info == BODY_IS_NOT_EMPTY)
- mod_tlen = tlen;
- else
- mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
+ mod_tlen = tlen;
+ if (emptiness != BODY_IS_NOT_EMPTY)
+ mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;
if (infinite &&
(qn->lower <= 1 ||
int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
- len = SIZE_OP_JUMP;
+ len = OPSIZE_JUMP;
}
else {
len = tlen * qn->lower;
@@ -987,36 +1128,36 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
if (qn->greedy) {
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact))
- len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
+ len += OPSIZE_PUSH_OR_JUMP_EXACT1 + mod_tlen + OPSIZE_JUMP;
else
#endif
if (IS_NOT_NULL(qn->next_head_exact))
- len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
+ len += OPSIZE_PUSH_IF_PEEK_NEXT + mod_tlen + OPSIZE_JUMP;
else
- len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
+ len += OPSIZE_PUSH + mod_tlen + OPSIZE_JUMP;
}
else
- len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
+ len += OPSIZE_JUMP + mod_tlen + OPSIZE_PUSH;
}
else if (qn->upper == 0) {
- if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
- len = SIZE_OP_JUMP + tlen;
+ if (qn->include_referred != 0) { /* /(?<n>..){0}/ */
+ len = OPSIZE_JUMP + tlen;
}
else
len = 0;
}
else if (!infinite && qn->greedy &&
(qn->upper == 1 ||
- int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
+ int_multiply_cmp(tlen + OPSIZE_PUSH, qn->upper,
QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
len = tlen * qn->lower;
- len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
+ len += (OPSIZE_PUSH + tlen) * (qn->upper - qn->lower);
}
else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
+ len = OPSIZE_PUSH + OPSIZE_JUMP + tlen;
}
else {
- len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OP_REPEAT;
+ len = OPSIZE_REPEAT_INC + mod_tlen + OPSIZE_REPEAT;
}
return len;
@@ -1026,8 +1167,8 @@ static int
compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
{
int i, r, mod_tlen;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- enum BodyEmpty empty_info = qn->empty_info;
+ int infinite = IS_INFINITE_REPEAT(qn->upper);
+ enum BodyEmptyType emptiness = qn->emptiness;
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
@@ -1055,10 +1196,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
}
}
- if (empty_info == BODY_IS_NOT_EMPTY)
- mod_tlen = tlen;
- else
- mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
+ mod_tlen = tlen;
+ if (emptiness != BODY_IS_NOT_EMPTY)
+ mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;
if (infinite &&
(qn->lower <= 1 ||
@@ -1071,16 +1211,16 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
if (qn->greedy) {
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact))
- COP(reg)->jump.addr = SIZE_OP_PUSH_OR_JUMP_EXACT1 + SIZE_INC_OP;
+ COP(reg)->jump.addr = OPSIZE_PUSH_OR_JUMP_EXACT1 + SIZE_INC;
else
#endif
if (IS_NOT_NULL(qn->next_head_exact))
- COP(reg)->jump.addr = SIZE_OP_PUSH_IF_PEEK_NEXT + SIZE_INC_OP;
+ COP(reg)->jump.addr = OPSIZE_PUSH_IF_PEEK_NEXT + SIZE_INC;
else
- COP(reg)->jump.addr = SIZE_OP_PUSH + SIZE_INC_OP;
+ COP(reg)->jump.addr = OPSIZE_PUSH + SIZE_INC;
}
else {
- COP(reg)->jump.addr = SIZE_OP_JUMP + SIZE_INC_OP;
+ COP(reg)->jump.addr = OPSIZE_JUMP + SIZE_INC;
}
}
else {
@@ -1093,36 +1233,36 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
if (IS_NOT_NULL(qn->head_exact)) {
r = add_op(reg, OP_PUSH_OR_JUMP_EXACT1);
if (r != 0) return r;
- COP(reg)->push_or_jump_exact1.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
+ COP(reg)->push_or_jump_exact1.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
COP(reg)->push_or_jump_exact1.c = STR_(qn->head_exact)->s[0];
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_quant_body_with_empty_check(qn, reg, env);
if (r != 0) return r;
- addr = -(mod_tlen + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1);
+ addr = -(mod_tlen + (int )OPSIZE_PUSH_OR_JUMP_EXACT1);
}
else
#endif
if (IS_NOT_NULL(qn->next_head_exact)) {
r = add_op(reg, OP_PUSH_IF_PEEK_NEXT);
if (r != 0) return r;
- COP(reg)->push_if_peek_next.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
+ COP(reg)->push_if_peek_next.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
COP(reg)->push_if_peek_next.c = STR_(qn->next_head_exact)->s[0];
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_quant_body_with_empty_check(qn, reg, env);
if (r != 0) return r;
- addr = -(mod_tlen + (int )SIZE_OP_PUSH_IF_PEEK_NEXT);
+ addr = -(mod_tlen + (int )OPSIZE_PUSH_IF_PEEK_NEXT);
}
else {
r = add_op(reg, OP_PUSH);
if (r != 0) return r;
- COP(reg)->push.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
+ COP(reg)->push.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_quant_body_with_empty_check(qn, reg, env);
if (r != 0) return r;
- addr = -(mod_tlen + (int )SIZE_OP_PUSH);
+ addr = -(mod_tlen + (int )OPSIZE_PUSH);
}
r = add_op(reg, OP_JUMP);
@@ -1132,9 +1272,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
else {
r = add_op(reg, OP_JUMP);
if (r != 0) return r;
- COP(reg)->jump.addr = mod_tlen + SIZE_INC_OP;
+ COP(reg)->jump.addr = mod_tlen + SIZE_INC;
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_quant_body_with_empty_check(qn, reg, env);
if (r != 0) return r;
r = add_op(reg, OP_PUSH);
@@ -1143,10 +1283,10 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
}
}
else if (qn->upper == 0) {
- if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ if (qn->include_referred != 0) { /* /(?<n>..){0}/ */
r = add_op(reg, OP_JUMP);
if (r != 0) return r;
- COP(reg)->jump.addr = tlen + SIZE_INC_OP;
+ COP(reg)->jump.addr = tlen + SIZE_INC;
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
}
@@ -1157,7 +1297,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
}
else if (! infinite && qn->greedy &&
(qn->upper == 1 ||
- int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
+ int_multiply_cmp(tlen + OPSIZE_PUSH, qn->upper,
QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
int n = qn->upper - qn->lower;
@@ -1165,7 +1305,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
if (r != 0) return r;
for (i = 0; i < n; i++) {
- int v = onig_positive_int_multiply(n - i, tlen + SIZE_OP_PUSH);
+ int v = onig_positive_int_multiply(n - i, tlen + OPSIZE_PUSH);
if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
r = add_op(reg, OP_PUSH);
@@ -1179,16 +1319,16 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
r = add_op(reg, OP_PUSH);
if (r != 0) return r;
- COP(reg)->push.addr = SIZE_INC_OP + SIZE_OP_JUMP;
+ COP(reg)->push.addr = SIZE_INC + OPSIZE_JUMP;
r = add_op(reg, OP_JUMP);
if (r != 0) return r;
- COP(reg)->jump.addr = tlen + SIZE_INC_OP;
+ COP(reg)->jump.addr = tlen + SIZE_INC;
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
}
else {
- r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);
+ r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env);
}
return r;
}
@@ -1240,40 +1380,40 @@ compile_length_bag_node(BagNode* node, regex_t* reg)
#ifdef USE_CALL
if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
- len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
+ len = tlen + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;
return len;
}
if (NODE_IS_CALLED(node)) {
- len = SIZE_OP_MEMORY_START_PUSH + tlen
- + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
+ len = OPSIZE_MEM_START_PUSH + tlen
+ + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;
+ if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+ ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);
else
len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);
}
else if (NODE_IS_RECURSION(node)) {
- len = SIZE_OP_MEMORY_START_PUSH;
- len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
+ len = OPSIZE_MEM_START_PUSH;
+ len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)
+ ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_REC);
}
else
#endif
{
- if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
- len = SIZE_OP_MEMORY_START_PUSH;
+ if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum))
+ len = OPSIZE_MEM_START_PUSH;
else
- len = SIZE_OP_MEMORY_START;
+ len = OPSIZE_MEM_START;
- len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
- ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
+ len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)
+ ? OPSIZE_MEM_END_PUSH : OPSIZE_MEM_END);
}
break;
case BAG_STOP_BACKTRACK:
- if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
+ if (NODE_IS_STRICT_REAL_REPEAT(node)) {
int v;
QuantNode* qn;
@@ -1283,10 +1423,10 @@ compile_length_bag_node(BagNode* node, regex_t* reg)
v = onig_positive_int_multiply(qn->lower, tlen);
if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
- len = v + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
+ len = v + OPSIZE_PUSH + tlen + OPSIZE_POP_OUT + OPSIZE_JUMP;
}
else {
- len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;
+ len = OPSIZE_ATOMIC_START + tlen + OPSIZE_ATOMIC_END;
}
break;
@@ -1298,8 +1438,8 @@ compile_length_bag_node(BagNode* node, regex_t* reg)
len = compile_length_tree(cond, reg);
if (len < 0) return len;
- len += SIZE_OP_PUSH;
- len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END;
+ len += OPSIZE_PUSH;
+ len += OPSIZE_ATOMIC_START + OPSIZE_ATOMIC_END;
if (IS_NOT_NULL(Then)) {
tlen = compile_length_tree(Then, reg);
@@ -1307,8 +1447,9 @@ compile_length_bag_node(BagNode* node, regex_t* reg)
len += tlen;
}
+ len += OPSIZE_JUMP + OPSIZE_ATOMIC_END;
+
if (IS_NOT_NULL(Else)) {
- len += SIZE_OP_JUMP;
tlen = compile_length_tree(Else, reg);
if (tlen < 0) return tlen;
len += tlen;
@@ -1331,24 +1472,25 @@ static int
compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
{
int r;
- int len;
#ifdef USE_CALL
if (NODE_IS_CALLED(node)) {
+ int len;
+
r = add_op(reg, OP_CALL);
if (r != 0) return r;
- node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + SIZE_OP_JUMP;
+ node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + OPSIZE_JUMP;
NODE_STATUS_ADD(node, ADDR_FIXED);
COP(reg)->call.addr = (int )node->m.called_addr;
if (node->m.regnum == 0) {
len = compile_length_tree(NODE_BAG_BODY(node), reg);
- len += SIZE_OP_RETURN;
+ len += OPSIZE_RETURN;
r = add_op(reg, OP_JUMP);
if (r != 0) return r;
- COP(reg)->jump.addr = len + SIZE_INC_OP;
+ COP(reg)->jump.addr = len + SIZE_INC;
r = compile_tree(NODE_BAG_BODY(node), reg, env);
if (r != 0) return r;
@@ -1358,25 +1500,24 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
}
else {
len = compile_length_tree(NODE_BAG_BODY(node), reg);
- len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
+ len += (OPSIZE_MEM_START_PUSH + OPSIZE_RETURN);
+ if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+ ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);
else
- len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ len += (NODE_IS_RECURSION(node) ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);
r = add_op(reg, OP_JUMP);
if (r != 0) return r;
- COP(reg)->jump.addr = len + SIZE_INC_OP;
+ COP(reg)->jump.addr = len + SIZE_INC;
}
}
#endif
- if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
- r = add_op(reg, OP_MEMORY_START_PUSH);
+ if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum))
+ r = add_op(reg, OP_MEM_START_PUSH);
else
- r = add_op(reg, OP_MEMORY_START);
+ r = add_op(reg, OP_MEM_START);
if (r != 0) return r;
COP(reg)->memory_start.num = node->m.regnum;
@@ -1384,11 +1525,11 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
if (r != 0) return r;
#ifdef USE_CALL
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
+ if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
r = add_op(reg, (NODE_IS_RECURSION(node)
- ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
+ ? OP_MEM_END_PUSH_REC : OP_MEM_END_PUSH));
else
- r = add_op(reg, (NODE_IS_RECURSION(node) ? OP_MEMORY_END_REC : OP_MEMORY_END));
+ r = add_op(reg, (NODE_IS_RECURSION(node) ? OP_MEM_END_REC : OP_MEM_END));
if (r != 0) return r;
COP(reg)->memory_end.num = node->m.regnum;
@@ -1397,10 +1538,10 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
r = add_op(reg, OP_RETURN);
}
#else
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
- r = add_op(reg, OP_MEMORY_END_PUSH);
+ if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
+ r = add_op(reg, OP_MEM_END_PUSH);
else
- r = add_op(reg, OP_MEMORY_END);
+ r = add_op(reg, OP_MEM_END);
if (r != 0) return r;
COP(reg)->memory_end.num = node->m.regnum;
#endif
@@ -1423,7 +1564,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
break;
case BAG_STOP_BACKTRACK:
- if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
+ if (NODE_IS_STRICT_REAL_REPEAT(node)) {
QuantNode* qn = QUANT_(NODE_BAG_BODY(node));
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
if (r != 0) return r;
@@ -1433,7 +1574,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
r = add_op(reg, OP_PUSH);
if (r != 0) return r;
- COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
+ COP(reg)->push.addr = SIZE_INC + len + OPSIZE_POP_OUT + OPSIZE_JUMP;
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
if (r != 0) return r;
@@ -1442,7 +1583,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
r = add_op(reg, OP_JUMP);
if (r != 0) return r;
- COP(reg)->jump.addr = -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT);
+ COP(reg)->jump.addr = -((int )OPSIZE_PUSH + len + (int )OPSIZE_POP_OUT);
}
else {
r = add_op(reg, OP_ATOMIC_START);
@@ -1455,7 +1596,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
case BAG_IF_ELSE:
{
- int cond_len, then_len, jump_len;
+ int cond_len, then_len, else_len, jump_len;
Node* cond = NODE_BAG_BODY(node);
Node* Then = node->te.Then;
Node* Else = node->te.Else;
@@ -1472,12 +1613,11 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
else
then_len = 0;
- jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END;
- if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP;
+ jump_len = cond_len + then_len + OPSIZE_ATOMIC_END + OPSIZE_JUMP;
r = add_op(reg, OP_PUSH);
if (r != 0) return r;
- COP(reg)->push.addr = SIZE_INC_OP + jump_len;
+ COP(reg)->push.addr = SIZE_INC + jump_len;
r = compile_tree(cond, reg, env);
if (r != 0) return r;
@@ -1490,11 +1630,20 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
}
if (IS_NOT_NULL(Else)) {
- int else_len = compile_length_tree(Else, reg);
- r = add_op(reg, OP_JUMP);
- if (r != 0) return r;
- COP(reg)->jump.addr = else_len + SIZE_INC_OP;
+ else_len = compile_length_tree(Else, reg);
+ if (else_len < 0) return else_len;
+ }
+ else
+ else_len = 0;
+
+ r = add_op(reg, OP_JUMP);
+ if (r != 0) return r;
+ COP(reg)->jump.addr = OPSIZE_ATOMIC_END + else_len + SIZE_INC;
+ r = add_op(reg, OP_ATOMIC_END);
+ if (r != 0) return r;
+
+ if (IS_NOT_NULL(Else)) {
r = compile_tree(Else, reg, env);
}
}
@@ -1517,16 +1666,16 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)
switch (node->type) {
case ANCR_PREC_READ:
- len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;
+ len = OPSIZE_PREC_READ_START + tlen + OPSIZE_PREC_READ_END;
break;
case ANCR_PREC_READ_NOT:
- len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;
+ len = OPSIZE_PREC_READ_NOT_START + tlen + OPSIZE_PREC_READ_NOT_END;
break;
case ANCR_LOOK_BEHIND:
- len = SIZE_OP_LOOK_BEHIND + tlen;
+ len = OPSIZE_LOOK_BEHIND + tlen;
break;
case ANCR_LOOK_BEHIND_NOT:
- len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;
+ len = OPSIZE_LOOK_BEHIND_NOT_START + tlen + OPSIZE_LOOK_BEHIND_NOT_END;
break;
case ANCR_WORD_BOUNDARY:
@@ -1535,7 +1684,7 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)
case ANCR_WORD_BEGIN:
case ANCR_WORD_END:
#endif
- len = SIZE_OP_WORD_BOUNDARY;
+ len = OPSIZE_WORD_BOUNDARY;
break;
case ANCR_TEXT_SEGMENT_BOUNDARY:
@@ -1619,7 +1768,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
r = add_op(reg, OP_PREC_READ_NOT_START);
if (r != 0) return r;
- COP(reg)->prec_read_not_start.addr = SIZE_INC_OP + len + SIZE_OP_PREC_READ_NOT_END;
+ COP(reg)->prec_read_not_start.addr = SIZE_INC + len + OPSIZE_PREC_READ_NOT_END;
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
if (r != 0) return r;
r = add_op(reg, OP_PREC_READ_NOT_END);
@@ -1649,7 +1798,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
r = add_op(reg, OP_LOOK_BEHIND_NOT_START);
if (r != 0) return r;
- COP(reg)->look_behind_not_start.addr = SIZE_INC_OP + len + SIZE_OP_LOOK_BEHIND_NOT_END;
+ COP(reg)->look_behind_not_start.addr = SIZE_INC + len + OPSIZE_LOOK_BEHIND_NOT_END;
if (node->char_len < 0) {
r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);
@@ -1735,25 +1884,25 @@ compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
switch (node->type) {
case GIMMICK_FAIL:
- len = SIZE_OP_FAIL;
+ len = OPSIZE_FAIL;
break;
case GIMMICK_SAVE:
- len = SIZE_OP_PUSH_SAVE_VAL;
+ len = OPSIZE_PUSH_SAVE_VAL;
break;
case GIMMICK_UPDATE_VAR:
- len = SIZE_OP_UPDATE_VAR;
+ len = OPSIZE_UPDATE_VAR;
break;
#ifdef USE_CALLOUT
case GIMMICK_CALLOUT:
switch (node->detail_type) {
case ONIG_CALLOUT_OF_CONTENTS:
- len = SIZE_OP_CALLOUT_CONTENTS;
+ len = OPSIZE_CALLOUT_CONTENTS;
break;
case ONIG_CALLOUT_OF_NAME:
- len = SIZE_OP_CALLOUT_NAME;
+ len = OPSIZE_CALLOUT_NAME;
break;
default:
@@ -1792,13 +1941,13 @@ compile_length_tree(Node* node, regex_t* reg)
r += compile_length_tree(NODE_CAR(node), reg);
n++;
} while (IS_NOT_NULL(node = NODE_CDR(node)));
- r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
+ r += (OPSIZE_PUSH + OPSIZE_JUMP) * (n - 1);
}
break;
case NODE_STRING:
- if (NODE_STRING_IS_RAW(node))
- r = compile_length_string_raw_node(STR_(node), reg);
+ if (NODE_STRING_IS_CRUDE(node))
+ r = compile_length_string_crude_node(STR_(node), reg);
else
r = compile_length_string_node(node, reg);
break;
@@ -1812,12 +1961,12 @@ compile_length_tree(Node* node, regex_t* reg)
break;
case NODE_BACKREF:
- r = SIZE_OP_BACKREF;
+ r = OPSIZE_BACKREF;
break;
#ifdef USE_CALL
case NODE_CALL:
- r = SIZE_OP_CALL;
+ r = OPSIZE_CALL;
break;
#endif
@@ -1864,7 +2013,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
do {
len += compile_length_tree(NODE_CAR(x), reg);
if (IS_NOT_NULL(NODE_CDR(x))) {
- len += SIZE_OP_PUSH + SIZE_OP_JUMP;
+ len += OPSIZE_PUSH + OPSIZE_JUMP;
}
} while (IS_NOT_NULL(x = NODE_CDR(x)));
pos = COP_CURR_OFFSET(reg) + 1 + len; /* goal position */
@@ -1875,7 +2024,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
r = add_op(reg, push);
if (r != 0) break;
- COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_JUMP;
+ COP(reg)->push.addr = SIZE_INC + len + OPSIZE_JUMP;
}
r = compile_tree(NODE_CAR(node), reg, env);
if (r != 0) break;
@@ -1890,8 +2039,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
break;
case NODE_STRING:
- if (NODE_STRING_IS_RAW(node))
- r = compile_string_raw_node(STR_(node), reg);
+ if (NODE_STRING_IS_CRUDE(node))
+ r = compile_string_crude_node(STR_(node), reg);
else
r = compile_string_node(node, reg);
break;
@@ -2061,8 +2210,9 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
Node** ptarget = &(NODE_BODY(node));
Node* old = *ptarget;
r = noname_disable_map(ptarget, map, counter);
+ if (r != 0) return r;
if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {
- onig_reduce_nested_quantifier(node, *ptarget);
+ r = onig_reduce_nested_quantifier(node);
}
}
break;
@@ -2274,11 +2424,11 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
}
}
- loc = env->capture_history;
- MEM_STATUS_CLEAR(env->capture_history);
+ loc = env->cap_history;
+ MEM_STATUS_CLEAR(env->cap_history);
for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
if (MEM_STATUS_AT(loc, i)) {
- MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val);
+ MEM_STATUS_ON_SIMPLE(env->cap_history, map[i].new_val);
}
}
@@ -2654,7 +2804,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg)
len = NODE_STRING_LEN(x);
if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y);
- if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {
+ if (NODE_STRING_IS_CASE_FOLD_MATCH(x) || NODE_STRING_IS_CASE_FOLD_MATCH(y)) {
/* tiny version */
return 0;
}
@@ -2714,7 +2864,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
break;
if (exact == 0 ||
- ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_RAW(node)) {
+ ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_CRUDE(node)) {
n = node;
}
}
@@ -2842,9 +2992,9 @@ tree_min_len(Node* node, ScanEnv* env)
if (NODE_IS_RECURSION(node)) break;
backs = BACKREFS_P(br);
- len = tree_min_len(mem_env[backs[0]].node, env);
+ len = tree_min_len(mem_env[backs[0]].mem_node, env);
for (i = 1; i < br->back_num; i++) {
- tmin = tree_min_len(mem_env[backs[i]].node, env);
+ tmin = tree_min_len(mem_env[backs[i]].mem_node, env);
if (len > tmin) len = tmin;
}
}
@@ -3013,7 +3163,7 @@ tree_max_len(Node* node, ScanEnv* env)
}
backs = BACKREFS_P(br);
for (i = 0; i < br->back_num; i++) {
- tmax = tree_max_len(mem_env[backs[i]].node, env);
+ tmax = tree_max_len(mem_env[backs[i]].mem_node, env);
if (len < tmax) len = tmax;
}
}
@@ -3035,7 +3185,7 @@ tree_max_len(Node* node, ScanEnv* env)
if (qn->upper != 0) {
len = tree_max_len(NODE_BODY(node), env);
if (len != 0) {
- if (! IS_REPEAT_INFINITE(qn->upper))
+ if (! IS_INFINITE_REPEAT(qn->upper))
len = distance_multiply(len, qn->upper);
else
len = INFINITE_LEN;
@@ -3150,7 +3300,7 @@ check_backrefs(Node* node, ScanEnv* env)
if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
- NODE_STATUS_ADD(mem_env[backs[i]].node, BACKREF);
+ NODE_STATUS_ADD(mem_env[backs[i]].mem_node, BACKREF);
}
r = 0;
}
@@ -3164,6 +3314,204 @@ check_backrefs(Node* node, ScanEnv* env)
return r;
}
+static int
+set_empty_repeat_node_trav(Node* node, Node* empty, ScanEnv* env)
+{
+ int r;
+
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
+ do {
+ r = set_empty_repeat_node_trav(NODE_CAR(node), empty, env);
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_ANCHOR:
+ {
+ AnchorNode* an = ANCHOR_(node);
+
+ if (! ANCHOR_HAS_BODY(an)) {
+ r = 0;
+ break;
+ }
+
+ switch (an->type) {
+ case ANCR_PREC_READ:
+ case ANCR_LOOK_BEHIND:
+ empty = NULL_NODE;
+ break;
+ default:
+ break;
+ }
+ r = set_empty_repeat_node_trav(NODE_BODY(node), empty, env);
+ }
+ break;
+
+ case NODE_QUANT:
+ {
+ QuantNode* qn = QUANT_(node);
+
+ if (qn->emptiness != BODY_IS_NOT_EMPTY) empty = node;
+ r = set_empty_repeat_node_trav(NODE_BODY(node), empty, env);
+ }
+ break;
+
+ case NODE_BAG:
+ if (IS_NOT_NULL(NODE_BODY(node))) {
+ r = set_empty_repeat_node_trav(NODE_BODY(node), empty, env);
+ if (r != 0) return r;
+ }
+ {
+ BagNode* en = BAG_(node);
+
+ if (en->type == BAG_MEMORY) {
+ if (NODE_IS_BACKREF(node)) {
+ if (IS_NOT_NULL(empty))
+ SCANENV_MEMENV(env)[en->m.regnum].empty_repeat_node = empty;
+ }
+ }
+ else if (en->type == BAG_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = set_empty_repeat_node_trav(en->te.Then, empty, env);
+ if (r != 0) return r;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ r = set_empty_repeat_node_trav(en->te.Else, empty, env);
+ }
+ }
+ }
+ break;
+
+ default:
+ r = 0;
+ break;
+ }
+
+ return r;
+}
+
+static int
+is_ancestor_node(Node* node, Node* me)
+{
+ Node* parent;
+
+ while ((parent = NODE_PARENT(me)) != NULL_NODE) {
+ if (parent == node) return 1;
+ me = parent;
+ }
+ return 0;
+}
+
+static void
+set_empty_status_check_trav(Node* node, ScanEnv* env)
+{
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
+ do {
+ set_empty_status_check_trav(NODE_CAR(node), env);
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_ANCHOR:
+ {
+ AnchorNode* an = ANCHOR_(node);
+
+ if (! ANCHOR_HAS_BODY(an)) break;
+ set_empty_status_check_trav(NODE_BODY(node), env);
+ }
+ break;
+
+ case NODE_QUANT:
+ set_empty_status_check_trav(NODE_BODY(node), env);
+ break;
+
+ case NODE_BAG:
+ if (IS_NOT_NULL(NODE_BODY(node)))
+ set_empty_status_check_trav(NODE_BODY(node), env);
+ {
+ BagNode* en = BAG_(node);
+
+ if (en->type == BAG_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ set_empty_status_check_trav(en->te.Then, env);
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ set_empty_status_check_trav(en->te.Else, env);
+ }
+ }
+ }
+ break;
+
+ case NODE_BACKREF:
+ {
+ int i;
+ int* backs;
+ MemEnv* mem_env = SCANENV_MEMENV(env);
+ BackRefNode* br = BACKREF_(node);
+ backs = BACKREFS_P(br);
+ for (i = 0; i < br->back_num; i++) {
+ Node* ernode = mem_env[backs[i]].empty_repeat_node;
+ if (IS_NOT_NULL(ernode)) {
+ if (! is_ancestor_node(ernode, node)) {
+ MEM_STATUS_LIMIT_ON(env->reg->empty_status_mem, backs[i]);
+ NODE_STATUS_ADD(ernode, EMPTY_STATUS_CHECK);
+ NODE_STATUS_ADD(mem_env[backs[i]].mem_node, EMPTY_STATUS_CHECK);
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+set_parent_node_trav(Node* node, Node* parent)
+{
+ NODE_PARENT(node) = parent;
+
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
+ do {
+ set_parent_node_trav(NODE_CAR(node), node);
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_ANCHOR:
+ if (! ANCHOR_HAS_BODY(ANCHOR_(node))) break;
+ set_parent_node_trav(NODE_BODY(node), node);
+ break;
+
+ case NODE_QUANT:
+ set_parent_node_trav(NODE_BODY(node), node);
+ break;
+
+ case NODE_BAG:
+ if (IS_NOT_NULL(NODE_BODY(node)))
+ set_parent_node_trav(NODE_BODY(node), node);
+ {
+ BagNode* en = BAG_(node);
+
+ if (en->type == BAG_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then))
+ set_parent_node_trav(en->te.Then, node);
+ if (IS_NOT_NULL(en->te.Else)) {
+ set_parent_node_trav(en->te.Else, node);
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
#ifdef USE_CALL
@@ -3269,6 +3617,9 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
if ((eret & RECURSION_MUST) == 0)
r &= ~RECURSION_MUST;
}
+ else {
+ r &= ~RECURSION_MUST;
+ }
}
else {
r = infinite_recursive_call_check(NODE_BODY(node), env, head);
@@ -3443,7 +3794,7 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)
r = recursive_call_check_trav(NODE_BODY(node), env, state);
if (QUANT_(node)->upper == 0) {
if (r == FOUND_CALLED_NODE)
- QUANT_(node)->is_refered = 1;
+ QUANT_(node)->include_referred = 1;
}
break;
@@ -3466,8 +3817,10 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)
if (! NODE_IS_RECURSION(node)) {
NODE_STATUS_ADD(node, MARK1);
r = recursive_call_check(NODE_BODY(node));
- if (r != 0)
+ if (r != 0) {
NODE_STATUS_ADD(node, RECURSION);
+ MEM_STATUS_ON(env->backtrack_mem, en->m.regnum);
+ }
NODE_STATUS_REMOVE(node, MARK1);
}
@@ -3508,6 +3861,96 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)
#endif
+static void
+remove_from_list(Node* prev, Node* a)
+{
+ if (NODE_CDR(prev) != a) return ;
+
+ NODE_CDR(prev) = NODE_CDR(a);
+ NODE_CDR(a) = NULL_NODE;
+}
+
+static int
+reduce_string_list(Node* node)
+{
+ int r = 0;
+
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ {
+ Node* prev;
+ Node* curr;
+ Node* prev_node;
+ Node* next_node;
+
+ prev = NULL_NODE;
+ do {
+ next_node = NODE_CDR(node);
+ curr = NODE_CAR(node);
+ if (NODE_TYPE(curr) == NODE_STRING) {
+ if (IS_NULL(prev) || STR_(curr)->flag != STR_(prev)->flag) {
+ prev = curr;
+ prev_node = node;
+ }
+ else {
+ r = node_str_node_cat(prev, curr);
+ if (r != 0) return r;
+ remove_from_list(prev_node, node);
+ onig_node_free(node);
+ }
+ }
+ else {
+ prev = NULL_NODE;
+ prev_node = node;
+ }
+
+ node = next_node;
+ } while (r == 0 && IS_NOT_NULL(node));
+ }
+ break;
+
+ case NODE_ALT:
+ do {
+ r = reduce_string_list(NODE_CAR(node));
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_ANCHOR:
+ if (IS_NULL(NODE_BODY(node)))
+ break;
+ /* fall */
+ case NODE_QUANT:
+ r = reduce_string_list(NODE_BODY(node));
+ break;
+
+ case NODE_BAG:
+ {
+ BagNode* en = BAG_(node);
+
+ r = reduce_string_list(NODE_BODY(node));
+ if (r != 0) return r;
+
+ if (en->type == BAG_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = reduce_string_list(en->te.Then);
+ if (r != 0) return r;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ r = reduce_string_list(en->te.Else);
+ if (r != 0) return r;
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+}
+
+
#define IN_ALT (1<<0)
#define IN_NOT (1<<1)
#define IN_REAL_REPEAT (1<<2)
@@ -3530,7 +3973,7 @@ divide_look_behind_alternatives(Node* node)
head = NODE_ANCHOR_BODY(an);
np = NODE_CAR(head);
- swap_node(node, head);
+ node_swap(node, head);
NODE_CAR(node) = head;
NODE_BODY(head) = np;
@@ -3552,7 +3995,7 @@ divide_look_behind_alternatives(Node* node)
}
static int
-setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
+tune_look_behind(Node* node, regex_t* reg, ScanEnv* env)
{
int r, len;
AnchorNode* an = ANCHOR_(node);
@@ -3573,7 +4016,7 @@ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
}
static int
-next_setup(Node* node, Node* next_node, regex_t* reg)
+tune_next(Node* node, Node* next_node, regex_t* reg)
{
NodeType type;
@@ -3581,7 +4024,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
type = NODE_TYPE(node);
if (type == NODE_QUANT) {
QuantNode* qn = QUANT_(node);
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
+ if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {
#ifdef USE_QUANT_PEEK_NEXT
Node* n = get_head_value_node(next_node, 1, reg);
/* '\0': for UTF-16BE etc... */
@@ -3591,7 +4034,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
#endif
/* automatic posseivation a*b ==> (?>a*)b */
if (qn->lower <= 1) {
- if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) {
+ if (is_strict_real_node(NODE_BODY(node))) {
Node *x, *y;
x = get_head_value_node(NODE_BODY(node), 0, reg);
if (IS_NOT_NULL(x)) {
@@ -3599,8 +4042,8 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);
CHECK_NULL_RETURN_MEMERR(en);
- NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT);
- swap_node(node, en);
+ NODE_STATUS_ADD(en, STRICT_REAL_REPEAT);
+ node_swap(node, en);
NODE_BODY(node) = en;
}
}
@@ -3620,23 +4063,57 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
static int
-update_string_node_case_fold(regex_t* reg, Node *node)
+is_all_code_len_1_items(int n, OnigCaseFoldCodeItem items[])
{
- UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ int i;
+
+ for (i = 0; i < n; i++) {
+ OnigCaseFoldCodeItem* item = items + i;
+ if (item->code_len != 1) return 0;
+ }
+
+ return 1;
+}
+
+static int
+get_min_max_byte_len_case_fold_items(int n, OnigCaseFoldCodeItem items[], int* rmin, int* rmax)
+{
+ int i, len, minlen, maxlen;
+
+ minlen = INT_MAX;
+ maxlen = 0;
+ for (i = 0; i < n; i++) {
+ OnigCaseFoldCodeItem* item = items + i;
+
+ len = item->byte_len;
+ if (len < minlen) minlen = len;
+ if (len > maxlen) maxlen = len;
+ }
+
+ *rmin = minlen;
+ *rmax = maxlen;
+ return 0;
+}
+
+static int
+conv_string_case_fold(OnigEncoding enc, OnigCaseFoldType case_fold_flag,
+ UChar* s, UChar* end, UChar** rs, UChar** rend, int* rcase_min_len)
+{
+ UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
UChar *sbuf, *ebuf, *sp;
- int r, i, len, sbuf_size;
- StrNode* sn = STR_(node);
+ int i, n, len, sbuf_size;
- end = sn->end;
- sbuf_size = (int )(end - sn->s) * 2;
+ *rs = NULL;
+ sbuf_size = (int )(end - s) * 2;
sbuf = (UChar* )xmalloc(sbuf_size);
CHECK_NULL_RETURN_MEMERR(sbuf);
ebuf = sbuf + sbuf_size;
+ n = 0;
sp = sbuf;
- p = sn->s;
+ p = s;
while (p < end) {
- len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
+ len = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, buf);
for (i = 0; i < len; i++) {
if (sp >= ebuf) {
sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
@@ -3648,364 +4125,310 @@ update_string_node_case_fold(regex_t* reg, Node *node)
*sp++ = buf[i];
}
+ n++;
}
- r = onig_node_str_set(node, sbuf, sp);
- if (r != 0) {
- xfree(sbuf);
- return r;
- }
-
- xfree(sbuf);
+ *rs = sbuf;
+ *rend = sp;
+ *rcase_min_len = n;
return 0;
}
static int
-expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg)
+make_code_list_to_string(Node** rnode, OnigEncoding enc,
+ int n, OnigCodePoint codes[])
{
- int r;
- Node *node;
+ int r, i, len;
+ Node* node;
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- node = onig_node_new_str(s, end);
- if (IS_NULL(node)) return ONIGERR_MEMORY;
+ *rnode = NULL_NODE;
+ node = onig_node_new_str(NULL, NULL);
+ CHECK_NULL_RETURN_MEMERR(node);
- r = update_string_node_case_fold(reg, node);
- if (r != 0) {
- onig_node_free(node);
- return r;
+ for (i = 0; i < n; i++) {
+ len = ONIGENC_CODE_TO_MBC(enc, codes[i], buf);
+ if (len < 0) {
+ r = len;
+ goto err;
+ }
+
+ r = onig_node_str_cat(node, buf, buf + len);
+ if (r != 0) goto err;
}
- NODE_STRING_SET_AMBIG(node);
- NODE_STRING_SET_DONT_GET_OPT_INFO(node);
*rnode = node;
return 0;
+
+ err:
+ onig_node_free(node);
+ return r;
}
static int
-expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p,
- int slen, UChar *end, regex_t* reg, Node **rnode)
+unravel_cf_node_add(Node** rlist, Node* add)
{
- int r, i, j;
- int len;
- int varlen;
- Node *anode, *var_anode, *snode, *xnode, *an;
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+ Node *list;
- *rnode = var_anode = NULL_NODE;
-
- varlen = 0;
- for (i = 0; i < item_num; i++) {
- if (items[i].byte_len != slen) {
- varlen = 1;
- break;
- }
+ list = *rlist;
+ if (IS_NULL(list)) {
+ list = onig_node_new_list(add, NULL);
+ CHECK_NULL_RETURN_MEMERR(list);
+ *rlist = list;
+ }
+ else {
+ Node* r = node_list_add(list, add);
+ CHECK_NULL_RETURN_MEMERR(r);
}
- if (varlen != 0) {
- *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
+ return 0;
+}
+
+static int
+unravel_cf_string_add(Node** rlist, Node** rsn, UChar* s, UChar* end,
+ unsigned int flag, int case_min_len)
+{
+ int r;
+ Node *sn, *list;
- xnode = onig_node_new_list(NULL, NULL);
- if (IS_NULL(xnode)) goto mem_err;
- NODE_CAR(var_anode) = xnode;
+ list = *rlist;
+ sn = *rsn;
- anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(anode)) goto mem_err;
- NODE_CAR(xnode) = anode;
+ if (IS_NOT_NULL(sn) && STR_(sn)->flag == flag) {
+ if (NODE_STRING_IS_CASE_FOLD_MATCH(sn))
+ r = node_str_cat_case_fold(sn, s, end, case_min_len);
+ else
+ r = onig_node_str_cat(sn, s, end);
}
else {
- *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(anode)) return ONIGERR_MEMORY;
+ sn = onig_node_new_str(s, end);
+ CHECK_NULL_RETURN_MEMERR(sn);
+
+ STR_(sn)->flag = flag;
+ STR_(sn)->case_min_len = case_min_len;
+ r = unravel_cf_node_add(&list, sn);
}
- snode = onig_node_new_str(p, p + slen);
- if (IS_NULL(snode)) goto mem_err;
+ if (r == 0) {
+ *rlist = list;
+ *rsn = sn;
+ }
+ return r;
+}
- NODE_CAR(anode) = snode;
+static int
+unravel_cf_string_fold_add(Node** rlist, Node** rsn, OnigEncoding enc,
+ OnigCaseFoldType case_fold_flag, UChar* s, UChar* end)
+{
+ int r;
+ int case_min_len;
+ UChar *rs, *rend;
- for (i = 0; i < item_num; i++) {
- snode = onig_node_new_str(NULL, NULL);
- if (IS_NULL(snode)) goto mem_err;
+ r = conv_string_case_fold(enc, case_fold_flag, s, end,
+ &rs, &rend, &case_min_len);
+ if (r != 0) return r;
- for (j = 0; j < items[i].code_len; j++) {
- len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
- if (len < 0) {
- r = len;
- goto mem_err2;
- }
+ r = unravel_cf_string_add(rlist, rsn, rs, rend,
+ NODE_STRING_CASE_FOLD_MATCH, case_min_len);
+ xfree(rs);
- r = onig_node_str_cat(snode, buf, buf + len);
- if (r != 0) goto mem_err2;
- }
+ return r;
+}
- an = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(an)) {
- goto mem_err2;
- }
+static int
+unravel_cf_string_alt_or_cc_add(Node** rlist, int n,
+ OnigCaseFoldCodeItem items[], int byte_len, OnigEncoding enc,
+ OnigCaseFoldType case_fold_flag, UChar* s, UChar* end)
+{
+ int r, i;
+ Node* node;
- if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) {
- Node *rem;
- UChar *q = p + items[i].byte_len;
+ if (is_all_code_len_1_items(n, items)) {
+ OnigCodePoint codes[14];/* least ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM + 1 */
- if (q < end) {
- r = expand_case_fold_make_rem_string(&rem, q, end, reg);
- if (r != 0) {
- onig_node_free(an);
- goto mem_err2;
- }
+ codes[0] = ONIGENC_MBC_TO_CODE(enc, s, end);
+ for (i = 0; i < n; i++) {
+ OnigCaseFoldCodeItem* item = items + i;
+ codes[i+1] = item->code[0];
+ }
+ r = onig_new_cclass_with_code_list(&node, enc, n + 1, codes);
+ if (r != 0) return r;
+ }
+ else {
+ Node *snode, *alt, *curr;
- xnode = onig_node_list_add(NULL_NODE, snode);
- if (IS_NULL(xnode)) {
- onig_node_free(an);
- onig_node_free(rem);
- goto mem_err2;
- }
- if (IS_NULL(onig_node_list_add(xnode, rem))) {
- onig_node_free(an);
- onig_node_free(xnode);
- onig_node_free(rem);
- goto mem_err;
- }
+ snode = onig_node_new_str(s, end);
+ CHECK_NULL_RETURN_MEMERR(snode);
+ node = curr = onig_node_new_alt(snode, NULL_NODE);
+ if (IS_NULL(curr)) {
+ onig_node_free(snode);
+ return ONIGERR_MEMORY;
+ }
- NODE_CAR(an) = xnode;
+ r = 0;
+ for (i = 0; i < n; i++) {
+ OnigCaseFoldCodeItem* item = items + i;
+ r = make_code_list_to_string(&snode, enc, item->code_len, item->code);
+ if (r != 0) {
+ onig_node_free(node);
+ return r;
}
- else {
- NODE_CAR(an) = snode;
+
+ alt = onig_node_new_alt(snode, NULL_NODE);
+ if (IS_NULL(alt)) {
+ onig_node_free(snode);
+ onig_node_free(node);
+ return ONIGERR_MEMORY;
}
- NODE_CDR(var_anode) = an;
- var_anode = an;
- }
- else {
- NODE_CAR(an) = snode;
- NODE_CDR(anode) = an;
- anode = an;
+ NODE_CDR(curr) = alt;
+ curr = alt;
}
}
- return varlen;
-
- mem_err2:
- onig_node_free(snode);
-
- mem_err:
- onig_node_free(*rnode);
-
- return ONIGERR_MEMORY;
+ r = unravel_cf_node_add(rlist, node);
+ if (r != 0) onig_node_free(node);
+ return r;
}
static int
-is_good_case_fold_items_for_search(OnigEncoding enc, int slen,
- int n, OnigCaseFoldCodeItem items[])
+unravel_cf_look_behind_add(Node** rlist, Node** rsn,
+ int n, OnigCaseFoldCodeItem items[], OnigEncoding enc,
+ UChar* s, int one_len)
{
- int i, len;
- UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ int r, i, found;
+ found = 0;
for (i = 0; i < n; i++) {
OnigCaseFoldCodeItem* item = items + i;
+ if (item->byte_len == one_len) {
+ if (item->code_len == 1) {
+ found = 1;
+ }
+ }
+ }
- if (item->code_len != 1) return 0;
- if (item->byte_len != slen) return 0;
- len = ONIGENC_CODE_TO_MBC(enc, item->code[0], buf);
- if (len != slen) return 0;
+ if (found == 0) {
+ r = unravel_cf_string_add(rlist, rsn, s, s + one_len, 0 /* flag */, 0);
}
+ else {
+ Node* node;
+ OnigCodePoint codes[14];/* least ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM + 1 */
- return 1;
-}
+ found = 0;
+ codes[found++] = ONIGENC_MBC_TO_CODE(enc, s, s + one_len);
+ for (i = 0; i < n; i++) {
+ OnigCaseFoldCodeItem* item = items + i;
+ if (item->byte_len == one_len) {
+ if (item->code_len == 1) {
+ codes[found++] = item->code[0];
+ }
+ }
+ }
+ r = onig_new_cclass_with_code_list(&node, enc, found, codes);
+ if (r != 0) return r;
+
+ r = unravel_cf_node_add(rlist, node);
+ if (r != 0) onig_node_free(node);
+
+ *rsn = NULL_NODE;
+ }
-#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
+ return r;
+}
static int
-expand_case_fold_string(Node* node, regex_t* reg, int state)
-{
- int r, n, len, alt_num;
- int fold_len;
- int prev_is_ambig, prev_is_good, is_good, is_in_look_behind;
- UChar *start, *end, *p;
- UChar* foldp;
- Node *top_root, *root, *snode, *prev_node;
+unravel_case_fold_string(Node* node, regex_t* reg, int state)
+{
+ int r, n, one_len, min_len, max_len, in_look_behind;
+ UChar *start, *end, *p, *q;
+ StrNode* snode;
+ Node *sn, *list;
+ OnigEncoding enc;
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
- UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
- StrNode* sn;
- if (NODE_STRING_IS_AMBIG(node)) return 0;
+ if (NODE_STRING_IS_CASE_EXPANDED(node)) return 0;
- sn = STR_(node);
+ snode = STR_(node);
- start = sn->s;
- end = sn->end;
+ start = snode->s;
+ end = snode->end;
if (start >= end) return 0;
- is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
+ in_look_behind = (state & IN_LOOK_BEHIND) != 0;
+ enc = reg->enc;
- r = 0;
- top_root = root = prev_node = snode = NULL_NODE;
- alt_num = 1;
+ list = sn = NULL_NODE;
p = start;
while (p < end) {
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
- p, end, items);
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, p, end,
+ items);
if (n < 0) {
r = n;
goto err;
}
- len = enclen(reg->enc, p);
- is_good = is_good_case_fold_items_for_search(reg->enc, len, n, items);
-
- if (is_in_look_behind ||
- (IS_NOT_NULL(snode) ||
- (is_good
- /* expand single char case: ex. /(?i:a)/ */
- && !(p == start && p + len >= end)))) {
- if (IS_NULL(snode)) {
- if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- prev_node = snode = onig_node_new_str(NULL, NULL);
- if (IS_NULL(snode)) goto mem_err;
- if (IS_NOT_NULL(root)) {
- if (IS_NULL(onig_node_list_add(root, snode))) {
- onig_node_free(snode);
- goto mem_err;
- }
- }
-
- prev_is_ambig = -1; /* -1: new */
- prev_is_good = 0; /* escape compiler warning */
- }
- else {
- prev_is_ambig = NODE_STRING_IS_AMBIG(snode);
- prev_is_good = NODE_STRING_IS_GOOD_AMBIG(snode);
- }
-
- if (n != 0) {
- foldp = p;
- fold_len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag,
- &foldp, end, buf);
- foldp = buf;
- }
- else {
- foldp = p; fold_len = len;
- }
-
- if ((prev_is_ambig == 0 && n != 0) ||
- (prev_is_ambig > 0 && (n == 0 || prev_is_good != is_good))) {
- if (IS_NULL(root) /* && IS_NOT_NULL(prev_node) */) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- prev_node = snode = onig_node_new_str(foldp, foldp + fold_len);
- if (IS_NULL(snode)) goto mem_err;
- if (IS_NULL(onig_node_list_add(root, snode))) {
- onig_node_free(snode);
- goto mem_err;
- }
- }
- else {
- r = onig_node_str_cat(snode, foldp, foldp + fold_len);
- if (r != 0) goto err;
- }
-
- if (n != 0) NODE_STRING_SET_AMBIG(snode);
- if (is_good != 0) NODE_STRING_SET_GOOD_AMBIG(snode);
+ one_len = enclen(enc, p);
+ if (n == 0) {
+ q = p + one_len;
+ r = unravel_cf_string_add(&list, &sn, p, q, 0 /* flag */, 0);
+ if (r != 0) goto err;
}
else {
- alt_num *= (n + 1);
- if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
-
- if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(prev_node);
- goto mem_err;
- }
+ if (in_look_behind != 0) {
+ q = p + one_len;
+ r = unravel_cf_look_behind_add(&list, &sn, n, items, enc, p, one_len);
+ if (r != 0) goto err;
}
-
- r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
- if (r < 0) goto mem_err;
- if (r == 1) {
- if (IS_NULL(root)) {
- top_root = prev_node;
+ else {
+ get_min_max_byte_len_case_fold_items(n, items, &min_len, &max_len);
+ q = p + max_len;
+ if (one_len == max_len && min_len == max_len) {
+ r = unravel_cf_string_alt_or_cc_add(&list, n, items, max_len, enc,
+ reg->case_fold_flag, p, q);
+ if (r != 0) goto err;
+ sn = NULL_NODE;
}
else {
- if (IS_NULL(onig_node_list_add(root, prev_node))) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- root = NODE_CAR(prev_node);
- }
- else { /* r == 0 */
- if (IS_NOT_NULL(root)) {
- if (IS_NULL(onig_node_list_add(root, prev_node))) {
- onig_node_free(prev_node);
- goto mem_err;
- }
+ r = unravel_cf_string_fold_add(&list, &sn, enc, reg->case_fold_flag,
+ p, q);
+ if (r != 0) goto err;
}
}
-
- snode = NULL_NODE;
}
- p += len;
+ p = q;
}
- if (p < end) {
- Node *srem;
-
- r = expand_case_fold_make_rem_string(&srem, p, end, reg);
- if (r != 0) goto mem_err;
-
- if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(srem);
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- if (IS_NULL(root)) {
- prev_node = srem;
+ if (IS_NOT_NULL(list)) {
+ if (node_list_len(list) == 1) {
+ node_swap(node, NODE_CAR(list));
}
else {
- if (IS_NULL(onig_node_list_add(root, srem))) {
- onig_node_free(srem);
- goto mem_err;
- }
+ node_swap(node, list);
}
+ onig_node_free(list);
+ }
+ else {
+ node_swap(node, sn);
+ onig_node_free(sn);
}
-
- /* ending */
- top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
- swap_node(node, top_root);
- onig_node_free(top_root);
return 0;
- mem_err:
- r = ONIGERR_MEMORY;
-
err:
- onig_node_free(top_root);
+ if (IS_NOT_NULL(list))
+ onig_node_free(list);
+ else if (IS_NOT_NULL(sn))
+ onig_node_free(sn);
+
return r;
}
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
-static enum BodyEmpty
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
+static enum BodyEmptyType
quantifiers_memory_node_info(Node* node)
{
- int r = BODY_IS_EMPTY;
+ int r = BODY_IS_EMPTY_POSSIBILITY;
switch (NODE_TYPE(node)) {
case NODE_LIST:
@@ -4022,7 +4445,7 @@ quantifiers_memory_node_info(Node* node)
#ifdef USE_CALL
case NODE_CALL:
if (NODE_IS_RECURSION(node)) {
- return BODY_IS_EMPTY_REC; /* tiny version */
+ return BODY_IS_EMPTY_POSSIBILITY_REC; /* tiny version */
}
else
r = quantifiers_memory_node_info(NODE_BODY(node));
@@ -4044,9 +4467,9 @@ quantifiers_memory_node_info(Node* node)
switch (en->type) {
case BAG_MEMORY:
if (NODE_IS_RECURSION(node)) {
- return BODY_IS_EMPTY_REC;
+ return BODY_IS_EMPTY_POSSIBILITY_REC;
}
- return BODY_IS_EMPTY_MEM;
+ return BODY_IS_EMPTY_POSSIBILITY_MEM;
break;
case BAG_OPTION:
@@ -4083,7 +4506,7 @@ quantifiers_memory_node_info(Node* node)
return r;
}
-#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
+#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
#ifdef USE_CALL
@@ -4092,7 +4515,7 @@ quantifiers_memory_node_info(Node* node)
__inline
#endif
static int
-setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
+tune_call_node_call(CallNode* cn, ScanEnv* env, int state)
{
MemEnv* mem_env = SCANENV_MEMENV(env);
@@ -4112,7 +4535,7 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
}
set_call_attr:
- NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;
+ NODE_CALL_BODY(cn) = mem_env[cn->group_num].mem_node;
if (IS_NULL(NODE_CALL_BODY(cn))) {
onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
cn->name, cn->name_end);
@@ -4143,23 +4566,23 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
}
static void
-setup_call2_call(Node* node)
+tune_call2_call(Node* node)
{
switch (NODE_TYPE(node)) {
case NODE_LIST:
case NODE_ALT:
do {
- setup_call2_call(NODE_CAR(node));
+ tune_call2_call(NODE_CAR(node));
} while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
case NODE_QUANT:
- setup_call2_call(NODE_BODY(node));
+ tune_call2_call(NODE_BODY(node));
break;
case NODE_ANCHOR:
if (ANCHOR_HAS_BODY(ANCHOR_(node)))
- setup_call2_call(NODE_BODY(node));
+ tune_call2_call(NODE_BODY(node));
break;
case NODE_BAG:
@@ -4169,19 +4592,19 @@ setup_call2_call(Node* node)
if (en->type == BAG_MEMORY) {
if (! NODE_IS_MARK1(node)) {
NODE_STATUS_ADD(node, MARK1);
- setup_call2_call(NODE_BODY(node));
+ tune_call2_call(NODE_BODY(node));
NODE_STATUS_REMOVE(node, MARK1);
}
}
else if (en->type == BAG_IF_ELSE) {
- setup_call2_call(NODE_BODY(node));
+ tune_call2_call(NODE_BODY(node));
if (IS_NOT_NULL(en->te.Then))
- setup_call2_call(en->te.Then);
+ tune_call2_call(en->te.Then);
if (IS_NOT_NULL(en->te.Else))
- setup_call2_call(en->te.Else);
+ tune_call2_call(en->te.Else);
}
else {
- setup_call2_call(NODE_BODY(node));
+ tune_call2_call(NODE_BODY(node));
}
}
break;
@@ -4197,7 +4620,7 @@ setup_call2_call(Node* node)
NODE_STATUS_ADD(called, CALLED);
BAG_(called)->m.entry_count++;
- setup_call2_call(called);
+ tune_call2_call(called);
}
NODE_STATUS_REMOVE(node, MARK1);
}
@@ -4209,7 +4632,7 @@ setup_call2_call(Node* node)
}
static int
-setup_call(Node* node, ScanEnv* env, int state)
+tune_call(Node* node, ScanEnv* env, int state)
{
int r;
@@ -4217,7 +4640,7 @@ setup_call(Node* node, ScanEnv* env, int state)
case NODE_LIST:
case NODE_ALT:
do {
- r = setup_call(NODE_CAR(node), env, state);
+ r = tune_call(NODE_CAR(node), env, state);
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
@@ -4225,12 +4648,12 @@ setup_call(Node* node, ScanEnv* env, int state)
if (QUANT_(node)->upper == 0)
state |= IN_ZERO_REPEAT;
- r = setup_call(NODE_BODY(node), env, state);
+ r = tune_call(NODE_BODY(node), env, state);
break;
case NODE_ANCHOR:
if (ANCHOR_HAS_BODY(ANCHOR_(node)))
- r = setup_call(NODE_BODY(node), env, state);
+ r = tune_call(NODE_BODY(node), env, state);
else
r = 0;
break;
@@ -4244,20 +4667,20 @@ setup_call(Node* node, ScanEnv* env, int state)
NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
BAG_(node)->m.entry_count--;
}
- r = setup_call(NODE_BODY(node), env, state);
+ r = tune_call(NODE_BODY(node), env, state);
}
else if (en->type == BAG_IF_ELSE) {
- r = setup_call(NODE_BODY(node), env, state);
+ r = tune_call(NODE_BODY(node), env, state);
if (r != 0) return r;
if (IS_NOT_NULL(en->te.Then)) {
- r = setup_call(en->te.Then, env, state);
+ r = tune_call(en->te.Then, env, state);
if (r != 0) return r;
}
if (IS_NOT_NULL(en->te.Else))
- r = setup_call(en->te.Else, env, state);
+ r = tune_call(en->te.Else, env, state);
}
else
- r = setup_call(NODE_BODY(node), env, state);
+ r = tune_call(NODE_BODY(node), env, state);
}
break;
@@ -4267,7 +4690,7 @@ setup_call(Node* node, ScanEnv* env, int state)
CALL_(node)->entry_count--;
}
- r = setup_call_node_call(CALL_(node), env, state);
+ r = tune_call_node_call(CALL_(node), env, state);
break;
default:
@@ -4279,7 +4702,7 @@ setup_call(Node* node, ScanEnv* env, int state)
}
static int
-setup_call2(Node* node)
+tune_call2(Node* node)
{
int r = 0;
@@ -4287,23 +4710,23 @@ setup_call2(Node* node)
case NODE_LIST:
case NODE_ALT:
do {
- r = setup_call2(NODE_CAR(node));
+ r = tune_call2(NODE_CAR(node));
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
case NODE_QUANT:
if (QUANT_(node)->upper != 0)
- r = setup_call2(NODE_BODY(node));
+ r = tune_call2(NODE_BODY(node));
break;
case NODE_ANCHOR:
if (ANCHOR_HAS_BODY(ANCHOR_(node)))
- r = setup_call2(NODE_BODY(node));
+ r = tune_call2(NODE_BODY(node));
break;
case NODE_BAG:
if (! NODE_IS_IN_ZERO_REPEAT(node))
- r = setup_call2(NODE_BODY(node));
+ r = tune_call2(NODE_BODY(node));
{
BagNode* en = BAG_(node);
@@ -4311,18 +4734,18 @@ setup_call2(Node* node)
if (r != 0) return r;
if (en->type == BAG_IF_ELSE) {
if (IS_NOT_NULL(en->te.Then)) {
- r = setup_call2(en->te.Then);
+ r = tune_call2(en->te.Then);
if (r != 0) return r;
}
if (IS_NOT_NULL(en->te.Else))
- r = setup_call2(en->te.Else);
+ r = tune_call2(en->te.Else);
}
}
break;
case NODE_CALL:
if (! NODE_IS_IN_ZERO_REPEAT(node)) {
- setup_call2_call(node);
+ tune_call2_call(node);
}
break;
@@ -4335,7 +4758,7 @@ setup_call2(Node* node)
static void
-setup_called_state_call(Node* node, int state)
+tune_called_state_call(Node* node, int state)
{
switch (NODE_TYPE(node)) {
case NODE_ALT:
@@ -4343,7 +4766,7 @@ setup_called_state_call(Node* node, int state)
/* fall */
case NODE_LIST:
do {
- setup_called_state_call(NODE_CAR(node), state);
+ tune_called_state_call(NODE_CAR(node), state);
} while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
@@ -4351,12 +4774,12 @@ setup_called_state_call(Node* node, int state)
{
QuantNode* qn = QUANT_(node);
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
state |= IN_REAL_REPEAT;
if (qn->lower != qn->upper)
state |= IN_VAR_REPEAT;
- setup_called_state_call(NODE_QUANT_BODY(qn), state);
+ tune_called_state_call(NODE_QUANT_BODY(qn), state);
}
break;
@@ -4371,7 +4794,7 @@ setup_called_state_call(Node* node, int state)
/* fall */
case ANCR_PREC_READ:
case ANCR_LOOK_BEHIND:
- setup_called_state_call(NODE_ANCHOR_BODY(an), state);
+ tune_called_state_call(NODE_ANCHOR_BODY(an), state);
break;
default:
break;
@@ -4387,31 +4810,33 @@ setup_called_state_call(Node* node, int state)
if (NODE_IS_MARK1(node)) {
if ((~en->m.called_state & state) != 0) {
en->m.called_state |= state;
- setup_called_state_call(NODE_BODY(node), state);
+ tune_called_state_call(NODE_BODY(node), state);
}
}
else {
NODE_STATUS_ADD(node, MARK1);
en->m.called_state |= state;
- setup_called_state_call(NODE_BODY(node), state);
+ tune_called_state_call(NODE_BODY(node), state);
NODE_STATUS_REMOVE(node, MARK1);
}
}
else if (en->type == BAG_IF_ELSE) {
+ state |= IN_ALT;
+ tune_called_state_call(NODE_BODY(node), state);
if (IS_NOT_NULL(en->te.Then)) {
- setup_called_state_call(en->te.Then, state);
+ tune_called_state_call(en->te.Then, state);
}
if (IS_NOT_NULL(en->te.Else))
- setup_called_state_call(en->te.Else, state);
+ tune_called_state_call(en->te.Else, state);
}
else {
- setup_called_state_call(NODE_BODY(node), state);
+ tune_called_state_call(NODE_BODY(node), state);
}
}
break;
case NODE_CALL:
- setup_called_state_call(NODE_BODY(node), state);
+ tune_called_state_call(NODE_BODY(node), state);
break;
default:
@@ -4420,7 +4845,7 @@ setup_called_state_call(Node* node, int state)
}
static void
-setup_called_state(Node* node, int state)
+tune_called_state(Node* node, int state)
{
switch (NODE_TYPE(node)) {
case NODE_ALT:
@@ -4428,13 +4853,13 @@ setup_called_state(Node* node, int state)
/* fall */
case NODE_LIST:
do {
- setup_called_state(NODE_CAR(node), state);
+ tune_called_state(NODE_CAR(node), state);
} while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
#ifdef USE_CALL
case NODE_CALL:
- setup_called_state_call(node, state);
+ tune_called_state_call(node, state);
break;
#endif
@@ -4451,14 +4876,15 @@ setup_called_state(Node* node, int state)
/* fall */
case BAG_OPTION:
case BAG_STOP_BACKTRACK:
- setup_called_state(NODE_BODY(node), state);
+ tune_called_state(NODE_BODY(node), state);
break;
case BAG_IF_ELSE:
- setup_called_state(NODE_BODY(node), state);
+ state |= IN_ALT;
+ tune_called_state(NODE_BODY(node), state);
if (IS_NOT_NULL(en->te.Then))
- setup_called_state(en->te.Then, state);
+ tune_called_state(en->te.Then, state);
if (IS_NOT_NULL(en->te.Else))
- setup_called_state(en->te.Else, state);
+ tune_called_state(en->te.Else, state);
break;
}
}
@@ -4468,12 +4894,12 @@ setup_called_state(Node* node, int state)
{
QuantNode* qn = QUANT_(node);
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
state |= IN_REAL_REPEAT;
if (qn->lower != qn->upper)
state |= IN_VAR_REPEAT;
- setup_called_state(NODE_QUANT_BODY(qn), state);
+ tune_called_state(NODE_QUANT_BODY(qn), state);
}
break;
@@ -4488,7 +4914,7 @@ setup_called_state(Node* node, int state)
/* fall */
case ANCR_PREC_READ:
case ANCR_LOOK_BEHIND:
- setup_called_state(NODE_ANCHOR_BODY(an), state);
+ tune_called_state(NODE_ANCHOR_BODY(an), state);
break;
default:
break;
@@ -4509,13 +4935,13 @@ setup_called_state(Node* node, int state)
#endif /* USE_CALL */
-static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);
+static int tune_tree(Node* node, regex_t* reg, int state, ScanEnv* env);
#ifdef __GNUC__
__inline
#endif
static int
-setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
+tune_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
{
/* allowed node types in look-behind */
#define ALLOWED_TYPE_IN_LB \
@@ -4543,10 +4969,10 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
switch (an->type) {
case ANCR_PREC_READ:
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
+ r = tune_tree(NODE_ANCHOR_BODY(an), reg, state, env);
break;
case ANCR_PREC_READ_NOT:
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
+ r = tune_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
break;
case ANCR_LOOK_BEHIND:
@@ -4555,9 +4981,9 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
ALLOWED_BAG_IN_LB, ALLOWED_ANCHOR_IN_LB);
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env);
+ r = tune_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env);
if (r != 0) return r;
- r = setup_look_behind(node, reg, env);
+ r = tune_look_behind(node, reg, env);
}
break;
@@ -4567,10 +4993,10 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
ALLOWED_BAG_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND),
- env);
+ r = tune_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND),
+ env);
if (r != 0) return r;
- r = setup_look_behind(node, reg, env);
+ r = tune_look_behind(node, reg, env);
}
break;
@@ -4586,7 +5012,7 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
__inline
#endif
static int
-setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
+tune_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
{
int r;
OnigLen d;
@@ -4600,44 +5026,37 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
NODE_STATUS_ADD(node, IN_MULTI_ENTRY);
}
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 1) {
d = tree_min_len(body, env);
if (d == 0) {
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
- qn->empty_info = quantifiers_memory_node_info(body);
- if (qn->empty_info == BODY_IS_EMPTY_REC) {
- if (NODE_TYPE(body) == NODE_BAG &&
- BAG_(body)->type == BAG_MEMORY) {
- MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum);
- }
- }
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
+ qn->emptiness = quantifiers_memory_node_info(body);
#else
- qn->empty_info = BODY_IS_EMPTY;
+ qn->emptiness = BODY_IS_EMPTY_POSSIBILITY;
#endif
}
}
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
state |= IN_REAL_REPEAT;
if (qn->lower != qn->upper)
state |= IN_VAR_REPEAT;
- r = setup_tree(body, reg, state, env);
+ r = tune_tree(body, reg, state, env);
if (r != 0) return r;
/* expand string */
#define EXPAND_STRING_MAX_LENGTH 100
if (NODE_TYPE(body) == NODE_STRING) {
- if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
+ if (!IS_INFINITE_REPEAT(qn->lower) && qn->lower == qn->upper &&
qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
int len = NODE_STRING_LEN(body);
- StrNode* sn = STR_(body);
if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
int i, n = qn->lower;
- onig_node_conv_to_str_node(node, STR_(body)->flag);
+ node_conv_to_str_node(node, STR_(body)->flag);
for (i = 0; i < n; i++) {
- r = onig_node_str_cat(node, sn->s, sn->end);
+ r = node_str_node_cat(node, body);
if (r != 0) return r;
}
onig_node_free(body);
@@ -4646,7 +5065,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
}
}
- if (qn->greedy && (qn->empty_info == BODY_IS_NOT_EMPTY)) {
+ if (qn->greedy && (qn->emptiness == BODY_IS_NOT_EMPTY)) {
if (NODE_TYPE(body) == NODE_QUANT) {
QuantNode* tqn = QUANT_(body);
if (IS_NOT_NULL(tqn->head_exact)) {
@@ -4662,8 +5081,8 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
return r;
}
-/* setup_tree does the following work.
- 1. check empty loop. (set qn->empty_info)
+/* tune_tree does the following work.
+ 1. check empty loop. (set qn->emptiness)
2. expand ignore-case in char class.
3. set memory status bit flags. (reg->mem_stats)
4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
@@ -4671,7 +5090,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
6. expand repeated string.
*/
static int
-setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
+tune_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
{
int r = 0;
@@ -4680,9 +5099,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
{
Node* prev = NULL_NODE;
do {
- r = setup_tree(NODE_CAR(node), reg, state, env);
+ r = tune_tree(NODE_CAR(node), reg, state, env);
if (IS_NOT_NULL(prev) && r == 0) {
- r = next_setup(prev, NODE_CAR(node), reg);
+ r = tune_next(prev, NODE_CAR(node), reg);
}
prev = NODE_CAR(node);
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
@@ -4691,13 +5110,13 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
case NODE_ALT:
do {
- r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env);
+ r = tune_tree(NODE_CAR(node), reg, (state | IN_ALT), env);
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
case NODE_STRING:
- if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {
- r = expand_case_fold_string(node, reg, state);
+ if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_CRUDE(node)) {
+ r = unravel_case_fold_string(node, reg, state);
}
break;
@@ -4710,12 +5129,18 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
for (i = 0; i < br->back_num; i++) {
if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
MEM_STATUS_ON(env->backrefed_mem, p[i]);
- MEM_STATUS_ON(env->bt_mem_start, p[i]);
+#if 0
#ifdef USE_BACKREF_WITH_LEVEL
if (NODE_IS_NEST_LEVEL(node)) {
- MEM_STATUS_ON(env->bt_mem_end, p[i]);
+ MEM_STATUS_ON(env->backtrack_mem, p[i]);
}
#endif
+#else
+ /* More precisely, it should be checked whether alt/repeat exists before
+ the subject capture node, and then this backreference position
+ exists before (or in) the capture node. */
+ MEM_STATUS_ON(env->backtrack_mem, p[i]);
+#endif
}
}
break;
@@ -4729,7 +5154,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
{
OnigOptionType options = reg->options;
reg->options = BAG_(node)->o.options;
- r = setup_tree(NODE_BODY(node), reg, state, env);
+ r = tune_tree(NODE_BODY(node), reg, state, env);
reg->options = options;
}
break;
@@ -4741,46 +5166,46 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0
|| NODE_IS_RECURSION(node)) {
- MEM_STATUS_ON(env->bt_mem_start, en->m.regnum);
+ MEM_STATUS_ON(env->backtrack_mem, en->m.regnum);
}
- r = setup_tree(NODE_BODY(node), reg, state, env);
+ r = tune_tree(NODE_BODY(node), reg, state, env);
break;
case BAG_STOP_BACKTRACK:
{
Node* target = NODE_BODY(node);
- r = setup_tree(target, reg, state, env);
+ r = tune_tree(target, reg, state, env);
if (NODE_TYPE(target) == NODE_QUANT) {
QuantNode* tqn = QUANT_(target);
- if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
+ if (IS_INFINITE_REPEAT(tqn->upper) && tqn->lower <= 1 &&
tqn->greedy != 0) { /* (?>a*), a*+ etc... */
- if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target)))
- NODE_STATUS_ADD(node, STOP_BT_SIMPLE_REPEAT);
+ if (is_strict_real_node(NODE_BODY(target)))
+ NODE_STATUS_ADD(node, STRICT_REAL_REPEAT);
}
}
}
break;
case BAG_IF_ELSE:
- r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);
+ r = tune_tree(NODE_BODY(node), reg, (state | IN_ALT), env);
if (r != 0) return r;
if (IS_NOT_NULL(en->te.Then)) {
- r = setup_tree(en->te.Then, reg, (state | IN_ALT), env);
+ r = tune_tree(en->te.Then, reg, (state | IN_ALT), env);
if (r != 0) return r;
}
if (IS_NOT_NULL(en->te.Else))
- r = setup_tree(en->te.Else, reg, (state | IN_ALT), env);
+ r = tune_tree(en->te.Else, reg, (state | IN_ALT), env);
break;
}
}
break;
case NODE_QUANT:
- r = setup_quant(node, reg, state, env);
+ r = tune_quant(node, reg, state, env);
break;
case NODE_ANCHOR:
- r = setup_anchor(node, reg, state, env);
+ r = tune_anchor(node, reg, state, env);
break;
#ifdef USE_CALL
@@ -4879,7 +5304,7 @@ typedef struct {
} MinMax;
typedef struct {
- MinMax mmd;
+ MinMax mm;
OnigEncoding enc;
OnigOptionType options;
OnigCaseFoldType case_fold_flag;
@@ -4892,17 +5317,16 @@ typedef struct {
} OptAnc;
typedef struct {
- MinMax mmd; /* position */
+ MinMax mm; /* position */
OptAnc anc;
int reach_end;
int case_fold;
- int good_case_fold;
int len;
UChar s[OPT_EXACT_MAXLEN];
} OptStr;
typedef struct {
- MinMax mmd; /* position */
+ MinMax mm; /* position */
OptAnc anc;
int value; /* weighted value */
UChar map[CHAR_MAP_SIZE];
@@ -5119,11 +5543,10 @@ is_full_opt_exact(OptStr* e)
static void
clear_opt_exact(OptStr* e)
{
- clear_mml(&e->mmd);
+ clear_mml(&e->mm);
clear_opt_anc_info(&e->anc);
e->reach_end = 0;
e->case_fold = 0;
- e->good_case_fold = 0;
e->len = 0;
e->s[0] = '\0';
}
@@ -5147,11 +5570,6 @@ concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)
to->case_fold = 1;
}
- else {
- if (to->good_case_fold != 0) {
- if (add->good_case_fold == 0) return 0;
- }
- }
}
r = 0;
@@ -5206,7 +5624,7 @@ alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)
return ;
}
- if (! is_equal_mml(&to->mmd, &add->mmd)) {
+ if (! is_equal_mml(&to->mm, &add->mm)) {
clear_opt_exact(to);
return ;
}
@@ -5228,8 +5646,6 @@ alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)
to->len = i;
if (add->case_fold != 0)
to->case_fold = 1;
- if (add->good_case_fold == 0)
- to->good_case_fold = 0;
alt_merge_opt_anc_info(&to->anc, &add->anc);
if (! to->reach_end) to->anc.right = 0;
@@ -5262,10 +5678,7 @@ select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt)
if (now->case_fold == 0) vn *= 2;
if (alt->case_fold == 0) va *= 2;
- if (now->good_case_fold != 0) vn *= 4;
- if (alt->good_case_fold != 0) va *= 4;
-
- if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
+ if (comp_distance_value(&now->mm, &alt->mm, vn, va) > 0)
copy_opt_exact(now, alt);
}
@@ -5349,7 +5762,7 @@ select_opt_map(OptMap* now, OptMap* alt)
vn = z / now->value;
va = z / alt->value;
- if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
+ if (comp_distance_value(&now->mm, &alt->mm, vn, va) > 0)
copy_opt_map(now, alt);
}
@@ -5363,17 +5776,14 @@ comp_opt_exact_or_map(OptStr* e, OptMap* m)
if (m->value <= 0) return -1;
if (e->case_fold != 0) {
- if (e->good_case_fold != 0)
- case_value = 2;
- else
- case_value = 1;
+ case_value = 1;
}
else
case_value = 3;
ae = COMP_EM_BASE * e->len * case_value;
am = COMP_EM_BASE * 5 * 2 / m->value;
- return comp_distance_value(&e->mmd, &m->mmd, ae, am);
+ return comp_distance_value(&e->mm, &m->mm, ae, am);
}
static void
@@ -5381,14 +5791,14 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
{
int i, val;
- /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
+ /* if (! is_equal_mml(&to->mm, &add->mm)) return ; */
if (to->value == 0) return ;
- if (add->value == 0 || to->mmd.max < add->mmd.min) {
+ if (add->value == 0 || to->mm.max < add->mm.min) {
clear_opt_map(to);
return ;
}
- alt_merge_mml(&to->mmd, &add->mmd);
+ alt_merge_mml(&to->mm, &add->mm);
val = 0;
for (i = 0; i < CHAR_MAP_SIZE; i++) {
@@ -5406,9 +5816,9 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
static void
set_bound_node_opt_info(OptNode* opt, MinMax* plen)
{
- copy_mml(&(opt->sb.mmd), plen);
- copy_mml(&(opt->spr.mmd), plen);
- copy_mml(&(opt->map.mmd), plen);
+ copy_mml(&(opt->sb.mm), plen);
+ copy_mml(&(opt->spr.mm), plen);
+ copy_mml(&(opt->map.mm), plen);
}
static void
@@ -5443,7 +5853,7 @@ concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)
}
if (add->map.value > 0 && to->len.max == 0) {
- if (add->map.mmd.max == 0)
+ if (add->map.mm.max == 0)
add->map.anc.left |= to->anc.left;
}
@@ -5468,10 +5878,7 @@ concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)
if (to->spr.len > 0) {
if (add->len.max > 0) {
- if (to->spr.len > (int )add->len.max)
- to->spr.len = add->len.max;
-
- if (to->spr.mmd.max == 0)
+ if (to->spr.mm.max == 0)
select_opt_exact(enc, &to->sb, &to->spr);
else
select_opt_exact(enc, &to->sm, &to->spr);
@@ -5511,7 +5918,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
r = 0;
enc = env->enc;
clear_node_opt_info(opt);
- set_bound_node_opt_info(opt, &env->mmd);
+ set_bound_node_opt_info(opt, &env->mm);
switch (NODE_TYPE(node)) {
case NODE_LIST:
@@ -5523,7 +5930,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
do {
r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);
if (r == 0) {
- add_mml(&nenv.mmd, &xo.len);
+ add_mml(&nenv.mm, &xo.len);
concat_left_node_opt_info(enc, opt, &xo);
}
} while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));
@@ -5548,9 +5955,8 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
{
StrNode* sn = STR_(node);
int slen = (int )(sn->end - sn->s);
- /* int is_raw = NODE_STRING_IS_RAW(node); */
- if (! NODE_STRING_IS_AMBIG(node)) {
+ if (! NODE_STRING_IS_CASE_FOLD_MATCH(node)) {
concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
if (slen > 0) {
add_char_opt_map(&opt->map, *(sn->s), enc);
@@ -5558,28 +5964,20 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
set_mml(&opt->len, slen, slen);
}
else {
- int max;
+ int max, min;
- if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {
- int n = onigenc_strlen(enc, sn->s, sn->end);
- max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;
- }
- else {
- concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
- opt->sb.case_fold = 1;
- if (NODE_STRING_IS_GOOD_AMBIG(node))
- opt->sb.good_case_fold = 1;
-
- if (slen > 0) {
- r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,
- enc, env->case_fold_flag);
- if (r != 0) break;
- }
+ concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
+ opt->sb.case_fold = 1;
- max = slen;
+ if (slen > 0) {
+ r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,
+ enc, env->case_fold_flag);
+ if (r != 0) break;
}
- set_mml(&opt->len, slen, max);
+ max = slen;
+ min = sn->case_min_len * ONIGENC_MBC_MINLEN(enc);
+ set_mml(&opt->len, min, max);
}
}
break;
@@ -5589,7 +5987,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
int z;
CClassNode* cc = CCLASS_(node);
- /* no need to check ignore case. (set in setup_tree()) */
+ /* no need to check ignore case. (set in tune_tree()) */
if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
OnigLen min = ONIGENC_MBC_MINLEN(enc);
@@ -5699,11 +6097,11 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
break;
}
backs = BACKREFS_P(br);
- min = tree_min_len(mem_env[backs[0]].node, env->scan_env);
- max = tree_max_len(mem_env[backs[0]].node, env->scan_env);
+ min = tree_min_len(mem_env[backs[0]].mem_node, env->scan_env);
+ max = tree_max_len(mem_env[backs[0]].mem_node, env->scan_env);
for (i = 1; i < br->back_num; i++) {
- tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env);
- tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);
+ tmin = tree_min_len(mem_env[backs[i]].mem_node, env->scan_env);
+ tmax = tree_max_len(mem_env[backs[i]].mem_node, env->scan_env);
if (min > tmin) min = tmin;
if (max < tmax) max = tmax;
}
@@ -5752,8 +6150,8 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
opt->sm.reach_end = 0;
}
- if (IS_REPEAT_INFINITE(qn->upper)) {
- if (env->mmd.max == 0 &&
+ if (IS_INFINITE_REPEAT(qn->upper)) {
+ if (env->mm.max == 0 &&
NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))
add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML);
@@ -5821,7 +6219,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
copy_opt_env(&nenv, env);
r = optimize_nodes(NODE_BAG_BODY(en), &xo, &nenv);
if (r == 0) {
- add_mml(&nenv.mmd, &xo.len);
+ add_mml(&nenv.mm, &xo.len);
concat_left_node_opt_info(enc, opt, &xo);
if (IS_NOT_NULL(en->te.Then)) {
r = optimize_nodes(en->te.Then, &xo, &nenv);
@@ -5870,15 +6268,6 @@ set_optimize_exact(regex_t* reg, OptStr* e)
if (e->case_fold) {
reg->optimize = OPTIMIZE_STR_CASE_FOLD;
- if (e->good_case_fold != 0) {
- if (e->len >= 2) {
- r = set_sunday_quick_search_or_bmh_skip_table(reg, 1,
- reg->exact, reg->exact_end,
- reg->map, &(reg->map_offset));
- if (r != 0) return r;
- reg->optimize = OPTIMIZE_STR_CASE_FOLD_FAST;
- }
- }
}
else {
int allow_reverse;
@@ -5901,11 +6290,17 @@ set_optimize_exact(regex_t* reg, OptStr* e)
}
}
- reg->dmin = e->mmd.min;
- reg->dmax = e->mmd.max;
+ reg->dist_min = e->mm.min;
+ reg->dist_max = e->mm.max;
+
+ if (reg->dist_min != INFINITE_LEN) {
+ int n;
+ if (e->case_fold != 0)
+ n = 1;
+ else
+ n = (int )(reg->exact_end - reg->exact);
- if (reg->dmin != INFINITE_LEN) {
- reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact);
+ reg->threshold_len = reg->dist_min + n;
}
return 0;
@@ -5920,11 +6315,11 @@ set_optimize_map(regex_t* reg, OptMap* m)
reg->map[i] = m->map[i];
reg->optimize = OPTIMIZE_MAP;
- reg->dmin = m->mmd.min;
- reg->dmax = m->mmd.max;
+ reg->dist_min = m->mm.min;
+ reg->dist_max = m->mm.max;
- if (reg->dmin != INFINITE_LEN) {
- reg->threshold_len = reg->dmin + 1;
+ if (reg->dist_min != INFINITE_LEN) {
+ reg->threshold_len = reg->dist_min + 1;
}
}
@@ -5950,7 +6345,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
env.options = reg->options;
env.case_fold_flag = reg->case_fold_flag;
env.scan_env = scan_env;
- clear_mml(&env.mmd);
+ clear_mml(&env.mm);
r = optimize_nodes(node, &opt, &env);
if (r != 0) return r;
@@ -5966,8 +6361,8 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
ANCR_PREC_READ_NOT);
if (reg->anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) {
- reg->anchor_dmin = opt.len.min;
- reg->anchor_dmax = opt.len.max;
+ reg->anc_dist_min = opt.len.min;
+ reg->anc_dist_max = opt.len.max;
}
if (opt.sb.len > 0 || opt.sm.len > 0) {
@@ -6002,8 +6397,8 @@ clear_optimize_info(regex_t* reg)
{
reg->optimize = OPTIMIZE_NONE;
reg->anchor = 0;
- reg->anchor_dmin = 0;
- reg->anchor_dmax = 0;
+ reg->anc_dist_min = 0;
+ reg->anc_dist_max = 0;
reg->sub_anchor = 0;
reg->exact_end = (UChar* )NULL;
reg->map_offset = 0;
@@ -6122,12 +6517,12 @@ print_optimize_info(FILE* f, regex_t* reg)
{
static const char* on[] = { "NONE", "STR",
"STR_FAST", "STR_FAST_STEP_FORWARD",
- "STR_CASE_FOLD_FAST", "STR_CASE_FOLD", "MAP" };
+ "STR_CASE_FOLD", "MAP" };
fprintf(f, "optimize: %s\n", on[reg->optimize]);
fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
if ((reg->anchor & ANCR_END_BUF_MASK) != 0)
- print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
+ print_distance_range(f, reg->anc_dist_min, reg->anc_dist_max);
fprintf(f, "\n");
if (reg->optimize) {
@@ -6275,7 +6670,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
Node* root;
ScanEnv scan_env;
#ifdef USE_CALL
- UnsetAddrList uslist;
+ UnsetAddrList uslist = {0};
#endif
root = 0;
@@ -6299,13 +6694,17 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->string_pool_end = 0;
reg->num_mem = 0;
reg->num_repeat = 0;
- reg->num_null_check = 0;
+ reg->num_empty_check = 0;
reg->repeat_range_alloc = 0;
- reg->repeat_range = (OnigRepeatRange* )NULL;
+ reg->repeat_range = (RepeatRange* )NULL;
+ reg->empty_status_mem = 0;
r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);
if (r != 0) goto err;
+ r = reduce_string_list(root);
+ if (r != 0) goto err;
+
/* mixed use named group and no-named group */
if (scan_env.num_named > 0 &&
IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
@@ -6326,38 +6725,65 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
r = unset_addr_list_init(&uslist, scan_env.num_call);
if (r != 0) goto err;
scan_env.unset_addr_list = &uslist;
- r = setup_call(root, &scan_env, 0);
+ r = tune_call(root, &scan_env, 0);
if (r != 0) goto err_unset;
- r = setup_call2(root);
+ r = tune_call2(root);
if (r != 0) goto err_unset;
r = recursive_call_check_trav(root, &scan_env, 0);
if (r < 0) goto err_unset;
r = infinite_recursive_call_check_trav(root, &scan_env);
if (r != 0) goto err_unset;
- setup_called_state(root, 0);
+ tune_called_state(root, 0);
}
reg->num_call = scan_env.num_call;
#endif
- r = setup_tree(root, reg, 0, &scan_env);
+#ifdef ONIG_DEBUG_PARSE
+ fprintf(stderr, "MAX PARSE DEPTH: %d\n", scan_env.max_parse_depth);
+ fprintf(stderr, "TREE (parsed)\n");
+ print_tree(stderr, root);
+ fprintf(stderr, "\n");
+#endif
+
+ r = tune_tree(root, reg, 0, &scan_env);
if (r != 0) goto err_unset;
+ if (scan_env.backref_num != 0) {
+ set_parent_node_trav(root, NULL_NODE);
+ r = set_empty_repeat_node_trav(root, NULL_NODE, &scan_env);
+ if (r != 0) goto err_unset;
+ set_empty_status_check_trav(root, &scan_env);
+ }
+
#ifdef ONIG_DEBUG_PARSE
+ fprintf(stderr, "TREE (after tune)\n");
print_tree(stderr, root);
+ fprintf(stderr, "\n");
#endif
- reg->capture_history = scan_env.capture_history;
- reg->bt_mem_start = scan_env.bt_mem_start;
- reg->bt_mem_start |= reg->capture_history;
- if (IS_FIND_CONDITION(reg->options))
- MEM_STATUS_ON_ALL(reg->bt_mem_end);
+ reg->capture_history = scan_env.cap_history;
+ reg->push_mem_start = scan_env.backtrack_mem | scan_env.cap_history;
+
+#ifdef USE_CALLOUT
+ if (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0) {
+ reg->push_mem_end = reg->push_mem_start;
+ }
else {
- reg->bt_mem_end = scan_env.bt_mem_end;
- reg->bt_mem_end |= reg->capture_history;
+ if (MEM_STATUS_IS_ALL_ON(reg->push_mem_start))
+ reg->push_mem_end = scan_env.backrefed_mem | scan_env.cap_history;
+ else
+ reg->push_mem_end = reg->push_mem_start &
+ (scan_env.backrefed_mem | scan_env.cap_history);
}
- reg->bt_mem_start |= reg->bt_mem_end;
+#else
+ if (MEM_STATUS_IS_ALL_ON(reg->push_mem_start))
+ reg->push_mem_end = scan_env.backrefed_mem | scan_env.cap_history;
+ else
+ reg->push_mem_end = reg->push_mem_start &
+ (scan_env.backrefed_mem | scan_env.cap_history);
+#endif
clear_optimize_info(reg);
#ifndef ONIG_DONT_OPTIMIZE
@@ -6391,14 +6817,20 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
}
#endif
- if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)
+ set_addr_in_repeat_range(reg);
+
+ if ((reg->push_mem_end != 0)
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+ || (reg->num_repeat != 0)
+ || (reg->num_empty_check != 0)
+#endif
#ifdef USE_CALLOUT
|| (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0)
#endif
)
reg->stack_pop_level = STACK_POP_LEVEL_ALL;
else {
- if (reg->bt_mem_start != 0)
+ if (reg->push_mem_start != 0)
reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
else
reg->stack_pop_level = STACK_POP_LEVEL_FREE;
@@ -6531,11 +6963,14 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
- if (r != 0) goto err;
+ if (r != 0) {
+ xfree(*reg);
+ *reg = NULL;
+ return r;
+ }
r = onig_compile(*reg, pattern, pattern_end, einfo);
if (r != 0) {
- err:
onig_free(*reg);
*reg = NULL;
}
@@ -6672,6 +7107,7 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
}
else {
len = ONIGENC_CODE_TO_MBCLEN(enc, code);
+ if (len < 0) return 0;
}
return onig_is_code_in_cc_len(len, code, cc);
}
@@ -6679,12 +7115,14 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
#ifdef ONIG_DEBUG_PARSE
+#ifdef USE_CALL
static void
p_string(FILE* f, int len, UChar* s)
{
fputs(":", f);
while (len-- > 0) { fputc(*s++, f); }
}
+#endif
static void
Indent(FILE* f, int indent)
@@ -6704,7 +7142,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
Indent(f, indent);
if (IS_NULL(node)) {
fprintf(f, "ERROR: null node!!!\n");
- exit (0);
+ exit(0);
}
type = NODE_TYPE(node);
@@ -6728,28 +7166,22 @@ print_indent_tree(FILE* f, Node* node, int indent)
case NODE_STRING:
{
+ char* str;
char* mode;
- char* dont;
- char* good;
- if (NODE_STRING_IS_RAW(node))
- mode = "-raw";
- else if (NODE_STRING_IS_AMBIG(node))
- mode = "-ambig";
+ if (NODE_STRING_IS_CRUDE(node))
+ mode = "-crude";
+ else if (NODE_STRING_IS_CASE_FOLD_MATCH(node))
+ mode = "-case_fold_match";
else
mode = "";
- if (NODE_STRING_IS_GOOD_AMBIG(node))
- good = "-good";
- else
- good = "";
-
- if (NODE_STRING_IS_DONT_GET_OPT_INFO(node))
- dont = " (dont-opt)";
+ if (STR_(node)->s == STR_(node)->end)
+ str = "empty-string";
else
- dont = "";
+ str = "string";
- fprintf(f, "<string%s%s%s:%p>", mode, good, dont, node);
+ fprintf(f, "<%s%s:%p>", str, mode, node);
for (p = STR_(node)->s; p < STR_(node)->end; p++) {
if (*p >= 0x20 && *p < 0x7f)
fputc(*p, f);
@@ -6871,6 +7303,34 @@ print_indent_tree(FILE* f, Node* node, int indent)
case NODE_BAG:
fprintf(f, "<bag:%p> ", node);
+ if (BAG_(node)->type == BAG_IF_ELSE) {
+ Node* Then;
+ Node* Else;
+ BagNode* bn;
+
+ bn = BAG_(node);
+ fprintf(f, "if-else\n");
+ print_indent_tree(f, NODE_BODY(node), indent + add);
+
+ Then = bn->te.Then;
+ Else = bn->te.Else;
+ if (IS_NULL(Then)) {
+ Indent(f, indent + add);
+ fprintf(f, "THEN empty\n");
+ }
+ else
+ print_indent_tree(f, Then, indent + add);
+
+ if (IS_NULL(Else)) {
+ Indent(f, indent + add);
+ fprintf(f, "ELSE empty\n");
+ }
+ else
+ print_indent_tree(f, Else, indent + add);
+
+ break;
+ }
+
switch (BAG_(node)->type) {
case BAG_OPTION:
fprintf(f, "option:%d", BAG_(node)->o.options);
@@ -6881,8 +7341,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
case BAG_STOP_BACKTRACK:
fprintf(f, "stop-bt");
break;
- case BAG_IF_ELSE:
- fprintf(f, "if-else");
+ default:
break;
}
fprintf(f, "\n");