summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/gb18030.c6
-rw-r--r--src/oniguruma.h11
-rw-r--r--src/regcomp.c156
-rw-r--r--src/regenc.c2
-rw-r--r--src/regerror.c17
-rw-r--r--src/regexec.c130
-rw-r--r--src/regext.c6
-rw-r--r--src/regint.h6
-rw-r--r--src/regparse.c190
-rw-r--r--src/regparse.h22
-rw-r--r--src/utf16_be.c35
-rw-r--r--src/utf16_le.c26
12 files changed, 393 insertions, 214 deletions
diff --git a/src/gb18030.c b/src/gb18030.c
index 7654432..8d415b0 100644
--- a/src/gb18030.c
+++ b/src/gb18030.c
@@ -2,7 +2,7 @@
gb18030.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2005-2018 KUBO Takehiro <kubo AT jiubao DOT org>
+ * Copyright (c) 2005-2019 KUBO Takehiro <kubo AT jiubao DOT org>
* K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
@@ -67,11 +67,11 @@ gb18030_mbc_enc_len(const UChar* p)
{
if (GB18030_MAP[*p] != CM)
return 1;
+
p++;
if (GB18030_MAP[*p] == C4)
return 4;
- if (GB18030_MAP[*p] == C1)
- return 1; /* illegal sequence */
+
return 2;
}
diff --git a/src/oniguruma.h b/src/oniguruma.h
index f6aa5ba..90cf2d9 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -36,9 +36,9 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
#define ONIGURUMA_VERSION_MINOR 9
-#define ONIGURUMA_VERSION_TEENY 2
+#define ONIGURUMA_VERSION_TEENY 3
-#define ONIGURUMA_VERSION_INT 60902
+#define ONIGURUMA_VERSION_INT 60903
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
@@ -52,6 +52,7 @@ extern "C" {
# define PV_(args) args
#endif
+#ifndef ONIG_STATIC
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__GNUC__)
#if defined(ONIGURUMA_EXPORT)
@@ -65,6 +66,9 @@ extern "C" {
#ifndef ONIG_EXTERN
#define ONIG_EXTERN extern
#endif
+#else
+#define ONIG_EXTERN extern
+#endif
/* PART: character encoding */
@@ -517,6 +521,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
+#define ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC (1U<<26)
/* syntax (behavior) warning */
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
@@ -766,6 +771,8 @@ int onig_init P_((void));
ONIG_EXTERN
int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...));
ONIG_EXTERN
+int onig_is_error_code_needs_param PV_((int code));
+ONIG_EXTERN
void onig_set_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
void onig_set_verb_warn_func P_((OnigWarnFunc f));
diff --git a/src/regcomp.c b/src/regcomp.c
index c2c04a4..b96c793 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -599,12 +599,34 @@ select_str_opcode(int mb_len, int str_len, int ignore_case)
}
static int
-compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)
+is_strict_real_node(Node* node)
+{
+ switch (NODE_TYPE(node)) {
+ case NODE_STRING:
+ {
+ StrNode* sn = STR_(node);
+ return (sn->end != sn->s);
+ }
+ break;
+
+ case NODE_CCLASS:
+ case NODE_CTYPE:
+ return 1;
+ break;
+
+ default:
+ return 0;
+ break;
+ }
+}
+
+static int
+compile_tree_empty_check(Node* node, regex_t* reg, int emptiness, ScanEnv* env)
{
int r;
int saved_num_null_check = reg->num_null_check;
- if (empty_info != BODY_IS_NOT_EMPTY) {
+ if (emptiness != BODY_IS_NOT_EMPTY) {
r = add_op(reg, OP_EMPTY_CHECK_START);
if (r != 0) return r;
COP(reg)->empty_check_start.mem = reg->num_null_check; /* NULL CHECK ID */
@@ -614,12 +636,12 @@ compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)
r = compile_tree(node, reg, env);
if (r != 0) return r;
- if (empty_info != BODY_IS_NOT_EMPTY) {
- if (empty_info == BODY_IS_EMPTY)
+ if (emptiness != BODY_IS_NOT_EMPTY) {
+ if (emptiness == BODY_IS_EMPTY_POSSIBILITY)
r = add_op(reg, OP_EMPTY_CHECK_END);
- else if (empty_info == BODY_IS_EMPTY_MEM)
+ else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM)
r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);
- else if (empty_info == BODY_IS_EMPTY_REC)
+ else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_REC)
r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
if (r != 0) return r;
@@ -895,12 +917,12 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
}
p[id].lower = lower;
- p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
+ p[id].upper = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper);
return 0;
}
static int
-compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
+compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,
regex_t* reg, ScanEnv* env)
{
int r;
@@ -915,7 +937,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
if (r != 0) return r;
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
if (r != 0) return r;
if (
@@ -937,7 +959,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
static int
is_anychar_infinite_greedy(QuantNode* qn)
{
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
+ if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) &&
NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))
return 1;
else
@@ -951,8 +973,8 @@ static int
compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
{
int len, mod_tlen;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- enum BodyEmpty empty_info = qn->empty_info;
+ int infinite = IS_INFINITE_REPEAT(qn->upper);
+ enum BodyEmptyType emptiness = qn->emptiness;
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
@@ -969,10 +991,9 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
}
}
- if (empty_info == BODY_IS_NOT_EMPTY)
- mod_tlen = tlen;
- else
- mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
+ mod_tlen = tlen;
+ if (emptiness != BODY_IS_NOT_EMPTY)
+ mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;
if (infinite &&
(qn->lower <= 1 ||
@@ -1026,8 +1047,8 @@ static int
compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
{
int i, r, mod_tlen;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- enum BodyEmpty empty_info = qn->empty_info;
+ int infinite = IS_INFINITE_REPEAT(qn->upper);
+ enum BodyEmptyType emptiness = qn->emptiness;
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
@@ -1055,10 +1076,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
}
}
- if (empty_info == BODY_IS_NOT_EMPTY)
- mod_tlen = tlen;
- else
- mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
+ mod_tlen = tlen;
+ if (emptiness != BODY_IS_NOT_EMPTY)
+ mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;
if (infinite &&
(qn->lower <= 1 ||
@@ -1096,7 +1116,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
COP(reg)->push_or_jump_exact1.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
COP(reg)->push_or_jump_exact1.c = STR_(qn->head_exact)->s[0];
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
if (r != 0) return r;
addr = -(mod_tlen + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1);
@@ -1109,7 +1129,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
COP(reg)->push_if_peek_next.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
COP(reg)->push_if_peek_next.c = STR_(qn->next_head_exact)->s[0];
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
if (r != 0) return r;
addr = -(mod_tlen + (int )SIZE_OP_PUSH_IF_PEEK_NEXT);
@@ -1119,7 +1139,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
if (r != 0) return r;
COP(reg)->push.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
if (r != 0) return r;
addr = -(mod_tlen + (int )SIZE_OP_PUSH);
@@ -1134,7 +1154,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
if (r != 0) return r;
COP(reg)->jump.addr = mod_tlen + SIZE_INC_OP;
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
if (r != 0) return r;
r = add_op(reg, OP_PUSH);
@@ -1188,7 +1208,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
}
else {
- r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);
+ r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env);
}
return r;
}
@@ -1273,7 +1293,7 @@ compile_length_bag_node(BagNode* node, regex_t* reg)
break;
case BAG_STOP_BACKTRACK:
- if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
+ if (NODE_IS_STRICT_REAL_REPEAT(node)) {
int v;
QuantNode* qn;
@@ -1307,8 +1327,9 @@ compile_length_bag_node(BagNode* node, regex_t* reg)
len += tlen;
}
+ len += SIZE_OP_JUMP + SIZE_OP_ATOMIC_END;
+
if (IS_NOT_NULL(Else)) {
- len += SIZE_OP_JUMP;
tlen = compile_length_tree(Else, reg);
if (tlen < 0) return tlen;
len += tlen;
@@ -1423,7 +1444,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
break;
case BAG_STOP_BACKTRACK:
- if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
+ if (NODE_IS_STRICT_REAL_REPEAT(node)) {
QuantNode* qn = QUANT_(NODE_BAG_BODY(node));
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
if (r != 0) return r;
@@ -1455,7 +1476,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
case BAG_IF_ELSE:
{
- int cond_len, then_len, jump_len;
+ int cond_len, then_len, else_len, jump_len;
Node* cond = NODE_BAG_BODY(node);
Node* Then = node->te.Then;
Node* Else = node->te.Else;
@@ -1472,8 +1493,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
else
then_len = 0;
- jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END;
- if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP;
+ jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END + SIZE_OP_JUMP;
r = add_op(reg, OP_PUSH);
if (r != 0) return r;
@@ -1490,11 +1510,20 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
}
if (IS_NOT_NULL(Else)) {
- int else_len = compile_length_tree(Else, reg);
- r = add_op(reg, OP_JUMP);
- if (r != 0) return r;
- COP(reg)->jump.addr = else_len + SIZE_INC_OP;
+ else_len = compile_length_tree(Else, reg);
+ if (else_len < 0) return else_len;
+ }
+ else
+ else_len = 0;
+
+ r = add_op(reg, OP_JUMP);
+ if (r != 0) return r;
+ COP(reg)->jump.addr = SIZE_OP_ATOMIC_END + else_len + SIZE_INC_OP;
+ r = add_op(reg, OP_ATOMIC_END);
+ if (r != 0) return r;
+
+ if (IS_NOT_NULL(Else)) {
r = compile_tree(Else, reg, env);
}
}
@@ -3035,7 +3064,7 @@ tree_max_len(Node* node, ScanEnv* env)
if (qn->upper != 0) {
len = tree_max_len(NODE_BODY(node), env);
if (len != 0) {
- if (! IS_REPEAT_INFINITE(qn->upper))
+ if (! IS_INFINITE_REPEAT(qn->upper))
len = distance_multiply(len, qn->upper);
else
len = INFINITE_LEN;
@@ -3581,7 +3610,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
type = NODE_TYPE(node);
if (type == NODE_QUANT) {
QuantNode* qn = QUANT_(node);
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
+ if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {
#ifdef USE_QUANT_PEEK_NEXT
Node* n = get_head_value_node(next_node, 1, reg);
/* '\0': for UTF-16BE etc... */
@@ -3591,7 +3620,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
#endif
/* automatic posseivation a*b ==> (?>a*)b */
if (qn->lower <= 1) {
- if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) {
+ if (is_strict_real_node(NODE_BODY(node))) {
Node *x, *y;
x = get_head_value_node(NODE_BODY(node), 0, reg);
if (IS_NOT_NULL(x)) {
@@ -3599,7 +3628,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);
CHECK_NULL_RETURN_MEMERR(en);
- NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT);
+ NODE_STATUS_ADD(en, STRICT_REAL_REPEAT);
swap_node(node, en);
NODE_BODY(node) = en;
}
@@ -4001,11 +4030,11 @@ expand_case_fold_string(Node* node, regex_t* reg, int state)
return r;
}
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
-static enum BodyEmpty
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
+static enum BodyEmptyType
quantifiers_memory_node_info(Node* node)
{
- int r = BODY_IS_EMPTY;
+ int r = BODY_IS_EMPTY_POSSIBILITY;
switch (NODE_TYPE(node)) {
case NODE_LIST:
@@ -4022,7 +4051,7 @@ quantifiers_memory_node_info(Node* node)
#ifdef USE_CALL
case NODE_CALL:
if (NODE_IS_RECURSION(node)) {
- return BODY_IS_EMPTY_REC; /* tiny version */
+ return BODY_IS_EMPTY_POSSIBILITY_REC; /* tiny version */
}
else
r = quantifiers_memory_node_info(NODE_BODY(node));
@@ -4044,9 +4073,9 @@ quantifiers_memory_node_info(Node* node)
switch (en->type) {
case BAG_MEMORY:
if (NODE_IS_RECURSION(node)) {
- return BODY_IS_EMPTY_REC;
+ return BODY_IS_EMPTY_POSSIBILITY_REC;
}
- return BODY_IS_EMPTY_MEM;
+ return BODY_IS_EMPTY_POSSIBILITY_MEM;
break;
case BAG_OPTION:
@@ -4083,7 +4112,7 @@ quantifiers_memory_node_info(Node* node)
return r;
}
-#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
+#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
#ifdef USE_CALL
@@ -4351,7 +4380,7 @@ setup_called_state_call(Node* node, int state)
{
QuantNode* qn = QUANT_(node);
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
state |= IN_REAL_REPEAT;
if (qn->lower != qn->upper)
state |= IN_VAR_REPEAT;
@@ -4468,7 +4497,7 @@ setup_called_state(Node* node, int state)
{
QuantNode* qn = QUANT_(node);
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
state |= IN_REAL_REPEAT;
if (qn->lower != qn->upper)
state |= IN_VAR_REPEAT;
@@ -4600,24 +4629,24 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
NODE_STATUS_ADD(node, IN_MULTI_ENTRY);
}
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 1) {
d = tree_min_len(body, env);
if (d == 0) {
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
- qn->empty_info = quantifiers_memory_node_info(body);
- if (qn->empty_info == BODY_IS_EMPTY_REC) {
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
+ qn->emptiness = quantifiers_memory_node_info(body);
+ if (qn->emptiness == BODY_IS_EMPTY_POSSIBILITY_REC) {
if (NODE_TYPE(body) == NODE_BAG &&
BAG_(body)->type == BAG_MEMORY) {
MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum);
}
}
#else
- qn->empty_info = BODY_IS_EMPTY;
+ qn->emptiness = BODY_IS_EMPTY_POSSIBILITY;
#endif
}
}
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
state |= IN_REAL_REPEAT;
if (qn->lower != qn->upper)
state |= IN_VAR_REPEAT;
@@ -4628,7 +4657,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
/* expand string */
#define EXPAND_STRING_MAX_LENGTH 100
if (NODE_TYPE(body) == NODE_STRING) {
- if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
+ if (!IS_INFINITE_REPEAT(qn->lower) && qn->lower == qn->upper &&
qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
int len = NODE_STRING_LEN(body);
StrNode* sn = STR_(body);
@@ -4646,7 +4675,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
}
}
- if (qn->greedy && (qn->empty_info == BODY_IS_NOT_EMPTY)) {
+ if (qn->greedy && (qn->emptiness == BODY_IS_NOT_EMPTY)) {
if (NODE_TYPE(body) == NODE_QUANT) {
QuantNode* tqn = QUANT_(body);
if (IS_NOT_NULL(tqn->head_exact)) {
@@ -4663,7 +4692,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
}
/* setup_tree does the following work.
- 1. check empty loop. (set qn->empty_info)
+ 1. check empty loop. (set qn->emptiness)
2. expand ignore-case in char class.
3. set memory status bit flags. (reg->mem_stats)
4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
@@ -4752,10 +4781,10 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
r = setup_tree(target, reg, state, env);
if (NODE_TYPE(target) == NODE_QUANT) {
QuantNode* tqn = QUANT_(target);
- if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
+ if (IS_INFINITE_REPEAT(tqn->upper) && tqn->lower <= 1 &&
tqn->greedy != 0) { /* (?>a*), a*+ etc... */
- if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target)))
- NODE_STATUS_ADD(node, STOP_BT_SIMPLE_REPEAT);
+ if (is_strict_real_node(NODE_BODY(target)))
+ NODE_STATUS_ADD(node, STRICT_REAL_REPEAT);
}
}
}
@@ -5752,7 +5781,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
opt->sm.reach_end = 0;
}
- if (IS_REPEAT_INFINITE(qn->upper)) {
+ if (IS_INFINITE_REPEAT(qn->upper)) {
if (env->mmd.max == 0 &&
NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))
@@ -6672,6 +6701,7 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
}
else {
len = ONIGENC_CODE_TO_MBCLEN(enc, code);
+ if (len < 0) return 0;
}
return onig_is_code_in_cc_len(len, code, cc);
}
diff --git a/src/regenc.c b/src/regenc.c
index 6376565..9fab721 100644
--- a/src/regenc.c
+++ b/src/regenc.c
@@ -853,6 +853,8 @@ onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
extern int
onigenc_mb2_code_to_mbclen(OnigCodePoint code)
{
+ if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
+
if ((code & 0xff00) != 0) return 2;
else return 1;
}
diff --git a/src/regerror.c b/src/regerror.c
index 7564827..e6d1806 100644
--- a/src/regerror.c
+++ b/src/regerror.c
@@ -257,6 +257,23 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
}
+extern int
+onig_is_error_code_needs_param(int code)
+{
+ switch (code) {
+ case ONIGERR_UNDEFINED_NAME_REFERENCE:
+ case ONIGERR_UNDEFINED_GROUP_REFERENCE:
+ case ONIGERR_MULTIPLEX_DEFINED_NAME:
+ case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
+ case ONIGERR_INVALID_GROUP_NAME:
+ case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
+ case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
#define MAX_ERROR_PAR_LEN 30
diff --git a/src/regexec.c b/src/regexec.c
index 6618996..f957b75 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -980,6 +980,8 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#define STK_CALL_FRAME 0x0400
#define STK_RETURN 0x0500
#define STK_SAVE_VAL 0x0600
+#define STK_PREC_READ_START 0x0700
+#define STK_PREC_READ_END 0x0800
/* stack type check mask */
#define STK_MASK_POP_USED STK_ALT_FLAG
@@ -1544,8 +1546,8 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
-#define STACK_PUSH_POS(s,sprev) \
- STACK_PUSH(STK_TO_VOID_START,(Operation* )0,s,sprev)
+#define STACK_PUSH_PREC_READ_START(s,sprev) \
+ STACK_PUSH(STK_PREC_READ_START,(Operation* )0,s,sprev)
#define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \
STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev)
#define STACK_PUSH_TO_VOID_START STACK_PUSH_TYPE(STK_TO_VOID_START)
@@ -1887,6 +1889,27 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while(0)
+#define STACK_GET_PREC_READ_START(k) do {\
+ int level = 0;\
+ k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_GET_PREC_READ_START");\
+ if (IS_TO_VOID_TARGET(k)) {\
+ k->type = STK_VOID;\
+ }\
+ else if (k->type == STK_PREC_READ_START) {\
+ if (level == 0) {\
+ break;\
+ }\
+ level--;\
+ }\
+ else if (k->type == STK_PREC_READ_END) {\
+ level++;\
+ }\
+ }\
+} while(0)
+
#define STACK_EMPTY_CHECK(isnull,sid,s) do {\
StackType* k = stk;\
while (1) {\
@@ -1913,7 +1936,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while (0)
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
#define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\
StackType* k = stk;\
while (1) {\
@@ -1927,9 +1950,10 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
else {\
UChar* endp;\
+ int level = 0;\
(isnull) = 1;\
while (k < stk) {\
- if (k->type == STK_MEM_START) {\
+ if (k->type == STK_MEM_START && level == 0) {\
STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
if (endp == 0) {\
(isnull) = 0; break;\
@@ -1941,6 +1965,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
(isnull) = -1; /* empty, but position changed */ \
}\
}\
+ else if (k->type == STK_PREC_READ_START) {\
+ level++;\
+ }\
+ else if (k->type == STK_PREC_READ_END) {\
+ level--;\
+ }\
k++;\
}\
break;\
@@ -1965,10 +1995,11 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
else {\
UChar* endp;\
+ int prec_level = 0;\
(isnull) = 1;\
while (k < stk) {\
if (k->type == STK_MEM_START) {\
- if (level == 0) {\
+ if (level == 0 && prec_level == 0) {\
STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
if (endp == 0) {\
(isnull) = 0; break;\
@@ -1987,6 +2018,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
else if (k->type == STK_EMPTY_CHECK_END) {\
if (k->zid == (sid)) level--;\
}\
+ else if (k->type == STK_PREC_READ_START) {\
+ prec_level++;\
+ }\
+ else if (k->type == STK_PREC_READ_END) {\
+ prec_level--;\
+ }\
k++;\
}\
break;\
@@ -2023,7 +2060,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
}\
} while(0)
-#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
+#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
#define STACK_GET_REPEAT(sid, k) do {\
int level = 0;\
@@ -2968,6 +3005,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
NEXT_OUT;
CASE_OP(CCLASS_MB)
+ DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
cclass_mb:
@@ -3441,11 +3479,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP(pstart, s, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP(s, pstart, n);
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
}
INC_OP;
JUMP_OUT;
@@ -3468,11 +3508,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP_IC(case_fold_flag, pstart, &s, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP_IC(case_fold_flag, pstart, &s, n);
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
}
INC_OP;
JUMP_OUT;
@@ -3498,15 +3540,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE(pstart, swork, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
-
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE(swork, pstart, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
break; /* success */
}
if (i == tlen) goto fail;
@@ -3535,15 +3578,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
-
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
break; /* success */
}
if (i == tlen) goto fail;
@@ -3560,6 +3604,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int len;
int level;
MemNumType* mems;
+ UChar* ssave;
n = 0;
backref_with_level:
@@ -3567,10 +3612,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
tlen = p->backref_general.num;
mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
- sprev = s;
+ ssave = s;
if (backref_match_at_nested_level(reg, stk, stk_base, n,
case_fold_flag, level, (int )tlen, mems, &s, end)) {
- if (sprev < end) {
+ if (ssave != s) {
+ sprev = ssave;
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
}
@@ -3658,7 +3704,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
JUMP_OUT;
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
CASE_OP(EMPTY_CHECK_END_MEMST)
{
int is_empty;
@@ -3683,7 +3729,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int is_empty;
mem = p->empty_check_end.mem; /* mem: null check id */
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg);
#else
STACK_EMPTY_CHECK_REC(is_empty, mem, s);
@@ -3851,14 +3897,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto repeat_inc_ng;
CASE_OP(PREC_READ_START)
- STACK_PUSH_POS(s, sprev);
+ STACK_PUSH_PREC_READ_START(s, sprev);
INC_OP;
JUMP_OUT;
CASE_OP(PREC_READ_END)
- STACK_EXEC_TO_VOID(stkp);
+ STACK_GET_PREC_READ_START(stkp);
s = stkp->u.state.pstr;
sprev = stkp->u.state.pstr_prev;
+ STACK_PUSH(STK_PREC_READ_END,0,0,0);
INC_OP;
JUMP_OUT;
@@ -5443,6 +5490,9 @@ onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
if (n >= 0) {
n = ONIGERR_INVALID_CALLOUT_BODY;
}
+ else if (onig_is_error_code_needs_param(n)) {
+ n = ONIGERR_INVALID_CALLOUT_BODY;
+ }
return n;
}
diff --git a/src/regext.c b/src/regext.c
index fa4b360..965c793 100644
--- a/src/regext.c
+++ b/src/regext.c
@@ -29,6 +29,7 @@
#include "regint.h"
+#if 0
static void
conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
{
@@ -158,6 +159,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e
return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
}
+#endif
extern int
onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
@@ -169,9 +171,7 @@ onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
if (ci->pattern_enc != ci->target_enc) {
- r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end,
- &cpat, &cpat_end);
- if (r != 0) return r;
+ return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
}
else {
cpat = (UChar* )pattern;
diff --git a/src/regint.h b/src/regint.h
index 56767e8..38389a1 100644
--- a/src/regint.h
+++ b/src/regint.h
@@ -63,7 +63,7 @@
#define USE_CALL
#define USE_CALLOUT
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
-#define USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
+#define USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
#define USE_RETRY_LIMIT_IN_MATCH
@@ -348,8 +348,8 @@ typedef unsigned int MemStatusType;
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
-#define REPEAT_INFINITE -1
-#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
+#define INFINITE_REPEAT -1
+#define IS_INFINITE_REPEAT(n) ((n) == INFINITE_REPEAT)
/* bitset */
#define BITS_PER_BYTE 8
diff --git a/src/regparse.c b/src/regparse.c
index f1deea3..7f8b1a9 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -77,6 +77,7 @@ OnigSyntaxType OnigSyntaxOniguruma = {
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
+ ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC |
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
, ONIG_OPTION_NONE
@@ -1093,6 +1094,35 @@ onig_name_to_group_numbers(regex_t* reg, const UChar* name,
return e->back_num;
}
+static int
+name_to_group_numbers(ScanEnv* env, const UChar* name, const UChar* name_end,
+ int** nums)
+{
+ regex_t* reg;
+ NameEntry* e;
+
+ reg = env->reg;
+ e = name_find(reg, name, name_end);
+
+ if (IS_NULL(e)) {
+ onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
+ (UChar* )name, (UChar* )name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+
+ switch (e->back_num) {
+ case 0:
+ break;
+ case 1:
+ *nums = &(e->back_ref1);
+ break;
+ default:
+ *nums = e->back_refs;
+ break;
+ }
+ return e->back_num;
+}
+
extern int
onig_name_to_backref_number(regex_t* reg, const UChar* name,
const UChar* name_end, OnigRegion *region)
@@ -1869,8 +1899,8 @@ callout_tag_table_new(CalloutTagTable** rt)
}
static int
-callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,
- CalloutTagVal entry_val)
+callout_tag_entry_raw(ScanEnv* env, CalloutTagTable* t, UChar* name,
+ UChar* name_end, CalloutTagVal entry_val)
{
int r;
CalloutTagVal val;
@@ -1879,8 +1909,11 @@ callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,
return ONIGERR_INVALID_CALLOUT_TAG_NAME;
val = callout_tag_find(t, name, name_end);
- if (val >= 0)
+ if (val >= 0) {
+ onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
+ name, name_end);
return ONIGERR_MULTIPLEX_DEFINED_NAME;
+ }
r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
if (r < 0) return r;
@@ -1909,7 +1942,7 @@ ext_ensure_tag_table(regex_t* reg)
}
static int
-callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,
+callout_tag_entry(ScanEnv* env, regex_t* reg, UChar* name, UChar* name_end,
CalloutTagVal entry_val)
{
int r;
@@ -1921,7 +1954,7 @@ callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,
ext = onig_get_regex_ext(reg);
CHECK_NULL_RETURN_MEMERR(ext);
- r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);
+ r = callout_tag_entry_raw(env, ext->tag_table, name, name_end, entry_val);
e = onig_reg_callout_list_at(reg, (int )entry_val);
CHECK_NULL_RETURN_MEMERR(e);
@@ -2391,10 +2424,10 @@ node_new_quantifier(int lower, int upper, int by_number)
CHECK_NULL_RETURN(node);
NODE_SET_TYPE(node, NODE_QUANT);
- QUANT_(node)->lower = lower;
- QUANT_(node)->upper = upper;
- QUANT_(node)->greedy = 1;
- QUANT_(node)->empty_info = BODY_IS_NOT_EMPTY;
+ QUANT_(node)->lower = lower;
+ QUANT_(node)->upper = upper;
+ QUANT_(node)->greedy = 1;
+ QUANT_(node)->emptiness = BODY_IS_NOT_EMPTY;
QUANT_(node)->head_exact = NULL_NODE;
QUANT_(node)->next_head_exact = NULL_NODE;
QUANT_(node)->is_refered = 0;
@@ -2694,7 +2727,7 @@ make_text_segment(Node** node, ScanEnv* env)
ns[0] = x;
ns[1] = NULL_NODE;
- x = node_new_quantifier(0, REPEAT_INFINITE, 1);
+ x = node_new_quantifier(0, INFINITE_REPEAT, 1);
if (IS_NULL(x)) goto err;
NODE_BODY(x) = ns[0];
@@ -3044,7 +3077,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
if (expr == NULL_NODE) {
/* default expr \O* */
- quant = node_new_quantifier(0, REPEAT_INFINITE, 0);
+ quant = node_new_quantifier(0, INFINITE_REPEAT, 0);
if (IS_NULL(quant)) goto err0;
r = node_new_true_anychar(&body, env);
@@ -3086,7 +3119,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
if (r != 0) goto err;
possessive = 1;
- r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE,
+ r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, INFINITE_REPEAT,
possessive, is_range_cutter, env);
if (r != 0) goto err;
@@ -3236,10 +3269,18 @@ node_new_empty(void)
static Node*
node_new_str_raw_char(UChar c)
{
+ int i;
UChar p[1];
+ Node* node;
p[0] = c;
- return node_new_str_raw(p, p + 1);
+ node = node_new_str_raw(p, p + 1);
+
+ /* clear buf tail */
+ for (i = 1; i < NODE_STRING_BUF_SIZE; i++)
+ STR_(node)->buf[i] = '\0';
+
+ return node;
}
static Node*
@@ -3275,24 +3316,6 @@ str_node_can_be_split(Node* node, OnigEncoding enc)
return 0;
}
-#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
-static int
-node_str_head_pad(StrNode* sn, int num, UChar val)
-{
- UChar buf[NODE_STRING_BUF_SIZE];
- int i, len;
-
- len = sn->end - sn->s;
- onig_strcpy(buf, sn->s, sn->end);
- onig_strcpy(&(sn->s[num]), buf, buf + len);
- sn->end += num;
-
- for (i = 0; i < num; i++) {
- sn->s[i] = val;
- }
-}
-#endif
-
extern int
onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
{
@@ -3877,19 +3900,19 @@ quantifier_type_num(QuantNode* q)
if (q->greedy) {
if (q->lower == 0) {
if (q->upper == 1) return 0;
- else if (IS_REPEAT_INFINITE(q->upper)) return 1;
+ else if (IS_INFINITE_REPEAT(q->upper)) return 1;
}
else if (q->lower == 1) {
- if (IS_REPEAT_INFINITE(q->upper)) return 2;
+ if (IS_INFINITE_REPEAT(q->upper)) return 2;
}
}
else {
if (q->lower == 0) {
if (q->upper == 1) return 3;
- else if (IS_REPEAT_INFINITE(q->upper)) return 4;
+ else if (IS_INFINITE_REPEAT(q->upper)) return 4;
}
else if (q->lower == 1) {
- if (IS_REPEAT_INFINITE(q->upper)) return 5;
+ if (IS_INFINITE_REPEAT(q->upper)) return 5;
}
}
return -1;
@@ -3926,8 +3949,8 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
pnum = quantifier_type_num(p);
cnum = quantifier_type_num(c);
if (pnum < 0 || cnum < 0) {
- if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {
- if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {
+ if ((p->lower == p->upper) && ! IS_INFINITE_REPEAT(p->upper)) {
+ if ((c->lower == c->upper) && ! IS_INFINITE_REPEAT(c->upper)) {
int n = onig_positive_int_multiply(p->lower, c->lower);
if (n >= 0) {
p->lower = p->upper = n;
@@ -3946,11 +3969,11 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
break;
case RQ_A:
NODE_BODY(pnode) = NODE_BODY(cnode);
- p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
+ p->lower = 0; p->upper = INFINITE_REPEAT; p->greedy = 1;
break;
case RQ_AQ:
NODE_BODY(pnode) = NODE_BODY(cnode);
- p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
+ p->lower = 0; p->upper = INFINITE_REPEAT; p->greedy = 0;
break;
case RQ_QQ:
NODE_BODY(pnode) = NODE_BODY(cnode);
@@ -3959,13 +3982,13 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
case RQ_P_QQ:
NODE_BODY(pnode) = cnode;
p->lower = 0; p->upper = 1; p->greedy = 0;
- c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
+ c->lower = 1; c->upper = INFINITE_REPEAT; c->greedy = 1;
return ;
break;
case RQ_PQ_Q:
NODE_BODY(pnode) = cnode;
p->lower = 0; p->upper = 1; p->greedy = 1;
- c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
+ c->lower = 1; c->upper = INFINITE_REPEAT; c->greedy = 0;
return ;
break;
case RQ_ASIS:
@@ -4158,7 +4181,7 @@ fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
if (p == prev) {
if (non_low != 0)
goto invalid;
- up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
+ up = INFINITE_REPEAT; /* {n,} : {n,infinite} */
}
}
else {
@@ -4178,7 +4201,7 @@ fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
}
if (c != '}') goto invalid;
- if (!IS_REPEAT_INFINITE(up) && low > up) {
+ if (!IS_INFINITE_REPEAT(up) && low > up) {
/* {n,m}+ supported case */
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL))
return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
@@ -4959,7 +4982,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
tok->type = TK_REPEAT;
tok->u.repeat.lower = 0;
- tok->u.repeat.upper = REPEAT_INFINITE;
+ tok->u.repeat.upper = INFINITE_REPEAT;
goto greedy_check;
break;
@@ -4967,7 +4990,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
tok->type = TK_REPEAT;
tok->u.repeat.lower = 1;
- tok->u.repeat.upper = REPEAT_INFINITE;
+ tok->u.repeat.upper = INFINITE_REPEAT;
goto greedy_check;
break;
@@ -5358,10 +5381,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.backref.ref1 = back_num;
}
else {
- num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
+ num = name_to_group_numbers(env, prev, name_end, &backs);
if (num <= 0) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
return ONIGERR_UNDEFINED_NAME_REFERENCE;
}
if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
@@ -5514,7 +5535,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
#endif
tok->type = TK_REPEAT;
tok->u.repeat.lower = 0;
- tok->u.repeat.upper = REPEAT_INFINITE;
+ tok->u.repeat.upper = INFINITE_REPEAT;
goto greedy_check;
break;
@@ -5525,7 +5546,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
#endif
tok->type = TK_REPEAT;
tok->u.repeat.lower = 1;
- tok->u.repeat.upper = REPEAT_INFINITE;
+ tok->u.repeat.upper = INFINITE_REPEAT;
goto greedy_check;
break;
@@ -5608,7 +5629,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.call.gnum = 0;
tok->u.call.name = p;
PINC;
- if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;
+ if (! PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;
tok->u.call.name_end = p;
break;
@@ -6249,6 +6270,7 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
env->parse_depth++;
if (env->parse_depth > ParseDepthLimit)
return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
+
prev_cc = (CClassNode* )NULL;
r = fetch_token_in_cc(tok, src, end, env);
if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
@@ -6301,10 +6323,11 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
case TK_RAW_BYTE:
/* tok->base != 0 : octal or hexadec. */
if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
+ int i, j;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
UChar* psave = p;
- int i, base = tok->base;
+ int base = tok->base;
buf[0] = tok->u.c;
for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
@@ -6322,6 +6345,9 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
goto err;
}
+ /* clear buf tail */
+ for (j = i; j < ONIGENC_CODE_TO_MBC_MAXLEN; j++) buf[j] = '\0';
+
len = enclen(env->enc, buf);
if (i < len) {
r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
@@ -6359,8 +6385,13 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
val_entry:
len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
if (len < 0) {
- r = len;
- goto err;
+ if (state != CCS_RANGE ||
+ ! IS_SYNTAX_BV(env->syntax,
+ ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC) ||
+ v < 0x100 || ONIGENC_MBC_MAXLEN(env->enc) == 1) {
+ r = len;
+ goto err;
+ }
}
in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
val_entry2:
@@ -6673,7 +6704,7 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv
}
if (tag_start != tag_end) {
- r = callout_tag_entry(env->reg, tag_start, tag_end, num);
+ r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
if (r != ONIG_NORMAL) return r;
}
@@ -6994,7 +7025,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
}
if (tag_start != tag_end) {
- r = callout_tag_entry(env->reg, tag_start, tag_end, num);
+ r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
if (r != ONIG_NORMAL) return r;
}
@@ -7271,10 +7302,8 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
int num;
int* backs;
- num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
+ num = name_to_group_numbers(env, prev, name_end, &backs);
if (num <= 0) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
return ONIGERR_UNDEFINED_NAME_REFERENCE;
}
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
@@ -7414,6 +7443,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
}
break;
+#ifdef USE_CAPTURE_HISTORY
case '@':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
@@ -7441,6 +7471,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
return ONIGERR_UNDEFINED_GROUP_OPTION;
}
break;
+#endif
#ifdef USE_POSIXLINE_OPTION
case 'p':
@@ -7688,7 +7719,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
if (targetq_num >= 0 && nestq_num < 0) {
if (targetq_num == 1 || targetq_num == 2) { /* * or + */
/* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
- if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
+ if (! IS_INFINITE_REPEAT(qn->upper) && qn->upper > 1 && qn->greedy) {
qn->upper = (qn->lower == 0 ? 1 : qn->lower);
}
}
@@ -7826,14 +7857,18 @@ static int
parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
ScanEnv* env, int group_head)
{
- int r, len, group = 0;
+ int r, len, group;
Node* qn;
Node** tp;
+ unsigned int parse_depth;
+ group = 0;
*np = NULL;
if (tok->type == (enum TokenSyms )term)
goto end_of_token;
+ parse_depth = env->parse_depth;
+
switch (tok->type) {
case TK_ALT:
case TK_EOT:
@@ -7914,36 +7949,29 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
len = 1;
while (1) {
if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
- if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */
+ if (len == enclen(env->enc, STR_(*np)->s)) {
r = fetch_token(tok, src, end, env);
- NODE_STRING_CLEAR_RAW(*np);
- goto string_end;
+ goto tk_raw_byte_end;
}
}
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
- if (r != TK_RAW_BYTE) {
- /* Don't use this, it is wrong for little endian encodings. */
-#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
- int rem;
- if (len < ONIGENC_MBC_MINLEN(env->enc)) {
- rem = ONIGENC_MBC_MINLEN(env->enc) - len;
- (void )node_str_head_pad(STR_(*np), rem, (UChar )0);
- if (len + rem == enclen(env->enc, STR_(*np)->s)) {
- NODE_STRING_CLEAR_RAW(*np);
- goto string_end;
- }
- }
-#endif
+ if (r != TK_RAW_BYTE)
return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
- }
r = node_str_cat_char(*np, (UChar )tok->u.c);
if (r < 0) return r;
len++;
}
+
+ tk_raw_byte_end:
+ if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, STR_(*np)->s, STR_(*np)->end))
+ return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+
+ NODE_STRING_CLEAR_RAW(*np);
+ goto string_end;
}
break;
@@ -8055,7 +8083,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
case TK_ANYCHAR_ANYTIME:
*np = node_new_anychar();
CHECK_NULL_RETURN_MEMERR(*np);
- qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
+ qn = node_new_quantifier(0, INFINITE_REPEAT, 0);
CHECK_NULL_RETURN_MEMERR(qn);
NODE_BODY(qn) = *np;
*np = qn;
@@ -8158,6 +8186,10 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (is_invalid_quantifier_target(*tp))
return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
+ parse_depth++;
+ if (parse_depth > ParseDepthLimit)
+ return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
+
qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
r == TK_INTERVAL);
CHECK_NULL_RETURN_MEMERR(qn);
diff --git a/src/regparse.h b/src/regparse.h
index b7a2867..231f7b5 100644
--- a/src/regparse.h
+++ b/src/regparse.h
@@ -66,11 +66,11 @@ enum GimmickType {
#endif
};
-enum BodyEmpty {
- BODY_IS_NOT_EMPTY = 0,
- BODY_IS_EMPTY = 1,
- BODY_IS_EMPTY_MEM = 2,
- BODY_IS_EMPTY_REC = 3
+enum BodyEmptyType {
+ BODY_IS_NOT_EMPTY = 0,
+ BODY_IS_EMPTY_POSSIBILITY = 1,
+ BODY_IS_EMPTY_POSSIBILITY_MEM = 2,
+ BODY_IS_EMPTY_POSSIBILITY_REC = 3
};
typedef struct {
@@ -101,7 +101,7 @@ typedef struct {
int lower;
int upper;
int greedy;
- enum BodyEmpty empty_info;
+ enum BodyEmptyType emptiness;
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
@@ -252,10 +252,6 @@ typedef struct _Node {
#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL)
#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
-#define NODE_IS_SIMPLE_TYPE(node) \
- ((NODE_TYPE2BIT(NODE_TYPE(node)) & \
- (NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0)
-
#define NODE_TYPE(node) ((node)->u.base.node_type)
#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
@@ -314,7 +310,7 @@ typedef struct _Node {
#define NODE_ST_CLEN_FIXED (1<<2)
#define NODE_ST_MARK1 (1<<3)
#define NODE_ST_MARK2 (1<<4)
-#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5)
+#define NODE_ST_STRICT_REAL_REPEAT (1<<5)
#define NODE_ST_RECURSION (1<<6)
#define NODE_ST_CALLED (1<<7)
#define NODE_ST_ADDR_FIXED (1<<8)
@@ -357,8 +353,8 @@ typedef struct _Node {
#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
#define NODE_IS_PROHIBIT_RECURSION(node) \
((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
-#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
- ((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0)
+#define NODE_IS_STRICT_REAL_REPEAT(node) \
+ ((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0)
#define NODE_BODY(node) ((node)->u.base.body)
#define NODE_QUANT_BODY(node) ((node)->body)
diff --git a/src/utf16_be.c b/src/utf16_be.c
index 22bf74d..b66d868 100644
--- a/src/utf16_be.c
+++ b/src/utf16_be.c
@@ -2,7 +2,7 @@
utf16_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -103,7 +103,25 @@ utf16be_mbc_enc_len(const UChar* p)
static int
is_valid_mbc_string(const UChar* s, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_UTF16_BE, s, end);
+ while (s < end) {
+ int len = utf16be_mbc_enc_len(s);
+ if (len == 4) {
+ if (s + 2 >= end)
+ return FALSE;
+ if (! UTF16_IS_SURROGATE_SECOND(*(s+2)))
+ return FALSE;
+ }
+ else
+ if (UTF16_IS_SURROGATE_SECOND(*s))
+ return FALSE;
+
+ s += len;
+ }
+
+ if (s != end)
+ return FALSE;
+ else
+ return TRUE;
}
static int
@@ -146,7 +164,15 @@ utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
static int
utf16be_code_to_mbclen(OnigCodePoint code)
{
- return (code > 0xffff ? 4 : 2);
+ if (code > 0xffff) {
+ if (code > 0x10ffff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else
+ return 4;
+ }
+ else {
+ return 2;
+ }
}
static int
@@ -243,7 +269,8 @@ utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
s--;
}
- if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1)
+ if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1 &&
+ UTF16_IS_SURROGATE_FIRST(*(s-2)))
s -= 2;
return (UChar* )s;
diff --git a/src/utf16_le.c b/src/utf16_le.c
index 4b231c6..cdc74b0 100644
--- a/src/utf16_le.c
+++ b/src/utf16_le.c
@@ -2,7 +2,7 @@
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -95,7 +95,15 @@ static const int EncLen_UTF16[] = {
static int
utf16le_code_to_mbclen(OnigCodePoint code)
{
- return (code > 0xffff ? 4 : 2);
+ if (code > 0xffff) {
+ if (code > 0x10ffff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else
+ return 4;
+ }
+ else {
+ return 2;
+ }
}
static int
@@ -110,7 +118,16 @@ is_valid_mbc_string(const UChar* p, const UChar* end)
const UChar* end1 = end - 1;
while (p < end1) {
- p += utf16le_mbc_enc_len(p);
+ int len = utf16le_mbc_enc_len(p);
+ if (len == 4) {
+ if (p + 3 < end && ! UTF16_IS_SURROGATE_SECOND(*(p + 3)))
+ return FALSE;
+ }
+ else
+ if (UTF16_IS_SURROGATE_SECOND(*(p + 1)))
+ return FALSE;
+
+ p += len;
}
if (p != end)
@@ -252,7 +269,8 @@ utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
s--;
}
- if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)
+ if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1 &&
+ UTF16_IS_SURROGATE_FIRST(*(s-1)))
s -= 2;
return (UChar* )s;