summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Jörg Frings-Fürst <debian@jff-webhosting.net> 2017-07-23 11:38:17 +0200
committer: Jörg Frings-Fürst <debian@jff-webhosting.net> 2017-07-23 11:38:17 +0200
commit: 3d4afdf89f76345dc7ebae66434b94146368dc83 (patch)
tree: fcdf7af9a6c468cba36e18adbf256837cd7c0e34 /src
parent: 7aac082e4e72a80c965825bbc5e8176bc7667e5a (diff)
parent: f133fa1db9ef9191f17d55fe086c8fbe951a345d (diff)
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'src')
-rw-r--r-- src/config.h.cmake.in 3
-rw-r--r-- src/oniguruma.h 6
-rw-r--r-- src/regcomp.c 3258
-rw-r--r-- src/regenc.h 4
-rw-r--r-- src/regerror.c 84
-rw-r--r-- src/regexec.c 252
-rw-r--r-- src/regext.c 8
-rw-r--r-- src/regint.h 119
-rw-r--r-- src/regparse.c 791
-rw-r--r-- src/regparse.h 334
-rw-r--r-- src/st.c 484
-rw-r--r-- src/unicode.c 12
12 files changed, 2998 insertions, 2357 deletions
diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in
index 93e46c0..e13fad1 100644
--- a/src/config.h.cmake.in
+++ b/src/config.h.cmake.in
@@ -43,6 +43,9 @@
/* Define to 1 if you have the <unistd.h> header file. */
#cmakedefine HAVE_UNISTD_H ${HAVE_UNISTD_H}
+/* Define to 1 if you have the <inttypes.h> header file. */
+#cmakedefine HAVE_INTTYPES_H ${HAVE_INTTYPES_H}
+
/* Name of package */
#cmakedefine PACKAGE ${PACKAGE}
diff --git a/src/oniguruma.h b/src/oniguruma.h
index 02d4254..a8ae09a 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -35,7 +35,7 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
-#define ONIGURUMA_VERSION_MINOR 3
+#define ONIGURUMA_VERSION_MINOR 4
#define ONIGURUMA_VERSION_TEENY 0
#ifdef __cplusplus
@@ -609,7 +609,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
/* #define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 */
-/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */
+/* must be smaller than MEM_STATUS_BITS_NUM (unsigned int * 8) */
#define ONIG_MAX_CAPTURE_HISTORY_GROUP 31
#define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \
((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i])
@@ -669,7 +669,7 @@ typedef struct re_pattern_buffer {
int num_mem; /* used memory(...) num counted from 1 */
int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
- int num_null_check; /* OP_NULL_CHECK_START/END id counter */
+ int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */
int num_comb_exp_check; /* combination explosion check */
int num_call; /* number of subexp call */
unsigned int capture_history; /* (?@...) flag (1-31) */
diff --git a/src/regcomp.c b/src/regcomp.c
index 0e9a9ab..db83739 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -29,6 +29,83 @@
#include "regparse.h"
+#if 0
+typedef struct {
+ int n; /* number of values currently stored */
+ int alloc; /* capacity of v, counted in int elements */
+ int* v; /* heap array that holds the values */
+} int_stack;
+
+/*
+ * Allocate an empty int_stack with room for init_size values.
+ * *rs is cleared first and receives the new stack only on success.
+ * Returns ONIG_NORMAL, or ONIGERR_MEMORY if either allocation fails.
+ */
+static int
+make_int_stack(int_stack** rs, int init_size)
+{
+ int_stack* stack;
+ int* items;
+
+ *rs = 0;
+
+ stack = xmalloc(sizeof(*stack));
+ if (IS_NULL(stack)) return ONIGERR_MEMORY;
+
+ items = (int* )xmalloc(sizeof(int) * init_size);
+ if (IS_NOT_NULL(items)) {
+ stack->n = 0;
+ stack->alloc = init_size;
+ stack->v = items;
+
+ *rs = stack;
+ return ONIG_NORMAL;
+ }
+
+ /* The element array could not be allocated: give the header back. */
+ xfree(stack);
+ return ONIGERR_MEMORY;
+}
+
+/*
+ * Free a stack made by make_int_stack: the element array (when set) and
+ * then the header itself. A null s is ignored.
+ */
+static void
+free_int_stack(int_stack* s)
+{
+ if (IS_NULL(s)) return;
+
+ if (IS_NOT_NULL(s->v)) {
+ xfree(s->v);
+ }
+ xfree(s);
+}
+
+/*
+ * Push v on top of the stack, doubling the element array when it is full.
+ * Returns ONIG_NORMAL on success, or ONIGERR_MEMORY when the array cannot
+ * be grown; in that case none of the fields of s are modified.
+ */
+static int
+int_stack_push(int_stack* s, int v)
+{
+ if (s->n >= s->alloc) {
+ int new_size = s->alloc * 2;
+ /* alloc and new_size count ints, but the allocator is given a byte
+ count (compare sizeof(int) * init_size in make_int_stack). */
+ int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size);
+ if (IS_NULL(nv)) return ONIGERR_MEMORY;
+
+ s->alloc = new_size;
+ s->v = nv;
+ }
+
+ s->v[s->n] = v;
+ s->n++;
+ return ONIG_NORMAL;
+}
+
+/*
+ * Remove the top value from the stack and return it.
+ * The stack must not be empty: only ONIG_DEBUG builds check for that, and
+ * they report the problem on stderr and return 0.
+ */
+static int
+int_stack_pop(int_stack* s)
+{
+ int v;
+
+#ifdef ONIG_DEBUG
+ if (s->n <= 0) {
+ fprintf(stderr, "int_stack_pop: fail empty. %p\n", (void* )s);
+ return 0;
+ }
+#endif
+
+ /* int_stack_push stores at v[n] and then increments n, so the top value
+ lives at v[n - 1]: step back first, then read. */
+ s->n--;
+ v = s->v[s->n];
+ return v;
+}
+#endif
+
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
extern OnigCaseFoldType
@@ -70,8 +147,8 @@ swap_node(Node* a, Node* b)
Node c;
c = *a; *a = *b; *b = c;
- if (NTYPE(a) == NT_STR) {
- StrNode* sn = NSTR(a);
+ if (NODE_TYPE(a) == NODE_STR) {
+ StrNode* sn = STR_(a);
if (sn->capa == 0) {
int len = sn->end - sn->s;
sn->s = sn->buf;
@@ -79,8 +156,8 @@ swap_node(Node* a, Node* b)
}
}
- if (NTYPE(b) == NT_STR) {
- StrNode* sn = NSTR(b);
+ if (NODE_TYPE(b) == NODE_STR) {
+ StrNode* sn = STR_(b);
if (sn->capa == 0) {
int len = sn->end - sn->s;
sn->s = sn->buf;
@@ -156,42 +233,42 @@ onig_bbuf_init(BBuf* buf, int size)
#ifdef USE_SUBEXP_CALL
static int
-unset_addr_list_init(UnsetAddrList* uslist, int size)
+unset_addr_list_init(UnsetAddrList* list, int size)
{
UnsetAddr* p;
p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
CHECK_NULL_RETURN_MEMERR(p);
- uslist->num = 0;
- uslist->alloc = size;
- uslist->us = p;
+ list->num = 0;
+ list->alloc = size;
+ list->us = p;
return 0;
}
static void
-unset_addr_list_end(UnsetAddrList* uslist)
+unset_addr_list_end(UnsetAddrList* list)
{
- if (IS_NOT_NULL(uslist->us))
- xfree(uslist->us);
+ if (IS_NOT_NULL(list->us))
+ xfree(list->us);
}
static int
-unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
+unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
{
UnsetAddr* p;
int size;
- if (uslist->num >= uslist->alloc) {
- size = uslist->alloc * 2;
- p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
+ if (list->num >= list->alloc) {
+ size = list->alloc * 2;
+ p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size);
CHECK_NULL_RETURN_MEMERR(p);
- uslist->alloc = size;
- uslist->us = p;
+ list->alloc = size;
+ list->us = p;
}
- uslist->us[uslist->num].offset = offset;
- uslist->us[uslist->num].target = node;
- uslist->num++;
+ list->us[list->num].offset = offset;
+ list->us[list->num].target = node;
+ list->num++;
return 0;
}
#endif /* USE_SUBEXP_CALL */
@@ -251,6 +328,7 @@ add_mem_num(regex_t* reg, int num)
return 0;
}
+#if 0
static int
add_pointer(regex_t* reg, void* addr)
{
@@ -259,6 +337,7 @@ add_pointer(regex_t* reg, void* addr)
BBUF_ADD(reg, &ptr, SIZE_POINTER);
return 0;
}
+#endif
static int
add_option(regex_t* reg, OnigOptionType option)
@@ -273,7 +352,7 @@ add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
int r;
r = add_opcode(reg, opcode);
- if (r) return r;
+ if (r != 0) return r;
r = add_rel_addr(reg, addr);
return r;
}
@@ -298,13 +377,13 @@ add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
int r;
r = add_opcode(reg, opcode);
- if (r) return r;
+ if (r != 0) return r;
r = add_option(reg, option);
return r;
}
static int compile_length_tree(Node* node, regex_t* reg);
-static int compile_tree(Node* node, regex_t* reg);
+static int compile_tree(Node* node, regex_t* reg, ScanEnv* env);
#define IS_NEED_STR_LEN_OP_EXACT(op) \
@@ -357,31 +436,31 @@ select_str_opcode(int mb_len, int str_len, int ignore_case)
}
static int
-compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
+compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)
{
int r;
int saved_num_null_check = reg->num_null_check;
- if (empty_info != 0) {
- r = add_opcode(reg, OP_NULL_CHECK_START);
- if (r) return r;
+ if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {
+ r = add_opcode(reg, OP_EMPTY_CHECK_START);
+ if (r != 0) return r;
r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
- if (r) return r;
+ if (r != 0) return r;
reg->num_null_check++;
}
- r = compile_tree(node, reg);
- if (r) return r;
+ r = compile_tree(node, reg, env);
+ if (r != 0) return r;
- if (empty_info != 0) {
- if (empty_info == NQ_TARGET_IS_EMPTY)
- r = add_opcode(reg, OP_NULL_CHECK_END);
- else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
- r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
- else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
- r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
+ if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {
+ if (empty_info == QUANT_BODY_IS_EMPTY)
+ r = add_opcode(reg, OP_EMPTY_CHECK_END);
+ else if (empty_info == QUANT_BODY_IS_EMPTY_MEM)
+ r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST);
+ else if (empty_info == QUANT_BODY_IS_EMPTY_REC)
+ r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
- if (r) return r;
+ if (r != 0) return r;
r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
}
return r;
@@ -389,28 +468,28 @@ compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
#ifdef USE_SUBEXP_CALL
static int
-compile_call(CallNode* node, regex_t* reg)
+compile_call(CallNode* node, regex_t* reg, ScanEnv* env)
{
int r;
r = add_opcode(reg, OP_CALL);
- if (r) return r;
- r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
- node->target);
- if (r) return r;
+ if (r != 0) return r;
+ r = unset_addr_list_add(env->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
+ NODE_CALL_BODY(node));
+ if (r != 0) return r;
r = add_abs_addr(reg, 0 /*dummy addr.*/);
return r;
}
#endif
static int
-compile_tree_n_times(Node* node, int n, regex_t* reg)
+compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)
{
int i, r;
for (i = 0; i < n; i++) {
- r = compile_tree(node, reg);
- if (r) return r;
+ r = compile_tree(node, reg, env);
+ if (r != 0) return r;
}
return 0;
}
@@ -462,7 +541,7 @@ compile_length_string_node(Node* node, regex_t* reg)
UChar *p, *prev;
StrNode* sn;
- sn = NSTR(node);
+ sn = STR_(node);
if (sn->end <= sn->s)
return 0;
@@ -510,7 +589,7 @@ compile_string_node(Node* node, regex_t* reg)
UChar *p, *prev, *end;
StrNode* sn;
- sn = NSTR(node);
+ sn = STR_(node);
if (sn->end <= sn->s)
return 0;
@@ -529,7 +608,7 @@ compile_string_node(Node* node, regex_t* reg)
}
else {
r = add_compile_string(prev, prev_len, slen, reg, ambig);
- if (r) return r;
+ if (r != 0) return r;
prev = p;
slen = 1;
@@ -578,11 +657,6 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg)
{
int len;
- if (IS_NCCLASS_SHARE(cc)) {
- len = SIZE_OPCODE + SIZE_POINTER;
- return len;
- }
-
if (IS_NULL(cc->mbuf)) {
len = SIZE_OPCODE + SIZE_BITSET;
}
@@ -608,12 +682,6 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
{
int r;
- if (IS_NCCLASS_SHARE(cc)) {
- add_opcode(reg, OP_CCLASS_NODE);
- r = add_pointer(reg, cc);
- return r;
- }
-
if (IS_NULL(cc->mbuf)) {
if (IS_NCCLASS_NOT(cc))
add_opcode(reg, OP_CCLASS_NOT);
@@ -638,7 +706,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
add_opcode(reg, OP_CCLASS_MIX);
r = add_bitset(reg, cc->bs);
- if (r) return r;
+ if (r != 0) return r;
r = add_multi_byte_cclass(cc->mbuf, reg);
}
}
@@ -678,46 +746,46 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
}
static int
-compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
- regex_t* reg)
+compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
+ regex_t* reg, ScanEnv* env)
{
int r;
int num_repeat = reg->num_repeat;
r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
- if (r) return r;
+ if (r != 0) return r;
r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
reg->num_repeat++;
- if (r) return r;
+ if (r != 0) return r;
r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
- if (r) return r;
+ if (r != 0) return r;
r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
- if (r) return r;
+ if (r != 0) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ if (r != 0) return r;
if (
#ifdef USE_SUBEXP_CALL
- reg->num_call > 0 ||
+ NODE_IS_IN_MULTI_ENTRY(qn) ||
#endif
- IS_QUANTIFIER_IN_REPEAT(qn)) {
+ NODE_IS_IN_REAL_REPEAT(qn)) {
r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
}
else {
r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
}
- if (r) return r;
+ if (r != 0) return r;
r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
return r;
}
static int
-is_anychar_star_quantifier(QtfrNode* qn)
+is_anychar_star_quantifier(QuantNode* qn)
{
if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
- NTYPE(qn->target) == NT_CANY)
+ NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))
return 1;
else
return 0;
@@ -729,13 +797,13 @@ is_anychar_star_quantifier(QtfrNode* qn)
#ifdef USE_COMBINATION_EXPLOSION_CHECK
static int
-compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
+compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
{
int len, mod_tlen, cklen;
int ckn;
int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->target_empty_info;
- int tlen = compile_length_tree(qn->target, reg);
+ int empty_info = qn->body_empty_info;
+ int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
@@ -744,7 +812,7 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
/* anychar repeat */
- if (NTYPE(qn->target) == NT_CANY) {
+ if (NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) {
if (qn->greedy && infinite) {
if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
@@ -753,10 +821,10 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
}
}
- if (empty_info != 0)
- mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
- else
+ if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
mod_tlen = tlen;
+ else
+ mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
if (infinite && qn->lower <= 1) {
if (qn->greedy) {
@@ -809,33 +877,33 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
}
static int
-compile_quantifier_node(QtfrNode* qn, regex_t* reg)
+compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
{
int r, mod_tlen;
int ckn;
int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->target_empty_info;
- int tlen = compile_length_tree(qn->target, reg);
+ int empty_info = qn->body_empty_info;
+ int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
if (is_anychar_star_quantifier(qn)) {
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
+ r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
+ if (r != 0) return r;
if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
if (IS_MULTILINE(reg->options))
r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
else
r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
- if (r) return r;
+ if (r != 0) return r;
if (CKN_ON) {
r = add_state_check_num(reg, ckn);
- if (r) return r;
+ if (r != 0) return r;
}
- return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
+ return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
}
else {
if (IS_MULTILINE(reg->options)) {
@@ -848,7 +916,7 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
OP_STATE_CHECK_ANYCHAR_STAR
: OP_ANYCHAR_STAR));
}
- if (r) return r;
+ if (r != 0) return r;
if (CKN_ON)
r = add_state_check_num(reg, ckn);
@@ -856,32 +924,32 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
}
}
- if (empty_info != 0)
- mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
- else
+ if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
mod_tlen = tlen;
+ else
+ mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
if (infinite && qn->lower <= 1) {
if (qn->greedy) {
if (qn->lower == 1) {
r = add_opcode_rel_addr(reg, OP_JUMP,
(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
- if (r) return r;
+ if (r != 0) return r;
}
if (CKN_ON) {
r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r) return r;
+ if (r != 0) return r;
r = add_state_check_num(reg, ckn);
- if (r) return r;
+ if (r != 0) return r;
r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
}
else {
r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
}
- if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
+ if (r != 0) return r;
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ if (r != 0) return r;
r = add_opcode_rel_addr(reg, OP_JUMP,
-(mod_tlen + (int )SIZE_OP_JUMP
+ (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
@@ -889,15 +957,15 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
else {
if (qn->lower == 0) {
r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
- if (r) return r;
+ if (r != 0) return r;
}
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ if (r != 0) return r;
if (CKN_ON) {
r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
- if (r) return r;
+ if (r != 0) return r;
r = add_state_check_num(reg, ckn);
- if (r) return r;
+ if (r != 0) return r;
r = add_rel_addr(reg,
-(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
}
@@ -908,8 +976,8 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
else if (qn->upper == 0) {
if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r) return r;
- r = compile_tree(qn->target, reg);
+ if (r != 0) return r;
+ r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
}
else
r = 0;
@@ -918,42 +986,42 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
if (qn->lower == 0) {
if (CKN_ON) {
r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r) return r;
+ if (r != 0) return r;
r = add_state_check_num(reg, ckn);
- if (r) return r;
+ if (r != 0) return r;
r = add_rel_addr(reg, tlen);
}
else {
r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
}
- if (r) return r;
+ if (r != 0) return r;
}
- r = compile_tree(qn->target, reg);
+ r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
}
else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
if (CKN_ON) {
r = add_opcode(reg, OP_STATE_CHECK_PUSH);
- if (r) return r;
+ if (r != 0) return r;
r = add_state_check_num(reg, ckn);
- if (r) return r;
+ if (r != 0) return r;
r = add_rel_addr(reg, SIZE_OP_JUMP);
}
else {
r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
}
- if (r) return r;
+ if (r != 0) return r;
r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r) return r;
- r = compile_tree(qn->target, reg);
+ if (r != 0) return r;
+ r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
}
else {
- r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
+ r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);
if (CKN_ON) {
- if (r) return r;
+ if (r != 0) return r;
r = add_opcode(reg, OP_STATE_CHECK);
- if (r) return r;
+ if (r != 0) return r;
r = add_state_check_num(reg, ckn);
}
}
@@ -963,17 +1031,17 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
#else /* USE_COMBINATION_EXPLOSION_CHECK */
static int
-compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
+compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
{
int len, mod_tlen;
int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->target_empty_info;
- int tlen = compile_length_tree(qn->target, reg);
+ int empty_info = qn->body_empty_info;
+ int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
/* anychar repeat */
- if (NTYPE(qn->target) == NT_CANY) {
+ if (NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) {
if (qn->greedy && infinite) {
if (IS_NOT_NULL(qn->next_head_exact))
return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
@@ -982,10 +1050,10 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
}
}
- if (empty_info != 0)
- mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
- else
+ if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
mod_tlen = tlen;
+ else
+ mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
if (infinite &&
(qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
@@ -1028,25 +1096,25 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
}
static int
-compile_quantifier_node(QtfrNode* qn, regex_t* reg)
+compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
{
int i, r, mod_tlen;
int infinite = IS_REPEAT_INFINITE(qn->upper);
- int empty_info = qn->target_empty_info;
- int tlen = compile_length_tree(qn->target, reg);
+ int empty_info = qn->body_empty_info;
+ int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
if (is_anychar_star_quantifier(qn)) {
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
+ r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
+ if (r != 0) return r;
if (IS_NOT_NULL(qn->next_head_exact)) {
if (IS_MULTILINE(reg->options))
r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
else
r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
- if (r) return r;
- return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
+ if (r != 0) return r;
+ return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
}
else {
if (IS_MULTILINE(reg->options))
@@ -1056,10 +1124,10 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
}
}
- if (empty_info != 0)
- mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
- else
+ if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
mod_tlen = tlen;
+ else
+ mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
if (infinite &&
(qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
@@ -1075,94 +1143,94 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg)
else {
r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
}
- if (r) return r;
+ if (r != 0) return r;
}
else {
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
+ r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
+ if (r != 0) return r;
}
if (qn->greedy) {
if (IS_NOT_NULL(qn->head_exact)) {
r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- add_bytes(reg, NSTR(qn->head_exact)->s, 1);
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
+ if (r != 0) return r;
+ add_bytes(reg, STR_(qn->head_exact)->s, 1);
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ if (r != 0) return r;
r = add_opcode_rel_addr(reg, OP_JUMP,
-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
}
else if (IS_NOT_NULL(qn->next_head_exact)) {
r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
+ if (r != 0) return r;
+ add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ if (r != 0) return r;
r = add_opcode_rel_addr(reg, OP_JUMP,
-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
}
else {
r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
- if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
+ if (r != 0) return r;
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ if (r != 0) return r;
r = add_opcode_rel_addr(reg, OP_JUMP,
-(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
}
}
else {
r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
- if (r) return r;
- r = compile_tree_empty_check(qn->target, reg, empty_info);
- if (r) return r;
+ if (r != 0) return r;
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ if (r != 0) return r;
r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
}
}
else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r) return r;
- r = compile_tree(qn->target, reg);
+ if (r != 0) return r;
+ r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
}
else if (!infinite && qn->greedy &&
(qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
<= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
int n = qn->upper - qn->lower;
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
+ r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
+ if (r != 0) return r;
for (i = 0; i < n; i++) {
r = add_opcode_rel_addr(reg, OP_PUSH,
(n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- if (r) return r;
+ if (r != 0) return r;
+ r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
+ if (r != 0) return r;
}
}
else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
- if (r) return r;
+ if (r != 0) return r;
r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r) return r;
- r = compile_tree(qn->target, reg);
+ if (r != 0) return r;
+ r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
}
else {
- r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
+ r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);
}
return r;
}
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
static int
-compile_length_option_node(EncloseNode* node, regex_t* reg)
+compile_length_option_node(EnclosureNode* node, regex_t* reg)
{
int tlen;
OnigOptionType prev = reg->options;
- reg->options = node->option;
- tlen = compile_length_tree(node->target, reg);
+ reg->options = node->o.option;
+ tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
reg->options = prev;
if (tlen < 0) return tlen;
@@ -1176,82 +1244,88 @@ compile_length_option_node(EncloseNode* node, regex_t* reg)
}
static int
-compile_option_node(EncloseNode* node, regex_t* reg)
+compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
{
int r;
OnigOptionType prev = reg->options;
- if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
- r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
- if (r) return r;
+ if (IS_DYNAMIC_OPTION(prev ^ node->o.option)) {
+ r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->o.option);
+ if (r != 0) return r;
r = add_opcode_option(reg, OP_SET_OPTION, prev);
- if (r) return r;
+ if (r != 0) return r;
r = add_opcode(reg, OP_FAIL);
- if (r) return r;
+ if (r != 0) return r;
}
- reg->options = node->option;
- r = compile_tree(node->target, reg);
+ reg->options = node->o.option;
+ r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
reg->options = prev;
- if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
- if (r) return r;
+ if (IS_DYNAMIC_OPTION(prev ^ node->o.option)) {
+ if (r != 0) return r;
r = add_opcode_option(reg, OP_SET_OPTION, prev);
}
return r;
}
static int
-compile_length_enclose_node(EncloseNode* node, regex_t* reg)
+compile_length_enclosure_node(EnclosureNode* node, regex_t* reg)
{
int len;
int tlen;
- if (node->type == ENCLOSE_OPTION)
+ if (node->type == ENCLOSURE_OPTION)
return compile_length_option_node(node, reg);
- if (node->target) {
- tlen = compile_length_tree(node->target, reg);
+ if (NODE_ENCLOSURE_BODY(node)) {
+ tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
if (tlen < 0) return tlen;
}
else
tlen = 0;
switch (node->type) {
- case ENCLOSE_MEMORY:
+ case ENCLOSURE_MEMORY:
#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_CALLED(node)) {
+
+ if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
+ len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
+ return len;
+ }
+
+ if (NODE_IS_CALLED(node)) {
len = SIZE_OP_MEMORY_START_PUSH + tlen
+ SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
- if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- len += (IS_ENCLOSE_RECURSION(node)
+ if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
+ len += (NODE_IS_RECURSION(node)
? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
else
- len += (IS_ENCLOSE_RECURSION(node)
+ len += (NODE_IS_RECURSION(node)
? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
}
- else if (IS_ENCLOSE_RECURSION(node)) {
+ else if (NODE_IS_RECURSION(node)) {
len = SIZE_OP_MEMORY_START_PUSH;
- len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
+ len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
}
else
#endif
{
- if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
+ if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
len = SIZE_OP_MEMORY_START_PUSH;
else
len = SIZE_OP_MEMORY_START;
- len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
+ len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
}
break;
- case ENCLOSE_STOP_BACKTRACK:
- if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
- QtfrNode* qn = NQTFR(node->target);
- tlen = compile_length_tree(qn->target, reg);
+ case ENCLOSURE_STOP_BACKTRACK:
+ if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
+ QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));
+ tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
len = tlen * qn->lower
@@ -1273,102 +1347,124 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg)
static int get_char_length_tree(Node* node, regex_t* reg, int* len);
static int
-compile_enclose_node(EncloseNode* node, regex_t* reg)
+compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
{
- int r, len;
+ int r;
+ int len;
- if (node->type == ENCLOSE_OPTION)
- return compile_option_node(node, reg);
+#ifdef USE_SUBEXP_CALL
+ if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
+ r = add_opcode(reg, OP_CALL);
+ if (r != 0) return r;
+ node->m.called_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
+ NODE_STATUS_ADD(node, NST_ADDR_FIXED);
+ r = add_abs_addr(reg, (int )node->m.called_addr);
+ if (r != 0) return r;
+ len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
+ len += SIZE_OP_RETURN;
+ r = add_opcode_rel_addr(reg, OP_JUMP, len);
+ if (r != 0) return r;
+
+ r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
+ if (r != 0) return r;
+ r = add_opcode(reg, OP_RETURN);
+ return r;
+ }
+#endif
- switch (node->type) {
- case ENCLOSE_MEMORY:
#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_CALLED(node)) {
- r = add_opcode(reg, OP_CALL);
- if (r) return r;
- node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
- node->state |= NST_ADDR_FIXED;
- r = add_abs_addr(reg, (int )node->call_addr);
- if (r) return r;
- len = compile_length_tree(node->target, reg);
- len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
- if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
- else
- len += (IS_ENCLOSE_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ if (NODE_IS_CALLED(node)) {
+ r = add_opcode(reg, OP_CALL);
+ if (r != 0) return r;
+ node->m.called_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
+ NODE_STATUS_ADD(node, NST_ADDR_FIXED);
+ r = add_abs_addr(reg, (int )node->m.called_addr);
+ if (r != 0) return r;
+ len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg);
+ len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
+ if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
+ len += (NODE_IS_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+ else
+ len += (NODE_IS_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
- r = add_opcode_rel_addr(reg, OP_JUMP, len);
- if (r) return r;
- }
+ r = add_opcode_rel_addr(reg, OP_JUMP, len);
+ if (r != 0) return r;
+ }
#endif
- if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
- r = add_opcode(reg, OP_MEMORY_START_PUSH);
- else
- r = add_opcode(reg, OP_MEMORY_START);
- if (r) return r;
- r = add_mem_num(reg, node->regnum);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
-#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_CALLED(node)) {
- if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
- ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
- else
- r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
- ? OP_MEMORY_END_REC : OP_MEMORY_END));
- if (r) return r;
- r = add_mem_num(reg, node->regnum);
- if (r) return r;
- r = add_opcode(reg, OP_RETURN);
- }
- else if (IS_ENCLOSE_RECURSION(node)) {
- if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
- else
- r = add_opcode(reg, OP_MEMORY_END_REC);
- if (r) return r;
- r = add_mem_num(reg, node->regnum);
- }
- else
+ if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
+ r = add_opcode(reg, OP_MEMORY_START_PUSH);
+ else
+ r = add_opcode(reg, OP_MEMORY_START);
+ if (r != 0) return r;
+ r = add_mem_num(reg, node->m.regnum);
+ if (r != 0) return r;
+ r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
+ if (r != 0) return r;
+
+#ifdef USE_SUBEXP_CALL
+ if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
+ r = add_opcode(reg, (NODE_IS_RECURSION(node)
+ ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
+ else
+ r = add_opcode(reg, (NODE_IS_RECURSION(node)
+ ? OP_MEMORY_END_REC : OP_MEMORY_END));
+ if (r != 0) return r;
+ r = add_mem_num(reg, node->m.regnum);
+ if (NODE_IS_CALLED(node)) {
+ if (r != 0) return r;
+ r = add_opcode(reg, OP_RETURN);
+ }
+#else
+ if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
+ r = add_opcode(reg, OP_MEMORY_END_PUSH);
+ else
+ r = add_opcode(reg, OP_MEMORY_END);
+ if (r != 0) return r;
+ r = add_mem_num(reg, node->m.regnum);
#endif
- {
- if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
- r = add_opcode(reg, OP_MEMORY_END_PUSH);
- else
- r = add_opcode(reg, OP_MEMORY_END);
- if (r) return r;
- r = add_mem_num(reg, node->regnum);
- }
+
+ return r;
+}
+
+static int
+compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env)
+{
+ int r, len;
+
+ if (node->type == ENCLOSURE_OPTION)
+ return compile_option_node(node, reg, env);
+
+ switch (node->type) {
+ case ENCLOSURE_MEMORY:
+ r = compile_enclosure_memory_node(node, reg, env);
break;
- case ENCLOSE_STOP_BACKTRACK:
- if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
- QtfrNode* qn = NQTFR(node->target);
- r = compile_tree_n_times(qn->target, qn->lower, reg);
- if (r) return r;
+ case ENCLOSURE_STOP_BACKTRACK:
+ if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
+ QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node));
+ r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
+ if (r != 0) return r;
- len = compile_length_tree(qn->target, reg);
+ len = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (len < 0) return len;
r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
- if (r) return r;
- r = compile_tree(qn->target, reg);
- if (r) return r;
+ if (r != 0) return r;
+ r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
+ if (r != 0) return r;
r = add_opcode(reg, OP_POP);
- if (r) return r;
+ if (r != 0) return r;
r = add_opcode_rel_addr(reg, OP_JUMP,
-((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
}
else {
r = add_opcode(reg, OP_PUSH_STOP_BT);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
+ if (r != 0) return r;
+ r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env);
+ if (r != 0) return r;
r = add_opcode(reg, OP_POP_STOP_BT);
}
break;
@@ -1387,8 +1483,8 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)
int len;
int tlen = 0;
- if (node->target) {
- tlen = compile_length_tree(node->target, reg);
+ if (IS_NOT_NULL(NODE_ANCHOR_BODY(node))) {
+ tlen = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
if (tlen < 0) return tlen;
}
@@ -1415,7 +1511,7 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)
}
static int
-compile_anchor_node(AnchorNode* node, regex_t* reg)
+compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
{
int r, len;
@@ -1436,19 +1532,19 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
case ANCHOR_PREC_READ:
r = add_opcode(reg, OP_PUSH_POS);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
+ if (r != 0) return r;
+ r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
+ if (r != 0) return r;
r = add_opcode(reg, OP_POP_POS);
break;
case ANCHOR_PREC_READ_NOT:
- len = compile_length_tree(node->target, reg);
+ len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
if (len < 0) return len;
r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
+ if (r != 0) return r;
+ r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
+ if (r != 0) return r;
r = add_opcode(reg, OP_FAIL_POS);
break;
@@ -1456,37 +1552,37 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
{
int n;
r = add_opcode(reg, OP_LOOK_BEHIND);
- if (r) return r;
+ if (r != 0) return r;
if (node->char_len < 0) {
- r = get_char_length_tree(node->target, reg, &n);
- if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);
+ if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
else
n = node->char_len;
r = add_length(reg, n);
- if (r) return r;
- r = compile_tree(node->target, reg);
+ if (r != 0) return r;
+ r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
}
break;
case ANCHOR_LOOK_BEHIND_NOT:
{
int n;
- len = compile_length_tree(node->target, reg);
+ len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
- if (r) return r;
+ if (r != 0) return r;
if (node->char_len < 0) {
- r = get_char_length_tree(node->target, reg, &n);
- if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n);
+ if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
else
n = node->char_len;
r = add_length(reg, n);
- if (r) return r;
- r = compile_tree(node->target, reg);
- if (r) return r;
+ if (r != 0) return r;
+ r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
+ if (r != 0) return r;
r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
}
break;
@@ -1502,55 +1598,53 @@ compile_anchor_node(AnchorNode* node, regex_t* reg)
static int
compile_length_tree(Node* node, regex_t* reg)
{
- int len, type, r;
+ int len, r;
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
len = 0;
do {
- r = compile_length_tree(NCAR(node), reg);
+ r = compile_length_tree(NODE_CAR(node), reg);
if (r < 0) return r;
len += r;
- } while (IS_NOT_NULL(node = NCDR(node)));
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
r = len;
break;
- case NT_ALT:
+ case NODE_ALT:
{
int n;
n = r = 0;
do {
- r += compile_length_tree(NCAR(node), reg);
+ r += compile_length_tree(NODE_CAR(node), reg);
n++;
- } while (IS_NOT_NULL(node = NCDR(node)));
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
}
break;
- case NT_STR:
+ case NODE_STR:
if (NSTRING_IS_RAW(node))
- r = compile_length_string_raw_node(NSTR(node), reg);
+ r = compile_length_string_raw_node(STR_(node), reg);
else
r = compile_length_string_node(node, reg);
break;
- case NT_CCLASS:
- r = compile_length_cclass_node(NCCLASS(node), reg);
+ case NODE_CCLASS:
+ r = compile_length_cclass_node(CCLASS_(node), reg);
break;
- case NT_CTYPE:
- case NT_CANY:
+ case NODE_CTYPE:
r = SIZE_OPCODE;
break;
- case NT_BREF:
+ case NODE_BREF:
{
- BRefNode* br = NBREF(node);
+ BRefNode* br = BREF_(node);
#ifdef USE_BACKREF_WITH_LEVEL
- if (IS_BACKREF_NEST_LEVEL(br)) {
+ if (NODE_IS_NEST_LEVEL(node)) {
r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
}
@@ -1567,21 +1661,21 @@ compile_length_tree(Node* node, regex_t* reg)
break;
#ifdef USE_SUBEXP_CALL
- case NT_CALL:
+ case NODE_CALL:
r = SIZE_OP_CALL;
break;
#endif
- case NT_QTFR:
- r = compile_length_quantifier_node(NQTFR(node), reg);
+ case NODE_QUANT:
+ r = compile_length_quantifier_node(QUANT_(node), reg);
break;
- case NT_ENCLOSE:
- r = compile_length_enclose_node(NENCLOSE(node), reg);
+ case NODE_ENCLOSURE:
+ r = compile_length_enclosure_node(ENCLOSURE_(node), reg);
break;
- case NT_ANCHOR:
- r = compile_length_anchor_node(NANCHOR(node), reg);
+ case NODE_ANCHOR:
+ r = compile_length_anchor_node(ANCHOR_(node), reg);
break;
default:
@@ -1593,94 +1687,95 @@ compile_length_tree(Node* node, regex_t* reg)
}
static int
-compile_tree(Node* node, regex_t* reg)
+compile_tree(Node* node, regex_t* reg, ScanEnv* env)
{
- int n, type, len, pos, r = 0;
+ int n, len, pos, r = 0;
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
do {
- r = compile_tree(NCAR(node), reg);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ r = compile_tree(NODE_CAR(node), reg, env);
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NT_ALT:
+ case NODE_ALT:
{
Node* x = node;
len = 0;
do {
- len += compile_length_tree(NCAR(x), reg);
- if (NCDR(x) != NULL) {
+ len += compile_length_tree(NODE_CAR(x), reg);
+ if (IS_NOT_NULL(NODE_CDR(x))) {
len += SIZE_OP_PUSH + SIZE_OP_JUMP;
}
- } while (IS_NOT_NULL(x = NCDR(x)));
+ } while (IS_NOT_NULL(x = NODE_CDR(x)));
pos = reg->used + len; /* goal position */
do {
- len = compile_length_tree(NCAR(node), reg);
- if (IS_NOT_NULL(NCDR(node))) {
+ len = compile_length_tree(NODE_CAR(node), reg);
+ if (IS_NOT_NULL(NODE_CDR(node))) {
r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
- if (r) break;
+ if (r != 0) break;
}
- r = compile_tree(NCAR(node), reg);
- if (r) break;
- if (IS_NOT_NULL(NCDR(node))) {
+ r = compile_tree(NODE_CAR(node), reg, env);
+ if (r != 0) break;
+ if (IS_NOT_NULL(NODE_CDR(node))) {
len = pos - (reg->used + SIZE_OP_JUMP);
r = add_opcode_rel_addr(reg, OP_JUMP, len);
- if (r) break;
+ if (r != 0) break;
}
- } while (IS_NOT_NULL(node = NCDR(node)));
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
}
break;
- case NT_STR:
+ case NODE_STR:
if (NSTRING_IS_RAW(node))
- r = compile_string_raw_node(NSTR(node), reg);
+ r = compile_string_raw_node(STR_(node), reg);
else
r = compile_string_node(node, reg);
break;
- case NT_CCLASS:
- r = compile_cclass_node(NCCLASS(node), reg);
+ case NODE_CCLASS:
+ r = compile_cclass_node(CCLASS_(node), reg);
break;
- case NT_CTYPE:
+ case NODE_CTYPE:
{
int op;
- switch (NCTYPE(node)->ctype) {
+ switch (CTYPE_(node)->ctype) {
+ case CTYPE_ANYCHAR:
+ if (IS_MULTILINE(reg->options))
+ r = add_opcode(reg, OP_ANYCHAR_ML);
+ else
+ r = add_opcode(reg, OP_ANYCHAR);
+ break;
+
case ONIGENC_CTYPE_WORD:
- if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
+ if (CTYPE_(node)->not != 0) op = OP_NOT_WORD;
else op = OP_WORD;
+
+ r = add_opcode(reg, op);
break;
+
default:
return ONIGERR_TYPE_BUG;
break;
}
- r = add_opcode(reg, op);
}
break;
- case NT_CANY:
- if (IS_MULTILINE(reg->options))
- r = add_opcode(reg, OP_ANYCHAR_ML);
- else
- r = add_opcode(reg, OP_ANYCHAR);
- break;
-
- case NT_BREF:
+ case NODE_BREF:
{
- BRefNode* br = NBREF(node);
+ BRefNode* br = BREF_(node);
#ifdef USE_BACKREF_WITH_LEVEL
- if (IS_BACKREF_NEST_LEVEL(br)) {
+ if (NODE_IS_NEST_LEVEL(node)) {
r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
- if (r) return r;
+ if (r != 0) return r;
r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
- if (r) return r;
+ if (r != 0) return r;
r = add_length(reg, br->nest_level);
- if (r) return r;
+ if (r != 0) return r;
goto add_bacref_mems;
}
@@ -1690,7 +1785,7 @@ compile_tree(Node* node, regex_t* reg)
n = br->back_static[0];
if (IS_IGNORECASE(reg->options)) {
r = add_opcode(reg, OP_BACKREFN_IC);
- if (r) return r;
+ if (r != 0) return r;
r = add_mem_num(reg, n);
}
else {
@@ -1699,7 +1794,7 @@ compile_tree(Node* node, regex_t* reg)
case 2: r = add_opcode(reg, OP_BACKREF2); break;
default:
r = add_opcode(reg, OP_BACKREFN);
- if (r) return r;
+ if (r != 0) return r;
r = add_mem_num(reg, n);
break;
}
@@ -1715,43 +1810,43 @@ compile_tree(Node* node, regex_t* reg)
else {
r = add_opcode(reg, OP_BACKREF_MULTI);
}
- if (r) return r;
+ if (r != 0) return r;
#ifdef USE_BACKREF_WITH_LEVEL
add_bacref_mems:
#endif
r = add_length(reg, br->back_num);
- if (r) return r;
+ if (r != 0) return r;
p = BACKREFS_P(br);
for (i = br->back_num - 1; i >= 0; i--) {
r = add_mem_num(reg, p[i]);
- if (r) return r;
+ if (r != 0) return r;
}
}
}
break;
#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- r = compile_call(NCALL(node), reg);
+ case NODE_CALL:
+ r = compile_call(CALL_(node), reg, env);
break;
#endif
- case NT_QTFR:
- r = compile_quantifier_node(NQTFR(node), reg);
+ case NODE_QUANT:
+ r = compile_quantifier_node(QUANT_(node), reg, env);
break;
- case NT_ENCLOSE:
- r = compile_enclose_node(NENCLOSE(node), reg);
+ case NODE_ENCLOSURE:
+ r = compile_enclosure_node(ENCLOSURE_(node), reg, env);
break;
- case NT_ANCHOR:
- r = compile_anchor_node(NANCHOR(node), reg);
+ case NODE_ANCHOR:
+ r = compile_anchor_node(ANCHOR_(node), reg, env);
break;
default:
#ifdef ONIG_DEBUG
- fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
+ fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node));
#endif
break;
}
@@ -1767,50 +1862,50 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
int r = 0;
Node* node = *plink;
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
do {
- r = noname_disable_map(&(NCAR(node)), map, counter);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ r = noname_disable_map(&(NODE_CAR(node)), map, counter);
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NT_QTFR:
+ case NODE_QUANT:
{
- Node** ptarget = &(NQTFR(node)->target);
+ Node** ptarget = &(NODE_BODY(node));
Node* old = *ptarget;
r = noname_disable_map(ptarget, map, counter);
- if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
+ if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {
onig_reduce_nested_quantifier(node, *ptarget);
}
}
break;
- case NT_ENCLOSE:
+ case NODE_ENCLOSURE:
{
- EncloseNode* en = NENCLOSE(node);
- if (en->type == ENCLOSE_MEMORY) {
- if (IS_ENCLOSE_NAMED_GROUP(en)) {
+ EnclosureNode* en = ENCLOSURE_(node);
+ if (en->type == ENCLOSURE_MEMORY) {
+ if (NODE_IS_NAMED_GROUP(node)) {
(*counter)++;
- map[en->regnum].new_val = *counter;
- en->regnum = *counter;
- r = noname_disable_map(&(en->target), map, counter);
+ map[en->m.regnum].new_val = *counter;
+ en->m.regnum = *counter;
+ r = noname_disable_map(&(NODE_BODY(node)), map, counter);
}
else {
- *plink = en->target;
- en->target = NULL_NODE;
+ *plink = NODE_BODY(node);
+ NODE_BODY(node) = NULL_NODE;
onig_node_free(node);
r = noname_disable_map(plink, map, counter);
}
}
else
- r = noname_disable_map(&(en->target), map, counter);
+ r = noname_disable_map(&(NODE_BODY(node)), map, counter);
}
break;
- case NT_ANCHOR:
- if (NANCHOR(node)->target)
- r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
+ case NODE_ANCHOR:
+ if (IS_NOT_NULL(NODE_BODY(node)))
+ r = noname_disable_map(&(NODE_BODY(node)), map, counter);
break;
default:
@@ -1825,9 +1920,9 @@ renumber_node_backref(Node* node, GroupNumRemap* map)
{
int i, pos, n, old_num;
int *backs;
- BRefNode* bn = NBREF(node);
+ BRefNode* bn = BREF_(node);
- if (! IS_BACKREF_NAME_REF(bn))
+ if (! NODE_IS_BY_NAME(node))
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
old_num = bn->back_num;
@@ -1853,27 +1948,26 @@ renumber_by_map(Node* node, GroupNumRemap* map)
{
int r = 0;
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
do {
- r = renumber_by_map(NCAR(node), map);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ r = renumber_by_map(NODE_CAR(node), map);
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NT_QTFR:
- r = renumber_by_map(NQTFR(node)->target, map);
- break;
- case NT_ENCLOSE:
- r = renumber_by_map(NENCLOSE(node)->target, map);
+
+ case NODE_QUANT:
+ case NODE_ENCLOSURE:
+ r = renumber_by_map(NODE_BODY(node), map);
break;
- case NT_BREF:
+ case NODE_BREF:
r = renumber_node_backref(node, map);
break;
- case NT_ANCHOR:
- if (NANCHOR(node)->target)
- r = renumber_by_map(NANCHOR(node)->target, map);
+ case NODE_ANCHOR:
+ if (IS_NOT_NULL(NODE_BODY(node)))
+ r = renumber_by_map(NODE_BODY(node), map);
break;
default:
@@ -1888,28 +1982,26 @@ numbered_ref_check(Node* node)
{
int r = 0;
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
do {
- r = numbered_ref_check(NCAR(node));
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
- case NT_QTFR:
- r = numbered_ref_check(NQTFR(node)->target);
- break;
- case NT_ENCLOSE:
- r = numbered_ref_check(NENCLOSE(node)->target);
+ r = numbered_ref_check(NODE_CAR(node));
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NT_BREF:
- if (! IS_BACKREF_NAME_REF(NBREF(node)))
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+ case NODE_ANCHOR:
+ if (IS_NULL(NODE_BODY(node)))
+ break;
+ /* fall */
+ case NODE_QUANT:
+ case NODE_ENCLOSURE:
+ r = numbered_ref_check(NODE_BODY(node));
break;
- case NT_ANCHOR:
- if (NANCHOR(node)->target)
- r = numbered_ref_check(NANCHOR(node)->target);
+ case NODE_BREF:
+ if (! NODE_IS_BY_NAME(node))
+ return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
break;
default:
@@ -1923,7 +2015,7 @@ static int
disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
{
int r, i, pos, counter;
- BitStatusType loc;
+ MemStatusType loc;
GroupNumRemap* map;
map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
@@ -1940,16 +2032,16 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
for (i = 1, pos = 1; i <= env->num_mem; i++) {
if (map[i].new_val > 0) {
- SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
+ SCANENV_MEMENV(env)[pos] = SCANENV_MEMENV(env)[i];
pos++;
}
}
loc = env->capture_history;
- BIT_STATUS_CLEAR(env->capture_history);
+ MEM_STATUS_CLEAR(env->capture_history);
for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- if (BIT_STATUS_AT(loc, i)) {
- BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
+ if (MEM_STATUS_AT(loc, i)) {
+ MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val);
}
}
@@ -1965,13 +2057,13 @@ static int
unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
{
int i, offset;
- EncloseNode* en;
+ EnclosureNode* en;
AbsAddrType addr;
for (i = 0; i < uslist->num; i++) {
- en = NENCLOSE(uslist->us[i].target);
- if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
- addr = en->call_addr;
+ en = ENCLOSURE_(uslist->us[i].target);
+ if (! NODE_IS_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
+ addr = en->m.called_addr;
offset = uslist->us[i].offset;
BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
@@ -1980,75 +2072,6 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
}
#endif
-#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
-static int
-quantifiers_memory_node_info(Node* node)
-{
- int r = 0;
-
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
- {
- int v;
- do {
- v = quantifiers_memory_node_info(NCAR(node));
- if (v > r) r = v;
- } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
- }
- break;
-
-#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (IS_CALL_RECURSION(NCALL(node))) {
- return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
- }
- else
- r = quantifiers_memory_node_info(NCALL(node)->target);
- break;
-#endif
-
- case NT_QTFR:
- {
- QtfrNode* qn = NQTFR(node);
- if (qn->upper != 0) {
- r = quantifiers_memory_node_info(qn->target);
- }
- }
- break;
-
- case NT_ENCLOSE:
- {
- EncloseNode* en = NENCLOSE(node);
- switch (en->type) {
- case ENCLOSE_MEMORY:
- return NQ_TARGET_IS_EMPTY_MEM;
- break;
-
- case ENCLOSE_OPTION:
- case ENCLOSE_STOP_BACKTRACK:
- r = quantifiers_memory_node_info(en->target);
- break;
- default:
- break;
- }
- }
- break;
-
- case NT_BREF:
- case NT_STR:
- case NT_CTYPE:
- case NT_CCLASS:
- case NT_CANY:
- case NT_ANCHOR:
- default:
- break;
- }
-
- return r;
-}
-#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
-
#define GET_CHAR_LEN_VARLEN -1
#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
@@ -2062,23 +2085,23 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
level++;
*len = 0;
- switch (NTYPE(node)) {
- case NT_LIST:
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
do {
- r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
+ r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);
if (r == 0)
*len = distance_add(*len, tlen);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NT_ALT:
+ case NODE_ALT:
{
int tlen2;
int varlen = 0;
- r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
- while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
- r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
+ r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level);
+ while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) {
+ r = get_char_length_tree1(NODE_CAR(node), reg, &tlen2, level);
if (r == 0) {
if (tlen != tlen2)
varlen = 1;
@@ -2097,9 +2120,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
}
break;
- case NT_STR:
+ case NODE_STR:
{
- StrNode* sn = NSTR(node);
+ StrNode* sn = STR_(node);
UChar *s = sn->s;
while (s < sn->end) {
s += enclen(reg->enc, s);
@@ -2108,11 +2131,11 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
}
break;
- case NT_QTFR:
+ case NODE_QUANT:
{
- QtfrNode* qn = NQTFR(node);
+ QuantNode* qn = QUANT_(node);
if (qn->lower == qn->upper) {
- r = get_char_length_tree1(qn->target, reg, &tlen, level);
+ r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level);
if (r == 0)
*len = distance_multiply(tlen, qn->lower);
}
@@ -2122,43 +2145,42 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
break;
#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (! IS_CALL_RECURSION(NCALL(node)))
- r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
+ case NODE_CALL:
+ if (! NODE_IS_RECURSION(node))
+ r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
else
r = GET_CHAR_LEN_VARLEN;
break;
#endif
- case NT_CTYPE:
+ case NODE_CTYPE:
*len = 1;
break;
- case NT_CCLASS:
- case NT_CANY:
+ case NODE_CCLASS:
*len = 1;
break;
- case NT_ENCLOSE:
+ case NODE_ENCLOSURE:
{
- EncloseNode* en = NENCLOSE(node);
+ EnclosureNode* en = ENCLOSURE_(node);
switch (en->type) {
- case ENCLOSE_MEMORY:
+ case ENCLOSURE_MEMORY:
#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_CLEN_FIXED(en))
+ if (NODE_IS_CLEN_FIXED(node))
*len = en->char_len;
else {
- r = get_char_length_tree1(en->target, reg, len, level);
+ r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
if (r == 0) {
en->char_len = *len;
- SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
+ NODE_STATUS_ADD(node, NST_CLEN_FIXED);
}
}
break;
#endif
- case ENCLOSE_OPTION:
- case ENCLOSE_STOP_BACKTRACK:
- r = get_char_length_tree1(en->target, reg, len, level);
+ case ENCLOSURE_OPTION:
+ case ENCLOSURE_STOP_BACKTRACK:
+ r = get_char_length_tree1(NODE_BODY(node), reg, len, level);
break;
default:
break;
@@ -2166,7 +2188,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
}
break;
- case NT_ANCHOR:
+ case NODE_ANCHOR:
break;
default:
@@ -2185,28 +2207,32 @@ get_char_length_tree(Node* node, regex_t* reg, int* len)
/* x is not included y ==> 1 : 0 */
static int
-is_not_included(Node* x, Node* y, regex_t* reg)
+is_exclusive(Node* x, Node* y, regex_t* reg)
{
int i, len;
OnigCodePoint code;
UChar *p;
- int ytype;
+ NodeType ytype;
retry:
- ytype = NTYPE(y);
- switch (NTYPE(x)) {
- case NT_CTYPE:
+ ytype = NODE_TYPE(y);
+ switch (NODE_TYPE(x)) {
+ case NODE_CTYPE:
{
+ if (CTYPE_(x)->ctype == CTYPE_ANYCHAR ||
+ CTYPE_(y)->ctype == CTYPE_ANYCHAR)
+ break;
+
switch (ytype) {
- case NT_CTYPE:
- if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
- NCTYPE(y)->not != NCTYPE(x)->not)
+ case NODE_CTYPE:
+ if (CTYPE_(y)->ctype == CTYPE_(x)->ctype &&
+ CTYPE_(y)->not != CTYPE_(x)->not)
return 1;
else
return 0;
break;
- case NT_CCLASS:
+ case NODE_CCLASS:
swap:
{
Node* tmp;
@@ -2215,7 +2241,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
}
break;
- case NT_STR:
+ case NODE_STR:
goto swap;
break;
@@ -2225,14 +2251,18 @@ is_not_included(Node* x, Node* y, regex_t* reg)
}
break;
- case NT_CCLASS:
+ case NODE_CCLASS:
{
- CClassNode* xc = NCCLASS(x);
+ CClassNode* xc = CCLASS_(x);
switch (ytype) {
- case NT_CTYPE:
- switch (NCTYPE(y)->ctype) {
+ case NODE_CTYPE:
+ switch (CTYPE_(y)->ctype) {
+ case CTYPE_ANYCHAR:
+ return 0;
+ break;
+
case ONIGENC_CTYPE_WORD:
- if (NCTYPE(y)->not == 0) {
+ if (CTYPE_(y)->not == 0) {
if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (BITSET_AT(xc->bs, i)) {
@@ -2266,10 +2296,10 @@ is_not_included(Node* x, Node* y, regex_t* reg)
}
break;
- case NT_CCLASS:
+ case NODE_CCLASS:
{
int v;
- CClassNode* yc = NCCLASS(y);
+ CClassNode* yc = CCLASS_(y);
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
v = BITSET_AT(xc->bs, i);
@@ -2288,7 +2318,7 @@ is_not_included(Node* x, Node* y, regex_t* reg)
}
break;
- case NT_STR:
+ case NODE_STR:
goto swap;
break;
@@ -2298,30 +2328,33 @@ is_not_included(Node* x, Node* y, regex_t* reg)
}
break;
- case NT_STR:
+ case NODE_STR:
{
- StrNode* xs = NSTR(x);
+ StrNode* xs = STR_(x);
if (NSTRING_LEN(x) == 0)
break;
//c = *(xs->s);
switch (ytype) {
- case NT_CTYPE:
- switch (NCTYPE(y)->ctype) {
+ case NODE_CTYPE:
+ switch (CTYPE_(y)->ctype) {
+ case CTYPE_ANYCHAR:
+ break;
+
case ONIGENC_CTYPE_WORD:
if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
- return NCTYPE(y)->not;
+ return CTYPE_(y)->not;
else
- return !(NCTYPE(y)->not);
+ return !(CTYPE_(y)->not);
break;
default:
break;
}
break;
- case NT_CCLASS:
+ case NODE_CCLASS:
{
- CClassNode* cc = NCCLASS(y);
+ CClassNode* cc = CCLASS_(y);
code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
@@ -2329,10 +2362,10 @@ is_not_included(Node* x, Node* y, regex_t* reg)
}
break;
- case NT_STR:
+ case NODE_STR:
{
UChar *q;
- StrNode* ys = NSTR(y);
+ StrNode* ys = STR_(y);
len = NSTRING_LEN(x);
if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
@@ -2365,29 +2398,31 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
{
Node* n = NULL_NODE;
- switch (NTYPE(node)) {
- case NT_BREF:
- case NT_ALT:
- case NT_CANY:
+ switch (NODE_TYPE(node)) {
+ case NODE_BREF:
+ case NODE_ALT:
#ifdef USE_SUBEXP_CALL
- case NT_CALL:
+ case NODE_CALL:
#endif
break;
- case NT_CTYPE:
- case NT_CCLASS:
+ case NODE_CTYPE:
+ if (CTYPE_(node)->ctype == CTYPE_ANYCHAR)
+ break;
+ /* fall */
+ case NODE_CCLASS:
if (exact == 0) {
n = node;
}
break;
- case NT_LIST:
- n = get_head_value_node(NCAR(node), exact, reg);
+ case NODE_LIST:
+ n = get_head_value_node(NODE_CAR(node), exact, reg);
break;
- case NT_STR:
+ case NODE_STR:
{
- StrNode* sn = NSTR(node);
+ StrNode* sn = STR_(node);
if (sn->end <= sn->s)
break;
@@ -2401,43 +2436,43 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
}
break;
- case NT_QTFR:
+ case NODE_QUANT:
{
- QtfrNode* qn = NQTFR(node);
+ QuantNode* qn = QUANT_(node);
if (qn->lower > 0) {
if (IS_NOT_NULL(qn->head_exact))
n = qn->head_exact;
else
- n = get_head_value_node(qn->target, exact, reg);
+ n = get_head_value_node(NODE_BODY(node), exact, reg);
}
}
break;
- case NT_ENCLOSE:
+ case NODE_ENCLOSURE:
{
- EncloseNode* en = NENCLOSE(node);
+ EnclosureNode* en = ENCLOSURE_(node);
switch (en->type) {
- case ENCLOSE_OPTION:
+ case ENCLOSURE_OPTION:
{
OnigOptionType options = reg->options;
- reg->options = NENCLOSE(node)->option;
- n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
+ reg->options = ENCLOSURE_(node)->o.option;
+ n = get_head_value_node(NODE_BODY(node), exact, reg);
reg->options = options;
}
break;
- case ENCLOSE_MEMORY:
- case ENCLOSE_STOP_BACKTRACK:
- n = get_head_value_node(en->target, exact, reg);
+ case ENCLOSURE_MEMORY:
+ case ENCLOSURE_STOP_BACKTRACK:
+ n = get_head_value_node(NODE_BODY(node), exact, reg);
break;
}
}
break;
- case NT_ANCHOR:
- if (NANCHOR(node)->type == ANCHOR_PREC_READ)
- n = get_head_value_node(NANCHOR(node)->target, exact, reg);
+ case NODE_ANCHOR:
+ if (ANCHOR_(node)->type == ANCHOR_PREC_READ)
+ n = get_head_value_node(NODE_BODY(node), exact, reg);
break;
default:
@@ -2448,46 +2483,45 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
}
static int
-check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
+check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask)
{
- int type, r = 0;
+ NodeType type;
+ int r = 0;
- type = NTYPE(node);
- if ((NTYPE2BIT(type) & type_mask) == 0)
+ type = NODE_TYPE(node);
+ if ((NODE_TYPE2BIT(type) & type_mask) == 0)
return 1;
switch (type) {
- case NT_LIST:
- case NT_ALT:
+ case NODE_LIST:
+ case NODE_ALT:
do {
- r = check_type_tree(NCAR(node), type_mask, enclose_mask,
+ r = check_type_tree(NODE_CAR(node), type_mask, enclosure_mask,
anchor_mask);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NT_QTFR:
- r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
- anchor_mask);
+ case NODE_QUANT:
+ r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
break;
- case NT_ENCLOSE:
+ case NODE_ENCLOSURE:
{
- EncloseNode* en = NENCLOSE(node);
- if ((en->type & enclose_mask) == 0)
+ EnclosureNode* en = ENCLOSURE_(node);
+ if ((en->type & enclosure_mask) == 0)
return 1;
- r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
+ r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
}
break;
- case NT_ANCHOR:
- type = NANCHOR(node)->type;
+ case NODE_ANCHOR:
+ type = ANCHOR_(node)->type;
if ((type & anchor_mask) == 0)
return 1;
- if (NANCHOR(node)->target)
- r = check_type_tree(NANCHOR(node)->target,
- type_mask, enclose_mask, anchor_mask);
+ if (IS_NOT_NULL(NODE_BODY(node)))
+ r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask);
break;
default:
@@ -2496,250 +2530,282 @@ check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
return r;
}
-static int
-get_min_len(Node* node, OnigLen *min, ScanEnv* env)
+static OnigLen
+get_min_len(Node* node, ScanEnv* env)
{
+ OnigLen len;
OnigLen tmin;
- int r = 0;
- *min = 0;
- switch (NTYPE(node)) {
- case NT_BREF:
+ len = 0;
+ switch (NODE_TYPE(node)) {
+ case NODE_BREF:
{
int i;
int* backs;
- Node** nodes = SCANENV_MEM_NODES(env);
- BRefNode* br = NBREF(node);
- if (br->state & NST_RECURSION) break;
+ MemEnv* mem_env = SCANENV_MEMENV(env);
+ BRefNode* br = BREF_(node);
+ if (NODE_IS_RECURSION(node)) break;
backs = BACKREFS_P(br);
- if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- r = get_min_len(nodes[backs[0]], min, env);
- if (r != 0) break;
+ len = get_min_len(mem_env[backs[0]].node, env);
for (i = 1; i < br->back_num; i++) {
- if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- r = get_min_len(nodes[backs[i]], &tmin, env);
- if (r != 0) break;
- if (*min > tmin) *min = tmin;
+ tmin = get_min_len(mem_env[backs[i]].node, env);
+ if (len > tmin) len = tmin;
}
}
break;
#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (IS_CALL_RECURSION(NCALL(node))) {
- EncloseNode* en = NENCLOSE(NCALL(node)->target);
- if (IS_ENCLOSE_MIN_FIXED(en))
- *min = en->min_len;
+ case NODE_CALL:
+ {
+ Node* t = NODE_BODY(node);
+ if (NODE_IS_RECURSION(node)) {
+ if (NODE_IS_MIN_FIXED(t))
+ len = ENCLOSURE_(t)->min_len;
+ }
+ else
+ len = get_min_len(t, env);
}
- else
- r = get_min_len(NCALL(node)->target, min, env);
break;
#endif
- case NT_LIST:
+ case NODE_LIST:
do {
- r = get_min_len(NCAR(node), &tmin, env);
- if (r == 0) *min += tmin;
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ tmin = get_min_len(NODE_CAR(node), env);
+ len += tmin;
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NT_ALT:
+ case NODE_ALT:
{
Node *x, *y;
y = node;
do {
- x = NCAR(y);
- r = get_min_len(x, &tmin, env);
- if (r != 0) break;
- if (y == node) *min = tmin;
- else if (*min > tmin) *min = tmin;
- } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
+ x = NODE_CAR(y);
+ tmin = get_min_len(x, env);
+ if (y == node) len = tmin;
+ else if (len > tmin) len = tmin;
+ } while (IS_NOT_NULL(y = NODE_CDR(y)));
}
break;
- case NT_STR:
+ case NODE_STR:
{
- StrNode* sn = NSTR(node);
- *min = sn->end - sn->s;
+ StrNode* sn = STR_(node);
+ len = sn->end - sn->s;
}
break;
- case NT_CTYPE:
- *min = 1;
+ case NODE_CTYPE:
+ case NODE_CCLASS:
+ len = 1;
break;
- case NT_CCLASS:
- case NT_CANY:
- *min = 1;
- break;
-
- case NT_QTFR:
+ case NODE_QUANT:
{
- QtfrNode* qn = NQTFR(node);
+ QuantNode* qn = QUANT_(node);
if (qn->lower > 0) {
- r = get_min_len(qn->target, min, env);
- if (r == 0)
- *min = distance_multiply(*min, qn->lower);
+ len = get_min_len(NODE_BODY(node), env);
+ len = distance_multiply(len, qn->lower);
}
}
break;
- case NT_ENCLOSE:
+ case NODE_ENCLOSURE:
{
- EncloseNode* en = NENCLOSE(node);
+ EnclosureNode* en = ENCLOSURE_(node);
switch (en->type) {
- case ENCLOSE_MEMORY:
- if (IS_ENCLOSE_MIN_FIXED(en))
- *min = en->min_len;
+ case ENCLOSURE_MEMORY:
+ if (NODE_IS_MIN_FIXED(node))
+ len = en->min_len;
else {
- if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
- *min = 0; // recursive
+ if (NODE_IS_MARK1(node))
+ len = 0; // recursive
else {
- SET_ENCLOSE_STATUS(node, NST_MARK1);
- r = get_min_len(en->target, min, env);
- CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
- if (r == 0) {
- en->min_len = *min;
- SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
- }
+ NODE_STATUS_ADD(node, NST_MARK1);
+ len = get_min_len(NODE_BODY(node), env);
+ NODE_STATUS_REMOVE(node, NST_MARK1);
+
+ en->min_len = len;
+ NODE_STATUS_ADD(node, NST_MIN_FIXED);
}
}
break;
- case ENCLOSE_OPTION:
- case ENCLOSE_STOP_BACKTRACK:
- r = get_min_len(en->target, min, env);
+ case ENCLOSURE_OPTION:
+ case ENCLOSURE_STOP_BACKTRACK:
+ len = get_min_len(NODE_BODY(node), env);
break;
}
}
break;
- case NT_ANCHOR:
+ case NODE_ANCHOR:
default:
break;
}
- return r;
+ return len;
}
-static int
-get_max_len(Node* node, OnigLen *max, ScanEnv* env)
+static OnigLen
+get_max_len(Node* node, ScanEnv* env)
{
+ OnigLen len;
OnigLen tmax;
- int r = 0;
- *max = 0;
- switch (NTYPE(node)) {
- case NT_LIST:
+ len = 0;
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
do {
- r = get_max_len(NCAR(node), &tmax, env);
- if (r == 0)
- *max = distance_add(*max, tmax);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ tmax = get_max_len(NODE_CAR(node), env);
+ len = distance_add(len, tmax);
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NT_ALT:
+ case NODE_ALT:
do {
- r = get_max_len(NCAR(node), &tmax, env);
- if (r == 0 && *max < tmax) *max = tmax;
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ tmax = get_max_len(NODE_CAR(node), env);
+ if (len < tmax) len = tmax;
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NT_STR:
+ case NODE_STR:
{
- StrNode* sn = NSTR(node);
- *max = sn->end - sn->s;
+ StrNode* sn = STR_(node);
+ len = sn->end - sn->s;
}
break;
- case NT_CTYPE:
- *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
+ case NODE_CTYPE:
+ case NODE_CCLASS:
+ len = ONIGENC_MBC_MAXLEN_DIST(env->enc);
break;
- case NT_CCLASS:
- case NT_CANY:
- *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- break;
-
- case NT_BREF:
+ case NODE_BREF:
{
int i;
int* backs;
- Node** nodes = SCANENV_MEM_NODES(env);
- BRefNode* br = NBREF(node);
- if (br->state & NST_RECURSION) {
- *max = ONIG_INFINITE_DISTANCE;
+ MemEnv* mem_env = SCANENV_MEMENV(env);
+ BRefNode* br = BREF_(node);
+ if (NODE_IS_RECURSION(node)) {
+ len = ONIG_INFINITE_DISTANCE;
break;
}
backs = BACKREFS_P(br);
for (i = 0; i < br->back_num; i++) {
- if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- r = get_max_len(nodes[backs[i]], &tmax, env);
- if (r != 0) break;
- if (*max < tmax) *max = tmax;
+ tmax = get_max_len(mem_env[backs[i]].node, env);
+ if (len < tmax) len = tmax;
}
}
break;
#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (! IS_CALL_RECURSION(NCALL(node)))
- r = get_max_len(NCALL(node)->target, max, env);
+ case NODE_CALL:
+ if (! NODE_IS_RECURSION(node))
+ len = get_max_len(NODE_BODY(node), env);
else
- *max = ONIG_INFINITE_DISTANCE;
+ len = ONIG_INFINITE_DISTANCE;
break;
#endif
- case NT_QTFR:
+ case NODE_QUANT:
{
- QtfrNode* qn = NQTFR(node);
+ QuantNode* qn = QUANT_(node);
if (qn->upper != 0) {
- r = get_max_len(qn->target, max, env);
- if (r == 0 && *max != 0) {
+ len = get_max_len(NODE_BODY(node), env);
+ if (len != 0) {
if (! IS_REPEAT_INFINITE(qn->upper))
- *max = distance_multiply(*max, qn->upper);
+ len = distance_multiply(len, qn->upper);
else
- *max = ONIG_INFINITE_DISTANCE;
+ len = ONIG_INFINITE_DISTANCE;
}
}
}
break;
- case NT_ENCLOSE:
+ case NODE_ENCLOSURE:
{
- EncloseNode* en = NENCLOSE(node);
+ EnclosureNode* en = ENCLOSURE_(node);
switch (en->type) {
- case ENCLOSE_MEMORY:
- if (IS_ENCLOSE_MAX_FIXED(en))
- *max = en->max_len;
+ case ENCLOSURE_MEMORY:
+ if (NODE_IS_MAX_FIXED(node))
+ len = en->max_len;
else {
- if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
- *max = ONIG_INFINITE_DISTANCE;
+ if (NODE_IS_MARK1(node))
+ len = ONIG_INFINITE_DISTANCE;
else {
- SET_ENCLOSE_STATUS(node, NST_MARK1);
- r = get_max_len(en->target, max, env);
- CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
- if (r == 0) {
- en->max_len = *max;
- SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
- }
+ NODE_STATUS_ADD(node, NST_MARK1);
+ len = get_max_len(NODE_BODY(node), env);
+ NODE_STATUS_REMOVE(node, NST_MARK1);
+
+ en->max_len = len;
+ NODE_STATUS_ADD(node, NST_MAX_FIXED);
}
}
break;
- case ENCLOSE_OPTION:
- case ENCLOSE_STOP_BACKTRACK:
- r = get_max_len(en->target, max, env);
+ case ENCLOSURE_OPTION:
+ case ENCLOSURE_STOP_BACKTRACK:
+ len = get_max_len(NODE_BODY(node), env);
break;
}
}
break;
- case NT_ANCHOR:
+ case NODE_ANCHOR:
+ default:
+ break;
+ }
+
+ return len;
+}
+
+static int
+check_backrefs(Node* node, ScanEnv* env)
+{
+ int r;
+
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
+ do {
+ r = check_backrefs(NODE_CAR(node), env);
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_ANCHOR:
+ if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
+ r = 0;
+ break;
+ }
+ /* fall */
+ case NODE_QUANT:
+ case NODE_ENCLOSURE:
+ r = check_backrefs(NODE_BODY(node), env);
+ break;
+
+ case NODE_BREF:
+ {
+ int i;
+ BRefNode* br = BREF_(node);
+ int* backs = BACKREFS_P(br);
+ MemEnv* mem_env = SCANENV_MEMENV(env);
+
+ for (i = 0; i < br->back_num; i++) {
+ if (backs[i] > env->num_mem)
+ return ONIGERR_INVALID_BACKREF;
+
+ NODE_STATUS_ADD(mem_env[backs[i]].node, NST_BACKREF);
+ }
+ r = 0;
+ }
+ break;
+
default:
+ r = 0;
break;
}
@@ -2749,18 +2815,17 @@ get_max_len(Node* node, OnigLen *max, ScanEnv* env)
#ifdef USE_SUBEXP_CALL
-#define RECURSION_EXIST 1
-#define RECURSION_INFINITE 2
+#define RECURSION_EXIST (1<<0)
+#define RECURSION_MUST (1<<1)
+#define RECURSION_INFINITE (1<<2)
static int
-subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
+infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
{
- int type;
int r = 0;
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
{
Node *x;
OnigLen min;
@@ -2768,64 +2833,70 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
x = node;
do {
- ret = subexp_inf_recursive_check(NCAR(x), env, head);
- if (ret < 0 || ret == RECURSION_INFINITE) return ret;
+ ret = infinite_recursive_call_check(NODE_CAR(x), env, head);
+ if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
r |= ret;
if (head) {
- ret = get_min_len(NCAR(x), &min, env);
- if (ret != 0) return ret;
+ min = get_min_len(NODE_CAR(x), env);
if (min != 0) head = 0;
}
- } while (IS_NOT_NULL(x = NCDR(x)));
+ } while (IS_NOT_NULL(x = NODE_CDR(x)));
}
break;
- case NT_ALT:
+ case NODE_ALT:
{
int ret;
- r = RECURSION_EXIST;
+ int must;
+
+ must = RECURSION_MUST;
do {
- ret = subexp_inf_recursive_check(NCAR(node), env, head);
- if (ret < 0 || ret == RECURSION_INFINITE) return ret;
- r &= ret;
- } while (IS_NOT_NULL(node = NCDR(node)));
- }
- break;
+ ret = infinite_recursive_call_check(NODE_CAR(node), env, head);
+ if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret;
- case NT_QTFR:
- r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
- if (r == RECURSION_EXIST) {
- if (NQTFR(node)->lower == 0) r = 0;
+ r |= (ret & RECURSION_EXIST);
+ must &= ret;
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
+ r |= must;
}
break;
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_inf_recursive_check(an->target, env, head);
- break;
- }
+ case NODE_QUANT:
+ r = infinite_recursive_call_check(NODE_BODY(node), env, head);
+ if (r < 0) return r;
+ if ((r & RECURSION_MUST) != 0) {
+ if (QUANT_(node)->lower == 0)
+ r &= ~RECURSION_MUST;
}
break;
- case NT_CALL:
- r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
+ case NODE_ANCHOR:
+ if (! ANCHOR_HAS_BODY(ANCHOR_(node)))
+ break;
+ /* fall */
+ case NODE_CALL:
+ r = infinite_recursive_call_check(NODE_BODY(node), env, head);
break;
- case NT_ENCLOSE:
- if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
- return 0;
- else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
- return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
- else {
- SET_ENCLOSE_STATUS(node, NST_MARK2);
- r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
- CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
+ case NODE_ENCLOSURE:
+ {
+ EnclosureNode* en = ENCLOSURE_(node);
+
+ if (en->type == ENCLOSURE_MEMORY) {
+ if (NODE_IS_MARK2(node))
+ return 0;
+ else if (NODE_IS_MARK1(node))
+ return (head == 0 ? RECURSION_EXIST | RECURSION_MUST
+ : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE);
+ else {
+ NODE_STATUS_ADD(node, NST_MARK2);
+ r = infinite_recursive_call_check(NODE_BODY(node), env, head);
+ NODE_STATUS_REMOVE(node, NST_MARK2);
+ }
+ }
+ else {
+ r = infinite_recursive_call_check(NODE_BODY(node), env, head);
+ }
}
break;
@@ -2837,53 +2908,53 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
}
static int
-subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
+infinite_recursive_call_check_trav(Node* node, ScanEnv* env)
{
- int type;
- int r = 0;
+ int r;
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- case NT_ALT:
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
do {
- r = subexp_inf_recursive_check_trav(NCAR(node), env);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ r = infinite_recursive_call_check_trav(NODE_CAR(node), env);
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NT_QTFR:
- r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
- break;
-
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_inf_recursive_check_trav(an->target, env);
- break;
- }
+ case NODE_ANCHOR:
+ if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
+ r = 0;
+ break;
}
+ /* fall */
+ case NODE_QUANT:
+ r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
break;
- case NT_ENCLOSE:
+ case NODE_ENCLOSURE:
{
- EncloseNode* en = NENCLOSE(node);
+ EnclosureNode* en = ENCLOSURE_(node);
+
+ if (en->type == ENCLOSURE_MEMORY) {
+ if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) {
+ int ret;
+
+ NODE_STATUS_ADD(node, NST_MARK1);
- if (IS_ENCLOSE_RECURSION(en)) {
- SET_ENCLOSE_STATUS(node, NST_MARK1);
- r = subexp_inf_recursive_check(en->target, env, 1);
- if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
- CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
+ ret = infinite_recursive_call_check(NODE_BODY(node), env, 1);
+ if (ret < 0) return ret;
+ else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0)
+ return ONIGERR_NEVER_ENDING_RECURSION;
+
+ NODE_STATUS_REMOVE(node, NST_MARK1);
+ }
}
- r = subexp_inf_recursive_check_trav(en->target, env);
}
+
+ r = infinite_recursive_call_check_trav(NODE_BODY(node), env);
break;
default:
+ r = 0;
break;
}
@@ -2891,227 +2962,129 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
}
static int
-subexp_recursive_check(Node* node)
+recursive_call_check(Node* node)
{
- int r = 0;
+ int r;
- switch (NTYPE(node)) {
- case NT_LIST:
- case NT_ALT:
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
+ r = 0;
do {
- r |= subexp_recursive_check(NCAR(node));
- } while (IS_NOT_NULL(node = NCDR(node)));
+ r |= recursive_call_check(NODE_CAR(node));
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NT_QTFR:
- r = subexp_recursive_check(NQTFR(node)->target);
- break;
-
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_recursive_check(an->target);
- break;
- }
+ case NODE_ANCHOR:
+ if (! ANCHOR_HAS_BODY(ANCHOR_(node))) {
+ r = 0;
+ break;
}
+ /* fall */
+ case NODE_QUANT:
+ r = recursive_call_check(NODE_BODY(node));
break;
- case NT_CALL:
- r = subexp_recursive_check(NCALL(node)->target);
- if (r != 0) SET_CALL_RECURSION(node);
+ case NODE_CALL:
+ r = recursive_call_check(NODE_BODY(node));
+ if (r != 0) NODE_STATUS_ADD(node, NST_RECURSION);
break;
- case NT_ENCLOSE:
- if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
- return 0;
- else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
- return 1; /* recursion */
- else {
- SET_ENCLOSE_STATUS(node, NST_MARK2);
- r = subexp_recursive_check(NENCLOSE(node)->target);
- CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
+ case NODE_ENCLOSURE:
+ {
+ EnclosureNode* en = ENCLOSURE_(node);
+
+ if (en->type == ENCLOSURE_MEMORY) {
+ if (NODE_IS_MARK2(node))
+ return 0;
+ else if (NODE_IS_MARK1(node))
+ return 1; /* recursion */
+ else {
+ NODE_STATUS_ADD(node, NST_MARK2);
+ r = recursive_call_check(NODE_BODY(node));
+ NODE_STATUS_REMOVE(node, NST_MARK2);
+ }
+ }
+ else {
+ r = recursive_call_check(NODE_BODY(node));
+ }
}
break;
default:
+ r = 0;
break;
}
return r;
}
+#define IN_RECURSION (1<<0)
+#define FOUND_CALLED_NODE 1
static int
-subexp_recursive_check_trav(Node* node, ScanEnv* env)
+recursive_call_check_trav(Node* node, ScanEnv* env, int state)
{
-#define FOUND_CALLED_NODE 1
-
- int type;
int r = 0;
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- case NT_ALT:
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
{
int ret;
do {
- ret = subexp_recursive_check_trav(NCAR(node), env);
+ ret = recursive_call_check_trav(NODE_CAR(node), env, state);
if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
else if (ret < 0) return ret;
- } while (IS_NOT_NULL(node = NCDR(node)));
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
}
break;
- case NT_QTFR:
- r = subexp_recursive_check_trav(NQTFR(node)->target, env);
- if (NQTFR(node)->upper == 0) {
+ case NODE_QUANT:
+ r = recursive_call_check_trav(NODE_BODY(node), env, state);
+ if (QUANT_(node)->upper == 0) {
if (r == FOUND_CALLED_NODE)
- NQTFR(node)->is_refered = 1;
+ QUANT_(node)->is_refered = 1;
}
break;
- case NT_ANCHOR:
+ case NODE_ANCHOR:
{
- AnchorNode* an = NANCHOR(node);
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = subexp_recursive_check_trav(an->target, env);
- break;
- }
+ AnchorNode* an = ANCHOR_(node);
+ if (ANCHOR_HAS_BODY(an))
+ r = recursive_call_check_trav(NODE_ANCHOR_BODY(an), env, state);
}
break;
- case NT_ENCLOSE:
+ case NODE_ENCLOSURE:
{
- EncloseNode* en = NENCLOSE(node);
-
- if (! IS_ENCLOSE_RECURSION(en)) {
- if (IS_ENCLOSE_CALLED(en)) {
- SET_ENCLOSE_STATUS(node, NST_MARK1);
- r = subexp_recursive_check(en->target);
- if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
- CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
- }
- }
- r = subexp_recursive_check_trav(en->target, env);
- if (IS_ENCLOSE_CALLED(en))
- r |= FOUND_CALLED_NODE;
- }
- break;
-
- default:
- break;
- }
-
- return r;
-}
-
-static int
-setup_subexp_call(Node* node, ScanEnv* env)
-{
- int type;
- int r = 0;
-
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
- do {
- r = setup_subexp_call(NCAR(node), env);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_ALT:
- do {
- r = setup_subexp_call(NCAR(node), env);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
- break;
-
- case NT_QTFR:
- r = setup_subexp_call(NQTFR(node)->target, env);
- break;
- case NT_ENCLOSE:
- r = setup_subexp_call(NENCLOSE(node)->target, env);
- break;
-
- case NT_CALL:
- {
- CallNode* cn = NCALL(node);
- Node** nodes = SCANENV_MEM_NODES(env);
-
- if (cn->group_num != 0) {
- int gnum = cn->group_num;
-
-#ifdef USE_NAMED_GROUP
- if (env->num_named > 0 &&
- IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
- !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
- return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
- }
-#endif
- if (gnum > env->num_mem) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_GROUP_REFERENCE;
- }
+ EnclosureNode* en = ENCLOSURE_(node);
+
+ if (en->type == ENCLOSURE_MEMORY) {
+ if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) {
+ if (! NODE_IS_RECURSION(node)) {
+ NODE_STATUS_ADD(node, NST_MARK1);
+ r = recursive_call_check(NODE_BODY(node));
+ if (r != 0)
+ NODE_STATUS_ADD(node, NST_RECURSION);
+ NODE_STATUS_REMOVE(node, NST_MARK1);
+ }
-#ifdef USE_NAMED_GROUP
- set_call_attr:
-#endif
- cn->target = nodes[cn->group_num];
- if (IS_NULL(cn->target)) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
- SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
- BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
- cn->unset_addr_list = env->unset_addr_list;
- }
-#ifdef USE_NAMED_GROUP
- else {
- int *refs;
-
- int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
- &refs);
- if (n <= 0) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
- return ONIGERR_UNDEFINED_NAME_REFERENCE;
- }
- else if (n > 1) {
- onig_scan_env_set_error_string(env,
- ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
- return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
- }
- else {
- cn->group_num = refs[0];
- goto set_call_attr;
+ if (NODE_IS_CALLED(node))
+ r = FOUND_CALLED_NODE;
}
}
-#endif
- }
- break;
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
+ {
+ int ret;
+ int state1 = state;
- switch (an->type) {
- case ANCHOR_PREC_READ:
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND:
- case ANCHOR_LOOK_BEHIND_NOT:
- r = setup_subexp_call(an->target, env);
- break;
+ if (NODE_IS_RECURSION(node))
+ state1 |= IN_RECURSION;
+
+ ret = recursive_call_check_trav(NODE_BODY(node), env, state1);
+ if (ret == FOUND_CALLED_NODE)
+ r = FOUND_CALLED_NODE;
}
}
break;
@@ -3122,6 +3095,7 @@ setup_subexp_call(Node* node, ScanEnv* env)
return r;
}
+
#endif
/* divide different length alternatives in look-behind.
@@ -3132,30 +3106,28 @@ static int
divide_look_behind_alternatives(Node* node)
{
Node *head, *np, *insert_node;
- AnchorNode* an = NANCHOR(node);
+ AnchorNode* an = ANCHOR_(node);
int anc_type = an->type;
- /* fprintf(stderr, "divide_look_behind: %d\n", (int )node); */
-
- head = an->target;
- np = NCAR(head);
+ head = NODE_ANCHOR_BODY(an);
+ np = NODE_CAR(head);
swap_node(node, head);
- NCAR(node) = head;
- NANCHOR(head)->target = np;
+ NODE_CAR(node) = head;
+ NODE_BODY(head) = np;
np = node;
- while ((np = NCDR(np)) != NULL_NODE) {
+ while (IS_NOT_NULL(np = NODE_CDR(np))) {
insert_node = onig_node_new_anchor(anc_type);
CHECK_NULL_RETURN_MEMERR(insert_node);
- NANCHOR(insert_node)->target = NCAR(np);
- NCAR(np) = insert_node;
+ NODE_BODY(insert_node) = NODE_CAR(np);
+ NODE_CAR(np) = insert_node;
}
if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
np = node;
do {
- SET_NTYPE(np, NT_LIST); /* alt -> list */
- } while ((np = NCDR(np)) != NULL_NODE);
+ SET_NODE_TYPE(np, NODE_LIST); /* alt -> list */
+ } while (IS_NOT_NULL(np = NODE_CDR(np)));
}
return 0;
}
@@ -3164,11 +3136,9 @@ static int
setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
{
int r, len;
- AnchorNode* an = NANCHOR(node);
-
- /* fprintf(stderr, "setup_look_behind: %x\n", (int )node); */
+ AnchorNode* an = ANCHOR_(node);
- r = get_char_length_tree(an->target, reg, &len);
+ r = get_char_length_tree(NODE_ANCHOR_BODY(an), reg, &len);
if (r == 0)
an->char_len = len;
else if (r == GET_CHAR_LEN_VARLEN)
@@ -3186,44 +3156,43 @@ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
static int
next_setup(Node* node, Node* next_node, regex_t* reg)
{
- int type;
+ NodeType type;
retry:
- type = NTYPE(node);
- if (type == NT_QTFR) {
- QtfrNode* qn = NQTFR(node);
+ type = NODE_TYPE(node);
+ if (type == NODE_QUANT) {
+ QuantNode* qn = QUANT_(node);
if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
-#ifdef USE_QTFR_PEEK_NEXT
+#ifdef USE_QUANT_PEEK_NEXT
Node* n = get_head_value_node(next_node, 1, reg);
/* '\0': for UTF-16BE etc... */
- if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
+ if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {
qn->next_head_exact = n;
}
#endif
/* automatic posseivation a*b ==> (?>a*)b */
if (qn->lower <= 1) {
- int ttype = NTYPE(qn->target);
- if (IS_NODE_TYPE_SIMPLE(ttype)) {
+ if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) {
Node *x, *y;
- x = get_head_value_node(qn->target, 0, reg);
+ x = get_head_value_node(NODE_BODY(node), 0, reg);
if (IS_NOT_NULL(x)) {
y = get_head_value_node(next_node, 0, reg);
- if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
- Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+ if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
+ Node* en = onig_node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
CHECK_NULL_RETURN_MEMERR(en);
- SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
+ NODE_STATUS_ADD(en, NST_STOP_BT_SIMPLE_REPEAT);
swap_node(node, en);
- NENCLOSE(node)->target = en;
+ NODE_BODY(node) = en;
}
}
}
}
}
}
- else if (type == NT_ENCLOSE) {
- EncloseNode* en = NENCLOSE(node);
- if (en->type == ENCLOSE_MEMORY) {
- node = en->target;
+ else if (type == NODE_ENCLOSURE) {
+ EnclosureNode* en = ENCLOSURE_(node);
+ if (en->type == ENCLOSURE_MEMORY) {
+ node = NODE_BODY(node);
goto retry;
}
}
@@ -3237,7 +3206,7 @@ update_string_node_case_fold(regex_t* reg, Node *node)
UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
UChar *sbuf, *ebuf, *sp;
int r, i, len, sbuf_size;
- StrNode* sn = NSTR(node);
+ StrNode* sn = STR_(node);
end = sn->end;
sbuf_size = (end - sn->s) * 2;
@@ -3319,11 +3288,11 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
xnode = onig_node_new_list(NULL, NULL);
if (IS_NULL(xnode)) goto mem_err;
- NCAR(var_anode) = xnode;
+ NODE_CAR(var_anode) = xnode;
anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
if (IS_NULL(anode)) goto mem_err;
- NCAR(xnode) = anode;
+ NODE_CAR(xnode) = anode;
}
else {
*rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
@@ -3333,7 +3302,7 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
snode = onig_node_new_str(p, p + slen);
if (IS_NULL(snode)) goto mem_err;
- NCAR(anode) = snode;
+ NODE_CAR(anode) = snode;
for (i = 0; i < item_num; i++) {
snode = onig_node_new_str(NULL, NULL);
@@ -3379,18 +3348,18 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
goto mem_err;
}
- NCAR(an) = xnode;
+ NODE_CAR(an) = xnode;
}
else {
- NCAR(an) = snode;
+ NODE_CAR(an) = snode;
}
- NCDR(var_anode) = an;
+ NODE_CDR(var_anode) = an;
var_anode = an;
}
else {
- NCAR(an) = snode;
- NCDR(anode) = an;
+ NODE_CAR(an) = snode;
+ NODE_CDR(anode) = an;
anode = an;
}
}
@@ -3415,7 +3384,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
UChar *start, *end, *p;
Node *top_root, *root, *snode, *prev_node;
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
- StrNode* sn = NSTR(node);
+ StrNode* sn = STR_(node);
if (NSTRING_IS_AMBIG(node)) return 0;
@@ -3485,7 +3454,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
}
}
- root = NCAR(prev_node);
+ root = NODE_CAR(prev_node);
}
else { /* r == 0 */
if (IS_NOT_NULL(root)) {
@@ -3555,37 +3524,35 @@ expand_case_fold_string(Node* node, regex_t* reg)
static int
setup_comb_exp_check(Node* node, int state, ScanEnv* env)
{
- int type;
int r = state;
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
{
Node* prev = NULL_NODE;
do {
- r = setup_comb_exp_check(NCAR(node), r, env);
- prev = NCAR(node);
- } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
+ r = setup_comb_exp_check(NODE_CAR(node), r, env);
+ prev = NODE_CAR(node);
+ } while (r >= 0 && IS_NOT_NULL(node = NODE_CDR(node)));
}
break;
- case NT_ALT:
+ case NODE_ALT:
{
int ret;
do {
- ret = setup_comb_exp_check(NCAR(node), state, env);
+ ret = setup_comb_exp_check(NODE_CAR(node), state, env);
r |= ret;
- } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
+ } while (ret >= 0 && IS_NOT_NULL(node = NODE_CDR(node)));
}
break;
- case NT_QTFR:
+ case NODE_QUANT:
{
int child_state = state;
int add_state = 0;
- QtfrNode* qn = NQTFR(node);
- Node* target = qn->target;
+ QuantNode* qn = QUANT_(node);
+ Node* target = NODE_QUANT_BODY(qn);
int var_num;
if (! IS_REPEAT_INFINITE(qn->upper)) {
@@ -3595,11 +3562,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
/* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
if (env->backrefed_mem == 0) {
- if (NTYPE(qn->target) == NT_ENCLOSE) {
- EncloseNode* en = NENCLOSE(qn->target);
- if (en->type == ENCLOSE_MEMORY) {
- if (NTYPE(en->target) == NT_QTFR) {
- QtfrNode* q = NQTFR(en->target);
+ if (NODE_TYPE(NODE_QUANT_BODY(qn)) == NODE_ENCLOSURE) {
+ EnclosureNode* en = ENCLOSURE_(NODE_QUANT_BODY(qn));
+ if (en->type == ENCLOSURE_MEMORY) {
+ if (NODE_TYPE(NODE_ENCLOSURE_BODY(en)) == NODE_QUANT) {
+ QuantNode* q = QUANT_(NODE_ENCLOSURE_BODY(en));
if (IS_REPEAT_INFINITE(q->upper)
&& q->greedy == qn->greedy) {
qn->upper = (qn->lower == 0 ? 1 : qn->lower);
@@ -3645,33 +3612,33 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
}
break;
- case NT_ENCLOSE:
+ case NODE_ENCLOSURE:
{
- EncloseNode* en = NENCLOSE(node);
+ EnclosureNode* en = ENCLOSURE_(node);
switch (en->type) {
- case ENCLOSE_MEMORY:
+ case ENCLOSURE_MEMORY:
{
if (env->curr_max_regnum < en->regnum)
env->curr_max_regnum = en->regnum;
- r = setup_comb_exp_check(en->target, state, env);
+ r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env);
}
break;
default:
- r = setup_comb_exp_check(en->target, state, env);
+ r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env);
break;
}
}
break;
#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (IS_CALL_RECURSION(NCALL(node)))
+ case NODE_CALL:
+ if (NODE_IS_RECURSION(node))
env->has_recursion = 1;
else
- r = setup_comb_exp_check(NCALL(node)->target, state, env);
+ r = setup_comb_exp_check(NODE_BODY(node), state, env);
break;
#endif
@@ -3683,206 +3650,695 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env)
}
#endif
-#define IN_ALT (1<<0)
-#define IN_NOT (1<<1)
-#define IN_REPEAT (1<<2)
-#define IN_VAR_REPEAT (1<<3)
-#define IN_CALL (1<<4)
-#define IN_RECCALL (1<<5)
-
-/* setup_tree does the following work.
- 1. check empty loop. (set qn->target_empty_info)
- 2. expand ignore-case in char class.
- 3. set memory status bit flags. (reg->mem_stats)
- 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
- 5. find invalid patterns in look-behind.
- 6. expand repeated string.
- */
+#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
static int
-setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
+quantifiers_memory_node_info(Node* node)
{
- int type;
- int r = 0;
+ int r = QUANT_BODY_IS_EMPTY;
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
{
- Node* prev = NULL_NODE;
+ int v;
do {
- r = setup_tree(NCAR(node), reg, state, env);
- if (IS_NOT_NULL(prev) && r == 0) {
- r = next_setup(prev, NCAR(node), reg);
+ v = quantifiers_memory_node_info(NODE_CAR(node));
+ if (v > r) r = v;
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
+ }
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case NODE_CALL:
+ if (NODE_IS_RECURSION(node)) {
+ return QUANT_BODY_IS_EMPTY_REC; /* tiny version */
+ }
+ else
+ r = quantifiers_memory_node_info(NODE_BODY(node));
+ break;
+#endif
+
+ case NODE_QUANT:
+ {
+ QuantNode* qn = QUANT_(node);
+ if (qn->upper != 0) {
+ r = quantifiers_memory_node_info(NODE_BODY(node));
+ }
+ }
+ break;
+
+ case NODE_ENCLOSURE:
+ {
+ EnclosureNode* en = ENCLOSURE_(node);
+ switch (en->type) {
+ case ENCLOSURE_MEMORY:
+ if (NODE_IS_RECURSION(node)) {
+ return QUANT_BODY_IS_EMPTY_REC;
}
- prev = NCAR(node);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ return QUANT_BODY_IS_EMPTY_MEM;
+ break;
+
+ case ENCLOSURE_OPTION:
+ case ENCLOSURE_STOP_BACKTRACK:
+ r = quantifiers_memory_node_info(NODE_BODY(node));
+ break;
+ default:
+ break;
+ }
}
break;
- case NT_ALT:
+ case NODE_BREF:
+ case NODE_STR:
+ case NODE_CTYPE:
+ case NODE_CCLASS:
+ case NODE_ANCHOR:
+ default:
+ break;
+ }
+
+ return r;
+}
+#endif /* USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT */
+
+
+#define IN_ALT (1<<0)
+#define IN_NOT (1<<1)
+#define IN_REAL_REPEAT (1<<2)
+#define IN_VAR_REPEAT (1<<3)
+#define IN_ZERO_REPEAT (1<<4)
+#define IN_MULTI_ENTRY (1<<5)
+
+#ifdef USE_SUBEXP_CALL
+
+#ifdef __GNUC__
+__inline
+#endif
+static int
+setup_call_node_call(CallNode* cn, ScanEnv* env, int state) /* Resolve the group that call node cn refers to and store that group's node as the call body; returns 0 or an ONIGERR_* code. `state` is not read here. */
+{
+ MemEnv* mem_env = SCANENV_MEMENV(env);
+
+ if (cn->by_number != 0) { /* by_number set: cn->group_num is used directly */
+ int gnum = cn->group_num;
+
+#ifdef USE_NAMED_GROUP
+ if (env->num_named > 0 &&
+ IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
+ !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { /* named groups present, capture-only-named syntax, CAPTURE_GROUP option off */
+ return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
+ }
+#endif
+ if (gnum > env->num_mem) { /* group number exceeds env->num_mem */
+ onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE,
+ cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_GROUP_REFERENCE;
+ }
+
+#ifdef USE_NAMED_GROUP
+ set_call_attr: /* also reached by goto from the by-name branch once group_num is known */
+#endif
+ NODE_CALL_BODY(cn) = mem_env[cn->group_num].node; /* call body := node recorded for that group in mem_env */
+ if (IS_NULL(NODE_CALL_BODY(cn))) { /* no node recorded for this group number */
+ onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
+ cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ }
+#ifdef USE_NAMED_GROUP
+ else { /* by_number == 0: look the group up through cn->name .. cn->name_end */
+ int *refs;
+
+ int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs);
+ if (n <= 0) { /* lookup returned no group numbers (or an error) */
+ onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
+ cn->name, cn->name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+ else if (n > 1) { /* name maps to more than one group: rejected for calls */
+ onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL,
+ cn->name, cn->name_end);
+ return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
+ }
+ else { /* exactly one group: record its number, then share the by-number tail above */
+ cn->group_num = refs[0];
+ goto set_call_attr;
+ }
+ }
+#endif
+
+ return 0;
+}
+
+static void
+setup_call2_call(Node* node)
+{
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
do {
- r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
- } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
+ setup_call2_call(NODE_CAR(node));
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
- case NT_CCLASS:
+ case NODE_QUANT:
+ setup_call2_call(NODE_BODY(node));
break;
- case NT_STR:
- if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
- r = expand_case_fold_string(node, reg);
+ case NODE_ANCHOR:
+ if (ANCHOR_HAS_BODY(ANCHOR_(node)))
+ setup_call2_call(NODE_BODY(node));
+ break;
+
+ case NODE_ENCLOSURE:
+ if (! NODE_IS_MARK1(node)) {
+ NODE_STATUS_ADD(node, NST_MARK1);
+ setup_call2_call(NODE_BODY(node));
+ NODE_STATUS_REMOVE(node, NST_MARK1);
}
break;
- case NT_CTYPE:
- case NT_CANY:
+ case NODE_CALL:
+ if (! NODE_IS_MARK1(node)) {
+ NODE_STATUS_ADD(node, NST_MARK1);
+ {
+ CallNode* cn = CALL_(node);
+ Node* called = NODE_CALL_BODY(cn);
+
+ cn->entry_count++;
+
+ NODE_STATUS_ADD(called, NST_CALLED);
+ ENCLOSURE_(called)->m.entry_count++;
+ setup_call2_call(called);
+ }
+ NODE_STATUS_REMOVE(node, NST_MARK1);
+ }
break;
-#ifdef USE_SUBEXP_CALL
- case NT_CALL:
+ default:
break;
-#endif
+ }
+}
+
+static int
+setup_call(Node* node, ScanEnv* env, int state) /* Walk the tree, run setup_call_node_call() on every NODE_CALL, and flag nodes that sit under a quantifier whose upper bound is 0; returns 0 or the first non-zero result. */
+{
+ int r;
+
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
+ do { /* visit each element; stop at the first non-zero result */
+ r = setup_call(NODE_CAR(node), env, state);
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_QUANT:
+ if (QUANT_(node)->upper == 0)
+ state |= IN_ZERO_REPEAT; /* passed down to everything inside this quantifier's body */
+
+ r = setup_call(NODE_BODY(node), env, state);
+ break;
+
+ case NODE_ANCHOR:
+ if (ANCHOR_HAS_BODY(ANCHOR_(node)))
+ r = setup_call(NODE_BODY(node), env, state);
+ else
+ r = 0;
+ break;
+
+ case NODE_ENCLOSURE:
+ if ((state & IN_ZERO_REPEAT) != 0) {
+ NODE_STATUS_ADD(node, NST_IN_ZERO_REPEAT);
+ ENCLOSURE_(node)->m.entry_count--; /* NOTE(review): done for every enclosure type, not only ENCLOSURE_MEMORY - confirm this is intended */
+ }
+ r = setup_call(NODE_BODY(node), env, state);
+ break;
+
+ case NODE_CALL:
+ if ((state & IN_ZERO_REPEAT) != 0) {
+ NODE_STATUS_ADD(node, NST_IN_ZERO_REPEAT);
+ CALL_(node)->entry_count--;
+ }
+
+ r = setup_call_node_call(CALL_(node), env, state); /* resolve the call target; its error code is returned unchanged */
+ break;
+
+ default:
+ r = 0;
+ break;
+ }
+
+ return r;
+}
+
+static int
+setup_call2(Node* node)
+{
+ int r = 0;
+
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
+ do {
+ r = setup_call2(NODE_CAR(node));
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_QUANT:
+ if (QUANT_(node)->upper != 0)
+ r = setup_call2(NODE_BODY(node));
+ break;
+
+ case NODE_ANCHOR:
+ if (ANCHOR_HAS_BODY(ANCHOR_(node)))
+ r = setup_call2(NODE_BODY(node));
+ break;
+
+ case NODE_ENCLOSURE:
+ if (! NODE_IS_IN_ZERO_REPEAT(node))
+ r = setup_call2(NODE_BODY(node));
+ break;
+
+ case NODE_CALL:
+ if (! NODE_IS_IN_ZERO_REPEAT(node)) {
+ setup_call2_call(node);
+ }
+ break;
+
+ default:
+ break;
+ }
- case NT_BREF:
+ return r;
+}
+
+
+static void
+setup_called_state_call(Node* node, int state)
+{
+ switch (NODE_TYPE(node)) {
+ case NODE_ALT:
+ state |= IN_ALT;
+ /* fall */
+ case NODE_LIST:
+ do {
+ setup_called_state_call(NODE_CAR(node), state);
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_QUANT:
{
- int i;
- int* p;
- Node** nodes = SCANENV_MEM_NODES(env);
- BRefNode* br = NBREF(node);
- p = BACKREFS_P(br);
- for (i = 0; i < br->back_num; i++) {
- if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
- BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
- BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
-#ifdef USE_BACKREF_WITH_LEVEL
- if (IS_BACKREF_NEST_LEVEL(br)) {
- BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
+ QuantNode* qn = QUANT_(node);
+
+ if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ state |= IN_REAL_REPEAT;
+ if (qn->lower != qn->upper)
+ state |= IN_VAR_REPEAT;
+
+ setup_called_state_call(NODE_QUANT_BODY(qn), state);
+ }
+ break;
+
+ case NODE_ANCHOR:
+ {
+ AnchorNode* an = ANCHOR_(node);
+
+ switch (an->type) {
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND_NOT:
+ state |= IN_NOT;
+ /* fall */
+ case ANCHOR_PREC_READ:
+ case ANCHOR_LOOK_BEHIND:
+ setup_called_state_call(NODE_ANCHOR_BODY(an), state);
+ break;
+ default:
+ break;
+ }
+ }
+ break;
+
+ case NODE_ENCLOSURE:
+ {
+ EnclosureNode* en = ENCLOSURE_(node);
+
+ if (en->type == ENCLOSURE_MEMORY) {
+ if (NODE_IS_MARK1(node)) {
+ if ((~en->m.called_state & state) != 0) {
+ en->m.called_state |= state;
+ setup_called_state_call(NODE_BODY(node), state);
+ }
+ }
+ else {
+ NODE_STATUS_ADD(node, NST_MARK1);
+ en->m.called_state |= state;
+ setup_called_state_call(NODE_BODY(node), state);
+ NODE_STATUS_REMOVE(node, NST_MARK1);
}
+ }
+ else {
+ setup_called_state_call(NODE_BODY(node), state);
+ }
+ }
+ break;
+
+ case NODE_CALL:
+ setup_called_state_call(NODE_BODY(node), state);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+setup_called_state(Node* node, int state)
+{
+ switch (NODE_TYPE(node)) {
+ case NODE_ALT:
+ state |= IN_ALT;
+ /* fall */
+ case NODE_LIST:
+ do {
+ setup_called_state(NODE_CAR(node), state);
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+#ifdef USE_SUBEXP_CALL
+ case NODE_CALL:
+ setup_called_state_call(node, state);
+ break;
#endif
- SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
+
+ case NODE_ENCLOSURE:
+ {
+ EnclosureNode* en = ENCLOSURE_(node);
+
+ switch (en->type) {
+ case ENCLOSURE_MEMORY:
+ if (en->m.entry_count > 1)
+ state |= IN_MULTI_ENTRY;
+
+ en->m.called_state |= state;
+ /* fall */
+ case ENCLOSURE_OPTION:
+ case ENCLOSURE_STOP_BACKTRACK:
+ setup_called_state(NODE_BODY(node), state);
+ break;
}
}
break;
- case NT_QTFR:
+ case NODE_QUANT:
{
- OnigLen d;
- QtfrNode* qn = NQTFR(node);
- Node* target = qn->target;
+ QuantNode* qn = QUANT_(node);
- if ((state & IN_REPEAT) != 0) {
- qn->state |= NST_IN_REPEAT;
+ if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ state |= IN_REAL_REPEAT;
+ if (qn->lower != qn->upper)
+ state |= IN_VAR_REPEAT;
+
+ setup_called_state(NODE_QUANT_BODY(qn), state);
+ }
+ break;
+
+ case NODE_ANCHOR:
+ {
+ AnchorNode* an = ANCHOR_(node);
+
+ switch (an->type) {
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND_NOT:
+ state |= IN_NOT;
+ /* fall */
+ case ANCHOR_PREC_READ:
+ case ANCHOR_LOOK_BEHIND:
+ setup_called_state(NODE_ANCHOR_BODY(an), state);
+ break;
+ default:
+ break;
}
+ }
+ break;
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
- r = get_min_len(target, &d, env);
- if (r) break;
- if (d == 0) {
- qn->target_empty_info = NQ_TARGET_IS_EMPTY;
-#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
- r = quantifiers_memory_node_info(target);
- if (r < 0) break;
- if (r > 0) {
- qn->target_empty_info = r;
- }
+ case NODE_BREF:
+ case NODE_STR:
+ case NODE_CTYPE:
+ case NODE_CCLASS:
+ default:
+ break;
+ }
+}
+
+#endif /* USE_SUBEXP_CALL */
+
+
+static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);
+
+#ifdef __GNUC__
+__inline
#endif
-#if 0
- r = get_max_len(target, &d, env);
- if (r == 0 && d == 0) {
- /* ()* ==> ()?, ()+ ==> () */
- qn->upper = 1;
- if (qn->lower > 1) qn->lower = 1;
- if (NTYPE(target) == NT_STR) {
- qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
- }
- }
+static int
+setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
+{
+/* allowed node types in look-behind */
+#define ALLOWED_TYPE_IN_LB \
+ ( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE \
+ | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT | BIT_NODE_CALL )
+
+#define ALLOWED_ENCLOSURE_IN_LB ( ENCLOSURE_MEMORY | ENCLOSURE_OPTION )
+#define ALLOWED_ENCLOSURE_IN_LB_NOT ENCLOSURE_OPTION
+
+#define ALLOWED_ANCHOR_IN_LB \
+ ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \
+ | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND \
+ | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
+
+#define ALLOWED_ANCHOR_IN_LB_NOT \
+ ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \
+ | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND \
+ | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
+
+ int r;
+ AnchorNode* an = ANCHOR_(node);
+
+ switch (an->type) {
+ case ANCHOR_PREC_READ:
+ r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
+ break;
+ case ANCHOR_PREC_READ_NOT:
+ r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
+ break;
+
+ case ANCHOR_LOOK_BEHIND:
+ {
+ r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
+ ALLOWED_ENCLOSURE_IN_LB, ALLOWED_ANCHOR_IN_LB);
+ if (r < 0) return r;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
+ if (r != 0) return r;
+ r = setup_look_behind(node, reg, env);
+ }
+ break;
+
+ case ANCHOR_LOOK_BEHIND_NOT:
+ {
+ r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB,
+ ALLOWED_ENCLOSURE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
+ if (r < 0) return r;
+ if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
+ r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
+ if (r != 0) return r;
+ r = setup_look_behind(node, reg, env);
+ }
+ break;
+
+ default:
+ r = 0;
+ break;
+ }
+
+ return r;
+}
+
+#ifdef __GNUC__
+__inline
#endif
+static int
+setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
+{
+ int r;
+ OnigLen d;
+ QuantNode* qn = QUANT_(node);
+ Node* body = NODE_BODY(node);
+
+ if ((state & IN_REAL_REPEAT) != 0) {
+ NODE_STATUS_ADD(node, NST_IN_REAL_REPEAT);
+ }
+ if ((state & IN_MULTI_ENTRY) != 0) {
+ NODE_STATUS_ADD(node, NST_IN_MULTI_ENTRY);
+ }
+
+ if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
+ d = get_min_len(body, env);
+ if (d == 0) {
+#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
+ qn->body_empty_info = quantifiers_memory_node_info(body);
+ if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) {
+ if (NODE_TYPE(body) == NODE_ENCLOSURE &&
+ ENCLOSURE_(body)->type == ENCLOSURE_MEMORY) {
+ MEM_STATUS_ON(env->bt_mem_end, ENCLOSURE_(body)->m.regnum);
}
}
+#else
+ qn->body_empty_info = QUANT_BODY_IS_EMPTY;
+#endif
+ }
+ }
- state |= IN_REPEAT;
- if (qn->lower != qn->upper)
- state |= IN_VAR_REPEAT;
- r = setup_tree(target, reg, state, env);
- if (r) break;
+ if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ state |= IN_REAL_REPEAT;
+ if (qn->lower != qn->upper)
+ state |= IN_VAR_REPEAT;
+
+ r = setup_tree(body, reg, state, env);
+ if (r != 0) return r;
- /* expand string */
+ /* expand string */
#define EXPAND_STRING_MAX_LENGTH 100
- if (NTYPE(target) == NT_STR) {
- if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
- qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
- int len = NSTRING_LEN(target);
- StrNode* sn = NSTR(target);
-
- if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
- int i, n = qn->lower;
- onig_node_conv_to_str_node(node, NSTR(target)->flag);
- for (i = 0; i < n; i++) {
- r = onig_node_str_cat(node, sn->s, sn->end);
- if (r) break;
- }
- onig_node_free(target);
- break; /* break case NT_QTFR: */
- }
+ if (NODE_TYPE(body) == NODE_STR) {
+ if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
+ qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
+ int len = NSTRING_LEN(body);
+ StrNode* sn = STR_(body);
+
+ if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
+ int i, n = qn->lower;
+ onig_node_conv_to_str_node(node, STR_(body)->flag);
+ for (i = 0; i < n; i++) {
+ r = onig_node_str_cat(node, sn->s, sn->end);
+ if (r != 0) return r;
}
+ onig_node_free(body);
+ return r;
}
+ }
+ }
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- if (qn->greedy && (qn->target_empty_info != 0)) {
- if (NTYPE(target) == NT_QTFR) {
- QtfrNode* tqn = NQTFR(target);
- if (IS_NOT_NULL(tqn->head_exact)) {
- qn->head_exact = tqn->head_exact;
- tqn->head_exact = NULL;
- }
+ if (qn->greedy && (qn->body_empty_info != 0)) {
+ if (NODE_TYPE(body) == NODE_QUANT) {
+ QuantNode* tqn = QUANT_(body);
+ if (IS_NOT_NULL(tqn->head_exact)) {
+ qn->head_exact = tqn->head_exact;
+ tqn->head_exact = NULL;
+ }
+ }
+ else {
+ qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg);
+ }
+ }
+#endif
+
+ return r;
+}
+
+/* setup_tree does the following work.
+ 1. check empty loop. (set qn->body_empty_info)
+ 2. expand ignore-case in char class.
+ 3. set memory status bit flags. (reg->mem_stats)
+ 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
+ 5. find invalid patterns in look-behind.
+ 6. expand repeated string.
+ */
+static int
+setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
+{
+ int r = 0;
+
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ {
+ Node* prev = NULL_NODE;
+ do {
+ r = setup_tree(NODE_CAR(node), reg, state, env);
+ if (IS_NOT_NULL(prev) && r == 0) {
+ r = next_setup(prev, NODE_CAR(node), reg);
}
- else {
- qn->head_exact = get_head_value_node(qn->target, 1, reg);
+ prev = NODE_CAR(node);
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
+ }
+ break;
+
+ case NODE_ALT:
+ do {
+ r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env);
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_STR:
+ if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
+ r = expand_case_fold_string(node, reg);
+ }
+ break;
+
+ case NODE_BREF:
+ {
+ int i;
+ int* p;
+ BRefNode* br = BREF_(node);
+ p = BACKREFS_P(br);
+ for (i = 0; i < br->back_num; i++) {
+ if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
+ MEM_STATUS_ON(env->backrefed_mem, p[i]);
+ MEM_STATUS_ON(env->bt_mem_start, p[i]);
+#ifdef USE_BACKREF_WITH_LEVEL
+ if (NODE_IS_NEST_LEVEL(node)) {
+ MEM_STATUS_ON(env->bt_mem_end, p[i]);
}
- }
#endif
+ }
}
break;
- case NT_ENCLOSE:
+ case NODE_ENCLOSURE:
{
- EncloseNode* en = NENCLOSE(node);
+ EnclosureNode* en = ENCLOSURE_(node);
switch (en->type) {
- case ENCLOSE_OPTION:
+ case ENCLOSURE_OPTION:
{
OnigOptionType options = reg->options;
- reg->options = NENCLOSE(node)->option;
- r = setup_tree(NENCLOSE(node)->target, reg, state, env);
+ reg->options = ENCLOSURE_(node)->o.option;
+ r = setup_tree(NODE_BODY(node), reg, state, env);
reg->options = options;
}
break;
- case ENCLOSE_MEMORY:
- if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
- BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
- /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
+ case ENCLOSURE_MEMORY:
+#ifdef USE_SUBEXP_CALL
+ state |= en->m.called_state;
+#endif
+
+ if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0
+ || NODE_IS_RECURSION(node)) {
+ MEM_STATUS_ON(env->bt_mem_start, en->m.regnum);
}
- if (IS_ENCLOSE_CALLED(en))
- state |= IN_CALL;
- if (IS_ENCLOSE_RECURSION(en))
- state |= IN_RECCALL;
- else if ((state & IN_RECCALL) != 0)
- SET_CALL_RECURSION(node);
- r = setup_tree(en->target, reg, state, env);
+ r = setup_tree(NODE_BODY(node), reg, state, env);
break;
- case ENCLOSE_STOP_BACKTRACK:
+ case ENCLOSURE_STOP_BACKTRACK:
{
- Node* target = en->target;
+ Node* target = NODE_BODY(node);
r = setup_tree(target, reg, state, env);
- if (NTYPE(target) == NT_QTFR) {
- QtfrNode* tqn = NQTFR(target);
+ if (NODE_TYPE(target) == NODE_QUANT) {
+ QuantNode* tqn = QUANT_(target);
if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
tqn->greedy != 0) { /* (?>a*), a*+ etc... */
- int qtype = NTYPE(tqn->target);
- if (IS_NODE_TYPE_SIMPLE(qtype))
- SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
+ if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target)))
+ NODE_STATUS_ADD(node, NST_STOP_BT_SIMPLE_REPEAT);
}
}
}
@@ -3891,59 +4347,19 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
}
break;
- case NT_ANCHOR:
- {
- AnchorNode* an = NANCHOR(node);
-
- switch (an->type) {
- case ANCHOR_PREC_READ:
- r = setup_tree(an->target, reg, state, env);
- break;
- case ANCHOR_PREC_READ_NOT:
- r = setup_tree(an->target, reg, (state | IN_NOT), env);
- break;
-
-/* allowed node types in look-behind */
-#define ALLOWED_TYPE_IN_LB \
- ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
- BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
-
-#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
-#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
-
-#define ALLOWED_ANCHOR_IN_LB \
-( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
-
-#define ALLOWED_ANCHOR_IN_LB_NOT \
-( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
-
- case ANCHOR_LOOK_BEHIND:
- {
- r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
- ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
- if (r < 0) return r;
- if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_tree(an->target, reg, state, env);
- if (r != 0) return r;
- r = setup_look_behind(node, reg, env);
- }
- break;
+ case NODE_QUANT:
+ r = setup_quant(node, reg, state, env);
+ break;
- case ANCHOR_LOOK_BEHIND_NOT:
- {
- r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
- ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
- if (r < 0) return r;
- if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_tree(an->target, reg, (state | IN_NOT), env);
- if (r != 0) return r;
- r = setup_look_behind(node, reg, env);
- }
- break;
- }
- }
+ case NODE_ANCHOR:
+ r = setup_anchor(node, reg, state, env);
break;
+#ifdef USE_SUBEXP_CALL
+ case NODE_CALL:
+#endif
+ case NODE_CTYPE:
+ case NODE_CCLASS:
default:
break;
}
@@ -4594,15 +5010,13 @@ alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
static int
optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
{
- int type;
int r = 0;
clear_node_opt_info(opt);
set_bound_node_opt_info(opt, &env->mmd);
- type = NTYPE(node);
- switch (type) {
- case NT_LIST:
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
{
OptEnv nenv;
NodeOptInfo nopt;
@@ -4610,33 +5024,33 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
copy_opt_env(&nenv, env);
do {
- r = optimize_node_left(NCAR(nd), &nopt, &nenv);
+ r = optimize_node_left(NODE_CAR(nd), &nopt, &nenv);
if (r == 0) {
add_mml(&nenv.mmd, &nopt.len);
concat_left_node_opt_info(env->enc, opt, &nopt);
}
- } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
+ } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));
}
break;
- case NT_ALT:
+ case NODE_ALT:
{
NodeOptInfo nopt;
Node* nd = node;
do {
- r = optimize_node_left(NCAR(nd), &nopt, env);
+ r = optimize_node_left(NODE_CAR(nd), &nopt, env);
if (r == 0) {
if (nd == node) copy_node_opt_info(opt, &nopt);
else alt_merge_node_opt_info(opt, &nopt, env);
}
- } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
+ } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd)));
}
break;
- case NT_STR:
+ case NODE_STR:
{
- StrNode* sn = NSTR(node);
+ StrNode* sn = STR_(node);
int slen = sn->end - sn->s;
int is_raw = NSTRING_IS_RAW(node);
@@ -4677,10 +5091,10 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
break;
- case NT_CCLASS:
+ case NODE_CCLASS:
{
int i, z;
- CClassNode* cc = NCCLASS(node);
+ CClassNode* cc = CCLASS_(node);
/* no need to check ignore case. (set in setup_tree()) */
@@ -4702,7 +5116,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
break;
- case NT_CTYPE:
+ case NODE_CTYPE:
{
int i, min, max;
@@ -4711,9 +5125,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
if (max == 1) {
min = 1;
- switch (NCTYPE(node)->ctype) {
+ switch (CTYPE_(node)->ctype) {
+ case CTYPE_ANYCHAR:
+ break;
+
case ONIGENC_CTYPE_WORD:
- if (NCTYPE(node)->not != 0) {
+ if (CTYPE_(node)->not != 0) {
for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
if (! ONIGENC_IS_CODE_WORD(env->enc, i)) {
add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
@@ -4737,16 +5154,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
break;
- case NT_CANY:
- {
- OnigLen min = ONIGENC_MBC_MINLEN(env->enc);
- OnigLen max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
- set_mml(&opt->len, min, max);
- }
- break;
-
- case NT_ANCHOR:
- switch (NANCHOR(node)->type) {
+ case NODE_ANCHOR:
+ switch (ANCHOR_(node)->type) {
case ANCHOR_BEGIN_BUF:
case ANCHOR_BEGIN_POSITION:
case ANCHOR_BEGIN_LINE:
@@ -4755,14 +5164,14 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case ANCHOR_END_LINE:
case ANCHOR_PREC_READ_NOT:
case ANCHOR_LOOK_BEHIND:
- add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
+ add_opt_anc_info(&opt->anc, ANCHOR_(node)->type);
break;
case ANCHOR_PREC_READ:
{
NodeOptInfo nopt;
- r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
+ r = optimize_node_left(NODE_BODY(node), &nopt, env);
if (r == 0) {
if (nopt.exb.len > 0)
copy_opt_exact_info(&opt->expr, &nopt.exb);
@@ -4782,61 +5191,57 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
break;
- case NT_BREF:
+ case NODE_BREF:
{
int i;
int* backs;
OnigLen min, max, tmin, tmax;
- Node** nodes = SCANENV_MEM_NODES(env->scan_env);
- BRefNode* br = NBREF(node);
+ MemEnv* mem_env = SCANENV_MEMENV(env->scan_env);
+ BRefNode* br = BREF_(node);
- if (br->state & NST_RECURSION) {
+ if (NODE_IS_RECURSION(node)) {
set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
break;
}
backs = BACKREFS_P(br);
- r = get_min_len(nodes[backs[0]], &min, env->scan_env);
- if (r != 0) break;
- r = get_max_len(nodes[backs[0]], &max, env->scan_env);
- if (r != 0) break;
+ min = get_min_len(mem_env[backs[0]].node, env->scan_env);
+ max = get_max_len(mem_env[backs[0]].node, env->scan_env);
for (i = 1; i < br->back_num; i++) {
- r = get_min_len(nodes[backs[i]], &tmin, env->scan_env);
- if (r != 0) break;
- r = get_max_len(nodes[backs[i]], &tmax, env->scan_env);
- if (r != 0) break;
+ tmin = get_min_len(mem_env[backs[i]].node, env->scan_env);
+ tmax = get_max_len(mem_env[backs[i]].node, env->scan_env);
if (min > tmin) min = tmin;
if (max < tmax) max = tmax;
}
- if (r == 0) set_mml(&opt->len, min, max);
+ set_mml(&opt->len, min, max);
}
break;
#ifdef USE_SUBEXP_CALL
- case NT_CALL:
- if (IS_CALL_RECURSION(NCALL(node)))
+ case NODE_CALL:
+ if (NODE_IS_RECURSION(node))
set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
else {
OnigOptionType save = env->options;
- env->options = NENCLOSE(NCALL(node)->target)->option;
- r = optimize_node_left(NCALL(node)->target, opt, env);
+ env->options = ENCLOSURE_(NODE_BODY(node))->o.option;
+ r = optimize_node_left(NODE_BODY(node), opt, env);
env->options = save;
}
break;
#endif
- case NT_QTFR:
+ case NODE_QUANT:
{
int i;
OnigLen min, max;
NodeOptInfo nopt;
- QtfrNode* qn = NQTFR(node);
+ QuantNode* qn = QUANT_(node);
- r = optimize_node_left(qn->target, &nopt, env);
- if (r) break;
+ r = optimize_node_left(NODE_BODY(node), &nopt, env);
+ if (r != 0) break;
if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) {
if (env->mmd.max == 0 &&
- NTYPE(qn->target) == NT_CANY && qn->greedy) {
+ NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
if (IS_MULTILINE(env->options))
add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
else
@@ -4877,22 +5282,22 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
break;
- case NT_ENCLOSE:
+ case NODE_ENCLOSURE:
{
- EncloseNode* en = NENCLOSE(node);
+ EnclosureNode* en = ENCLOSURE_(node);
switch (en->type) {
- case ENCLOSE_OPTION:
+ case ENCLOSURE_OPTION:
{
OnigOptionType save = env->options;
- env->options = en->option;
- r = optimize_node_left(en->target, opt, env);
+ env->options = en->o.option;
+ r = optimize_node_left(NODE_BODY(node), opt, env);
env->options = save;
}
break;
- case ENCLOSE_MEMORY:
+ case ENCLOSURE_MEMORY:
#ifdef USE_SUBEXP_CALL
en->opt_count++;
if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
@@ -4900,24 +5305,24 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
min = 0;
max = ONIG_INFINITE_DISTANCE;
- if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
- if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
+ if (NODE_IS_MIN_FIXED(node)) min = en->min_len;
+ if (NODE_IS_MAX_FIXED(node)) max = en->max_len;
set_mml(&opt->len, min, max);
}
else
#endif
{
- r = optimize_node_left(en->target, opt, env);
+ r = optimize_node_left(NODE_BODY(node), opt, env);
if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
- if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
+ if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum))
remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
}
}
break;
- case ENCLOSE_STOP_BACKTRACK:
- r = optimize_node_left(en->target, opt, env);
+ case ENCLOSURE_STOP_BACKTRACK:
+ r = optimize_node_left(NODE_BODY(node), opt, env);
break;
}
}
@@ -4925,8 +5330,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
default:
#ifdef ONIG_DEBUG
- fprintf(stderr, "optimize_node_left: undefined node type %d\n",
- NTYPE(node));
+ fprintf(stderr, "optimize_node_left: undefined node type %d\n", NODE_TYPE(node));
#endif
r = ONIGERR_TYPE_BUG;
break;
@@ -4962,7 +5366,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
reg->map, &(reg->int_map));
- if (r) return r;
+ if (r != 0) return r;
reg->optimize = (allow_reverse != 0
? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
@@ -5006,7 +5410,7 @@ set_sub_anchor(regex_t* reg, OptAncInfo* anc)
reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
}
-#ifdef ONIG_DEBUG
+#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
static void print_optimize_info(FILE* f, regex_t* reg);
#endif
@@ -5025,7 +5429,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
clear_mml(&env.mmd);
r = optimize_node_left(node, &opt, &env);
- if (r) return r;
+ if (r != 0) return r;
reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
@@ -5120,6 +5524,10 @@ static void print_enc_string(FILE* fp, OnigEncoding enc,
fprintf(fp, "/\n");
}
+#endif /* ONIG_DEBUG */
+
+#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
+
static void
print_distance_range(FILE* f, OnigLen a, OnigLen b)
{
@@ -5236,7 +5644,7 @@ print_optimize_info(FILE* f, regex_t* reg)
}
}
}
-#endif /* ONIG_DEBUG */
+#endif
extern void
@@ -5278,7 +5686,7 @@ onig_transfer(regex_t* to, regex_t* from)
}
-#ifdef ONIG_DEBUG
+#ifdef ONIG_DEBUG_COMPILE
static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
#endif
#ifdef ONIG_DEBUG_PARSE_TREE
@@ -5323,14 +5731,14 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->num_comb_exp_check = 0;
#endif
- r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
+ r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);
if (r != 0) goto err;
#ifdef USE_NAMED_GROUP
/* mixed use named group and no-named group */
if (scan_env.num_named > 0 &&
IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
- !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
+ ! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
if (scan_env.num_named != scan_env.num_mem)
r = disable_noname_group_capture(&root, reg, &scan_env);
else
@@ -5340,22 +5748,27 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
}
#endif
+ r = check_backrefs(root, &scan_env);
+ if (r != 0) goto err;
+
#ifdef USE_SUBEXP_CALL
if (scan_env.num_call > 0) {
r = unset_addr_list_init(&uslist, scan_env.num_call);
if (r != 0) goto err;
scan_env.unset_addr_list = &uslist;
- r = setup_subexp_call(root, &scan_env);
+ r = setup_call(root, &scan_env, 0);
+ if (r != 0) goto err_unset;
+ r = setup_call2(root);
if (r != 0) goto err_unset;
- r = subexp_recursive_check_trav(root, &scan_env);
+ r = recursive_call_check_trav(root, &scan_env, 0);
if (r < 0) goto err_unset;
- r = subexp_inf_recursive_check_trav(root, &scan_env);
+ r = infinite_recursive_call_check_trav(root, &scan_env);
if (r != 0) goto err_unset;
- reg->num_call = scan_env.num_call;
+ setup_called_state(root, 0);
}
- else
- reg->num_call = 0;
+
+ reg->num_call = scan_env.num_call;
#endif
r = setup_tree(root, reg, 0, &scan_env);
@@ -5369,11 +5782,12 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->bt_mem_start = scan_env.bt_mem_start;
reg->bt_mem_start |= reg->capture_history;
if (IS_FIND_CONDITION(reg->options))
- BIT_STATUS_ON_ALL(reg->bt_mem_end);
+ MEM_STATUS_ON_ALL(reg->bt_mem_end);
else {
reg->bt_mem_end = scan_env.bt_mem_end;
reg->bt_mem_end |= reg->capture_history;
}
+ reg->bt_mem_start |= reg->bt_mem_end;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
if (scan_env.backrefed_mem == 0
@@ -5391,7 +5805,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
if (scan_env.comb_exp_max_regnum > 0) {
int i;
for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
- if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
+ if (MEM_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
scan_env.num_comb_exp_check = 0;
break;
}
@@ -5408,19 +5822,19 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
if (r != 0) goto err_unset;
#endif
- if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
- xfree(scan_env.mem_nodes_dynamic);
- scan_env.mem_nodes_dynamic = (Node** )NULL;
+ if (IS_NOT_NULL(scan_env.mem_env_dynamic)) {
+ xfree(scan_env.mem_env_dynamic);
+ scan_env.mem_env_dynamic = (MemEnv* )NULL;
}
- r = compile_tree(root, reg);
+ r = compile_tree(root, reg, &scan_env);
if (r == 0) {
r = add_opcode(reg, OP_END);
#ifdef USE_SUBEXP_CALL
if (scan_env.num_call > 0) {
r = unset_addr_list_fix(&uslist, reg);
unset_addr_list_end(&uslist);
- if (r) goto err;
+ if (r != 0) goto err;
}
#endif
@@ -5466,8 +5880,8 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
}
onig_node_free(root);
- if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
- xfree(scan_env.mem_nodes_dynamic);
+ if (IS_NOT_NULL(scan_env.mem_env_dynamic))
+ xfree(scan_env.mem_env_dynamic);
return r;
}
@@ -5543,7 +5957,7 @@ onig_new_without_alloc(regex_t* reg, const UChar* pattern,
int r;
r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
- if (r) return r;
+ if (r != 0) return r;
r = onig_compile(reg, pattern, pattern_end, einfo);
return r;
@@ -5560,10 +5974,10 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
- if (r) goto err;
+ if (r != 0) goto err;
r = onig_compile(*reg, pattern, pattern_end, einfo);
- if (r) {
+ if (r != 0) {
err:
onig_free(*reg);
*reg = NULL;
@@ -5657,9 +6071,10 @@ onig_is_in_code_range(const UChar* p, OnigCodePoint code)
}
extern int
-onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
+onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg)
{
int found;
+ CClassNode* cc = (CClassNode* )cc_arg;
if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
if (IS_NULL(cc->mbuf)) {
@@ -5775,10 +6190,10 @@ OnigOpInfoType OnigOpInfo[] = {
{ OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
{ OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
{ OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
- { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
- { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
- { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
- { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
+ { OP_EMPTY_CHECK_START, "empty-check-start", ARG_MEMNUM },
+ { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM },
+ { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM },
+ { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM },
{ OP_PUSH_POS, "push-pos", ARG_NON },
{ OP_POP_POS, "pop-pos", ARG_NON },
{ OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
@@ -5824,13 +6239,6 @@ op2arg_type(int opcode)
}
static void
-Indent(FILE* f, int indent)
-{
- int i;
- for (i = 0; i < indent; i++) putc(' ', f);
-}
-
-static void
p_string(FILE* f, int len, UChar* s)
{
fputs(":", f);
@@ -5846,8 +6254,16 @@ p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
while (x-- > 0) { fputc(*s++, f); }
}
+static void
+p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start)
+{
+ RelAddrType curr = (RelAddrType )(p - start);
+
+ fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr);
+}
+
extern void
-onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
+onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
OnigEncoding enc)
{
int i, n, arg_type;
@@ -5858,7 +6274,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
OnigCodePoint code;
UChar *q;
- fprintf(f, "[%s", op2name(*bp));
+ fprintf(f, "%s", op2name(*bp));
arg_type = op2arg_type(*bp);
if (arg_type != ARG_SPECIAL) {
bp++;
@@ -5867,11 +6283,12 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
break;
case ARG_RELADDR:
GET_RELADDR_INC(addr, bp);
- fprintf(f, ":(%d)", addr);
+ fputc(':', f);
+ p_rel_addr(f, addr, bp, start);
break;
case ARG_ABSADDR:
GET_ABSADDR_INC(addr, bp);
- fprintf(f, ":(%d)", addr);
+ fprintf(f, ":{/%d}", addr);
break;
case ARG_LENGTH:
GET_LENGTH_INC(len, bp);
@@ -6056,7 +6473,8 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
case OP_PUSH_IF_PEEK_NEXT:
addr = *((RelAddrType* )bp);
bp += SIZE_RELADDR;
- fprintf(f, ":(%d)", addr);
+ fputc(':', f);
+ p_rel_addr(f, addr, bp, start);
p_string(f, 1, bp);
bp += 1;
break;
@@ -6069,7 +6487,8 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
case OP_PUSH_LOOK_BEHIND_NOT:
GET_RELADDR_INC(addr, bp);
GET_LENGTH_INC(len, bp);
- fprintf(f, ":%d:(%d)", len, addr);
+ fprintf(f, ":%d:", len);
+ p_rel_addr(f, addr, bp, start);
break;
case OP_STATE_CHECK_PUSH:
@@ -6078,7 +6497,8 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
bp += SIZE_STATE_CHECK_NUM;
addr = *((RelAddrType* )bp);
bp += SIZE_RELADDR;
- fprintf(f, ":%d:(%d)", scn, addr);
+ fprintf(f, ":%d:", scn);
+ p_rel_addr(f, addr, bp, start);
break;
default:
@@ -6086,40 +6506,50 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
*--bp);
}
}
- fputs("]", f);
if (nextp) *nextp = bp;
}
+#endif /* ONIG_DEBUG */
+#ifdef ONIG_DEBUG_COMPILE
static void
print_compiled_byte_code_list(FILE* f, regex_t* reg)
{
- int ncode;
- UChar* bp = reg->p;
- UChar* end = reg->p + reg->used;
+ UChar* bp;
+ UChar* start = reg->p;
+ UChar* end = reg->p + reg->used;
- fprintf(f, "code length: %d\n", reg->used);
+ fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n",
+ reg->bt_mem_start, reg->bt_mem_end);
+ fprintf(f, "code-length: %d\n", reg->used);
- ncode = 0;
+ bp = start;
while (bp < end) {
- ncode++;
- if (bp > reg->p) {
- if (ncode % 5 == 0)
- fprintf(f, "\n");
- else
- fputs(" ", f);
- }
- onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
- }
+ int pos = bp - start;
+ fprintf(f, "%4d: ", pos);
+ onig_print_compiled_byte_code(f, bp, &bp, start, reg->enc);
+ fprintf(f, "\n");
+ }
fprintf(f, "\n");
}
+#endif
+
+#ifdef ONIG_DEBUG_PARSE_TREE
+
+static void
+Indent(FILE* f, int indent)
+{
+ int i;
+ for (i = 0; i < indent; i++) putc(' ', f);
+}
static void
print_indent_tree(FILE* f, Node* node, int indent)
{
- int i, type;
- int add = 3;
+ int i;
+ NodeType type;
UChar* p;
+ int add = 3;
Indent(f, indent);
if (IS_NULL(node)) {
@@ -6127,29 +6557,29 @@ print_indent_tree(FILE* f, Node* node, int indent)
exit (0);
}
- type = NTYPE(node);
+ type = NODE_TYPE(node);
switch (type) {
- case NT_LIST:
- case NT_ALT:
- if (NTYPE(node) == NT_LIST)
+ case NODE_LIST:
+ case NODE_ALT:
+ if (type == NODE_LIST)
fprintf(f, "<list:%p>\n", node);
else
fprintf(f, "<alt:%p>\n", node);
- print_indent_tree(f, NCAR(node), indent + add);
- while (IS_NOT_NULL(node = NCDR(node))) {
- if (NTYPE(node) != type) {
- fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
+ print_indent_tree(f, NODE_CAR(node), indent + add);
+ while (IS_NOT_NULL(node = NODE_CDR(node))) {
+ if (NODE_TYPE(node) != type) {
+ fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NODE_TYPE(node));
exit(0);
}
- print_indent_tree(f, NCAR(node), indent + add);
+ print_indent_tree(f, NODE_CAR(node), indent + add);
}
break;
- case NT_STR:
+ case NODE_STR:
fprintf(f, "<string%s:%p>",
(NSTRING_IS_RAW(node) ? "-raw" : ""), node);
- for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
+ for (p = STR_(node)->s; p < STR_(node)->end; p++) {
if (*p >= 0x20 && *p < 0x7f)
fputc(*p, f);
else {
@@ -6158,11 +6588,11 @@ print_indent_tree(FILE* f, Node* node, int indent)
}
break;
- case NT_CCLASS:
+ case NODE_CCLASS:
fprintf(f, "<cclass:%p>", node);
- if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
- if (NCCLASS(node)->mbuf) {
- BBuf* bbuf = NCCLASS(node)->mbuf;
+ if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);
+ if (CCLASS_(node)->mbuf) {
+ BBuf* bbuf = CCLASS_(node)->mbuf;
for (i = 0; i < bbuf->used; i++) {
if (i > 0) fprintf(f, ",");
fprintf(f, "%0x", bbuf->p[i]);
@@ -6170,11 +6600,15 @@ print_indent_tree(FILE* f, Node* node, int indent)
}
break;
- case NT_CTYPE:
+ case NODE_CTYPE:
fprintf(f, "<ctype:%p> ", node);
- switch (NCTYPE(node)->ctype) {
+ switch (CTYPE_(node)->ctype) {
+ case CTYPE_ANYCHAR:
+ fprintf(f, "<anychar:%p>", node);
+ break;
+
case ONIGENC_CTYPE_WORD:
- if (NCTYPE(node)->not != 0)
+ if (CTYPE_(node)->not != 0)
fputs("not word", f);
else
fputs("word", f);
@@ -6186,13 +6620,9 @@ print_indent_tree(FILE* f, Node* node, int indent)
}
break;
- case NT_CANY:
- fprintf(f, "<anychar:%p>", node);
- break;
-
- case NT_ANCHOR:
+ case NODE_ANCHOR:
fprintf(f, "<anchor:%p> ", node);
- switch (NANCHOR(node)->type) {
+ switch (ANCHOR_(node)->type) {
case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
case ANCHOR_END_BUF: fputs("end buf", f); break;
case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
@@ -6208,19 +6638,19 @@ print_indent_tree(FILE* f, Node* node, int indent)
#endif
case ANCHOR_PREC_READ:
fprintf(f, "prec read\n");
- print_indent_tree(f, NANCHOR(node)->target, indent + add);
+ print_indent_tree(f, NODE_BODY(node), indent + add);
break;
case ANCHOR_PREC_READ_NOT:
fprintf(f, "prec read not\n");
- print_indent_tree(f, NANCHOR(node)->target, indent + add);
+ print_indent_tree(f, NODE_BODY(node), indent + add);
break;
case ANCHOR_LOOK_BEHIND:
fprintf(f, "look behind\n");
- print_indent_tree(f, NANCHOR(node)->target, indent + add);
+ print_indent_tree(f, NODE_BODY(node), indent + add);
break;
case ANCHOR_LOOK_BEHIND_NOT:
fprintf(f, "look behind not\n");
- print_indent_tree(f, NANCHOR(node)->target, indent + add);
+ print_indent_tree(f, NODE_BODY(node), indent + add);
break;
default:
@@ -6229,10 +6659,10 @@ print_indent_tree(FILE* f, Node* node, int indent)
}
break;
- case NT_BREF:
+ case NODE_BREF:
{
int* p;
- BRefNode* br = NBREF(node);
+ BRefNode* br = BREF_(node);
p = BACKREFS_P(br);
fprintf(f, "<backref:%p>", node);
for (i = 0; i < br->back_num; i++) {
@@ -6243,32 +6673,32 @@ print_indent_tree(FILE* f, Node* node, int indent)
break;
#ifdef USE_SUBEXP_CALL
- case NT_CALL:
+ case NODE_CALL:
{
- CallNode* cn = NCALL(node);
+ CallNode* cn = CALL_(node);
fprintf(f, "<call:%p>", node);
p_string(f, cn->name_end - cn->name, cn->name);
}
break;
#endif
- case NT_QTFR:
+ case NODE_QUANT:
fprintf(f, "<quantifier:%p>{%d,%d}%s\n", node,
- NQTFR(node)->lower, NQTFR(node)->upper,
- (NQTFR(node)->greedy ? "" : "?"));
- print_indent_tree(f, NQTFR(node)->target, indent + add);
+ QUANT_(node)->lower, QUANT_(node)->upper,
+ (QUANT_(node)->greedy ? "" : "?"));
+ print_indent_tree(f, NODE_BODY(node), indent + add);
break;
- case NT_ENCLOSE:
- fprintf(f, "<enclose:%p> ", node);
- switch (NENCLOSE(node)->type) {
- case ENCLOSE_OPTION:
- fprintf(f, "option:%d", NENCLOSE(node)->option);
+ case NODE_ENCLOSURE:
+ fprintf(f, "<enclosure:%p> ", node);
+ switch (ENCLOSURE_(node)->type) {
+ case ENCLOSURE_OPTION:
+ fprintf(f, "option:%d", ENCLOSURE_(node)->option);
break;
- case ENCLOSE_MEMORY:
- fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
+ case ENCLOSURE_MEMORY:
+ fprintf(f, "memory:%d", ENCLOSURE_(node)->regnum);
break;
- case ENCLOSE_STOP_BACKTRACK:
+ case ENCLOSURE_STOP_BACKTRACK:
fprintf(f, "stop-bt");
break;
@@ -6276,22 +6706,20 @@ print_indent_tree(FILE* f, Node* node, int indent)
break;
}
fprintf(f, "\n");
- print_indent_tree(f, NENCLOSE(node)->target, indent + add);
+ print_indent_tree(f, NODE_BODY(node), indent + add);
break;
default:
- fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
+ fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node));
break;
}
- if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
- type != NT_ENCLOSE)
+ if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT &&
+ type != NODE_ENCLOSURE)
fprintf(f, "\n");
fflush(f);
}
-#endif /* ONIG_DEBUG */
-#ifdef ONIG_DEBUG_PARSE_TREE
static void
print_tree(FILE* f, Node* node)
{
diff --git a/src/regenc.h b/src/regenc.h
index e119dab..897c704 100644
--- a/src/regenc.h
+++ b/src/regenc.h
@@ -4,7 +4,7 @@
regenc.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -154,7 +154,7 @@ ONIG_EXTERN struct PropertyNameCtype* sjis_lookup_property_name P_((register con
/* in enc/unicode.c */
ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]));
-ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[]));
+ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[]));
ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[]));
ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold));
ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
diff --git a/src/regerror.c b/src/regerror.c
index ee35b36..0285272 100644
--- a/src/regerror.c
+++ b/src/regerror.c
@@ -2,7 +2,7 @@
regerror.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -211,24 +211,24 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
while (p < end) {
code = ONIGENC_MBC_TO_CODE(enc, p, end);
if (code >= 0x80) {
- if (code > 0xffff && len + 10 <= buf_size) {
- sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24));
- sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16));
- sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8));
- sprint_byte((char*)(&(buf[len+8])), (unsigned int)code);
- len += 10;
- }
- else if (len + 6 <= buf_size) {
- sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8));
- sprint_byte((char*)(&(buf[len+4])), (unsigned int)code);
- len += 6;
- }
- else {
- break;
- }
+ if (code > 0xffff && len + 10 <= buf_size) {
+ sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24));
+ sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16));
+ sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8));
+ sprint_byte((char*)(&(buf[len+8])), (unsigned int)code);
+ len += 10;
+ }
+ else if (len + 6 <= buf_size) {
+ sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8));
+ sprint_byte((char*)(&(buf[len+4])), (unsigned int)code);
+ len += 6;
+ }
+ else {
+ break;
+ }
}
else {
- buf[len++] = (UChar )code;
+ buf[len++] = (UChar )code;
}
p += enclen(enc, p);
@@ -278,27 +278,27 @@ onig_error_code_to_str(s, code, va_alist)
case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
einfo = va_arg(vargs, OnigErrorInfo*);
len = to_ascii(einfo->enc, einfo->par, einfo->par_end,
- parbuf, MAX_ERROR_PAR_LEN - 3, &is_over);
+ parbuf, MAX_ERROR_PAR_LEN - 3, &is_over);
q = onig_error_code_to_format(code);
p = s;
while (*q != '\0') {
if (*q == '%') {
- q++;
- if (*q == 'n') { /* '%n': name */
- xmemcpy(p, parbuf, len);
- p += len;
- if (is_over != 0) {
- xmemcpy(p, "...", 3);
- p += 3;
- }
- q++;
- }
- else
- goto normal_char;
+ q++;
+ if (*q == 'n') { /* '%n': name */
+ xmemcpy(p, parbuf, len);
+ p += len;
+ if (is_over != 0) {
+ xmemcpy(p, "...", 3);
+ p += 3;
+ }
+ q++;
+ }
+ else
+ goto normal_char;
}
else {
normal_char:
- *p++ = *q++;
+ *p++ = *q++;
}
}
*p = '\0';
@@ -359,7 +359,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
int blen;
while (len-- > 0) {
- sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
+ sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
bp = bs;
while (blen-- > 0) *s++ = *bp++;
@@ -367,23 +367,23 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist)
}
}
else if (*p == '\\') {
- *s++ = *p++;
- len = enclen(enc, p);
- while (len-- > 0) *s++ = *p++;
+ *s++ = *p++;
+ len = enclen(enc, p);
+ while (len-- > 0) *s++ = *p++;
}
else if (*p == '/') {
- *s++ = (unsigned char )'\\';
- *s++ = *p++;
+ *s++ = (unsigned char )'\\';
+ *s++ = *p++;
}
else if (!ONIGENC_IS_CODE_PRINT(enc, *p) &&
- !ONIGENC_IS_CODE_SPACE(enc, *p)) {
- sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
- len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
+ !ONIGENC_IS_CODE_SPACE(enc, *p)) {
+ sprint_byte_with_x((char* )bs, (unsigned int )(*p++));
+ len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs);
bp = bs;
- while (len-- > 0) *s++ = *bp++;
+ while (len-- > 0) *s++ = *bp++;
}
else {
- *s++ = *p++;
+ *s++ = *p++;
}
}
diff --git a/src/regexec.c b/src/regexec.c
index c0626ef..f66da1f 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -240,6 +240,7 @@ onig_region_new(void)
OnigRegion* r;
r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
+ CHECK_NULL_RETURN(r);
onig_region_init(r);
return r;
}
@@ -247,7 +248,7 @@ onig_region_new(void)
extern void
onig_region_free(OnigRegion* r, int free_self)
{
- if (r) {
+ if (r != 0) {
if (r->allocated > 0) {
if (r->beg) xfree(r->beg);
if (r->end) xfree(r->end);
@@ -271,13 +272,17 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
if (to->allocated == 0) {
if (from->num_regs > 0) {
to->beg = (int* )xmalloc(RREGC_SIZE);
+ if (IS_NULL(to->beg)) return;
to->end = (int* )xmalloc(RREGC_SIZE);
+ if (IS_NULL(to->end)) return;
to->allocated = from->num_regs;
}
}
else if (to->allocated < from->num_regs) {
to->beg = (int* )xrealloc(to->beg, RREGC_SIZE);
+ if (IS_NULL(to->beg)) return;
to->end = (int* )xrealloc(to->end, RREGC_SIZE);
+ if (IS_NULL(to->end)) return;
to->allocated = from->num_regs;
}
@@ -311,8 +316,8 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#define STK_REPEAT_INC 0x0300
#define STK_STATE_CHECK_MARK 0x1000
/* avoided by normal-POP */
-#define STK_NULL_CHECK_START 0x3000
-#define STK_NULL_CHECK_END 0x5000 /* for recursive call */
+#define STK_EMPTY_CHECK_START 0x3000
+#define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */
#define STK_MEM_END_MARK 0x8400
#define STK_POS 0x0500 /* used when POP-POS */
#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */
@@ -333,7 +338,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
(msa).region = (arg_region);\
(msa).start = (arg_start);\
(msa).best_len = ONIG_MISMATCH;\
- (msa).ptr_num = (reg)->num_repeat + (reg)->num_mem * 2;\
+ (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
} while(0)
#else
#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\
@@ -341,7 +346,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
(msa).options = (arg_option);\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
- (msa).ptr_num = (reg)->num_repeat + (reg)->num_mem * 2;\
+ (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
} while(0)
#endif
@@ -400,6 +405,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
is_alloca = 0;\
alloc_base = (char* )xmalloc(sizeof(OnigStackIndex) * msa->ptr_num\
+ sizeof(OnigStackType) * (stack_num));\
+ CHECK_NULL_RETURN_MEMERR(alloc_base);\
stk_base = (OnigStackType* )(alloc_base\
+ (sizeof(OnigStackIndex) * msa->ptr_num));\
stk = stk_base;\
@@ -409,6 +415,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
is_alloca = 1;\
alloc_base = (char* )xalloca(sizeof(OnigStackIndex) * msa->ptr_num\
+ sizeof(OnigStackType) * (stack_num));\
+ CHECK_NULL_RETURN_MEMERR(alloc_base);\
stk_base = (OnigStackType* )(alloc_base\
+ (sizeof(OnigStackIndex) * msa->ptr_num));\
stk = stk_base;\
@@ -423,6 +430,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
size_t size = sizeof(OnigStackIndex) * msa->ptr_num \
+ sizeof(OnigStackType) * msa->stack_n;\
msa->stack_p = xmalloc(size);\
+ CHECK_NULL_RETURN_MEMERR(msa->stack_p);\
xmemcpy(msa->stack_p, alloc_base, size);\
}\
else {\
@@ -431,11 +439,9 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
} while(0)
#define UPDATE_FOR_STACK_REALLOC do{\
- repeat_stk = (OnigStackIndex* )alloc_base;\
+ repeat_stk = (OnigStackIndex* )alloc_base;\
mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);\
- mem_end_stk = mem_start_stk + num_mem;\
- mem_start_stk--; /* for index start from 1 */\
- mem_end_stk--; /* for index start from 1 */\
+ mem_end_stk = mem_start_stk + num_mem + 1;\
} while(0)
static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
@@ -533,7 +539,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STATE_CHECK_POS(s,snum) \
(((s) - str) * num_comb_exp_check + ((snum) - 1))
#define STATE_CHECK_VAL(v,snum) do {\
- if (state_check_buff != NULL) {\
+ if (IS_NOT_NULL(state_check_buff)) {\
int x = STATE_CHECK_POS(s,snum);\
(v) = state_check_buff[x/8] & (1<<(x%8));\
}\
@@ -570,12 +576,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
stk->u.state.pstr_prev = (sprev);\
- stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
+ stk->u.state.state_check = (IS_NOT_NULL(state_check_buff) ? (snum) : 0);\
STACK_INC;\
} while(0)
#define STACK_PUSH_STATE_CHECK(s,snum) do {\
- if (state_check_buff != NULL) {\
+ if (IS_NOT_NULL(state_check_buff)) { \
STACK_ENSURE(1);\
stk->type = STK_STATE_CHECK_MARK;\
stk->u.state.pstr = (s);\
@@ -691,18 +697,18 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while(0)
-#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
+#define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\
STACK_ENSURE(1);\
- stk->type = STK_NULL_CHECK_START;\
- stk->u.null_check.num = (cnum);\
- stk->u.null_check.pstr = (s);\
+ stk->type = STK_EMPTY_CHECK_START;\
+ stk->u.empty_check.num = (cnum);\
+ stk->u.empty_check.pstr = (s);\
STACK_INC;\
} while(0)
-#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
+#define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\
STACK_ENSURE(1);\
- stk->type = STK_NULL_CHECK_END;\
- stk->u.null_check.num = (cnum);\
+ stk->type = STK_EMPTY_CHECK_END;\
+ stk->u.empty_check.num = (cnum);\
STACK_INC;\
} while(0)
@@ -849,49 +855,29 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while(0)
-#define STACK_NULL_CHECK(isnull,id,s) do {\
+#define STACK_EMPTY_CHECK(isnull,id,s) do {\
OnigStackType* k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
- if (k->type == STK_NULL_CHECK_START) {\
- if (k->u.null_check.num == (id)) {\
- (isnull) = (k->u.null_check.pstr == (s));\
+ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \
+ if (k->type == STK_EMPTY_CHECK_START) {\
+ if (k->u.empty_check.num == (id)) {\
+ (isnull) = (k->u.empty_check.pstr == (s));\
break;\
}\
}\
}\
} while(0)
-#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
- int level = 0;\
+#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
+#define STACK_EMPTY_CHECK_MEMST(isnull,id,s,reg) do {\
OnigStackType* k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
- if (k->type == STK_NULL_CHECK_START) {\
- if (k->u.null_check.num == (id)) {\
- if (level == 0) {\
- (isnull) = (k->u.null_check.pstr == (s));\
- break;\
- }\
- else level--;\
- }\
- }\
- else if (k->type == STK_NULL_CHECK_END) {\
- level++;\
- }\
- }\
-} while(0)
-
-#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
- OnigStackType* k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
- if (k->type == STK_NULL_CHECK_START) {\
- if (k->u.null_check.num == (id)) {\
- if (k->u.null_check.pstr != (s)) {\
+ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \
+ if (k->type == STK_EMPTY_CHECK_START) {\
+ if (k->u.empty_check.num == (id)) {\
+ if (k->u.empty_check.pstr != (s)) {\
(isnull) = 0;\
break;\
}\
@@ -903,10 +889,11 @@ stack_double(int is_alloca, char** arg_alloc_base,
if (k->u.mem.end == INVALID_STACK_INDEX) {\
(isnull) = 0; break;\
}\
- if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
+ if (MEM_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
else\
endp = (UChar* )k->u.mem.end;\
+ /*fprintf(stderr, "num: %d, pstr: %p, endp: %p\n", k->u.mem.num, STACK_AT(k->u.mem.start)->u.mem.pstr, endp);*/ \
if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
(isnull) = 0; break;\
}\
@@ -916,23 +903,23 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
k++;\
}\
- break;\
+ break;\
}\
}\
}\
}\
} while(0)
-#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
+#define STACK_EMPTY_CHECK_MEMST_REC(isnull,id,s,reg) do {\
int level = 0;\
OnigStackType* k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
- if (k->type == STK_NULL_CHECK_START) {\
- if (k->u.null_check.num == (id)) {\
+ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \
+ if (k->type == STK_EMPTY_CHECK_START) {\
+ if (k->u.empty_check.num == (id)) {\
if (level == 0) {\
- if (k->u.null_check.pstr != (s)) {\
+ if (k->u.empty_check.pstr != (s)) {\
(isnull) = 0;\
break;\
}\
@@ -944,7 +931,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
if (k->u.mem.end == INVALID_STACK_INDEX) {\
(isnull) = 0; break;\
}\
- if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
+ if (MEM_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
else\
endp = (UChar* )k->u.mem.end;\
@@ -957,7 +944,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
k++;\
}\
- break;\
+ break;\
}\
}\
else {\
@@ -965,11 +952,33 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
}\
}\
- else if (k->type == STK_NULL_CHECK_END) {\
- if (k->u.null_check.num == (id)) level++;\
+ else if (k->type == STK_EMPTY_CHECK_END) {\
+ if (k->u.empty_check.num == (id)) level++;\
+ }\
+ }\
+} while(0)
+#else
+#define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\
+ int level = 0;\
+ OnigStackType* k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \
+ if (k->type == STK_EMPTY_CHECK_START) {\
+ if (k->u.empty_check.num == (id)) {\
+ if (level == 0) {\
+ (isnull) = (k->u.empty_check.pstr == (s));\
+ break;\
+ }\
+ }\
+ level--;\
+ }\
+ else if (k->type == STK_EMPTY_CHECK_END) {\
+ level++;\
}\
}\
} while(0)
+#endif /* USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT */
#define STACK_GET_REPEAT(id, k) do {\
int level = 0;\
@@ -1091,7 +1100,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
if (k->type == STK_MEM_START) {
n = k->u.mem.num;
if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
- BIT_STATUS_AT(reg->capture_history, n) != 0) {
+ MEM_STATUS_AT(reg->capture_history, n) != 0) {
child = history_node_new();
CHECK_NULL_RETURN_MEMERR(child);
child->group = n;
@@ -1156,7 +1165,7 @@ static int backref_match_at_nested_level(regex_t* reg
if (k->type == STK_MEM_START) {
if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
pstart = k->u.mem.pstr;
- if (pend != NULL_UCHARP) {
+ if (IS_NOT_NULL(pend)) {
if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
p = pstart;
ss = *s;
@@ -1329,8 +1338,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n",
- (int )str, (int )end, (int )sstart, (int )sprev);
+ fprintf(stderr, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
+ str, end, sstart, sprev);
fprintf(stderr, "size: %d, start offset: %d\n",
(int )(end - str), (int )(sstart - str));
#endif
@@ -1341,9 +1350,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
while (1) {
#ifdef ONIG_DEBUG_MATCH
{
+ static unsigned int counter = 1;
+
UChar *q, *bp, buf[50];
int len;
- fprintf(stderr, "%4d> \"", (int )(s - str));
+ fprintf(stderr, "%7u: %7ld: %4d> \"",
+ counter, GET_STACK_INDEX(stk), (int )(s - str));
+ counter++;
+
bp = buf;
for (i = 0, q = s; i < 7 && q < end; i++) {
len = enclen(encode, q);
@@ -1353,8 +1367,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
else { xmemcpy(bp, "\"", 1); bp += 1; }
*bp = 0;
fputs((char* )buf, stderr);
+
for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr);
- onig_print_compiled_byte_code(stderr, p, NULL, encode);
+ fprintf(stderr, "%4d: ", (int )(p - reg->p));
+ onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode);
fprintf(stderr, "\n");
}
#endif
@@ -1386,12 +1402,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
rmt[0].rm_eo = s - str;
for (i = 1; i <= num_mem; i++) {
if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (BIT_STATUS_AT(reg->bt_mem_start, i))
+ if (MEM_STATUS_AT(reg->bt_mem_start, i))
rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
else
rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str;
- rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i)
+ rmt[i].rm_eo = (MEM_STATUS_AT(reg->bt_mem_end, i)
? STACK_AT(mem_end_stk[i])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[i])) - str;
}
@@ -1406,12 +1422,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
region->end[0] = s - str;
for (i = 1; i <= num_mem; i++) {
if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (BIT_STATUS_AT(reg->bt_mem_start, i))
+ if (MEM_STATUS_AT(reg->bt_mem_start, i))
region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
else
region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
- region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
+ region->end[i] = (MEM_STATUS_AT(reg->bt_mem_end, i)
? STACK_AT(mem_end_stk[i])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[i])) - str;
}
@@ -2156,7 +2172,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem_end_stk[mem] = (OnigStackIndex )((void* )s);
STACK_GET_MEM_START(mem, stkp);
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ if (MEM_STATUS_AT(reg->bt_mem_start, mem))
mem_start_stk[mem] = GET_STACK_INDEX(stkp);
else
mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
@@ -2190,12 +2206,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ if (MEM_STATUS_AT(reg->bt_mem_start, mem))
pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
else
pstart = (UChar* )((void* )mem_start_stk[mem]);
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = pend - pstart;
@@ -2222,12 +2238,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ if (MEM_STATUS_AT(reg->bt_mem_start, mem))
pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
else
pstart = (UChar* )((void* )mem_start_stk[mem]);
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = pend - pstart;
@@ -2254,12 +2270,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ if (MEM_STATUS_AT(reg->bt_mem_start, mem))
pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
else
pstart = (UChar* )((void* )mem_start_stk[mem]);
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = pend - pstart;
@@ -2293,12 +2309,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
- if (BIT_STATUS_AT(reg->bt_mem_start, mem))
+ if (MEM_STATUS_AT(reg->bt_mem_start, mem))
pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
else
pstart = (UChar* )((void* )mem_start_stk[mem]);
- pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
+ pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = pend - pstart;
@@ -2364,25 +2380,24 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
#endif
- case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START);
+ case OP_EMPTY_CHECK_START: MOP_IN(OP_EMPTY_CHECK_START);
GET_MEMNUM_INC(mem, p); /* mem: null check id */
- STACK_PUSH_NULL_CHECK_START(mem, s);
+ STACK_PUSH_EMPTY_CHECK_START(mem, s);
MOP_OUT;
continue;
break;
- case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END);
+ case OP_EMPTY_CHECK_END: MOP_IN(OP_EMPTY_CHECK_END);
{
- int isnull;
+ int is_empty;
GET_MEMNUM_INC(mem, p); /* mem: null check id */
- STACK_NULL_CHECK(isnull, mem, s);
- if (isnull) {
+ STACK_EMPTY_CHECK(is_empty, mem, s);
+ if (is_empty) {
#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n",
- (int )mem, (int )s);
+ fprintf(stderr, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s);
#endif
- null_check_found:
+ empty_check_found:
/* empty loop founded, skip next instruction */
switch (*p++) {
case OP_JUMP:
@@ -2405,20 +2420,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
continue;
break;
-#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
- case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST);
+#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
+ case OP_EMPTY_CHECK_END_MEMST: MOP_IN(OP_EMPTY_CHECK_END_MEMST);
{
- int isnull;
+ int is_empty;
GET_MEMNUM_INC(mem, p); /* mem: null check id */
- STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
- if (isnull) {
+ STACK_EMPTY_CHECK_MEMST(is_empty, mem, s, reg);
+ if (is_empty) {
#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n",
- (int )mem, (int )s);
+ fprintf(stderr, "EMPTY_CHECK_END_MEMST: skip id:%d, s:%p\n", (int)mem, s);
#endif
- if (isnull == -1) goto fail;
- goto null_check_found;
+ if (is_empty == -1) goto fail;
+ goto empty_check_found;
}
}
MOP_OUT;
@@ -2427,27 +2441,27 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
#ifdef USE_SUBEXP_CALL
- case OP_NULL_CHECK_END_MEMST_PUSH:
- MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
+ case OP_EMPTY_CHECK_END_MEMST_PUSH:
+ MOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH);
{
- int isnull;
+ int is_empty;
GET_MEMNUM_INC(mem, p); /* mem: null check id */
-#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
- STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
+#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
+ STACK_EMPTY_CHECK_MEMST_REC(is_empty, mem, s, reg);
#else
- STACK_NULL_CHECK_REC(isnull, mem, s);
+ STACK_EMPTY_CHECK_REC(is_empty, mem, s);
#endif
- if (isnull) {
+ if (is_empty) {
#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n",
- (int )mem, (int )s);
+ fprintf(stderr, "EMPTY_CHECK_END_MEMST_PUSH: skip id:%d, s:%p\n",
+ (int )mem, s);
#endif
- if (isnull == -1) goto fail;
- goto null_check_found;
+ if (is_empty == -1) goto fail;
+ goto empty_check_found;
}
else {
- STACK_PUSH_NULL_CHECK_END(mem);
+ STACK_PUSH_EMPTY_CHECK_END(mem);
}
}
MOP_OUT;
@@ -2923,8 +2937,8 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
int skip, tlen1;
#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n",
- (int )text, (int )text_end, (int )text_range);
+ fprintf(stderr, "bm_search_notrev: text: %p, text_end: %p, text_range: %p\n",
+ text, text_end, text_range);
#endif
tail = target_end - 1;
@@ -3143,8 +3157,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
UChar *p, *pprev = (UChar* )NULL;
#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n",
- (int )str, (int )end, (int )s, (int )range);
+ fprintf(stderr, "forward_search_range: str: %p, end: %p, s: %p, range: %p\n",
+ str, end, s, range);
#endif
p = s;
@@ -3309,7 +3323,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
&(reg->int_map_backward));
- if (r) return r;
+ if (r != 0) return r;
}
p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
end, p);
@@ -3398,8 +3412,8 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
- "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n",
- (int )str, (int )(end - str), (int )(start - str), (int )(range - str));
+ "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n",
+ str, (int )(end - str), (int )(start - str), (int )(range - str));
#endif
if (region
@@ -3408,7 +3422,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
#endif
) {
r = onig_region_resize_clear(region, reg->num_mem + 1);
- if (r) goto finish_no_msa;
+ if (r != 0) goto finish_no_msa;
}
if (start > end || start < str) goto mismatch_no_msa;
@@ -3853,7 +3867,7 @@ onig_number_of_capture_histories(regex_t* reg)
n = 0;
for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
- if (BIT_STATUS_AT(reg->capture_history, i) != 0)
+ if (MEM_STATUS_AT(reg->capture_history, i) != 0)
n++;
}
return n;
diff --git a/src/regext.c b/src/regext.c
index 1903174..62a557c 100644
--- a/src/regext.c
+++ b/src/regext.c
@@ -2,7 +2,7 @@
regext.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -171,7 +171,7 @@ onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
if (ci->pattern_enc != ci->target_enc) {
r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end,
&cpat, &cpat_end);
- if (r) return r;
+ if (r != 0) return r;
}
else {
cpat = (UChar* )pattern;
@@ -186,10 +186,10 @@ onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc,
ci->syntax);
- if (r) goto err;
+ if (r != 0) goto err;
r = onig_compile(*reg, cpat, cpat_end, einfo);
- if (r) {
+ if (r != 0) {
err:
onig_free(*reg);
*reg = NULL;
diff --git a/src/regint.h b/src/regint.h
index 9835143..8da27d2 100644
--- a/src/regint.h
+++ b/src/regint.h
@@ -4,7 +4,7 @@
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -59,14 +59,14 @@
#define USE_NAMED_GROUP
#define USE_SUBEXP_CALL
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
-#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
+#define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
/* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */
/* internal config */
#define USE_OP_PUSH_OR_JUMP_EXACT
-#define USE_QTFR_PEEK_NEXT
+#define USE_QUANT_PEEK_NEXT
#define USE_ST_LIBRARY
#define INIT_MATCH_STACK_SIZE 160
@@ -161,6 +161,10 @@
#endif
#endif
+#ifdef HAVE_INTTYPES_H
+#include <inttypes.h>
+#endif
+
#ifdef __BORLANDC__
#include <malloc.h>
#endif
@@ -169,6 +173,12 @@
# include <stdio.h>
#endif
+#ifdef _WIN32
+#if defined(_MSC_VER) && (_MSC_VER < 1300)
+typedef int intptr_t;
+#endif
+#endif
+
#include "regenc.h"
#ifdef MIN
@@ -230,24 +240,28 @@
#define ONIG_OPTIMIZE_MAP 5 /* char map */
/* bit status */
-typedef unsigned int BitStatusType;
-
-#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8)
-#define BIT_STATUS_CLEAR(stats) (stats) = 0
-#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0)
-#define BIT_STATUS_AT(stats,n) \
- ((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & (1 << n)) : ((stats) & 1))
-
-#define BIT_STATUS_ON_AT(stats,n) do {\
- if ((n) < (int )BIT_STATUS_BITS_NUM) \
- (stats) |= (1 << (n));\
+typedef unsigned int MemStatusType;
+
+#define MEM_STATUS_BITS_NUM (sizeof(MemStatusType) * 8)
+#define MEM_STATUS_CLEAR(stats) (stats) = 0
+#define MEM_STATUS_ON_ALL(stats) (stats) = ~((MemStatusType )0)
+#define MEM_STATUS_AT(stats,n) /* tests bit n of stats; n >= MEM_STATUS_BITS_NUM tests bit 0 instead */ \
+ ((n) < (int )MEM_STATUS_BITS_NUM ? ((stats) & ((MemStatusType )1 << (n))) : ((stats) & 1))
+#define MEM_STATUS_AT0(stats,n) /* tests bit n only when 0 < n < MEM_STATUS_BITS_NUM; otherwise tests bit 0 */ \
+ ((n) > 0 && (n) < (int )MEM_STATUS_BITS_NUM ? ((stats) & ((MemStatusType )1 << (n))) : ((stats) & 1))
+
+#define MEM_STATUS_ON(stats,n) do {\
+ if ((n) < (int )MEM_STATUS_BITS_NUM) {\
+ if ((n) != 0)\
+ (stats) |= ((MemStatusType )1 << (n));\
+ }\
else\
(stats) |= 1;\
} while (0)
-#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\
- if ((n) < (int )BIT_STATUS_BITS_NUM)\
- (stats) |= (1 << (n));\
+#define MEM_STATUS_ON_SIMPLE(stats,n) do {\
+ if ((n) < (int )MEM_STATUS_BITS_NUM)\
+ (stats) |= ((MemStatusType )1 << (n));\
} while (0)
@@ -394,25 +408,28 @@ typedef struct _BBuf {
#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)]
-#define ANCHOR_BEGIN_BUF (1<<0)
-#define ANCHOR_BEGIN_LINE (1<<1)
-#define ANCHOR_BEGIN_POSITION (1<<2)
-#define ANCHOR_END_BUF (1<<3)
-#define ANCHOR_SEMI_END_BUF (1<<4)
-#define ANCHOR_END_LINE (1<<5)
-
-#define ANCHOR_WORD_BOUND (1<<6)
-#define ANCHOR_NOT_WORD_BOUND (1<<7)
-#define ANCHOR_WORD_BEGIN (1<<8)
-#define ANCHOR_WORD_END (1<<9)
-#define ANCHOR_PREC_READ (1<<10)
-#define ANCHOR_PREC_READ_NOT (1<<11)
-#define ANCHOR_LOOK_BEHIND (1<<12)
-#define ANCHOR_LOOK_BEHIND_NOT (1<<13)
-
+/* has body */
+#define ANCHOR_PREC_READ (1<<0)
+#define ANCHOR_PREC_READ_NOT (1<<1)
+#define ANCHOR_LOOK_BEHIND (1<<2)
+#define ANCHOR_LOOK_BEHIND_NOT (1<<3)
+/* no body */
+#define ANCHOR_BEGIN_BUF (1<<4)
+#define ANCHOR_BEGIN_LINE (1<<5)
+#define ANCHOR_BEGIN_POSITION (1<<6)
+#define ANCHOR_END_BUF (1<<7)
+#define ANCHOR_SEMI_END_BUF (1<<8)
+#define ANCHOR_END_LINE (1<<9)
+#define ANCHOR_WORD_BOUND (1<<10)
+#define ANCHOR_NOT_WORD_BOUND (1<<11)
+#define ANCHOR_WORD_BEGIN (1<<12)
+#define ANCHOR_WORD_END (1<<13)
#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */
+#define ANCHOR_HAS_BODY(a) ((a)->type < ANCHOR_BEGIN_BUF)
+
+
/* operation code */
enum OpCode {
OP_FINISH = 0, /* matching process terminator (no more alternative) */
@@ -490,10 +507,10 @@ enum OpCode {
OP_REPEAT_INC_NG, /* non greedy */
OP_REPEAT_INC_SG, /* search and get in stack */
OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
- OP_NULL_CHECK_START, /* null loop checker start */
- OP_NULL_CHECK_END, /* null loop checker end */
- OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
- OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
+ OP_EMPTY_CHECK_START, /* null loop checker start */
+ OP_EMPTY_CHECK_END, /* null loop checker end */
+ OP_EMPTY_CHECK_END_MEMST, /* null loop checker end (with capture status) */
+ OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
OP_PUSH_POS, /* (?=...) start */
OP_POP_POS, /* (?=...) end */
@@ -581,8 +598,8 @@ typedef void* PointerType;
#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE
#define SIZE_OP_POP_STOP_BT SIZE_OPCODE
-#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_EMPTY_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_EMPTY_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH)
#define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
#define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE
@@ -644,25 +661,10 @@ typedef void* PointerType;
#define FLAG_NCCLASS_SHARE (1<<1)
#define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT)
-#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE)
#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
-#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE)
-
-typedef struct {
- int type;
- /* struct _Node* next; */
- /* unsigned int flags; */
-} NodeBase;
-
-typedef struct {
- NodeBase base;
- unsigned int flags;
- BitSet bs;
- BBuf* mbuf; /* multi-byte info or NULL */
-} CClassNode;
-typedef long OnigStackIndex;
+typedef intptr_t OnigStackIndex;
typedef struct _OnigStackType {
unsigned int type;
@@ -693,7 +695,7 @@ typedef struct _OnigStackType {
struct {
int num; /* null check id */
UChar *pstr; /* start position */
- } null_check;
+ } empty_check;
#ifdef USE_SUBEXP_CALL
struct {
UChar *ret_addr; /* byte code position */
@@ -744,7 +746,7 @@ typedef struct {
extern OnigOpInfoType OnigOpInfo[];
-extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc));
+extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, UChar* start, OnigEncoding enc));
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));
@@ -758,8 +760,7 @@ extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncod
extern int onig_bbuf_init P_((BBuf* buf, int size));
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_transfer P_((regex_t* to, regex_t* from));
-extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
-extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc));
+extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc));
/* strend hash */
typedef void hash_table_type;
diff --git a/src/regparse.c b/src/regparse.c
index 8153513..a5f8e5b 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -159,14 +159,25 @@ bbuf_clone(BBuf** rto, BBuf* from)
*rto = to = (BBuf* )xmalloc(sizeof(BBuf));
CHECK_NULL_RETURN_MEMERR(to);
r = BBUF_INIT(to, from->alloc);
- if (r != 0) return r;
+ if (r != 0) {
+ xfree(to->p); xfree(to); /* release the BBuf itself too: *rto is cleared below, so nothing else can free it */
+ *rto = 0;
+ return r;
+ }
to->used = from->used;
xmemcpy(to->p, from->p, from->used);
return 0;
}
-#define BACKREF_REL_TO_ABS(rel_no, env) \
- ((env)->num_mem + 1 + (rel_no))
+static int backref_rel_to_abs(int rel_no, ScanEnv* env) /* maps a relative reference number onto env->num_mem */
+{
+ if (rel_no > 0) {
+ return env->num_mem + rel_no; /* positive offset: counted forward from env->num_mem */
+ }
+ else {
+ return env->num_mem + 1 + rel_no; /* zero or negative offset: rel_no == -1 yields env->num_mem */
+ }
+}
#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f))
@@ -179,7 +190,7 @@ bbuf_clone(BBuf** rto, BBuf* from)
#define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\
if (! ONIGENC_IS_SINGLEBYTE(enc)) {\
r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\
- if (r) return r;\
+ if (r != 0) return r;\
}\
} while (0)
@@ -422,6 +433,8 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
int result;
key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key));
+ CHECK_NULL_RETURN_MEMERR(key);
+
key->s = (UChar* )str_key;
key->end = (UChar* )end_key;
result = onig_st_insert(table, (st_data_t )key, value);
@@ -519,7 +532,7 @@ onig_names_free(regex_t* reg)
NameTable* t;
r = names_clear(reg);
- if (r) return r;
+ if (r != 0) return r;
t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) onig_st_free_table(t);
@@ -700,7 +713,7 @@ onig_names_free(regex_t* reg)
NameTable* t;
r = names_clear(reg);
- if (r) return r;
+ if (r != 0) return r;
t = (NameTable* )reg->name_table;
if (IS_NOT_NULL(t)) xfree(t);
@@ -762,6 +775,7 @@ onig_number_of_names(regex_t* reg)
static int
name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
{
+ int r;
int alloc;
NameEntry* e;
NameTable* t = (NameTable* )reg->name_table;
@@ -783,8 +797,9 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env)
if (IS_NULL(e->name)) {
xfree(e); return ONIGERR_MEMORY;
}
- onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
- (HashDataType )e);
+ r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)),
+ (HashDataType )e);
+ if (r < 0) { xfree(e->name); xfree(e); return r; } /* NOTE(review): assumes r < 0 means e was not stored in the table; otherwise it would leak here */
e->name_len = name_end - name;
e->back_num = 0;
@@ -918,14 +933,14 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name,
extern int
onig_name_to_group_numbers(regex_t* reg, const UChar* name,
- const UChar* name_end, int** nums)
+ const UChar* name_end, int** nums)
{
return ONIG_NO_SUPPORT_CONFIG;
}
extern int
onig_name_to_backref_number(regex_t* reg, const UChar* name,
- const UChar* name_end, OnigRegion* region)
+ const UChar* name_end, OnigRegion* region)
{
return ONIG_NO_SUPPORT_CONFIG;
}
@@ -962,29 +977,32 @@ onig_noname_group_capture_is_active(regex_t* reg)
}
-#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16
+#define INIT_SCANENV_MEMENV_ALLOC_SIZE 16
static void
scan_env_clear(ScanEnv* env)
{
- int i;
-
- BIT_STATUS_CLEAR(env->capture_history);
- BIT_STATUS_CLEAR(env->bt_mem_start);
- BIT_STATUS_CLEAR(env->bt_mem_end);
- BIT_STATUS_CLEAR(env->backrefed_mem);
+ MEM_STATUS_CLEAR(env->capture_history);
+ MEM_STATUS_CLEAR(env->bt_mem_start);
+ MEM_STATUS_CLEAR(env->bt_mem_end);
+ MEM_STATUS_CLEAR(env->backrefed_mem);
env->error = (UChar* )NULL;
env->error_end = (UChar* )NULL;
env->num_call = 0;
+
+#ifdef USE_SUBEXP_CALL
+ env->unset_addr_list = NULL;
+ env->has_call_zero = 0;
+#endif
+
env->num_mem = 0;
#ifdef USE_NAMED_GROUP
env->num_named = 0;
#endif
- env->mem_alloc = 0;
- env->mem_nodes_dynamic = (Node** )NULL;
+ env->mem_alloc = 0;
+ env->mem_env_dynamic = (MemEnv* )NULL;
- for (i = 0; i < SCANENV_MEMNODES_SIZE; i++)
- env->mem_nodes_static[i] = NULL_NODE;
+ xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
#ifdef USE_COMBINATION_EXPLOSION_CHECK
env->num_comb_exp_check = 0;
@@ -999,30 +1017,35 @@ static int
scan_env_add_mem_entry(ScanEnv* env)
{
int i, need, alloc;
- Node** p;
+ MemEnv* p;
need = env->num_mem + 1;
if (need > MaxCaptureNum && MaxCaptureNum != 0)
return ONIGERR_TOO_MANY_CAPTURES;
- if (need >= SCANENV_MEMNODES_SIZE) {
+ if (need >= SCANENV_MEMENV_SIZE) {
if (env->mem_alloc <= need) {
- if (IS_NULL(env->mem_nodes_dynamic)) {
- alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE;
- p = (Node** )xmalloc(sizeof(Node*) * alloc);
- xmemcpy(p, env->mem_nodes_static,
- sizeof(Node*) * SCANENV_MEMNODES_SIZE);
+ if (IS_NULL(env->mem_env_dynamic)) {
+ alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE;
+ p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc);
+ CHECK_NULL_RETURN_MEMERR(p);
+ xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static));
}
else {
alloc = env->mem_alloc * 2;
- p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc);
+ p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc);
+ CHECK_NULL_RETURN_MEMERR(p);
}
- CHECK_NULL_RETURN_MEMERR(p);
- for (i = env->num_mem + 1; i < alloc; i++)
- p[i] = NULL_NODE;
+ for (i = env->num_mem + 1; i < alloc; i++) {
+ p[i].node = NULL_NODE;
+#if 0
+ p[i].in = 0;
+ p[i].recursion = 0;
+#endif
+ }
- env->mem_nodes_dynamic = p;
+ env->mem_env_dynamic = p;
env->mem_alloc = alloc;
}
}
@@ -1035,7 +1058,7 @@ static int
scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
{
if (env->num_mem >= num)
- SCANENV_MEM_NODES(env)[num] = node;
+ SCANENV_MEMENV(env)[num].node = node;
else
return ONIGERR_PARSER_BUG;
return 0;
@@ -1051,19 +1074,19 @@ onig_node_free(Node* node)
fprintf(stderr, "onig_node_free: %p\n", node);
#endif
- switch (NTYPE(node)) {
- case NT_STR:
- if (NSTR(node)->capa != 0 &&
- IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
- xfree(NSTR(node)->s);
+ switch (NODE_TYPE(node)) {
+ case NODE_STR:
+ if (STR_(node)->capa != 0 &&
+ IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
+ xfree(STR_(node)->s);
}
break;
- case NT_LIST:
- case NT_ALT:
- onig_node_free(NCAR(node));
+ case NODE_LIST:
+ case NODE_ALT:
+ onig_node_free(NODE_CAR(node));
{
- Node* next_node = NCDR(node);
+ Node* next_node = NODE_CDR(node);
xfree(node);
node = next_node;
@@ -1071,34 +1094,29 @@ onig_node_free(Node* node)
}
break;
- case NT_CCLASS:
+ case NODE_CCLASS:
{
- CClassNode* cc = NCCLASS(node);
+ CClassNode* cc = CCLASS_(node);
- if (IS_NCCLASS_SHARE(cc)) return ;
if (cc->mbuf)
bbuf_free(cc->mbuf);
}
break;
- case NT_QTFR:
- if (NQTFR(node)->target)
- onig_node_free(NQTFR(node)->target);
- break;
-
- case NT_ENCLOSE:
- if (NENCLOSE(node)->target)
- onig_node_free(NENCLOSE(node)->target);
+ case NODE_BREF:
+ if (IS_NOT_NULL(BREF_(node)->back_dynamic))
+ xfree(BREF_(node)->back_dynamic);
break;
- case NT_BREF:
- if (IS_NOT_NULL(NBREF(node)->back_dynamic))
- xfree(NBREF(node)->back_dynamic);
+ case NODE_QUANT:
+ case NODE_ENCLOSURE:
+ case NODE_ANCHOR:
+ if (NODE_BODY(node))
+ onig_node_free(NODE_BODY(node));
break;
- case NT_ANCHOR:
- if (NANCHOR(node)->target)
- onig_node_free(NANCHOR(node)->target);
+ case NODE_CTYPE:
+ case NODE_CALL:
break;
}
@@ -1111,7 +1129,9 @@ node_new(void)
Node* node;
node = (Node* )xmalloc(sizeof(Node));
- /* xmemset(node, 0, sizeof(Node)); */
+ CHECK_NULL_RETURN(node); /* xmalloc may fail; do not clear through a NULL pointer */
+ xmemset(node, 0, sizeof(*node));
+
#ifdef DEBUG_NODE_FREE
fprintf(stderr, "node_new: %p\n", node);
#endif
@@ -1134,8 +1154,8 @@ node_new_cclass(void)
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NTYPE(node, NT_CCLASS);
- initialize_cclass(NCCLASS(node));
+ SET_NODE_TYPE(node, NODE_CCLASS);
+ initialize_cclass(CCLASS_(node));
return node;
}
@@ -1145,19 +1165,9 @@ node_new_ctype(int type, int not)
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NTYPE(node, NT_CTYPE);
- NCTYPE(node)->ctype = type;
- NCTYPE(node)->not = not;
- return node;
-}
-
-static Node*
-node_new_anychar(void)
-{
- Node* node = node_new();
- CHECK_NULL_RETURN(node);
-
- SET_NTYPE(node, NT_CANY);
+ SET_NODE_TYPE(node, NODE_CTYPE);
+ CTYPE_(node)->ctype = type;
+ CTYPE_(node)->not = not;
return node;
}
@@ -1167,9 +1177,9 @@ node_new_list(Node* left, Node* right)
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NTYPE(node, NT_LIST);
- NCAR(node) = left;
- NCDR(node) = right;
+ SET_NODE_TYPE(node, NODE_LIST);
+ NODE_CAR(node) = left;
+ NODE_CDR(node) = right;
return node;
}
@@ -1188,10 +1198,10 @@ onig_node_list_add(Node* list, Node* x)
if (IS_NULL(n)) return NULL_NODE;
if (IS_NOT_NULL(list)) {
- while (IS_NOT_NULL(NCDR(list)))
- list = NCDR(list);
+ while (IS_NOT_NULL(NODE_CDR(list)))
+ list = NODE_CDR(list);
- NCDR(list) = n;
+ NODE_CDR(list) = n;
}
return n;
@@ -1203,9 +1213,9 @@ onig_node_new_alt(Node* left, Node* right)
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NTYPE(node, NT_ALT);
- NCAR(node) = left;
- NCDR(node) = right;
+ SET_NODE_TYPE(node, NODE_ALT);
+ NODE_CAR(node) = left;
+ NODE_CDR(node) = right;
return node;
}
@@ -1215,10 +1225,9 @@ onig_node_new_anchor(int type)
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NTYPE(node, NT_ANCHOR);
- NANCHOR(node)->type = type;
- NANCHOR(node)->target = NULL;
- NANCHOR(node)->char_len = -1;
+ SET_NODE_TYPE(node, NODE_ANCHOR);
+ ANCHOR_(node)->type = type;
+ ANCHOR_(node)->char_len = -1;
return node;
}
@@ -1234,31 +1243,30 @@ node_new_backref(int back_num, int* backrefs, int by_name,
CHECK_NULL_RETURN(node);
- SET_NTYPE(node, NT_BREF);
- NBREF(node)->state = 0;
- NBREF(node)->back_num = back_num;
- NBREF(node)->back_dynamic = (int* )NULL;
+ SET_NODE_TYPE(node, NODE_BREF);
+ BREF_(node)->back_num = back_num;
+ BREF_(node)->back_dynamic = (int* )NULL;
if (by_name != 0)
- NBREF(node)->state |= NST_NAME_REF;
+ NODE_STATUS_ADD(node, NST_BY_NAME);
#ifdef USE_BACKREF_WITH_LEVEL
if (exist_level != 0) {
- NBREF(node)->state |= NST_NEST_LEVEL;
- NBREF(node)->nest_level = nest_level;
+ NODE_STATUS_ADD(node, NST_NEST_LEVEL);
+ BREF_(node)->nest_level = nest_level;
}
#endif
for (i = 0; i < back_num; i++) {
if (backrefs[i] <= env->num_mem &&
- IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) {
- NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */
+ IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) {
+ NODE_STATUS_ADD(node, NST_RECURSION); /* /...(\1).../ */
break;
}
}
if (back_num <= NODE_BACKREFS_SIZE) {
for (i = 0; i < back_num; i++)
- NBREF(node)->back_static[i] = backrefs[i];
+ BREF_(node)->back_static[i] = backrefs[i];
}
else {
int* p = (int* )xmalloc(sizeof(int) * back_num);
@@ -1266,7 +1274,7 @@ node_new_backref(int back_num, int* backrefs, int by_name,
onig_node_free(node);
return NULL;
}
- NBREF(node)->back_dynamic = p;
+ BREF_(node)->back_dynamic = p;
for (i = 0; i < back_num; i++)
p[i] = backrefs[i];
}
@@ -1275,17 +1283,17 @@ node_new_backref(int back_num, int* backrefs, int by_name,
#ifdef USE_SUBEXP_CALL
static Node*
-node_new_call(UChar* name, UChar* name_end, int gnum)
+node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NTYPE(node, NT_CALL);
- NCALL(node)->state = 0;
- NCALL(node)->target = NULL_NODE;
- NCALL(node)->name = name;
- NCALL(node)->name_end = name_end;
- NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */
+ SET_NODE_TYPE(node, NODE_CALL);
+ CALL_(node)->by_number = by_number;
+ CALL_(node)->name = name;
+ CALL_(node)->name_end = name_end;
+ CALL_(node)->group_num = gnum;
+ CALL_(node)->entry_count = 1;
return node;
}
#endif
@@ -1296,69 +1304,76 @@ node_new_quantifier(int lower, int upper, int by_number)
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NTYPE(node, NT_QTFR);
- NQTFR(node)->state = 0;
- NQTFR(node)->target = NULL;
- NQTFR(node)->lower = lower;
- NQTFR(node)->upper = upper;
- NQTFR(node)->greedy = 1;
- NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY;
- NQTFR(node)->head_exact = NULL_NODE;
- NQTFR(node)->next_head_exact = NULL_NODE;
- NQTFR(node)->is_refered = 0;
+ SET_NODE_TYPE(node, NODE_QUANT);
+ QUANT_(node)->lower = lower;
+ QUANT_(node)->upper = upper;
+ QUANT_(node)->greedy = 1;
+ QUANT_(node)->body_empty_info = QUANT_BODY_IS_NOT_EMPTY;
+ QUANT_(node)->head_exact = NULL_NODE;
+ QUANT_(node)->next_head_exact = NULL_NODE;
+ QUANT_(node)->is_refered = 0;
if (by_number != 0)
- NQTFR(node)->state |= NST_BY_NUMBER;
+ NODE_STATUS_ADD(node, NST_BY_NUMBER);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
- NQTFR(node)->comb_exp_check_num = 0;
+ QUANT_(node)->comb_exp_check_num = 0;
#endif
return node;
}
static Node*
-node_new_enclose(int type)
+node_new_enclosure(int type)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NTYPE(node, NT_ENCLOSE);
- NENCLOSE(node)->type = type;
- NENCLOSE(node)->state = 0;
- NENCLOSE(node)->regnum = 0;
- NENCLOSE(node)->option = 0;
- NENCLOSE(node)->target = NULL;
- NENCLOSE(node)->call_addr = -1;
- NENCLOSE(node)->opt_count = 0;
+ SET_NODE_TYPE(node, NODE_ENCLOSURE);
+ ENCLOSURE_(node)->type = type;
+
+ switch (type) {
+ case ENCLOSURE_MEMORY:
+ ENCLOSURE_(node)->m.regnum = 0;
+ ENCLOSURE_(node)->m.called_addr = -1;
+ ENCLOSURE_(node)->m.entry_count = 1;
+ ENCLOSURE_(node)->m.called_state = 0;
+ break;
+
+ case ENCLOSURE_OPTION:
+ ENCLOSURE_(node)->o.option = 0;
+ break;
+
+ case ENCLOSURE_STOP_BACKTRACK:
+ break;
+ }
+
+ ENCLOSURE_(node)->opt_count = 0;
return node;
}
extern Node*
-onig_node_new_enclose(int type)
+onig_node_new_enclosure(int type)
{
- return node_new_enclose(type);
+ return node_new_enclosure(type);
}
static Node*
-node_new_enclose_memory(OnigOptionType option, int is_named)
+node_new_enclosure_memory(int is_named)
{
- Node* node = node_new_enclose(ENCLOSE_MEMORY);
+ Node* node = node_new_enclosure(ENCLOSURE_MEMORY);
CHECK_NULL_RETURN(node);
if (is_named != 0)
- SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP);
+ NODE_STATUS_ADD(node, NST_NAMED_GROUP);
-#ifdef USE_SUBEXP_CALL
- NENCLOSE(node)->option = option;
-#endif
return node;
}
static Node*
node_new_option(OnigOptionType option)
{
- Node* node = node_new_enclose(ENCLOSE_OPTION);
+ Node* node = node_new_enclosure(ENCLOSURE_OPTION);
CHECK_NULL_RETURN(node);
- NENCLOSE(node)->option = option;
+ ENCLOSURE_(node)->o.option = option;
return node;
}
@@ -1368,31 +1383,31 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end)
int addlen = end - s;
if (addlen > 0) {
- int len = NSTR(node)->end - NSTR(node)->s;
+ int len = STR_(node)->end - STR_(node)->s;
- if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
+ if (STR_(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) {
UChar* p;
int capa = len + addlen + NODE_STR_MARGIN;
- if (capa <= NSTR(node)->capa) {
- onig_strcpy(NSTR(node)->s + len, s, end);
+ if (capa <= STR_(node)->capa) {
+ onig_strcpy(STR_(node)->s + len, s, end);
}
else {
- if (NSTR(node)->s == NSTR(node)->buf)
- p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end,
+ if (STR_(node)->s == STR_(node)->buf)
+ p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end,
s, end, capa);
else
- p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa);
+ p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa);
CHECK_NULL_RETURN_MEMERR(p);
- NSTR(node)->s = p;
- NSTR(node)->capa = capa;
+ STR_(node)->s = p;
+ STR_(node)->capa = capa;
}
}
else {
- onig_strcpy(NSTR(node)->s + len, s, end);
+ onig_strcpy(STR_(node)->s + len, s, end);
}
- NSTR(node)->end = NSTR(node)->s + len + addlen;
+ STR_(node)->end = STR_(node)->s + len + addlen;
}
return 0;
@@ -1417,25 +1432,25 @@ node_str_cat_char(Node* node, UChar c)
extern void
onig_node_conv_to_str_node(Node* node, int flag)
{
- SET_NTYPE(node, NT_STR);
- NSTR(node)->flag = flag;
- NSTR(node)->capa = 0;
- NSTR(node)->s = NSTR(node)->buf;
- NSTR(node)->end = NSTR(node)->buf;
+ SET_NODE_TYPE(node, NODE_STR);
+ STR_(node)->flag = flag;
+ STR_(node)->capa = 0;
+ STR_(node)->s = STR_(node)->buf;
+ STR_(node)->end = STR_(node)->buf;
}
extern void
onig_node_str_clear(Node* node)
{
- if (NSTR(node)->capa != 0 &&
- IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
- xfree(NSTR(node)->s);
+ if (STR_(node)->capa != 0 &&
+ IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) {
+ xfree(STR_(node)->s);
}
- NSTR(node)->capa = 0;
- NSTR(node)->flag = 0;
- NSTR(node)->s = NSTR(node)->buf;
- NSTR(node)->end = NSTR(node)->buf;
+ STR_(node)->capa = 0;
+ STR_(node)->flag = 0;
+ STR_(node)->s = STR_(node)->buf;
+ STR_(node)->end = STR_(node)->buf;
}
static Node*
@@ -1444,11 +1459,11 @@ node_new_str(const UChar* s, const UChar* end)
Node* node = node_new();
CHECK_NULL_RETURN(node);
- SET_NTYPE(node, NT_STR);
- NSTR(node)->capa = 0;
- NSTR(node)->flag = 0;
- NSTR(node)->s = NSTR(node)->buf;
- NSTR(node)->end = NSTR(node)->buf;
+ SET_NODE_TYPE(node, NODE_STR);
+ STR_(node)->capa = 0;
+ STR_(node)->flag = 0;
+ STR_(node)->s = STR_(node)->buf;
+ STR_(node)->end = STR_(node)->buf;
if (onig_node_str_cat(node, s, end)) {
onig_node_free(node);
return NULL;
@@ -1495,7 +1510,7 @@ str_node_split_last_char(StrNode* sn, OnigEncoding enc)
p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
if (p && p > sn->s) { /* can be split. */
n = node_new_str(p, sn->end);
- if ((sn->flag & NSTR_RAW) != 0)
+ if ((sn->flag & STRING_RAW) != 0)
NSTRING_SET_RAW(n);
sn->end = (UChar* )p;
@@ -1540,7 +1555,7 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
PFETCH_READY;
num = 0;
- while (!PEND) {
+ while (! PEND) {
PFETCH(c);
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
val = (unsigned int )DIGITVAL(c);
@@ -1596,7 +1611,7 @@ scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
PFETCH_READY;
num = 0;
- while (!PEND && maxlen-- != 0) {
+ while (! PEND && maxlen-- != 0) {
PFETCH(c);
if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
val = ODIGITVAL(c);
@@ -1631,9 +1646,13 @@ new_code_range(BBuf** pbuf)
BBuf* bbuf;
bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
- CHECK_NULL_RETURN_MEMERR(*pbuf);
- r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE);
- if (r) return r;
+ CHECK_NULL_RETURN_MEMERR(bbuf);
+ r = BBUF_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);
+ if (r != 0) {
+ xfree(bbuf);
+ *pbuf = 0;
+ return r;
+ }
n = 0;
BBUF_WRITE_CODE_POINT(bbuf, 0, n);
@@ -1654,7 +1673,7 @@ add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
if (IS_NULL(*pbuf)) {
r = new_code_range(pbuf);
- if (r) return r;
+ if (r != 0) return r;
bbuf = *pbuf;
n = 0;
}
@@ -2069,27 +2088,27 @@ conv_backslash_value(OnigCodePoint c, ScanEnv* env)
static int
is_invalid_quantifier_target(Node* node)
{
- switch (NTYPE(node)) {
- case NT_ANCHOR:
+ switch (NODE_TYPE(node)) {
+ case NODE_ANCHOR:
return 1;
break;
- case NT_ENCLOSE:
+ case NODE_ENCLOSURE:
/* allow enclosed elements */
- /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */
+ /* return is_invalid_quantifier_target(NODE_BODY(node)); */
break;
- case NT_LIST:
+ case NODE_LIST:
do {
- if (! is_invalid_quantifier_target(NCAR(node))) return 0;
- } while (IS_NOT_NULL(node = NCDR(node)));
+ if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0;
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
return 0;
break;
- case NT_ALT:
+ case NODE_ALT:
do {
- if (is_invalid_quantifier_target(NCAR(node))) return 1;
- } while (IS_NOT_NULL(node = NCDR(node)));
+ if (is_invalid_quantifier_target(NODE_CAR(node))) return 1;
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
default:
@@ -2100,7 +2119,7 @@ is_invalid_quantifier_target(Node* node)
/* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */
static int
-popular_quantifier_num(QtfrNode* q)
+popular_quantifier_num(QuantNode* q)
{
if (q->greedy) {
if (q->lower == 0) {
@@ -2147,10 +2166,10 @@ extern void
onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
{
int pnum, cnum;
- QtfrNode *p, *c;
+ QuantNode *p, *c;
- p = NQTFR(pnode);
- c = NQTFR(cnode);
+ p = QUANT_(pnode);
+ c = QUANT_(cnode);
pnum = popular_quantifier_num(p);
cnum = popular_quantifier_num(c);
if (pnum < 0 || cnum < 0) return ;
@@ -2160,36 +2179,36 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
*pnode = *cnode;
break;
case RQ_A:
- p->target = c->target;
+ NODE_BODY(pnode) = NODE_BODY(cnode);
p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
break;
case RQ_AQ:
- p->target = c->target;
+ NODE_BODY(pnode) = NODE_BODY(cnode);
p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
break;
case RQ_QQ:
- p->target = c->target;
+ NODE_BODY(pnode) = NODE_BODY(cnode);
p->lower = 0; p->upper = 1; p->greedy = 0;
break;
case RQ_P_QQ:
- p->target = cnode;
+ NODE_BODY(pnode) = cnode;
p->lower = 0; p->upper = 1; p->greedy = 0;
c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
return ;
break;
case RQ_PQ_Q:
- p->target = cnode;
+ NODE_BODY(pnode) = cnode;
p->lower = 0; p->upper = 1; p->greedy = 1;
c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
return ;
break;
case RQ_ASIS:
- p->target = cnode;
+ NODE_BODY(pnode) = cnode;
return ;
break;
}
- c->target = NULL_NODE;
+ NODE_BODY(cnode) = NULL_NODE;
onig_node_free(cnode);
}
@@ -2253,6 +2272,7 @@ typedef struct {
UChar* name;
UChar* name_end;
int gnum;
+ int by_number;
} call;
struct {
int ctype;
@@ -2441,19 +2461,27 @@ get_name_end_code_point(OnigCodePoint start)
return (OnigCodePoint )0;
}
+enum REF_NUM {
+ IS_NOT_NUM = 0,
+ IS_ABS_NUM = 1,
+ IS_REL_NUM = 2
+};
+
#ifdef USE_NAMED_GROUP
#ifdef USE_BACKREF_WITH_LEVEL
/*
\k<name+n>, \k<name-n>
\k<num+n>, \k<num-n>
\k<-num+n>, \k<-num-n>
+ \k<+num+n>, \k<+num-n>
*/
static int
fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
- UChar** rname_end, ScanEnv* env,
- int* rback_num, int* rlevel)
+ UChar** rname_end, ScanEnv* env,
+ int* rback_num, int* rlevel, enum REF_NUM* num_type)
{
- int r, sign, is_num, exist_level;
+ int r, sign, exist_level;
+ int digit_count;
OnigCodePoint end_code;
OnigCodePoint c = 0;
OnigEncoding enc = env->enc;
@@ -2463,12 +2491,14 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
PFETCH_READY;
*rback_num = 0;
- is_num = exist_level = 0;
+ exist_level = 0;
+ *num_type = IS_NOT_NUM;
sign = 1;
pnum_head = *src;
end_code = get_name_end_code_point(start_code);
+ digit_count = 0;
name_end = end;
r = 0;
if (PEND) {
@@ -2480,13 +2510,19 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
return ONIGERR_EMPTY_GROUP_NAME;
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
- is_num = 1;
+ *num_type = IS_ABS_NUM;
+ digit_count++;
}
else if (c == '-') {
- is_num = 2;
+ *num_type = IS_REL_NUM;
sign = -1;
pnum_head = p;
}
+ else if (c == '+') {
+ *num_type = IS_REL_NUM;
+ sign = 1;
+ pnum_head = p;
+ }
else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
@@ -2496,17 +2532,18 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
name_end = p;
PFETCH(c);
if (c == end_code || c == ')' || c == '+' || c == '-') {
- if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
+ if (*num_type != IS_NOT_NUM && digit_count == 0)
+ r = ONIGERR_INVALID_GROUP_NAME;
break;
}
- if (is_num != 0) {
+ if (*num_type != IS_NOT_NUM) {
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
- is_num = 1;
+ digit_count++;
}
else {
r = ONIGERR_INVALID_GROUP_NAME;
- is_num = 0;
+ *num_type = IS_NOT_NUM;
}
}
else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
@@ -2539,16 +2576,20 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
}
err:
- r = ONIGERR_INVALID_GROUP_NAME;
name_end = end;
+ err2:
+ r = ONIGERR_INVALID_GROUP_NAME;
}
end:
if (r == 0) {
- if (is_num != 0) {
+ if (*num_type != IS_NOT_NUM) {
*rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
- else if (*rback_num == 0) goto err;
+ else if (*rback_num == 0) {
+ if (*num_type == IS_REL_NUM)
+ goto err2;
+ }
*rback_num *= sign;
}
@@ -2570,9 +2611,11 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
*/
static int
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
- UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
+ UChar** rname_end, ScanEnv* env, int* rback_num,
+ enum REF_NUM* num_type, int ref)
{
- int r, is_num, sign;
+ int r, sign;
+ int digit_count;
OnigCodePoint end_code;
OnigCodePoint c = 0;
OnigEncoding enc = env->enc;
@@ -2584,10 +2627,11 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
end_code = get_name_end_code_point(start_code);
+ digit_count = 0;
name_end = end;
pnum_head = *src;
r = 0;
- is_num = 0;
+ *num_type = IS_NOT_NUM;
sign = 1;
if (PEND) {
return ONIGERR_EMPTY_GROUP_NAME;
@@ -2599,21 +2643,30 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
if (ref == 1)
- is_num = 1;
+ *num_type = IS_ABS_NUM;
else {
r = ONIGERR_INVALID_GROUP_NAME;
- is_num = 0;
}
+ digit_count++;
}
else if (c == '-') {
if (ref == 1) {
- is_num = 2;
+ *num_type = IS_REL_NUM;
sign = -1;
pnum_head = p;
}
else {
r = ONIGERR_INVALID_GROUP_NAME;
- is_num = 0;
+ }
+ }
+ else if (c == '+') {
+ if (ref == 1) {
+ *num_type = IS_REL_NUM;
+ sign = 1;
+ pnum_head = p;
+ }
+ else {
+ r = ONIGERR_INVALID_GROUP_NAME;
}
}
else if (!ONIGENC_IS_CODE_WORD(enc, c)) {
@@ -2626,20 +2679,22 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
name_end = p;
PFETCH_S(c);
if (c == end_code || c == ')') {
- if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME;
+ if (*num_type != IS_NOT_NUM && digit_count == 0)
+ r = ONIGERR_INVALID_GROUP_NAME;
break;
}
- if (is_num != 0) {
+ if (*num_type != IS_NOT_NUM) {
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
- is_num = 1;
+ digit_count++;
}
else {
if (!ONIGENC_IS_CODE_WORD(enc, c))
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
else
r = ONIGERR_INVALID_GROUP_NAME;
- is_num = 0;
+
+ *num_type = IS_NOT_NUM;
}
}
else {
@@ -2654,12 +2709,14 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
name_end = end;
}
- if (is_num != 0) {
+ if (*num_type != IS_NOT_NUM) {
*rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
else if (*rback_num == 0) {
- r = ONIGERR_INVALID_GROUP_NAME;
- goto err;
+ if (*num_type == IS_REL_NUM) {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ goto err;
+ }
}
*rback_num *= sign;
@@ -2687,9 +2744,11 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
#else
static int
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
- UChar** rname_end, ScanEnv* env, int* rback_num, int ref)
+ UChar** rname_end, ScanEnv* env, int* rback_num,
+ enum REF_NUM* num_type, int ref)
{
- int r, is_num, sign;
+ int r, sign;
+ int digit_count;
OnigCodePoint end_code;
OnigCodePoint c = 0;
UChar *name_end;
@@ -2702,10 +2761,11 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
end_code = get_name_end_code_point(start_code);
+ digit_count = 0;
*rname_end = name_end = end;
r = 0;
pnum_head = *src;
- is_num = 0;
+ *num_type = IS_ABS_NUM;
sign = 1;
if (PEND) {
@@ -2717,37 +2777,61 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
return ONIGERR_EMPTY_GROUP_NAME;
if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
- is_num = 1;
+ *num_type = IS_ABS_NUM;
+ digit_count++;
}
else if (c == '-') {
- is_num = 2;
- sign = -1;
- pnum_head = p;
+ if (ref == 1) {
+ *num_type = IS_REL_NUM;
+ sign = -1;
+ pnum_head = p;
+ }
+ else {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
+ }
+ else if (c == '+') {
+ if (ref == 1) {
+ *num_type = IS_REL_NUM;
+ sign = 1;
+ pnum_head = p;
+ }
+ else {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
}
else {
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
}
- while (!PEND) {
+ while (! PEND) {
name_end = p;
PFETCH(c);
if (c == end_code || c == ')') break;
- if (! ONIGENC_IS_CODE_DIGIT(enc, c))
+
+ if (ONIGENC_IS_CODE_DIGIT(enc, c))
+ digit_count++;
+ else
r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
}
if (r == 0 && c != end_code) {
r = ONIGERR_INVALID_GROUP_NAME;
name_end = end;
}
+ if (r == 0 && digit_count == 0) {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ }
if (r == 0) {
*rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
else if (*rback_num == 0) {
- r = ONIGERR_INVALID_GROUP_NAME;
- goto err;
+ if (*num_type == IS_REL_NUM) {
+ r = ONIGERR_INVALID_GROUP_NAME;
+ goto err;
+ }
}
*rback_num *= sign;
@@ -3418,7 +3502,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
(num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
- if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num]))
+ if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node))
return ONIGERR_INVALID_BACKREF;
}
@@ -3466,30 +3550,31 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
UChar* name_end;
int* backs;
int back_num;
+ enum REF_NUM num_type;
prev = p;
#ifdef USE_BACKREF_WITH_LEVEL
name_end = NULL_UCHARP; /* no need. escape gcc warning. */
r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end,
- env, &back_num, &tok->u.backref.level);
+ env, &back_num, &tok->u.backref.level, &num_type);
if (r == 1) tok->u.backref.exist_level = 1;
else tok->u.backref.exist_level = 0;
#else
- r = fetch_name(&p, end, &name_end, env, &back_num, 1);
+ r = fetch_name(&p, end, &name_end, env, &back_num, &num_type, 1);
#endif
if (r < 0) return r;
- if (back_num != 0) {
- if (back_num < 0) {
- back_num = BACKREF_REL_TO_ABS(back_num, env);
- if (back_num <= 0)
- return ONIGERR_INVALID_BACKREF;
+ if (num_type != IS_NOT_NUM) {
+ if (num_type == IS_REL_NUM) {
+ back_num = backref_rel_to_abs(back_num, env);
}
+ if (back_num <= 0)
+ return ONIGERR_INVALID_BACKREF;
if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
if (back_num > env->num_mem ||
- IS_NULL(SCANENV_MEM_NODES(env)[back_num]))
+ IS_NULL(SCANENV_MEMENV(env)[back_num].node))
return ONIGERR_INVALID_BACKREF;
}
tok->type = TK_BACKREF;
@@ -3508,7 +3593,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
int i;
for (i = 0; i < num; i++) {
if (backs[i] > env->num_mem ||
- IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))
+ IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))
return ONIGERR_INVALID_BACKREF;
}
}
@@ -3538,15 +3623,30 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (c == '<' || c == '\'') {
int gnum;
UChar* name_end;
+ enum REF_NUM num_type;
prev = p;
- r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1);
+ r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,
+ &gnum, &num_type, 1);
if (r < 0) return r;
+ if (num_type != IS_NOT_NUM) {
+ if (num_type == IS_REL_NUM) {
+ gnum = backref_rel_to_abs(gnum, env);
+ if (gnum < 0)
+ return ONIGERR_UNDEFINED_GROUP_REFERENCE;
+ }
+ tok->u.call.by_number = 1;
+ tok->u.call.gnum = gnum;
+ }
+ else {
+ tok->u.call.by_number = 0;
+ tok->u.call.gnum = 0;
+ }
+
tok->type = TK_CALL;
tok->u.call.name = prev;
tok->u.call.name_end = name_end;
- tok->u.call.gnum = gnum;
}
else
PUNFETCH;
@@ -4040,7 +4140,7 @@ parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
*np = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(*np);
- cc = NCCLASS(*np);
+ cc = CCLASS_(*np);
r = add_ctype_to_cc(cc, ctype, 0, env);
if (r != 0) return r;
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
@@ -4080,9 +4180,7 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
}
}
- if (*state != CCS_START)
- *state = CCS_VALUE;
-
+ *state = CCS_VALUE;
*type = CCV_CLASS;
return 0;
}
@@ -4222,7 +4320,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
*np = node = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(node);
- cc = NCCLASS(node);
+ cc = CCLASS_(node);
and_start = 0;
state = CCS_START;
@@ -4335,7 +4433,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
case TK_CHAR_TYPE:
r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
- if (r != 0) return r;
+ if (r != 0) goto err;
next_class:
r = next_state_class(cc, &vs, &val_type, &state, env);
@@ -4344,12 +4442,13 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
case TK_CHAR_PROPERTY:
{
- int ctype;
-
- ctype = fetch_char_property_to_ctype(&p, end, env);
- if (ctype < 0) return ctype;
+ int ctype = fetch_char_property_to_ctype(&p, end, env);
+ if (ctype < 0) {
+ r = ctype;
+ goto err;
+ }
r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
- if (r != 0) return r;
+ if (r != 0) goto err;
goto next_class;
}
break;
@@ -4369,6 +4468,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
CC_ESC_WARN(env, (UChar* )"-");
goto range_end_val;
}
+
+ if (val_type == CCV_CLASS) {
+ r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
+ goto err;
+ }
+
state = CCS_RANGE;
}
else if (state == CCS_START) {
@@ -4418,10 +4523,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
onig_node_free(anode);
goto cc_open_err;
}
- acc = NCCLASS(anode);
+ acc = CCLASS_(anode);
r = or_cclass(cc, acc, env->enc);
-
onig_node_free(anode);
+
cc_open_err:
if (r != 0) goto err;
}
@@ -4488,9 +4593,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
NCCLASS_CLEAR_NOT(cc);
if (IS_NCCLASS_NOT(cc) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) {
- int is_empty;
-
- is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
+ int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0);
if (is_empty != 0)
BITSET_IS_EMPTY(cc->bs, is_empty);
@@ -4510,7 +4613,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
return 0;
err:
- if (cc != NCCLASS(*np))
+ if (cc != CCLASS_(*np))
bbuf_free(cc->mbuf);
return r;
}
@@ -4519,18 +4622,17 @@ static int parse_subexp(Node** top, OnigToken* tok, int term,
UChar** src, UChar* end, ScanEnv* env);
static int
-parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
+parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
ScanEnv* env)
{
int r, num;
Node *target;
OnigOptionType option;
OnigCodePoint c;
- OnigEncoding enc = env->enc;
-
#ifdef USE_NAMED_GROUP
int list_capture;
#endif
+ OnigEncoding enc = env->enc;
UChar* p = *src;
PFETCH_READY;
@@ -4563,7 +4665,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
*np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
break;
case '>': /* (?>...) stop backtrack */
- *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+ *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
break;
#ifdef USE_NAMED_GROUP
@@ -4588,6 +4690,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
UChar *name;
UChar *name_end;
+ enum REF_NUM num_type;
PUNFETCH;
c = '<';
@@ -4597,21 +4700,22 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
named_group2:
name = p;
- r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);
+ r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,
+ &num_type, 0);
if (r < 0) return r;
num = scan_env_add_mem_entry(env);
if (num < 0) return num;
- if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM)
+ if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM)
return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
r = name_add(env->reg, name, name_end, num, env);
if (r != 0) return r;
- *np = node_new_enclose_memory(env->option, 1);
+ *np = node_new_enclosure_memory(1);
CHECK_NULL_RETURN_MEMERR(*np);
- NENCLOSE(*np)->regnum = num;
+ ENCLOSURE_(*np)->m.regnum = num;
if (list_capture != 0)
- BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+ MEM_STATUS_ON_SIMPLE(env->capture_history, num);
env->num_named++;
}
else {
@@ -4637,17 +4741,17 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
PUNFETCH;
}
#endif
- *np = node_new_enclose_memory(env->option, 0);
+ *np = node_new_enclosure_memory(0);
CHECK_NULL_RETURN_MEMERR(*np);
num = scan_env_add_mem_entry(env);
if (num < 0) {
return num;
}
- else if (num >= (int )BIT_STATUS_BITS_NUM) {
+ else if (num >= (int )MEM_STATUS_BITS_NUM) {
return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
}
- NENCLOSE(*np)->regnum = num;
- BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num);
+ ENCLOSURE_(*np)->m.regnum = num;
+ MEM_STATUS_ON_SIMPLE(env->capture_history, num);
}
else {
return ONIGERR_UNDEFINED_GROUP_OPTION;
@@ -4717,7 +4821,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
*np = node_new_option(option);
CHECK_NULL_RETURN_MEMERR(*np);
- NENCLOSE(*np)->target = target;
+ NODE_BODY(*np) = target;
*src = p;
return 0;
}
@@ -4736,11 +4840,11 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP))
goto group;
- *np = node_new_enclose_memory(env->option, 0);
+ *np = node_new_enclosure_memory(0);
CHECK_NULL_RETURN_MEMERR(*np);
num = scan_env_add_mem_entry(env);
if (num < 0) return num;
- NENCLOSE(*np)->regnum = num;
+ ENCLOSURE_(*np)->m.regnum = num;
}
CHECK_NULL_RETURN_MEMERR(*np);
@@ -4752,13 +4856,12 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
return r;
}
- if (NTYPE(*np) == NT_ANCHOR)
- NANCHOR(*np)->target = target;
- else {
- NENCLOSE(*np)->target = target;
- if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) {
+ NODE_BODY(*np) = target;
+
+ if (NODE_TYPE(*np) == NODE_ENCLOSURE) {
+ if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) {
/* Don't move this to previous of parse_subexp() */
- r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np);
+ r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np);
if (r != 0) return r;
}
}
@@ -4778,36 +4881,35 @@ static const char* ReduceQStr[] = {
static int
set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
{
- QtfrNode* qn;
+ QuantNode* qn;
- qn = NQTFR(qnode);
- if (qn->lower == 1 && qn->upper == 1) {
+ qn = QUANT_(qnode);
+ if (qn->lower == 1 && qn->upper == 1)
return 1;
- }
- switch (NTYPE(target)) {
- case NT_STR:
+ switch (NODE_TYPE(target)) {
+ case NODE_STR:
if (! group) {
- StrNode* sn = NSTR(target);
+ StrNode* sn = STR_(target);
if (str_node_can_be_split(sn, env->enc)) {
Node* n = str_node_split_last_char(sn, env->enc);
if (IS_NOT_NULL(n)) {
- qn->target = n;
+ NODE_BODY(qnode) = n;
return 2;
}
}
}
break;
- case NT_QTFR:
+ case NODE_QUANT:
{ /* check redundant double repeat. */
/* verbose warn (?:.?)? etc... but not warn (.?)? etc... */
- QtfrNode* qnt = NQTFR(target);
+ QuantNode* qnt = QUANT_(target);
int nestq_num = popular_quantifier_num(qn);
int targetq_num = popular_quantifier_num(qnt);
#ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
- if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) &&
+ if (! NODE_IS_BY_NUMBER(qnode) && ! NODE_IS_BY_NUMBER(target) &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) {
UChar buf[WARN_BUFSIZE];
@@ -4860,7 +4962,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
break;
}
- qn->target = target;
+ NODE_BODY(qnode) = target;
q_exit:
return 0;
}
@@ -4971,7 +5073,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
*(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
- iarg->ptail = &(NCDR((*(iarg->ptail))));
+ iarg->ptail = &(NODE_CDR((*(iarg->ptail))));
}
}
@@ -4999,14 +5101,14 @@ parse_exp(Node** np, OnigToken* tok, int term,
break;
case TK_SUBEXP_OPEN:
- r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env);
+ r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env);
if (r < 0) return r;
if (r == 1) group = 1;
else if (r == 2) { /* option only */
Node* target;
OnigOptionType prev = env->option;
- env->option = NENCLOSE(*np)->option;
+ env->option = ENCLOSURE_(*np)->o.option;
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
r = parse_subexp(&target, tok, term, src, end, env);
@@ -5015,7 +5117,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
onig_node_free(target);
return r;
}
- NENCLOSE(*np)->target = target;
+ NODE_BODY(*np) = target;
return tok->type;
}
break;
@@ -5057,7 +5159,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
len = 1;
while (1) {
if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
- if (len == enclen(env->enc, NSTR(*np)->s)) {//should not enclen_end()
+ if (len == enclen(env->enc, STR_(*np)->s)) {//should not enclen_end()
r = fetch_token(tok, src, end, env);
NSTRING_CLEAR_RAW(*np);
goto string_end;
@@ -5072,8 +5174,8 @@ parse_exp(Node** np, OnigToken* tok, int term,
int rem;
if (len < ONIGENC_MBC_MINLEN(env->enc)) {
rem = ONIGENC_MBC_MINLEN(env->enc) - len;
- (void )node_str_head_pad(NSTR(*np), rem, (UChar )0);
- if (len + rem == enclen(env->enc, NSTR(*np)->s)) {
+ (void )node_str_head_pad(STR_(*np), rem, (UChar )0);
+ if (len + rem == enclen(env->enc, STR_(*np)->s)) {
NSTRING_CLEAR_RAW(*np);
goto string_end;
}
@@ -5138,7 +5240,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
*np = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(*np);
- cc = NCCLASS(*np);
+ cc = CCLASS_(*np);
add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
}
@@ -5163,7 +5265,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
r = parse_char_class(np, tok, src, end, env);
if (r != 0) return r;
- cc = NCCLASS(*np);
+ cc = CCLASS_(*np);
if (IS_IGNORECASE(env->option)) {
IApplyCaseFoldArg iarg;
@@ -5191,16 +5293,16 @@ parse_exp(Node** np, OnigToken* tok, int term,
break;
case TK_ANYCHAR:
- *np = node_new_anychar();
+ *np = node_new_ctype(CTYPE_ANYCHAR, 0);
CHECK_NULL_RETURN_MEMERR(*np);
break;
case TK_ANYCHAR_ANYTIME:
- *np = node_new_anychar();
+ *np = node_new_ctype(CTYPE_ANYCHAR, 0);
CHECK_NULL_RETURN_MEMERR(*np);
qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
CHECK_NULL_RETURN_MEMERR(qn);
- NQTFR(qn)->target = *np;
+ NODE_BODY(qn) = *np;
*np = qn;
break;
@@ -5222,14 +5324,13 @@ parse_exp(Node** np, OnigToken* tok, int term,
{
int gnum = tok->u.call.gnum;
- if (gnum < 0) {
- gnum = BACKREF_REL_TO_ABS(gnum, env);
- if (gnum <= 0)
- return ONIGERR_INVALID_BACKREF;
- }
- *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum);
+ *np = node_new_call(tok->u.call.name, tok->u.call.name_end,
+ gnum, tok->u.call.by_number);
CHECK_NULL_RETURN_MEMERR(*np);
env->num_call++;
+ if (tok->u.call.by_number != 0 && gnum == 0) {
+ env->has_call_zero = 1;
+ }
}
break;
#endif
@@ -5271,7 +5372,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
(r == TK_INTERVAL ? 1 : 0));
CHECK_NULL_RETURN_MEMERR(qn);
- NQTFR(qn)->greedy = tok->u.repeat.greedy;
+ QUANT_(qn)->greedy = tok->u.repeat.greedy;
r = set_quantifier(qn, *targetp, group, env);
if (r < 0) {
onig_node_free(qn);
@@ -5280,12 +5381,12 @@ parse_exp(Node** np, OnigToken* tok, int term,
if (tok->u.repeat.possessive != 0) {
Node* en;
- en = node_new_enclose(ENCLOSE_STOP_BACKTRACK);
+ en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
if (IS_NULL(en)) {
onig_node_free(qn);
return ONIGERR_MEMORY;
}
- NENCLOSE(en)->target = qn;
+ NODE_BODY(en) = qn;
qn = en;
}
@@ -5303,12 +5404,12 @@ parse_exp(Node** np, OnigToken* tok, int term,
onig_node_free(qn);
return ONIGERR_MEMORY;
}
- tmp = NCDR(*targetp) = node_new_list(qn, NULL);
+ tmp = NODE_CDR(*targetp) = node_new_list(qn, NULL);
if (IS_NULL(tmp)) {
onig_node_free(qn);
return ONIGERR_MEMORY;
}
- targetp = &(NCAR(tmp));
+ targetp = &(NODE_CAR(tmp));
}
goto re_entry;
}
@@ -5336,7 +5437,7 @@ parse_branch(Node** top, OnigToken* tok, int term,
}
else {
*top = node_new_list(node, NULL);
- headp = &(NCDR(*top));
+ headp = &(NODE_CDR(*top));
while (r != TK_EOT && r != term && r != TK_ALT) {
r = parse_exp(&node, tok, term, src, end, env);
if (r < 0) {
@@ -5344,14 +5445,14 @@ parse_branch(Node** top, OnigToken* tok, int term,
return r;
}
- if (NTYPE(node) == NT_LIST) {
+ if (NODE_TYPE(node) == NODE_LIST) {
*headp = node;
- while (IS_NOT_NULL(NCDR(node))) node = NCDR(node);
- headp = &(NCDR(node));
+ while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node);
+ headp = &(NODE_CDR(node));
}
else {
*headp = node_new_list(node, NULL);
- headp = &(NCDR(*headp));
+ headp = &(NODE_CDR(*headp));
}
}
}
@@ -5382,7 +5483,7 @@ parse_subexp(Node** top, OnigToken* tok, int term,
}
else if (r == TK_ALT) {
*top = onig_node_new_alt(node, NULL);
- headp = &(NCDR(*top));
+ headp = &(NODE_CDR(*top));
while (r == TK_ALT) {
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
@@ -5392,7 +5493,7 @@ parse_subexp(Node** top, OnigToken* tok, int term,
return r;
}
*headp = onig_node_new_alt(node, NULL);
- headp = &(NCDR(*headp));
+ headp = &(NODE_CDR(*headp));
}
if (tok->type != (enum TokenSyms )term)
@@ -5421,12 +5522,35 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
if (r < 0) return r;
r = parse_subexp(top, &tok, TK_EOT, src, end, env);
if (r < 0) return r;
+
return 0;
}
+#ifdef USE_SUBEXP_CALL
+static int
+make_call_zero_body(Node* node, ScanEnv* env, Node** rnode)
+{
+ int r;
+
+ Node* x = node_new_enclosure_memory(0 /* 0: is not named */);
+ CHECK_NULL_RETURN_MEMERR(x);
+
+ NODE_BODY(x) = node;
+ ENCLOSURE_(x)->m.regnum = 0;
+ r = scan_env_set_mem_node(env, 0, x);
+ if (r != 0) {
+ onig_node_free(x);
+ return r;
+ }
+
+ *rnode = x;
+ return 0;
+}
+#endif
+
extern int
-onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
- regex_t* reg, ScanEnv* env)
+onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
+ regex_t* reg, ScanEnv* env)
{
int r;
UChar* p;
@@ -5451,6 +5575,19 @@ onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end,
p = (UChar* )pattern;
r = parse_regexp(root, &p, (UChar* )end, env);
+
+#ifdef USE_SUBEXP_CALL
+ if (r != 0) return r;
+
+ if (env->has_call_zero != 0) {
+ Node* zero_node;
+ r = make_call_zero_body(*root, env, &zero_node);
+ if (r != 0) return r;
+
+ *root = zero_node;
+ }
+#endif
+
reg->num_mem = env->num_mem;
return r;
}
diff --git a/src/regparse.h b/src/regparse.h
index c9d1fe8..884f4d5 100644
--- a/src/regparse.h
+++ b/src/regparse.h
@@ -4,7 +4,7 @@
regparse.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,88 +32,91 @@
#include "regint.h"
/* node type */
-#define NT_STR 0
-#define NT_CCLASS 1
-#define NT_CTYPE 2
-#define NT_CANY 3
-#define NT_BREF 4
-#define NT_QTFR 5
-#define NT_ENCLOSE 6
-#define NT_ANCHOR 7
-#define NT_LIST 8
-#define NT_ALT 9
-#define NT_CALL 10
+typedef enum {
+ NODE_STR = 0,
+ NODE_CCLASS = 1,
+ NODE_CTYPE = 2,
+ NODE_BREF = 3,
+ NODE_QUANT = 4,
+ NODE_ENCLOSURE = 5,
+ NODE_ANCHOR = 6,
+ NODE_LIST = 7,
+ NODE_ALT = 8,
+ NODE_CALL = 9
+} NodeType;
/* node type bit */
-#define NTYPE2BIT(type) (1<<(type))
-
-#define BIT_NT_STR NTYPE2BIT(NT_STR)
-#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS)
-#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE)
-#define BIT_NT_CANY NTYPE2BIT(NT_CANY)
-#define BIT_NT_BREF NTYPE2BIT(NT_BREF)
-#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR)
-#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE)
-#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR)
-#define BIT_NT_LIST NTYPE2BIT(NT_LIST)
-#define BIT_NT_ALT NTYPE2BIT(NT_ALT)
-#define BIT_NT_CALL NTYPE2BIT(NT_CALL)
-
-#define IS_NODE_TYPE_SIMPLE(type) \
- ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
- BIT_NT_CANY | BIT_NT_BREF)) != 0)
-
-#define NTYPE(node) ((node)->u.base.type)
-#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype)
-
-#define NSTR(node) (&((node)->u.str))
-#define NCCLASS(node) (&((node)->u.cclass))
-#define NCTYPE(node) (&((node)->u.ctype))
-#define NBREF(node) (&((node)->u.bref))
-#define NQTFR(node) (&((node)->u.qtfr))
-#define NENCLOSE(node) (&((node)->u.enclose))
-#define NANCHOR(node) (&((node)->u.anchor))
-#define NCONS(node) (&((node)->u.cons))
-#define NCALL(node) (&((node)->u.call))
-
-#define NCAR(node) (NCONS(node)->car)
-#define NCDR(node) (NCONS(node)->cdr)
-
+#define NODE_TYPE2BIT(type) (1<<(type))
+
+#define BIT_NODE_STR NODE_TYPE2BIT(NODE_STR)
+#define BIT_NODE_CCLASS NODE_TYPE2BIT(NODE_CCLASS)
+#define BIT_NODE_CTYPE NODE_TYPE2BIT(NODE_CTYPE)
+#define BIT_NODE_BREF NODE_TYPE2BIT(NODE_BREF)
+#define BIT_NODE_QUANT NODE_TYPE2BIT(NODE_QUANT)
+#define BIT_NODE_ENCLOSURE NODE_TYPE2BIT(NODE_ENCLOSURE)
+#define BIT_NODE_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR)
+#define BIT_NODE_LIST NODE_TYPE2BIT(NODE_LIST)
+#define BIT_NODE_ALT NODE_TYPE2BIT(NODE_ALT)
+#define BIT_NODE_CALL NODE_TYPE2BIT(NODE_CALL)
+
+#define NODE_IS_SIMPLE_TYPE(node) \
+ ((NODE_TYPE2BIT(NODE_TYPE(node)) & \
+ (BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BREF)) != 0)
+
+#define NODE_TYPE(node) ((node)->u.base.node_type)
+#define SET_NODE_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
+
+#define STR_(node) (&((node)->u.str))
+#define CCLASS_(node) (&((node)->u.cclass))
+#define CTYPE_(node) (&((node)->u.ctype))
+#define BREF_(node) (&((node)->u.bref))
+#define QUANT_(node) (&((node)->u.quant))
+#define ENCLOSURE_(node) (&((node)->u.enclosure))
+#define ANCHOR_(node) (&((node)->u.anchor))
+#define CONS_(node) (&((node)->u.cons))
+#define CALL_(node) (&((node)->u.call))
+
+#define NODE_CAR(node) (CONS_(node)->car)
+#define NODE_CDR(node) (CONS_(node)->cdr)
+
+#define CTYPE_ANYCHAR -1
+#define NODE_IS_ANYCHAR(node) \
+ (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
-#define ENCLOSE_MEMORY (1<<0)
-#define ENCLOSE_OPTION (1<<1)
-#define ENCLOSE_STOP_BACKTRACK (1<<2)
+#define ENCLOSURE_MEMORY (1<<0)
+#define ENCLOSURE_OPTION (1<<1)
+#define ENCLOSURE_STOP_BACKTRACK (1<<2)
#define NODE_STR_MARGIN 16
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 6
-#define NSTR_RAW (1<<0) /* by backslashed number */
-#define NSTR_AMBIG (1<<1)
-#define NSTR_DONT_GET_OPT_INFO (1<<2)
+#define STRING_RAW (1<<0) /* by backslashed number */
+#define STRING_AMBIG (1<<1)
+#define STRING_DONT_GET_OPT_INFO (1<<2)
#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
-#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
-#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
-#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
+#define NSTRING_SET_RAW(node) (node)->u.str.flag |= STRING_RAW
+#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~STRING_RAW
+#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= STRING_AMBIG
#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
- (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
-#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
-#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
+ (node)->u.str.flag |= STRING_DONT_GET_OPT_INFO
+#define NSTRING_IS_RAW(node) (((node)->u.str.flag & STRING_RAW) != 0)
+#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & STRING_AMBIG) != 0)
#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
- (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
+ (((node)->u.str.flag & STRING_DONT_GET_OPT_INFO) != 0)
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static);
-#define NQ_TARGET_ISNOT_EMPTY 0
-#define NQ_TARGET_IS_EMPTY 1
-#define NQ_TARGET_IS_EMPTY_MEM 2
-#define NQ_TARGET_IS_EMPTY_REC 3
+#define QUANT_BODY_IS_NOT_EMPTY 0
+#define QUANT_BODY_IS_EMPTY 1
+#define QUANT_BODY_IS_EMPTY_MEM 2
+#define QUANT_BODY_IS_EMPTY_REC 3
/* status bits */
#define NST_MIN_FIXED (1<<0)
@@ -121,44 +124,56 @@
#define NST_CLEN_FIXED (1<<2)
#define NST_MARK1 (1<<3)
#define NST_MARK2 (1<<4)
-#define NST_MEM_BACKREFED (1<<5)
-#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
-#define NST_RECURSION (1<<7)
-#define NST_CALLED (1<<8)
-#define NST_ADDR_FIXED (1<<9)
-#define NST_NAMED_GROUP (1<<10)
-#define NST_NAME_REF (1<<11)
-#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
+#define NST_STOP_BT_SIMPLE_REPEAT (1<<5)
+#define NST_RECURSION (1<<6)
+#define NST_CALLED (1<<7)
+#define NST_ADDR_FIXED (1<<8)
+#define NST_NAMED_GROUP (1<<9)
+#define NST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */
+#define NST_IN_ZERO_REPEAT (1<<11) /* (....){0} */
+#define NST_IN_MULTI_ENTRY (1<<12)
#define NST_NEST_LEVEL (1<<13)
#define NST_BY_NUMBER (1<<14) /* {n,m} */
+#define NST_BY_NAME (1<<15) /* backref by name */
+#define NST_BACKREF (1<<16)
+
+
+#define NODE_STATUS(node) (((Node* )node)->u.base.status)
+#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (f))
+#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(f))
+
+#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NST_BY_NUMBER) != 0)
+#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NST_IN_REAL_REPEAT) != 0)
+#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NST_CALLED) != 0)
+#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NST_IN_MULTI_ENTRY) != 0)
+#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NST_RECURSION) != 0)
+#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NST_IN_ZERO_REPEAT) != 0)
+#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0)
+#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NST_ADDR_FIXED) != 0)
+#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NST_CLEN_FIXED) != 0)
+#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NST_MIN_FIXED) != 0)
+#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NST_MAX_FIXED) != 0)
+#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NST_MARK1) != 0)
+#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NST_MARK2) != 0)
+#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NST_NEST_LEVEL) != 0)
+#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NST_BY_NAME) != 0)
+#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NST_BACKREF) != 0)
+#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
+ ((NODE_STATUS(node) & NST_STOP_BT_SIMPLE_REPEAT) != 0)
+
+#define NODE_BODY(node) ((node)->u.base.body)
+#define NODE_QUANT_BODY(node) ((node)->body)
+#define NODE_ENCLOSURE_BODY(node) ((node)->body)
+#define NODE_CALL_BODY(node) ((node)->body)
+#define NODE_ANCHOR_BODY(node) ((node)->body)
-#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f)
-#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f)
-
-#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0)
-#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
-#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
-#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0)
-#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0)
-#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
-#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
-#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
-#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
- (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
-#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
-
-#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
-#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
-#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
-#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
-#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
-#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
-#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
#define CALLNODE_REFNUM_UNDEF -1
typedef struct {
- NodeBase base;
+ NodeType node_type;
+ int status;
+
UChar* s;
UChar* end;
unsigned int flag;
@@ -167,35 +182,54 @@ typedef struct {
} StrNode;
typedef struct {
- NodeBase base;
- int state;
- struct _Node* target;
+ NodeType node_type;
+ int status;
+
+ unsigned int flags;
+ BitSet bs;
+ BBuf* mbuf; /* multi-byte info or NULL */
+} CClassNode;
+
+typedef struct {
+ NodeType node_type;
+ int status;
+ struct _Node* body;
+
int lower;
int upper;
int greedy;
- int target_empty_info;
+ int body_empty_info;
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
#endif
-} QtfrNode;
+} QuantNode;
typedef struct {
- NodeBase base;
- int state;
+ NodeType node_type;
+ int status;
+ struct _Node* body;
+
int type;
- int regnum;
- OnigOptionType option;
- struct _Node* target;
- AbsAddrType call_addr;
+ union {
+ struct {
+ int regnum;
+ AbsAddrType called_addr;
+ int entry_count;
+ int called_state;
+ } m;
+ struct {
+ OnigOptionType option;
+ } o;
+ };
/* for multiple call reference */
OnigLen min_len; /* min length (byte) */
OnigLen max_len; /* max length (byte) */
int char_len; /* character length */
int opt_count; /* referenced count in optimize_node_left() */
-} EncloseNode;
+} EnclosureNode;
#ifdef USE_SUBEXP_CALL
@@ -211,20 +245,23 @@ typedef struct {
} UnsetAddrList;
typedef struct {
- NodeBase base;
- int state;
+ NodeType node_type;
+ int status;
+ struct _Node* body; /* to EnclosureNode : ENCLOSURE_MEMORY */
+
+ int by_number;
int group_num;
UChar* name;
UChar* name_end;
- struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */
- UnsetAddrList* unset_addr_list;
+ int entry_count;
} CallNode;
#endif
typedef struct {
- NodeBase base;
- int state;
+ NodeType node_type;
+ int status;
+
int back_num;
int back_static[NODE_BACKREFS_SIZE];
int* back_dynamic;
@@ -232,37 +269,48 @@ typedef struct {
} BRefNode;
typedef struct {
- NodeBase base;
+ NodeType node_type;
+ int status;
+ struct _Node* body;
+
int type;
- struct _Node* target;
int char_len;
} AnchorNode;
typedef struct {
- NodeBase base;
+ NodeType node_type;
+ int status;
+
struct _Node* car;
struct _Node* cdr;
} ConsAltNode;
typedef struct {
- NodeBase base;
+ NodeType node_type;
+ int status;
+
int ctype;
int not;
} CtypeNode;
typedef struct _Node {
union {
- NodeBase base;
- StrNode str;
- CClassNode cclass;
- QtfrNode qtfr;
- EncloseNode enclose;
- BRefNode bref;
- AnchorNode anchor;
- ConsAltNode cons;
- CtypeNode ctype;
+ struct {
+ NodeType node_type;
+ int status;
+ struct _Node* body;
+ } base;
+
+ StrNode str;
+ CClassNode cclass;
+ QuantNode quant;
+ EnclosureNode enclosure;
+ BRefNode bref;
+ AnchorNode anchor;
+ ConsAltNode cons;
+ CtypeNode ctype;
#ifdef USE_SUBEXP_CALL
- CallNode call;
+ CallNode call;
#endif
} u;
} Node;
@@ -270,20 +318,28 @@ typedef struct _Node {
#define NULL_NODE ((Node* )0)
-#define SCANENV_MEMNODES_SIZE 8
-#define SCANENV_MEM_NODES(senv) \
- (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
- (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
+#define SCANENV_MEMENV_SIZE 8
+#define SCANENV_MEMENV(senv) \
+ (IS_NOT_NULL((senv)->mem_env_dynamic) ? \
+ (senv)->mem_env_dynamic : (senv)->mem_env_static)
+
+typedef struct {
+ Node* node;
+#if 0
+ int in;
+ int recursion;
+#endif
+} MemEnv;
typedef struct {
OnigOptionType option;
OnigCaseFoldType case_fold_flag;
OnigEncoding enc;
OnigSyntaxType* syntax;
- BitStatusType capture_history;
- BitStatusType bt_mem_start;
- BitStatusType bt_mem_end;
- BitStatusType backrefed_mem;
+ MemStatusType capture_history;
+ MemStatusType bt_mem_start;
+ MemStatusType bt_mem_end;
+ MemStatusType backrefed_mem;
UChar* pattern;
UChar* pattern_end;
UChar* error;
@@ -292,14 +348,15 @@ typedef struct {
int num_call;
#ifdef USE_SUBEXP_CALL
UnsetAddrList* unset_addr_list;
+ int has_call_zero;
#endif
int num_mem;
#ifdef USE_NAMED_GROUP
int num_named;
#endif
int mem_alloc;
- Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
- Node** mem_nodes_dynamic;
+ MemEnv mem_env_static[SCANENV_MEMENV_SIZE];
+ MemEnv* mem_env_dynamic;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int num_comb_exp_check;
int comb_exp_max_regnum;
@@ -331,7 +388,7 @@ extern void onig_node_conv_to_str_node P_((Node* node, int raw));
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
-extern Node* onig_node_new_enclose P_((int type));
+extern Node* onig_node_new_enclosure P_((int type));
extern Node* onig_node_new_anchor P_((int type));
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));
@@ -339,8 +396,9 @@ extern Node* onig_node_list_add P_((Node* list, Node* x));
extern Node* onig_node_new_alt P_((Node* left, Node* right));
extern void onig_node_str_clear P_((Node* node));
extern int onig_names_free P_((regex_t* reg));
-extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
+extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
extern int onig_free_shared_cclass_table P_((void));
+extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
#ifdef ONIG_DEBUG
#ifdef USE_NAMED_GROUP
diff --git a/src/st.c b/src/st.c
index d4fe867..e5fd1a1 100644
--- a/src/st.c
+++ b/src/st.c
@@ -108,17 +108,16 @@ new_size(size)
#if 0
for (i=3; i<31; i++) {
- if ((1<<i) > size) return 1<<i;
+ if ((1<<i) > size) return 1<<i;
}
return -1;
#else
int newsize;
for (i = 0, newsize = MINSIZE;
- i < (int )(sizeof(primes)/sizeof(primes[0]));
- i++, newsize <<= 1)
- {
- if (newsize > size) return primes[i];
+ i < (int )(sizeof(primes)/sizeof(primes[0]));
+ i++, newsize <<= 1) {
+ if (newsize > size) return primes[i];
}
/* Ran out of polynomials */
return -1; /* should raise exception */
@@ -145,82 +144,82 @@ st_init_table_with_size(type, size)
struct st_hash_type *type;
int size;
{
- st_table *tbl;
+ st_table *tbl;
#ifdef HASH_LOG
- if (init_st == 0) {
- init_st = 1;
- atexit(stat_col);
- }
+ if (init_st == 0) {
+ init_st = 1;
+ atexit(stat_col);
+ }
#endif
- size = new_size(size); /* round up to prime number */
+ size = new_size(size); /* round up to prime number */
- tbl = alloc(st_table);
- if (tbl == 0) return 0;
+ tbl = alloc(st_table);
+ if (tbl == 0) return 0;
- tbl->type = type;
- tbl->num_entries = 0;
- tbl->num_bins = size;
- tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
- if (tbl->bins == 0) {
- free(tbl);
- return 0;
- }
+ tbl->type = type;
+ tbl->num_entries = 0;
+ tbl->num_bins = size;
+ tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*));
+ if (tbl->bins == 0) {
+ free(tbl);
+ return 0;
+ }
- return tbl;
+ return tbl;
}
st_table*
st_init_table(type)
struct st_hash_type *type;
{
- return st_init_table_with_size(type, 0);
+ return st_init_table_with_size(type, 0);
}
st_table*
st_init_numtable(void)
{
- return st_init_table(&type_numhash);
+ return st_init_table(&type_numhash);
}
st_table*
st_init_numtable_with_size(size)
int size;
{
- return st_init_table_with_size(&type_numhash, size);
+ return st_init_table_with_size(&type_numhash, size);
}
st_table*
st_init_strtable(void)
{
- return st_init_table(&type_strhash);
+ return st_init_table(&type_strhash);
}
st_table*
st_init_strtable_with_size(size)
int size;
{
- return st_init_table_with_size(&type_strhash, size);
+ return st_init_table_with_size(&type_strhash, size);
}
void
st_free_table(table)
st_table *table;
{
- register st_table_entry *ptr, *next;
- int i;
+ register st_table_entry *ptr, *next;
+ int i;
- for(i = 0; i < table->num_bins; i++) {
- ptr = table->bins[i];
- while (ptr != 0) {
+ for(i = 0; i < table->num_bins; i++) {
+ ptr = table->bins[i];
+ while (ptr != 0) {
next = ptr->next;
free(ptr);
ptr = next;
- }
}
- free(table->bins);
- free(table);
+ }
+ free(table->bins);
+ free(table);
}
#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
@@ -236,187 +235,186 @@ st_free_table(table)
bin_pos = hash_val%(table)->num_bins;\
ptr = (table)->bins[bin_pos];\
if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\
- COLLISION;\
- while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\
- ptr = ptr->next;\
- }\
- ptr = ptr->next;\
+ COLLISION;\
+ while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\
+ ptr = ptr->next;\
+ }\
+ ptr = ptr->next;\
}\
} while (0)
int
st_lookup(table, key, value)
- st_table *table;
- register st_data_t key;
- st_data_t *value;
+ st_table *table;
+ register st_data_t key;
+ st_data_t *value;
{
- unsigned int hash_val, bin_pos;
- register st_table_entry *ptr;
+ unsigned int hash_val, bin_pos;
+ register st_table_entry *ptr;
- hash_val = do_hash(key, table);
- FIND_ENTRY(table, ptr, hash_val, bin_pos);
+ hash_val = do_hash(key, table);
+ FIND_ENTRY(table, ptr, hash_val, bin_pos);
- if (ptr == 0) {
- return 0;
- }
- else {
- if (value != 0) *value = ptr->record;
- return 1;
- }
+ if (ptr == 0) {
+ return 0;
+ }
+ else {
+ if (value != 0) *value = ptr->record;
+ return 1;
+ }
}
-#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\
+#define ADD_DIRECT(table, key, value, hash_val, bin_pos, ret) \
do {\
- st_table_entry *entry;\
- if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\
- rehash(table);\
- bin_pos = hash_val % table->num_bins;\
- }\
- \
- entry = alloc(st_table_entry);\
- \
- entry->hash = hash_val;\
- entry->key = key;\
- entry->record = value;\
- entry->next = table->bins[bin_pos];\
- table->bins[bin_pos] = entry;\
- table->num_entries++;\
+ st_table_entry *entry;\
+ if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\
+ rehash(table);\
+ bin_pos = hash_val % table->num_bins;\
+ }\
+ entry = alloc(st_table_entry);\
+ if (IS_NULL(entry)) return ret;\
+ entry->hash = hash_val;\
+ entry->key = key;\
+ entry->record = value;\
+ entry->next = table->bins[bin_pos];\
+ table->bins[bin_pos] = entry;\
+ table->num_entries++;\
} while (0)
int
st_insert(table, key, value)
- register st_table *table;
- register st_data_t key;
- st_data_t value;
+ register st_table *table;
+ register st_data_t key;
+ st_data_t value;
{
- unsigned int hash_val, bin_pos;
- register st_table_entry *ptr;
+ unsigned int hash_val, bin_pos;
+ register st_table_entry *ptr;
- hash_val = do_hash(key, table);
- FIND_ENTRY(table, ptr, hash_val, bin_pos);
+ hash_val = do_hash(key, table);
+ FIND_ENTRY(table, ptr, hash_val, bin_pos);
- if (ptr == 0) {
- ADD_DIRECT(table, key, value, hash_val, bin_pos);
- return 0;
- }
- else {
- ptr->record = value;
- return 1;
- }
+ if (ptr == 0) {
+ ADD_DIRECT(table, key, value, hash_val, bin_pos, ONIGERR_MEMORY);
+ return 0;
+ }
+ else {
+ ptr->record = value;
+ return 1;
+ }
}
void
st_add_direct(table, key, value)
- st_table *table;
- st_data_t key;
- st_data_t value;
+ st_table *table;
+ st_data_t key;
+ st_data_t value;
{
- unsigned int hash_val, bin_pos;
+ unsigned int hash_val, bin_pos;
- hash_val = do_hash(key, table);
- bin_pos = hash_val % table->num_bins;
- ADD_DIRECT(table, key, value, hash_val, bin_pos);
+ hash_val = do_hash(key, table);
+ bin_pos = hash_val % table->num_bins;
+ ADD_DIRECT(table, key, value, hash_val, bin_pos,);
}
static void
rehash(table)
- register st_table *table;
+ register st_table *table;
{
- register st_table_entry *ptr, *next, **new_bins;
- int i, old_num_bins = table->num_bins, new_num_bins;
- unsigned int hash_val;
-
- new_num_bins = new_size(old_num_bins+1);
- new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*));
- if (new_bins == 0) {
- return ;
- }
-
- for(i = 0; i < old_num_bins; i++) {
- ptr = table->bins[i];
- while (ptr != 0) {
+ register st_table_entry *ptr, *next, **new_bins;
+ int i, old_num_bins = table->num_bins, new_num_bins;
+ unsigned int hash_val;
+
+ new_num_bins = new_size(old_num_bins+1);
+ new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*));
+ if (new_bins == 0) {
+ return ;
+ }
+
+ for(i = 0; i < old_num_bins; i++) {
+ ptr = table->bins[i];
+ while (ptr != 0) {
next = ptr->next;
hash_val = ptr->hash % new_num_bins;
ptr->next = new_bins[hash_val];
new_bins[hash_val] = ptr;
ptr = next;
- }
}
- free(table->bins);
- table->num_bins = new_num_bins;
- table->bins = new_bins;
+ }
+ free(table->bins);
+ table->num_bins = new_num_bins;
+ table->bins = new_bins;
}
st_table*
st_copy(old_table)
- st_table *old_table;
+ st_table *old_table;
{
- st_table *new_table;
- st_table_entry *ptr, *entry;
- int i, num_bins = old_table->num_bins;
+ st_table *new_table;
+ st_table_entry *ptr, *entry;
+ int i, num_bins = old_table->num_bins;
- new_table = alloc(st_table);
- if (new_table == 0) {
- return 0;
- }
+ new_table = alloc(st_table);
+ if (new_table == 0) {
+ return 0;
+ }
- *new_table = *old_table;
- new_table->bins = (st_table_entry**)
- Calloc((unsigned)num_bins, sizeof(st_table_entry*));
+ *new_table = *old_table;
+ new_table->bins = (st_table_entry**)
+ Calloc((unsigned)num_bins, sizeof(st_table_entry*));
- if (new_table->bins == 0) {
- free(new_table);
- return 0;
- }
+ if (new_table->bins == 0) {
+ free(new_table);
+ return 0;
+ }
- for(i = 0; i < num_bins; i++) {
- new_table->bins[i] = 0;
- ptr = old_table->bins[i];
- while (ptr != 0) {
+ for(i = 0; i < num_bins; i++) {
+ new_table->bins[i] = 0;
+ ptr = old_table->bins[i];
+ while (ptr != 0) {
entry = alloc(st_table_entry);
if (entry == 0) {
- free(new_table->bins);
- free(new_table);
- return 0;
+ free(new_table->bins);
+ free(new_table);
+ return 0;
}
*entry = *ptr;
entry->next = new_table->bins[i];
new_table->bins[i] = entry;
ptr = ptr->next;
- }
}
- return new_table;
+ }
+ return new_table;
}
int
st_delete(table, key, value)
- register st_table *table;
- register st_data_t *key;
- st_data_t *value;
+ register st_table *table;
+ register st_data_t *key;
+ st_data_t *value;
{
- unsigned int hash_val;
- st_table_entry *tmp;
- register st_table_entry *ptr;
+ unsigned int hash_val;
+ st_table_entry *tmp;
+ register st_table_entry *ptr;
- hash_val = do_hash_bin(*key, table);
- ptr = table->bins[hash_val];
+ hash_val = do_hash_bin(*key, table);
+ ptr = table->bins[hash_val];
- if (ptr == 0) {
- if (value != 0) *value = 0;
- return 0;
- }
-
- if (EQUAL(table, *key, ptr->key)) {
- table->bins[hash_val] = ptr->next;
- table->num_entries--;
- if (value != 0) *value = ptr->record;
- *key = ptr->key;
- free(ptr);
- return 1;
- }
-
- for(; ptr->next != 0; ptr = ptr->next) {
- if (EQUAL(table, ptr->next->key, *key)) {
+ if (ptr == 0) {
+ if (value != 0) *value = 0;
+ return 0;
+ }
+
+ if (EQUAL(table, *key, ptr->key)) {
+ table->bins[hash_val] = ptr->next;
+ table->num_entries--;
+ if (value != 0) *value = ptr->record;
+ *key = ptr->key;
+ free(ptr);
+ return 1;
+ }
+
+ for(; ptr->next != 0; ptr = ptr->next) {
+ if (EQUAL(table, ptr->next->key, *key)) {
tmp = ptr->next;
ptr->next = ptr->next->next;
table->num_entries--;
@@ -424,41 +422,41 @@ st_delete(table, key, value)
*key = tmp->key;
free(tmp);
return 1;
- }
}
+ }
- return 0;
+ return 0;
}
int
st_delete_safe(table, key, value, never)
- register st_table *table;
- register st_data_t *key;
- st_data_t *value;
- st_data_t never;
+ register st_table *table;
+ register st_data_t *key;
+ st_data_t *value;
+ st_data_t never;
{
- unsigned int hash_val;
- register st_table_entry *ptr;
+ unsigned int hash_val;
+ register st_table_entry *ptr;
- hash_val = do_hash_bin(*key, table);
- ptr = table->bins[hash_val];
+ hash_val = do_hash_bin(*key, table);
+ ptr = table->bins[hash_val];
- if (ptr == 0) {
- if (value != 0) *value = 0;
- return 0;
- }
+ if (ptr == 0) {
+ if (value != 0) *value = 0;
+ return 0;
+ }
- for(; ptr != 0; ptr = ptr->next) {
- if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
+ for(; ptr != 0; ptr = ptr->next) {
+ if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) {
table->num_entries--;
*key = ptr->key;
if (value != 0) *value = ptr->record;
ptr->key = ptr->record = never;
return 1;
- }
}
+ }
- return 0;
+ return 0;
}
static int
@@ -476,114 +474,114 @@ delete_never(key, value, never)
void
st_cleanup_safe(table, never)
- st_table *table;
- st_data_t never;
+ st_table *table;
+ st_data_t never;
{
- int num_entries = table->num_entries;
+ int num_entries = table->num_entries;
- st_foreach(table, delete_never, never);
- table->num_entries = num_entries;
+ st_foreach(table, delete_never, never);
+ table->num_entries = num_entries;
}
int
st_foreach(table, func, arg)
- st_table *table;
- int (*func)();
- st_data_t arg;
+ st_table *table;
+ int (*func)();
+ st_data_t arg;
{
- st_table_entry *ptr, *last, *tmp;
- enum st_retval retval;
- int i;
+ st_table_entry *ptr, *last, *tmp;
+ enum st_retval retval;
+ int i;
- for(i = 0; i < table->num_bins; i++) {
- last = 0;
- for(ptr = table->bins[i]; ptr != 0;) {
+ for(i = 0; i < table->num_bins; i++) {
+ last = 0;
+ for(ptr = table->bins[i]; ptr != 0;) {
retval = (*func)(ptr->key, ptr->record, arg);
switch (retval) {
case ST_CHECK: /* check if hash is modified during iteration */
- tmp = 0;
- if (i < table->num_bins) {
- for (tmp = table->bins[i]; tmp; tmp=tmp->next) {
- if (tmp == ptr) break;
- }
- }
- if (!tmp) {
- /* call func with error notice */
- return 1;
- }
- /* fall through */
+ tmp = 0;
+ if (i < table->num_bins) {
+ for (tmp = table->bins[i]; tmp; tmp=tmp->next) {
+ if (tmp == ptr) break;
+ }
+ }
+ if (!tmp) {
+ /* call func with error notice */
+ return 1;
+ }
+ /* fall through */
case ST_CONTINUE:
- last = ptr;
- ptr = ptr->next;
- break;
+ last = ptr;
+ ptr = ptr->next;
+ break;
case ST_STOP:
- return 0;
+ return 0;
case ST_DELETE:
- tmp = ptr;
- if (last == 0) {
- table->bins[i] = ptr->next;
- }
- else {
- last->next = ptr->next;
- }
- ptr = ptr->next;
- free(tmp);
- table->num_entries--;
+ tmp = ptr;
+ if (last == 0) {
+ table->bins[i] = ptr->next;
+ }
+ else {
+ last->next = ptr->next;
+ }
+ ptr = ptr->next;
+ free(tmp);
+ table->num_entries--;
}
- }
}
- return 0;
+ }
+ return 0;
}
static int
strhash(string)
- register const char *string;
+ register const char *string;
{
- register int c;
+ register int c;
#ifdef HASH_ELFHASH
- register unsigned int h = 0, g;
+ register unsigned int h = 0, g;
- while ((c = *string++) != '\0') {
- h = ( h << 4 ) + c;
- if ( g = h & 0xF0000000 )
+ while ((c = *string++) != '\0') {
+ h = ( h << 4 ) + c;
+ if ( g = h & 0xF0000000 )
h ^= g >> 24;
- h &= ~g;
- }
- return h;
+ h &= ~g;
+ }
+ return h;
#elif HASH_PERL
- register int val = 0;
+ register int val = 0;
- while ((c = *string++) != '\0') {
- val += c;
- val += (val << 10);
- val ^= (val >> 6);
- }
- val += (val << 3);
- val ^= (val >> 11);
+ while ((c = *string++) != '\0') {
+ val += c;
+ val += (val << 10);
+ val ^= (val >> 6);
+ }
+ val += (val << 3);
+ val ^= (val >> 11);
- return val + (val << 15);
+ return val + (val << 15);
#else
- register int val = 0;
+ register int val = 0;
- while ((c = *string++) != '\0') {
- val = val*997 + c;
- }
+ while ((c = *string++) != '\0') {
+ val = val*997 + c;
+ }
- return val + (val>>5);
+ return val + (val>>5);
#endif
}
static int
numcmp(x, y)
- long x, y;
+ long x, y;
{
- return x != y;
+ return x != y;
}
static int
numhash(n)
- long n;
+ long n;
{
- return n;
+ return n;
}
diff --git a/src/unicode.c b/src/unicode.c
index 8812ca2..5b6b3e7 100644
--- a/src/unicode.c
+++ b/src/unicode.c
@@ -2,7 +2,7 @@
unicode.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -91,6 +91,7 @@ extern int
onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
{
UserDefinedPropertyValue* e;
+ int r;
int i;
int n;
int len;
@@ -130,9 +131,10 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)
e = UserDefinedPropertyRanges + UserDefinedPropertyNum;
e->ctype = CODE_RANGES_NUM + UserDefinedPropertyNum;
e->ranges = ranges;
- onig_st_insert_strend(UserDefinedPropertyTable,
- (const UChar* )s, (const UChar* )s + n,
- (hash_data_type )((void* )e));
+ r = onig_st_insert_strend(UserDefinedPropertyTable,
+ (const UChar* )s, (const UChar* )s + n,
+ (hash_data_type )((void* )e));
+ if (r < 0) return r;
UserDefinedPropertyNum++;
return 0;
@@ -162,7 +164,7 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype)
extern int
-onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[])
+onigenc_unicode_ctype_code_range(OnigCtype ctype, const OnigCodePoint* ranges[])
{
if (ctype >= CODE_RANGES_NUM) {
int index = ctype - CODE_RANGES_NUM;