summaryrefslogtreecommitdiff
path: root/src/regcomp.c
diff options
context:
space:
mode:
authorJörg Frings-Fürst <debian@jff.email>2019-07-12 09:18:18 +0200
committerJörg Frings-Fürst <debian@jff.email>2019-07-12 09:18:18 +0200
commit5c29b174a11e992dac68553f08645fe4a825c315 (patch)
tree7c72e5a49112a844a53a2c10ef1fda47867aee92 /src/regcomp.c
parent6cac1738b6aebf75b29d225775030d291319f9a2 (diff)
parente25c754918ae26e8b9e68a47bc1af36248e91800 (diff)
Update upstream source from tag 'upstream/6.9.2'
Update to upstream version '6.9.2' with Debian dir a351ad757689417d0ede510a023d625faee8e3e9
Diffstat (limited to 'src/regcomp.c')
-rw-r--r--src/regcomp.c1201
1 files changed, 689 insertions, 512 deletions
diff --git a/src/regcomp.c b/src/regcomp.c
index 400368d..c2c04a4 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -2,7 +2,7 @@
regcomp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,8 @@
#include "regparse.h"
+#define OPS_INIT_SIZE 8
+
OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
#if 0
@@ -108,6 +110,280 @@ int_stack_pop(int_stack* s)
}
#endif
+static int
+ops_init(regex_t* reg, int init_alloc_size)
+{
+ Operation* p;
+ size_t size;
+
+ if (init_alloc_size > 0) {
+ size = sizeof(Operation) * init_alloc_size;
+ p = (Operation* )xrealloc(reg->ops, size);
+ CHECK_NULL_RETURN_MEMERR(p);
+#ifdef USE_DIRECT_THREADED_CODE
+ {
+ enum OpCode* cp;
+ size = sizeof(enum OpCode) * init_alloc_size;
+ cp = (enum OpCode* )xrealloc(reg->ocs, size);
+ CHECK_NULL_RETURN_MEMERR(cp);
+ reg->ocs = cp;
+ }
+#endif
+ }
+ else {
+ p = (Operation* )0;
+#ifdef USE_DIRECT_THREADED_CODE
+ reg->ocs = (enum OpCode* )0;
+#endif
+ }
+
+ reg->ops = p;
+ reg->ops_curr = 0; /* !!! not yet done ops_new() */
+ reg->ops_alloc = init_alloc_size;
+ reg->ops_used = 0;
+
+ return ONIG_NORMAL;
+}
+
+static int
+ops_expand(regex_t* reg, int n)
+{
+#define MIN_OPS_EXPAND_SIZE 4
+
+#ifdef USE_DIRECT_THREADED_CODE
+ enum OpCode* cp;
+#endif
+ Operation* p;
+ size_t size;
+
+ if (n <= 0) n = MIN_OPS_EXPAND_SIZE;
+
+ n += reg->ops_alloc;
+
+ size = sizeof(Operation) * n;
+ p = (Operation* )xrealloc(reg->ops, size);
+ CHECK_NULL_RETURN_MEMERR(p);
+
+#ifdef USE_DIRECT_THREADED_CODE
+ size = sizeof(enum OpCode) * n;
+ cp = (enum OpCode* )xrealloc(reg->ocs, size);
+ CHECK_NULL_RETURN_MEMERR(cp);
+ reg->ocs = cp;
+#endif
+
+ reg->ops = p;
+ reg->ops_alloc = n;
+ if (reg->ops_used == 0)
+ reg->ops_curr = 0;
+ else
+ reg->ops_curr = reg->ops + (reg->ops_used - 1);
+
+ return ONIG_NORMAL;
+}
+
+static int
+ops_new(regex_t* reg)
+{
+ int r;
+
+ if (reg->ops_used >= reg->ops_alloc) {
+ r = ops_expand(reg, reg->ops_alloc);
+ if (r != ONIG_NORMAL) return r;
+ }
+
+ reg->ops_curr = reg->ops + reg->ops_used;
+ reg->ops_used++;
+
+ xmemset(reg->ops_curr, 0, sizeof(Operation));
+ return ONIG_NORMAL;
+}
+
+static int
+is_in_string_pool(regex_t* reg, UChar* s)
+{
+ return (s >= reg->string_pool && s < reg->string_pool_end);
+}
+
+static void
+ops_free(regex_t* reg)
+{
+ int i;
+
+ if (IS_NULL(reg->ops)) return ;
+
+ for (i = 0; i < (int )reg->ops_used; i++) {
+ enum OpCode opcode;
+ Operation* op;
+
+ op = reg->ops + i;
+
+#ifdef USE_DIRECT_THREADED_CODE
+ opcode = *(reg->ocs + i);
+#else
+ opcode = op->opcode;
+#endif
+
+ switch (opcode) {
+ case OP_EXACTMBN:
+ if (! is_in_string_pool(reg, op->exact_len_n.s))
+ xfree(op->exact_len_n.s);
+ break;
+ case OP_EXACTN: case OP_EXACTMB2N: case OP_EXACTMB3N: case OP_EXACTN_IC:
+ if (! is_in_string_pool(reg, op->exact_n.s))
+ xfree(op->exact_n.s);
+ break;
+ case OP_EXACT1: case OP_EXACT2: case OP_EXACT3: case OP_EXACT4:
+ case OP_EXACT5: case OP_EXACTMB2N1: case OP_EXACTMB2N2:
+ case OP_EXACTMB2N3: case OP_EXACT1_IC:
+ break;
+
+ case OP_CCLASS_NOT: case OP_CCLASS:
+ xfree(op->cclass.bsp);
+ break;
+
+ case OP_CCLASS_MB_NOT: case OP_CCLASS_MB:
+ xfree(op->cclass_mb.mb);
+ break;
+ case OP_CCLASS_MIX_NOT: case OP_CCLASS_MIX:
+ xfree(op->cclass_mix.mb);
+ xfree(op->cclass_mix.bsp);
+ break;
+
+ case OP_BACKREF1: case OP_BACKREF2: case OP_BACKREF_N: case OP_BACKREF_N_IC:
+ break;
+ case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC:
+ case OP_BACKREF_WITH_LEVEL:
+ case OP_BACKREF_WITH_LEVEL_IC:
+ case OP_BACKREF_CHECK:
+ case OP_BACKREF_CHECK_WITH_LEVEL:
+ if (op->backref_general.num != 1)
+ xfree(op->backref_general.ns);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ xfree(reg->ops);
+#ifdef USE_DIRECT_THREADED_CODE
+ xfree(reg->ocs);
+ reg->ocs = 0;
+#endif
+
+ reg->ops = 0;
+ reg->ops_curr = 0;
+ reg->ops_alloc = 0;
+ reg->ops_used = 0;
+}
+
+static int
+ops_calc_size_of_string_pool(regex_t* reg)
+{
+ int i;
+ int total;
+
+ if (IS_NULL(reg->ops)) return 0;
+
+ total = 0;
+ for (i = 0; i < (int )reg->ops_used; i++) {
+ enum OpCode opcode;
+ Operation* op;
+
+ op = reg->ops + i;
+#ifdef USE_DIRECT_THREADED_CODE
+ opcode = *(reg->ocs + i);
+#else
+ opcode = op->opcode;
+#endif
+
+ switch (opcode) {
+ case OP_EXACTMBN:
+ total += op->exact_len_n.len * op->exact_len_n.n;
+ break;
+ case OP_EXACTN:
+ case OP_EXACTN_IC:
+ total += op->exact_n.n;
+ break;
+ case OP_EXACTMB2N:
+ total += op->exact_n.n * 2;
+ break;
+ case OP_EXACTMB3N:
+ total += op->exact_n.n * 3;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ return total;
+}
+
+static int
+ops_make_string_pool(regex_t* reg)
+{
+ int i;
+ int len;
+ int size;
+ UChar* pool;
+ UChar* curr;
+
+ size = ops_calc_size_of_string_pool(reg);
+ if (size <= 0) {
+ return 0;
+ }
+
+ curr = pool = (UChar* )xmalloc((size_t )size);
+ CHECK_NULL_RETURN_MEMERR(pool);
+
+ for (i = 0; i < (int )reg->ops_used; i++) {
+ enum OpCode opcode;
+ Operation* op;
+
+ op = reg->ops + i;
+#ifdef USE_DIRECT_THREADED_CODE
+ opcode = *(reg->ocs + i);
+#else
+ opcode = op->opcode;
+#endif
+
+ switch (opcode) {
+ case OP_EXACTMBN:
+ len = op->exact_len_n.len * op->exact_len_n.n;
+ xmemcpy(curr, op->exact_len_n.s, len);
+ xfree(op->exact_len_n.s);
+ op->exact_len_n.s = curr;
+ curr += len;
+ break;
+ case OP_EXACTN:
+ case OP_EXACTN_IC:
+ len = op->exact_n.n;
+ copy:
+ xmemcpy(curr, op->exact_n.s, len);
+ xfree(op->exact_n.s);
+ op->exact_n.s = curr;
+ curr += len;
+ break;
+ case OP_EXACTMB2N:
+ len = op->exact_n.n * 2;
+ goto copy;
+ break;
+ case OP_EXACTMB3N:
+ len = op->exact_n.n * 3;
+ goto copy;
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ reg->string_pool = pool;
+ reg->string_pool_end = pool + size;
+ return 0;
+}
+
extern OnigCaseFoldType
onig_get_default_case_fold_flag(void)
{
@@ -150,10 +426,6 @@ onig_positive_int_multiply(int x, int y)
}
-#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
-static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
-#endif
-
static void
swap_node(Node* a, Node* b)
{
@@ -213,24 +485,6 @@ bitset_is_empty(BitSetRef bs)
return 1;
}
-extern int
-onig_bbuf_init(BBuf* buf, int size)
-{
- if (size <= 0) {
- size = 0;
- buf->p = NULL;
- }
- else {
- buf->p = (UChar* )xmalloc(size);
- if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
- }
-
- buf->alloc = size;
- buf->used = 0;
- return 0;
-}
-
-
#ifdef USE_CALL
static int
@@ -275,113 +529,19 @@ unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node)
static int
-add_opcode(regex_t* reg, int opcode)
-{
- BB_ADD1(reg, opcode);
- return 0;
-}
-
-static int
-add_rel_addr(regex_t* reg, int addr)
-{
- RelAddrType ra = (RelAddrType )addr;
-
- BB_ADD(reg, &ra, SIZE_RELADDR);
- return 0;
-}
-
-static int
-add_abs_addr(regex_t* reg, int addr)
-{
- AbsAddrType ra = (AbsAddrType )addr;
-
- BB_ADD(reg, &ra, SIZE_ABSADDR);
- return 0;
-}
-
-static int
-add_length(regex_t* reg, int len)
-{
- LengthType l = (LengthType )len;
-
- BB_ADD(reg, &l, SIZE_LENGTH);
- return 0;
-}
-
-static int
-add_mem_num(regex_t* reg, int num)
-{
- MemNumType n = (MemNumType )num;
-
- BB_ADD(reg, &n, SIZE_MEMNUM);
- return 0;
-}
-
-#if 0
-static int
-add_pointer(regex_t* reg, void* addr)
-{
- PointerType ptr = (PointerType )addr;
-
- BB_ADD(reg, &ptr, SIZE_POINTER);
- return 0;
-}
-#endif
-
-static int
-add_option(regex_t* reg, OnigOptionType option)
-{
- BB_ADD(reg, &option, SIZE_OPTION);
- return 0;
-}
-
-static int
-add_save_type(regex_t* reg, enum SaveType type)
-{
- SaveType t = (SaveType )type;
-
- BB_ADD(reg, &t, SIZE_SAVE_TYPE);
- return 0;
-}
-
-static int
-add_update_var_type(regex_t* reg, enum UpdateVarType type)
-{
- UpdateVarType t = (UpdateVarType )type;
-
- BB_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE);
- return 0;
-}
-
-static int
-add_mode(regex_t* reg, ModeType mode)
-{
- BB_ADD(reg, &mode, SIZE_MODE);
- return 0;
-}
-
-static int
-add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
+add_op(regex_t* reg, int opcode)
{
int r;
- r = add_opcode(reg, opcode);
- if (r != 0) return r;
- r = add_rel_addr(reg, addr);
- return r;
-}
+ r = ops_new(reg);
+ if (r != ONIG_NORMAL) return r;
-static int
-add_bytes(regex_t* reg, UChar* bytes, int len)
-{
- BB_ADD(reg, bytes, len);
- return 0;
-}
+#ifdef USE_DIRECT_THREADED_CODE
+ *(reg->ocs + (reg->ops_curr - reg->ops)) = opcode;
+#else
+ reg->ops_curr->opcode = opcode;
+#endif
-static int
-add_bitset(regex_t* reg, BitSetRef bs)
-{
- BB_ADD(reg, bs, SIZE_BITSET);
return 0;
}
@@ -444,27 +604,26 @@ compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)
int r;
int saved_num_null_check = reg->num_null_check;
- if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {
- r = add_opcode(reg, OP_EMPTY_CHECK_START);
- if (r != 0) return r;
- r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
+ if (empty_info != BODY_IS_NOT_EMPTY) {
+ r = add_op(reg, OP_EMPTY_CHECK_START);
if (r != 0) return r;
+ COP(reg)->empty_check_start.mem = reg->num_null_check; /* NULL CHECK ID */
reg->num_null_check++;
}
r = compile_tree(node, reg, env);
if (r != 0) return r;
- if (empty_info != QUANT_BODY_IS_NOT_EMPTY) {
- if (empty_info == QUANT_BODY_IS_EMPTY)
- r = add_opcode(reg, OP_EMPTY_CHECK_END);
- else if (empty_info == QUANT_BODY_IS_EMPTY_MEM)
- r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST);
- else if (empty_info == QUANT_BODY_IS_EMPTY_REC)
- r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
+ if (empty_info != BODY_IS_NOT_EMPTY) {
+ if (empty_info == BODY_IS_EMPTY)
+ r = add_op(reg, OP_EMPTY_CHECK_END);
+ else if (empty_info == BODY_IS_EMPTY_MEM)
+ r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);
+ else if (empty_info == BODY_IS_EMPTY_REC)
+ r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
if (r != 0) return r;
- r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
+ COP(reg)->empty_check_end.mem = saved_num_null_check; /* NULL CHECK ID */
}
return r;
}
@@ -474,13 +633,15 @@ static int
compile_call(CallNode* node, regex_t* reg, ScanEnv* env)
{
int r;
+ int offset;
- r = add_opcode(reg, OP_CALL);
- if (r != 0) return r;
- r = unset_addr_list_add(env->unset_addr_list, BB_GET_OFFSET_POS(reg),
- NODE_CALL_BODY(node));
+ r = add_op(reg, OP_CALL);
if (r != 0) return r;
- r = add_abs_addr(reg, 0 /*dummy addr.*/);
+
+ COP(reg)->call.addr = 0; /* dummy addr. */
+
+ offset = COP_CURR_OFFSET_BYTES(reg, call.addr);
+ r = unset_addr_list_add(env->unset_addr_list, offset, NODE_CALL_BODY(node));
return r;
}
#endif
@@ -501,41 +662,53 @@ static int
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
regex_t* reg ARG_UNUSED, int ignore_case)
{
- int len;
- int op = select_str_opcode(mb_len, str_len, ignore_case);
-
- len = SIZE_OPCODE;
-
- if (op == OP_EXACTMBN) len += SIZE_LENGTH;
- if (IS_NEED_STR_LEN_OP_EXACT(op))
- len += SIZE_LENGTH;
-
- len += mb_len * str_len;
- return len;
+ return 1;
}
static int
add_compile_string(UChar* s, int mb_len, int str_len,
regex_t* reg, int ignore_case)
{
- int op = select_str_opcode(mb_len, str_len, ignore_case);
- add_opcode(reg, op);
+ int op;
+ int r;
+ int byte_len;
+ UChar* p;
+ UChar* end;
+
+ op = select_str_opcode(mb_len, str_len, ignore_case);
+ r = add_op(reg, op);
+ if (r != 0) return r;
- if (op == OP_EXACTMBN)
- add_length(reg, mb_len);
+ byte_len = mb_len * str_len;
+ end = s + byte_len;
+
+ if (op == OP_EXACTMBN) {
+ p = onigenc_strdup(reg->enc, s, end);
+ CHECK_NULL_RETURN_MEMERR(p);
+
+ COP(reg)->exact_len_n.len = mb_len;
+ COP(reg)->exact_len_n.n = str_len;
+ COP(reg)->exact_len_n.s = p;
+ }
+ else if (IS_NEED_STR_LEN_OP_EXACT(op)) {
+ p = onigenc_strdup(reg->enc, s, end);
+ CHECK_NULL_RETURN_MEMERR(p);
- if (IS_NEED_STR_LEN_OP_EXACT(op)) {
if (op == OP_EXACTN_IC)
- add_length(reg, mb_len * str_len);
+ COP(reg)->exact_n.n = byte_len;
else
- add_length(reg, str_len);
+ COP(reg)->exact_n.n = str_len;
+
+ COP(reg)->exact_n.s = p;
+ }
+ else {
+ xmemcpy(COP(reg)->exact.s, s, (size_t )byte_len);
+ COP(reg)->exact.s[byte_len] = '\0';
}
- add_bytes(reg, s, mb_len * str_len);
return 0;
}
-
static int
compile_length_string_node(Node* node, regex_t* reg)
{
@@ -635,52 +808,24 @@ compile_string_raw_node(StrNode* sn, regex_t* reg)
return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);
}
-static int
-add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
+static void*
+set_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
{
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- add_length(reg, mbuf->used);
- return add_bytes(reg, mbuf->p, mbuf->used);
-#else
- int r, pad_size;
- UChar* p = BB_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
+ size_t len;
+ void* p;
- GET_ALIGNMENT_PAD_SIZE(p, pad_size);
- add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
- if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
+ len = (size_t )mbuf->used;
+ p = xmalloc(len);
+ if (IS_NULL(p)) return NULL;
- r = add_bytes(reg, mbuf->p, mbuf->used);
-
- /* padding for return value from compile_length_cclass_node() to be fix. */
- pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
- if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
- return r;
-#endif
+ xmemcpy(p, mbuf->p, len);
+ return p;
}
static int
compile_length_cclass_node(CClassNode* cc, regex_t* reg)
{
- int len;
-
- if (IS_NULL(cc->mbuf)) {
- len = SIZE_OPCODE + SIZE_BITSET;
- }
- else {
- if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
- len = SIZE_OPCODE;
- }
- else {
- len = SIZE_OPCODE + SIZE_BITSET;
- }
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
- len += SIZE_LENGTH + cc->mbuf->used;
-#else
- len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
-#endif
- }
-
- return len;
+ return 1;
}
static int
@@ -689,35 +834,39 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
int r;
if (IS_NULL(cc->mbuf)) {
- if (IS_NCCLASS_NOT(cc))
- add_opcode(reg, OP_CCLASS_NOT);
- else
- add_opcode(reg, OP_CCLASS);
+ r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_NOT : OP_CCLASS);
+ if (r != 0) return r;
- r = add_bitset(reg, cc->bs);
+ COP(reg)->cclass.bsp = xmalloc(SIZE_BITSET);
+ CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass.bsp);
+ xmemcpy(COP(reg)->cclass.bsp, cc->bs, SIZE_BITSET);
}
else {
+ void* p;
+
if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
- if (IS_NCCLASS_NOT(cc))
- add_opcode(reg, OP_CCLASS_MB_NOT);
- else
- add_opcode(reg, OP_CCLASS_MB);
+ r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MB_NOT : OP_CCLASS_MB);
+ if (r != 0) return r;
- r = add_multi_byte_cclass(cc->mbuf, reg);
+ p = set_multi_byte_cclass(cc->mbuf, reg);
+ CHECK_NULL_RETURN_MEMERR(p);
+ COP(reg)->cclass_mb.mb = p;
}
else {
- if (IS_NCCLASS_NOT(cc))
- add_opcode(reg, OP_CCLASS_MIX_NOT);
- else
- add_opcode(reg, OP_CCLASS_MIX);
-
- r = add_bitset(reg, cc->bs);
+ r = add_op(reg, IS_NCCLASS_NOT(cc) ? OP_CCLASS_MIX_NOT : OP_CCLASS_MIX);
if (r != 0) return r;
- r = add_multi_byte_cclass(cc->mbuf, reg);
+
+ COP(reg)->cclass_mix.bsp = xmalloc(SIZE_BITSET);
+ CHECK_NULL_RETURN_MEMERR(COP(reg)->cclass_mix.bsp);
+ xmemcpy(COP(reg)->cclass_mix.bsp, cc->bs, SIZE_BITSET);
+
+ p = set_multi_byte_cclass(cc->mbuf, reg);
+ CHECK_NULL_RETURN_MEMERR(p);
+ COP(reg)->cclass_mix.mb = p;
}
}
- return r;
+ return 0;
}
static int
@@ -736,8 +885,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
else if (reg->repeat_range_alloc <= id) {
int n;
n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
- p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
- sizeof(OnigRepeatRange) * n);
+ p = (OnigRepeatRange* )xrealloc(reg->repeat_range, sizeof(OnigRepeatRange) * n);
CHECK_NULL_RETURN_MEMERR(p);
reg->repeat_range = p;
reg->repeat_range_alloc = n;
@@ -756,16 +904,14 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
regex_t* reg, ScanEnv* env)
{
int r;
- int num_repeat = reg->num_repeat;
+ int num_repeat = reg->num_repeat++;
- r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
- if (r != 0) return r;
- r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
- reg->num_repeat++;
- if (r != 0) return r;
- r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
+ r = add_op(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
if (r != 0) return r;
+ COP(reg)->repeat.id = num_repeat;
+ COP(reg)->repeat.addr = SIZE_INC_OP + target_len + SIZE_OP_REPEAT_INC;
+
r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
if (r != 0) return r;
@@ -777,13 +923,14 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
NODE_IS_IN_MULTI_ENTRY(qn) ||
#endif
NODE_IS_IN_REAL_REPEAT(qn)) {
- r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
+ r = add_op(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
}
else {
- r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
+ r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
}
if (r != 0) return r;
- r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
+
+ COP(reg)->repeat_inc.id = num_repeat;
return r;
}
@@ -797,7 +944,7 @@ is_anychar_infinite_greedy(QuantNode* qn)
return 0;
}
-#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
+#define QUANTIFIER_EXPAND_LIMIT_SIZE 10
#define CKN_ON (ckn > 0)
static int
@@ -805,7 +952,7 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
{
int len, mod_tlen;
int infinite = IS_REPEAT_INFINITE(qn->upper);
- enum QuantBodyEmpty empty_info = qn->body_empty_info;
+ enum BodyEmpty empty_info = qn->empty_info;
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
@@ -822,7 +969,7 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
}
}
- if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
+ if (empty_info == BODY_IS_NOT_EMPTY)
mod_tlen = tlen;
else
mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
@@ -838,9 +985,12 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
}
if (qn->greedy) {
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact))
len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
- else if (IS_NOT_NULL(qn->next_head_exact))
+ else
+#endif
+ if (IS_NOT_NULL(qn->next_head_exact))
len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
else
len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
@@ -848,8 +998,12 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
else
len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
}
- else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
- len = SIZE_OP_JUMP + tlen;
+ else if (qn->upper == 0) {
+ if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ len = SIZE_OP_JUMP + tlen;
+ }
+ else
+ len = 0;
}
else if (!infinite && qn->greedy &&
(qn->upper == 1 ||
@@ -862,8 +1016,7 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
}
else {
- len = SIZE_OP_REPEAT_INC
- + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
+ len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OP_REPEAT;
}
return len;
@@ -874,7 +1027,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
{
int i, r, mod_tlen;
int infinite = IS_REPEAT_INFINITE(qn->upper);
- enum QuantBodyEmpty empty_info = qn->body_empty_info;
+ enum BodyEmpty empty_info = qn->empty_info;
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
@@ -886,22 +1039,23 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
if (r != 0) return r;
if (IS_NOT_NULL(qn->next_head_exact)) {
- if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
- r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
- else
- r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
+ r = add_op(reg,
+ IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ?
+ OP_ANYCHAR_ML_STAR_PEEK_NEXT : OP_ANYCHAR_STAR_PEEK_NEXT);
if (r != 0) return r;
- return add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
+
+ COP(reg)->anychar_star_peek_next.c = STR_(qn->next_head_exact)->s[0];
+ return 0;
}
else {
- if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)))
- return add_opcode(reg, OP_ANYCHAR_ML_STAR);
- else
- return add_opcode(reg, OP_ANYCHAR_STAR);
+ r = add_op(reg,
+ IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg)) ?
+ OP_ANYCHAR_ML_STAR : OP_ANYCHAR_STAR);
+ return r;
}
}
- if (empty_info == QUANT_BODY_IS_NOT_EMPTY)
+ if (empty_info == BODY_IS_NOT_EMPTY)
mod_tlen = tlen;
else
mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
@@ -909,19 +1063,25 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
if (infinite &&
(qn->lower <= 1 ||
int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
+ int addr;
+
if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
+ r = add_op(reg, OP_JUMP);
+ if (r != 0) return r;
if (qn->greedy) {
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact))
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
- else if (IS_NOT_NULL(qn->next_head_exact))
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
+ COP(reg)->jump.addr = SIZE_OP_PUSH_OR_JUMP_EXACT1 + SIZE_INC_OP;
else
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
+#endif
+ if (IS_NOT_NULL(qn->next_head_exact))
+ COP(reg)->jump.addr = SIZE_OP_PUSH_IF_PEEK_NEXT + SIZE_INC_OP;
+ else
+ COP(reg)->jump.addr = SIZE_OP_PUSH + SIZE_INC_OP;
}
else {
- r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
+ COP(reg)->jump.addr = SIZE_OP_JUMP + SIZE_INC_OP;
}
- if (r != 0) return r;
}
else {
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
@@ -929,47 +1089,71 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
}
if (qn->greedy) {
+#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact)) {
- r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
- mod_tlen + SIZE_OP_JUMP);
+ r = add_op(reg, OP_PUSH_OR_JUMP_EXACT1);
if (r != 0) return r;
- add_bytes(reg, STR_(qn->head_exact)->s, 1);
+ COP(reg)->push_or_jump_exact1.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
+ COP(reg)->push_or_jump_exact1.c = STR_(qn->head_exact)->s[0];
+
r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
if (r != 0) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
+
+ addr = -(mod_tlen + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1);
}
- else if (IS_NOT_NULL(qn->next_head_exact)) {
- r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
- mod_tlen + SIZE_OP_JUMP);
+ else
+#endif
+ if (IS_NOT_NULL(qn->next_head_exact)) {
+ r = add_op(reg, OP_PUSH_IF_PEEK_NEXT);
if (r != 0) return r;
- add_bytes(reg, STR_(qn->next_head_exact)->s, 1);
+ COP(reg)->push_if_peek_next.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
+ COP(reg)->push_if_peek_next.c = STR_(qn->next_head_exact)->s[0];
+
r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
if (r != 0) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
+
+ addr = -(mod_tlen + (int )SIZE_OP_PUSH_IF_PEEK_NEXT);
}
else {
- r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
+ r = add_op(reg, OP_PUSH);
if (r != 0) return r;
+ COP(reg)->push.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
+
r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
if (r != 0) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
+
+ addr = -(mod_tlen + (int )SIZE_OP_PUSH);
}
+
+ r = add_op(reg, OP_JUMP);
+ if (r != 0) return r;
+ COP(reg)->jump.addr = addr;
}
else {
- r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
+ r = add_op(reg, OP_JUMP);
if (r != 0) return r;
+ COP(reg)->jump.addr = mod_tlen + SIZE_INC_OP;
+
r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
if (r != 0) return r;
- r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
+
+ r = add_op(reg, OP_PUSH);
+ if (r != 0) return r;
+ COP(reg)->push.addr = -mod_tlen;
}
}
- else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
- if (r != 0) return r;
- r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
+ else if (qn->upper == 0) {
+ if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ r = add_op(reg, OP_JUMP);
+ if (r != 0) return r;
+ COP(reg)->jump.addr = tlen + SIZE_INC_OP;
+
+ r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
+ }
+ else {
+ /* Nothing output */
+ r = 0;
+ }
}
else if (! infinite && qn->greedy &&
(qn->upper == 1 ||
@@ -981,19 +1165,26 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
if (r != 0) return r;
for (i = 0; i < n; i++) {
- int v = onig_positive_int_multiply(n - i, tlen);
+ int v = onig_positive_int_multiply(n - i, tlen + SIZE_OP_PUSH);
if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
- r = add_opcode_rel_addr(reg, OP_PUSH, v + (n - i - 1) * SIZE_OP_PUSH);
+
+ r = add_op(reg, OP_PUSH);
if (r != 0) return r;
+ COP(reg)->push.addr = v;
+
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
if (r != 0) return r;
}
}
else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
+ r = add_op(reg, OP_PUSH);
if (r != 0) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
+ COP(reg)->push.addr = SIZE_INC_OP + SIZE_OP_JUMP;
+
+ r = add_op(reg, OP_JUMP);
if (r != 0) return r;
+ COP(reg)->jump.addr = tlen + SIZE_INC_OP;
+
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
}
else {
@@ -1126,7 +1317,8 @@ compile_length_bag_node(BagNode* node, regex_t* reg)
break;
case BAG_OPTION:
- len = tlen;
+ /* never come here, but set for escape warning */
+ len = 0;
break;
}
@@ -1142,75 +1334,75 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
int len;
#ifdef USE_CALL
- if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
- r = add_opcode(reg, OP_CALL);
+ if (NODE_IS_CALLED(node)) {
+ r = add_op(reg, OP_CALL);
if (r != 0) return r;
- node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
+
+ node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + SIZE_OP_JUMP;
NODE_STATUS_ADD(node, ADDR_FIXED);
- r = add_abs_addr(reg, (int )node->m.called_addr);
- if (r != 0) return r;
- len = compile_length_tree(NODE_BAG_BODY(node), reg);
- len += SIZE_OP_RETURN;
- r = add_opcode_rel_addr(reg, OP_JUMP, len);
- if (r != 0) return r;
+ COP(reg)->call.addr = (int )node->m.called_addr;
- r = compile_tree(NODE_BAG_BODY(node), reg, env);
- if (r != 0) return r;
- r = add_opcode(reg, OP_RETURN);
- return r;
- }
+ if (node->m.regnum == 0) {
+ len = compile_length_tree(NODE_BAG_BODY(node), reg);
+ len += SIZE_OP_RETURN;
- if (NODE_IS_CALLED(node)) {
- r = add_opcode(reg, OP_CALL);
- if (r != 0) return r;
- node->m.called_addr = BB_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
- NODE_STATUS_ADD(node, ADDR_FIXED);
- r = add_abs_addr(reg, (int )node->m.called_addr);
- if (r != 0) return r;
- len = compile_length_tree(NODE_BAG_BODY(node), reg);
- len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
- len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
- else
- len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ r = add_op(reg, OP_JUMP);
+ if (r != 0) return r;
+ COP(reg)->jump.addr = len + SIZE_INC_OP;
- r = add_opcode_rel_addr(reg, OP_JUMP, len);
- if (r != 0) return r;
+ r = compile_tree(NODE_BAG_BODY(node), reg, env);
+ if (r != 0) return r;
+
+ r = add_op(reg, OP_RETURN);
+ return r;
+ }
+ else {
+ len = compile_length_tree(NODE_BAG_BODY(node), reg);
+ len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
+ if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
+ len += (NODE_IS_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+ else
+ len += (NODE_IS_RECURSION(node)
+ ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+
+ r = add_op(reg, OP_JUMP);
+ if (r != 0) return r;
+ COP(reg)->jump.addr = len + SIZE_INC_OP;
+ }
}
#endif
if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
- r = add_opcode(reg, OP_MEMORY_START_PUSH);
+ r = add_op(reg, OP_MEMORY_START_PUSH);
else
- r = add_opcode(reg, OP_MEMORY_START);
- if (r != 0) return r;
- r = add_mem_num(reg, node->m.regnum);
+ r = add_op(reg, OP_MEMORY_START);
if (r != 0) return r;
+ COP(reg)->memory_start.num = node->m.regnum;
+
r = compile_tree(NODE_BAG_BODY(node), reg, env);
if (r != 0) return r;
#ifdef USE_CALL
if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
- r = add_opcode(reg, (NODE_IS_RECURSION(node)
- ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
+ r = add_op(reg, (NODE_IS_RECURSION(node)
+ ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
else
- r = add_opcode(reg, (NODE_IS_RECURSION(node)
- ? OP_MEMORY_END_REC : OP_MEMORY_END));
+ r = add_op(reg, (NODE_IS_RECURSION(node) ? OP_MEMORY_END_REC : OP_MEMORY_END));
if (r != 0) return r;
- r = add_mem_num(reg, node->m.regnum);
+ COP(reg)->memory_end.num = node->m.regnum;
+
if (NODE_IS_CALLED(node)) {
if (r != 0) return r;
- r = add_opcode(reg, OP_RETURN);
+ r = add_op(reg, OP_RETURN);
}
#else
if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
- r = add_opcode(reg, OP_MEMORY_END_PUSH);
+ r = add_op(reg, OP_MEMORY_END_PUSH);
else
- r = add_opcode(reg, OP_MEMORY_END);
+ r = add_op(reg, OP_MEMORY_END);
if (r != 0) return r;
- r = add_mem_num(reg, node->m.regnum);
+ COP(reg)->memory_end.num = node->m.regnum;
#endif
return r;
@@ -1239,21 +1431,25 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
len = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (len < 0) return len;
- r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP_OUT + SIZE_OP_JUMP);
+ r = add_op(reg, OP_PUSH);
if (r != 0) return r;
+ COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
+
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
if (r != 0) return r;
- r = add_opcode(reg, OP_POP_OUT);
+ r = add_op(reg, OP_POP_OUT);
+ if (r != 0) return r;
+
+ r = add_op(reg, OP_JUMP);
if (r != 0) return r;
- r = add_opcode_rel_addr(reg, OP_JUMP,
- -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT + (int )SIZE_OP_JUMP));
+ COP(reg)->jump.addr = -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT);
}
else {
- r = add_opcode(reg, OP_ATOMIC_START);
+ r = add_op(reg, OP_ATOMIC_START);
if (r != 0) return r;
r = compile_tree(NODE_BAG_BODY(node), reg, env);
if (r != 0) return r;
- r = add_opcode(reg, OP_ATOMIC_END);
+ r = add_op(reg, OP_ATOMIC_END);
}
break;
@@ -1264,7 +1460,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
Node* Then = node->te.Then;
Node* Else = node->te.Else;
- r = add_opcode(reg, OP_ATOMIC_START);
+ r = add_op(reg, OP_ATOMIC_START);
if (r != 0) return r;
cond_len = compile_length_tree(cond, reg);
@@ -1279,11 +1475,13 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END;
if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP;
- r = add_opcode_rel_addr(reg, OP_PUSH, jump_len);
+ r = add_op(reg, OP_PUSH);
if (r != 0) return r;
+ COP(reg)->push.addr = SIZE_INC_OP + jump_len;
+
r = compile_tree(cond, reg, env);
if (r != 0) return r;
- r = add_opcode(reg, OP_ATOMIC_END);
+ r = add_op(reg, OP_ATOMIC_END);
if (r != 0) return r;
if (IS_NOT_NULL(Then)) {
@@ -1293,8 +1491,10 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
if (IS_NOT_NULL(Else)) {
int else_len = compile_length_tree(Else, reg);
- r = add_opcode_rel_addr(reg, OP_JUMP, else_len);
+ r = add_op(reg, OP_JUMP);
if (r != 0) return r;
+ COP(reg)->jump.addr = else_len + SIZE_INC_OP;
+
r = compile_tree(Else, reg, env);
}
}
@@ -1338,8 +1538,8 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)
len = SIZE_OP_WORD_BOUNDARY;
break;
- case ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
- case ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
+ case ANCR_TEXT_SEGMENT_BOUNDARY:
+ case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
len = SIZE_OPCODE;
break;
@@ -1358,19 +1558,19 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
enum OpCode op;
switch (node->type) {
- case ANCR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
- case ANCR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
- case ANCR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
- case ANCR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
- case ANCR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
- case ANCR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
+ case ANCR_BEGIN_BUF: r = add_op(reg, OP_BEGIN_BUF); break;
+ case ANCR_END_BUF: r = add_op(reg, OP_END_BUF); break;
+ case ANCR_BEGIN_LINE: r = add_op(reg, OP_BEGIN_LINE); break;
+ case ANCR_END_LINE: r = add_op(reg, OP_END_LINE); break;
+ case ANCR_SEMI_END_BUF: r = add_op(reg, OP_SEMI_END_BUF); break;
+ case ANCR_BEGIN_POSITION: r = add_op(reg, OP_BEGIN_POSITION); break;
case ANCR_WORD_BOUNDARY:
op = OP_WORD_BOUNDARY;
word:
- r = add_opcode(reg, op);
+ r = add_op(reg, op);
if (r != 0) return r;
- r = add_mode(reg, (ModeType )node->ascii_mode);
+ COP(reg)->word_boundary.mode = (ModeType )node->ascii_mode;
break;
case ANCR_NO_WORD_BOUNDARY:
@@ -1385,36 +1585,50 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
break;
#endif
- case ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
- r = add_opcode(reg, OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
- break;
+ case ANCR_TEXT_SEGMENT_BOUNDARY:
+ case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
+ {
+ enum TextSegmentBoundaryType type;
+
+ r = add_op(reg, OP_TEXT_SEGMENT_BOUNDARY);
+ if (r != 0) return r;
- case ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
- r = add_opcode(reg, OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY);
+ type = EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
+#ifdef USE_UNICODE_WORD_BREAK
+ if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_TEXT_SEGMENT_WORD))
+ type = WORD_BOUNDARY;
+#endif
+
+ COP(reg)->text_segment_boundary.type = type;
+ COP(reg)->text_segment_boundary.not =
+ (node->type == ANCR_NO_TEXT_SEGMENT_BOUNDARY ? 1 : 0);
+ }
break;
case ANCR_PREC_READ:
- r = add_opcode(reg, OP_PREC_READ_START);
+ r = add_op(reg, OP_PREC_READ_START);
if (r != 0) return r;
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
if (r != 0) return r;
- r = add_opcode(reg, OP_PREC_READ_END);
+ r = add_op(reg, OP_PREC_READ_END);
break;
case ANCR_PREC_READ_NOT:
len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
if (len < 0) return len;
- r = add_opcode_rel_addr(reg, OP_PREC_READ_NOT_START, len + SIZE_OP_PREC_READ_NOT_END);
+
+ r = add_op(reg, OP_PREC_READ_NOT_START);
if (r != 0) return r;
+ COP(reg)->prec_read_not_start.addr = SIZE_INC_OP + len + SIZE_OP_PREC_READ_NOT_END;
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
if (r != 0) return r;
- r = add_opcode(reg, OP_PREC_READ_NOT_END);
+ r = add_op(reg, OP_PREC_READ_NOT_END);
break;
case ANCR_LOOK_BEHIND:
{
int n;
- r = add_opcode(reg, OP_LOOK_BEHIND);
+ r = add_op(reg, OP_LOOK_BEHIND);
if (r != 0) return r;
if (node->char_len < 0) {
r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);
@@ -1423,8 +1637,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
else
n = node->char_len;
- r = add_length(reg, n);
- if (r != 0) return r;
+ COP(reg)->look_behind.len = n;
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
}
break;
@@ -1434,20 +1647,22 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
int n;
len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
- r = add_opcode_rel_addr(reg, OP_LOOK_BEHIND_NOT_START,
- len + SIZE_OP_LOOK_BEHIND_NOT_END);
+ r = add_op(reg, OP_LOOK_BEHIND_NOT_START);
if (r != 0) return r;
+ COP(reg)->look_behind_not_start.addr = SIZE_INC_OP + len + SIZE_OP_LOOK_BEHIND_NOT_END;
+
if (node->char_len < 0) {
r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);
if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
else
n = node->char_len;
- r = add_length(reg, n);
- if (r != 0) return r;
+
+ COP(reg)->look_behind_not_start.len = n;
+
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
if (r != 0) return r;
- r = add_opcode(reg, OP_LOOK_BEHIND_NOT_END);
+ r = add_op(reg, OP_LOOK_BEHIND_NOT_END);
}
break;
@@ -1466,31 +1681,21 @@ compile_gimmick_node(GimmickNode* node, regex_t* reg)
switch (node->type) {
case GIMMICK_FAIL:
- r = add_opcode(reg, OP_FAIL);
- break;
-
- case GIMMICK_KEEP:
- r = add_opcode(reg, OP_PUSH_SAVE_VAL);
- if (r != 0) return r;
- r = add_save_type(reg, SAVE_KEEP);
- if (r != 0) return r;
- r = add_mem_num(reg, node->id);
+ r = add_op(reg, OP_FAIL);
break;
case GIMMICK_SAVE:
- r = add_opcode(reg, OP_PUSH_SAVE_VAL);
- if (r != 0) return r;
- r = add_save_type(reg, node->detail_type);
+ r = add_op(reg, OP_PUSH_SAVE_VAL);
if (r != 0) return r;
- r = add_mem_num(reg, node->id);
+ COP(reg)->push_save_val.type = node->detail_type;
+ COP(reg)->push_save_val.id = node->id;
break;
case GIMMICK_UPDATE_VAR:
- r = add_opcode(reg, OP_UPDATE_VAR);
+ r = add_op(reg, OP_UPDATE_VAR);
if (r != 0) return r;
- r = add_update_var_type(reg, node->detail_type);
- if (r != 0) return r;
- r = add_mem_num(reg, node->id);
+ COP(reg)->update_var.type = node->detail_type;
+ COP(reg)->update_var.id = node->id;
break;
#ifdef USE_CALLOUT
@@ -1499,15 +1704,17 @@ compile_gimmick_node(GimmickNode* node, regex_t* reg)
case ONIG_CALLOUT_OF_CONTENTS:
case ONIG_CALLOUT_OF_NAME:
{
- r = add_opcode(reg, (node->detail_type == ONIG_CALLOUT_OF_CONTENTS) ?
- OP_CALLOUT_CONTENTS : OP_CALLOUT_NAME);
- if (r != 0) return r;
if (node->detail_type == ONIG_CALLOUT_OF_NAME) {
- r = add_mem_num(reg, node->id);
+ r = add_op(reg, OP_CALLOUT_NAME);
if (r != 0) return r;
+ COP(reg)->callout_name.id = node->id;
+ COP(reg)->callout_name.num = node->num;
+ }
+ else {
+ r = add_op(reg, OP_CALLOUT_CONTENTS);
+ if (r != 0) return r;
+ COP(reg)->callout_contents.num = node->num;
}
- r = add_mem_num(reg, node->num);
- if (r != 0) return r;
}
break;
@@ -1531,7 +1738,6 @@ compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
len = SIZE_OP_FAIL;
break;
- case GIMMICK_KEEP:
case GIMMICK_SAVE:
len = SIZE_OP_PUSH_SAVE_VAL;
break;
@@ -1606,35 +1812,7 @@ compile_length_tree(Node* node, regex_t* reg)
break;
case NODE_BACKREF:
- {
- BackRefNode* br = BACKREF_(node);
-
- if (NODE_IS_CHECKER(node)) {
-#ifdef USE_BACKREF_WITH_LEVEL
- if (NODE_IS_NEST_LEVEL(node)) {
- r = SIZE_OPCODE + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
- }
- else
-#endif
- r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
- }
- else {
-#ifdef USE_BACKREF_WITH_LEVEL
- if (NODE_IS_NEST_LEVEL(node)) {
- r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
- SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
- }
- else
-#endif
- if (br->back_num == 1) {
- r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
- ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
- }
- else {
- r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
- }
- }
- }
+ r = SIZE_OP_BACKREF;
break;
#ifdef USE_CALL
@@ -1689,21 +1867,23 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
len += SIZE_OP_PUSH + SIZE_OP_JUMP;
}
} while (IS_NOT_NULL(x = NODE_CDR(x)));
- pos = reg->used + len; /* goal position */
+ pos = COP_CURR_OFFSET(reg) + 1 + len; /* goal position */
do {
len = compile_length_tree(NODE_CAR(node), reg);
if (IS_NOT_NULL(NODE_CDR(node))) {
enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
- r = add_opcode_rel_addr(reg, push, len + SIZE_OP_JUMP);
+ r = add_op(reg, push);
if (r != 0) break;
+ COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_JUMP;
}
r = compile_tree(NODE_CAR(node), reg, env);
if (r != 0) break;
if (IS_NOT_NULL(NODE_CDR(node))) {
- len = pos - (reg->used + SIZE_OP_JUMP);
- r = add_opcode_rel_addr(reg, OP_JUMP, len);
+ len = pos - (COP_CURR_OFFSET(reg) + 1);
+ r = add_op(reg, OP_JUMP);
if (r != 0) break;
+ COP(reg)->jump.addr = len;
}
} while (IS_NOT_NULL(node = NODE_CDR(node)));
}
@@ -1726,10 +1906,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
switch (CTYPE_(node)->ctype) {
case CTYPE_ANYCHAR:
- if (IS_MULTILINE(CTYPE_OPTION(node, reg)))
- r = add_opcode(reg, OP_ANYCHAR_ML);
- else
- r = add_opcode(reg, OP_ANYCHAR);
+ r = add_op(reg, IS_MULTILINE(CTYPE_OPTION(node, reg)) ?
+ OP_ANYCHAR_ML : OP_ANYCHAR);
break;
case ONIGENC_CTYPE_WORD:
@@ -1739,7 +1917,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
else {
op = CTYPE_(node)->not != 0 ? OP_NO_WORD_ASCII : OP_WORD_ASCII;
}
- r = add_opcode(reg, op);
+ r = add_op(reg, op);
break;
default:
@@ -1756,30 +1934,28 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
if (NODE_IS_CHECKER(node)) {
#ifdef USE_BACKREF_WITH_LEVEL
if (NODE_IS_NEST_LEVEL(node)) {
- r = add_opcode(reg, OP_BACKREF_CHECK_WITH_LEVEL);
- if (r != 0) return r;
- r = add_length(reg, br->nest_level);
+ r = add_op(reg, OP_BACKREF_CHECK_WITH_LEVEL);
if (r != 0) return r;
+ COP(reg)->backref_general.nest_level = br->nest_level;
}
else
#endif
{
- r = add_opcode(reg, OP_BACKREF_CHECK);
+ r = add_op(reg, OP_BACKREF_CHECK);
if (r != 0) return r;
}
-
goto add_bacref_mems;
}
else {
#ifdef USE_BACKREF_WITH_LEVEL
if (NODE_IS_NEST_LEVEL(node)) {
- r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
- if (r != 0) return r;
- r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
- if (r != 0) return r;
- r = add_length(reg, br->nest_level);
- if (r != 0) return r;
+ if ((reg->options & ONIG_OPTION_IGNORECASE) != 0)
+ r = add_op(reg, OP_BACKREF_WITH_LEVEL_IC);
+ else
+ r = add_op(reg, OP_BACKREF_WITH_LEVEL);
+ if (r != 0) return r;
+ COP(reg)->backref_general.nest_level = br->nest_level;
goto add_bacref_mems;
}
else
@@ -1787,41 +1963,47 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
if (br->back_num == 1) {
n = br->back_static[0];
if (IS_IGNORECASE(reg->options)) {
- r = add_opcode(reg, OP_BACKREF_N_IC);
+ r = add_op(reg, OP_BACKREF_N_IC);
if (r != 0) return r;
- r = add_mem_num(reg, n);
+ COP(reg)->backref_n.n1 = n;
}
else {
switch (n) {
- case 1: r = add_opcode(reg, OP_BACKREF1); break;
- case 2: r = add_opcode(reg, OP_BACKREF2); break;
+ case 1: r = add_op(reg, OP_BACKREF1); break;
+ case 2: r = add_op(reg, OP_BACKREF2); break;
default:
- r = add_opcode(reg, OP_BACKREF_N);
+ r = add_op(reg, OP_BACKREF_N);
if (r != 0) return r;
- r = add_mem_num(reg, n);
+ COP(reg)->backref_n.n1 = n;
break;
}
}
}
else {
- int i;
+ int num;
int* p;
- if (IS_IGNORECASE(reg->options)) {
- r = add_opcode(reg, OP_BACKREF_MULTI_IC);
- }
- else {
- r = add_opcode(reg, OP_BACKREF_MULTI);
- }
+ r = add_op(reg, IS_IGNORECASE(reg->options) ?
+ OP_BACKREF_MULTI_IC : OP_BACKREF_MULTI);
if (r != 0) return r;
add_bacref_mems:
- r = add_length(reg, br->back_num);
- if (r != 0) return r;
- p = BACKREFS_P(br);
- for (i = br->back_num - 1; i >= 0; i--) {
- r = add_mem_num(reg, p[i]);
- if (r != 0) return r;
+ num = br->back_num;
+ COP(reg)->backref_general.num = num;
+ if (num == 1) {
+ COP(reg)->backref_general.n1 = br->back_static[0];
+ }
+ else {
+ int i, j;
+ MemNumType* ns;
+
+ ns = xmalloc(sizeof(MemNumType) * num);
+ CHECK_NULL_RETURN_MEMERR(ns);
+ COP(reg)->backref_general.ns = ns;
+ p = BACKREFS_P(br);
+ for (i = num - 1, j = 0; i >= 0; i--, j++) {
+ ns[j] = p[i];
+ }
}
}
}
@@ -2113,6 +2295,7 @@ fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
int i, offset;
BagNode* en;
AbsAddrType addr;
+ AbsAddrType* paddr;
for (i = 0; i < uslist->num; i++) {
if (! NODE_IS_ADDR_FIXED(uslist->us[i].target))
@@ -2122,7 +2305,8 @@ fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg)
addr = en->m.called_addr;
offset = uslist->us[i].offset;
- BB_WRITE(reg, offset, &addr, SIZE_ABSADDR);
+ paddr = (AbsAddrType* )((char* )reg->ops + offset);
+ *paddr = addr;
}
return 0;
}
@@ -2598,8 +2782,7 @@ check_type_tree(Node* node, int type_mask, int bag_mask, int anchor_mask)
case NODE_LIST:
case NODE_ALT:
do {
- r = check_type_tree(NODE_CAR(node), type_mask, bag_mask,
- anchor_mask);
+ r = check_type_tree(NODE_CAR(node), type_mask, bag_mask, anchor_mask);
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
@@ -3819,10 +4002,10 @@ expand_case_fold_string(Node* node, regex_t* reg, int state)
}
#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
-static enum QuantBodyEmpty
+static enum BodyEmpty
quantifiers_memory_node_info(Node* node)
{
- int r = QUANT_BODY_IS_EMPTY;
+ int r = BODY_IS_EMPTY;
switch (NODE_TYPE(node)) {
case NODE_LIST:
@@ -3839,7 +4022,7 @@ quantifiers_memory_node_info(Node* node)
#ifdef USE_CALL
case NODE_CALL:
if (NODE_IS_RECURSION(node)) {
- return QUANT_BODY_IS_EMPTY_REC; /* tiny version */
+ return BODY_IS_EMPTY_REC; /* tiny version */
}
else
r = quantifiers_memory_node_info(NODE_BODY(node));
@@ -3861,9 +4044,9 @@ quantifiers_memory_node_info(Node* node)
switch (en->type) {
case BAG_MEMORY:
if (NODE_IS_RECURSION(node)) {
- return QUANT_BODY_IS_EMPTY_REC;
+ return BODY_IS_EMPTY_REC;
}
- return QUANT_BODY_IS_EMPTY_MEM;
+ return BODY_IS_EMPTY_MEM;
break;
case BAG_OPTION:
@@ -4340,22 +4523,20 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
| NODE_BIT_CTYPE | NODE_BIT_ANCHOR | NODE_BIT_BAG | NODE_BIT_QUANT \
| NODE_BIT_CALL | NODE_BIT_GIMMICK)
-#define ALLOWED_BAG_IN_LB ( 1<<BAG_MEMORY | 1<<BAG_OPTION )
-#define ALLOWED_BAG_IN_LB_NOT (1<<BAG_OPTION)
+#define ALLOWED_BAG_IN_LB ( 1<<BAG_MEMORY | 1<<BAG_OPTION | 1<<BAG_IF_ELSE )
+#define ALLOWED_BAG_IN_LB_NOT ( 1<<BAG_OPTION | 1<<BAG_IF_ELSE )
#define ALLOWED_ANCHOR_IN_LB \
( ANCR_LOOK_BEHIND | ANCR_BEGIN_LINE | ANCR_END_LINE | ANCR_BEGIN_BUF \
| ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY | ANCR_NO_WORD_BOUNDARY \
| ANCR_WORD_BEGIN | ANCR_WORD_END \
- | ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
- | ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
+ | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
#define ALLOWED_ANCHOR_IN_LB_NOT \
( ANCR_LOOK_BEHIND | ANCR_LOOK_BEHIND_NOT | ANCR_BEGIN_LINE \
| ANCR_END_LINE | ANCR_BEGIN_BUF | ANCR_BEGIN_POSITION | ANCR_WORD_BOUNDARY \
| ANCR_NO_WORD_BOUNDARY | ANCR_WORD_BEGIN | ANCR_WORD_END \
- | ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY \
- | ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY )
+ | ANCR_TEXT_SEGMENT_BOUNDARY | ANCR_NO_TEXT_SEGMENT_BOUNDARY )
int r;
AnchorNode* an = ANCHOR_(node);
@@ -4423,15 +4604,15 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
d = tree_min_len(body, env);
if (d == 0) {
#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
- qn->body_empty_info = quantifiers_memory_node_info(body);
- if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) {
+ qn->empty_info = quantifiers_memory_node_info(body);
+ if (qn->empty_info == BODY_IS_EMPTY_REC) {
if (NODE_TYPE(body) == NODE_BAG &&
BAG_(body)->type == BAG_MEMORY) {
MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum);
}
}
#else
- qn->body_empty_info = QUANT_BODY_IS_EMPTY;
+ qn->empty_info = BODY_IS_EMPTY;
#endif
}
}
@@ -4465,8 +4646,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
}
}
-#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- if (qn->greedy && (qn->body_empty_info != QUANT_BODY_IS_NOT_EMPTY)) {
+ if (qn->greedy && (qn->empty_info == BODY_IS_NOT_EMPTY)) {
if (NODE_TYPE(body) == NODE_QUANT) {
QuantNode* tqn = QUANT_(body);
if (IS_NOT_NULL(tqn->head_exact)) {
@@ -4478,13 +4658,12 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg);
}
}
-#endif
return r;
}
/* setup_tree does the following work.
- 1. check empty loop. (set qn->body_empty_info)
+ 1. check empty loop. (set qn->empty_info)
2. expand ignore-case in char class.
3. set memory status bit flags. (reg->mem_stats)
4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
@@ -6052,12 +6231,15 @@ onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_en
return ONIG_NORMAL;
}
-
extern void
onig_free_body(regex_t* reg)
{
if (IS_NOT_NULL(reg)) {
- if (IS_NOT_NULL(reg->p)) xfree(reg->p);
+ ops_free(reg);
+ if (IS_NOT_NULL(reg->string_pool)) {
+ xfree(reg->string_pool);
+ reg->string_pool_end = reg->string_pool = 0;
+ }
if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
if (IS_NOT_NULL(reg->extp)) {
@@ -6078,30 +6260,18 @@ onig_free(regex_t* reg)
}
}
-#define REGEX_TRANSFER(to,from) do {\
- onig_free_body(to);\
- xmemcpy(to, from, sizeof(regex_t));\
- xfree(from);\
-} while (0)
-
-extern void
-onig_transfer(regex_t* to, regex_t* from)
-{
- REGEX_TRANSFER(to, from);
-}
-
#ifdef ONIG_DEBUG_PARSE
static void print_tree P_((FILE* f, Node* node));
#endif
+extern int onig_init_for_match_at(regex_t* reg);
+
extern int
onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
OnigErrorInfo* einfo)
{
-#define COMPILE_INIT_SIZE 20
-
- int r, init_size;
+ int r;
Node* root;
ScanEnv scan_env;
#ifdef USE_CALL
@@ -6118,15 +6288,15 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
print_enc_string(stderr, reg->enc, pattern, pattern_end);
#endif
- if (reg->alloc == 0) {
- init_size = (int )(pattern_end - pattern) * 2;
- if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
- r = BB_INIT(reg, init_size);
+ if (reg->ops_alloc == 0) {
+ r = ops_init(reg, OPS_INIT_SIZE);
if (r != 0) goto end;
}
else
- reg->used = 0;
+ reg->ops_used = 0;
+ reg->string_pool = 0;
+ reg->string_pool_end = 0;
reg->num_mem = 0;
reg->num_repeat = 0;
reg->num_null_check = 0;
@@ -6203,15 +6373,16 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
r = compile_tree(root, reg, &scan_env);
if (r == 0) {
if (scan_env.keep_num > 0) {
- r = add_opcode(reg, OP_UPDATE_VAR);
- if (r != 0) goto err;
- r = add_update_var_type(reg, UPDATE_VAR_KEEP_FROM_STACK_LAST);
- if (r != 0) goto err;
- r = add_mem_num(reg, 0 /* not used */);
+ r = add_op(reg, OP_UPDATE_VAR);
if (r != 0) goto err;
+
+ COP(reg)->update_var.type = UPDATE_VAR_KEEP_FROM_STACK_LAST;
+ COP(reg)->update_var.id = 0; /* not used */
}
- r = add_opcode(reg, OP_END);
+ r = add_op(reg, OP_END);
+ if (r != 0) goto err;
+
#ifdef USE_CALL
if (scan_env.num_call > 0) {
r = fix_unset_addr_list(&uslist, reg);
@@ -6232,6 +6403,9 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
else
reg->stack_pop_level = STACK_POP_LEVEL_FREE;
}
+
+ r = ops_make_string_pool(reg);
+ if (r != 0) goto err;
}
#ifdef USE_CALL
else if (scan_env.num_call > 0) {
@@ -6245,6 +6419,11 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
onig_print_compiled_byte_code_list(stderr, reg);
#endif
+#ifdef USE_DIRECT_THREADED_CODE
+ /* opcode -> opaddr */
+ onig_init_for_match_at(reg);
+#endif
+
end:
return r;
@@ -6316,9 +6495,10 @@ onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_fl
(reg)->exact = (UChar* )NULL;
(reg)->extp = (RegexExt* )NULL;
- (reg)->p = (UChar* )NULL;
- (reg)->alloc = 0;
- (reg)->used = 0;
+ (reg)->ops = (Operation* )NULL;
+ (reg)->ops_curr = (Operation* )NULL;
+ (reg)->ops_used = 0;
+ (reg)->ops_alloc = 0;
(reg)->name_table = (void* )NULL;
(reg)->case_fold_flag = case_fold_flag;
@@ -6632,10 +6812,10 @@ print_indent_tree(FILE* f, Node* node, int indent)
case ANCR_WORD_BEGIN: fputs("word begin", f); break;
case ANCR_WORD_END: fputs("word end", f); break;
#endif
- case ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
- fputs("extended-grapheme-cluster boundary", f); break;
- case ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
- fputs("no-extended-grapheme-cluster boundary", f); break;
+ case ANCR_TEXT_SEGMENT_BOUNDARY:
+ fputs("text-segment boundary", f); break;
+ case ANCR_NO_TEXT_SEGMENT_BOUNDARY:
+ fputs("no text-segment boundary", f); break;
case ANCR_PREC_READ:
fprintf(f, "prec read\n");
print_indent_tree(f, NODE_BODY(node), indent + add);
@@ -6715,9 +6895,6 @@ print_indent_tree(FILE* f, Node* node, int indent)
case GIMMICK_FAIL:
fprintf(f, "fail");
break;
- case GIMMICK_KEEP:
- fprintf(f, "keep:%d", GIMMICK_(node)->id);
- break;
case GIMMICK_SAVE:
fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id);
break;