summary | refs | log | tree | commit | diff
path: root/src/regparse.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regparse.c')
-rw-r--r-- src/regparse.c 646
1 files changed, 370 insertions, 276 deletions
diff --git a/src/regparse.c b/src/regparse.c
index 25291c5..1fb2357 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -50,6 +50,7 @@ OnigSyntaxType OnigSyntaxRuby = {
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
+ ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
@@ -163,7 +164,7 @@ bbuf_clone(BBuf** rto, BBuf* from)
*rto = to = (BBuf* )xmalloc(sizeof(BBuf));
CHECK_NULL_RETURN_MEMERR(to);
- r = BBUF_INIT(to, from->alloc);
+ r = BB_INIT(to, from->alloc);
if (r != 0) {
xfree(to->p);
*rto = 0;
@@ -288,7 +289,6 @@ onig_strcpy(UChar* dest, const UChar* src, const UChar* end)
}
}
-#ifdef USE_NAMED_GROUP
static UChar*
strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
{
@@ -307,7 +307,6 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end)
return r;
}
-#endif
static int
save_entry(ScanEnv* env, enum SaveType type, int* id)
@@ -483,8 +482,6 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
#endif /* USE_ST_LIBRARY */
-#ifdef USE_NAMED_GROUP
-
#define INIT_NAME_BACKREFS_ALLOC_NUM 8
typedef struct {
@@ -965,49 +962,17 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name,
}
}
-#else /* USE_NAMED_GROUP */
-
-extern int
-onig_name_to_group_numbers(regex_t* reg, const UChar* name,
- const UChar* name_end, int** nums)
-{
- return ONIG_NO_SUPPORT_CONFIG;
-}
-
-extern int
-onig_name_to_backref_number(regex_t* reg, const UChar* name,
- const UChar* name_end, OnigRegion* region)
-{
- return ONIG_NO_SUPPORT_CONFIG;
-}
-
-extern int
-onig_foreach_name(regex_t* reg,
- int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg)
-{
- return ONIG_NO_SUPPORT_CONFIG;
-}
-
-extern int
-onig_number_of_names(regex_t* reg)
-{
- return 0;
-}
-#endif /* else USE_NAMED_GROUP */
-
extern int
onig_noname_group_capture_is_active(regex_t* reg)
{
if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP))
return 0;
-#ifdef USE_NAMED_GROUP
if (onig_number_of_names(reg) > 0 &&
IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
!ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
return 0;
}
-#endif
return 1;
}
@@ -1032,10 +997,8 @@ scan_env_clear(ScanEnv* env)
#endif
env->num_mem = 0;
-#ifdef USE_NAMED_GROUP
env->num_named = 0;
-#endif
- env->mem_alloc = 0;
+ env->mem_alloc = 0;
env->mem_env_dynamic = (MemEnv* )NULL;
xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
@@ -1204,7 +1167,6 @@ node_new(void)
Node* node;
node = (Node* )xmalloc(sizeof(Node));
- //xmemset(node, 0, sizeof(node->u.base));
xmemset(node, 0, sizeof(*node));
#ifdef DEBUG_NODE_FREE
@@ -1218,7 +1180,6 @@ static void
initialize_cclass(CClassNode* cc)
{
BITSET_CLEAR(cc->bs);
- /* cc->base.flags = 0; */
cc->flags = 0;
cc->mbuf = NULL;
}
@@ -1235,21 +1196,23 @@ node_new_cclass(void)
}
static Node*
-node_new_ctype(int type, int not)
+node_new_ctype(int type, int not, OnigOptionType options)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
NODE_SET_TYPE(node, NODE_CTYPE);
- CTYPE_(node)->ctype = type;
- CTYPE_(node)->not = not;
+ CTYPE_(node)->ctype = type;
+ CTYPE_(node)->not = not;
+ CTYPE_(node)->options = options;
+ CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options);
return node;
}
static Node*
node_new_anychar(void)
{
- Node* node = node_new_ctype(CTYPE_ANYCHAR, 0);
+ Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);
return node;
}
@@ -1383,14 +1346,15 @@ make_alt(int n, Node* ns[])
}
extern Node*
-onig_node_new_anchor(int type)
+onig_node_new_anchor(int type, int ascii_mode)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
NODE_SET_TYPE(node, NODE_ANCHOR);
- ANCHOR_(node)->type = type;
- ANCHOR_(node)->char_len = -1;
+ ANCHOR_(node)->type = type;
+ ANCHOR_(node)->char_len = -1;
+ ANCHOR_(node)->ascii_mode = ascii_mode;
return node;
}
@@ -1506,7 +1470,7 @@ node_new_quantifier(int lower, int upper, int by_number)
}
static Node*
-node_new_enclosure(int type)
+node_new_enclosure(enum EnclosureType type)
{
Node* node = node_new();
CHECK_NULL_RETURN(node);
@@ -1637,6 +1601,61 @@ node_new_keep(Node** node, ScanEnv* env)
}
static int
+make_extended_grapheme_cluster(Node** node, ScanEnv* env)
+{
+ int r;
+ int i;
+ Node* x;
+ Node* ns[2];
+
+ /* \X == (?>\O(?:\Y\O)*)
+  *
+  * Build the node tree for the \X token out of existing node kinds and
+  * store its root in *node.
+  *
+  * Returns ONIG_NORMAL on success. On failure the nodes still held in
+  * ns[0] and ns[1] are freed and an error code is returned:
+  * ONIGERR_MEMORY when a node constructor returned NULL, otherwise the
+  * value returned by node_new_true_anychar().
+  *
+  * ns[] always holds the sub-trees not yet attached to a parent, so the
+  * error path can free exactly what is still unowned.
+  */
+
+ ns[1] = NULL_NODE;
+
+ r = ONIGERR_MEMORY;
+ ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0); /* \Y, ascii_mode = 0 */
+ if (IS_NULL(ns[0])) goto err;
+
+ r = node_new_true_anychar(&ns[1], env); /* \O */
+ if (r != 0) goto err1;
+
+ x = make_list(2, ns); /* \Y\O */
+ if (IS_NULL(x)) goto err;
+ ns[0] = x;
+ ns[1] = NULL_NODE;
+
+ x = node_new_quantifier(0, REPEAT_INFINITE, 1); /* lower = 0, upper = REPEAT_INFINITE, by_number = 1 */
+ if (IS_NULL(x)) goto err;
+
+ NODE_BODY(x) = ns[0]; /* (?:\Y\O)* */
+ ns[0] = NULL_NODE; /* the list now belongs to the quantifier */
+ ns[1] = x;
+
+ r = node_new_true_anychar(&ns[0], env); /* leading \O */
+ if (r != 0) goto err1;
+
+ x = make_list(2, ns); /* \O(?:\Y\O)* */
+ if (IS_NULL(x)) goto err;
+
+ ns[0] = x;
+ ns[1] = NULL_NODE;
+
+ x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); /* (?>...) */
+ if (IS_NULL(x)) goto err;
+
+ NODE_BODY(x) = ns[0];
+
+ *node = x;
+ return ONIG_NORMAL;
+
+ err: /* a constructor returned NULL */
+ r = ONIGERR_MEMORY;
+ err1: /* r already holds the error code */
+ for (i = 0; i < 2; i++) onig_node_free(ns[i]); /* entries may be NULL_NODE here; presumably onig_node_free accepts that - confirm */
+ return r;
+}
+
+static int
make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
Node* step_one, int lower, int upper, int possessive,
int is_range_cutter, ScanEnv* env)
@@ -1663,26 +1682,26 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
if (r != 0) goto err;
x = make_list(4, ns);
- if (IS_NULL(x)) goto err;
+ if (IS_NULL(x)) goto err0;
ns[0] = x;
ns[1] = step_one;
ns[2] = ns[3] = NULL_NODE;
x = make_alt(2, ns);
- if (IS_NULL(x)) goto err;
+ if (IS_NULL(x)) goto err0;
ns[0] = x;
x = node_new_quantifier(lower, upper, 0);
- if (IS_NULL(x)) goto err;
+ if (IS_NULL(x)) goto err0;
NODE_BODY(x) = ns[0];
ns[0] = x;
if (possessive != 0) {
x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
- if (IS_NULL(x)) goto err;
+ if (IS_NULL(x)) goto err0;
NODE_BODY(x) = ns[0];
ns[0] = x;
@@ -1696,12 +1715,12 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
if (r != 0) goto err;
x = make_list(2, ns + 1);
- if (IS_NULL(x)) goto err;
+ if (IS_NULL(x)) goto err0;
ns[1] = x; ns[2] = NULL_NODE;
x = make_alt(2, ns);
- if (IS_NULL(x)) goto err;
+ if (IS_NULL(x)) goto err0;
if (is_range_cutter != 0)
NODE_STATUS_ADD(x, NST_SUPER);
@@ -1709,6 +1728,8 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
*node = x;
return ONIG_NORMAL;
+ err0:
+ r = ONIGERR_MEMORY;
err:
for (i = 0; i < 4; i++) onig_node_free(ns[i]);
return r;
@@ -1739,7 +1760,7 @@ make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,
if (r != 0) goto err;
x = make_list(2, ns);
- if (IS_NULL(x)) goto err;
+ if (IS_NULL(x)) goto err0;
ns[0] = NULL_NODE; ns[1] = x;
@@ -1748,12 +1769,68 @@ make_absent_tail(Node** node1, Node** node2, int pre_save_right_id,
if (r != 0) goto err;
x = make_alt(2, ns);
- if (IS_NULL(x)) goto err;
+ if (IS_NULL(x)) goto err0;
*node1 = save;
*node2 = x;
return ONIG_NORMAL;
+ err0:
+ r = ONIGERR_MEMORY;
+ err:
+ onig_node_free(save);
+ onig_node_free(ns[0]);
+ onig_node_free(ns[1]);
+ return r;
+}
+
+static int
+make_range_clear(Node** node, ScanEnv* env)
+{
+ int r;
+ int id;
+ Node* save;
+ Node* x;
+ Node* ns[2];
+
+ *node = NULL_NODE;
+ save = ns[0] = ns[1] = NULL_NODE;
+
+ r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env);
+ if (r != 0) goto err;
+
+ id = GIMMICK_(save)->id;
+ r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
+ id, env);
+ if (r != 0) goto err;
+
+ r = node_new_fail(&ns[1], env);
+ if (r != 0) goto err;
+
+ x = make_list(2, ns);
+ if (IS_NULL(x)) goto err0;
+
+ ns[0] = NULL_NODE; ns[1] = x;
+
+ r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);
+ if (r != 0) goto err;
+
+ x = make_alt(2, ns);
+ if (IS_NULL(x)) goto err0;
+
+ NODE_STATUS_ADD(x, NST_SUPER);
+
+ ns[0] = save;
+ ns[1] = x;
+ save = NULL_NODE;
+ x = make_list(2, ns);
+ if (IS_NULL(x)) goto err0;
+
+ *node = x;
+ return ONIG_NORMAL;
+
+ err0:
+ r = ONIGERR_MEMORY;
err:
onig_node_free(save);
onig_node_free(ns[0]);
@@ -1790,6 +1867,9 @@ is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody,
return 0;
}
+ if (QUANT_(quant)->greedy == 0)
+ return 0;
+
body = NODE_BODY(quant);
switch (NODE_TYPE(body)) {
case NODE_STRING:
@@ -1856,15 +1936,18 @@ make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* qua
ns[2] = ns[3] = NULL_NODE;
- r = make_absent_tail(&ns[2], &ns[3], id1, env);
+ r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK,
+ id1, env);
if (r != 0) goto err;
- x = make_list(4, ns);
- if (IS_NULL(x)) goto err;
+ x = make_list(3, ns);
+ if (IS_NULL(x)) goto err0;
*node = x;
return ONIG_NORMAL;
+ err0:
+ r = ONIGERR_MEMORY;
err:
for (i = 0; i < 4; i++) onig_node_free(ns[i]);
return r;
@@ -1892,7 +1975,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
if (expr == NULL_NODE) {
/* default expr \O* */
quant = node_new_quantifier(0, REPEAT_INFINITE, 0);
- if (IS_NULL(quant)) goto err;
+ if (IS_NULL(quant)) goto err0;
r = node_new_true_anychar(&body, env);
if (r != 0) {
@@ -1945,19 +2028,21 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
if (is_range_cutter != 0) {
x = make_list(4, ns);
- if (IS_NULL(x)) goto err;
+ if (IS_NULL(x)) goto err0;
}
else {
r = make_absent_tail(&ns[5], &ns[6], id1, env);
if (r != 0) goto err;
x = make_list(7, ns);
- if (IS_NULL(x)) goto err;
+ if (IS_NULL(x)) goto err0;
}
*node = x;
return ONIG_NORMAL;
+ err0:
+ r = ONIGERR_MEMORY;
err:
for (i = 0; i < 7; i++) onig_node_free(ns[i]);
return r;
@@ -2143,7 +2228,7 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
num = 0;
while (! PEND) {
PFETCH(c);
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ if (IS_CODE_DIGIT_ASCII(enc, c)) {
val = (unsigned int )DIGITVAL(c);
if ((INT_MAX_LIMIT - val) / 10UL < num)
return -1; /* overflow */
@@ -2161,7 +2246,7 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
static int
scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
- OnigEncoding enc)
+ OnigEncoding enc)
{
OnigCodePoint c;
unsigned int num, val;
@@ -2171,7 +2256,7 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
num = 0;
while (! PEND && maxlen-- != 0) {
PFETCH(c);
- if (ONIGENC_IS_CODE_XDIGIT(enc, c)) {
+ if (IS_CODE_XDIGIT_ASCII(enc, c)) {
val = (unsigned int )XDIGITVAL(enc,c);
if ((INT_MAX_LIMIT - val) / 16UL < num)
return -1; /* overflow */
@@ -2189,7 +2274,7 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen,
static int
scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
- OnigEncoding enc)
+ OnigEncoding enc)
{
OnigCodePoint c;
unsigned int num, val;
@@ -2199,7 +2284,7 @@ scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
num = 0;
while (! PEND && maxlen-- != 0) {
PFETCH(c);
- if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') {
+ if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {
val = ODIGITVAL(c);
if ((INT_MAX_LIMIT - val) / 8UL < num)
return -1; /* overflow */
@@ -2216,8 +2301,8 @@ scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
}
-#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \
- BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
+#define BB_WRITE_CODE_POINT(bbuf,pos,code) \
+ BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT)
/* data format:
[n][from-1][to-1][from-2][to-2] ... [from-n][to-n]
@@ -2233,7 +2318,7 @@ new_code_range(BBuf** pbuf)
bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf));
CHECK_NULL_RETURN_MEMERR(bbuf);
- r = BBUF_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);
+ r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE);
if (r != 0) {
xfree(bbuf);
*pbuf = 0;
@@ -2241,7 +2326,7 @@ new_code_range(BBuf** pbuf)
}
n = 0;
- BBUF_WRITE_CODE_POINT(bbuf, 0, n);
+ BB_WRITE_CODE_POINT(bbuf, 0, n);
return 0;
}
@@ -2304,19 +2389,19 @@ add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to)
int size = (n - high) * 2 * SIZE_CODE_POINT;
if (inc_n > 0) {
- BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
+ BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size);
}
else {
- BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
+ BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos);
}
}
pos = SIZE_CODE_POINT * (1 + low * 2);
- BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
- BBUF_WRITE_CODE_POINT(bbuf, pos, from);
- BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
+ BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2);
+ BB_WRITE_CODE_POINT(bbuf, pos, from);
+ BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to);
n += inc_n;
- BBUF_WRITE_CODE_POINT(bbuf, 0, n);
+ BB_WRITE_CODE_POINT(bbuf, 0, n);
return 0;
}
@@ -2369,7 +2454,7 @@ not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
return r;
}
-#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\
+#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\
BBuf *tbuf; \
int tnot; \
tnot = not1; not1 = not2; not2 = tnot; \
@@ -2393,7 +2478,7 @@ or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
r = 0;
if (IS_NULL(bbuf2))
- SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+ SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
if (IS_NULL(bbuf1)) {
if (not1 != 0) {
@@ -2410,7 +2495,7 @@ or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
}
if (not1 != 0)
- SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+ SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
data1 = (OnigCodePoint* )(bbuf1->p);
GET_CODE_POINT(n1, data1);
@@ -2435,7 +2520,7 @@ or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1,
static int
and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,
- OnigCodePoint* data, int n)
+ OnigCodePoint* data, int n)
{
int i, r;
OnigCodePoint from2, to2;
@@ -2493,7 +2578,7 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf)
}
if (not1 != 0)
- SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2);
+ SWAP_BB_NOT(bbuf1, not1, bbuf2, not2);
data1 = (OnigCodePoint* )(bbuf1->p);
data2 = (OnigCodePoint* )(bbuf2->p);
@@ -2556,7 +2641,6 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
bitset_and(bsr1, bsr2);
if (bsr1 != dest->bs) {
bitset_copy(dest->bs, bsr1);
- bsr1 = dest->bs;
}
if (not1 != 0) {
bitset_invert(dest->bs);
@@ -2614,7 +2698,6 @@ or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc)
bitset_or(bsr1, bsr2);
if (bsr1 != dest->bs) {
bitset_copy(dest->bs, bsr1);
- bsr1 = dest->bs;
}
if (not1 != 0) {
bitset_invert(dest->bs);
@@ -2661,7 +2744,7 @@ conv_backslash_value(OnigCodePoint c, ScanEnv* env)
case 'e': return '\033';
case 'v':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB))
- return '\v';
+ return '\v';
break;
default:
@@ -2874,6 +2957,7 @@ enum TokenSyms {
TK_GENERAL_NEWLINE, /* \R */
TK_NO_NEWLINE, /* \N */
TK_TRUE_ANYCHAR, /* \O */
+ TK_EXTENDED_GRAPHEME_CLUSTER, /* \X */
/* in cc */
TK_CC_CLOSE,
@@ -3110,7 +3194,6 @@ enum REF_NUM {
IS_REL_NUM = 2
};
-#ifdef USE_NAMED_GROUP
#ifdef USE_BACKREF_WITH_LEVEL
/*
\k<name+n>, \k<name-n>
@@ -3152,7 +3235,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
if (c == end_code)
return ONIGERR_EMPTY_GROUP_NAME;
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ if (IS_CODE_DIGIT_ASCII(enc, c)) {
*num_type = IS_ABS_NUM;
digit_count++;
}
@@ -3181,7 +3264,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
}
if (*num_type != IS_NOT_NUM) {
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ if (IS_CODE_DIGIT_ASCII(enc, c)) {
digit_count++;
}
else {
@@ -3204,7 +3287,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
goto end;
}
PFETCH(c);
- if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
+ if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err;
PUNFETCH;
level = onig_scan_unsigned_number(&p, end, enc);
if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
@@ -3284,7 +3367,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
if (c == end_code)
return ONIGERR_EMPTY_GROUP_NAME;
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ if (IS_CODE_DIGIT_ASCII(enc, c)) {
if (ref == 1)
*num_type = IS_ABS_NUM;
else {
@@ -3328,7 +3411,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
}
if (*num_type != IS_NOT_NUM) {
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
+ if (IS_CODE_DIGIT_ASCII(enc, c)) {
digit_count++;
}
else {
@@ -3384,111 +3467,6 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
return r;
}
}
-#else
-static int
-fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
- UChar** rname_end, ScanEnv* env, int* rback_num,
- enum REF_NUM* num_type, int ref)
-{
- int r, sign;
- int digit_count;
- OnigCodePoint end_code;
- OnigCodePoint c = 0;
- UChar *name_end;
- OnigEncoding enc = env->enc;
- UChar *pnum_head;
- UChar *p = *src;
- PFETCH_READY;
-
- *rback_num = 0;
-
- end_code = get_name_end_code_point(start_code);
-
- digit_count = 0;
- *rname_end = name_end = end;
- r = 0;
- pnum_head = *src;
- *num_type = IS_ABS_NUM;
- sign = 1;
-
- if (PEND) {
- return ONIGERR_EMPTY_GROUP_NAME;
- }
- else {
- PFETCH(c);
- if (c == end_code)
- return ONIGERR_EMPTY_GROUP_NAME;
-
- if (ONIGENC_IS_CODE_DIGIT(enc, c)) {
- *num_type = IS_ABS_NUM;
- digit_count++;
- }
- else if (c == '-') {
- if (ref == 1) {
- *num_type = IS_REL_NUM;
- sign = -1;
- pnum_head = p;
- }
- else {
- r = ONIGERR_INVALID_GROUP_NAME;
- }
- }
- else if (c == '+') {
- if (ref == 1) {
- *num_type = IS_REL_NUM;
- sign = 1;
- pnum_head = p;
- }
- else {
- r = ONIGERR_INVALID_GROUP_NAME;
- }
- }
- else {
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- }
- }
-
- while (! PEND) {
- name_end = p;
-
- PFETCH(c);
- if (c == end_code || c == ')') break;
-
- if (ONIGENC_IS_CODE_DIGIT(enc, c))
- digit_count++;
- else
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- }
- if (r == 0 && c != end_code) {
- r = ONIGERR_INVALID_GROUP_NAME;
- name_end = end;
- }
- if (r == 0 && digit_count == 0) {
- r = ONIGERR_INVALID_GROUP_NAME;
- }
-
- if (r == 0) {
- *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
- if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
- else if (*rback_num == 0) {
- if (*num_type == IS_REL_NUM) {
- r = ONIGERR_INVALID_GROUP_NAME;
- goto err;
- }
- }
- *rback_num *= sign;
-
- *rname_end = name_end;
- *src = p;
- return 0;
- }
- else {
- err:
- onig_scan_env_set_error_string(env, r, *src, name_end);
- return r;
- }
-}
-#endif /* USE_NAMED_GROUP */
static void
CC_ESC_WARN(ScanEnv* env, UChar *c)
@@ -3521,7 +3499,7 @@ CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c)
static UChar*
find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
- UChar **next, OnigEncoding enc)
+ UChar **next, OnigEncoding enc)
{
int i;
OnigCodePoint x;
@@ -3550,7 +3528,7 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to,
static int
str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
- OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)
+ OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)
{
int i, in_esc;
OnigCodePoint x;
@@ -3699,7 +3677,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
if (!PEND) {
c2 = PPEEK;
- if (ONIGENC_IS_CODE_DIGIT(enc, c2))
+ if (IS_CODE_DIGIT_ASCII(enc, c2))
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
}
@@ -3726,7 +3704,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
if (!PEND) {
c2 = PPEEK;
- if (ONIGENC_IS_CODE_XDIGIT(enc, c2))
+ if (IS_CODE_XDIGIT_ASCII(enc, c2))
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
}
@@ -3956,13 +3934,25 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case 'b':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
tok->type = TK_ANCHOR;
- tok->u.anchor = ANCHOR_WORD_BOUND;
+ tok->u.anchor = ANCHOR_WORD_BOUNDARY;
break;
case 'B':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break;
tok->type = TK_ANCHOR;
- tok->u.anchor = ANCHOR_NOT_WORD_BOUND;
+ tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY;
+ break;
+
+ case 'y': /* \y : extended grapheme cluster boundary */
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break; /* ONIG_SYN_OP2_* flag: test with IS_SYNTAX_OP2, as the \X case does */
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
+ break;
+
+ case 'Y': /* \Y : not an extended grapheme cluster boundary */
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break; /* ONIG_SYN_OP2_* flag: test with IS_SYNTAX_OP2, as the \X case does */
+ tok->type = TK_ANCHOR;
+ tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
break;
#ifdef USE_WORD_BEGIN_END
@@ -4041,6 +4031,11 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->type = TK_TRUE_ANYCHAR;
break;
+ case 'X':
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;
+ tok->type = TK_EXTENDED_GRAPHEME_CLUSTER;
+ break;
+
case 'A':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break;
begin_buf:
@@ -4086,7 +4081,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
num = scan_unsigned_octal_number(&p, end, 11, enc);
if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
if (!PEND) {
- if (ONIGENC_IS_CODE_DIGIT(enc, PPEEK))
+ if (IS_CODE_DIGIT_ASCII(enc, PPEEK))
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
}
@@ -4111,7 +4106,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
num = scan_unsigned_hexadecimal_number(&p, end, 8, enc);
if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
if (!PEND) {
- if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK))
+ if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
}
@@ -4205,7 +4200,6 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
break;
-#ifdef USE_NAMED_GROUP
case 'k':
if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {
PFETCH(c);
@@ -4277,7 +4271,6 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
PUNFETCH;
}
break;
-#endif
#ifdef USE_CALL
case 'g':
@@ -4531,8 +4524,8 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
static int
add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
- OnigEncoding enc ARG_UNUSED,
- OnigCodePoint sb_out, const OnigCodePoint mbr[])
+ OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out,
+ const OnigCodePoint mbr[])
{
int i, r;
OnigCodePoint j;
@@ -4591,33 +4584,140 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
if (r != 0) return r;
}
prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1;
+ if (prev == 0) goto end;
}
- if (prev < 0x7fffffff) {
- r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);
+
+ r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
+ if (r != 0) return r;
+ }
+
+ end:
+ return 0;
+}
+
+static int
+add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not,
+ OnigEncoding enc ARG_UNUSED,
+ OnigCodePoint sb_out,
+ const OnigCodePoint mbr[], OnigCodePoint limit)
+{ /*
+  * Add the code points listed in the range table mbr[] to cc, treating
+  * the table as if it ended at `limit` (inclusive): entries and parts of
+  * entries above `limit` are ignored.
+  *
+  * not == 0: the limited table itself is added. Codes below sb_out are
+  *           set as bits in cc->bs; the rest is appended to cc->mbuf as
+  *           ranges.
+  * not != 0: the complement of the limited table is added. Bits are set
+  *           for uncovered codes below sb_out, then the gaps between
+  *           entries and everything up to MAX_CODE_POINT go to cc->mbuf.
+  *
+  * Returns 0 on success, or the non-zero value returned by
+  * add_code_range_to_buf().
+  */
+ int i, r;
+ OnigCodePoint j;
+ OnigCodePoint from;
+ OnigCodePoint to;
+
+ int n = ONIGENC_CODE_RANGE_NUM(mbr);
+
+ if (not == 0) {
+ for (i = 0; i < n; i++) {
+ for (j = ONIGENC_CODE_RANGE_FROM(mbr, i);
+ j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) {
+ if (j > limit) goto end; /* nothing above limit is added */
+ if (j >= sb_out) { /* from here on codes go to mbuf, not the bitset */
+ if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { /* entry i straddles sb_out: add its remaining part now */
+ to = ONIGENC_CODE_RANGE_TO(mbr, i);
+ if (to > limit) to = limit;
+ r = add_code_range_to_buf(&(cc->mbuf), j, to);
+ if (r != 0) return r;
+ i++; /* entry i is finished; sb_end resumes with the next one */
+ }
+
+ goto sb_end;
+ }
+ BITSET_SET_BIT(cc->bs, j);
+ }
+ }
+
+ sb_end: /* remaining entries (index i onward) go to mbuf, clamped to limit */
+ for ( ; i < n; i++) {
+ from = ONIGENC_CODE_RANGE_FROM(mbr, i);
+ to = ONIGENC_CODE_RANGE_TO(mbr, i);
+ if (from > limit) break;
+ if (to > limit) to = limit;
+ r = add_code_range_to_buf(&(cc->mbuf), from, to);
if (r != 0) return r;
}
}
+ else {
+ OnigCodePoint prev = 0; /* first code not yet covered by a processed entry */
+
+ for (i = 0; i < n; i++) { /* pass 1: bitset part of the complement */
+ from = ONIGENC_CODE_RANGE_FROM(mbr, i);
+ if (from > limit) { /* no further entry counts: everything from prev up to sb_out is uncovered */
+ for (j = prev; j < sb_out; j++) {
+ BITSET_SET_BIT(cc->bs, j);
+ }
+ goto sb_end2;
+ }
+ for (j = prev; j < from; j++) { /* gap before entry i */
+ if (j >= sb_out) goto sb_end2;
+ BITSET_SET_BIT(cc->bs, j);
+ }
+ prev = ONIGENC_CODE_RANGE_TO(mbr, i);
+ if (prev > limit) prev = limit;
+ prev++;
+ if (prev == 0) goto end; /* increment wrapped to 0 (presumably OnigCodePoint is unsigned - confirm): nothing lies above */
+ }
+ for (j = prev; j < sb_out; j++) { /* gap after the last entry, below sb_out */
+ BITSET_SET_BIT(cc->bs, j);
+ }
+
+ sb_end2: /* pass 2: mbuf part of the complement, walking the table again */
+ prev = sb_out;
+
+ for (i = 0; i < n; i++) {
+ from = ONIGENC_CODE_RANGE_FROM(mbr, i);
+ if (from > limit) goto last;
+
+ if (prev < from) { /* gap between the previous entry and entry i */
+ r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1);
+ if (r != 0) return r;
+ }
+ prev = ONIGENC_CODE_RANGE_TO(mbr, i);
+ if (prev > limit) prev = limit;
+ prev++;
+ if (prev == 0) goto end; /* same wrap-around check as in pass 1 */
+ }
+
+ last: /* everything from prev to the top of the code space */
+ r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT);
+ if (r != 0) return r;
+ }
+ end:
return 0;
}
static int
add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
{
+#define ASCII_LIMIT 127
+
int c, r;
+ int ascii_mode;
const OnigCodePoint *ranges;
+ OnigCodePoint limit;
OnigCodePoint sb_out;
OnigEncoding enc = env->enc;
+ ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options);
+
r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges);
if (r == 0) {
- return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
+ if (ascii_mode == 0)
+ r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges);
+ else
+ r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out,
+ ranges, ASCII_LIMIT);
+ return r;
}
else if (r != ONIG_NO_SUPPORT_CONFIG) {
return r;
}
r = 0;
+ limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;
+
switch (ctype) {
case ONIGENC_CTYPE_ALPHA:
case ONIGENC_CTYPE_BLANK:
@@ -4631,14 +4731,18 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
case ONIGENC_CTYPE_ASCII:
case ONIGENC_CTYPE_ALNUM:
if (not != 0) {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ for (c = 0; c < limit; c++) {
if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
}
+ for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
+ BITSET_SET_BIT(cc->bs, c);
+ }
+
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
else {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
+ for (c = 0; c < limit; c++) {
if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
}
@@ -4647,34 +4751,25 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
case ONIGENC_CTYPE_GRAPH:
case ONIGENC_CTYPE_PRINT:
+ case ONIGENC_CTYPE_WORD:
if (not != 0) {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ for (c = 0; c < limit; c++) {
+ if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */
+ && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
}
- }
- else {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
+ if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)
BITSET_SET_BIT(cc->bs, c);
}
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
- }
- break;
-
- case ONIGENC_CTYPE_WORD:
- if (not == 0) {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c);
- }
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
else {
- for (c = 0; c < SINGLE_BYTE_SIZE; c++) {
- if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */
- && ! ONIGENC_IS_CODE_WORD(enc, c))
+ for (c = 0; c < limit; c++) {
+ if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
}
+ if (ascii_mode == 0)
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
break;
@@ -4792,8 +4887,7 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
}
static int
-parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end,
- ScanEnv* env)
+parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
int r, ctype;
CClassNode* cc;
@@ -4827,7 +4921,7 @@ enum CCVALTYPE {
static int
next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
- enum CCSTATE* state, ScanEnv* env)
+ enum CCSTATE* state, ScanEnv* env)
{
int r;
@@ -4850,9 +4944,9 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
static int
next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
- int* from_israw, int to_israw,
- enum CCVALTYPE intype, enum CCVALTYPE* type,
- enum CCSTATE* state, ScanEnv* env)
+ int* from_israw, int to_israw,
+ enum CCVALTYPE intype, enum CCVALTYPE* type,
+ enum CCSTATE* state, ScanEnv* env)
{
int r;
@@ -4921,7 +5015,7 @@ next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
static int
code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
- ScanEnv* env)
+ ScanEnv* env)
{
int in_esc;
OnigCodePoint code;
@@ -5282,19 +5376,17 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
}
static int parse_subexp(Node** top, OnigToken* tok, int term,
- UChar** src, UChar* end, ScanEnv* env);
+ UChar** src, UChar* end, ScanEnv* env);
static int
parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
- ScanEnv* env)
+ ScanEnv* env)
{
int r, num;
Node *target;
OnigOptionType option;
OnigCodePoint c;
-#ifdef USE_NAMED_GROUP
int list_capture;
-#endif
OnigEncoding enc = env->enc;
UChar* p = *src;
@@ -5322,16 +5414,15 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
break;
case '=':
- *np = onig_node_new_anchor(ANCHOR_PREC_READ);
+ *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0);
break;
case '!': /* preceding read */
- *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT);
+ *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0);
break;
case '>': /* (?>...) stop backtrack */
*np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK);
break;
-#ifdef USE_NAMED_GROUP
case '\'':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
goto named_group1;
@@ -5339,16 +5430,14 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
else
return ONIGERR_UNDEFINED_GROUP_OPTION;
break;
-#endif
case '<': /* look behind (?<=...), (?<!...) */
if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
PFETCH(c);
if (c == '=')
- *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND);
+ *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0);
else if (c == '!')
- *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT);
-#ifdef USE_NAMED_GROUP
+ *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0);
else {
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
UChar *name;
@@ -5385,11 +5474,6 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
return ONIGERR_UNDEFINED_GROUP_OPTION;
}
}
-#else
- else {
- return ONIGERR_UNDEFINED_GROUP_OPTION;
- }
-#endif
break;
case '~':
@@ -5406,10 +5490,9 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
head_bar = 1;
- if (PPEEK_IS(')')) { // (?~|) : absent clear
+ if (PPEEK_IS(')')) { // (?~|) : range clear
PINC;
- r = node_new_update_var_gimmick(np, UPDATE_VAR_RIGHT_RANGE_INIT,
- 0, env);
+ r = make_range_clear(np, env);
if (r != 0) return r;
goto end;
}
@@ -5471,7 +5554,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
PFETCH(c);
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
- if (ONIGENC_IS_CODE_DIGIT(enc, c)
+ if (IS_CODE_DIGIT_ASCII(enc, c)
|| c == '-' || c == '+' || c == '<' || c == '\'') {
UChar* name_end;
int back_num;
@@ -5647,7 +5730,6 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
case '@':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
-#ifdef USE_NAMED_GROUP
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
PFETCH(c);
if (c == '<' || c == '\'') {
@@ -5656,7 +5738,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
PUNFETCH;
}
-#endif
+
*np = node_new_memory(0);
CHECK_NULL_RETURN_MEMERR(*np);
num = scan_env_add_mem_entry(env);
@@ -5678,6 +5760,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
case 'p':
#endif
case '-': case 'i': case 'm': case 's': case 'x':
+ case 'W': case 'D': case 'S': case 'P':
{
int neg = 0;
@@ -5713,6 +5796,11 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg);
break;
#endif
+ case 'W': OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg); break;
+ case 'D': OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg); break;
+ case 'S': OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg); break;
+ case 'P': OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg); break;
+
default:
return ONIGERR_UNDEFINED_GROUP_OPTION;
}
@@ -5918,8 +6006,7 @@ typedef struct {
} IApplyCaseFoldArg;
static int
-i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
- int to_len, void* arg)
+i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)
{
IApplyCaseFoldArg* iarg;
ScanEnv* env;
@@ -5998,8 +6085,8 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[],
}
static int
-parse_exp(Node** np, OnigToken* tok, int term,
- UChar** src, UChar* end, ScanEnv* env)
+parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
+ ScanEnv* env)
{
int r, len, group = 0;
Node* qn;
@@ -6145,7 +6232,7 @@ parse_exp(Node** np, OnigToken* tok, int term,
{
switch (tok->u.prop.ctype) {
case ONIGENC_CTYPE_WORD:
- *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not);
+ *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options);
CHECK_NULL_RETURN_MEMERR(*np);
break;
@@ -6253,7 +6340,11 @@ parse_exp(Node** np, OnigToken* tok, int term,
#endif
case TK_ANCHOR:
- *np = onig_node_new_anchor(tok->u.anchor);
+ {
+ int ascii_mode =
+ IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0;
+ *np = onig_node_new_anchor(tok->u.anchor, ascii_mode);
+ }
break;
case TK_OP_REPEAT:
@@ -6289,6 +6380,11 @@ parse_exp(Node** np, OnigToken* tok, int term,
if (r < 0) return r;
break;
+ case TK_EXTENDED_GRAPHEME_CLUSTER:
+ r = make_extended_grapheme_cluster(np, env);
+ if (r < 0) return r;
+ break;
+
default:
return ONIGERR_PARSER_BUG;
break;
@@ -6356,8 +6452,8 @@ parse_exp(Node** np, OnigToken* tok, int term,
}
static int
-parse_branch(Node** top, OnigToken* tok, int term,
- UChar** src, UChar* end, ScanEnv* env)
+parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
+ ScanEnv* env)
{
int r;
Node *node, **headp;
@@ -6399,8 +6495,8 @@ parse_branch(Node** top, OnigToken* tok, int term,
/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
static int
-parse_subexp(Node** top, OnigToken* tok, int term,
- UChar** src, UChar* end, ScanEnv* env)
+parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
+ ScanEnv* env)
{
int r;
Node *node, **headp;
@@ -6492,9 +6588,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
int r;
UChar* p;
-#ifdef USE_NAMED_GROUP
names_clear(reg);
-#endif
scan_env_clear(env);
env->options = reg->options;
@@ -6531,7 +6625,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
extern void
onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED,
- UChar* arg, UChar* arg_end)
+ UChar* arg, UChar* arg_end)
{
env->error = arg;
env->error_end = arg_end;