summary | refs | log | tree | commit | diff
path: root/src/regparse.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regparse.c')
-rw-r--r-- src/regparse.c 240
1 files changed, 160 insertions, 80 deletions
diff --git a/src/regparse.c b/src/regparse.c
index 9e42e71..f1deea3 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -2,7 +2,7 @@
regparse.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -54,13 +54,13 @@ OnigSyntaxType OnigSyntaxOniguruma = {
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
- ONIG_SYN_OP2_OPTION_RUBY |
+ ONIG_SYN_OP2_OPTION_ONIGURUMA |
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS |
ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME |
- ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
+ ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT |
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
@@ -103,7 +103,7 @@ OnigSyntaxType OnigSyntaxRuby = {
ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF |
ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE |
ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP |
- ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER |
+ ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT |
ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE |
ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP |
ONIG_SYN_OP2_ESC_G_SUBEXP_CALL |
@@ -198,6 +198,23 @@ onig_set_parse_depth_limit(unsigned int depth)
return 0;
}
+static int /* Initialize buf with a fresh storage area of `size` bytes; returns 0 or ONIGERR_MEMORY. */
+bbuf_init(BBuf* buf, int size)
+{
+ if (size <= 0) { /* non-positive request: no allocation is made */
+ size = 0; /* clamp so that buf->alloc below never goes negative */
+ buf->p = NULL;
+ }
+ else {
+ buf->p = (UChar* )xmalloc(size); /* obtain `size` bytes via xmalloc */
+ if (IS_NULL(buf->p)) return(ONIGERR_MEMORY); /* early exit: buf->alloc and buf->used are left unset */
+ }
+
+ buf->alloc = size; /* record the capacity (0 when nothing was allocated) */
+ buf->used = 0; /* nothing has been written yet */
+ return 0; /* success */
+}
+
static void
bbuf_free(BBuf* bbuf)
{
@@ -512,6 +529,8 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
}
+#ifdef USE_CALLOUT
+
typedef struct {
OnigEncoding enc;
int type; /* callout type: single or not */
@@ -610,6 +629,7 @@ st_insert_callout_name_table(hash_table_type* table,
}
return result;
}
+#endif
#endif /* USE_ST_LIBRARY */
@@ -1562,9 +1582,13 @@ onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
}
for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) {
if (fe->arg_types[i] == ONIG_TYPE_STRING) {
+ OnigValue* val;
+ UChar* ds;
+
if (IS_NULL(opt_defaults)) return ONIGERR_INVALID_ARGUMENT;
- OnigValue* val = opt_defaults + j;
- UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end);
+
+ val = opt_defaults + j;
+ ds = onigenc_strdup(enc, val->s.start, val->s.end);
CHECK_NULL_RETURN_MEMERR(ds);
fe->opt_defaults[i].s.start = ds;
@@ -2367,10 +2391,10 @@ node_new_quantifier(int lower, int upper, int by_number)
CHECK_NULL_RETURN(node);
NODE_SET_TYPE(node, NODE_QUANT);
- QUANT_(node)->lower = lower;
- QUANT_(node)->upper = upper;
- QUANT_(node)->greedy = 1;
- QUANT_(node)->body_empty_info = QUANT_BODY_IS_NOT_EMPTY;
+ QUANT_(node)->lower = lower;
+ QUANT_(node)->upper = upper;
+ QUANT_(node)->greedy = 1;
+ QUANT_(node)->empty_info = BODY_IS_NOT_EMPTY;
QUANT_(node)->head_exact = NULL_NODE;
QUANT_(node)->next_head_exact = NULL_NODE;
QUANT_(node)->is_refered = 0;
@@ -2647,7 +2671,7 @@ node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id,
#endif
static int
-make_extended_grapheme_cluster(Node** node, ScanEnv* env)
+make_text_segment(Node** node, ScanEnv* env)
{
int r;
int i;
@@ -2659,7 +2683,7 @@ make_extended_grapheme_cluster(Node** node, ScanEnv* env)
ns[1] = NULL_NODE;
r = ONIGERR_MEMORY;
- ns[0] = onig_node_new_anchor(ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0);
+ ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, 0);
if (IS_NULL(ns[0])) goto err;
r = node_new_true_anychar(&ns[1], env);
@@ -4017,7 +4041,7 @@ enum TokenSyms {
TK_BACKREF,
TK_CALL,
TK_ANCHOR,
- TK_OP_REPEAT,
+ TK_REPEAT,
TK_INTERVAL,
TK_ANYCHAR_ANYTIME, /* SQL '%' == .* */
TK_ALT,
@@ -4030,7 +4054,7 @@ enum TokenSyms {
TK_GENERAL_NEWLINE, /* \R */
TK_NO_NEWLINE, /* \N */
TK_TRUE_ANYCHAR, /* \O */
- TK_EXTENDED_GRAPHEME_CLUSTER, /* \X */
+ TK_TEXT_SEGMENT, /* \X */
/* in cc */
TK_CC_CLOSE,
@@ -4078,11 +4102,11 @@ typedef struct {
int not;
} prop;
} u;
-} OnigToken;
+} PToken;
static int
-fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
+fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
{
int low, up, syn_allow, non_low = 0;
int r = 0;
@@ -4155,8 +4179,18 @@ fetch_range_quantifier(UChar** src, UChar* end, OnigToken* tok, ScanEnv* env)
if (c != '}') goto invalid;
if (!IS_REPEAT_INFINITE(up) && low > up) {
- return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
+ /* {n,m}+ supported case */
+ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL))
+ return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
+
+ tok->u.repeat.possessive = 1;
+ {
+ int tmp;
+ tmp = low; low = up; up = tmp;
+ }
}
+ else
+ tok->u.repeat.possessive = 0;
tok->type = TK_INTERVAL;
tok->u.repeat.lower = low;
@@ -4245,7 +4279,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
return 0;
}
-static int fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env);
+static int fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env);
static OnigCodePoint
get_name_end_code_point(OnigCodePoint start)
@@ -4639,7 +4673,7 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
}
static int
-fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
+fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
int num;
OnigCodePoint c, c2;
@@ -4891,7 +4925,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
static int
-fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
+fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
int r, num;
OnigCodePoint c;
@@ -4923,7 +4957,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
switch (c) {
case '*':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
- tok->type = TK_OP_REPEAT;
+ tok->type = TK_REPEAT;
tok->u.repeat.lower = 0;
tok->u.repeat.upper = REPEAT_INFINITE;
goto greedy_check;
@@ -4931,7 +4965,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '+':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
- tok->type = TK_OP_REPEAT;
+ tok->type = TK_REPEAT;
tok->u.repeat.lower = 1;
tok->u.repeat.upper = REPEAT_INFINITE;
goto greedy_check;
@@ -4939,44 +4973,44 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '?':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_QMARK_ZERO_ONE)) break;
- tok->type = TK_OP_REPEAT;
+ tok->type = TK_REPEAT;
tok->u.repeat.lower = 0;
tok->u.repeat.upper = 1;
greedy_check:
+ tok->u.repeat.possessive = 0;
+ greedy_check2:
if (!PEND && PPEEK_IS('?') &&
- IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
+ IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY) &&
+ tok->u.repeat.possessive == 0) {
PFETCH(c);
- tok->u.repeat.greedy = 0;
+ tok->u.repeat.greedy = 0;
tok->u.repeat.possessive = 0;
}
else {
possessive_check:
+ tok->u.repeat.greedy = 1;
if (!PEND && PPEEK_IS('+') &&
((IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT) &&
tok->type != TK_INTERVAL) ||
(IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL) &&
- tok->type == TK_INTERVAL))) {
+ tok->type == TK_INTERVAL)) &&
+ tok->u.repeat.possessive == 0) {
PFETCH(c);
- tok->u.repeat.greedy = 1;
tok->u.repeat.possessive = 1;
}
- else {
- tok->u.repeat.greedy = 1;
- tok->u.repeat.possessive = 0;
- }
}
break;
case '{':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
- r = fetch_range_quantifier(&p, end, tok, env);
+ r = fetch_interval_quantifier(&p, end, tok, env);
if (r < 0) return r; /* error */
- if (r == 0) goto greedy_check;
+ if (r == 0) goto greedy_check2;
else if (r == 2) { /* {n} */
if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
goto possessive_check;
- goto greedy_check;
+ goto greedy_check2;
}
/* r == 1 : normal char */
break;
@@ -5023,15 +5057,15 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case 'y':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
tok->type = TK_ANCHOR;
- tok->u.anchor = ANCR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
+ tok->u.anchor = ANCR_TEXT_SEGMENT_BOUNDARY;
break;
case 'Y':
- if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;
+ if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
tok->type = TK_ANCHOR;
- tok->u.anchor = ANCR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY;
+ tok->u.anchor = ANCR_NO_TEXT_SEGMENT_BOUNDARY;
break;
#ifdef USE_WORD_BEGIN_END
@@ -5111,8 +5145,8 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case 'X':
- if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break;
- tok->type = TK_EXTENDED_GRAPHEME_CLUSTER;
+ if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT)) break;
+ tok->type = TK_TEXT_SEGMENT;
break;
case 'A':
@@ -5478,7 +5512,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
#ifdef USE_VARIABLE_META_CHARS
anytime:
#endif
- tok->type = TK_OP_REPEAT;
+ tok->type = TK_REPEAT;
tok->u.repeat.lower = 0;
tok->u.repeat.upper = REPEAT_INFINITE;
goto greedy_check;
@@ -5489,7 +5523,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
#ifdef USE_VARIABLE_META_CHARS
one_or_more_time:
#endif
- tok->type = TK_OP_REPEAT;
+ tok->type = TK_REPEAT;
tok->u.repeat.lower = 1;
tok->u.repeat.upper = REPEAT_INFINITE;
goto greedy_check;
@@ -5500,7 +5534,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
#ifdef USE_VARIABLE_META_CHARS
zero_or_one_time:
#endif
- tok->type = TK_OP_REPEAT;
+ tok->type = TK_REPEAT;
tok->u.repeat.lower = 0;
tok->u.repeat.upper = 1;
goto greedy_check;
@@ -5508,14 +5542,14 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '{':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
- r = fetch_range_quantifier(&p, end, tok, env);
+ r = fetch_interval_quantifier(&p, end, tok, env);
if (r < 0) return r; /* error */
- if (r == 0) goto greedy_check;
+ if (r == 0) goto greedy_check2;
else if (r == 2) { /* {n} */
if (IS_SYNTAX_BV(syn, ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY))
goto possessive_check;
- goto greedy_check;
+ goto greedy_check2;
}
/* r == 1 : normal char */
break;
@@ -6048,7 +6082,7 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
}
static int
-parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
+parse_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
int r, ctype;
CClassNode* cc;
@@ -6198,7 +6232,7 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
}
static int
-parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
+parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
int r, neg, len, fetched, and_start;
OnigCodePoint v, vs;
@@ -6535,7 +6569,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* en
return r;
}
-static int parse_subexp(Node** top, OnigToken* tok, int term,
+static int parse_subexp(Node** top, PToken* tok, int term,
UChar** src, UChar* end, ScanEnv* env, int group_head);
#ifdef USE_CALLOUT
@@ -6593,7 +6627,7 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv
PFETCH_S(c);
if (c == '[') {
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
- tag_start = p;
+ tag_end = tag_start = p;
while (! PEND) {
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
tag_end = p;
@@ -6883,7 +6917,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
if (c == '[') {
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
- tag_start = p;
+ tag_end = tag_start = p;
while (! PEND) {
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
tag_end = p;
@@ -6993,7 +7027,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
#endif
static int
-parse_bag(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
+parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
ScanEnv* env)
{
int r, num;
@@ -7301,7 +7335,9 @@ parse_bag(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
}
+#ifdef USE_CALLOUT
end_condition:
+#endif
CHECK_NULL_RETURN_MEMERR(condition);
if (PEND) {
@@ -7322,9 +7358,9 @@ parse_bag(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
int then_is_empty;
Node *Then, *Else;
+ Then = 0;
if (PPEEK_IS('|')) {
PFETCH(c);
- Then = 0;
then_is_empty = 1;
}
else
@@ -7411,6 +7447,7 @@ parse_bag(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
#endif
case '-': case 'i': case 'm': case 's': case 'x':
case 'W': case 'D': case 'S': case 'P':
+ case 'y':
{
int neg = 0;
@@ -7435,7 +7472,8 @@ parse_bag(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
}
- else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {
+ else if (IS_SYNTAX_OP2(env->syntax,
+ ONIG_SYN_OP2_OPTION_ONIGURUMA|ONIG_SYN_OP2_OPTION_RUBY)) {
OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg);
}
else
@@ -7451,6 +7489,46 @@ parse_bag(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
case 'S': OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg); break;
case 'P': OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg); break;
+ case 'y': /* y{g}, y{w} */
+ {
+ if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_ONIGURUMA))
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+
+ if (neg != 0) return ONIGERR_UNDEFINED_GROUP_OPTION;
+
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ if (! PPEEK_IS('{')) return ONIGERR_UNDEFINED_GROUP_OPTION;
+ PFETCH(c);
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ PFETCH(c);
+ switch (c) {
+ case 'g':
+ if (! ONIGENC_IS_UNICODE_ENCODING(enc))
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+
+ OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, 0);
+ OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, 1);
+ break;
+#ifdef USE_UNICODE_WORD_BREAK
+ case 'w':
+ if (! ONIGENC_IS_UNICODE_ENCODING(enc))
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+
+ OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, 0);
+ OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, 1);
+ break;
+#endif
+ default:
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ break;
+ }
+ if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ PFETCH(c);
+ if (c != '}')
+ return ONIGERR_UNDEFINED_GROUP_OPTION;
+ break;
+ } /* case 'y' */
+
default:
return ONIGERR_UNDEFINED_GROUP_OPTION;
}
@@ -7482,7 +7560,7 @@ parse_bag(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
PFETCH(c);
- }
+ } /* while (1) */
}
break;
@@ -7745,12 +7823,12 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)
}
static int
-parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
+parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
ScanEnv* env, int group_head)
{
int r, len, group = 0;
Node* qn;
- Node** targetp;
+ Node** tp;
*np = NULL;
if (tok->type == (enum TokenSyms )term)
@@ -7823,7 +7901,7 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
string_end:
- targetp = np;
+ tp = np;
goto repeat;
}
break;
@@ -7872,12 +7950,12 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
case TK_CODE_POINT:
{
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- int num = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
- if (num < 0) return num;
+ len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
+ if (len < 0) return len;
#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
- *np = node_new_str_raw(buf, buf + num);
+ *np = node_new_str_raw(buf, buf + len);
#else
- *np = node_new_str(buf, buf + num);
+ *np = node_new_str(buf, buf + len);
#endif
CHECK_NULL_RETURN_MEMERR(*np);
}
@@ -8021,7 +8099,7 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
break;
- case TK_OP_REPEAT:
+ case TK_REPEAT:
case TK_INTERVAL:
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS)) {
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS))
@@ -8056,8 +8134,8 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (r < 0) return r;
break;
- case TK_EXTENDED_GRAPHEME_CLUSTER:
- r = make_extended_grapheme_cluster(np, env);
+ case TK_TEXT_SEGMENT:
+ r = make_text_segment(np, env);
if (r < 0) return r;
break;
@@ -8067,17 +8145,17 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
{
- targetp = np;
+ tp = np;
re_entry:
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
repeat:
- if (r == TK_OP_REPEAT || r == TK_INTERVAL) {
+ if (r == TK_REPEAT || r == TK_INTERVAL) {
Node* target;
- if (is_invalid_quantifier_target(*targetp))
+ if (is_invalid_quantifier_target(*tp))
return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
@@ -8085,11 +8163,11 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
CHECK_NULL_RETURN_MEMERR(qn);
QUANT_(qn)->greedy = tok->u.repeat.greedy;
if (group == 2) {
- target = node_drop_group(*np);
- *np = NULL_NODE;
+ target = node_drop_group(*tp);
+ *tp = NULL_NODE;
}
else {
- target = *targetp;
+ target = *tp;
}
r = set_quantifier(qn, target, group, env);
if (r < 0) {
@@ -8109,26 +8187,28 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
if (r == 0) {
- *targetp = qn;
+ *tp = qn;
}
- else if (r == 1) {
+ else if (r == 1) { /* x{1,1} ==> x */
onig_node_free(qn);
+ *tp = target;
}
else if (r == 2) { /* split case: /abc+/ */
Node *tmp;
- *targetp = node_new_list(*targetp, NULL);
- if (IS_NULL(*targetp)) {
+ *tp = node_new_list(*tp, NULL);
+ if (IS_NULL(*tp)) {
onig_node_free(qn);
return ONIGERR_MEMORY;
}
- tmp = NODE_CDR(*targetp) = node_new_list(qn, NULL);
+ tmp = NODE_CDR(*tp) = node_new_list(qn, NULL);
if (IS_NULL(tmp)) {
onig_node_free(qn);
return ONIGERR_MEMORY;
}
- targetp = &(NODE_CAR(tmp));
+ tp = &(NODE_CAR(tmp));
}
+ group = 0;
goto re_entry;
}
}
@@ -8137,7 +8217,7 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
}
static int
-parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
+parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
ScanEnv* env, int group_head)
{
int r;
@@ -8185,7 +8265,7 @@ parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
static int
-parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end,
+parse_subexp(Node** top, PToken* tok, int term, UChar** src, UChar* end,
ScanEnv* env, int group_head)
{
int r;
@@ -8251,7 +8331,7 @@ static int
parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
{
int r;
- OnigToken tok;
+ PToken tok;
r = fetch_token(&tok, src, end, env);
if (r < 0) return r;