summaryrefslogtreecommitdiff
path: root/src/regparse.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/regparse.c')
-rw-r--r--src/regparse.c179
1 files changed, 114 insertions, 65 deletions
diff --git a/src/regparse.c b/src/regparse.c
index 11f9e34..8153513 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -2,7 +2,7 @@
regparse.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,7 +41,8 @@
OnigSyntaxType OnigSyntaxRuby = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
- ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
+ ONIG_SYN_OP_ESC_CONTROL_CHARS |
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
@@ -553,8 +554,8 @@ i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
int r = (*(arg->func))(e->name,
e->name + e->name_len,
e->back_num,
- (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
- arg->reg, arg->arg);
+ (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
+ arg->reg, arg->arg);
if (r != 0) {
arg->ret = r;
return ST_STOP;
@@ -1053,7 +1054,7 @@ onig_node_free(Node* node)
switch (NTYPE(node)) {
case NT_STR:
if (NSTR(node)->capa != 0 &&
- IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
+ IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
xfree(NSTR(node)->s);
}
break;
@@ -2519,8 +2520,8 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
int flag = (c == '-' ? -1 : 1);
if (PEND) {
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- goto end;
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ goto end;
}
PFETCH(c);
if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
@@ -2531,9 +2532,9 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
exist_level = 1;
if (!PEND) {
- PFETCH(c);
- if (c == end_code)
- goto end;
+ PFETCH(c);
+ if (c == end_code)
+ goto end;
}
}
@@ -2945,19 +2946,46 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
c2 = PPEEK;
if (c2 == '{' &&
- IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
- PINC;
- tok->type = TK_CHAR_PROPERTY;
- tok->u.prop.not = (c == 'P' ? 1 : 0);
-
- if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
- PFETCH(c2);
- if (c2 == '^') {
- tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
- }
- else
- PUNFETCH;
- }
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
+ PINC;
+ tok->type = TK_CHAR_PROPERTY;
+ tok->u.prop.not = (c == 'P' ? 1 : 0);
+
+ if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
+ PFETCH(c2);
+ if (c2 == '^') {
+ tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
+ }
+ else
+ PUNFETCH;
+ }
+ }
+ break;
+
+ case 'o':
+ if (PEND) break;
+
+ prev = p;
+ if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
+ PINC;
+ num = scan_unsigned_octal_number(&p, end, 11, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ if (!PEND) {
+ c2 = PPEEK;
+ if (ONIGENC_IS_CODE_DIGIT(enc, c2))
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ }
+
+ if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
+ PINC;
+ tok->type = TK_CODE_POINT;
+ tok->base = 8;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ else {
+ /* can't read nothing or invalid format */
+ p = prev;
+ }
}
break;
@@ -3020,7 +3048,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
PUNFETCH;
prev = p;
num = scan_unsigned_octal_number(&p, end, 3, enc);
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
@@ -3132,7 +3160,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.repeat.upper = 1;
greedy_check:
if (!PEND && PPEEK_IS('?') &&
- IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
+ IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
PFETCH(c);
tok->u.repeat.greedy = 0;
tok->u.repeat.possessive = 0;
@@ -3302,6 +3330,31 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
goto end_buf;
break;
+ case 'o':
+ if (PEND) break;
+
+ prev = p;
+ if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
+ PINC;
+ num = scan_unsigned_octal_number(&p, end, 11, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ if (!PEND) {
+ if (ONIGENC_IS_CODE_DIGIT(enc, PPEEK))
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ }
+
+ if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
+ PINC;
+ tok->type = TK_CODE_POINT;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ else {
+ /* can't read nothing or invalid format */
+ p = prev;
+ }
+ }
+ break;
+
case 'x':
if (PEND) break;
@@ -3392,7 +3445,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
prev = p;
num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
@@ -3541,7 +3594,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
else { /* string */
p = tok->backp + enclen(enc, tok->backp);
- }
+ }
}
break;
}
@@ -3753,8 +3806,7 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
OnigCodePoint prev = 0;
for (i = 0; i < n; i++) {
- for (j = prev;
- j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
+ for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
if (j >= sb_out) {
goto sb_end2;
}
@@ -4028,14 +4080,16 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
}
}
- *state = CCS_VALUE;
+ if (*state != CCS_START)
+ *state = CCS_VALUE;
+
*type = CCV_CLASS;
return 0;
}
static int
-next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
- int* vs_israw, int v_israw,
+next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
+ int* from_israw, int to_israw,
enum CCVALTYPE intype, enum CCVALTYPE* type,
enum CCSTATE* state, ScanEnv* env)
{
@@ -4044,10 +4098,13 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
switch (*state) {
case CCS_VALUE:
if (*type == CCV_SB) {
- BITSET_SET_BIT(cc->bs, (int )(*vs));
+ if (*from > 0xff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+
+ BITSET_SET_BIT(cc->bs, (int )(*from));
}
else if (*type == CCV_CODE_POINT) {
- r = add_code_range(&(cc->mbuf), env, *vs, *vs);
+ r = add_code_range(&(cc->mbuf), env, *from, *from);
if (r < 0) return r;
}
break;
@@ -4055,40 +4112,32 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
case CCS_RANGE:
if (intype == *type) {
if (intype == CCV_SB) {
- if (*vs > 0xff || v > 0xff)
+ if (*from > 0xff || to > 0xff)
return ONIGERR_INVALID_CODE_POINT_VALUE;
- if (*vs > v) {
+ if (*from > to) {
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
goto ccs_range_end;
else
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
}
- bitset_set_range(cc->bs, (int )*vs, (int )v);
+ bitset_set_range(cc->bs, (int )*from, (int )to);
}
else {
- r = add_code_range(&(cc->mbuf), env, *vs, v);
+ r = add_code_range(&(cc->mbuf), env, *from, to);
if (r < 0) return r;
}
}
else {
-#if 0
- if (intype == CCV_CODE_POINT && *type == CCV_SB) {
-#endif
- if (*vs > v) {
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
- goto ccs_range_end;
- else
- return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
- }
- bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
- r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
- if (r < 0) return r;
-#if 0
+ if (*from > to) {
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
+ goto ccs_range_end;
+ else
+ return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
}
- else
- return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
-#endif
+ bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
+ r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);
+ if (r < 0) return r;
}
ccs_range_end:
*state = CCS_COMPLETE;
@@ -4103,9 +4152,9 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
break;
}
- *vs_israw = v_israw;
- *vs = v;
- *type = intype;
+ *from_israw = to_israw;
+ *from = to;
+ *type = intype;
return 0;
}
@@ -4366,9 +4415,9 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
r = parse_char_class(&anode, tok, &p, end, env);
if (r != 0) {
- onig_node_free(anode);
- goto cc_open_err;
- }
+ onig_node_free(anode);
+ goto cc_open_err;
+ }
acc = NCCLASS(anode);
r = or_cclass(cc, acc, env->enc);
@@ -4663,9 +4712,9 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
r = parse_subexp(&target, tok, term, &p, end, env);
env->option = prev;
if (r < 0) {
- onig_node_free(target);
- return r;
- }
+ onig_node_free(target);
+ return r;
+ }
*np = node_new_option(option);
CHECK_NULL_RETURN_MEMERR(*np);
NENCLOSE(*np)->target = target;
@@ -5291,8 +5340,8 @@ parse_branch(Node** top, OnigToken* tok, int term,
while (r != TK_EOT && r != term && r != TK_ALT) {
r = parse_exp(&node, tok, term, src, end, env);
if (r < 0) {
- onig_node_free(node);
- return r;
+ onig_node_free(node);
+ return r;
}
if (NTYPE(node) == NT_LIST) {