summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rwxr-xr-x  src/gperf_unfold_key_conv.py    2
-rw-r--r--  src/oniguruma.h                 3
-rw-r--r--  src/regcomp.c                  98
-rw-r--r--  src/regexec.c                  46
-rw-r--r--  src/regparse.c                179
-rw-r--r--  src/regsyntax.c                 6
-rw-r--r--  src/unicode_fold1_key.c        12
-rw-r--r--  src/unicode_fold2_key.c        12
-rw-r--r--  src/unicode_fold3_key.c        12
-rw-r--r--  src/unicode_unfold_key.c       14
10 files changed, 233 insertions, 151 deletions
diff --git a/src/gperf_unfold_key_conv.py b/src/gperf_unfold_key_conv.py
index dcd8587..34f9c2f 100755
--- a/src/gperf_unfold_key_conv.py
+++ b/src/gperf_unfold_key_conv.py
@@ -36,7 +36,7 @@ def parse_line(s):
if r != s: return r
r = re.sub(REG_GET_CODE, 'OnigCodePoint gcode = wordlist[key].code;', s)
if r != s: return r
- r = re.sub(REG_CODE_CHECK, 'if (code == gcode)', s)
+ r = re.sub(REG_CODE_CHECK, 'if (code == gcode && wordlist[key].index >= 0)', s)
if r != s: return r
return s
diff --git a/src/oniguruma.h b/src/oniguruma.h
index 33e2a0a..02d4254 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -35,7 +35,7 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
-#define ONIGURUMA_VERSION_MINOR 2
+#define ONIGURUMA_VERSION_MINOR 3
#define ONIGURUMA_VERSION_TEENY 0
#ifdef __cplusplus
@@ -473,6 +473,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_OP_ESC_OCTAL3 (1U<<28) /* \OOO */
#define ONIG_SYN_OP_ESC_X_HEX2 (1U<<29) /* \xHH */
#define ONIG_SYN_OP_ESC_X_BRACE_HEX8 (1U<<30) /* \x{7HHHHHHH} */
+#define ONIG_SYN_OP_ESC_O_BRACE_OCTAL (1U<<31) /* \o{1OOOOOOOOOO} */
#define ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE (1U<<0) /* \Q...\E */
#define ONIG_SYN_OP2_QMARK_GROUP_EFFECT (1U<<1) /* (?...) */
diff --git a/src/regcomp.c b/src/regcomp.c
index 5c924b5..0e9a9ab 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -2,7 +2,7 @@
regcomp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -761,17 +761,17 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
if (infinite && qn->lower <= 1) {
if (qn->greedy) {
if (qn->lower == 1)
- len = SIZE_OP_JUMP;
+ len = SIZE_OP_JUMP;
else
- len = 0;
+ len = 0;
len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
}
else {
if (qn->lower == 0)
- len = SIZE_OP_JUMP;
+ len = SIZE_OP_JUMP;
else
- len = 0;
+ len = 0;
len += mod_tlen + SIZE_OP_PUSH + cklen;
}
@@ -785,10 +785,10 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
else if (qn->upper == 1 && qn->greedy) {
if (qn->lower == 0) {
if (CKN_ON) {
- len = SIZE_OP_STATE_CHECK_PUSH + tlen;
+ len = SIZE_OP_STATE_CHECK_PUSH + tlen;
}
else {
- len = SIZE_OP_PUSH + tlen;
+ len = SIZE_OP_PUSH + tlen;
}
}
else {
@@ -1255,7 +1255,7 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg)
if (tlen < 0) return tlen;
len = tlen * qn->lower
- + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
+ + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
}
else {
len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
@@ -1362,7 +1362,7 @@ compile_enclose_node(EncloseNode* node, regex_t* reg)
r = add_opcode(reg, OP_POP);
if (r) return r;
r = add_opcode_rel_addr(reg, OP_JUMP,
- -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
+ -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
}
else {
r = add_opcode(reg, OP_PUSH_STOP_BT);
@@ -2145,16 +2145,16 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
switch (en->type) {
case ENCLOSE_MEMORY:
#ifdef USE_SUBEXP_CALL
- if (IS_ENCLOSE_CLEN_FIXED(en))
- *len = en->char_len;
- else {
- r = get_char_length_tree1(en->target, reg, len, level);
- if (r == 0) {
- en->char_len = *len;
- SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
- }
- }
- break;
+ if (IS_ENCLOSE_CLEN_FIXED(en))
+ *len = en->char_len;
+ else {
+ r = get_char_length_tree1(en->target, reg, len, level);
+ if (r == 0) {
+ en->char_len = *len;
+ SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
+ }
+ }
+ break;
#endif
case ENCLOSE_OPTION:
case ENCLOSE_STOP_BACKTRACK:
@@ -2594,17 +2594,17 @@ get_min_len(Node* node, OnigLen *min, ScanEnv* env)
if (IS_ENCLOSE_MIN_FIXED(en))
*min = en->min_len;
else {
- if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
- *min = 0; // recursive
- else {
- SET_ENCLOSE_STATUS(node, NST_MARK1);
- r = get_min_len(en->target, min, env);
- CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
- if (r == 0) {
- en->min_len = *min;
- SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
- }
- }
+ if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
+ *min = 0; // recursive
+ else {
+ SET_ENCLOSE_STATUS(node, NST_MARK1);
+ r = get_min_len(en->target, min, env);
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
+ if (r == 0) {
+ en->min_len = *min;
+ SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
+ }
+ }
}
break;
@@ -2713,22 +2713,22 @@ get_max_len(Node* node, OnigLen *max, ScanEnv* env)
EncloseNode* en = NENCLOSE(node);
switch (en->type) {
case ENCLOSE_MEMORY:
- if (IS_ENCLOSE_MAX_FIXED(en))
- *max = en->max_len;
- else {
- if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
- *max = ONIG_INFINITE_DISTANCE;
- else {
- SET_ENCLOSE_STATUS(node, NST_MARK1);
- r = get_max_len(en->target, max, env);
- CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
- if (r == 0) {
- en->max_len = *max;
- SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
- }
- }
- }
- break;
+ if (IS_ENCLOSE_MAX_FIXED(en))
+ *max = en->max_len;
+ else {
+ if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
+ *max = ONIG_INFINITE_DISTANCE;
+ else {
+ SET_ENCLOSE_STATUS(node, NST_MARK1);
+ r = get_max_len(en->target, max, env);
+ CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
+ if (r == 0) {
+ en->max_len = *max;
+ SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
+ }
+ }
+ }
+ break;
case ENCLOSE_OPTION:
case ENCLOSE_STOP_BACKTRACK:
@@ -4559,7 +4559,7 @@ concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
if (to->expr.len > 0) {
if (add->len.max > 0) {
if (to->expr.len > (int )add->len.max)
- to->expr.len = add->len.max;
+ to->expr.len = add->len.max;
if (to->expr.mmd.max == 0)
select_opt_exact_info(enc, &to->exb, &to->expr);
@@ -4957,7 +4957,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
reg->exact_end = reg->exact + e->len;
allow_reverse =
- ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
+ ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
r = set_bm_skip(reg->exact, reg->exact_end, reg->enc,
@@ -5045,7 +5045,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
if (opt.exb.len > 0 || opt.exm.len > 0) {
select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
if (opt.map.value > 0 &&
- comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
+ comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
goto set_map;
}
else {
diff --git a/src/regexec.c b/src/regexec.c
index 35fef11..c0626ef 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -1346,8 +1346,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
fprintf(stderr, "%4d> \"", (int )(s - str));
bp = buf;
for (i = 0, q = s; i < 7 && q < end; i++) {
- len = enclen(encode, q);
- while (len-- > 0) *bp++ = *q++;
+ len = enclen(encode, q);
+ while (len-- > 0) *bp++ = *q++;
}
if (q < end) { xmemcpy(bp, "...\"", 4); bp += 4; }
else { xmemcpy(bp, "\"", 1); bp += 1; }
@@ -1473,14 +1473,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
case OP_EXACT1: MOP_IN(OP_EXACT1);
-#if 0
DATA_ENSURE(1);
if (*p != *s) goto fail;
p++; s++;
-#endif
- if (*p != *s++) goto fail;
- DATA_ENSURE(0);
- p++;
MOP_OUT;
break;
@@ -3159,6 +3154,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
}
else {
UChar *q = p + reg->dmin;
+
+ if (q >= end) return 0; /* fail */
while (p < q) p += enclen(reg->enc, p);
}
}
@@ -3238,18 +3235,25 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
}
else {
if (reg->dmax != ONIG_INFINITE_DISTANCE) {
- *low = p - reg->dmax;
- if (*low > s) {
- *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
- *low, (const UChar** )low_prev);
- if (low_prev && IS_NULL(*low_prev))
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : s), *low);
+ if (p - str < reg->dmax) {
+ *low = (UChar* )str;
+ if (low_prev)
+ *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
}
else {
- if (low_prev)
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), *low);
+ *low = p - reg->dmax;
+ if (*low > s) {
+ *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
+ *low, (const UChar** )low_prev);
+ if (low_prev && IS_NULL(*low_prev))
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : s), *low);
+ }
+ else {
+ if (low_prev)
+ *low_prev = onigenc_get_prev_char_head(reg->enc,
+ (pprev ? pprev : str), *low);
+ }
}
}
}
@@ -3790,8 +3794,10 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end,
if (rs != 0)
return rs;
- if (region->end[0] == start - str)
- start++;
+ if (region->end[0] == start - str) {
+ if (start >= end) break;
+ start += enclen(reg->enc, start);
+ }
else
start = str + region->end[0];
diff --git a/src/regparse.c b/src/regparse.c
index 11f9e34..8153513 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -2,7 +2,7 @@
regparse.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -41,7 +41,8 @@
OnigSyntaxType OnigSyntaxRuby = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
- ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
+ ONIG_SYN_OP_ESC_CONTROL_CHARS |
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT |
@@ -553,8 +554,8 @@ i_names(UChar* key ARG_UNUSED, NameEntry* e, INamesArg* arg)
int r = (*(arg->func))(e->name,
e->name + e->name_len,
e->back_num,
- (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
- arg->reg, arg->arg);
+ (e->back_num > 1 ? e->back_refs : &(e->back_ref1)),
+ arg->reg, arg->arg);
if (r != 0) {
arg->ret = r;
return ST_STOP;
@@ -1053,7 +1054,7 @@ onig_node_free(Node* node)
switch (NTYPE(node)) {
case NT_STR:
if (NSTR(node)->capa != 0 &&
- IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
+ IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) {
xfree(NSTR(node)->s);
}
break;
@@ -2519,8 +2520,8 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
int flag = (c == '-' ? -1 : 1);
if (PEND) {
- r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
- goto end;
+ r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME;
+ goto end;
}
PFETCH(c);
if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err;
@@ -2531,9 +2532,9 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
exist_level = 1;
if (!PEND) {
- PFETCH(c);
- if (c == end_code)
- goto end;
+ PFETCH(c);
+ if (c == end_code)
+ goto end;
}
}
@@ -2945,19 +2946,46 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
c2 = PPEEK;
if (c2 == '{' &&
- IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
- PINC;
- tok->type = TK_CHAR_PROPERTY;
- tok->u.prop.not = (c == 'P' ? 1 : 0);
-
- if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
- PFETCH(c2);
- if (c2 == '^') {
- tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
- }
- else
- PUNFETCH;
- }
+ IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY)) {
+ PINC;
+ tok->type = TK_CHAR_PROPERTY;
+ tok->u.prop.not = (c == 'P' ? 1 : 0);
+
+ if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {
+ PFETCH(c2);
+ if (c2 == '^') {
+ tok->u.prop.not = (tok->u.prop.not == 0 ? 1 : 0);
+ }
+ else
+ PUNFETCH;
+ }
+ }
+ break;
+
+ case 'o':
+ if (PEND) break;
+
+ prev = p;
+ if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
+ PINC;
+ num = scan_unsigned_octal_number(&p, end, 11, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ if (!PEND) {
+ c2 = PPEEK;
+ if (ONIGENC_IS_CODE_DIGIT(enc, c2))
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ }
+
+ if (p > prev + enclen(enc, prev) && !PEND && (PPEEK_IS('}'))) {
+ PINC;
+ tok->type = TK_CODE_POINT;
+ tok->base = 8;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ else {
+ /* can't read nothing or invalid format */
+ p = prev;
+ }
}
break;
@@ -3020,7 +3048,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
PUNFETCH;
prev = p;
num = scan_unsigned_octal_number(&p, end, 3, enc);
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
@@ -3132,7 +3160,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.repeat.upper = 1;
greedy_check:
if (!PEND && PPEEK_IS('?') &&
- IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
+ IS_SYNTAX_OP(syn, ONIG_SYN_OP_QMARK_NON_GREEDY)) {
PFETCH(c);
tok->u.repeat.greedy = 0;
tok->u.repeat.possessive = 0;
@@ -3302,6 +3330,31 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
goto end_buf;
break;
+ case 'o':
+ if (PEND) break;
+
+ prev = p;
+ if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
+ PINC;
+ num = scan_unsigned_octal_number(&p, end, 11, enc);
+ if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ if (!PEND) {
+ if (ONIGENC_IS_CODE_DIGIT(enc, PPEEK))
+ return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
+ }
+
+ if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
+ PINC;
+ tok->type = TK_CODE_POINT;
+ tok->u.code = (OnigCodePoint )num;
+ }
+ else {
+ /* can't read nothing or invalid format */
+ p = prev;
+ }
+ }
+ break;
+
case 'x':
if (PEND) break;
@@ -3392,7 +3445,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
prev = p;
num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
- if (num < 0) return ONIGERR_TOO_BIG_NUMBER;
+ if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
num = 0; /* but, it's not error */
}
@@ -3541,7 +3594,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
else { /* string */
p = tok->backp + enclen(enc, tok->backp);
- }
+ }
}
break;
}
@@ -3753,8 +3806,7 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not,
OnigCodePoint prev = 0;
for (i = 0; i < n; i++) {
- for (j = prev;
- j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
+ for (j = prev; j < ONIGENC_CODE_RANGE_FROM(mbr, i); j++) {
if (j >= sb_out) {
goto sb_end2;
}
@@ -4028,14 +4080,16 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
}
}
- *state = CCS_VALUE;
+ if (*state != CCS_START)
+ *state = CCS_VALUE;
+
*type = CCV_CLASS;
return 0;
}
static int
-next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
- int* vs_israw, int v_israw,
+next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
+ int* from_israw, int to_israw,
enum CCVALTYPE intype, enum CCVALTYPE* type,
enum CCSTATE* state, ScanEnv* env)
{
@@ -4044,10 +4098,13 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
switch (*state) {
case CCS_VALUE:
if (*type == CCV_SB) {
- BITSET_SET_BIT(cc->bs, (int )(*vs));
+ if (*from > 0xff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+
+ BITSET_SET_BIT(cc->bs, (int )(*from));
}
else if (*type == CCV_CODE_POINT) {
- r = add_code_range(&(cc->mbuf), env, *vs, *vs);
+ r = add_code_range(&(cc->mbuf), env, *from, *from);
if (r < 0) return r;
}
break;
@@ -4055,40 +4112,32 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
case CCS_RANGE:
if (intype == *type) {
if (intype == CCV_SB) {
- if (*vs > 0xff || v > 0xff)
+ if (*from > 0xff || to > 0xff)
return ONIGERR_INVALID_CODE_POINT_VALUE;
- if (*vs > v) {
+ if (*from > to) {
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
goto ccs_range_end;
else
return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
}
- bitset_set_range(cc->bs, (int )*vs, (int )v);
+ bitset_set_range(cc->bs, (int )*from, (int )to);
}
else {
- r = add_code_range(&(cc->mbuf), env, *vs, v);
+ r = add_code_range(&(cc->mbuf), env, *from, to);
if (r < 0) return r;
}
}
else {
-#if 0
- if (intype == CCV_CODE_POINT && *type == CCV_SB) {
-#endif
- if (*vs > v) {
- if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
- goto ccs_range_end;
- else
- return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
- }
- bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff));
- r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v);
- if (r < 0) return r;
-#if 0
+ if (*from > to) {
+ if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC))
+ goto ccs_range_end;
+ else
+ return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS;
}
- else
- return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE;
-#endif
+ bitset_set_range(cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff));
+ r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to);
+ if (r < 0) return r;
}
ccs_range_end:
*state = CCS_COMPLETE;
@@ -4103,9 +4152,9 @@ next_state_val(CClassNode* cc, OnigCodePoint *vs, OnigCodePoint v,
break;
}
- *vs_israw = v_israw;
- *vs = v;
- *type = intype;
+ *from_israw = to_israw;
+ *from = to;
+ *type = intype;
return 0;
}
@@ -4366,9 +4415,9 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
r = parse_char_class(&anode, tok, &p, end, env);
if (r != 0) {
- onig_node_free(anode);
- goto cc_open_err;
- }
+ onig_node_free(anode);
+ goto cc_open_err;
+ }
acc = NCCLASS(anode);
r = or_cclass(cc, acc, env->enc);
@@ -4663,9 +4712,9 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
r = parse_subexp(&target, tok, term, &p, end, env);
env->option = prev;
if (r < 0) {
- onig_node_free(target);
- return r;
- }
+ onig_node_free(target);
+ return r;
+ }
*np = node_new_option(option);
CHECK_NULL_RETURN_MEMERR(*np);
NENCLOSE(*np)->target = target;
@@ -5291,8 +5340,8 @@ parse_branch(Node** top, OnigToken* tok, int term,
while (r != TK_EOT && r != term && r != TK_ALT) {
r = parse_exp(&node, tok, term, src, end, env);
if (r < 0) {
- onig_node_free(node);
- return r;
+ onig_node_free(node);
+ return r;
}
if (NTYPE(node) == NT_LIST) {
diff --git a/src/regsyntax.c b/src/regsyntax.c
index ade5b55..e751e24 100644
--- a/src/regsyntax.c
+++ b/src/regsyntax.c
@@ -168,7 +168,8 @@ OnigSyntaxType OnigSyntaxJava = {
OnigSyntaxType OnigSyntaxPerl = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
- ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
+ ONIG_SYN_OP_ESC_CONTROL_CHARS |
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
@@ -192,7 +193,8 @@ OnigSyntaxType OnigSyntaxPerl = {
OnigSyntaxType OnigSyntaxPerl_NG = {
(( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY |
ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 |
- ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_CONTROL_CHARS |
+ ONIG_SYN_OP_ESC_X_BRACE_HEX8 | ONIG_SYN_OP_ESC_O_BRACE_OCTAL |
+ ONIG_SYN_OP_ESC_CONTROL_CHARS |
ONIG_SYN_OP_ESC_C_CONTROL )
& ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END )
, ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE |
diff --git a/src/unicode_fold1_key.c b/src/unicode_fold1_key.c
index 6b390fc..2151211 100644
--- a/src/unicode_fold1_key.c
+++ b/src/unicode_fold1_key.c
@@ -1,7 +1,7 @@
/* This file was converted by gperf_fold_key_conv.py
from gperf output file. */
-/* ANSI-C code produced by gperf version 3.0.3 */
-/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold1_key unicode_fold1_key.gperf */
+/* ANSI-C code produced by gperf version 3.0.4 */
+/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold1_key unicode_fold1_key.gperf */
/* Computed positions: -k'1-3' */
@@ -60,6 +60,12 @@ hash(OnigCodePoint codes[])
return asso_values[(unsigned char)onig_codes_byte_at(codes, 2)+3] + asso_values[(unsigned char)onig_codes_byte_at(codes, 1)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 0)];
}
+#ifdef __GNUC__
+__inline
+#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
int
unicode_fold1_key(OnigCodePoint codes[])
{
@@ -2534,7 +2540,7 @@ unicode_fold1_key(OnigCodePoint codes[])
{
int key = hash(codes);
- if (key <= MAX_HASH_VALUE)
+ if (key <= MAX_HASH_VALUE && key >= 0)
{
int index = wordlist[key];
diff --git a/src/unicode_fold2_key.c b/src/unicode_fold2_key.c
index 74e9876..07cfa4e 100644
--- a/src/unicode_fold2_key.c
+++ b/src/unicode_fold2_key.c
@@ -1,7 +1,7 @@
/* This file was converted by gperf_fold_key_conv.py
from gperf output file. */
-/* ANSI-C code produced by gperf version 3.0.3 */
-/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold2_key unicode_fold2_key.gperf */
+/* ANSI-C code produced by gperf version 3.0.4 */
+/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold2_key unicode_fold2_key.gperf */
/* Computed positions: -k'3,6' */
@@ -60,6 +60,12 @@ hash(OnigCodePoint codes[])
return asso_values[(unsigned char)onig_codes_byte_at(codes, 5)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 2)];
}
+#ifdef __GNUC__
+__inline
+#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
int
unicode_fold2_key(OnigCodePoint codes[])
{
@@ -189,7 +195,7 @@ unicode_fold2_key(OnigCodePoint codes[])
{
int key = hash(codes);
- if (key <= MAX_HASH_VALUE)
+ if (key <= MAX_HASH_VALUE && key >= 0)
{
int index = wordlist[key];
diff --git a/src/unicode_fold3_key.c b/src/unicode_fold3_key.c
index 8095b1c..1b4d9d4 100644
--- a/src/unicode_fold3_key.c
+++ b/src/unicode_fold3_key.c
@@ -1,7 +1,7 @@
/* This file was converted by gperf_fold_key_conv.py
from gperf output file. */
-/* ANSI-C code produced by gperf version 3.0.3 */
-/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold3_key unicode_fold3_key.gperf */
+/* ANSI-C code produced by gperf version 3.0.4 */
+/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N unicode_fold3_key unicode_fold3_key.gperf */
/* Computed positions: -k'3,6,9' */
@@ -60,6 +60,12 @@ hash(OnigCodePoint codes[])
return asso_values[(unsigned char)onig_codes_byte_at(codes, 8)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 5)] + asso_values[(unsigned char)onig_codes_byte_at(codes, 2)];
}
+#ifdef __GNUC__
+__inline
+#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
int
unicode_fold3_key(OnigCodePoint codes[])
{
@@ -99,7 +105,7 @@ unicode_fold3_key(OnigCodePoint codes[])
{
int key = hash(codes);
- if (key <= MAX_HASH_VALUE)
+ if (key <= MAX_HASH_VALUE && key >= 0)
{
int index = wordlist[key];
diff --git a/src/unicode_unfold_key.c b/src/unicode_unfold_key.c
index c6261d2..15302ca 100644
--- a/src/unicode_unfold_key.c
+++ b/src/unicode_unfold_key.c
@@ -1,7 +1,7 @@
/* This file was converted by gperf_unfold_key_conv.py
from gperf output file. */
-/* ANSI-C code produced by gperf version 3.0.3 */
-/* Command-line: /Library/Developer/CommandLineTools/usr/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N unicode_unfold_key unicode_unfold_key.gperf */
+/* ANSI-C code produced by gperf version 3.0.4 */
+/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N unicode_unfold_key unicode_unfold_key.gperf */
/* Computed positions: -k'1-3' */
@@ -64,6 +64,12 @@ hash(OnigCodePoint codes[])
return asso_values[(unsigned char)onig_codes_byte_at(codes, 2)+35] + asso_values[(unsigned char)onig_codes_byte_at(codes, 1)+1] + asso_values[(unsigned char)onig_codes_byte_at(codes, 0)];
}
+#ifdef __GNUC__
+__inline
+#if defined __GNUC_STDC_INLINE__ || defined __GNUC_GNU_INLINE__
+__attribute__ ((__gnu_inline__))
+#endif
+#endif
const struct ByUnfoldKey *
unicode_unfold_key(OnigCodePoint code)
{
@@ -2840,11 +2846,11 @@ unicode_unfold_key(OnigCodePoint code)
{
int key = hash(&code);
- if (key <= MAX_HASH_VALUE)
+ if (key <= MAX_HASH_VALUE && key >= 0)
{
OnigCodePoint gcode = wordlist[key].code;
- if (code == gcode)
+ if (code == gcode && wordlist[key].index >= 0)
return &wordlist[key];
}
}