diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/oniguruma.h | 15 | ||||
-rw-r--r-- | src/regcomp.c | 33 | ||||
-rw-r--r-- | src/regerror.c | 24 | ||||
-rw-r--r-- | src/regexec.c | 41 | ||||
-rw-r--r-- | src/regint.h | 5 | ||||
-rw-r--r-- | src/regparse.c | 52 | ||||
-rw-r--r-- | src/regparse.h | 1 |
7 files changed, 131 insertions, 40 deletions
diff --git a/src/oniguruma.h b/src/oniguruma.h index 090b809..33e2a0a 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -4,7 +4,7 @@ oniguruma.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,8 +35,8 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 -#define ONIGURUMA_VERSION_MINOR 1 -#define ONIGURUMA_VERSION_TEENY 3 +#define ONIGURUMA_VERSION_MINOR 2 +#define ONIGURUMA_VERSION_TEENY 0 #ifdef __cplusplus # ifndef HAVE_PROTOTYPES @@ -372,7 +372,7 @@ int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const /* config parameters */ #define ONIG_NREGION 10 -#define ONIG_MAX_CAPTURE_NUM 32767 +#define ONIG_MAX_CAPTURE_NUM 2147483647 /* 2**31 - 1 */ #define ONIG_MAX_BACKREF_NUM 1000 #define ONIG_MAX_REPEAT_NUM 100000 #define ONIG_MAX_MULTI_BYTE_RANGES_NUM 10000 @@ -543,6 +543,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_UNDEFINED_BYTECODE -13 #define ONIGERR_UNEXPECTED_BYTECODE -14 #define ONIGERR_MATCH_STACK_LIMIT_OVER -15 +#define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16 #define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 #define ONIGERR_FAIL_TO_INITIALIZE -23 @@ -821,6 +822,12 @@ unsigned int onig_get_match_stack_limit_size P_((void)); ONIG_EXTERN int onig_set_match_stack_limit_size P_((unsigned int size)); ONIG_EXTERN +unsigned int onig_get_parse_depth_limit P_((void)); +ONIG_EXTERN +int onig_set_capture_num_limit P_((int num)); +ONIG_EXTERN +int onig_set_parse_depth_limit P_((unsigned int depth)); +ONIG_EXTERN int onig_unicode_define_user_property P_((const char* name, OnigCodePoint* ranges)); ONIG_EXTERN int onig_end P_((void)); diff --git a/src/regcomp.c b/src/regcomp.c index 11ba1e7..5c924b5 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -1230,6 +1230,11 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg) len += (IS_ENCLOSE_RECURSION(node) ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); } + else if (IS_ENCLOSE_RECURSION(node)) { + len = SIZE_OP_MEMORY_START_PUSH; + len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) + ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC); + } else #endif { @@ -1321,6 +1326,14 @@ compile_enclose_node(EncloseNode* node, regex_t* reg) if (r) return r; r = add_opcode(reg, OP_RETURN); } + else if (IS_ENCLOSE_RECURSION(node)) { + if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) + r = add_opcode(reg, OP_MEMORY_END_PUSH_REC); + else + r = add_opcode(reg, OP_MEMORY_END_REC); + if (r) return r; + r = add_mem_num(reg, node->regnum); + } else #endif { @@ -2231,6 +2244,7 @@ is_not_included(Node* x, Node* y, regex_t* reg) return 0; } else { + if (IS_NOT_NULL(xc->mbuf)) return 0; for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (! IS_CODE_SB_WORD(reg->enc, i)) { if (!IS_NCCLASS_NOT(xc)) { @@ -3673,6 +3687,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) #define IN_NOT (1<<1) #define IN_REPEAT (1<<2) #define IN_VAR_REPEAT (1<<3) +#define IN_CALL (1<<4) +#define IN_RECCALL (1<<5) /* setup_tree does the following work. 1. check empty loop. (set qn->target_empty_info) @@ -3843,10 +3859,16 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) break; case ENCLOSE_MEMORY: - if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) { + if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) { BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ } + if (IS_ENCLOSE_CALLED(en)) + state |= IN_CALL; + if (IS_ENCLOSE_RECURSION(en)) + state |= IN_RECCALL; + else if ((state & IN_RECCALL) != 0) + SET_CALL_RECURSION(node); r = setup_tree(en->target, reg, state, env); break; @@ -4160,6 +4182,9 @@ concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right, if (right_len == 0) { to->right_anchor |= left->right_anchor; } + else { + to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT); + } } static int @@ -5003,12 +5028,14 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) if (r) return r; reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | - ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML); + ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML | + ANCHOR_LOOK_BEHIND); if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML; - reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF); + reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF | + ANCHOR_PREC_READ_NOT); if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) { reg->anchor_dmin = opt.len.min; diff --git a/src/regerror.c b/src/regerror.c index 05fc9d8..ee35b36 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -54,6 +54,8 @@ onig_error_code_to_format(int code) p = "fail to memory allocation"; break; case ONIGERR_MATCH_STACK_LIMIT_OVER: p = "match-stack limit over"; break; + case ONIGERR_PARSE_DEPTH_LIMIT_OVER: + p = "parse depth limit over"; break; case ONIGERR_TYPE_BUG: p = "undefined type (bug)"; break; case ONIGERR_PARSER_BUG: @@ -348,21 +350,12 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) p = pat; while (p < pat_end) { - if (*p == '\\') { - *s++ = *p++; - len = enclen(enc, p); - while (len-- > 0) *s++ = *p++; - } - else if (*p == '/') { - *s++ = (unsigned char )'\\'; - *s++ = *p++; - } - else if (ONIGENC_IS_MBC_HEAD(enc, p)) { + if (ONIGENC_IS_MBC_HEAD(enc, p)) { len = enclen(enc, p); if (ONIGENC_MBC_MINLEN(enc) == 1) { while (len-- > 0) *s++ = *p++; } - else { /* for UTF16 */ + else { /* for UTF16/32 */ int blen; while (len-- > 0) { @@ -373,6 +366,15 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) } } } + else if (*p == '\\') { + *s++ = *p++; + len = enclen(enc, p); + while (len-- > 0) *s++ = *p++; + } + else if (*p == '/') { + *s++ = (unsigned char )'\\'; + *s++ = *p++; + } else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && !ONIGENC_IS_CODE_SPACE(enc, *p)) { sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); diff --git a/src/regexec.c b/src/regexec.c index 7e8d3d1..35fef11 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -462,6 +462,7 @@ stack_double(int is_alloca, char** arg_alloc_base, unsigned int n; int used; size_t size; + size_t new_size; char* alloc_base; char* new_alloc_base; OnigStackType *stk_base, *stk_end, *stk; @@ -472,10 +473,11 @@ stack_double(int is_alloca, char** arg_alloc_base, stk = *arg_stk; n = stk_end - stk_base; - n *= 2; size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n; + n *= 2; + new_size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n; if (is_alloca != 0) { - new_alloc_base = (char* )xmalloc(size); + new_alloc_base = (char* )xmalloc(new_size); if (IS_NULL(new_alloc_base)) { STACK_SAVE; return ONIGERR_MEMORY; @@ -489,7 +491,7 @@ stack_double(int is_alloca, char** arg_alloc_base, else n = MatchStackLimitSize; } - new_alloc_base = (char* )xrealloc(alloc_base, size); + new_alloc_base = (char* )xrealloc(alloc_base, new_size); if (IS_NULL(new_alloc_base)) { STACK_SAVE; return ONIGERR_MEMORY; @@ -1242,16 +1244,24 @@ onig_statistics_init(void) MaxStackDepth = 0; } -extern void +extern int onig_print_statistics(FILE* f) { + int r; int i; - fprintf(f, " count prev time\n"); + + r = fprintf(f, " count prev time\n"); + if (r < 0) return -1; + for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - fprintf(f, "%8d: %8d: %10ld: %s\n", - OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + r = fprintf(f, "%8d: %8d: %10ld: %s\n", + OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + if (r < 0) return -1; } - fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); + r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); + if (r < 0) return -1; + + return 0; } #define STACK_INC do {\ @@ -3493,15 +3503,14 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, start = min_semi_end - reg->anchor_dmax; if (start < end) start = onigenc_get_right_adjust_char_head(reg->enc, str, start); - else { /* match with empty at end */ - start = onigenc_get_prev_char_head(reg->enc, str, end); - } } if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) { range = max_semi_end - reg->anchor_dmin + 1; } - if (start >= range) goto mismatch_no_msa; + if (start > range) goto mismatch_no_msa; + /* If start == range, match with empty at end. + Backward search is used. */ } else { if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) { @@ -3626,9 +3635,11 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, prev = s; s += enclen(reg->enc, s); - while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { - prev = s; - s += enclen(reg->enc, s); + if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) { + while (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end) && s < range) { + prev = s; + s += enclen(reg->enc, s); + } } } while (s < range); goto mismatch; diff --git a/src/regint.h b/src/regint.h index 7a3283d..9835143 100644 --- a/src/regint.h +++ b/src/regint.h @@ -71,6 +71,7 @@ #define INIT_MATCH_STACK_SIZE 160 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ +#define DEFAULT_PARSE_DEPTH_LIMIT 4096 #if defined(__GNUC__) # define ARG_UNUSED __attribute__ ((unused)) @@ -522,7 +523,7 @@ typedef int RelAddrType; typedef int AbsAddrType; typedef int LengthType; typedef int RepeatNumType; -typedef short int MemNumType; +typedef int MemNumType; typedef short int StateCheckNumType; typedef void* PointerType; @@ -747,7 +748,7 @@ extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, #ifdef ONIG_DEBUG_STATISTICS extern void onig_statistics_init P_((void)); -extern void onig_print_statistics P_((FILE* f)); +extern int onig_print_statistics P_((FILE* f)); #endif #endif diff --git a/src/regparse.c b/src/regparse.c index 8f1d1cb..11f9e34 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -108,6 +108,38 @@ onig_warning(const char* s) (*onig_warn)(s); } +#define DEFAULT_MAX_CAPTURE_NUM 32767 + +static int MaxCaptureNum = DEFAULT_MAX_CAPTURE_NUM; + +extern int +onig_set_capture_num_limit(int num) +{ + if (num < 0) return -1; + + MaxCaptureNum = num; + return 0; +} + +static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT; + +extern unsigned int +onig_get_parse_depth_limit(void) +{ + return ParseDepthLimit; +} + +extern int +onig_set_parse_depth_limit(unsigned int depth) +{ + if (depth == 0) + ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT; + else + ParseDepthLimit = depth; + return 0; +} + + static void bbuf_free(BBuf* bbuf) { @@ -959,6 +991,7 @@ scan_env_clear(ScanEnv* env) env->curr_max_regnum = 0; env->has_recursion = 0; #endif + env->parse_depth = 0; } static int @@ -968,7 +1001,7 @@ scan_env_add_mem_entry(ScanEnv* env) Node** p; need = env->num_mem + 1; - if (need > ONIG_MAX_CAPTURE_NUM) + if (need > MaxCaptureNum && MaxCaptureNum != 0) return ONIGERR_TOO_MANY_CAPTURES; if (need >= SCANENV_MEMNODES_SIZE) { @@ -1639,9 +1672,10 @@ add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) bound = x; } - for (high = low, bound = n; high < bound; ) { + high = (to == ~((OnigCodePoint )0)) ? n : low; + for (bound = n; high < bound; ) { x = (high + bound) >> 1; - if (to >= data[x*2] - 1) + if (to + 1 >= data[x*2]) high = x + 1; else bound = x; @@ -4113,8 +4147,11 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, enum CCVALTYPE val_type, in_type; int val_israw, in_israw; - prev_cc = (CClassNode* )NULL; *np = NULL_NODE; + env->parse_depth++; + if (env->parse_depth > ParseDepthLimit) + return ONIGERR_PARSE_DEPTH_LIMIT_OVER; + prev_cc = (CClassNode* )NULL; r = fetch_token_in_cc(tok, src, end, env); if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { neg = 1; @@ -4315,7 +4352,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC)) { CC_ESC_WARN(env, (UChar* )"-"); - goto any_char_in; /* [0-9-a] is allowed as [0-9\-a] */ + goto range_end_val; /* [0-9-a] is allowed as [0-9\-a] */ } r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; goto err; @@ -4420,6 +4457,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, } } *src = p; + env->parse_depth--; return 0; err: @@ -5281,6 +5319,9 @@ parse_subexp(Node** top, OnigToken* tok, int term, Node *node, **headp; *top = NULL; + env->parse_depth++; + if (env->parse_depth > ParseDepthLimit) + return ONIGERR_PARSE_DEPTH_LIMIT_OVER; r = parse_branch(&node, tok, term, src, end, env); if (r < 0) { onig_node_free(node); @@ -5317,6 +5358,7 @@ parse_subexp(Node** top, OnigToken* tok, int term, return ONIGERR_PARSER_BUG; } + env->parse_depth--; return r; } diff --git a/src/regparse.h b/src/regparse.h index 9e366fe..c9d1fe8 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -306,6 +306,7 @@ typedef struct { int curr_max_regnum; int has_recursion; #endif + unsigned int parse_depth; } ScanEnv; |