summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/big5.c23
-rw-r--r--src/euc_jp.c34
-rw-r--r--src/euc_kr.c22
-rw-r--r--src/euc_tw.c37
-rw-r--r--src/gb18030.c38
-rw-r--r--src/onigposix.h2
-rw-r--r--src/oniguruma.h9
-rw-r--r--src/regcomp.c34
-rw-r--r--src/regenc.h2
-rw-r--r--src/regexec.c24
-rw-r--r--src/regint.h2
-rw-r--r--src/regparse.c59
-rw-r--r--src/sjis.c31
13 files changed, 274 insertions, 43 deletions
diff --git a/src/big5.c b/src/big5.c
index 3d44975..bc713ab 100644
--- a/src/big5.c
+++ b/src/big5.c
@@ -55,9 +55,28 @@ big5_mbc_enc_len(const UChar* p)
}
static int
-is_valid_mbc_string(const UChar* s, const UChar* end)
+is_valid_mbc_string(const UChar* p, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_BIG5, s, end);
+ while (p < end) {
+ if (*p < 0x80) {
+ p++;
+ }
+ else if (*p < 0xa1) {
+ return FALSE;
+ }
+ else if (*p < 0xff) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x40) return FALSE;
+ if (*p > 0x7e && *p < 0xa1) return FALSE;
+ if (*p == 0xff) return FALSE;
+ p++;
+ }
+ else
+ return FALSE;
+ }
+
+ return TRUE;
}
static OnigCodePoint
diff --git a/src/euc_jp.c b/src/euc_jp.c
index 19422ce..3b54e95 100644
--- a/src/euc_jp.c
+++ b/src/euc_jp.c
@@ -57,9 +57,39 @@ mbc_enc_len(const UChar* p)
}
static int
-is_valid_mbc_string(const UChar* s, const UChar* end)
+is_valid_mbc_string(const UChar* p, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_JP, s, end);
+ while (p < end) {
+ if (*p < 0x80) {
+ p++;
+ }
+ else if (*p > 0xa0) {
+ if (*p == 0xff) return FALSE;
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff) return FALSE;
+ p++;
+ }
+ else if (*p == 0x8e) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p > 0xdf) return FALSE;
+ p++;
+ }
+ else if (*p == 0x8f) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff) return FALSE;
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff) return FALSE;
+ p++;
+ }
+ else
+ return FALSE;
+ }
+
+ return TRUE;
}
static OnigCodePoint
diff --git a/src/euc_kr.c b/src/euc_kr.c
index 12803cd..450caf1 100644
--- a/src/euc_kr.c
+++ b/src/euc_kr.c
@@ -55,9 +55,27 @@ euckr_mbc_enc_len(const UChar* p)
}
static int
-is_valid_mbc_string(const UChar* s, const UChar* end)
+is_valid_mbc_string(const UChar* p, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_KR, s, end);
+ while (p < end) {
+ if (*p < 0x80) {
+ p++;
+ }
+ else if (*p < 0xa1) {
+ return FALSE;
+ }
+ else if (*p < 0xff) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff)
+ return FALSE;
+ p++;
+ }
+ else
+ return FALSE;
+ }
+
+ return TRUE;
}
static OnigCodePoint
diff --git a/src/euc_tw.c b/src/euc_tw.c
index 4e07567..b3ee628 100644
--- a/src/euc_tw.c
+++ b/src/euc_tw.c
@@ -55,9 +55,42 @@ euctw_mbc_enc_len(const UChar* p)
}
static int
-is_valid_mbc_string(const UChar* s, const UChar* end)
+is_valid_mbc_string(const UChar* p, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_EUC_TW, s, end);
+ while (p < end) {
+ if (*p < 0x80) {
+ p++;
+ }
+ else if (*p < 0xa1) {
+ if (*p == 0x8e) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p > 0xb0) return FALSE;
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff)
+ return FALSE;
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff)
+ return FALSE;
+ p++;
+ }
+ else
+ return FALSE;
+ }
+ else if (*p < 0xff) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0xa1 || *p == 0xff)
+ return FALSE;
+ p++;
+ }
+ else
+ return FALSE;
+ }
+
+ return TRUE;
}
static OnigCodePoint
diff --git a/src/gb18030.c b/src/gb18030.c
index 36fc3de..c8b5865 100644
--- a/src/gb18030.c
+++ b/src/gb18030.c
@@ -76,9 +76,43 @@ gb18030_mbc_enc_len(const UChar* p)
}
static int
-is_valid_mbc_string(const UChar* s, const UChar* end)
+is_valid_mbc_string(const UChar* p, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_GB18030, s, end);
+ while (p < end) {
+ if (*p < 0x80) {
+ p++;
+ }
+ else if (*p == 0x80 || *p == 0xff) {
+ return FALSE;
+ }
+ else {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x40) {
+ if (*p < 0x30 || *p > 0x39)
+ return FALSE;
+
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x81 || *p == 0xff) return FALSE;
+
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x30 || *p > 0x39)
+ return FALSE;
+
+ p++;
+ }
+ else if (*p == 0x7f || *p == 0xff) {
+ return FALSE;
+ }
+ else {
+ p++;
+ }
+ }
+ }
+
+ return TRUE;
}
static OnigCodePoint
diff --git a/src/onigposix.h b/src/onigposix.h
index 6c41537..2af3717 100644
--- a/src/onigposix.h
+++ b/src/onigposix.h
@@ -39,7 +39,7 @@ extern "C" {
#define REG_NEWLINE (1<<1)
#define REG_NOTBOL (1<<2)
#define REG_NOTEOL (1<<3)
-#define REG_EXTENDED (1<<4) /* if not setted, Basic Onigular Expression */
+#define REG_EXTENDED (1<<4) /* if not set, Basic Onigular Expression */
#define REG_NOSUB (1<<5)
/* POSIX error codes */
diff --git a/src/oniguruma.h b/src/oniguruma.h
index 5aa49f6..6090165 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -36,7 +36,7 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
#define ONIGURUMA_VERSION_MINOR 1
-#define ONIGURUMA_VERSION_TEENY 1
+#define ONIGURUMA_VERSION_TEENY 2
#ifdef __cplusplus
# ifndef HAVE_PROTOTYPES
@@ -364,7 +364,7 @@ int onigenc_strlen_null P_((OnigEncoding enc, const OnigUChar* p));
ONIG_EXTERN
int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
ONIG_EXTERN
-int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end));
+int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end));
@@ -398,7 +398,8 @@ typedef unsigned int OnigOptionType;
#define ONIG_OPTION_NOTBOL (ONIG_OPTION_CAPTURE_GROUP << 1)
#define ONIG_OPTION_NOTEOL (ONIG_OPTION_NOTBOL << 1)
#define ONIG_OPTION_POSIX_REGION (ONIG_OPTION_NOTEOL << 1)
-#define ONIG_OPTION_MAXBIT ONIG_OPTION_POSIX_REGION /* limit */
+#define ONIG_OPTION_CHECK_VALIDITY_OF_STRING (ONIG_OPTION_POSIX_REGION << 1)
+#define ONIG_OPTION_MAXBIT ONIG_OPTION_CHECK_VALIDITY_OF_STRING /* limit */
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
@@ -742,7 +743,7 @@ void onig_free P_((OnigRegex));
ONIG_EXTERN
void onig_free_body P_((OnigRegex));
ONIG_EXTERN
-int onig_scan(regex_t* reg, const UChar* str, const UChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg);
+int onig_scan(regex_t* reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg);
ONIG_EXTERN
int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
diff --git a/src/regcomp.c b/src/regcomp.c
index 0235a9f..11ba1e7 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -1795,6 +1795,11 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
}
break;
+ case NT_ANCHOR:
+ if (NANCHOR(node)->target)
+ r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
+ break;
+
default:
break;
}
@@ -1853,6 +1858,11 @@ renumber_by_map(Node* node, GroupNumRemap* map)
r = renumber_node_backref(node, map);
break;
+ case NT_ANCHOR:
+ if (NANCHOR(node)->target)
+ r = renumber_by_map(NANCHOR(node)->target, map);
+ break;
+
default:
break;
}
@@ -1884,6 +1894,11 @@ numbered_ref_check(Node* node)
return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
break;
+ case NT_ANCHOR:
+ if (NANCHOR(node)->target)
+ r = numbered_ref_check(NANCHOR(node)->target);
+ break;
+
default:
break;
}
@@ -3875,9 +3890,10 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
#define ALLOWED_ANCHOR_IN_LB \
-( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
+( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
+
#define ALLOWED_ANCHOR_IN_LB_NOT \
-( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION )
+( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
case ANCHOR_LOOK_BEHIND:
{
@@ -3913,7 +3929,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
return r;
}
-/* set skip map for Boyer-Moor search */
+/* set skip map for Boyer-Moore search */
static int
set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
UChar skip[], int** int_skip)
@@ -4641,7 +4657,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
int i, z;
CClassNode* cc = NCCLASS(node);
- /* no need to check ignore case. (setted in setup_tree()) */
+ /* no need to check ignore case. (set in setup_tree()) */
if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
OnigLen min = ONIGENC_MBC_MINLEN(env->enc);
@@ -4712,6 +4728,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
case ANCHOR_END_BUF:
case ANCHOR_SEMI_END_BUF:
case ANCHOR_END_LINE:
+ case ANCHOR_PREC_READ_NOT:
+ case ANCHOR_LOOK_BEHIND:
add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
break;
@@ -4734,8 +4752,6 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
}
break;
- case ANCHOR_PREC_READ_NOT:
- case ANCHOR_LOOK_BEHIND: /* Sorry, I can't make use of it. */
case ANCHOR_LOOK_BEHIND_NOT:
break;
}
@@ -4989,6 +5005,9 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML);
+ if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
+ reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
+
reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF);
if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
@@ -5133,7 +5152,7 @@ print_anchor(FILE* f, int anchor)
}
if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
if (q) fprintf(f, ", ");
- fprintf(f, "anychar-star-pl");
+ fprintf(f, "anychar-star-ml");
}
fprintf(f, "]");
@@ -5252,6 +5271,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
UnsetAddrList uslist;
#endif
+ root = 0;
if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
#ifdef ONIG_DEBUG
diff --git a/src/regenc.h b/src/regenc.h
index 49227fa..e119dab 100644
--- a/src/regenc.h
+++ b/src/regenc.h
@@ -110,7 +110,7 @@ struct PropertyNameCtype {
/* #define USE_CRNL_AS_LINE_TERMINATOR */
#define USE_UNICODE_PROPERTIES
/* #define USE_UNICODE_CASE_FOLD_TURKISH_AZERI */
-/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTF#18 */
+/* #define USE_UNICODE_ALL_LINE_TERMINATORS */ /* see Unicode.org UTS #18 */
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
diff --git a/src/regexec.c b/src/regexec.c
index 70ac89e..7e8d3d1 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -3111,6 +3111,13 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
r = 0;
if (r == 0) {
+ if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
+ if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
+ r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
+ goto end;
+ }
+ }
+
prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
r = match_at(reg, str, end,
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
@@ -3119,6 +3126,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, On
at, prev, &msa);
}
+ end:
MATCH_ARG_FREE(msa);
return r;
}
@@ -3391,6 +3399,13 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
if (start > end || start < str) goto mismatch_no_msa;
+ if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
+ if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end)) {
+ r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
+ goto finish_no_msa;
+ }
+ }
+
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
@@ -3707,7 +3722,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
MATCH_ARG_FREE(msa);
/* If result is mismatch and no FIND_NOT_EMPTY option,
- then the region is not setted in match_at(). */
+ then the region is not set in match_at(). */
if (IS_FIND_NOT_EMPTY(reg->options) && region
#ifdef USE_POSIX_API_REGION_OPTION
&& !IS_POSIX_REGION(option)
@@ -3747,6 +3762,13 @@ onig_scan(regex_t* reg, const UChar* str, const UChar* end,
int rs;
const UChar* start;
+ if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
+ if (! ONIGENC_IS_VALID_MBC_STRING(reg->enc, str, end))
+ return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+
+ ONIG_OPTION_OFF(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING);
+ }
+
n = 0;
start = str;
while (1) {
diff --git a/src/regint.h b/src/regint.h
index d320e26..7a3283d 100644
--- a/src/regint.h
+++ b/src/regint.h
@@ -685,7 +685,7 @@ typedef struct _OnigStackType {
struct {
int num; /* memory num */
UChar *pstr; /* start/end position */
- /* Following information is setted, if this stack type is MEM-START */
+ /* Following information is set, if this stack type is MEM-START */
OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */
OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */
} mem;
diff --git a/src/regparse.c b/src/regparse.c
index e8a6e20..8f1d1cb 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -29,6 +29,10 @@
#include "regparse.h"
#include "st.h"
+#ifdef DEBUG_NODE_FREE
+#include <stdio.h>
+#endif
+
#define WARN_BUFSIZE 256
#define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
@@ -1003,13 +1007,16 @@ scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
return 0;
}
-
extern void
onig_node_free(Node* node)
{
start:
if (IS_NULL(node)) return ;
+#ifdef DEBUG_NODE_FREE
+ fprintf(stderr, "onig_node_free: %p\n", node);
+#endif
+
switch (NTYPE(node)) {
case NT_STR:
if (NSTR(node)->capa != 0 &&
@@ -1071,6 +1078,9 @@ node_new(void)
node = (Node* )xmalloc(sizeof(Node));
/* xmemset(node, 0, sizeof(Node)); */
+#ifdef DEBUG_NODE_FREE
+ fprintf(stderr, "node_new: %p\n", node);
+#endif
return node;
}
@@ -1449,7 +1459,7 @@ str_node_split_last_char(StrNode* sn, OnigEncoding enc)
if (sn->end > sn->s) {
p = onigenc_get_prev_char_head(enc, sn->s, sn->end);
- if (p && p > sn->s) { /* can be splitted. */
+ if (p && p > sn->s) { /* can be split. */
n = node_new_str(p, sn->end);
if ((sn->flag & NSTR_RAW) != 0)
NSTRING_SET_RAW(n);
@@ -2520,7 +2530,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
#endif /* USE_BACKREF_WITH_LEVEL */
/*
- def: 0 -> define name (don't allow number name)
+ ref: 0 -> define name (don't allow number name)
1 -> reference name (allow number name)
*/
static int
@@ -3000,7 +3010,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
else if (c == '[') {
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_POSIX_BRACKET) && (PPEEK_IS(':'))) {
OnigCodePoint send[] = { (OnigCodePoint )':', (OnigCodePoint )']' };
- tok->backp = p; /* point at '[' is readed */
+ tok->backp = p; /* point at '[' is read */
PINC;
if (str_exist_check_with_esc(send, 2, p, end,
(OnigCodePoint )']', enc, syn)) {
@@ -4318,7 +4328,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
CClassNode* acc;
r = parse_char_class(&anode, tok, &p, end, env);
- if (r != 0) goto cc_open_err;
+ if (r != 0) {
+ onig_node_free(anode);
+ goto cc_open_err;
+ }
acc = NCCLASS(anode);
r = or_cclass(cc, acc, env->enc);
@@ -4412,7 +4425,6 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end,
err:
if (cc != NCCLASS(*np))
bbuf_free(cc->mbuf);
- onig_node_free(*np);
return r;
}
@@ -4542,11 +4554,9 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
CHECK_NULL_RETURN_MEMERR(*np);
num = scan_env_add_mem_entry(env);
if (num < 0) {
- onig_node_free(*np);
return num;
}
else if (num >= (int )BIT_STATUS_BITS_NUM) {
- onig_node_free(*np);
return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
}
NENCLOSE(*np)->regnum = num;
@@ -4614,7 +4624,10 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (r < 0) return r;
r = parse_subexp(&target, tok, term, &p, end, env);
env->option = prev;
- if (r < 0) return r;
+ if (r < 0) {
+ onig_node_free(target);
+ return r;
+ }
*np = node_new_option(option);
CHECK_NULL_RETURN_MEMERR(*np);
NENCLOSE(*np)->target = target;
@@ -4647,7 +4660,10 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
r = parse_subexp(&target, tok, term, &p, end, env);
- if (r < 0) return r;
+ if (r < 0) {
+ onig_node_free(target);
+ return r;
+ }
if (NTYPE(*np) == NT_ANCHOR)
NANCHOR(*np)->target = target;
@@ -4908,7 +4924,10 @@ parse_exp(Node** np, OnigToken* tok, int term,
if (r < 0) return r;
r = parse_subexp(&target, tok, term, src, end, env);
env->option = prev;
- if (r < 0) return r;
+ if (r < 0) {
+ onig_node_free(target);
+ return r;
+ }
NENCLOSE(*np)->target = target;
return tok->type;
}
@@ -5220,7 +5239,10 @@ parse_branch(Node** top, OnigToken* tok, int term,
*top = NULL;
r = parse_exp(&node, tok, term, src, end, env);
- if (r < 0) return r;
+ if (r < 0) {
+ onig_node_free(node);
+ return r;
+ }
if (r == TK_EOT || r == term || r == TK_ALT) {
*top = node;
@@ -5230,7 +5252,10 @@ parse_branch(Node** top, OnigToken* tok, int term,
headp = &(NCDR(*top));
while (r != TK_EOT && r != term && r != TK_ALT) {
r = parse_exp(&node, tok, term, src, end, env);
- if (r < 0) return r;
+ if (r < 0) {
+ onig_node_free(node);
+ return r;
+ }
if (NTYPE(node) == NT_LIST) {
*headp = node;
@@ -5272,8 +5297,10 @@ parse_subexp(Node** top, OnigToken* tok, int term,
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
r = parse_branch(&node, tok, term, src, end, env);
- if (r < 0) return r;
-
+ if (r < 0) {
+ onig_node_free(node);
+ return r;
+ }
*headp = onig_node_new_alt(node, NULL);
headp = &(NCDR(*headp));
}
@@ -5282,8 +5309,8 @@ parse_subexp(Node** top, OnigToken* tok, int term,
goto err;
}
else {
- err:
onig_node_free(node);
+ err:
if (term == TK_SUBEXP_CLOSE)
return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
else
diff --git a/src/sjis.c b/src/sjis.c
index a607b3d..3378474 100644
--- a/src/sjis.c
+++ b/src/sjis.c
@@ -77,9 +77,36 @@ mbc_enc_len(const UChar* p)
}
static int
-is_valid_mbc_string(const UChar* s, const UChar* end)
+is_valid_mbc_string(const UChar* p, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_SJIS, s, end);
+ while (p < end) {
+ if (*p < 0x80) {
+ p++;
+ }
+ else if (*p < 0xa1) {
+ if (*p == 0xa0 || *p == 0x80)
+ return FALSE;
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x40 || *p > 0xfc || *p == 0x7f)
+ return FALSE;
+ p++;
+ }
+ else if (*p < 0xe0) {
+ p++;
+ }
+ else if (*p < 0xfd) {
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x40 || *p > 0xfc || *p == 0x7f)
+ return FALSE;
+ p++;
+ }
+ else
+ return FALSE;
+ }
+
+ return TRUE;
}
static int