summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/Makefile.am 25
-rw-r--r-- src/Makefile.windows 2
-rw-r--r-- src/big5.c 13
-rw-r--r-- src/config.h.cmake.in 6
-rw-r--r-- src/euc_jp.c 29
-rw-r--r-- src/euc_kr.c 13
-rw-r--r-- src/euc_tw.c 19
-rw-r--r-- src/gb18030.c 24
-rwxr-xr-x src/make_property.sh 2
-rwxr-xr-x src/make_unicode_property.sh 2
-rwxr-xr-x src/make_unicode_property_data.py 4
-rw-r--r-- src/onigposix.h 41
-rw-r--r-- src/oniguruma.h 15
-rw-r--r-- src/regcomp.c 310
-rw-r--r-- src/regenc.c 6
-rw-r--r-- src/regerror.c 2
-rw-r--r-- src/regexec.c 592
-rw-r--r-- src/regint.h 42
-rw-r--r-- src/regparse.c 315
-rw-r--r-- src/regparse.h 4
-rw-r--r-- src/regposerr.c 28
-rw-r--r-- src/regposix.c 94
-rw-r--r-- src/sjis.c 14
-rw-r--r-- src/st.c 8
-rw-r--r-- src/unicode.c 13
-rw-r--r-- src/utf16_be.c 2
-rw-r--r-- src/utf16_le.c 2
-rw-r--r-- src/utf32_be.c 7
-rw-r--r-- src/utf32_le.c 7
29 files changed, 1106 insertions, 535 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 36c2222..44a4167 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -11,10 +11,15 @@ posix_headers = onigposix.h
if ENABLE_POSIX_API
posix_sources = regposix.c regposerr.c
include_HEADERS += $(posix_headers)
+AM_CFLAGS += -DUSE_POSIX_API
else
posix_sources =
endif
+if ENABLE_BINARY_COMPATIBLE_POSIX_API
+AM_CFLAGS += -DUSE_BINARY_COMPATIBLE_POSIX_API
+endif
+
lib_LTLIBRARIES = $(libname)
@@ -45,13 +50,29 @@ libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \
gb18030.c koi8_r.c cp1251.c \
onig_init.c
-libonig_la_LDFLAGS = -version-info $(LTVERSION)
-
EXTRA_DIST = koi8.c mktable.c \
unicode_fold_data.c unicode_property_data.c \
unicode_property_data_posix.c \
unicode_egcb_data.c unicode_wb_data.c
+
+libonig_la_LDFLAGS = $(EXTRA_LIBONIG_LDFLAGS) -version-info $(LTVERSION)
+
+if USE_LIBONIG_DEF_FILE
+
+libonig_la_LDFLAGS += -Wl,--output-def,$(LIBONIG_DEF_FILE)
+
+install-data-hook:
+ echo "$(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_DATA) $(LIBONIG_DEF_FILE) $(DESTDIR)$(libdir)"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL_DATA) $(LIBONIG_DEF_FILE) $(DESTDIR)$(libdir) || exit 1
+
+uninstall-hook:
+ echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$(LIBONIG_DEF_FILE)'"; \
+ $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$(LIBONIG_DEF_FILE)"
+
+endif
+
+
dll:
$(CXX) -shared -Wl,--output-def,libonig.def -o libonig.dll *.o \
$(LIBS)
diff --git a/src/Makefile.windows b/src/Makefile.windows
index 90ebf28..11d6fd8 100644
--- a/src/Makefile.windows
+++ b/src/Makefile.windows
@@ -18,7 +18,7 @@ LINKFLAGS = -link -incremental:no -pdb:none
INSTALL = install -c
CP = copy
CC = cl
-DEFS = -DHAVE_CONFIG_H
+DEFS = -DHAVE_CONFIG_H -DUSE_POSIX_API -DUSE_BINARY_COMPATIBLE_POSIX_API
subdirs =
diff --git a/src/big5.c b/src/big5.c
index 79ae1e3..faff845 100644
--- a/src/big5.c
+++ b/src/big5.c
@@ -2,7 +2,7 @@
big5.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako
+ * Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -58,8 +58,15 @@ static int
big5_code_to_mbclen(OnigCodePoint code)
{
if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
- if ((code & 0xff00) != 0) return 2;
- if (EncLen_BIG5[(int )(code & 0xff)] == 1) return 1;
+
+ if ((code & 0xff00) != 0) {
+ if (EncLen_BIG5[(int )(code >> 8) & 0xff] == 2)
+ return 2;
+ }
+ else {
+ if (EncLen_BIG5[(int )(code & 0xff)] == 1)
+ return 1;
+ }
return ONIGERR_INVALID_CODE_POINT_VALUE;
}
diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in
index 60db86c..c213a09 100644
--- a/src/config.h.cmake.in
+++ b/src/config.h.cmake.in
@@ -43,6 +43,12 @@
/* The size of `long', as computed by sizeof. */
#cmakedefine SIZEOF_LONG ${SIZEOF_LONG}
+/* The size of `long long', as computed by sizeof. */
+#cmakedefine SIZEOF_LONG_LONG ${SIZEOF_LONG_LONG}
+
+/* The size of `void*', as computed by sizeof. */
+#cmakedefine SIZEOF_VOIDP ${SIZEOF_VOIDP}
+
/* Define if enable CR+NL as line terminator */
#cmakedefine USE_CRNL_AS_LINE_TERMINATOR ${USE_CRNL_AS_LINE_TERMINATOR}
diff --git a/src/euc_jp.c b/src/euc_jp.c
index 640b3e3..bfe91bf 100644
--- a/src/euc_jp.c
+++ b/src/euc_jp.c
@@ -2,7 +2,7 @@
euc_jp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako
+ * Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -114,10 +114,20 @@ static int
code_to_mbclen(OnigCodePoint code)
{
if (ONIGENC_IS_CODE_ASCII(code)) return 1;
- else if ((code & 0xff0000) != 0) return 3;
- else if ((code & 0xff00) != 0) return 2;
- else
- return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else if ((code & 0xff0000) != 0) {
+ if (EncLen_EUCJP[(int )(code >> 16) & 0xff] == 3)
+ return 3;
+ }
+ else if ((code & 0xff00) != 0) {
+ if (EncLen_EUCJP[(int )(code >> 8) & 0xff] == 2)
+ return 2;
+ }
+ else if (code < 256) {
+ if (EncLen_EUCJP[(int )(code & 0xff)] == 1)
+ return 1;
+ }
+
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
}
static int
@@ -125,8 +135,13 @@ code_to_mbc(OnigCodePoint code, UChar *buf)
{
UChar *p = buf;
- if ((code & 0xff0000) != 0) *p++ = (UChar )(((code >> 16) & 0xff));
- if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff));
+ if ((code & 0xff0000) != 0) {
+ *p++ = (UChar )(((code >> 16) & 0xff));
+ *p++ = (UChar )(((code >> 8) & 0xff));
+ }
+ else if ((code & 0xff00) != 0)
+ *p++ = (UChar )(((code >> 8) & 0xff));
+
*p++ = (UChar )(code & 0xff);
#if 1
diff --git a/src/euc_kr.c b/src/euc_kr.c
index 7fa50af..b0e9fbf 100644
--- a/src/euc_kr.c
+++ b/src/euc_kr.c
@@ -2,7 +2,7 @@
euc_kr.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako
+ * Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -58,8 +58,15 @@ static int
euckr_code_to_mbclen(OnigCodePoint code)
{
if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
- if ((code & 0xff00) != 0) return 2;
- if (EncLen_EUCKR[(int )(code & 0xff)] == 1) return 1;
+
+ if ((code & 0xff00) != 0) {
+ if (EncLen_EUCKR[(int )(code >> 8) & 0xff] == 2)
+ return 2;
+ }
+ else {
+ if (EncLen_EUCKR[(int )(code & 0xff)] == 1)
+ return 1;
+ }
return ONIGERR_INVALID_CODE_POINT_VALUE;
}
diff --git a/src/euc_tw.c b/src/euc_tw.c
index 8e72b97..99dc5ec 100644
--- a/src/euc_tw.c
+++ b/src/euc_tw.c
@@ -2,7 +2,7 @@
euc_tw.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako
+ * Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -57,15 +57,22 @@ euctw_mbc_enc_len(const UChar* p)
static int
euctw_code_to_mbclen(OnigCodePoint code)
{
- if ((code & 0xff000000) != 0) return 4;
- else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
- else if ((code & 0xff00) != 0) return 2;
+ if ((code & 0xff000000) != 0) {
+ if (EncLen_EUCTW[(int )(code >> 24) & 0xff] == 4)
+ return 4;
+ }
+ else if ((code & 0xff0000) != 0)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else if ((code & 0xff00) != 0) {
+ if (EncLen_EUCTW[(int )(code >> 8) & 0xff] == 2)
+ return 2;
+ }
else {
if (EncLen_EUCTW[(int )(code & 0xff)] == 1)
return 1;
-
- return ONIGERR_INVALID_CODE_POINT_VALUE;
}
+
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
}
static int
diff --git a/src/gb18030.c b/src/gb18030.c
index 1385a7f..7409d3e 100644
--- a/src/gb18030.c
+++ b/src/gb18030.c
@@ -89,15 +89,25 @@ gb18030_mbc_enc_len(const UChar* p)
static int
gb18030_code_to_mbclen(OnigCodePoint code)
{
- if ((code & 0xff000000) != 0) return 4;
- else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
- else if ((code & 0xff00) != 0) return 2;
+ if ((code & 0xff000000) != 0) {
+ if (GB18030_MAP[(int )(code >> 24) & 0xff] == CM)
+ if (GB18030_MAP[(int )(code >> 16) & 0xff] == C4)
+ return 4;
+ }
+ else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else if ((code & 0xff00) != 0) {
+ if (GB18030_MAP[(int )(code >> 8) & 0xff] == CM) {
+ char c = GB18030_MAP[(int )code & 0xff];
+ if (c == CM || c == C2)
+ return 2;
+ }
+ }
else {
- if (GB18030_MAP[(int )(code & 0xff)] == CM)
- return ONIGERR_INVALID_CODE_POINT_VALUE;
-
- return 1;
+ if (GB18030_MAP[(int )(code & 0xff)] != CM)
+ return 1;
}
+
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
}
static int
diff --git a/src/make_property.sh b/src/make_property.sh
index e5f1244..1c5e0f5 100755
--- a/src/make_property.sh
+++ b/src/make_property.sh
@@ -1,7 +1,7 @@
#!/bin/sh
GPERF=gperf
-SED=gsed
+SED=sed
TMP1=gperf1.tmp
TMP2=gperf2.tmp
diff --git a/src/make_unicode_property.sh b/src/make_unicode_property.sh
index 5129376..ff7dc62 100755
--- a/src/make_unicode_property.sh
+++ b/src/make_unicode_property.sh
@@ -1,7 +1,7 @@
#!/bin/sh
GPERF=gperf
-SED=gsed
+SED=sed
NAME=unicode_property_data
TMP1=gperf1.tmp
diff --git a/src/make_unicode_property_data.py b/src/make_unicode_property_data.py
index 285c462..d1b3377 100755
--- a/src/make_unicode_property_data.py
+++ b/src/make_unicode_property_data.py
@@ -1,7 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# make_unicode_property_data.py
-# Copyright (c) 2016-2019 K.Kosako
+# Copyright (c) 2016-2020 K.Kosako
import sys
import re
@@ -405,7 +405,7 @@ def set_max_prop_name(name):
def entry_prop_name(name, index):
set_max_prop_name(name)
if OUTPUT_LIST_MODE and index >= len(POSIX_LIST):
- print >> UPF, "%3d: %s" % (index, name)
+ print >> UPF, "%s" % (name)
def entry_and_print_prop_and_index(name, index):
entry_prop_name(name, index)
diff --git a/src/onigposix.h b/src/onigposix.h
index 37e09ea..3514f80 100644
--- a/src/onigposix.h
+++ b/src/onigposix.h
@@ -74,19 +74,19 @@ extern "C" {
#define REG_POSIX_ENCODING_UTF16_LE 5
-typedef int regoff_t;
+typedef int onig_posix_regoff_t;
typedef struct {
- regoff_t rm_so;
- regoff_t rm_eo;
-} regmatch_t;
+ onig_posix_regoff_t rm_so;
+ onig_posix_regoff_t rm_eo;
+} onig_posix_regmatch_t;
/* POSIX regex_t */
typedef struct {
void* onig; /* Oniguruma regex_t* */
size_t re_nsub;
int comp_options;
-} regex_t;
+} onig_posix_regex_t;
#ifndef P_
@@ -160,16 +160,31 @@ ONIG_EXTERN int onig_end P_((void));
#endif /* ONIGURUMA_H */
-ONIG_EXTERN int regcomp P_((regex_t* reg, const char* pat, int options));
-ONIG_EXTERN int regexec P_((regex_t* reg, const char* str, size_t nmatch, regmatch_t* matches, int options));
-ONIG_EXTERN void regfree P_((regex_t* reg));
-ONIG_EXTERN size_t regerror P_((int code, const regex_t* reg, char* buf, size_t size));
+ONIG_EXTERN int onig_posix_regcomp P_((onig_posix_regex_t* reg, const char* pat, int options));
+ONIG_EXTERN int onig_posix_regexec P_((onig_posix_regex_t* reg, const char* str, size_t nmatch, onig_posix_regmatch_t* matches, int options));
+ONIG_EXTERN void onig_posix_regfree P_((onig_posix_regex_t* reg));
+ONIG_EXTERN size_t onig_posix_regerror P_((int code, const onig_posix_regex_t* reg, char* buf, size_t size));
/* extended API */
-ONIG_EXTERN void reg_set_encoding P_((int enc));
-ONIG_EXTERN int reg_name_to_group_numbers P_((regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums));
-ONIG_EXTERN int reg_foreach_name P_((regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*), void* arg));
-ONIG_EXTERN int reg_number_of_names P_((regex_t* reg));
+ONIG_EXTERN void onig_posix_reg_set_encoding P_((int enc));
+ONIG_EXTERN int onig_posix_reg_name_to_group_numbers P_((onig_posix_regex_t* reg, const unsigned char* name, const unsigned char* name_end, int** nums));
+ONIG_EXTERN int onig_posix_reg_foreach_name P_((onig_posix_regex_t* reg, int (*func)(const unsigned char*, const unsigned char*,int,int*,onig_posix_regex_t*,void*), void* arg));
+ONIG_EXTERN int onig_posix_reg_number_of_names P_((onig_posix_regex_t* reg));
+
+
+/* aliases */
+#define regex_t onig_posix_regex_t
+#define regmatch_t onig_posix_regmatch_t
+#define regoff_t onig_posix_regoff_t
+
+#define regcomp onig_posix_regcomp
+#define regexec onig_posix_regexec
+#define regfree onig_posix_regfree
+#define regerror onig_posix_regerror
+#define reg_set_encoding onig_posix_reg_set_encoding
+#define reg_name_to_group_numbers onig_posix_reg_name_to_group_numbers
+#define reg_foreach_name onig_posix_reg_foreach_name
+#define reg_number_of_names onig_posix_reg_number_of_names
#ifdef __cplusplus
}
diff --git a/src/oniguruma.h b/src/oniguruma.h
index 15f6ef0..d983fc9 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -36,9 +36,9 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
#define ONIGURUMA_VERSION_MINOR 9
-#define ONIGURUMA_VERSION_TEENY 5
+#define ONIGURUMA_VERSION_TEENY 6
-#define ONIGURUMA_VERSION_INT 60905
+#define ONIGURUMA_VERSION_INT 60906
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
@@ -395,8 +395,12 @@ typedef unsigned int OnigOptionType;
#define ONIG_OPTION_POSIX_IS_ASCII (ONIG_OPTION_SPACE_IS_ASCII << 1)
#define ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER (ONIG_OPTION_POSIX_IS_ASCII << 1)
#define ONIG_OPTION_TEXT_SEGMENT_WORD (ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER << 1)
+/* options (search time) */
+#define ONIG_OPTION_NOT_BEGIN_STRING (ONIG_OPTION_TEXT_SEGMENT_WORD << 1)
+#define ONIG_OPTION_NOT_END_STRING (ONIG_OPTION_NOT_BEGIN_STRING << 1)
+#define ONIG_OPTION_NOT_BEGIN_POSITION (ONIG_OPTION_NOT_END_STRING << 1)
-#define ONIG_OPTION_MAXBIT ONIG_OPTION_TEXT_SEGMENT_WORD /* limit */
+#define ONIG_OPTION_MAXBIT ONIG_OPTION_NOT_BEGIN_POSITION
#define ONIG_OPTION_ON(options,regopt) ((options) |= (regopt))
#define ONIG_OPTION_OFF(options,regopt) ((options) &= ~(regopt))
@@ -561,6 +565,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16
#define ONIGERR_RETRY_LIMIT_IN_MATCH_OVER -17
#define ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER -18
+#define ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER -19
#define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
#define ONIGERR_FAIL_TO_INITIALIZE -23
@@ -919,6 +924,10 @@ int onig_set_capture_num_limit P_((int num));
ONIG_EXTERN
int onig_set_parse_depth_limit P_((unsigned int depth));
ONIG_EXTERN
+unsigned long onig_get_subexp_call_limit_in_search P_((void));
+ONIG_EXTERN
+int onig_set_subexp_call_limit_in_search P_((unsigned long n));
+ONIG_EXTERN
int onig_get_subexp_call_max_nest_level P_((void));
ONIG_EXTERN
int onig_set_subexp_call_max_nest_level P_((int level));
diff --git a/src/regcomp.c b/src/regcomp.c
index 4d5b78f..dd2b328 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -133,6 +133,7 @@ ops_init(regex_t* reg, int init_alloc_size)
size = sizeof(Operation) * init_alloc_size;
p = (Operation* )xrealloc(reg->ops, size);
CHECK_NULL_RETURN_MEMERR(p);
+ reg->ops = p;
#ifdef USE_DIRECT_THREADED_CODE
{
enum OpCode* cp;
@@ -144,13 +145,12 @@ ops_init(regex_t* reg, int init_alloc_size)
#endif
}
else {
- p = (Operation* )0;
+ reg->ops = (Operation* )0;
#ifdef USE_DIRECT_THREADED_CODE
reg->ocs = (enum OpCode* )0;
#endif
}
- reg->ops = p;
reg->ops_curr = 0; /* !!! not yet done ops_new() */
reg->ops_alloc = init_alloc_size;
reg->ops_used = 0;
@@ -176,6 +176,7 @@ ops_expand(regex_t* reg, int n)
size = sizeof(Operation) * n;
p = (Operation* )xrealloc(reg->ops, size);
CHECK_NULL_RETURN_MEMERR(p);
+ reg->ops = p;
#ifdef USE_DIRECT_THREADED_CODE
size = sizeof(enum OpCode) * n;
@@ -184,7 +185,6 @@ ops_expand(regex_t* reg, int n)
reg->ocs = cp;
#endif
- reg->ops = p;
reg->ops_alloc = n;
if (reg->ops_used == 0)
reg->ops_curr = 0;
@@ -265,10 +265,12 @@ ops_free(regex_t* reg)
case OP_BACKREF1: case OP_BACKREF2: case OP_BACKREF_N: case OP_BACKREF_N_IC:
break;
case OP_BACKREF_MULTI: case OP_BACKREF_MULTI_IC:
+ case OP_BACKREF_CHECK:
+#ifdef USE_BACKREF_WITH_LEVEL
case OP_BACKREF_WITH_LEVEL:
case OP_BACKREF_WITH_LEVEL_IC:
- case OP_BACKREF_CHECK:
case OP_BACKREF_CHECK_WITH_LEVEL:
+#endif
if (op->backref_general.num != 1)
xfree(op->backref_general.ns);
break;
@@ -631,7 +633,7 @@ mmcl_add(MinMaxCharLen* to, MinMaxCharLen* add)
to->min = distance_add(to->min, add->min);
to->max = distance_add(to->max, add->max);
- to->min_is_sure = add->min_is_sure != 0 && to->min_is_sure != 0;
+ to->min_is_sure = add->min_is_sure != FALSE && to->min_is_sure != FALSE;
}
static void
@@ -656,8 +658,11 @@ static void
mmcl_alt_merge(MinMaxCharLen* to, MinMaxCharLen* alt)
{
if (to->min > alt->min) {
- to->min = alt->min;
- if (alt->min_is_sure != 0)
+ to->min = alt->min;
+ to->min_is_sure = alt->min_is_sure;
+ }
+ else if (to->min == alt->min) {
+ if (alt->min_is_sure != FALSE)
to->min_is_sure = TRUE;
}
@@ -840,7 +845,7 @@ node_char_len1(Node* node, regex_t* reg, MinMaxCharLen* ci, ScanEnv* env,
en->min_char_len = ci->min;
en->max_char_len = ci->max;
NODE_STATUS_ADD(node, FIXED_CLEN);
- if (ci->min_is_sure != 0)
+ if (ci->min_is_sure != FALSE)
NODE_STATUS_ADD(node, FIXED_CLEN_MIN_SURE);
}
}
@@ -882,15 +887,15 @@ node_char_len1(Node* node, regex_t* reg, MinMaxCharLen* ci, ScanEnv* env,
}
break;
- case NODE_ANCHOR:
+ case NODE_GIMMICK:
mmcl_set(ci, 0);
- /* can't optimize look-behind if anchor exists. */
- ci->min_is_sure = FALSE;
break;
- case NODE_GIMMICK:
+ case NODE_ANCHOR:
zero:
mmcl_set(ci, 0);
+ /* can't optimize look-behind if anchor exists. */
+ ci->min_is_sure = FALSE;
break;
case NODE_BACKREF:
@@ -1082,6 +1087,9 @@ compile_call(CallNode* node, regex_t* reg, ScanEnv* env)
if (r != 0) return r;
COP(reg)->call.addr = 0; /* dummy addr. */
+#ifdef ONIG_DEBUG_MATCH_COUNTER
+ COP(reg)->call.called_mem = node->called_gnum;
+#endif
offset = COP_CURR_OFFSET_BYTES(reg, call.addr);
r = unset_addr_list_add(env->unset_addr_list, offset, NODE_CALL_BODY(node));
@@ -1822,7 +1830,6 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
COP(reg)->memory_end.num = node->m.regnum;
if (NODE_IS_CALLED(node)) {
- if (r != 0) return r;
r = add_op(reg, OP_RETURN);
}
#else
@@ -2764,7 +2771,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
static int
make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter)
{
- int r = 0;
+ int r;
Node* node = *plink;
switch (NODE_TYPE(node)) {
@@ -2772,17 +2779,17 @@ make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter)
case NODE_ALT:
do {
r = make_named_capture_number_map(&(NODE_CAR(node)), map, counter);
- } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
+ } while (r >= 0 && IS_NOT_NULL(node = NODE_CDR(node)));
+ if (r < 0) return r;
break;
case NODE_QUANT:
{
Node** ptarget = &(NODE_BODY(node));
- Node* old = *ptarget;
r = make_named_capture_number_map(ptarget, map, counter);
- if (r != 0) return r;
- if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {
- r = onig_reduce_nested_quantifier(node);
+ if (r < 0) return r;
+ if (r == 1 && NODE_TYPE(*ptarget) == NODE_QUANT) {
+ return onig_reduce_nested_quantifier(node);
}
}
break;
@@ -2796,41 +2803,48 @@ make_named_capture_number_map(Node** plink, GroupNumMap* map, int* counter)
map[en->m.regnum].new_val = *counter;
en->m.regnum = *counter;
r = make_named_capture_number_map(&(NODE_BODY(node)), map, counter);
+ if (r < 0) return r;
}
else {
*plink = NODE_BODY(node);
NODE_BODY(node) = NULL_NODE;
onig_node_free(node);
r = make_named_capture_number_map(plink, map, counter);
+ if (r < 0) return r;
+ return 1;
}
}
else if (en->type == BAG_IF_ELSE) {
r = make_named_capture_number_map(&(NODE_BAG_BODY(en)), map, counter);
- if (r != 0) return r;
+ if (r < 0) return r;
if (IS_NOT_NULL(en->te.Then)) {
r = make_named_capture_number_map(&(en->te.Then), map, counter);
- if (r != 0) return r;
+ if (r < 0) return r;
}
if (IS_NOT_NULL(en->te.Else)) {
r = make_named_capture_number_map(&(en->te.Else), map, counter);
- if (r != 0) return r;
+ if (r < 0) return r;
}
}
- else
+ else {
r = make_named_capture_number_map(&(NODE_BODY(node)), map, counter);
+ if (r < 0) return r;
+ }
}
break;
case NODE_ANCHOR:
- if (IS_NOT_NULL(NODE_BODY(node)))
+ if (IS_NOT_NULL(NODE_BODY(node))) {
r = make_named_capture_number_map(&(NODE_BODY(node)), map, counter);
+ if (r < 0) return r;
+ }
break;
default:
break;
}
- return r;
+ return 0;
}
static int
@@ -2982,7 +2996,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
}
counter = 0;
r = make_named_capture_number_map(root, map, &counter);
- if (r != 0) return r;
+ if (r < 0) return r;
r = renumber_backref_traverse(*root, map);
if (r != 0) return r;
@@ -3546,7 +3560,9 @@ check_node_in_look_behind(Node* node, int not, int* used)
if (r != 0) break;
if (en->type == BAG_MEMORY) {
- if (NODE_IS_BACKREF(node) || NODE_IS_CALLED(node)) *used = TRUE;
+ if (NODE_IS_BACKREF(node) || NODE_IS_CALLED(node)
+ || NODE_IS_REFERENCED(node))
+ *used = TRUE;
}
else if (en->type == BAG_IF_ELSE) {
if (IS_NOT_NULL(en->te.Then)) {
@@ -3978,6 +3994,7 @@ set_empty_repeat_node_trav(Node* node, Node* empty, ScanEnv* env)
{
BagNode* en = BAG_(node);
+ r = 0;
if (en->type == BAG_MEMORY) {
if (NODE_IS_BACKREF(node)) {
if (IS_NOT_NULL(empty))
@@ -4484,7 +4501,7 @@ remove_from_list(Node* prev, Node* a)
}
static int
-reduce_string_list(Node* node)
+reduce_string_list(Node* node, OnigEncoding enc)
{
int r = 0;
@@ -4515,43 +4532,70 @@ reduce_string_list(Node* node)
}
}
else {
- prev = NULL_NODE;
+ if (IS_NOT_NULL(prev)) {
+#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
+ StrNode* sn = STR_(prev);
+ if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
+ return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+#endif
+ prev = NULL_NODE;
+ }
+ r = reduce_string_list(curr, enc);
+ if (r != 0) return r;
prev_node = node;
}
node = next_node;
} while (r == 0 && IS_NOT_NULL(node));
+
+#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
+ if (IS_NOT_NULL(prev)) {
+ StrNode* sn = STR_(prev);
+ if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
+ return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+ }
+#endif
}
break;
case NODE_ALT:
do {
- r = reduce_string_list(NODE_CAR(node));
+ r = reduce_string_list(NODE_CAR(node), enc);
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
+#ifdef USE_CHECK_VALIDITY_OF_STRING_IN_TREE
+ case NODE_STRING:
+ {
+ StrNode* sn = STR_(node);
+ if (! ONIGENC_IS_VALID_MBC_STRING(enc, sn->s, sn->end))
+ return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+ }
+ break;
+#endif
+
case NODE_ANCHOR:
if (IS_NULL(NODE_BODY(node)))
break;
/* fall */
case NODE_QUANT:
- r = reduce_string_list(NODE_BODY(node));
+ r = reduce_string_list(NODE_BODY(node), enc);
break;
case NODE_BAG:
{
BagNode* en = BAG_(node);
- r = reduce_string_list(NODE_BODY(node));
+ r = reduce_string_list(NODE_BODY(node), enc);
if (r != 0) return r;
if (en->type == BAG_IF_ELSE) {
if (IS_NOT_NULL(en->te.Then)) {
- r = reduce_string_list(en->te.Then);
+ r = reduce_string_list(en->te.Then, enc);
if (r != 0) return r;
}
if (IS_NOT_NULL(en->te.Else)) {
- r = reduce_string_list(en->te.Else);
+ r = reduce_string_list(en->te.Else, enc);
if (r != 0) return r;
}
}
@@ -4723,7 +4767,7 @@ tune_look_behind(Node* node, regex_t* reg, int state, ScanEnv* env)
return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
}
- if (ci.min == 0 && ci.min_is_sure != 0 && used == FALSE) {
+ if (ci.min == 0 && ci.min_is_sure != FALSE && used == FALSE) {
if (an->type == ANCR_LOOK_BEHIND_NOT)
r = onig_node_reset_fail(node);
else
@@ -4779,18 +4823,23 @@ tune_look_behind(Node* node, regex_t* reg, int state, ScanEnv* env)
static int
tune_next(Node* node, Node* next_node, regex_t* reg)
{
+ int called;
NodeType type;
+ called = FALSE;
+
retry:
type = NODE_TYPE(node);
if (type == NODE_QUANT) {
QuantNode* qn = QUANT_(node);
if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {
#ifdef USE_QUANT_PEEK_NEXT
- Node* n = get_tree_head_literal(next_node, 1, reg);
- /* '\0': for UTF-16BE etc... */
- if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {
- qn->next_head_exact = n;
+ if (called == FALSE) {
+ Node* n = get_tree_head_literal(next_node, 1, reg);
+ /* '\0': for UTF-16BE etc... */
+ if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') {
+ qn->next_head_exact = n;
+ }
}
#endif
/* automatic posseivation a*b ==> (?>a*)b */
@@ -4815,6 +4864,8 @@ tune_next(Node* node, Node* next_node, regex_t* reg)
else if (type == NODE_BAG) {
BagNode* en = BAG_(node);
if (en->type == BAG_MEMORY) {
+ if (NODE_IS_CALLED(node))
+ called = TRUE;
node = NODE_BODY(node);
goto retry;
}
@@ -4999,17 +5050,18 @@ unravel_cf_look_behind_add(Node** rlist, Node** rsn,
{
int r, i, found;
- found = 0;
+ found = FALSE;
for (i = 0; i < n; i++) {
OnigCaseFoldCodeItem* item = items + i;
if (item->byte_len == one_len) {
if (item->code_len == 1) {
- found = 1;
+ found = TRUE;
+ break;
}
}
}
- if (found == 0) {
+ if (found == FALSE) {
r = unravel_cf_string_add(rlist, rsn, s, s + one_len, 0 /* flag */);
}
else {
@@ -5073,6 +5125,7 @@ unravel_case_fold_string(Node* node, regex_t* reg, int state)
one_len = (OnigLen )enclen(enc, p);
if (n == 0) {
q = p + one_len;
+ if (q > end) q = end;
r = unravel_cf_string_add(&list, &sn, p, q, 0 /* flag */);
if (r != 0) goto err;
}
@@ -5221,12 +5274,12 @@ quantifiers_memory_node_info(Node* node)
__inline
#endif
static int
-tune_call_node_call(CallNode* cn, ScanEnv* env, int state)
+check_call_reference(CallNode* cn, ScanEnv* env, int state)
{
MemEnv* mem_env = SCANENV_MEMENV(env);
if (cn->by_number != 0) {
- int gnum = cn->group_num;
+ int gnum = cn->called_gnum;
if (env->num_named > 0 &&
IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
@@ -5241,12 +5294,14 @@ tune_call_node_call(CallNode* cn, ScanEnv* env, int state)
}
set_call_attr:
- NODE_CALL_BODY(cn) = mem_env[cn->group_num].mem_node;
+ NODE_CALL_BODY(cn) = mem_env[cn->called_gnum].mem_node;
if (IS_NULL(NODE_CALL_BODY(cn))) {
onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
cn->name, cn->name_end);
return ONIGERR_UNDEFINED_NAME_REFERENCE;
}
+
+ NODE_STATUS_ADD(NODE_CALL_BODY(cn), REFERENCED);
}
else {
int *refs;
@@ -5263,7 +5318,7 @@ tune_call_node_call(CallNode* cn, ScanEnv* env, int state)
return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
}
else {
- cn->group_num = refs[0];
+ cn->called_gnum = refs[0];
goto set_call_attr;
}
}
@@ -5396,7 +5451,7 @@ tune_call(Node* node, ScanEnv* env, int state)
CALL_(node)->entry_count--;
}
- r = tune_call_node_call(CALL_(node), env, state);
+ r = check_call_reference(CALL_(node), env, state);
break;
default:
@@ -6187,8 +6242,10 @@ concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)
r = 1; /* 1:full */
break;
}
- for (j = 0; j < len && p < end; j++)
+ for (j = 0; j < len && p < end; j++) {
+ /* coverity[overrun-local] */
to->s[i++] = *p++;
+ }
}
to->len = i;
@@ -6210,8 +6267,10 @@ concat_opt_exact_str(OptStr* to, UChar* s, UChar* end, OnigEncoding enc)
for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
len = enclen(enc, p);
if (i + len > OPT_EXACT_MAXLEN) break;
- for (j = 0; j < len && p < end; j++)
+ for (j = 0; j < len && p < end; j++) {
+ /* coverity[overrun-local] */
to->s[i++] = *p++;
+ }
}
to->len = i;
@@ -7229,19 +7288,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
else
reg->ops_used = 0;
- reg->string_pool = 0;
- reg->string_pool_end = 0;
- reg->num_mem = 0;
- reg->num_repeat = 0;
- reg->num_empty_check = 0;
- reg->repeat_range_alloc = 0;
- reg->repeat_range = (RepeatRange* )NULL;
- reg->empty_status_mem = 0;
-
r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);
if (r != 0) goto err;
- r = reduce_string_list(root);
+ r = reduce_string_list(root, reg->enc);
if (r != 0) goto err;
/* mixed use named group and no-named group */
@@ -7653,6 +7703,134 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
return onig_is_code_in_cc_len(len, code, cc);
}
+typedef struct {
+ int prec_read;
+ int look_behind;
+ int backref_with_level;
+ int call;
+} SlowElementCount;
+
+static int
+node_detect_can_be_slow(Node* node, SlowElementCount* ct)
+{
+ int r;
+
+ r = 0;
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
+ do {
+ r = node_detect_can_be_slow(NODE_CAR(node), ct);
+ if (r != 0) return r;
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_QUANT:
+ r = node_detect_can_be_slow(NODE_BODY(node), ct);
+ break;
+
+ case NODE_ANCHOR:
+ switch (ANCHOR_(node)->type) {
+ case ANCR_PREC_READ:
+ case ANCR_PREC_READ_NOT:
+ ct->prec_read++;
+ break;
+ case ANCR_LOOK_BEHIND:
+ case ANCR_LOOK_BEHIND_NOT:
+ ct->look_behind++;
+ break;
+ default:
+ break;
+ }
+
+ if (ANCHOR_HAS_BODY(ANCHOR_(node)))
+ r = node_detect_can_be_slow(NODE_BODY(node), ct);
+ break;
+
+ case NODE_BAG:
+ {
+ BagNode* en = BAG_(node);
+
+ r = node_detect_can_be_slow(NODE_BODY(node), ct);
+ if (r != 0) return r;
+
+ if (en->type == BAG_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = node_detect_can_be_slow(en->te.Then, ct);
+ if (r != 0) return r;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ r = node_detect_can_be_slow(en->te.Else, ct);
+ if (r != 0) return r;
+ }
+ }
+ }
+ break;
+
+#ifdef USE_BACKREF_WITH_LEVEL
+ case NODE_BACKREF:
+ if (NODE_IS_NEST_LEVEL(node))
+ ct->backref_with_level++;
+ break;
+#endif
+
+#ifdef USE_CALL
+ case NODE_CALL:
+ ct->call++;
+ break;
+#endif
+
+ default:
+ break;
+ }
+
+ return r;
+}
+
+extern int
+onig_detect_can_be_slow_pattern(const UChar* pattern,
+ const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
+ OnigSyntaxType* syntax)
+{
+ int r;
+ regex_t* reg;
+ Node* root;
+ ScanEnv scan_env;
+ SlowElementCount count;
+
+ reg = (regex_t* )xmalloc(sizeof(regex_t));
+ if (IS_NULL(reg)) return ONIGERR_MEMORY;
+
+ r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
+ if (r != 0) {
+ xfree(reg);
+ return r;
+ }
+
+ root = 0;
+ r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);
+ if (r == 0) {
+ count.prec_read = 0;
+ count.look_behind = 0;
+ count.backref_with_level = 0;
+ count.call = 0;
+
+ r = node_detect_can_be_slow(root, &count);
+ if (r == 0) {
+ int n = count.prec_read + count.look_behind
+ + count.backref_with_level + count.call;
+ r = n;
+ }
+ }
+
+ if (IS_NOT_NULL(scan_env.mem_env_dynamic))
+ xfree(scan_env.mem_env_dynamic);
+
+ onig_node_free(root);
+ onig_free(reg);
+ return r;
+}
+
#ifdef ONIG_DEBUG_PARSE
@@ -7734,14 +7912,18 @@ print_indent_tree(FILE* f, Node* node, int indent)
break;
case NODE_CCLASS:
+#define CCLASS_MBUF_MAX_OUTPUT_NUM 10
+
fprintf(f, "<cclass:%p>", node);
if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f);
if (CCLASS_(node)->mbuf) {
BBuf* bbuf = CCLASS_(node)->mbuf;
- for (i = 0; i < bbuf->used; i++) {
+ fprintf(f, " mbuf(%u) ", bbuf->used);
+ for (i = 0; i < bbuf->used && i < CCLASS_MBUF_MAX_OUTPUT_NUM; i++) {
if (i > 0) fprintf(f, ",");
fprintf(f, "%0x", bbuf->p[i]);
}
+ if (i < bbuf->used) fprintf(f, "...");
}
break;
@@ -7822,6 +8004,11 @@ print_indent_tree(FILE* f, Node* node, int indent)
if (i > 0) fputs(", ", f);
fprintf(f, "%d", p[i]);
}
+#ifdef USE_BACKREF_WITH_LEVEL
+ if (NODE_IS_NEST_LEVEL(node)) {
+ fprintf(f, ", level: %d", br->nest_level);
+ }
+#endif
}
break;
@@ -7830,6 +8017,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
{
CallNode* cn = CALL_(node);
fprintf(f, "<call:%p>", node);
+ fprintf(f, " num: %d, name", cn->called_gnum);
p_string(f, cn->name_end - cn->name, cn->name);
}
break;
@@ -7881,6 +8069,8 @@ print_indent_tree(FILE* f, Node* node, int indent)
fprintf(f, "memory:%d", BAG_(node)->m.regnum);
if (NODE_IS_CALLED(node))
fprintf(f, ", called");
+ else if (NODE_IS_REFERENCED(node))
+ fprintf(f, ", referenced");
if (NODE_IS_FIXED_ADDR(node))
fprintf(f, ", fixed-addr");
break;
diff --git a/src/regenc.c b/src/regenc.c
index dbfbc89..27e4549 100644
--- a/src/regenc.c
+++ b/src/regenc.c
@@ -263,12 +263,12 @@ onigenc_strlen_null(OnigEncoding enc, const UChar* s)
extern int
onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
{
- UChar* start = (UChar* )s;
- UChar* p = (UChar* )s;
+ const UChar* start = s;
+ const UChar* p = s;
while (1) {
if (*p == '\0') {
- UChar* q;
+ const UChar* q;
int len = ONIGENC_MBC_MINLEN(enc);
if (len == 1) return (int )(p - start);
diff --git a/src/regerror.c b/src/regerror.c
index 58bc7fd..dc1c8b6 100644
--- a/src/regerror.c
+++ b/src/regerror.c
@@ -56,6 +56,8 @@ onig_error_code_to_format(int code)
p = "retry-limit-in-match over"; break;
case ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER:
p = "retry-limit-in-search over"; break;
+ case ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER:
+ p = "subexp-call-limit-in-search over"; break;
case ONIGERR_TYPE_BUG:
p = "undefined type (bug)"; break;
case ONIGERR_PARSER_BUG:
diff --git a/src/regexec.c b/src/regexec.c
index 1b6895d..bb6b474 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -46,15 +46,15 @@
#define CHECK_INTERRUPT_IN_MATCH
-#define STACK_MEM_START(reg, i) \
- (MEM_STATUS_AT((reg)->push_mem_start, (i)) != 0 ? \
- STACK_AT(mem_start_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_start_stk[i])))
+#define STACK_MEM_START(reg, idx) \
+ (MEM_STATUS_AT((reg)->push_mem_start, (idx)) != 0 ? \
+ STACK_AT(mem_start_stk[idx].i)->u.mem.pstr : mem_start_stk[idx].s)
-#define STACK_MEM_END(reg, i) \
- (MEM_STATUS_AT((reg)->push_mem_end, (i)) != 0 ? \
- STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_end_stk[i])))
+#define STACK_MEM_END(reg, idx) \
+ (MEM_STATUS_AT((reg)->push_mem_end, (idx)) != 0 ? \
+ STACK_AT(mem_end_stk[idx].i)->u.mem.pstr : mem_end_stk[idx].s)
-static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high, UChar** low_prev);
+static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high);
static int
search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp);
@@ -170,6 +170,9 @@ typedef struct {
int best_len; /* for ONIG_OPTION_FIND_LONGEST */
UChar* best_s;
#endif
+#ifdef USE_CALL
+ unsigned long subexp_call_in_search_counter;
+#endif
} MatchArg;
@@ -1057,8 +1060,6 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
/** stack **/
-#define INVALID_STACK_INDEX -1
-
#define STK_ALT_FLAG 0x0001
/* stack type */
@@ -1099,7 +1100,15 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#define STK_MASK_TO_VOID_TARGET 0x100e
#define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */
-typedef intptr_t StackIndex;
+typedef ptrdiff_t StackIndex;
+
+#define INVALID_STACK_INDEX ((StackIndex )-1)
+
+typedef union {
+ StackIndex i;
+ UChar* s;
+} StkPtrType;
+
typedef struct _StackType {
unsigned int type;
@@ -1108,7 +1117,6 @@ typedef struct _StackType {
struct {
Operation* pcode; /* byte code position */
UChar* pstr; /* string position */
- UChar* pstr_prev; /* previous char position of pstr */
} state;
struct {
int count;
@@ -1119,8 +1127,8 @@ typedef struct _StackType {
struct {
UChar *pstr; /* start/end position */
/* Following information is set, if this stack type is MEM-START */
- StackIndex prev_start; /* prev. info (for backtrack "(...)*" ) */
- StackIndex prev_end; /* prev. info (for backtrack "(...)*" ) */
+ StkPtrType prev_start; /* prev. info (for backtrack "(...)*" ) */
+ StkPtrType prev_end; /* prev. info (for backtrack "(...)*" ) */
} mem;
struct {
UChar *pstr; /* start position */
@@ -1166,8 +1174,8 @@ struct OnigCalloutArgsStruct {
MatchArg* msa;
StackType* stk_base;
StackType* stk;
- StackIndex* mem_start_stk;
- StackIndex* mem_end_stk;
+ StkPtrType* mem_start_stk;
+ StkPtrType* mem_end_stk;
};
#endif
@@ -1178,7 +1186,7 @@ struct OnigCalloutArgsStruct {
#define UPDATE_FOR_STACK_REALLOC do{\
repeat_stk = (StackIndex* )alloc_base;\
empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
- mem_start_stk = (StackIndex* )(empty_check_stk + reg->num_empty_check);\
+ mem_start_stk = (StkPtrType* )(empty_check_stk + reg->num_empty_check);\
mem_end_stk = mem_start_stk + num_mem + 1;\
} while(0)
@@ -1194,7 +1202,7 @@ struct OnigCalloutArgsStruct {
#define PTR_NUM_SIZE(reg) (((reg)->num_mem + 1) * 2)
#define UPDATE_FOR_STACK_REALLOC do{\
- mem_start_stk = (StackIndex* )alloc_base;\
+ mem_start_stk = (StkPtrType* )alloc_base;\
mem_end_stk = mem_start_stk + num_mem + 1;\
} while(0)
@@ -1218,8 +1226,12 @@ struct OnigCalloutArgsStruct {
#endif
#if defined(USE_CALL)
+#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv) \
+ (msa).subexp_call_in_search_counter = 0;
+
#define POP_CALL else if (stk->type == STK_RETURN) {subexp_call_nest_counter++;} else if (stk->type == STK_CALL_FRAME) {subexp_call_nest_counter--;}
#else
+#define SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)
#define POP_CALL
#endif
@@ -1231,6 +1243,7 @@ struct OnigCalloutArgsStruct {
(msa).start = (arg_start);\
(msa).match_stack_limit = (mpv)->match_stack_limit;\
RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
+ SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
(msa).mp = mpv;\
(msa).best_len = ONIG_MISMATCH;\
(msa).ptr_num = PTR_NUM_SIZE(reg);\
@@ -1243,6 +1256,7 @@ struct OnigCalloutArgsStruct {
(msa).start = (arg_start);\
(msa).match_stack_limit = (mpv)->match_stack_limit;\
RETRY_IN_MATCH_ARG_INIT(msa,mpv)\
+ SUBEXP_CALL_IN_MATCH_ARG_INIT(msa,mpv)\
(msa).mp = mpv;\
(msa).ptr_num = PTR_NUM_SIZE(reg);\
} while(0)
@@ -1258,27 +1272,27 @@ struct OnigCalloutArgsStruct {
is_alloca = 0;\
alloc_base = msa->stack_p;\
stk_base = (StackType* )(alloc_base\
- + (sizeof(StackIndex) * msa->ptr_num));\
+ + (sizeof(StkPtrType) * msa->ptr_num));\
stk = stk_base;\
stk_end = stk_base + msa->stack_n;\
}\
else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\
is_alloca = 0;\
- alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\
+ alloc_base = (char* )xmalloc(sizeof(StkPtrType) * msa->ptr_num\
+ sizeof(StackType) * (stack_num));\
CHECK_NULL_RETURN_MEMERR(alloc_base);\
stk_base = (StackType* )(alloc_base\
- + (sizeof(StackIndex) * msa->ptr_num));\
+ + (sizeof(StkPtrType) * msa->ptr_num));\
stk = stk_base;\
stk_end = stk_base + (stack_num);\
}\
else {\
is_alloca = 1;\
- alloc_base = (char* )xalloca(sizeof(StackIndex) * msa->ptr_num\
+ alloc_base = (char* )xalloca(sizeof(StkPtrType) * msa->ptr_num\
+ sizeof(StackType) * (stack_num));\
CHECK_NULL_RETURN_MEMERR(alloc_base);\
stk_base = (StackType* )(alloc_base\
- + (sizeof(StackIndex) * msa->ptr_num));\
+ + (sizeof(StkPtrType) * msa->ptr_num));\
stk = stk_base;\
stk_end = stk_base + (stack_num);\
}\
@@ -1288,7 +1302,7 @@ struct OnigCalloutArgsStruct {
#define STACK_SAVE(msa,is_alloca,alloc_base) do{\
(msa)->stack_n = (int )(stk_end - stk_base);\
if ((is_alloca) != 0) {\
- size_t size = sizeof(StackIndex) * (msa)->ptr_num\
+ size_t size = sizeof(StkPtrType) * (msa)->ptr_num\
+ sizeof(StackType) * (msa)->stack_n;\
(msa)->stack_p = xmalloc(size);\
CHECK_NULL_RETURN_MEMERR((msa)->stack_p);\
@@ -1373,6 +1387,24 @@ onig_set_retry_limit_in_search(unsigned long n)
#endif
}
+#ifdef USE_CALL
+static unsigned long SubexpCallLimitInSearch = DEFAULT_SUBEXP_CALL_LIMIT_IN_SEARCH;
+
+extern unsigned long
+onig_get_subexp_call_limit_in_search(void)
+{
+ return SubexpCallLimitInSearch;
+}
+
+extern int
+onig_set_subexp_call_limit_in_search(unsigned long n)
+{
+ SubexpCallLimitInSearch = n;
+ return 0;
+}
+
+#endif
+
#ifdef USE_CALLOUT
static OnigCalloutFunc DefaultProgressCallout;
static OnigCalloutFunc DefaultRetractionCallout;
@@ -1637,9 +1669,9 @@ stack_double(int* is_alloca, char** arg_alloc_base,
stk = *arg_stk;
n = (unsigned int )(stk_end - stk_base);
- size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
+ size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n;
n *= 2;
- new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n;
+ new_size = sizeof(StkPtrType) * msa->ptr_num + sizeof(StackType) * n;
if (*is_alloca != 0) {
new_alloc_base = (char* )xmalloc(new_size);
if (IS_NULL(new_alloc_base)) {
@@ -1669,7 +1701,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,
used = (int )(stk - stk_base);
*arg_alloc_base = alloc_base;
*arg_stk_base = (StackType* )(alloc_base
- + (sizeof(StackIndex) * msa->ptr_num));
+ + (sizeof(StkPtrType) * msa->ptr_num));
*arg_stk = *arg_stk_base + used;
*arg_stk_end = *arg_stk_base + n;
return 0;
@@ -1694,22 +1726,20 @@ stack_double(int* is_alloca, char** arg_alloc_base,
#define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0)
-#define STACK_PUSH(stack_type,pat,s,sprev) do {\
+#define STACK_PUSH(stack_type,pat,s) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
- stk->u.state.pstr_prev = (sprev);\
STACK_INC;\
} while(0)
-#define STACK_PUSH_WITH_ZID(stack_type,pat,s,sprev,id) do {\
+#define STACK_PUSH_WITH_ZID(stack_type,pat,s,id) do {\
STACK_ENSURE(1);\
stk->type = (stack_type);\
stk->zid = (int )(id);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = (s);\
- stk->u.state.pstr_prev = (sprev);\
STACK_INC;\
} while(0)
@@ -1724,7 +1754,6 @@ stack_double(int* is_alloca, char** arg_alloc_base,
stk->type = (stack_type);\
stk->u.state.pcode = (pat);\
stk->u.state.pstr = s;\
- stk->u.state.pstr_prev = sprev;\
STACK_INC;\
} while (0)
#else
@@ -1735,10 +1764,9 @@ stack_double(int* is_alloca, char** arg_alloc_base,
} while (0)
#endif
-#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
-#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
-#define STACK_PUSH_ALT_WITH_ZID(pat,s,sprev,id) \
- STACK_PUSH_WITH_ZID(STK_ALT,pat,s,sprev,id)
+#define STACK_PUSH_ALT(pat,s) STACK_PUSH(STK_ALT,pat,s)
+#define STACK_PUSH_SUPER_ALT(pat,s) STACK_PUSH(STK_SUPER_ALT,pat,s)
+#define STACK_PUSH_ALT_WITH_ZID(pat,s,id) STACK_PUSH_WITH_ZID(STK_ALT,pat,s,id)
#if 0
#define STACK_PUSH_REPEAT(sid, pat) do {\
@@ -1767,8 +1795,8 @@ stack_double(int* is_alloca, char** arg_alloc_base,
stk->u.mem.pstr = (s);\
stk->u.mem.prev_start = mem_start_stk[mnum];\
stk->u.mem.prev_end = mem_end_stk[mnum];\
- mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
- mem_end_stk[mnum] = INVALID_STACK_INDEX;\
+ mem_start_stk[mnum].i = GET_STACK_INDEX(stk);\
+ mem_end_stk[mnum].i = INVALID_STACK_INDEX;\
STACK_INC;\
} while(0)
@@ -1779,7 +1807,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,
stk->u.mem.pstr = (s);\
stk->u.mem.prev_start = mem_start_stk[mnum];\
stk->u.mem.prev_end = mem_end_stk[mnum];\
- mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
+ mem_end_stk[mnum].i = GET_STACK_INDEX(stk);\
STACK_INC;\
} while(0)
@@ -1861,12 +1889,11 @@ stack_double(int* is_alloca, char** arg_alloc_base,
STACK_INC;\
} while(0)
-#define STACK_PUSH_MARK_WITH_POS(sid, s, sprev) do {\
+#define STACK_PUSH_MARK_WITH_POS(sid, s) do {\
STACK_ENSURE(1);\
stk->type = STK_MARK;\
stk->zid = (sid);\
stk->u.val.v = (UChar* )(s);\
- stk->u.val.v2 = (sprev);\
STACK_INC;\
} while(0)
@@ -1885,7 +1912,6 @@ stack_double(int* is_alloca, char** arg_alloc_base,
stk->zid = (sid);\
stk->u.val.type = (stype);\
stk->u.val.v = (UChar* )(sval);\
- stk->u.val.v2 = sprev;\
STACK_INC;\
} while(0)
@@ -1932,7 +1958,6 @@ stack_double(int* is_alloca, char** arg_alloc_base,
&& k->zid == (sid)) {\
if (level == 0) {\
(sval) = k->u.val.v;\
- sprev = k->u.val.v2;\
break;\
}\
}\
@@ -2135,14 +2160,14 @@ stack_double(int* is_alloca, char** arg_alloc_base,
} while(0)
#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
- if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
+ if (k->u.mem.prev_end.i == INVALID_STACK_INDEX) {\
(addr) = 0;\
}\
else {\
if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\
- (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\
+ (addr) = STACK_AT(k->u.mem.prev_end.i)->u.mem.pstr;\
else\
- (addr) = (UChar* )k->u.mem.prev_end;\
+ (addr) = k->u.mem.prev_end.s;\
}\
} while (0)
@@ -2163,7 +2188,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,
if (endp == 0) {\
(isnull) = 0; break;\
}\
- else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
+ else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) {\
(isnull) = 0; break;\
}\
else if (endp != s) {\
@@ -2199,7 +2224,7 @@ stack_double(int* is_alloca, char** arg_alloc_base,
if (endp == 0) {\
(isnull) = 0; break;\
}\
- else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \
+ else if (STACK_AT(k->u.mem.prev_start.i)->u.mem.pstr != endp) { \
(isnull) = 0; break;\
}\
else if (endp != s) {\
@@ -2362,6 +2387,10 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
p1++;
p2++;
}
+ if (s2 >= end2) {
+ if (s1 < end1) return 0;
+ else break;
+ }
}
*ps2 = s2;
@@ -2390,7 +2419,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
#define ON_STR_END(s) ((s) == end)
#define DATA_ENSURE_CHECK1 (s < right_range)
#define DATA_ENSURE_CHECK(n) (s + (n) <= right_range)
-#define DATA_ENSURE(n) if (s + (n) > right_range) goto fail
+#define DATA_ENSURE(n) if (right_range - s < (n)) goto fail
#define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range
@@ -2632,9 +2661,9 @@ typedef struct {
#define BYTECODE_INTERPRETER_START GOTO_OP;
#define BYTECODE_INTERPRETER_END
-#define CASE_OP(x) L_##x: SOP_IN(OP_##x); sbegin = s; MATCH_DEBUG_OUT(0)
+#define CASE_OP(x) L_##x: SOP_IN(OP_##x); MATCH_DEBUG_OUT(0)
#define DEFAULT_OP /* L_DEFAULT: */
-#define NEXT_OP sprev = sbegin; JUMP_OP
+#define NEXT_OP JUMP_OP
#define JUMP_OP GOTO_OP
#ifdef USE_DIRECT_THREADED_CODE
#define GOTO_OP goto *(p->opaddr)
@@ -2648,9 +2677,8 @@ typedef struct {
#define BYTECODE_INTERPRETER_START \
while (1) {\
MATCH_DEBUG_OUT(0)\
- sbegin = s;\
switch (p->opcode) {
-#define BYTECODE_INTERPRETER_END } sprev = sbegin; }
+#define BYTECODE_INTERPRETER_END } }
#define CASE_OP(x) case OP_##x: SOP_IN(OP_##x);
#define DEFAULT_OP default:
#define NEXT_OP break
@@ -2718,12 +2746,22 @@ typedef struct {
best_len = err_code; goto match_at_end;\
} while(0)
+#define MATCH_COUNTER_OUT(title) do {\
+ int i;\
+ fprintf(DBGFP, "%s (%ld): retry limit: %8lu, subexp_call: %8lu\n", (title), (sstart - str), retry_in_match_counter, msa->subexp_call_in_search_counter); \
+ fprintf(DBGFP, " ");\
+ for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) {\
+ fprintf(DBGFP, " %6lu", subexp_call_counters[i]);\
+ }\
+ fprintf(DBGFP, "\n");\
+ fflush(DBGFP);\
+} while (0)
+
/* match data(str - end) from position (sstart). */
-/* if sstart == str then set sprev to NULL. */
static int
match_at(regex_t* reg, const UChar* str, const UChar* end,
- const UChar* in_right_range, const UChar* sstart, UChar* sprev,
+ const UChar* in_right_range, const UChar* sstart,
MatchArg* msa)
{
@@ -2782,10 +2820,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_BACKREF_N_IC,
&&L_BACKREF_MULTI,
&&L_BACKREF_MULTI_IC,
+#ifdef USE_BACKREF_WITH_LEVEL
&&L_BACKREF_WITH_LEVEL,
&&L_BACKREF_WITH_LEVEL_IC,
+#endif
&&L_BACKREF_CHECK,
+#ifdef USE_BACKREF_WITH_LEVEL
&&L_BACKREF_CHECK_WITH_LEVEL,
+#endif
&&L_MEM_START,
&&L_MEM_START_PUSH,
&&L_MEM_END_PUSH,
@@ -2838,13 +2880,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
LengthType tlen, tlen2;
MemNumType mem;
RelAddrType addr;
- UChar *s, *ps, *sbegin;
+ UChar *s, *ps;
UChar *right_range;
int is_alloca;
char *alloc_base;
StackType *stk_base, *stk, *stk_end;
StackType *stkp; /* used as any purpose. */
- StackIndex *mem_start_stk, *mem_end_stk;
+ StkPtrType *mem_start_stk, *mem_end_stk;
UChar* keep;
#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
@@ -2858,6 +2900,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#ifdef USE_CALLOUT
int of;
#endif
+#ifdef ONIG_DEBUG_MATCH_COUNTER
+#define MAX_SUBEXP_CALL_COUNTERS 9
+ unsigned long subexp_call_counters[MAX_SUBEXP_CALL_COUNTERS];
+#endif
Operation* p = reg->ops;
OnigOptionType option = reg->options;
@@ -2872,6 +2918,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
static unsigned int counter = 1;
#endif
+#ifdef ONIG_DEBUG_MATCH_COUNTER
+ for (i = 0; i < MAX_SUBEXP_CALL_COUNTERS; i++) {
+ subexp_call_counters[i] = 0;
+ }
+#endif
+
#ifdef USE_DIRECT_THREADED_CODE
if (IS_NULL(msa)) {
for (i = 0; i < reg->ops_used; i++) {
@@ -2903,12 +2955,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_INIT(INIT_MATCH_STACK_SIZE);
UPDATE_FOR_STACK_REALLOC;
for (i = 1; i <= num_mem; i++) {
- mem_start_stk[i] = mem_end_stk[i] = INVALID_STACK_INDEX;
+ mem_start_stk[i].i = mem_end_stk[i].i = INVALID_STACK_INDEX;
}
#ifdef ONIG_DEBUG_MATCH
- fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p, sprev: %p\n",
- str, end, sstart, sprev);
+ fprintf(DBGFP, "match_at: str: %p, end: %p, start: %p\n", str, end, sstart);
fprintf(DBGFP, "size: %d, start offset: %d\n",
(int )(end - str), (int )(sstart - str));
#endif
@@ -2932,24 +2983,27 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (n > msa->best_len) {
msa->best_len = n;
msa->best_s = (UChar* )sstart;
+ goto set_region;
}
else
goto end_best_len;
}
#endif
best_len = n;
+
+ set_region:
region = msa->region;
if (region) {
if (keep > s) keep = s;
-#ifdef USE_POSIX_API_REGION_OPTION
+#ifdef USE_POSIX_API
if (OPTON_POSIX_REGION(msa->options)) {
posix_regmatch_t* rmt = (posix_regmatch_t* )region;
rmt[0].rm_so = (regoff_t )(keep - str);
rmt[0].rm_eo = (regoff_t )(s - str);
for (i = 1; i <= num_mem; i++) {
- if (mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (mem_end_stk[i].i != INVALID_STACK_INDEX) {
rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str);
rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i) - str);
}
@@ -2959,11 +3013,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
else {
-#endif /* USE_POSIX_API_REGION_OPTION */
+#endif /* USE_POSIX_API */
region->beg[0] = (int )(keep - str);
region->end[0] = (int )(s - str);
for (i = 1; i <= num_mem; i++) {
- if (mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (mem_end_stk[i].i != INVALID_STACK_INDEX) {
region->beg[i] = (int )(STACK_MEM_START(reg, i) - str);
region->end[i] = (int )(STACK_MEM_END(reg, i) - str);
}
@@ -2996,7 +3050,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (r < 0) MATCH_AT_ERROR_RETURN(r);
}
#endif /* USE_CAPTURE_HISTORY */
-#ifdef USE_POSIX_API_REGION_OPTION
+#ifdef USE_POSIX_API
} /* else OPTON_POSIX_REGION() */
#endif
} /* if (region) */
@@ -3012,8 +3066,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
best_len = ONIG_MISMATCH;
goto fail; /* for retry */
}
- if (OPTON_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
- goto fail; /* for retry */
+ if (OPTON_FIND_LONGEST(option)) {
+ if (s >= in_right_range && msa->best_s == sstart)
+ best_len = msa->best_len;
+ else
+ goto fail; /* for retry */
}
}
@@ -3034,7 +3091,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
- sprev = s;
s++;
INC_OP;
JUMP_OUT;
@@ -3047,7 +3103,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
- sprev = s;
s++;
INC_OP;
JUMP_OUT;
@@ -3062,7 +3117,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
- sprev = s;
s++;
INC_OP;
JUMP_OUT;
@@ -3079,7 +3133,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
- sprev = s;
s++;
INC_OP;
JUMP_OUT;
@@ -3091,7 +3144,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
while (tlen-- > 0) {
if (*ps++ != *s++) goto fail;
}
- sprev = s - 1;
INC_OP;
JUMP_OUT;
@@ -3112,7 +3164,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
ps++; s++;
if (*ps != *s) goto fail;
ps++; s++;
- sprev = s;
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
@@ -3131,7 +3182,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
ps++; s++;
if (*ps != *s) goto fail;
ps++; s++;
- sprev = s;
if (*ps != *s) goto fail;
ps++; s++;
if (*ps != *s) goto fail;
@@ -3149,7 +3199,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
}
- sprev = s - 2;
INC_OP;
JUMP_OUT;
@@ -3165,7 +3214,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
}
- sprev = s - 3;
INC_OP;
JUMP_OUT;
@@ -3179,7 +3227,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (*ps != *s) goto fail;
ps++; s++;
}
- sprev = s - tlen;
INC_OP;
JUMP_OUT;
@@ -3295,11 +3342,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(ANYCHAR_STAR)
INC_OP;
while (DATA_ENSURE_CHECK1) {
- STACK_PUSH_ALT(p, s, sprev);
+ STACK_PUSH_ALT(p, s);
n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- sprev = s;
s += n;
}
JUMP_OUT;
@@ -3307,15 +3353,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(ANYCHAR_ML_STAR)
INC_OP;
while (DATA_ENSURE_CHECK1) {
- STACK_PUSH_ALT(p, s, sprev);
+ STACK_PUSH_ALT(p, s);
n = enclen(encode, s);
if (n > 1) {
DATA_ENSURE(n);
- sprev = s;
s += n;
}
else {
- sprev = s;
s++;
}
}
@@ -3329,12 +3373,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
while (DATA_ENSURE_CHECK1) {
if (c == *s) {
- STACK_PUSH_ALT(p, s, sprev);
+ STACK_PUSH_ALT(p, s);
}
n = enclen(encode, s);
DATA_ENSURE(n);
if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail;
- sprev = s;
s += n;
}
}
@@ -3348,16 +3391,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
while (DATA_ENSURE_CHECK1) {
if (c == *s) {
- STACK_PUSH_ALT(p, s, sprev);
+ STACK_PUSH_ALT(p, s);
}
n = enclen(encode, s);
if (n > 1) {
DATA_ENSURE(n);
- sprev = s;
s += n;
}
else {
- sprev = s;
s++;
}
}
@@ -3410,14 +3451,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
goto fail;
}
- else if (ON_STR_END(s)) {
- if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
- goto fail;
- }
else {
- if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
- == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
- goto fail;
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
+ if (ON_STR_END(s)) {
+ if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+ goto fail;
+ }
+ else {
+ if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
+ == IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+ goto fail;
+ }
}
}
INC_OP;
@@ -3432,14 +3476,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
goto fail;
}
- else if (ON_STR_END(s)) {
- if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
- goto fail;
- }
else {
- if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
- != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
- goto fail;
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
+ if (ON_STR_END(s)) {
+ if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+ goto fail;
+ }
+ else {
+ if (IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)
+ != IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode))
+ goto fail;
+ }
}
}
INC_OP;
@@ -3452,7 +3499,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mode = p->word_boundary.mode;
if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
- if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
+ UChar* sprev;
+ if (ON_STR_BEGIN(s)) {
+ INC_OP;
+ JUMP_OUT;
+ }
+ sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
+ if (! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
INC_OP;
JUMP_OUT;
}
@@ -3465,10 +3518,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
ModeType mode;
mode = p->word_boundary.mode;
- if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
- if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
- INC_OP;
- JUMP_OUT;
+ if (! ON_STR_BEGIN(s)) {
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
+ if (IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
+ if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
+ INC_OP;
+ JUMP_OUT;
+ }
}
}
}
@@ -3478,6 +3534,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(TEXT_SEGMENT_BOUNDARY)
{
int is_break;
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
switch (p->text_segment_boundary.type) {
case EXTENDED_GRAPHEME_CLUSTER_BOUNDARY:
@@ -3507,12 +3564,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(BEGIN_BUF)
if (! ON_STR_BEGIN(s)) goto fail;
+ if (OPTON_NOTBOL(msa->options)) goto fail;
+ if (OPTON_NOT_BEGIN_STRING(msa->options)) goto fail;
INC_OP;
JUMP_OUT;
CASE_OP(END_BUF)
if (! ON_STR_END(s)) goto fail;
+ if (OPTON_NOTEOL(msa->options)) goto fail;
+ if (OPTON_NOT_END_STRING(msa->options)) goto fail;
INC_OP;
JUMP_OUT;
@@ -3523,15 +3584,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
}
- else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) {
- INC_OP;
- JUMP_OUT;
+ else if (! ON_STR_END(s)) {
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
+ if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
+ INC_OP;
+ JUMP_OUT;
+ }
}
goto fail;
CASE_OP(END_LINE)
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (OPTON_NOTEOL(msa->options)) goto fail;
@@ -3556,9 +3621,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(SEMI_END_BUF)
if (ON_STR_END(s)) {
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
+ UChar* sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) {
#endif
if (OPTON_NOTEOL(msa->options)) goto fail;
+ if (OPTON_NOT_END_STRING(msa->options)) goto fail;
INC_OP;
JUMP_OUT;
#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
@@ -3567,6 +3634,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) &&
ON_STR_END(s + enclen(encode, s))) {
+ if (OPTON_NOTEOL(msa->options)) goto fail;
+ if (OPTON_NOT_END_STRING(msa->options)) goto fail;
INC_OP;
JUMP_OUT;
}
@@ -3575,6 +3644,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
UChar* ss = s + enclen(encode, s);
ss += enclen(encode, ss);
if (ON_STR_END(ss)) {
+ if (OPTON_NOTEOL(msa->options)) goto fail;
+ if (OPTON_NOT_END_STRING(msa->options)) goto fail;
INC_OP;
JUMP_OUT;
}
@@ -3586,6 +3657,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
switch (p->check_position.type) {
case CHECK_POSITION_SEARCH_START:
if (s != msa->start) goto fail;
+ if (OPTON_NOT_BEGIN_POSITION(msa->options)) goto fail;
break;
case CHECK_POSITION_CURRENT_RIGHT_RANGE:
if (s != right_range) goto fail;
@@ -3604,7 +3676,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(MEM_START)
mem = p->memory_start.num;
- mem_start_stk[mem] = (StackIndex )((void* )s);
+ mem_start_stk[mem].s = s;
INC_OP;
JUMP_OUT;
@@ -3616,7 +3688,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(MEM_END)
mem = p->memory_end.num;
- mem_end_stk[mem] = (StackIndex )((void* )s);
+ mem_end_stk[mem].s = s;
INC_OP;
JUMP_OUT;
@@ -3629,20 +3701,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
si = GET_STACK_INDEX(stkp);
STACK_PUSH_MEM_END(mem, s);
- mem_start_stk[mem] = si;
+ mem_start_stk[mem].i = si;
INC_OP;
JUMP_OUT;
}
CASE_OP(MEM_END_REC)
mem = p->memory_end.num;
- mem_end_stk[mem] = (StackIndex )((void* )s);
+ mem_end_stk[mem].s = s;
STACK_GET_MEM_START(mem, stkp);
if (MEM_STATUS_AT(reg->push_mem_start, mem))
- mem_start_stk[mem] = GET_STACK_INDEX(stkp);
+ mem_start_stk[mem].i = GET_STACK_INDEX(stkp);
else
- mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
+ mem_start_stk[mem].s = stkp->u.mem.pstr;
STACK_PUSH_MEM_END_MARK(mem);
INC_OP;
@@ -3661,21 +3733,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem = p->backref_n.n1;
backref:
{
- int len;
UChar *pstart, *pend;
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_end_stk[mem].i == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail;
pstart = STACK_MEM_START(reg, mem);
pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
if (n != 0) {
DATA_ENSURE(n);
- sprev = s;
STRING_CMP(s, pstart, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
}
}
INC_OP;
@@ -3684,21 +3752,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(BACKREF_N_IC)
mem = p->backref_n.n1;
{
- int len;
UChar *pstart, *pend;
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
+ if (mem_end_stk[mem].i == INVALID_STACK_INDEX) goto fail;
+ if (mem_start_stk[mem].i == INVALID_STACK_INDEX) goto fail;
pstart = STACK_MEM_START(reg, mem);
pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
if (n != 0) {
DATA_ENSURE(n);
- sprev = s;
STRING_CMP_IC(case_fold_flag, pstart, &s, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
}
}
INC_OP;
@@ -3706,28 +3770,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(BACKREF_MULTI)
{
- int len, is_fail;
+ int is_fail;
UChar *pstart, *pend, *swork;
tlen = p->backref_general.num;
for (i = 0; i < tlen; i++) {
mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
pstart = STACK_MEM_START(reg, mem);
pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
if (n != 0) {
DATA_ENSURE(n);
- sprev = s;
swork = s;
STRING_CMP_VALUE(swork, pstart, n, is_fail);
if (is_fail) continue;
s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
}
break; /* success */
}
@@ -3738,28 +3799,25 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(BACKREF_MULTI_IC)
{
- int len, is_fail;
+ int is_fail;
UChar *pstart, *pend, *swork;
tlen = p->backref_general.num;
for (i = 0; i < tlen; i++) {
mem = tlen == 1 ? p->backref_general.n1 : p->backref_general.ns[i];
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
pstart = STACK_MEM_START(reg, mem);
pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
if (n != 0) {
DATA_ENSURE(n);
- sprev = s;
swork = s;
STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
if (is_fail) continue;
s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
}
break; /* success */
}
@@ -3774,10 +3832,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto backref_with_level;
CASE_OP(BACKREF_WITH_LEVEL)
{
- int len;
int level;
MemNumType* mems;
- UChar* ssave;
n = 0;
backref_with_level:
@@ -3785,17 +3841,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
tlen = p->backref_general.num;
mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
- ssave = s;
- if (backref_match_at_nested_level(reg, stk, stk_base, n,
- case_fold_flag, level, (int )tlen, mems, &s, end)) {
- if (ssave != s) {
- sprev = ssave;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
- }
- }
- else
+ if (! backref_match_at_nested_level(reg, stk, stk_base, n,
+ case_fold_flag, level, (int )tlen, mems, &s, end)) {
goto fail;
+ }
}
INC_OP;
JUMP_OUT;
@@ -3810,8 +3859,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
for (i = 0; i < tlen; i++) {
mem = mems[i];
- if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
- if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
+ if (mem_end_stk[mem].i == INVALID_STACK_INDEX) continue;
+ if (mem_start_stk[mem].i == INVALID_STACK_INDEX) continue;
break; /* success */
}
if (i == tlen) goto fail;
@@ -3928,13 +3977,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(PUSH)
addr = p->push.addr;
- STACK_PUSH_ALT(p + addr, s, sprev);
+ STACK_PUSH_ALT(p + addr, s);
INC_OP;
JUMP_OUT;
CASE_OP(PUSH_SUPER)
addr = p->push.addr;
- STACK_PUSH_SUPER_ALT(p + addr, s, sprev);
+ STACK_PUSH_SUPER_ALT(p + addr, s);
INC_OP;
JUMP_OUT;
@@ -3956,7 +4005,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
addr = p->push_or_jump_exact1.addr;
c = p->push_or_jump_exact1.c;
if (DATA_ENSURE_CHECK1 && c == *s) {
- STACK_PUSH_ALT(p + addr, s, sprev);
+ STACK_PUSH_ALT(p + addr, s);
INC_OP;
JUMP_OUT;
}
@@ -3972,9 +4021,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
addr = p->push_if_peek_next.addr;
c = p->push_if_peek_next.c;
if (DATA_ENSURE_CHECK1 && c == *s) {
- STACK_PUSH_ALT(p + addr, s, sprev);
- INC_OP;
- JUMP_OUT;
+ STACK_PUSH_ALT(p + addr, s);
}
}
INC_OP;
@@ -3986,7 +4033,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_REPEAT_INC(mem, 0);
if (reg->repeat_range[mem].lower == 0) {
- STACK_PUSH_ALT(p + addr, s, sprev);
+ STACK_PUSH_ALT(p + addr, s);
}
INC_OP;
JUMP_OUT;
@@ -3997,7 +4044,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
STACK_PUSH_REPEAT_INC(mem, 0);
if (reg->repeat_range[mem].lower == 0) {
- STACK_PUSH_ALT(p + 1, s, sprev);
+ STACK_PUSH_ALT(p + 1, s);
p += addr;
}
else
@@ -4014,7 +4061,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else if (n >= reg->repeat_range[mem].lower) {
INC_OP;
- STACK_PUSH_ALT(p, s, sprev);
+ STACK_PUSH_ALT(p, s);
p = reg->repeat_range[mem].u.pcode;
}
else {
@@ -4033,7 +4080,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
else {
if (n >= reg->repeat_range[mem].lower) {
- STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev);
+ STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s);
INC_OP;
}
else {
@@ -4047,6 +4094,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (subexp_call_nest_counter == SubexpCallMaxNestLevel)
goto fail;
subexp_call_nest_counter++;
+
+ if (SubexpCallLimitInSearch != 0) {
+ msa->subexp_call_in_search_counter++;
+#ifdef ONIG_DEBUG_MATCH_COUNTER
+ if (p->call.called_mem < MAX_SUBEXP_CALL_COUNTERS)
+ subexp_call_counters[p->call.called_mem]++;
+ if (msa->subexp_call_in_search_counter % 1000 == 0)
+ MATCH_COUNTER_OUT("CALL");
+#endif
+ if (msa->subexp_call_in_search_counter >
+ SubexpCallLimitInSearch) {
+ MATCH_AT_ERROR_RETURN(ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER);
+ }
+ }
+
addr = p->call.addr;
INC_OP; STACK_PUSH_CALL_FRAME(p);
p = reg->ops + addr;
@@ -4070,7 +4132,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
for (tlen = p->move.n; tlen > 0; tlen--) {
len = enclen(encode, s);
- sprev = s;
s += len;
if (s > end) goto fail;
if (s == end) {
@@ -4079,7 +4140,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
}
}
- sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
INC_OP;
JUMP_OUT;
@@ -4088,10 +4148,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (tlen != 0) {
s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen);
if (IS_NULL(s)) goto fail;
- sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
}
if (p->step_back_start.remaining != 0) {
- STACK_PUSH_ALT_WITH_ZID(p + 1, s, sprev, p->step_back_start.remaining);
+ STACK_PUSH_ALT_WITH_ZID(p + 1, s, p->step_back_start.remaining);
p += p->step_back_start.addr;
}
else
@@ -4103,9 +4162,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (tlen != INFINITE_LEN) tlen--;
s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, 1);
if (IS_NULL(s)) goto fail;
- sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s);
if (tlen != 0) {
- STACK_PUSH_ALT_WITH_ZID(p, s, sprev, (int )tlen);
+ STACK_PUSH_ALT_WITH_ZID(p, s, (int )tlen);
}
INC_OP;
JUMP_OUT;
@@ -4114,8 +4172,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem = p->cut_to_mark.id; /* mem: mark id */
STACK_TO_VOID_TO_MARK(stkp, mem);
if (p->cut_to_mark.restore_pos != 0) {
- s = stkp->u.val.v;
- sprev = stkp->u.val.v2;
+ s = stkp->u.val.v;
}
INC_OP;
JUMP_OUT;
@@ -4123,7 +4180,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(MARK)
mem = p->mark.id; /* mem: mark id */
if (p->mark.save_pos != 0)
- STACK_PUSH_MARK_WITH_POS(mem, s, sprev);
+ STACK_PUSH_MARK_WITH_POS(mem, s);
else
STACK_PUSH_MARK(mem);
@@ -4275,9 +4332,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
fail:
#endif
STACK_POP;
- p = stk->u.state.pcode;
- s = stk->u.state.pstr;
- sprev = stk->u.state.pstr_prev;
+ p = stk->u.state.pcode;
+ s = stk->u.state.pstr;
CHECK_RETRY_LIMIT_IN_MATCH;
JUMP_OUT;
@@ -4290,6 +4346,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (msa->retry_limit_in_search != 0) {
msa->retry_limit_in_search_counter += retry_in_match_counter;
}
+
+#ifdef ONIG_DEBUG_MATCH_COUNTER
+ MATCH_COUNTER_OUT("END");
+#endif
+
STACK_SAVE(msa, is_alloca, alloc_base);
return best_len;
}
@@ -4324,12 +4385,11 @@ typedef struct {
int state; /* value of enum SearchRangeStatus */
UChar* low;
UChar* high;
- UChar* low_prev;
UChar* sch_range;
} SearchRange;
#define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \
- r = match_at(reg, str, end, (upper_range), s, prev, msas + i); \
+ r = match_at(reg, str, end, (upper_range), s, msas + i); \
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
goto match;\
@@ -4345,8 +4405,8 @@ regset_search_body_position_lead(OnigRegSet* set,
OnigOptionType option, MatchArg* msas, int* rmatch_pos)
{
int r, n, i;
- UChar *s, *prev;
- UChar *low, *high, *low_prev;
+ UChar *s;
+ UChar *low, *high;
UChar* sch_range;
regex_t* reg;
OnigEncoding enc;
@@ -4354,12 +4414,7 @@ regset_search_body_position_lead(OnigRegSet* set,
n = set->n;
enc = set->enc;
-
s = (UChar* )start;
- if (s > str)
- prev = onigenc_get_prev_char_head(enc, str, s);
- else
- prev = (UChar* )NULL;
sr = (SearchRange* )xmalloc(sizeof(*sr) * n);
CHECK_NULL_RETURN_MEMERR(sr);
@@ -4375,18 +4430,16 @@ regset_search_body_position_lead(OnigRegSet* set,
else
sch_range = (UChar* )end;
- if (forward_search(reg, str, end, s, sch_range, &low, &high, &low_prev)) {
+ if (forward_search(reg, str, end, s, sch_range, &low, &high)) {
sr[i].state = SRS_LOW_HIGH;
sr[i].low = low;
sr[i].high = high;
- sr[i].low_prev = low_prev;
sr[i].sch_range = sch_range;
}
}
else {
sch_range = (UChar* )end;
- if (forward_search(reg, str, end, s, sch_range,
- &low, &high, (UChar** )NULL)) {
+ if (forward_search(reg, str, end, s, sch_range, &low, &high)) {
goto total_active;
}
}
@@ -4396,7 +4449,6 @@ regset_search_body_position_lead(OnigRegSet* set,
sr[i].state = SRS_ALL_RANGE;
sr[i].low = s;
sr[i].high = (UChar* )range;
- sr[i].low_prev = prev;
}
}
@@ -4412,10 +4464,9 @@ regset_search_body_position_lead(OnigRegSet* set,
if (s < sr[i].low) continue;
if (s >= sr[i].high) {
if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
- &low, &high, &low_prev) != 0) {
+ &low, &high) != 0) {
sr[i].low = low;
sr[i].high = high;
- sr[i].low_prev = low_prev;
if (s < low) continue;
}
else {
@@ -4436,16 +4487,13 @@ regset_search_body_position_lead(OnigRegSet* set,
for (i = 0; i < n; i++) {
if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) {
low = sr[i].low;
- low_prev = sr[i].low_prev;
}
}
if (low == range) break;
s = low;
- prev = low_prev;
}
else {
- prev = s;
s += enclen(enc, s);
}
} while (1);
@@ -4459,10 +4507,9 @@ regset_search_body_position_lead(OnigRegSet* set,
if (s < sr[i].low) continue;
if (s >= sr[i].high) {
if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
- &low, &high, &low_prev) != 0) {
+ &low, &high) != 0) {
sr[i].low = low;
sr[i].high = high;
- /* sr[i].low_prev = low_prev; */
if (s < low) continue;
}
else {
@@ -4483,7 +4530,6 @@ regset_search_body_position_lead(OnigRegSet* set,
if (set->anychar_inf != 0)
prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end);
- prev = s;
s += enclen(enc, s);
} while (1);
}
@@ -4552,7 +4598,7 @@ onig_regset_search_with_param(OnigRegSet* set,
{
int r;
int i;
- UChar *s, *prev;
+ UChar *s;
regex_t* reg;
OnigEncoding enc;
OnigRegion* region;
@@ -4654,7 +4700,6 @@ onig_regset_search_with_param(OnigRegSet* set,
else if (str == end) { /* empty string */
start = end = str;
s = (UChar* )start;
- prev = (UChar* )NULL;
msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
CHECK_NULL_RETURN_MEMERR(msas);
@@ -4669,7 +4714,7 @@ onig_regset_search_with_param(OnigRegSet* set,
/* Can't use REGSET_MATCH_AND_RETURN_CHECK()
because r must be set regex index (i)
*/
- r = match_at(reg, str, end, end, s, prev, msas + i);
+ r = match_at(reg, str, end, end, s, msas + i);
if (r != ONIG_MISMATCH) {
if (r >= 0) {
r = i;
@@ -4814,7 +4859,7 @@ slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
else
s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s);
- while (s >= text) {
+ while (PTR_GE(s, text)) {
if (*s == *target) {
p = s + 1;
t = target + 1;
@@ -4855,7 +4900,7 @@ sunday_quick_search_step_forward(regex_t* reg,
tail = target_end - 1;
tlen1 = (int )(tail - target);
end = text_range;
- if (end + tlen1 > text_end)
+ if (tlen1 > text_end - end)
end = text_end - tlen1;
map_offset = reg->map_offset;
@@ -4893,15 +4938,38 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
const UChar *s, *t, *p, *end;
const UChar *tail;
int map_offset;
-
- end = text_range + (target_end - target);
- if (end > text_end)
- end = text_end;
+ ptrdiff_t target_len;
map_offset = reg->map_offset;
tail = target_end - 1;
- s = text + (tail - target);
+ target_len = target_end - target;
+ if (target_len > text_end - text_range) {
+ end = text_end;
+ if (target_len > text_end - text)
+ return (UChar* )NULL;
+ }
+ else {
+ end = text_range + target_len;
+ }
+
+ s = text + target_len - 1;
+
+#ifdef USE_STRICT_POINTER_ADDRESS
+ if (s < end) {
+ while (TRUE) {
+ p = s;
+ t = tail;
+ while (*p == *t) {
+ if (t == target) return (UChar* )p;
+ p--; t--;
+ }
+ if (text_end - s <= map_offset) break;
+ if (reg->map[*(s + map_offset)] >= end - s) break;
+ s += reg->map[*(s + map_offset)];
+ }
+ }
+#else
while (s < end) {
p = s;
t = tail;
@@ -4909,9 +4977,10 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
if (t == target) return (UChar* )p;
p--; t--;
}
- if (s + map_offset >= text_end) break;
+ if (text_end - s <= map_offset) break;
s += reg->map[*(s + map_offset)];
}
+#endif
return (UChar* )NULL;
}
@@ -4937,7 +5006,7 @@ map_search_backward(OnigEncoding enc, UChar map[],
{
const UChar *s = text_start;
- while (s >= text) {
+ while (PTR_GE(s, text)) {
if (map[*s]) return (UChar* )s;
s = onigenc_get_prev_char_head(enc, adjust_text, s);
@@ -4963,13 +5032,16 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
OnigMatchParam* mp)
{
int r;
- UChar *prev;
MatchArg msa;
+#ifndef USE_POSIX_API
+ if (OPTON_POSIX_REGION(option)) return ONIGERR_INVALID_ARGUMENT;
+#endif
+
ADJUST_MATCH_PARAM(reg, mp);
MATCH_ARG_INIT(msa, reg, option, region, at, mp);
if (region
-#ifdef USE_POSIX_API_REGION_OPTION
+#ifdef USE_POSIX_API
&& !OPTON_POSIX_REGION(option)
#endif
) {
@@ -4986,8 +5058,14 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
}
}
- prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at);
- r = match_at(reg, str, end, end, at, prev, &msa);
+ r = match_at(reg, str, end, end, at, &msa);
+#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+ if (OPTON_FIND_LONGEST(option) && r == ONIG_MISMATCH) {
+ if (msa.best_len >= 0) {
+ r = msa.best_len;
+ }
+ }
+#endif
}
end:
@@ -4997,7 +5075,7 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
static int
forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
- UChar* range, UChar** low, UChar** high, UChar** low_prev)
+ UChar* range, UChar** low, UChar** high)
{
UChar *p, *pprev = (UChar* )NULL;
@@ -5081,33 +5159,18 @@ forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
}
if (reg->dist_max == 0) {
- *low = p;
- if (low_prev) {
- if (*low > start)
- *low_prev = onigenc_get_prev_char_head(reg->enc, start, p);
- else
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p);
- }
+ *low = p;
*high = p;
}
else {
if (reg->dist_max != INFINITE_LEN) {
if (p - str < reg->dist_max) {
*low = (UChar* )str;
- if (low_prev)
- *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
}
else {
*low = p - reg->dist_max;
if (*low > start) {
- *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, start,
- *low, (const UChar** )low_prev);
- }
- else {
- if (low_prev)
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), *low);
+ *low = onigenc_get_right_adjust_char_head(reg->enc, start, *low);
}
}
}
@@ -5263,7 +5326,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
OnigOptionType option, OnigMatchParam* mp)
{
int r;
- UChar *s, *prev;
+ UChar *s;
MatchArg msa;
const UChar *orig_start = start;
@@ -5275,8 +5338,15 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
ADJUST_MATCH_PARAM(reg, mp);
+#ifndef USE_POSIX_API
+ if (OPTON_POSIX_REGION(option)) {
+ r = ONIGERR_INVALID_ARGUMENT;
+ goto finish_no_msa;
+ }
+#endif
+
if (region
-#ifdef USE_POSIX_API_REGION_OPTION
+#ifdef USE_POSIX_API
&& ! OPTON_POSIX_REGION(option)
#endif
) {
@@ -5294,27 +5364,14 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
}
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#define MATCH_AND_RETURN_CHECK(upper_range) \
- r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
- if (r != ONIG_MISMATCH) {\
- if (r >= 0) {\
- if (! OPTON_FIND_LONGEST(reg->options)) {\
- goto match;\
- }\
- }\
- else goto finish; /* error */ \
- }
-#else
-#define MATCH_AND_RETURN_CHECK(upper_range) \
- r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
+ r = match_at(reg, str, end, (upper_range), s, &msa);\
if (r != ONIG_MISMATCH) {\
if (r >= 0) {\
goto match;\
}\
else goto finish; /* error */ \
}
-#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
/* anchor optimize: resume search range */
@@ -5422,7 +5479,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
if (reg->threshold_len == 0) {
start = end = str = address_for_empty_string;
s = (UChar* )start;
- prev = (UChar* )NULL;
MATCH_ARG_INIT(msa, reg, option, region, start, mp);
MATCH_AND_RETURN_CHECK(end);
@@ -5440,13 +5496,8 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
s = (UChar* )start;
if (range > start) { /* forward search */
- if (s > str)
- prev = onigenc_get_prev_char_head(reg->enc, str, s);
- else
- prev = (UChar* )NULL;
-
if (reg->optimize != OPTIMIZE_NONE) {
- UChar *sch_range, *low, *high, *low_prev;
+ UChar *sch_range, *low, *high;
if (reg->dist_max != 0) {
if (reg->dist_max == INFINITE_LEN)
@@ -5467,27 +5518,27 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
if (reg->dist_max != INFINITE_LEN) {
do {
- if (! forward_search(reg, str, end, s, sch_range, &low, &high,
- &low_prev)) goto mismatch;
+ if (! forward_search(reg, str, end, s, sch_range, &low, &high))
+ goto mismatch;
if (s < low) {
s = low;
- prev = low_prev;
}
while (s <= high) {
MATCH_AND_RETURN_CHECK(data_range);
- prev = s;
s += enclen(reg->enc, s);
}
} while (s < range);
goto mismatch;
}
else { /* check only. */
- if (! forward_search(reg, str, end, s, sch_range, &low, &high,
- (UChar** )NULL)) goto mismatch;
+ if (! forward_search(reg, str, end, s, sch_range, &low, &high))
+ goto mismatch;
if ((reg->anchor & ANCR_ANYCHAR_INF) != 0 &&
(reg->anchor & (ANCR_LOOK_BEHIND | ANCR_PREC_READ_NOT)) == 0) {
do {
+ UChar* prev;
+
MATCH_AND_RETURN_CHECK(data_range);
prev = s;
s += enclen(reg->enc, s);
@@ -5504,7 +5555,6 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
do {
MATCH_AND_RETURN_CHECK(data_range);
- prev = s;
s += enclen(reg->enc, s);
} while (s < range);
@@ -5549,12 +5599,11 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
if (s > high)
s = high;
- while (s >= low) {
- prev = onigenc_get_prev_char_head(reg->enc, str, s);
+ while (PTR_GE(s, low)) {
MATCH_AND_RETURN_CHECK(orig_start);
- s = prev;
+ s = onigenc_get_prev_char_head(reg->enc, str, s);
}
- } while (s >= range);
+ } while (PTR_GE(s, range));
goto mismatch;
}
else { /* check only. */
@@ -5566,10 +5615,9 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
}
do {
- prev = onigenc_get_prev_char_head(reg->enc, str, s);
MATCH_AND_RETURN_CHECK(orig_start);
- s = prev;
- } while (s >= range);
+ s = onigenc_get_prev_char_head(reg->enc, str, s);
+ } while (PTR_GE(s, range));
}
mismatch:
@@ -5589,7 +5637,7 @@ search_in_range(regex_t* reg, const UChar* str, const UChar* end,
/* If result is mismatch and no FIND_NOT_EMPTY option,
then the region is not set in match_at(). */
if (OPTON_FIND_NOT_EMPTY(reg->options) && region
-#ifdef USE_POSIX_API_REGION_OPTION
+#ifdef USE_POSIX_API
&& !OPTON_POSIX_REGION(option)
#endif
) {
@@ -5952,7 +6000,7 @@ extern int
onig_init_for_match_at(regex_t* reg)
{
return match_at(reg, (const UChar* )NULL, (const UChar* )NULL,
- (const UChar* )NULL, (const UChar* )NULL, (UChar* )NULL,
+ (const UChar* )NULL, (const UChar* )NULL,
(MatchArg* )NULL);
}
#endif
@@ -6139,8 +6187,8 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i
const UChar* str;
StackType* stk_base;
int i;
- StackIndex* mem_start_stk;
- StackIndex* mem_end_stk;
+ StkPtrType* mem_start_stk;
+ StkPtrType* mem_end_stk;
i = mem_num;
reg = a->regex;
@@ -6150,7 +6198,7 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i
mem_end_stk = a->mem_end_stk;
if (i > 0) {
- if (a->mem_end_stk[i] != INVALID_STACK_INDEX) {
+ if (a->mem_end_stk[i].i != INVALID_STACK_INDEX) {
*begin = (int )(STACK_MEM_START(reg, i) - str);
*end = (int )(STACK_MEM_END(reg, i) - str);
}
diff --git a/src/regint.h b/src/regint.h
index 04ebe0a..74a5c61 100644
--- a/src/regint.h
+++ b/src/regint.h
@@ -34,6 +34,7 @@
/* #define ONIG_DEBUG_COMPILE */
/* #define ONIG_DEBUG_SEARCH */
/* #define ONIG_DEBUG_MATCH */
+/* #define ONIG_DEBUG_MATCH_COUNTER */
/* #define ONIG_DONT_OPTIMIZE */
/* for byte-code statistical data. */
@@ -41,7 +42,7 @@
#if defined(ONIG_DEBUG_PARSE) || defined(ONIG_DEBUG_MATCH) || \
defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \
- defined(ONIG_DEBUG_STATISTICS)
+ defined(ONIG_DEBUG_MATCH_COUNTER) || defined(ONIG_DEBUG_STATISTICS)
#ifndef ONIG_DEBUG
#define ONIG_DEBUG
#define DBGFP stderr
@@ -70,23 +71,29 @@
#endif
/* internal config */
+#define USE_CHECK_VALIDITY_OF_STRING_IN_TREE
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QUANT_PEEK_NEXT
#define USE_ST_LIBRARY
#define USE_TIMEOFDAY
+#define USE_STRICT_POINTER_ADDRESS
+#define USE_STRICT_POINTER_COMPARISON
#define USE_WORD_BEGIN_END /* "\<", "\>" */
#define USE_CAPTURE_HISTORY
#define USE_VARIABLE_META_CHARS
-#define USE_POSIX_API_REGION_OPTION
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
/* #define USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
+/* enabled by configure --enable-posix-api=yes */
+/* #define USE_POSIX_API */
+
+#define DEFAULT_PARSE_DEPTH_LIMIT 4096
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
#define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000
#define DEFAULT_RETRY_LIMIT_IN_SEARCH 0 /* unlimited */
-#define DEFAULT_PARSE_DEPTH_LIMIT 4096
+#define DEFAULT_SUBEXP_CALL_LIMIT_IN_SEARCH 0 /* unlimited */
#define DEFAULT_SUBEXP_CALL_MAX_NEST_LEVEL 20
@@ -181,6 +188,12 @@
#define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY
#define NULL_UCHARP ((UChar* )0)
+#ifdef USE_STRICT_POINTER_COMPARISON
+#define PTR_GE(p,q) ((p) != NULL && (p) >= (q))
+#else
+#define PTR_GE(p,q) (p) >= (q)
+#endif
+
#ifndef ONIG_INT_MAX
#define ONIG_INT_MAX INT_MAX
#endif
@@ -255,11 +268,22 @@
#ifdef _WIN32
-#if defined(_MSC_VER) && (_MSC_VER < 1300)
+#ifdef _MSC_VER
+
+#if _MSC_VER < 1300
typedef int intptr_t;
typedef unsigned int uintptr_t;
#endif
+
+#if _MSC_VER < 1600
+typedef __int32 int32_t;
+typedef unsigned __int32 uint32_t;
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+#endif
+
#endif
+#endif /* _WIN32 */
#if SIZEOF_VOIDP == SIZEOF_LONG
typedef unsigned long hash_data_type;
@@ -378,6 +402,9 @@ typedef unsigned int MemStatusType;
#define OPTON_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION)
#define OPTON_CHECK_VALIDITY_OF_STRING(option) ((option) & \
ONIG_OPTION_CHECK_VALIDITY_OF_STRING)
+#define OPTON_NOT_BEGIN_STRING(option) ((option) & ONIG_OPTION_NOT_BEGIN_STRING)
+#define OPTON_NOT_END_STRING(option) ((option) & ONIG_OPTION_NOT_END_STRING)
+#define OPTON_NOT_BEGIN_POSITION(option) ((option) & ONIG_OPTION_NOT_BEGIN_POSITION)
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
@@ -562,10 +589,14 @@ enum OpCode {
OP_BACKREF_N_IC,
OP_BACKREF_MULTI,
OP_BACKREF_MULTI_IC,
+#ifdef USE_BACKREF_WITH_LEVEL
OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
OP_BACKREF_WITH_LEVEL_IC, /* \k<xxx+n>, \k<xxx-n> */
+#endif
OP_BACKREF_CHECK, /* (?(n)), (?('name')) */
+#ifdef USE_BACKREF_WITH_LEVEL
OP_BACKREF_CHECK_WITH_LEVEL, /* (?(n-level)), (?('name-level')) */
+#endif
OP_MEM_START,
OP_MEM_START_PUSH, /* push back-tracker to stack */
OP_MEM_END_PUSH, /* push back-tracker to stack */
@@ -891,6 +922,9 @@ typedef struct {
} update_var;
struct {
AbsAddrType addr;
+#ifdef ONIG_DEBUG_MATCH_COUNTER
+ MemNumType called_mem;
+#endif
} call;
#ifdef USE_CALLOUT
struct {
diff --git a/src/regparse.c b/src/regparse.c
index cc015a7..dd2824b 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -290,7 +290,7 @@ bbuf_clone(BBuf** rto, BBuf* from)
CHECK_NULL_RETURN_MEMERR(to);
r = BB_INIT(to, from->alloc);
if (r != 0) {
- xfree(to->p);
+ bbuf_free(to);
*rto = 0;
return r;
}
@@ -303,6 +303,8 @@ static int
backref_rel_to_abs(int rel_no, ScanEnv* env)
{
if (rel_no > 0) {
+ if (rel_no > ONIG_INT_MAX - env->num_mem)
+ return ONIGERR_INVALID_BACKREF;
return env->num_mem + rel_no;
}
else {
@@ -437,6 +439,7 @@ strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end
int capa)
{
UChar* r;
+ ptrdiff_t dest_delta = dest_end - dest;
if (dest)
r = (UChar* )xrealloc(dest, capa + 1);
@@ -444,7 +447,7 @@ strcat_capa(UChar* dest, UChar* dest_end, const UChar* src, const UChar* src_end
r = (UChar* )xmalloc(capa + 1);
CHECK_NULL_RETURN(r);
- onig_strcpy(r + (dest_end - dest), src, src_end);
+ onig_strcpy(r + dest_delta, src, src_end);
return r;
}
@@ -1294,7 +1297,9 @@ static int
i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e,
void* arg ARG_UNUSED)
{
- xfree(e->name);
+ if (IS_NOT_NULL(e)) {
+ xfree(e->name);
+ }
/*xfree(key->s); */ /* is same as e->name */
xfree(key);
xfree(e);
@@ -2502,7 +2507,7 @@ node_new_call(UChar* name, UChar* name_end, int gnum, int by_number)
CALL_(node)->by_number = by_number;
CALL_(node)->name = name;
CALL_(node)->name_end = name_end;
- CALL_(node)->group_num = gnum;
+ CALL_(node)->called_gnum = gnum;
CALL_(node)->entry_count = 1;
return node;
}
@@ -3135,7 +3140,6 @@ make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* qua
lower = QUANT_(quant)->lower;
upper = QUANT_(quant)->upper;
- onig_node_free(quant);
r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env);
if (r != 0) goto err;
@@ -3202,9 +3206,9 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
simple:
r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant,
body, possessive, env);
+ onig_node_free(quant);
if (r != 0) {
ns[4] = NULL_NODE;
- onig_node_free(quant);
onig_node_free(body);
goto err;
}
@@ -3708,21 +3712,24 @@ get_next_code_point(UChar** src, UChar* end, int base, OnigEncoding enc, int in_
while (! PEND) {
PFETCH(c);
- if (! IS_CODE_POINT_DIVIDE(c)) break;
- }
- if (IS_CODE_POINT_DIVIDE(c))
- return ONIGERR_INVALID_CODE_POINT_VALUE;
-
- if (c == '}') {
- *src = p;
- return 1; /* end of sequence */
- }
- else if (c == '-' && in_cc == TRUE) {
- *src = p;
- return 2; /* range */
+ if (! IS_CODE_POINT_DIVIDE(c)) {
+ if (c == '}') {
+ *src = p;
+ return 1; /* end of sequence */
+ }
+ else if (c == '-' && in_cc == TRUE) {
+ *src = p;
+ return 2; /* range */
+ }
+ PUNFETCH;
+ break;
+ }
+ else {
+ if (PEND)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+ }
}
- PUNFETCH;
r = scan_number_of_base(&p, end, 1, enc, rcode, base);
if (r != 0) return r;
@@ -3873,13 +3880,17 @@ not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf)
to = data[i*2+1];
if (pre <= from - 1) {
r = add_code_range_to_buf(pbuf, pre, from - 1);
- if (r != 0) return r;
+ if (r != 0) {
+ bbuf_free(*pbuf);
+ return r;
+ }
}
if (to == ~((OnigCodePoint )0)) break;
pre = to + 1;
}
if (to < ~((OnigCodePoint )0)) {
r = add_code_range_to_buf(pbuf, to + 1, ~((OnigCodePoint )0));
+ if (r != 0) bbuf_free(*pbuf);
}
return r;
}
@@ -4564,7 +4575,7 @@ fetch_interval(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
/* \M-, \C-, \c, or \... */
static int
-fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
+fetch_escaped_value_raw(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
{
int v;
OnigCodePoint c;
@@ -4583,7 +4594,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
if (PEND) return ONIGERR_END_PATTERN_AT_META;
PFETCH_S(c);
if (c == MC_ESC(env->syntax)) {
- v = fetch_escaped_value(&p, end, env, &c);
+ v = fetch_escaped_value_raw(&p, end, env, &c);
if (v < 0) return v;
}
c = ((c & 0xff) | 0x80);
@@ -4612,7 +4623,7 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
}
else {
if (c == MC_ESC(env->syntax)) {
- v = fetch_escaped_value(&p, end, env, &c);
+ v = fetch_escaped_value_raw(&p, end, env, &c);
if (v < 0) return v;
}
c &= 0x9f;
@@ -4634,6 +4645,21 @@ fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
return 0;
}
+static int
+fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val)
+{
+ int r;
+ int len;
+
+ r = fetch_escaped_value_raw(src, end, env, val);
+ if (r != 0) return r;
+
+ len = ONIGENC_CODE_TO_MBCLEN(env->enc, *val);
+ if (len < 0) return len;
+
+ return 0;
+}
+
static int fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env);
static OnigCodePoint
@@ -5192,7 +5218,7 @@ fetch_token_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env, int state)
else {
int curr_state;
- curr_state = (state == CS_RANGE) ? CPS_EMPTY : CPS_START;
+ curr_state = (state == CS_RANGE) ? CPS_EMPTY : CPS_START;
r = check_code_point_sequence_cc(p, end, tok->base_num, enc,
curr_state);
if (r < 0) return r;
@@ -6372,7 +6398,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
}
static int
-parse_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
+prs_posix_bracket(CClassNode* cc, UChar** src, UChar* end, ScanEnv* env)
{
#define POSIX_BRACKET_CHECK_LIMIT_LENGTH 20
#define POSIX_BRACKET_NAME_MIN_LEN 4
@@ -6481,7 +6507,7 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
}
static int
-parse_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
+prs_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
int r, ctype;
CClassNode* cc;
@@ -6617,7 +6643,7 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
}
static int
-parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
+prs_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
int r, neg, len, fetched, and_start;
OnigCodePoint in_code, curr_code;
@@ -6715,6 +6741,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
p = psave;
for (i = 1; i < len; i++) {
r = fetch_token_cc(tok, &p, end, env, CS_COMPLETE);
+ if (r < 0) goto err;
}
fetched = 0;
}
@@ -6759,7 +6786,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
break;
case TK_CC_POSIX_BRACKET_OPEN:
- r = parse_posix_bracket(cc, &p, end, env);
+ r = prs_posix_bracket(cc, &p, end, env);
if (r < 0) goto err;
if (r == 1) { /* is not POSIX bracket */
CC_ESC_WARN(env, (UChar* )"[");
@@ -6869,7 +6896,7 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
state = CS_COMPLETE;
- r = parse_cc(&anode, tok, &p, end, env);
+ r = prs_cc(&anode, tok, &p, end, env);
if (r != 0) {
onig_node_free(anode);
goto cc_open_err;
@@ -6967,14 +6994,14 @@ parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
return r;
}
-static int parse_alts(Node** top, PToken* tok, int term,
- UChar** src, UChar* end, ScanEnv* env, int group_head);
+static int prs_alts(Node** top, PToken* tok, int term,
+ UChar** src, UChar* end, ScanEnv* env, int group_head);
#ifdef USE_CALLOUT
/* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */
static int
-parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)
+prs_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)
{
int r;
int i;
@@ -7078,18 +7105,18 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv
contents = onigenc_strdup(enc, code_start, code_end);
CHECK_NULL_RETURN_MEMERR(contents);
- r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);
- if (r != 0) {
- xfree(contents);
- return r;
- }
-
e = onig_reg_callout_list_at(env->reg, num);
if (IS_NULL(e)) {
xfree(contents);
return ONIGERR_MEMORY;
}
+ r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env);
+ if (r != 0) {
+ xfree(contents);
+ return r;
+ }
+
e->of = ONIG_CALLOUT_OF_CONTENTS;
e->in = in;
e->name_id = ONIG_NON_NAME_ID;
@@ -7101,7 +7128,7 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv
}
static long
-parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)
+prs_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl)
{
long v;
long d;
@@ -7137,10 +7164,27 @@ parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long*
return ONIG_NORMAL;
}
+static void
+clear_callout_args(int n, unsigned int types[], OnigValue vals[])
+{
+ int i;
+
+ for (i = 0; i < n; i++) {
+ switch (types[i]) {
+ case ONIG_TYPE_STRING:
+ if (IS_NOT_NULL(vals[i].s.start))
+ xfree(vals[i].s.start);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
static int
-parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
- int max_arg_num, unsigned int types[], OnigValue vals[],
- ScanEnv* env)
+prs_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
+ int max_arg_num, unsigned int types[], OnigValue vals[],
+ ScanEnv* env)
{
#define MAX_CALLOUT_ARG_BYTE_LENGTH 128
@@ -7168,7 +7212,10 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
bufend = buf;
s = e = p;
while (1) {
- if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
+ if (PEND) {
+ r = ONIGERR_INVALID_CALLOUT_PATTERN;
+ goto err_clear;
+ }
e = p;
PFETCH_S(c);
@@ -7196,8 +7243,10 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
add_char:
if (skip_mode == FALSE) {
clen = p - e;
- if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)
- return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */
+ if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH) {
+ r = ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */
+ goto err_clear;
+ }
xmemcpy(bufend, e, clen);
bufend += clen;
@@ -7208,15 +7257,17 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
}
if (cn != 0) {
- if (max_arg_num >= 0 && n >= max_arg_num)
- return ONIGERR_INVALID_CALLOUT_ARG;
+ if (max_arg_num >= 0 && n >= max_arg_num) {
+ r = ONIGERR_INVALID_CALLOUT_ARG;
+ goto err_clear;
+ }
if (skip_mode == FALSE) {
if ((types[n] & ONIG_TYPE_LONG) != 0) {
int fixed = 0;
if (cn > 0) {
long rl;
- r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl);
+ r = prs_long(enc, buf, bufend, 1, LONG_MAX, &rl);
if (r == ONIG_NORMAL) {
vals[n].l = rl;
fixed = 1;
@@ -7226,8 +7277,10 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
if (fixed == 0) {
types[n] = (types[n] & ~ONIG_TYPE_LONG);
- if (types[n] == ONIG_TYPE_VOID)
- return ONIGERR_INVALID_CALLOUT_ARG;
+ if (types[n] == ONIG_TYPE_VOID) {
+ r = ONIGERR_INVALID_CALLOUT_ARG;
+ goto err_clear;
+ }
}
}
@@ -7236,22 +7289,29 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
break;
case ONIG_TYPE_CHAR:
- if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG;
+ if (cn != 1) {
+ r = ONIGERR_INVALID_CALLOUT_ARG;
+ goto err_clear;
+ }
vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend);
break;
case ONIG_TYPE_STRING:
{
UChar* rs = onigenc_strdup(enc, buf, bufend);
- CHECK_NULL_RETURN_MEMERR(rs);
+ if (IS_NULL(rs)) {
+ r = ONIGERR_MEMORY; goto err_clear;
+ }
vals[n].s.start = rs;
vals[n].s.end = rs + (e - s);
}
break;
case ONIG_TYPE_TAG:
- if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e))
- return ONIGERR_INVALID_CALLOUT_TAG_NAME;
+ if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e)) {
+ r = ONIGERR_INVALID_CALLOUT_TAG_NAME;
+ goto err_clear;
+ }
vals[n].s.start = s;
vals[n].s.end = e;
@@ -7259,7 +7319,8 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
case ONIG_TYPE_VOID:
case ONIG_TYPE_POINTER:
- return ONIGERR_PARSER_BUG;
+ r = ONIGERR_PARSER_BUG;
+ goto err_clear;
break;
}
}
@@ -7270,15 +7331,23 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
if (c == cterm) break;
}
- if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN;
+ if (c != cterm) {
+ r = ONIGERR_INVALID_CALLOUT_PATTERN;
+ goto err_clear;
+ }
*src = p;
return n;
+
+ err_clear:
+ if (skip_mode == FALSE)
+ clear_callout_args(n, types, vals);
+ return r;
}
/* (*name[TAG]) (*name[TAG]{a,b,..}) */
static int
-parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)
+prs_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env)
{
int r;
int i;
@@ -7343,7 +7412,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
/* read for single check only */
save = p;
- arg_num = parse_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env);
+ arg_num = prs_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env);
if (arg_num < 0) return arg_num;
is_not_single = PPEEK_IS(cterm) ? 0 : 1;
@@ -7357,10 +7426,13 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
types[i] = get_callout_arg_type_by_name_id(name_id, i);
}
- arg_num = parse_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env);
+ arg_num = prs_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env);
if (arg_num < 0) return arg_num;
- if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
+ if (PEND) {
+ r = ONIGERR_END_PATTERN_IN_GROUP;
+ goto err_clear;
+ }
PFETCH_S(c);
}
else {
@@ -7379,32 +7451,40 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
in = onig_get_callout_in_by_name_id(name_id);
opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id);
- if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num))
- return ONIGERR_INVALID_CALLOUT_ARG;
+ if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num)) {
+ r = ONIGERR_INVALID_CALLOUT_ARG;
+ goto err_clear;
+ }
- if (c != cterm)
- return ONIGERR_INVALID_CALLOUT_PATTERN;
+ if (c != cterm) {
+ r = ONIGERR_INVALID_CALLOUT_PATTERN;
+ goto err_clear;
+ }
r = reg_callout_list_entry(env, &num);
- if (r != 0) return r;
+ if (r != 0) goto err_clear;
ext = onig_get_regex_ext(env->reg);
- CHECK_NULL_RETURN_MEMERR(ext);
+ if (IS_NULL(ext)) {
+ r = ONIGERR_MEMORY; goto err_clear;
+ }
if (IS_NULL(ext->pattern)) {
r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end);
- if (r != ONIG_NORMAL) return r;
+ if (r != ONIG_NORMAL) goto err_clear;
}
if (tag_start != tag_end) {
r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
- if (r != ONIG_NORMAL) return r;
+ if (r != ONIG_NORMAL) goto err_clear;
}
- r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);
- if (r != ONIG_NORMAL) return r;
-
e = onig_reg_callout_list_at(env->reg, num);
- CHECK_NULL_RETURN_MEMERR(e);
+ if (IS_NULL(e)) {
+ r = ONIGERR_MEMORY; goto err_clear;
+ }
+
+ r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env);
+ if (r != ONIG_NORMAL) goto err_clear;
e->of = ONIG_CALLOUT_OF_NAME;
e->in = in;
@@ -7425,12 +7505,16 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
*np = node;
*src = p;
return 0;
+
+ err_clear:
+ clear_callout_args(arg_num, types, vals);
+ return r;
}
#endif
static int
-parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
- ScanEnv* env)
+prs_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
+ ScanEnv* env)
{
int r, num;
Node *target;
@@ -7457,7 +7541,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
group:
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_alts(np, tok, term, &p, end, env, FALSE);
+ r = prs_alts(np, tok, term, &p, end, env, FALSE);
if (r < 0) return r;
*src = p;
return 1; /* group */
@@ -7554,7 +7638,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_alts(&absent, tok, term, &p, end, env, TRUE);
+ r = prs_alts(&absent, tok, term, &p, end, env, TRUE);
if (r < 0) {
onig_node_free(absent);
return r;
@@ -7600,7 +7684,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS))
return ONIGERR_UNDEFINED_GROUP_OPTION;
- r = parse_callout_of_contents(np, ')', &p, end, env);
+ r = prs_callout_of_contents(np, ')', &p, end, env);
if (r != 0) return r;
goto end;
@@ -7620,10 +7704,12 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (IS_CODE_DIGIT_ASCII(enc, c)
|| c == '-' || c == '+' || c == '<' || c == '\'') {
- UChar* name_end;
- int back_num;
+#ifdef USE_BACKREF_WITH_LEVEL
int exist_level;
int level;
+#endif
+ UChar* name_end;
+ int back_num;
enum REF_NUM num_type;
int is_enclosed;
@@ -7631,8 +7717,8 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (! is_enclosed)
PUNFETCH;
prev = p;
- exist_level = 0;
#ifdef USE_BACKREF_WITH_LEVEL
+ exist_level = 0;
name_end = NULL_UCHARP; /* no need. escape gcc warning. */
r = fetch_name_with_level(
(OnigCodePoint )(is_enclosed != 0 ? c : '('),
@@ -7709,7 +7795,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
/* condition part is callouts of contents: (?(?{...})THEN|ELSE) */
condition_is_checker = 0;
PFETCH(c);
- r = parse_callout_of_contents(&condition, ')', &p, end, env);
+ r = prs_callout_of_contents(&condition, ')', &p, end, env);
if (r != 0) return r;
goto end_condition;
}
@@ -7719,7 +7805,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
else if (c == '*' &&
IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
condition_is_checker = 0;
- r = parse_callout_of_name(&condition, ')', &p, end, env);
+ r = prs_callout_of_name(&condition, ')', &p, end, env);
if (r != 0) return r;
goto end_condition;
}
@@ -7730,7 +7816,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
condition_is_checker = 0;
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_alts(&condition, tok, term, &p, end, env, FALSE);
+ r = prs_alts(&condition, tok, term, &p, end, env, FALSE);
if (r < 0) {
onig_node_free(condition);
return r;
@@ -7773,7 +7859,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
onig_node_free(condition);
return r;
}
- r = parse_alts(&target, tok, term, &p, end, env, TRUE);
+ r = prs_alts(&target, tok, term, &p, end, env, TRUE);
if (r < 0) {
onig_node_free(condition);
onig_node_free(target);
@@ -7949,7 +8035,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
env->options = option;
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_alts(&target, tok, term, &p, end, env, FALSE);
+ r = prs_alts(&target, tok, term, &p, end, env, FALSE);
env->options = prev;
if (r < 0) {
onig_node_free(target);
@@ -7976,7 +8062,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
else if (c == '*' &&
IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) {
PINC;
- r = parse_callout_of_name(np, ')', &p, end, env);
+ r = prs_callout_of_name(np, ')', &p, end, env);
if (r != 0) return r;
goto end;
@@ -7996,7 +8082,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
CHECK_NULL_RETURN_MEMERR(*np);
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_alts(&target, tok, term, &p, end, env, FALSE);
+ r = prs_alts(&target, tok, term, &p, end, env, FALSE);
if (r < 0) {
onig_node_free(target);
return r;
@@ -8006,7 +8092,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (NODE_TYPE(*np) == NODE_BAG) {
if (BAG_(*np)->type == BAG_MEMORY) {
- /* Don't move this to previous of parse_alts() */
+ /* Don't move this to previous of prs_alts() */
r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np);
if (r != 0) return r;
}
@@ -8285,8 +8371,8 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)
}
static int
-parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
- ScanEnv* env, int group_head)
+prs_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
+ ScanEnv* env, int group_head)
{
int r, len, group;
Node* qn;
@@ -8311,7 +8397,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
break;
case TK_SUBEXP_OPEN:
- r = parse_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env);
+ r = prs_bag(np, tok, TK_SUBEXP_CLOSE, src, end, env);
if (r < 0) return r;
if (r == 1) { /* group */
if (group_head == 0)
@@ -8341,7 +8427,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
env->options = BAG_(*np)->o.options;
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
- r = parse_alts(&target, tok, term, src, end, env, FALSE);
+ r = prs_alts(&target, tok, term, src, end, env, FALSE);
env->options = prev;
if (r < 0) {
onig_node_free(target);
@@ -8419,8 +8505,9 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
case TK_CODE_POINT:
{
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
+ len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code);
if (len < 0) return len;
+ len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
*np = node_new_str_crude(buf, buf + len, env->options);
#else
@@ -8465,7 +8552,12 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
*np = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(*np);
cc = CCLASS_(*np);
- add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env);
+ r = add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env);
+ if (r != 0) {
+ onig_node_free(*np);
+ *np = NULL_NODE;
+ return r;
+ }
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
}
break;
@@ -8478,7 +8570,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
break;
case TK_CHAR_PROPERTY:
- r = parse_char_property(np, tok, src, end, env);
+ r = prs_char_property(np, tok, src, end, env);
if (r != 0) return r;
break;
@@ -8486,7 +8578,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
{
CClassNode* cc;
- r = parse_cc(np, tok, src, end, env);
+ r = prs_cc(np, tok, src, end, env);
if (r != 0) return r;
cc = CCLASS_(*np);
@@ -8685,8 +8777,8 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
}
static int
-parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
- ScanEnv* env, int group_head)
+prs_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
+ ScanEnv* env, int group_head)
{
int r;
Node *node, **headp;
@@ -8694,7 +8786,7 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
*top = NULL;
INC_PARSE_DEPTH(env->parse_depth);
- r = parse_exp(&node, tok, term, src, end, env, group_head);
+ r = prs_exp(&node, tok, term, src, end, env, group_head);
if (r < 0) {
onig_node_free(node);
return r;
@@ -8712,7 +8804,7 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
headp = &(NODE_CDR(*top));
while (r != TK_EOT && r != term && r != TK_ALT) {
- r = parse_exp(&node, tok, term, src, end, env, FALSE);
+ r = prs_exp(&node, tok, term, src, end, env, FALSE);
if (r < 0) {
onig_node_free(node);
return r;
@@ -8736,8 +8828,8 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
static int
-parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
- ScanEnv* env, int group_head)
+prs_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
+ ScanEnv* env, int group_head)
{
int r;
Node *node, **headp;
@@ -8747,7 +8839,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
INC_PARSE_DEPTH(env->parse_depth);
save_options = env->options;
- r = parse_branch(&node, tok, term, src, end, env, group_head);
+ r = prs_branch(&node, tok, term, src, end, env, group_head);
if (r < 0) {
onig_node_free(node);
return r;
@@ -8767,7 +8859,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
while (r == TK_ALT) {
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
- r = parse_branch(&node, tok, term, src, end, env, FALSE);
+ r = prs_branch(&node, tok, term, src, end, env, FALSE);
if (r < 0) {
onig_node_free(node);
return r;
@@ -8800,7 +8892,7 @@ parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
}
static int
-parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
+prs_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
{
int r;
PToken tok;
@@ -8808,7 +8900,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
ptoken_init(&tok);
r = fetch_token(&tok, src, end, env);
if (r < 0) return r;
- r = parse_alts(top, &tok, TK_EOT, src, end, env, FALSE);
+ r = prs_alts(top, &tok, TK_EOT, src, end, env, FALSE);
if (r < 0) return r;
return 0;
@@ -8846,6 +8938,15 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
RegexExt* ext;
#endif
+ reg->string_pool = 0;
+ reg->string_pool_end = 0;
+ reg->num_mem = 0;
+ reg->num_repeat = 0;
+ reg->num_empty_check = 0;
+ reg->repeat_range_alloc = 0;
+ reg->repeat_range = (RepeatRange* )NULL;
+ reg->empty_status_mem = 0;
+
names_clear(reg);
scan_env_clear(env);
@@ -8863,7 +8964,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end,
return ONIGERR_INVALID_WIDE_CHAR_VALUE;
p = (UChar* )pattern;
- r = parse_regexp(root, &p, (UChar* )end, env);
+ r = prs_regexp(root, &p, (UChar* )end, env);
if (r != 0) return r;
#ifdef USE_CALL
diff --git a/src/regparse.h b/src/regparse.h
index 979e982..c60a42d 100644
--- a/src/regparse.h
+++ b/src/regparse.h
@@ -163,7 +163,7 @@ typedef struct {
struct _Node* body; /* to BagNode : BAG_MEMORY */
int by_number;
- int group_num;
+ int called_gnum;
UChar* name;
UChar* name_end;
int entry_count;
@@ -339,6 +339,7 @@ typedef struct {
#define NODE_ST_TEXT_SEGMENT_WORD (1<<23)
#define NODE_ST_ABSENT_WITH_SIDE_EFFECTS (1<<24) /* stopper or clear */
#define NODE_ST_FIXED_CLEN_MIN_SURE (1<<25)
+#define NODE_ST_REFERENCED (1<<26)
#define NODE_STATUS(node) (((Node* )node)->u.base.status)
@@ -374,6 +375,7 @@ typedef struct {
#define NODE_IS_TEXT_SEGMENT_WORD(node) ((NODE_STATUS(node) & NODE_ST_TEXT_SEGMENT_WORD) != 0)
#define NODE_IS_ABSENT_WITH_SIDE_EFFECTS(node) ((NODE_STATUS(node) & NODE_ST_ABSENT_WITH_SIDE_EFFECTS) != 0)
#define NODE_IS_FIXED_CLEN_MIN_SURE(node) ((NODE_STATUS(node) & NODE_ST_FIXED_CLEN_MIN_SURE) != 0)
+#define NODE_IS_REFERENCED(node) ((NODE_STATUS(node) & NODE_ST_REFERENCED) != 0)
#define NODE_PARENT(node) ((node)->u.base.parent)
#define NODE_BODY(node) ((node)->u.base.body)
diff --git a/src/regposerr.c b/src/regposerr.c
index 12d95a9..e5b7899 100644
--- a/src/regposerr.c
+++ b/src/regposerr.c
@@ -37,6 +37,18 @@
#include "config.h"
#include "onigposix.h"
+#undef regex_t
+#undef regmatch_t
+#undef regoff_t
+#undef regcomp
+#undef regexec
+#undef regfree
+#undef regerror
+#undef reg_set_encoding
+#undef reg_name_to_group_numbers
+#undef reg_foreach_name
+#undef reg_number_of_names
+
#ifndef ONIG_NO_STANDARD_C_HEADERS
#include <string.h>
#include <stdio.h>
@@ -92,10 +104,9 @@ static char* ESTRING[] = {
};
-
extern size_t
-regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf,
- size_t size)
+onig_posix_regerror(int posix_ecode, const onig_posix_regex_t* reg ARG_UNUSED,
+ char* buf, size_t size)
{
char* s;
char tbuf[35];
@@ -121,3 +132,14 @@ regerror(int posix_ecode, const regex_t* reg ARG_UNUSED, char* buf,
}
return len;
}
+
+#ifdef USE_BINARY_COMPATIBLE_POSIX_API
+
+extern size_t
+regerror(int posix_ecode, const onig_posix_regex_t* reg ARG_UNUSED,
+ char* buf, size_t size)
+{
+ return onig_posix_regerror(posix_ecode, reg, buf, size);
+}
+
+#endif
diff --git a/src/regposix.c b/src/regposix.c
index 4e523a4..497ba02 100644
--- a/src/regposix.c
+++ b/src/regposix.c
@@ -33,6 +33,18 @@
#include "onigposix.h"
+#undef regex_t
+#undef regmatch_t
+#undef regoff_t
+#undef regcomp
+#undef regexec
+#undef regfree
+#undef regerror
+#undef reg_set_encoding
+#undef reg_name_to_group_numbers
+#undef reg_foreach_name
+#undef reg_number_of_names
+
#define ONIG_C(reg) ((onig_regex_t* )((reg)->onig))
#define PONIG_C(reg) ((onig_regex_t** )(&(reg)->onig))
@@ -64,6 +76,7 @@ onig2posix_error_code(int code)
{ ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL },
{ ONIGERR_RETRY_LIMIT_IN_MATCH_OVER, REG_EONIG_INTERNAL },
{ ONIGERR_RETRY_LIMIT_IN_SEARCH_OVER, REG_EONIG_INTERNAL },
+ { ONIGERR_SUBEXP_CALL_LIMIT_IN_SEARCH_OVER, REG_EONIG_INTERNAL },
{ ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL },
{ ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL },
{ ONIGERR_STACK_BUG, REG_EONIG_INTERNAL },
@@ -144,7 +157,7 @@ onig2posix_error_code(int code)
}
extern int
-regcomp(regex_t* reg, const char* pattern, int posix_options)
+onig_posix_regcomp(onig_posix_regex_t* reg, const char* pattern, int posix_options)
{
int r, len;
OnigSyntaxType* syntax = OnigDefaultSyntax;
@@ -178,12 +191,12 @@ regcomp(regex_t* reg, const char* pattern, int posix_options)
}
extern int
-regexec(regex_t* reg, const char* str, size_t nmatch,
- regmatch_t pmatch[], int posix_options)
+onig_posix_regexec(onig_posix_regex_t* reg, const char* str, size_t nmatch,
+ onig_posix_regmatch_t pmatch[], int posix_options)
{
int r, i, len;
UChar* end;
- regmatch_t* pm;
+ onig_posix_regmatch_t* pm;
OnigOptionType options;
options = ONIG_OPTION_POSIX_REGION;
@@ -191,11 +204,11 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
if ((posix_options & REG_NOTEOL) != 0) options |= ONIG_OPTION_NOTEOL;
if (nmatch == 0 || (reg->comp_options & REG_NOSUB) != 0) {
- pm = (regmatch_t* )NULL;
+ pm = (onig_posix_regmatch_t* )NULL;
nmatch = 0;
}
else if ((int )nmatch < ONIG_C(reg)->num_mem + 1) {
- pm = (regmatch_t* )xmalloc(sizeof(regmatch_t)
+ pm = (onig_posix_regmatch_t* )xmalloc(sizeof(onig_posix_regmatch_t)
* (ONIG_C(reg)->num_mem + 1));
if (pm == NULL)
return REG_ESPACE;
@@ -212,7 +225,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
if (r >= 0) {
r = 0; /* Match */
if (pm != pmatch && pm != NULL) {
- xmemcpy(pmatch, pm, sizeof(regmatch_t) * nmatch);
+ xmemcpy(pmatch, pm, sizeof(onig_posix_regmatch_t) * nmatch);
}
}
else if (r == ONIG_MISMATCH) {
@@ -236,7 +249,7 @@ regexec(regex_t* reg, const char* str, size_t nmatch,
}
extern void
-regfree(regex_t* reg)
+onig_posix_regfree(onig_posix_regex_t* reg)
{
onig_free(ONIG_C(reg));
reg->onig = (void* )0;
@@ -244,7 +257,7 @@ regfree(regex_t* reg)
extern void
-reg_set_encoding(int mb_code)
+onig_posix_reg_set_encoding(int mb_code)
{
OnigEncoding enc;
@@ -279,15 +292,15 @@ reg_set_encoding(int mb_code)
}
extern int
-reg_name_to_group_numbers(regex_t* reg,
+onig_posix_reg_name_to_group_numbers(onig_posix_regex_t* reg,
const unsigned char* name, const unsigned char* name_end, int** nums)
{
return onig_name_to_group_numbers(ONIG_C(reg), name, name_end, nums);
}
typedef struct {
- int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*);
- regex_t* reg;
+ int (*func)(const unsigned char*, const unsigned char*,int,int*,onig_posix_regex_t*,void*);
+ onig_posix_regex_t* reg;
void* arg;
} i_wrap;
@@ -301,8 +314,8 @@ i_wrapper(const UChar* name, const UChar* name_end, int ng, int* gs,
}
extern int
-reg_foreach_name(regex_t* reg,
- int (*func)(const unsigned char*, const unsigned char*,int,int*,regex_t*,void*),
+onig_posix_reg_foreach_name(onig_posix_regex_t* reg,
+ int (*func)(const unsigned char*, const unsigned char*,int,int*,onig_posix_regex_t*,void*),
void* arg)
{
i_wrap warg;
@@ -315,7 +328,58 @@ reg_foreach_name(regex_t* reg,
}
extern int
-reg_number_of_names(regex_t* reg)
+onig_posix_reg_number_of_names(onig_posix_regex_t* reg)
{
return onig_number_of_names(ONIG_C(reg));
}
+
+
+#ifdef USE_BINARY_COMPATIBLE_POSIX_API
+
+extern int
+regcomp(onig_posix_regex_t* reg, const char* pattern, int posix_options)
+{
+ return onig_posix_regcomp(reg, pattern, posix_options);
+}
+
+extern int
+regexec(onig_posix_regex_t* reg, const char* str, size_t nmatch,
+ onig_posix_regmatch_t pmatch[], int posix_options)
+{
+ return onig_posix_regexec(reg, str, nmatch, pmatch, posix_options);
+}
+
+extern void
+regfree(onig_posix_regex_t* reg)
+{
+ onig_posix_regfree(reg);
+}
+
+extern void
+reg_set_encoding(int mb_code)
+{
+ onig_posix_reg_set_encoding(mb_code);
+}
+
+extern int
+reg_name_to_group_numbers(onig_posix_regex_t* reg,
+ const unsigned char* name, const unsigned char* name_end, int** nums)
+{
+ return onig_posix_reg_name_to_group_numbers(reg, name, name_end, nums);
+}
+
+extern int
+reg_foreach_name(onig_posix_regex_t* reg,
+ int (*func)(const unsigned char*, const unsigned char*,int,int*,onig_posix_regex_t*,void*),
+ void* arg)
+{
+ return onig_posix_reg_foreach_name(reg, func, arg);
+}
+
+extern int
+reg_number_of_names(onig_posix_regex_t* reg)
+{
+ return onig_posix_reg_number_of_names(reg);
+}
+
+#endif /* USE_BINARY_COMPATIBLE_POSIX_API */
diff --git a/src/sjis.c b/src/sjis.c
index 1fd92d9..10afd9d 100644
--- a/src/sjis.c
+++ b/src/sjis.c
@@ -2,7 +2,7 @@
sjis.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako
+ * Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -113,13 +113,15 @@ static int
code_to_mbclen(OnigCodePoint code)
{
if (code < 256) {
- return EncLen_SJIS[(int )code] == 1;
+ if (EncLen_SJIS[(int )code] == 1)
+ return 1;
}
- else if (code <= 0xffff) {
- return 2;
+ else if (code < 0x10000) {
+ if (EncLen_SJIS[(int )(code >> 8) & 0xff] == 2)
+ return 2;
}
- else
- return ONIGERR_INVALID_CODE_POINT_VALUE;
+
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
}
static OnigCodePoint
diff --git a/src/st.c b/src/st.c
index 522f205..8ee610b 100644
--- a/src/st.c
+++ b/src/st.c
@@ -151,6 +151,7 @@ st_init_table_with_size(type, size)
#endif
size = new_size(size); /* round up to prime number */
+ if (size <= 0) return 0;
tbl = alloc(st_table);
if (tbl == 0) return 0;
@@ -318,10 +319,13 @@ rehash(table)
register st_table *table;
{
register st_table_entry *ptr, *next, **new_bins;
- int i, old_num_bins = table->num_bins, new_num_bins;
+ int i, new_num_bins, old_num_bins;
unsigned int hash_val;
- new_num_bins = new_size(old_num_bins+1);
+ old_num_bins = table->num_bins;
+ new_num_bins = new_size(old_num_bins + 1);
+ if (new_num_bins <= 0) return ;
+
new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*));
if (new_bins == 0) {
return ;
diff --git a/src/unicode.c b/src/unicode.c
index 080da74..6703d4b 100644
--- a/src/unicode.c
+++ b/src/unicode.c
@@ -387,15 +387,15 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
for (i = 0; i < ncs[0]; i++) {
for (j = 0; j < ncs[1]; j++) {
for (k = 0; k < ncs[2]; k++) {
+ if (cs[0][i] == orig_codes[0] && cs[1][j] == orig_codes[1] &&
+ cs[2][k] == orig_codes[2])
+ continue;
+
items[n].byte_len = lens[2];
items[n].code_len = 3;
items[n].code[0] = cs[0][i];
items[n].code[1] = cs[1][j];
items[n].code[2] = cs[2][k];
- if (items[n].code[0] == orig_codes[0] &&
- items[n].code[1] == orig_codes[1] &&
- items[n].code[2] == orig_codes[2])
- continue;
n++;
}
}
@@ -431,13 +431,12 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
for (i = 0; i < ncs[0]; i++) {
for (j = 0; j < ncs[1]; j++) {
+ if (cs[0][i] == orig_codes[0] && cs[1][j] == orig_codes[1])
+ continue;
items[n].byte_len = lens[1];
items[n].code_len = 2;
items[n].code[0] = cs[0][i];
items[n].code[1] = cs[1][j];
- if (items[n].code[0] == orig_codes[0] &&
- items[n].code[1] == orig_codes[1])
- continue;
n++;
}
}
diff --git a/src/utf16_be.c b/src/utf16_be.c
index d237b93..5014e18 100644
--- a/src/utf16_be.c
+++ b/src/utf16_be.c
@@ -2,7 +2,7 @@
utf16_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako
+ * Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/utf16_le.c b/src/utf16_le.c
index f14d263..35ceb3c 100644
--- a/src/utf16_le.c
+++ b/src/utf16_le.c
@@ -2,7 +2,7 @@
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako
+ * Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/utf32_be.c b/src/utf32_be.c
index bdd3db7..31bd98b 100644
--- a/src/utf32_be.c
+++ b/src/utf32_be.c
@@ -2,7 +2,7 @@
utf32_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako
+ * Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -67,7 +67,10 @@ utf32be_is_mbc_newline(const UChar* p, const UChar* end)
static OnigCodePoint
utf32be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
- return (OnigCodePoint )(((p[0] * 256 + p[1]) * 256 + p[2]) * 256 + p[3]);
+ OnigCodePoint code;
+
+ code = (OnigCodePoint )((((p[0] & 0x7f) * 256 + p[1]) * 256 + p[2]) * 256 + p[3]);
+ return code;
}
static int
diff --git a/src/utf32_le.c b/src/utf32_le.c
index 473ab74..f50cab7 100644
--- a/src/utf32_le.c
+++ b/src/utf32_le.c
@@ -2,7 +2,7 @@
utf32_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako
+ * Copyright (c) 2002-2020 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -67,7 +67,10 @@ utf32le_is_mbc_newline(const UChar* p, const UChar* end)
static OnigCodePoint
utf32le_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
{
- return (OnigCodePoint )(((p[3] * 256 + p[2]) * 256 + p[1]) * 256 + p[0]);
+ OnigCodePoint code;
+
+ code = (OnigCodePoint )((((p[3] & 0x7f) * 256 + p[2]) * 256 + p[1]) * 256 + p[0]);
+ return code;
}
static int