summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJörg Frings-Fürst <debian@jff.email>2019-12-23 07:44:50 +0100
committerJörg Frings-Fürst <debian@jff.email>2019-12-23 07:44:50 +0100
commit9e629c8f43b43617fa5b7d3654f7d81e81b8a427 (patch)
tree581dcb2708a7eac0bcc7bbfa6478cfa50dfcf5a8 /src
parent7bbf4ae1401bc6e40f71a32d3f97952796d85690 (diff)
parent091456e1a135d4674701a264495bd34918779391 (diff)
Merge branch 'release/debian/6.9.4-1'debian/6.9.4-1
Diffstat (limited to 'src')
-rw-r--r--src/Makefile.windows28
-rw-r--r--src/ascii.c2
-rw-r--r--src/big5.c23
-rw-r--r--src/config.h.win326
-rw-r--r--src/config.h.win646
-rw-r--r--src/config.h.windows.in7
-rw-r--r--src/cp1251.c4
-rw-r--r--src/euc_jp.c21
-rw-r--r--src/euc_jp_prop.c2
-rw-r--r--src/euc_kr.c25
-rw-r--r--src/euc_tw.c18
-rw-r--r--src/gb18030.c34
-rwxr-xr-xsrc/gperf_fold_key_conv.py4
-rwxr-xr-xsrc/gperf_unfold_key_conv.py4
-rw-r--r--src/iso8859_1.c28
-rw-r--r--src/iso8859_10.c24
-rw-r--r--src/iso8859_11.c2
-rw-r--r--src/iso8859_13.c28
-rw-r--r--src/iso8859_14.c25
-rw-r--r--src/iso8859_15.c28
-rw-r--r--src/iso8859_16.c24
-rw-r--r--src/iso8859_2.c24
-rw-r--r--src/iso8859_3.c28
-rw-r--r--src/iso8859_4.c27
-rw-r--r--src/iso8859_5.c15
-rw-r--r--src/iso8859_6.c2
-rw-r--r--src/iso8859_7.c22
-rw-r--r--src/iso8859_8.c2
-rw-r--r--src/iso8859_9.c28
-rw-r--r--src/koi8.c21
-rw-r--r--src/koi8_r.c15
-rwxr-xr-xsrc/make_property.sh3
-rwxr-xr-xsrc/make_unicode_egcb_data.py23
-rwxr-xr-xsrc/make_unicode_fold.sh2
-rwxr-xr-xsrc/make_unicode_fold_data.py31
-rwxr-xr-xsrc/make_unicode_property.sh3
-rwxr-xr-xsrc/make_unicode_property_data.py77
-rwxr-xr-xsrc/make_unicode_wb_data.py21
-rw-r--r--src/mktable.c2
-rw-r--r--src/onig_init.c2
-rw-r--r--src/oniggnu.h2
-rw-r--r--src/onigposix.h6
-rw-r--r--src/oniguruma.h41
-rw-r--r--src/regcomp.c1955
-rw-r--r--src/regenc.c64
-rw-r--r--src/regenc.h4
-rw-r--r--src/regerror.c19
-rw-r--r--src/regexec.c1911
-rw-r--r--src/regext.c8
-rw-r--r--src/reggnu.c2
-rw-r--r--src/regint.h374
-rw-r--r--src/regparse.c1116
-rw-r--r--src/regparse.h102
-rw-r--r--src/regposerr.c2
-rw-r--r--src/regposix.c2
-rw-r--r--src/regsyntax.c2
-rw-r--r--src/regtrav.c2
-rw-r--r--src/regversion.c2
-rw-r--r--src/sjis.c31
-rw-r--r--src/sjis_prop.c2
-rw-r--r--src/unicode.c12
-rw-r--r--src/unicode_egcb_data.c4
-rw-r--r--src/unicode_fold1_key.c6
-rw-r--r--src/unicode_fold2_key.c6
-rw-r--r--src/unicode_fold3_key.c6
-rw-r--r--src/unicode_fold_data.c2
-rw-r--r--src/unicode_property_data.c5
-rw-r--r--src/unicode_property_data_posix.c2
-rw-r--r--src/unicode_unfold_key.c6
-rw-r--r--src/unicode_wb_data.c4
-rw-r--r--src/utf16_be.c68
-rw-r--r--src/utf16_le.c59
-rw-r--r--src/utf32_be.c35
-rw-r--r--src/utf32_le.c34
-rw-r--r--src/utf8.c29
75 files changed, 3685 insertions, 2931 deletions
diff --git a/src/Makefile.windows b/src/Makefile.windows
index 762cf07..1e87504 100644
--- a/src/Makefile.windows
+++ b/src/Makefile.windows
@@ -2,6 +2,9 @@
product_name = oniguruma
+TEST_DIR = $(ONIG_DIR)/../test
+WIN_DIR = $(ONIG_DIR)/../windows
+
CPPFLAGS =
CFLAGS = -O2 -nologo /W3
LDFLAGS =
@@ -152,25 +155,24 @@ $(BUILD_DIR)/unicode_fold1_key.obj: $(ONIG_DIR)/unicode_fold1_key.c $(ONIG_DIR)/
$(BUILD_DIR)/unicode_fold2_key.obj: $(ONIG_DIR)/unicode_fold2_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
$(BUILD_DIR)/unicode_fold3_key.obj: $(ONIG_DIR)/unicode_fold3_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h
-# C library test
-ctest: $(testc)
- .\$(testc)
-# POSIX C library test
-ptest: $(testp)
- .\$(testp)
+test_regset: $(TEST_DIR)/test_regset.c $(libname)
+ $(CC) -nologo /Fe:$@ /I. /I$(ONIG_DIR) /DONIG_EXTERN=extern /utf-8 $(TEST_DIR)/test_regset.c $(libname)
+
+test_utf8: $(TEST_DIR)/test_utf8.c $(libname)
+ $(CC) -nologo /Fe:$@ /I. /I$(ONIG_DIR) /DONIG_EXTERN=extern /utf-8 $(TEST_DIR)/test_utf8.c $(libname)
-$(testc): $(testc).c $(libname)
- $(CC) -nologo /Fe:$(testc) -DONIG_EXTERN=extern $(testc).c $(libname)
+testc: $(WIN_DIR)/testc.c $(libname)
+ $(CC) -nologo /Fe:$@ /I. /I$(ONIG_DIR) /DONIG_EXTERN=extern $(WIN_DIR)/testc.c $(libname)
-$(testp): $(testc).c $(dlllib)
- $(CC) -nologo -DPOSIX_TEST /Fe:$(testp) $(testc).c $(dlllib)
+testp: $(WIN_DIR)/testc.c $(libname)
+ $(CC) -nologo /Fe:$@ /I. /I$(ONIG_DIR) /DONIG_EXTERN=extern /DPOSIX_TEST $(WIN_DIR)/testc.c $(libname)
-$(testc)u: $(testc)u.c $(libname)
- $(CC) -nologo /Fe:$(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname)
+testu: $(TEST_DIR)/testu.c $(libname)
+ $(CC) -nologo /Fe:$@ /I. /I$(ONIG_DIR) /DONIG_EXTERN=extern $(TEST_DIR)/testu.c $(libname)
clean:
- del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\$(testp).exe $(BUILD_DIR)\$(testc).exe $(BUILD_DIR)\$(testc).obj
+ del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\test_regset.exe $(BUILD_DIR)\test_utf8.exe $(BUILD_DIR)\testp.exe $(BUILD_DIR)\testc.exe $(BUILD_DIR)\testu.exe
samples: all
diff --git a/src/ascii.c b/src/ascii.c
index e83e4d6..f2dc0d3 100644
--- a/src/ascii.c
+++ b/src/ascii.c
@@ -2,7 +2,7 @@
ascii.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/big5.c b/src/big5.c
index ff8bd3b..79ae1e3 100644
--- a/src/big5.c
+++ b/src/big5.c
@@ -2,7 +2,7 @@
big5.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -55,6 +55,16 @@ big5_mbc_enc_len(const UChar* p)
}
static int
+big5_code_to_mbclen(OnigCodePoint code)
+{
+ if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
+ if ((code & 0xff00) != 0) return 2;
+ if (EncLen_BIG5[(int )(code & 0xff)] == 1) return 1;
+
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+}
+
+static int
is_valid_mbc_string(const UChar* p, const UChar* end)
{
while (p < end) {
@@ -99,15 +109,6 @@ big5_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
pp, end, lower);
}
-#if 0
-static int
-big5_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
-{
- return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_BIG5, flag, pp, end);
-}
-#endif
-
static int
big5_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
@@ -174,7 +175,7 @@ OnigEncodingType OnigEncodingBIG5 = {
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
big5_mbc_to_code,
- onigenc_mb2_code_to_mbclen,
+ big5_code_to_mbclen,
big5_code_to_mbc,
big5_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
diff --git a/src/config.h.win32 b/src/config.h.win32
index 1f848e2..82a35b9 100644
--- a/src/config.h.win32
+++ b/src/config.h.win32
@@ -1,3 +1,9 @@
+#if defined(__MINGW32__) || _MSC_VER >= 1600
+#define HAVE_STDINT_H 1
+#endif
+#if defined(__MINGW32__) || _MSC_VER >= 1800
+#define HAVE_INTTYPES_H 1
+#endif
#define HAVE_SYS_TYPES_H 1
#define HAVE_SYS_STAT_H 1
#define HAVE_MEMORY_H 1
diff --git a/src/config.h.win64 b/src/config.h.win64
index f72671b..7f19699 100644
--- a/src/config.h.win64
+++ b/src/config.h.win64
@@ -1,3 +1,9 @@
+#if defined(__MINGW32__) || _MSC_VER >= 1600
+#define HAVE_STDINT_H 1
+#endif
+#if defined(__MINGW32__) || _MSC_VER >= 1800
+#define HAVE_INTTYPES_H 1
+#endif
#define HAVE_SYS_TYPES_H 1
#define HAVE_SYS_STAT_H 1
#define HAVE_MEMORY_H 1
diff --git a/src/config.h.windows.in b/src/config.h.windows.in
index d8de1dd..d4f73d7 100644
--- a/src/config.h.windows.in
+++ b/src/config.h.windows.in
@@ -1,7 +1,14 @@
+#if defined(__MINGW32__) || _MSC_VER >= 1600
+#define HAVE_STDINT_H 1
+#endif
+#if defined(__MINGW32__) || _MSC_VER >= 1800
+#define HAVE_INTTYPES_H 1
+#endif
#define HAVE_SYS_TYPES_H 1
#define HAVE_SYS_STAT_H 1
#define HAVE_MEMORY_H 1
#define HAVE_OFF_T 1
+
#define SIZEOF_INT 4
#define SIZEOF_LONG 4
#define SIZEOF_LONG_LONG 8
diff --git a/src/cp1251.c b/src/cp1251.c
index b4ce4d8..fa20780 100644
--- a/src/cp1251.c
+++ b/src/cp1251.c
@@ -2,8 +2,8 @@
cp1251.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2006-2018 Byte <byte AT mail DOT kna DOT ru>
- * K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2006-2019 Byte <byte AT mail DOT kna DOT ru>
+ * K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/euc_jp.c b/src/euc_jp.c
index d17386d..640b3e3 100644
--- a/src/euc_jp.c
+++ b/src/euc_jp.c
@@ -2,7 +2,7 @@
euc_jp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -120,25 +120,6 @@ code_to_mbclen(OnigCodePoint code)
return ONIGERR_INVALID_CODE_POINT_VALUE;
}
-#if 0
-static int
-code_to_mbc_first(OnigCodePoint code)
-{
- int first;
-
- if ((code & 0xff0000) != 0) {
- first = (code >> 16) & 0xff;
- }
- else if ((code & 0xff00) != 0) {
- first = (code >> 8) & 0xff;
- }
- else {
- return (int )code;
- }
- return first;
-}
-#endif
-
static int
code_to_mbc(OnigCodePoint code, UChar *buf)
{
diff --git a/src/euc_jp_prop.c b/src/euc_jp_prop.c
index be719cf..a816f48 100644
--- a/src/euc_jp_prop.c
+++ b/src/euc_jp_prop.c
@@ -1,5 +1,5 @@
/* ANSI-C code produced by gperf version 3.1 */
-/* Command-line: /usr/local/bin/gperf -pt -T -L ANSI-C -N onigenc_euc_jp_lookup_property_name --output-file gperf1.tmp euc_jp_prop.gperf */
+/* Command-line: gperf -pt -T -L ANSI-C -N onigenc_euc_jp_lookup_property_name --output-file gperf1.tmp euc_jp_prop.gperf */
/* Computed positions: -k'1,3' */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
diff --git a/src/euc_kr.c b/src/euc_kr.c
index bb968b0..7fa50af 100644
--- a/src/euc_kr.c
+++ b/src/euc_kr.c
@@ -2,7 +2,7 @@
euc_kr.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -55,6 +55,16 @@ euckr_mbc_enc_len(const UChar* p)
}
static int
+euckr_code_to_mbclen(OnigCodePoint code)
+{
+ if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
+ if ((code & 0xff00) != 0) return 2;
+ if (EncLen_EUCKR[(int )(code & 0xff)] == 1) return 1;
+
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+}
+
+static int
is_valid_mbc_string(const UChar* p, const UChar* end)
{
while (p < end) {
@@ -98,15 +108,6 @@ euckr_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
pp, end, lower);
}
-#if 0
-static int
-euckr_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
-{
- return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_EUC_KR, flag, pp, end);
-}
-#endif
-
static int
euckr_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
@@ -149,7 +150,7 @@ OnigEncodingType OnigEncodingEUC_KR = {
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
euckr_mbc_to_code,
- onigenc_mb2_code_to_mbclen,
+ euckr_code_to_mbclen,
euckr_code_to_mbc,
euckr_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
@@ -174,7 +175,7 @@ OnigEncodingType OnigEncodingEUC_CN = {
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
euckr_mbc_to_code,
- onigenc_mb2_code_to_mbclen,
+ euckr_code_to_mbclen,
euckr_code_to_mbc,
euckr_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
diff --git a/src/euc_tw.c b/src/euc_tw.c
index c9acaf1..8e72b97 100644
--- a/src/euc_tw.c
+++ b/src/euc_tw.c
@@ -2,7 +2,7 @@
euc_tw.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -55,6 +55,20 @@ euctw_mbc_enc_len(const UChar* p)
}
static int
+euctw_code_to_mbclen(OnigCodePoint code)
+{
+ if ((code & 0xff000000) != 0) return 4;
+ else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else if ((code & 0xff00) != 0) return 2;
+ else {
+ if (EncLen_EUCTW[(int )(code & 0xff)] == 1)
+ return 1;
+
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+ }
+}
+
+static int
is_valid_mbc_string(const UChar* p, const UChar* end)
{
while (p < end) {
@@ -155,7 +169,7 @@ OnigEncodingType OnigEncodingEUC_TW = {
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
euctw_mbc_to_code,
- onigenc_mb4_code_to_mbclen,
+ euctw_code_to_mbclen,
euctw_code_to_mbc,
euctw_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
diff --git a/src/gb18030.c b/src/gb18030.c
index 7654432..50898eb 100644
--- a/src/gb18030.c
+++ b/src/gb18030.c
@@ -2,8 +2,8 @@
gb18030.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2005-2018 KUBO Takehiro <kubo AT jiubao DOT org>
- * K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2005-2019 KUBO Takehiro <kubo AT jiubao DOT org>
+ * K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,6 +33,7 @@
#if 1
#define DEBUG_GB18030(arg)
#else
+#include <stdio.h>
#define DEBUG_GB18030(arg) printf arg
#endif
@@ -67,15 +68,29 @@ gb18030_mbc_enc_len(const UChar* p)
{
if (GB18030_MAP[*p] != CM)
return 1;
+
p++;
if (GB18030_MAP[*p] == C4)
return 4;
- if (GB18030_MAP[*p] == C1)
- return 1; /* illegal sequence */
+
return 2;
}
static int
+gb18030_code_to_mbclen(OnigCodePoint code)
+{
+ if ((code & 0xff000000) != 0) return 4;
+ else if ((code & 0xff0000) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else if ((code & 0xff00) != 0) return 2;
+ else {
+ if (GB18030_MAP[(int )(code & 0xff)] == CM)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+
+ return 1;
+ }
+}
+
+static int
is_valid_mbc_string(const UChar* p, const UChar* end)
{
while (p < end) {
@@ -135,15 +150,6 @@ gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
pp, end, lower);
}
-#if 0
-static int
-gb18030_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
-{
- return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end);
-}
-#endif
-
static int
gb18030_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
@@ -522,7 +528,7 @@ OnigEncodingType OnigEncodingGB18030 = {
1, /* min enc length */
onigenc_is_mbc_newline_0x0a,
gb18030_mbc_to_code,
- onigenc_mb4_code_to_mbclen,
+ gb18030_code_to_mbclen,
gb18030_code_to_mbc,
gb18030_mbc_case_fold,
onigenc_ascii_apply_all_case_fold,
diff --git a/src/gperf_fold_key_conv.py b/src/gperf_fold_key_conv.py
index f453186..c633100 100755
--- a/src/gperf_fold_key_conv.py
+++ b/src/gperf_fold_key_conv.py
@@ -12,7 +12,7 @@ REG_STR_AT = re.compile('str\[(\d+)\]')
REG_RETURN_TYPE = re.compile('^const\s+short\s+int\s*\*')
REG_FOLD_KEY = re.compile('unicode_fold(\d)_key\s*\(register\s+const\s+char\s*\*\s*str,\s*register\s+size_t\s+len\)')
REG_ENTRY = re.compile('\{".*?",\s*(-?\d+)\s*\}')
-REG_IF_LEN = re.compile('if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')
+REG_IF_LEN = re.compile('\s*if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')
REG_GET_HASH = re.compile('(?:register\s+)?(?:unsigned\s+)?int\s+key\s*=\s*hash\s*\(str,\s*len\);')
REG_GET_CODE = re.compile('(?:register\s+)?const\s+char\s*\*\s*s\s*=\s*wordlist\[key\]\.name;')
REG_CODE_CHECK = re.compile('if\s*\(\*str\s*==\s*\*s\s*&&\s*!strncmp.+\)')
@@ -34,7 +34,7 @@ def parse_line(s, key_len):
if r != s: return r
r = re.sub(REG_ENTRY, '\\1', s)
if r != s: return r
- r = re.sub(REG_IF_LEN, 'if (0 == 0)', s)
+ r = re.sub(REG_IF_LEN, '', s)
if r != s: return r
r = re.sub(REG_GET_HASH, 'int key = hash(codes);', s)
if r != s: return r
diff --git a/src/gperf_unfold_key_conv.py b/src/gperf_unfold_key_conv.py
index 3cf4836..d999d4e 100755
--- a/src/gperf_unfold_key_conv.py
+++ b/src/gperf_unfold_key_conv.py
@@ -12,7 +12,7 @@ REG_STR_AT = re.compile('str\[(\d+)\]')
REG_UNFOLD_KEY = re.compile('onigenc_unicode_unfold_key\s*\(register\s+const\s+char\s*\*\s*str,\s*register\s+size_t\s+len\)')
REG_ENTRY = re.compile('\{".+?",\s*/\*(.+?)\*/\s*(-?\d+),\s*(\d)\}')
REG_EMPTY_ENTRY = re.compile('\{"",\s*(-?\d+),\s*(\d)\}')
-REG_IF_LEN = re.compile('if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')
+REG_IF_LEN = re.compile('\s*if\s*\(\s*len\s*<=\s*MAX_WORD_LENGTH.+')
REG_GET_HASH = re.compile('(?:register\s+)?(?:unsigned\s+)?int\s+key\s*=\s*hash\s*\(str,\s*len\);')
REG_GET_CODE = re.compile('(?:register\s+)?const\s+char\s*\*\s*s\s*=\s*wordlist\[key\]\.name;')
REG_CODE_CHECK = re.compile('if\s*\(\*str\s*==\s*\*s\s*&&\s*!strncmp.+\)')
@@ -32,7 +32,7 @@ def parse_line(s):
if r != s: return r
r = re.sub(REG_EMPTY_ENTRY, '{0xffffffff, \\1, \\2}', s)
if r != s: return r
- r = re.sub(REG_IF_LEN, 'if (0 == 0)', s)
+ r = re.sub(REG_IF_LEN, '', s)
if r != s: return r
r = re.sub(REG_GET_HASH, 'int key = hash(&code);', s)
if r != s: return r
diff --git a/src/iso8859_1.c b/src/iso8859_1.c
index 3b64942..e681c2a 100644
--- a/src/iso8859_1.c
+++ b/src/iso8859_1.c
@@ -2,7 +2,7 @@
iso8859_1.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -216,32 +216,6 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
return 1;
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- int v;
- const UChar* p = *pp;
-
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- (*pp)++;
- return TRUE;
- }
-
- (*pp)++;
- v = (EncISO_8859_1_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- /* 0xdf, 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
- if (*p >= 0xaa && *p <= 0xba)
- return FALSE;
- else
- return TRUE;
- }
-
- return (v != 0 ? TRUE : FALSE);
-}
-#endif
-
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
diff --git a/src/iso8859_10.c b/src/iso8859_10.c
index f5882bc..e98cffb 100644
--- a/src/iso8859_10.c
+++ b/src/iso8859_10.c
@@ -2,7 +2,7 @@
iso8859_10.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -121,28 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1;
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- int v;
- const UChar* p = *pp;
-
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- (*pp)++;
- return TRUE;
- }
-
- (*pp)++;
- v = (EncISO_8859_10_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- return TRUE;
- }
-
- return (v != 0 ? TRUE : FALSE);
-}
-#endif
-
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
diff --git a/src/iso8859_11.c b/src/iso8859_11.c
index da8fda0..8639ce2 100644
--- a/src/iso8859_11.c
+++ b/src/iso8859_11.c
@@ -2,7 +2,7 @@
iso8859_11.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/iso8859_13.c b/src/iso8859_13.c
index 0cf251c..2bd460f 100644
--- a/src/iso8859_13.c
+++ b/src/iso8859_13.c
@@ -2,7 +2,7 @@
iso8859_13.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -121,32 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1;
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- int v;
- const UChar* p = *pp;
-
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- (*pp)++;
- return TRUE;
- }
-
- (*pp)++;
- v = (EncISO_8859_13_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- /* 0xdf, 0xb5 are lower case letter, but can't convert. */
- if (*p == 0xb5)
- return FALSE;
- else
- return TRUE;
- }
-
- return (v != 0 ? TRUE : FALSE);
-}
-#endif
-
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
diff --git a/src/iso8859_14.c b/src/iso8859_14.c
index 030e9f5..5030b55 100644
--- a/src/iso8859_14.c
+++ b/src/iso8859_14.c
@@ -2,7 +2,7 @@
iso8859_14.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -121,29 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1; /* return byte length of converted char to lower */
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
-{
- int v;
- const UChar* p = *pp;
-
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- (*pp)++;
- return TRUE;
- }
-
- (*pp)++;
- v = (EncISO_8859_14_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- return TRUE;
- }
-
- return (v != 0 ? TRUE : FALSE);
-}
-#endif
-
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
diff --git a/src/iso8859_15.c b/src/iso8859_15.c
index 859d727..f32c3de 100644
--- a/src/iso8859_15.c
+++ b/src/iso8859_15.c
@@ -2,7 +2,7 @@
iso8859_15.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -121,32 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1; /* return byte length of converted char to lower */
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- int v;
- const UChar* p = *pp;
-
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- (*pp)++;
- return TRUE;
- }
-
- (*pp)++;
- v = (EncISO_8859_15_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- /* 0xdf etc.. are lower case letter, but can't convert. */
- if (*p == 0xaa || *p == 0xb5 || *p == 0xba)
- return FALSE;
- else
- return TRUE;
- }
-
- return (v != 0 ? TRUE : FALSE);
-}
-#endif
-
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
diff --git a/src/iso8859_16.c b/src/iso8859_16.c
index 2614e56..22a653a 100644
--- a/src/iso8859_16.c
+++ b/src/iso8859_16.c
@@ -2,7 +2,7 @@
iso8859_16.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -121,28 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1; /* return byte length of converted char to lower */
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- int v;
- const UChar* p = *pp;
-
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- (*pp)++;
- return TRUE;
- }
-
- (*pp)++;
- v = (EncISO_8859_16_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- return TRUE;
- }
-
- return (v != 0 ? TRUE : FALSE);
-}
-#endif
-
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
diff --git a/src/iso8859_2.c b/src/iso8859_2.c
index ba030d5..dc3d0a1 100644
--- a/src/iso8859_2.c
+++ b/src/iso8859_2.c
@@ -2,7 +2,7 @@
iso8859_2.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -121,28 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1; /* return byte length of converted char to lower */
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- int v;
- const UChar* p = *pp;
-
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- (*pp)++;
- return TRUE;
- }
-
- (*pp)++;
- v = (EncISO_8859_2_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- return TRUE;
- }
-
- return (v != 0 ? TRUE : FALSE);
-}
-#endif
-
static const OnigPairCaseFoldCodes CaseFoldMap[] = {
{ 0xa1, 0xb1 },
{ 0xa3, 0xb3 },
diff --git a/src/iso8859_3.c b/src/iso8859_3.c
index f090d0b..49dc6b2 100644
--- a/src/iso8859_3.c
+++ b/src/iso8859_3.c
@@ -2,7 +2,7 @@
iso8859_3.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -121,32 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp,
return 1;
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- int v;
- const UChar* p = *pp;
-
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- (*pp)++;
- return TRUE;
- }
-
- (*pp)++;
- v = (EncISO_8859_3_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
- if (*p == 0xb5)
- return FALSE;
- else
- return TRUE;
- }
-
- return (v != 0 ? TRUE : FALSE);
-}
-#endif
-
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
diff --git a/src/iso8859_4.c b/src/iso8859_4.c
index 57dc9fe..f3f6ba9 100644
--- a/src/iso8859_4.c
+++ b/src/iso8859_4.c
@@ -2,7 +2,7 @@
iso8859_4.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -121,31 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1; /* return byte length of converted char to lower */
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- int v;
- const UChar* p = *pp;
-
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- (*pp)++;
- return TRUE;
- }
-
- (*pp)++;
- v = (EncISO_8859_4_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- if (*p == 0xa2)
- return FALSE;
- else
- return TRUE;
- }
-
- return (v != 0 ? TRUE : FALSE);
-}
-#endif
-
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
diff --git a/src/iso8859_5.c b/src/iso8859_5.c
index a090d25..a5f587c 100644
--- a/src/iso8859_5.c
+++ b/src/iso8859_5.c
@@ -2,7 +2,7 @@
iso8859_5.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -114,19 +114,6 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
return 1;
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- int v;
- const UChar* p = *pp;
-
- (*pp)++;
- v = (EncISO_8859_5_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- return (v != 0 ? TRUE : FALSE);
-}
-#endif
-
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
diff --git a/src/iso8859_6.c b/src/iso8859_6.c
index 1c16c79..fb72442 100644
--- a/src/iso8859_6.c
+++ b/src/iso8859_6.c
@@ -2,7 +2,7 @@
iso8859_6.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/iso8859_7.c b/src/iso8859_7.c
index 8c88351..018efac 100644
--- a/src/iso8859_7.c
+++ b/src/iso8859_7.c
@@ -2,7 +2,7 @@
iso8859_7.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -114,26 +114,6 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
return 1;
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- int v;
- const UChar* p = *pp;
-
- (*pp)++;
- v = (EncISO_8859_7_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- if (*p == 0xc0 || *p == 0xe0)
- return FALSE;
- else
- return TRUE;
- }
-
- return (v != 0 ? TRUE : FALSE);
-}
-#endif
-
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
diff --git a/src/iso8859_8.c b/src/iso8859_8.c
index bd3e94d..92a5eb1 100644
--- a/src/iso8859_8.c
+++ b/src/iso8859_8.c
@@ -2,7 +2,7 @@
iso8859_8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/iso8859_9.c b/src/iso8859_9.c
index 1d291d5..1f9bdea 100644
--- a/src/iso8859_9.c
+++ b/src/iso8859_9.c
@@ -2,7 +2,7 @@
iso8859_9.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -121,32 +121,6 @@ mbc_case_fold(OnigCaseFoldType flag,
return 1;
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- int v;
- const UChar* p = *pp;
-
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- (*pp)++;
- return TRUE;
- }
-
- (*pp)++;
- v = (EncISO_8859_9_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- /* 0xdf etc.. are lower case letter, but can't convert. */
- if (*p >= 0xaa && *p <= 0xba)
- return FALSE;
- else
- return TRUE;
- }
-
- return (v != 0 ? TRUE : FALSE);
-}
-#endif
-
static int
is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
diff --git a/src/koi8.c b/src/koi8.c
index 94c95a0..37023c6 100644
--- a/src/koi8.c
+++ b/src/koi8.c
@@ -2,7 +2,7 @@
koi8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -115,25 +115,6 @@ koi8_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
return 1;
}
-#if 0
-static int
-koi8_is_mbc_ambiguous(OnigAmbigType flag, const OnigUChar** pp, const OnigUChar* end)
-{
- const OnigUChar* p = *pp;
-
- (*pp)++;
- if (((flag & ONIGENC_CASE_FOLD_ASCII_CASE) != 0 &&
- ONIGENC_IS_MBC_ASCII(p)) ||
- ((flag & ONIGENC_CASE_FOLD_NONASCII_CASE) != 0 &&
- !ONIGENC_IS_MBC_ASCII(p))) {
- int v = (EncKOI8_CtypeTable[*p] &
- (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- return (v != 0 ? TRUE : FALSE);
- }
- return FALSE;
-}
-#endif
-
static int
koi8_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
diff --git a/src/koi8_r.c b/src/koi8_r.c
index 1284f7f..c77302f 100644
--- a/src/koi8_r.c
+++ b/src/koi8_r.c
@@ -2,7 +2,7 @@
koi8_r.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -114,19 +114,6 @@ koi8_r_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
return 1;
}
-#if 0
-static int
-koi8_r_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- int v;
- const UChar* p = *pp;
-
- (*pp)++;
- v = (EncKOI8_R_CtypeTable[*p] & (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- return (v != 0 ? TRUE : FALSE);
-}
-#endif
-
static int
koi8_r_is_code_ctype(OnigCodePoint code, unsigned int ctype)
{
diff --git a/src/make_property.sh b/src/make_property.sh
index bc5cf98..cef0a96 100755
--- a/src/make_property.sh
+++ b/src/make_property.sh
@@ -1,8 +1,9 @@
#!/bin/sh
+GPERF=gperf
+
TMP1=gperf1.tmp
TMP2=gperf2.tmp
-GPERF=/usr/local/bin/gperf
GPERF_OPT='-pt -T -L ANSI-C'
diff --git a/src/make_unicode_egcb_data.py b/src/make_unicode_egcb_data.py
index 0f63f97..9c71796 100755
--- a/src/make_unicode_egcb_data.py
+++ b/src/make_unicode_egcb_data.py
@@ -1,7 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# make_unicode_egcb_data.py
-# Copyright (c) 2017-2018 K.Kosako
+# Copyright (c) 2017-2019 K.Kosako
import sys
import re
@@ -13,18 +13,19 @@ PR_LINE_REG = re.compile("([0-9A-Fa-f]+)(?:..([0-9A-Fa-f]+))?\s*;\s*(\w+)")
PA_LINE_REG = re.compile("(\w+)\s*;\s*(\w+)")
PVA_LINE_REG = re.compile("(sc|gc)\s*;\s*(\w+)\s*;\s*(\w+)(?:\s*;\s*(\w+))?")
BL_LINE_REG = re.compile("([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.*)")
-VERSION_REG = re.compile("#\s*.*-(\d+\.\d+\.\d+)\.txt")
+VERSION_REG = re.compile("#\s*.*-(\d+)\.(\d+)\.(\d+)\.txt")
-VERSION_INFO = None
+VERSION_INFO = [-1, -1, -1]
DIC = { }
PROPS = []
PropIndex = { }
def check_version_info(s):
- global VERSION_INFO
m = VERSION_REG.match(s)
if m is not None:
- VERSION_INFO = m.group(1)
+ VERSION_INFO[0] = int(m.group(1))
+ VERSION_INFO[1] = int(m.group(2))
+ VERSION_INFO[2] = int(m.group(3))
def print_ranges(ranges):
for (start, end) in ranges:
@@ -160,7 +161,7 @@ def parse_properties(path):
continue
if s[0] == '#':
- if VERSION_INFO is None:
+ if VERSION_INFO[0] < 0:
check_version_info(s)
m = PR_LINE_REG.match(s)
@@ -194,7 +195,7 @@ PROPS = sorted(PROPS)
print '/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */'
COPYRIGHT = '''
/*-
- * Copyright (c) 2017-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2017-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -222,9 +223,11 @@ COPYRIGHT = '''
print COPYRIGHT
print ''
-if VERSION_INFO is not None:
- print "#define GRAPHEME_BREAK_PROPERTY_VERSION %s" % re.sub(r'[\.-]', '_', VERSION_INFO)
- print ''
+if VERSION_INFO[0] < 0:
+ raise RuntimeError("Version is not found")
+
+print "#define GRAPHEME_BREAK_PROPERTY_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2])
+print ''
ranges = []
for prop in PROPS:
diff --git a/src/make_unicode_fold.sh b/src/make_unicode_fold.sh
index 35ce974..1d5cc1e 100755
--- a/src/make_unicode_fold.sh
+++ b/src/make_unicode_fold.sh
@@ -1,6 +1,6 @@
#!/bin/sh
-GPERF=/usr/local/bin/gperf
+GPERF=gperf
TMP0=gperf0.tmp
TMP1=gperf1.tmp
diff --git a/src/make_unicode_fold_data.py b/src/make_unicode_fold_data.py
index 783988c..55d5b88 100755
--- a/src/make_unicode_fold_data.py
+++ b/src/make_unicode_fold_data.py
@@ -1,7 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# make_unicode_fold_data.py
-# Copyright (c) 2016-2018 K.Kosako
+# Copyright (c) 2016-2019 K.Kosako
import sys
import re
@@ -16,9 +16,9 @@ DataName = 'OnigUnicodeFolds'
ENCODING = 'utf-8'
LINE_REG = re.compile("([0-9A-F]{1,6}); (.); ([0-9A-F]{1,6})(?: ([0-9A-F]{1,6}))?(?: ([0-9A-F]{1,6}))?;(?:\s*#\s*)(.*)")
-VERSION_REG = re.compile("#.*-(\d+\.\d+\.\d+)\.txt")
+VERSION_REG = re.compile("#.*-(\d+)\.(\d+)\.(\d+)\.txt")
-VERSION_INFO = None
+VERSION_INFO = [-1, -1, -1]
FOLDS = {}
TURKISH_FOLDS = {}
@@ -56,18 +56,19 @@ def form3bytes(x):
return "\\x%02x\\x%02x\\x%02x" % (x2, x1, x0)
def check_version_info(s):
- global VERSION_INFO
- if VERSION_INFO is None:
- m = VERSION_REG.match(s)
- if m is not None:
- VERSION_INFO = m.group(1)
+ m = VERSION_REG.match(s)
+ if m is not None:
+ VERSION_INFO[0] = int(m.group(1))
+ VERSION_INFO[1] = int(m.group(2))
+ VERSION_INFO[2] = int(m.group(3))
def parse_line(s):
if len(s) == 0:
- return False
+ return False
if s[0] == '#':
+ if VERSION_INFO[0] < 0:
check_version_info(s)
- return False
+ return False
m = LINE_REG.match(s)
if m is None:
@@ -232,9 +233,11 @@ def output_fold_source(f, out_comment):
print >> f, "/* This file was generated by make_unicode_fold_data.py. */"
print >> f, '#include "regenc.h"'
print >> f, ''
- if VERSION_INFO is not None:
- print "#define UNICODE_CASEFOLD_VERSION %s" % re.sub(r'[\.-]', '_', VERSION_INFO)
- print ''
+ if VERSION_INFO[0] < 0:
+ raise RuntimeError("Version is not found")
+
+ print "#define UNICODE_CASEFOLD_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2])
+ print ''
#output_macros(f, DataName)
print >> f, ''
#output_typedef(f)
@@ -246,7 +249,7 @@ HEAD = '''
/* This gperf source file was generated by make_unicode_fold_data.py */
/*-
- * Copyright (c) 2017-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2017-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/make_unicode_property.sh b/src/make_unicode_property.sh
index 124d76a..51c8951 100755
--- a/src/make_unicode_property.sh
+++ b/src/make_unicode_property.sh
@@ -1,10 +1,11 @@
#!/bin/sh
+GPERF=gperf
+
NAME=unicode_property_data
TMP1=gperf1.tmp
TMP2=gperf2.tmp
TMP=
-GPERF=/usr/local/bin/gperf
GPERF_OPT='-T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool'
POOL_CAST='s/\(int *\)\(size_t *\)&\(\(struct +unicode_prop_name_pool_t *\* *\) *0\)->unicode_prop_name_pool_str([^,]+)/pool_offset(\1)/g'
diff --git a/src/make_unicode_property_data.py b/src/make_unicode_property_data.py
index dc3071a..9776628 100755
--- a/src/make_unicode_property_data.py
+++ b/src/make_unicode_property_data.py
@@ -1,7 +1,7 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
# make_unicode_property_data.py
-# Copyright (c) 2016-2018 K.Kosako
+# Copyright (c) 2016-2019 K.Kosako
import sys
import re
@@ -22,9 +22,12 @@ PR_LINE_REG = re.compile("([0-9A-Fa-f]+)(?:..([0-9A-Fa-f]+))?\s*;\s*(\w+)")
PA_LINE_REG = re.compile("(\w+)\s*;\s*(\w+)")
PVA_LINE_REG = re.compile("(sc|gc)\s*;\s*(\w+)\s*;\s*(\w+)(?:\s*;\s*(\w+))?")
BL_LINE_REG = re.compile("([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.*)")
-VERSION_REG = re.compile("#\s*.*-(\d+\.\d+\.\d+)\.txt")
+UNICODE_VERSION_REG = re.compile("#\s*.*-(\d+)\.(\d+)\.(\d+)\.txt")
+EMOJI_VERSION_REG = re.compile("(?i)#\s*Version:\s*(\d+)\.(\d+)")
+
+VERSION_INFO = [-1, -1, -1]
+EMOJI_VERSION_INFO = [-1, -1]
-VERSION_INFO = None
DIC = { }
KDIC = { }
PropIndex = { }
@@ -40,13 +43,6 @@ def fix_block_name(name):
s = re.sub(r'[- ]+', '_', name)
return 'In_' + s
-def check_version_info(s):
- global VERSION_INFO
- m = VERSION_REG.match(s)
- if m is not None:
- VERSION_INFO = m.group(1)
-
-
def print_ranges(ranges):
for (start, end) in ranges:
print "0x%06x, 0x%06x" % (start, end)
@@ -233,7 +229,8 @@ def parse_unicode_data_file(f):
normalize_ranges_in_dic(dic)
return dic, assigned
-def parse_properties(path, klass, prop_prefix = None):
+def parse_properties(path, klass, prop_prefix = None, version_reg = None):
+ version_match = None
with open(path, 'r') as f:
dic = { }
prop = None
@@ -243,9 +240,10 @@ def parse_properties(path, klass, prop_prefix = None):
if len(s) == 0:
continue
- if s[0] == '#':
- if VERSION_INFO is None:
- check_version_info(s)
+ if s[0] == '#' and version_reg is not None and version_match is None:
+ version_match = version_reg.match(s)
+ if version_match is not None:
+ continue
m = PR_LINE_REG.match(s)
if m:
@@ -266,7 +264,7 @@ def parse_properties(path, klass, prop_prefix = None):
props.append(prop)
normalize_ranges_in_dic(dic)
- return (dic, props)
+ return (dic, props, version_match)
def parse_property_aliases(path):
a = { }
@@ -414,11 +412,11 @@ def entry_and_print_prop_and_index(name, index):
nname = normalize_prop_name(name)
print_prop_and_index(nname, index)
-def parse_and_merge_properties(path, klass):
- dic, props = parse_properties(path, klass)
+def parse_and_merge_properties(path, klass, prop_prefix = None, version_reg = None):
+ dic, props, ver_m = parse_properties(path, klass, prop_prefix, version_reg)
merge_dic(DIC, dic)
merge_props(PROPS, props)
- return dic, props
+ return dic, props, ver_m
### main ###
argv = sys.argv
@@ -447,11 +445,21 @@ with open('UnicodeData.txt', 'r') as f:
PROPS = DIC.keys()
PROPS = list_sub(PROPS, POSIX_LIST)
-parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property')
-dic, props = parse_and_merge_properties('Scripts.txt', 'Script')
+_, _, ver_m = parse_and_merge_properties('DerivedCoreProperties.txt', 'Derived Property', None, UNICODE_VERSION_REG)
+if ver_m is not None:
+ VERSION_INFO[0] = int(ver_m.group(1))
+ VERSION_INFO[1] = int(ver_m.group(2))
+ VERSION_INFO[2] = int(ver_m.group(3))
+
+dic, props, _ = parse_and_merge_properties('Scripts.txt', 'Script')
DIC['Unknown'] = inverse_ranges(add_ranges_in_dic(dic))
+
parse_and_merge_properties('PropList.txt', 'Binary Property')
-parse_and_merge_properties('emoji-data.txt', 'Emoji Property')
+
+_, _, ver_m = parse_and_merge_properties('emoji-data.txt', 'Emoji Property', None, EMOJI_VERSION_REG)
+if ver_m is not None:
+ EMOJI_VERSION_INFO[0] = int(ver_m.group(1))
+ EMOJI_VERSION_INFO[1] = int(ver_m.group(2))
PROPS.append('Unknown')
KDIC['Unknown'] = 'Script'
@@ -464,9 +472,9 @@ dic, BLOCKS = parse_blocks('Blocks.txt')
merge_dic(DIC, dic)
if INCLUDE_GRAPHEME_CLUSTER_DATA:
- dic, props = parse_properties('GraphemeBreakProperty.txt',
- 'GraphemeBreak Property',
- GRAPHEME_CLUSTER_BREAK_NAME_PREFIX)
+ dic, props, _ = parse_properties('GraphemeBreakProperty.txt',
+ 'GraphemeBreak Property',
+ GRAPHEME_CLUSTER_BREAK_NAME_PREFIX)
merge_dic(DIC, dic)
merge_props(PROPS, props)
#prop = GRAPHEME_CLUSTER_BREAK_NAME_PREFIX + 'Other'
@@ -533,9 +541,13 @@ sys.stdout.write(s)
if OUTPUT_LIST_MODE:
UPF = open("UNICODE_PROPERTIES", "w")
- if VERSION_INFO is not None:
- print >> UPF, "Unicode Properties (from Unicode Version: %s)" % VERSION_INFO
- print >> UPF, ''
+ if VERSION_INFO[0] < 0:
+ raise RuntimeError("Unicode Version is not found")
+ if EMOJI_VERSION_INFO[0] < 0:
+ raise RuntimeError("Emoji Version is not found")
+
+ print >> UPF, "Unicode Properties (Unicode Version: %d.%d.%d, Emoji: %d.%d)" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2], EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1])
+ print >> UPF, ''
index = -1
for prop in POSIX_LIST:
@@ -569,9 +581,14 @@ if not(POSIX_ONLY):
print '%%'
print ''
if not(POSIX_ONLY):
- if VERSION_INFO is not None:
- print "#define UNICODE_PROPERTY_VERSION %s" % re.sub(r'[\.-]', '_', VERSION_INFO)
- print ''
+ if VERSION_INFO[0] < 0:
+ raise RuntimeError("Unicode Version is not found")
+ if EMOJI_VERSION_INFO[0] < 0:
+ raise RuntimeError("Emoji Version is not found")
+
+ print "#define UNICODE_PROPERTY_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2])
+ print "#define UNICODE_EMOJI_VERSION %02d%02d" % (EMOJI_VERSION_INFO[0], EMOJI_VERSION_INFO[1])
+ print ''
print "#define PROPERTY_NAME_MAX_SIZE %d" % (PROPERTY_NAME_MAX_LEN + 10)
print "#define CODE_RANGES_NUM %d" % (index + 1)
diff --git a/src/make_unicode_wb_data.py b/src/make_unicode_wb_data.py
index 624fa7e..ddedd5d 100755
--- a/src/make_unicode_wb_data.py
+++ b/src/make_unicode_wb_data.py
@@ -13,18 +13,19 @@ PR_LINE_REG = re.compile("([0-9A-Fa-f]+)(?:..([0-9A-Fa-f]+))?\s*;\s*(\w+)")
PA_LINE_REG = re.compile("(\w+)\s*;\s*(\w+)")
PVA_LINE_REG = re.compile("(sc|gc)\s*;\s*(\w+)\s*;\s*(\w+)(?:\s*;\s*(\w+))?")
BL_LINE_REG = re.compile("([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+)\s*;\s*(.*)")
-VERSION_REG = re.compile("#\s*.*-(\d+\.\d+\.\d+)\.txt")
+VERSION_REG = re.compile("#\s*.*-(\d+)\.(\d+)\.(\d+)\.txt")
-VERSION_INFO = None
+VERSION_INFO = [-1, -1, -1]
DIC = { }
PROPS = []
PropIndex = { }
def check_version_info(s):
- global VERSION_INFO
m = VERSION_REG.match(s)
if m is not None:
- VERSION_INFO = m.group(1)
+ VERSION_INFO[0] = int(m.group(1))
+ VERSION_INFO[1] = int(m.group(2))
+ VERSION_INFO[2] = int(m.group(3))
def print_ranges(ranges):
for (start, end) in ranges:
@@ -160,7 +161,7 @@ def parse_properties(path):
continue
if s[0] == '#':
- if VERSION_INFO is None:
+ if VERSION_INFO[0] < 0:
check_version_info(s)
m = PR_LINE_REG.match(s)
@@ -194,7 +195,7 @@ PROPS = sorted(PROPS)
print '/* unicode_wb_data.c: Generated by make_unicode_wb_data.py. */'
COPYRIGHT = '''
/*-
- * Copyright (c) 2019 K.Kosako <kkosako0 AT gmail DOT com>
+ * Copyright (c) 2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -222,9 +223,11 @@ COPYRIGHT = '''
print COPYRIGHT
print ''
-if VERSION_INFO is not None:
- print "#define WORD_BREAK_PROPERTY_VERSION %s" % re.sub(r'[\.-]', '_', VERSION_INFO)
- print ''
+if VERSION_INFO[0] < 0:
+ raise RuntimeError("Version is not found.")
+
+print "#define WORD_BREAK_PROPERTY_VERSION %02d%02d%02d" % (VERSION_INFO[0], VERSION_INFO[1], VERSION_INFO[2])
+print ''
ranges = []
for prop in PROPS:
diff --git a/src/mktable.c b/src/mktable.c
index 80ac08a..318bac0 100644
--- a/src/mktable.c
+++ b/src/mktable.c
@@ -2,7 +2,7 @@
mktable.c
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/onig_init.c b/src/onig_init.c
index 7ad98b7..c660e7d 100644
--- a/src/onig_init.c
+++ b/src/onig_init.c
@@ -2,7 +2,7 @@
onig_init.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2016-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2016-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/oniggnu.h b/src/oniggnu.h
index d688883..96d9085 100644
--- a/src/oniggnu.h
+++ b/src/oniggnu.h
@@ -4,7 +4,7 @@
oniggnu.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2005 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/onigposix.h b/src/onigposix.h
index da0f919..5ff779f 100644
--- a/src/onigposix.h
+++ b/src/onigposix.h
@@ -4,7 +4,7 @@
onigposix.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -95,6 +95,7 @@ typedef struct {
#endif
#endif
+#ifndef ONIG_STATIC
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__GNUC__)
#if defined(ONIGURUMA_EXPORT)
@@ -108,6 +109,9 @@ typedef struct {
#ifndef ONIG_EXTERN
#define ONIG_EXTERN extern
#endif
+#else
+#define ONIG_EXTERN extern
+#endif
#ifndef ONIGURUMA_H
typedef unsigned int OnigOptionType;
diff --git a/src/oniguruma.h b/src/oniguruma.h
index f6aa5ba..08ac6f7 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -4,7 +4,7 @@
oniguruma.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -36,9 +36,9 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
#define ONIGURUMA_VERSION_MINOR 9
-#define ONIGURUMA_VERSION_TEENY 2
+#define ONIGURUMA_VERSION_TEENY 4
-#define ONIGURUMA_VERSION_INT 60902
+#define ONIGURUMA_VERSION_INT 60904
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
@@ -52,6 +52,7 @@ extern "C" {
# define PV_(args) args
#endif
+#ifndef ONIG_STATIC
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__GNUC__)
#if defined(ONIGURUMA_EXPORT)
@@ -65,6 +66,9 @@ extern "C" {
#ifndef ONIG_EXTERN
#define ONIG_EXTERN extern
#endif
+#else
+#define ONIG_EXTERN extern
+#endif
/* PART: character encoding */
@@ -517,6 +521,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
+#define ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC (1U<<26)
/* syntax (behavior) warning */
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
@@ -682,6 +687,14 @@ typedef OnigRegexType* OnigRegex;
typedef OnigRegexType regex_t;
#endif
+struct OnigRegSetStruct;
+typedef struct OnigRegSetStruct OnigRegSet;
+
+typedef enum {
+ ONIG_REGSET_POSITION_LEAD = 0,
+ ONIG_REGSET_REGEX_LEAD = 1,
+ ONIG_REGSET_PRIORITY_TO_REGEX_ORDER = 2
+} OnigRegSetLead;
typedef struct {
int num_of_elements;
@@ -766,6 +779,8 @@ int onig_init P_((void));
ONIG_EXTERN
int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...));
ONIG_EXTERN
+int onig_is_error_code_needs_param PV_((int code));
+ONIG_EXTERN
void onig_set_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
void onig_set_verb_warn_func P_((OnigWarnFunc f));
@@ -790,6 +805,26 @@ ONIG_EXTERN
int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option));
ONIG_EXTERN
int onig_match_with_param P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp));
+
+ONIG_EXTERN
+int onig_regset_new P_((OnigRegSet** rset, int n, regex_t* regs[]));
+ONIG_EXTERN
+int onig_regset_add P_((OnigRegSet* set, regex_t* reg));
+ONIG_EXTERN
+int onig_regset_replace P_((OnigRegSet* set, int at, regex_t* reg));
+ONIG_EXTERN
+void onig_regset_free P_((OnigRegSet* set));
+ONIG_EXTERN
+int onig_regset_number_of_regex P_((OnigRegSet* set));
+ONIG_EXTERN
+regex_t* onig_regset_get_regex P_((OnigRegSet* set, int at));
+ONIG_EXTERN
+OnigRegion* onig_regset_get_region P_((OnigRegSet* set, int at));
+ONIG_EXTERN
+int onig_regset_search P_((OnigRegSet* set, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegSetLead lead, OnigOptionType option, int* rmatch_pos));
+ONIG_EXTERN
+int onig_regset_search_with_param P_((OnigRegSet* set, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegSetLead lead, OnigOptionType option, OnigMatchParam* mps[], int* rmatch_pos));
+
ONIG_EXTERN
OnigRegion* onig_region_new P_((void));
ONIG_EXTERN
diff --git a/src/regcomp.c b/src/regcomp.c
index c2c04a4..69d4b95 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -2,7 +2,7 @@
regcomp.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -224,17 +224,17 @@ ops_free(regex_t* reg)
#endif
switch (opcode) {
- case OP_EXACTMBN:
+ case OP_STR_MBN:
if (! is_in_string_pool(reg, op->exact_len_n.s))
xfree(op->exact_len_n.s);
break;
- case OP_EXACTN: case OP_EXACTMB2N: case OP_EXACTMB3N: case OP_EXACTN_IC:
+ case OP_STR_N: case OP_STR_MB2N: case OP_STR_MB3N: case OP_STR_N_IC:
if (! is_in_string_pool(reg, op->exact_n.s))
xfree(op->exact_n.s);
break;
- case OP_EXACT1: case OP_EXACT2: case OP_EXACT3: case OP_EXACT4:
- case OP_EXACT5: case OP_EXACTMB2N1: case OP_EXACTMB2N2:
- case OP_EXACTMB2N3: case OP_EXACT1_IC:
+ case OP_STR_1: case OP_STR_2: case OP_STR_3: case OP_STR_4:
+ case OP_STR_5: case OP_STR_MB2N1: case OP_STR_MB2N2:
+ case OP_STR_MB2N3: case OP_STR_1_IC:
break;
case OP_CCLASS_NOT: case OP_CCLASS:
@@ -298,17 +298,17 @@ ops_calc_size_of_string_pool(regex_t* reg)
#endif
switch (opcode) {
- case OP_EXACTMBN:
+ case OP_STR_MBN:
total += op->exact_len_n.len * op->exact_len_n.n;
break;
- case OP_EXACTN:
- case OP_EXACTN_IC:
+ case OP_STR_N:
+ case OP_STR_N_IC:
total += op->exact_n.n;
break;
- case OP_EXACTMB2N:
+ case OP_STR_MB2N:
total += op->exact_n.n * 2;
break;
- case OP_EXACTMB3N:
+ case OP_STR_MB3N:
total += op->exact_n.n * 3;
break;
@@ -349,15 +349,15 @@ ops_make_string_pool(regex_t* reg)
#endif
switch (opcode) {
- case OP_EXACTMBN:
+ case OP_STR_MBN:
len = op->exact_len_n.len * op->exact_len_n.n;
xmemcpy(curr, op->exact_len_n.s, len);
xfree(op->exact_len_n.s);
op->exact_len_n.s = curr;
curr += len;
break;
- case OP_EXACTN:
- case OP_EXACTN_IC:
+ case OP_STR_N:
+ case OP_STR_N_IC:
len = op->exact_n.n;
copy:
xmemcpy(curr, op->exact_n.s, len);
@@ -365,11 +365,11 @@ ops_make_string_pool(regex_t* reg)
op->exact_n.s = curr;
curr += len;
break;
- case OP_EXACTMB2N:
+ case OP_STR_MB2N:
len = op->exact_n.n * 2;
goto copy;
break;
- case OP_EXACTMB3N:
+ case OP_STR_MB3N:
len = op->exact_n.n * 3;
goto copy;
break;
@@ -427,7 +427,7 @@ onig_positive_int_multiply(int x, int y)
static void
-swap_node(Node* a, Node* b)
+node_swap(Node* a, Node* b)
{
Node c;
@@ -452,6 +452,81 @@ swap_node(Node* a, Node* b)
}
}
+static int
+node_list_len(Node* list)
+{
+ int len;
+
+ len = 1;
+ while (IS_NOT_NULL(NODE_CDR(list))) {
+ list = NODE_CDR(list);
+ len++;
+ }
+
+ return len;
+}
+
+static Node*
+node_list_add(Node* list, Node* x)
+{
+ Node *n;
+
+ n = onig_node_new_list(x, NULL);
+ if (IS_NULL(n)) return NULL_NODE;
+
+ if (IS_NOT_NULL(list)) {
+ while (IS_NOT_NULL(NODE_CDR(list)))
+ list = NODE_CDR(list);
+
+ NODE_CDR(list) = n;
+ }
+
+ return n;
+}
+
+static int
+node_str_node_cat(Node* node, Node* add)
+{
+ int r;
+
+ if (STR_(node)->flag != STR_(add)->flag)
+ return ONIGERR_TYPE_BUG;
+
+ r = onig_node_str_cat(node, STR_(add)->s, STR_(add)->end);
+ if (r != 0) return r;
+
+ if (NODE_STRING_IS_CASE_FOLD_MATCH(node))
+ STR_(node)->case_min_len += STR_(add)->case_min_len;
+
+ return 0;
+}
+
+static int
+node_str_cat_case_fold(Node* node, const UChar* s, const UChar* end, int case_min_len)
+{
+ int r;
+
+ if (! NODE_STRING_IS_CASE_FOLD_MATCH(node))
+ return ONIGERR_TYPE_BUG;
+
+ r = onig_node_str_cat(node, s, end);
+ if (r != 0) return r;
+
+ STR_(node)->case_min_len += case_min_len;
+ return 0;
+}
+
+static void
+node_conv_to_str_node(Node* node, int flag)
+{
+ NODE_SET_TYPE(node, NODE_STRING);
+ STR_(node)->flag = flag;
+ STR_(node)->s = STR_(node)->buf;
+ STR_(node)->end = STR_(node)->buf;
+ STR_(node)->capacity = 0;
+ STR_(node)->case_min_len = 0;
+}
+
static OnigLen
distance_add(OnigLen d1, OnigLen d2)
{
@@ -549,81 +624,108 @@ static int compile_length_tree(Node* node, regex_t* reg);
static int compile_tree(Node* node, regex_t* reg, ScanEnv* env);
-#define IS_NEED_STR_LEN_OP_EXACT(op) \
- ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
- (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
+#define IS_NEED_STR_LEN_OP(op) \
+ ((op) == OP_STR_N || (op) == OP_STR_MB2N ||\
+ (op) == OP_STR_MB3N || (op) == OP_STR_MBN || (op) == OP_STR_N_IC)
static int
-select_str_opcode(int mb_len, int str_len, int ignore_case)
+select_str_opcode(int mb_len, int str_len)
{
int op;
- if (ignore_case) {
+ switch (mb_len) {
+ case 1:
switch (str_len) {
- case 1: op = OP_EXACT1_IC; break;
- default: op = OP_EXACTN_IC; break;
+ case 1: op = OP_STR_1; break;
+ case 2: op = OP_STR_2; break;
+ case 3: op = OP_STR_3; break;
+ case 4: op = OP_STR_4; break;
+ case 5: op = OP_STR_5; break;
+ default: op = OP_STR_N; break;
}
- }
- else {
- switch (mb_len) {
- case 1:
- switch (str_len) {
- case 1: op = OP_EXACT1; break;
- case 2: op = OP_EXACT2; break;
- case 3: op = OP_EXACT3; break;
- case 4: op = OP_EXACT4; break;
- case 5: op = OP_EXACT5; break;
- default: op = OP_EXACTN; break;
- }
- break;
+ break;
- case 2:
- switch (str_len) {
- case 1: op = OP_EXACTMB2N1; break;
- case 2: op = OP_EXACTMB2N2; break;
- case 3: op = OP_EXACTMB2N3; break;
- default: op = OP_EXACTMB2N; break;
- }
- break;
+ case 2:
+ switch (str_len) {
+ case 1: op = OP_STR_MB2N1; break;
+ case 2: op = OP_STR_MB2N2; break;
+ case 3: op = OP_STR_MB2N3; break;
+ default: op = OP_STR_MB2N; break;
+ }
+ break;
- case 3:
- op = OP_EXACTMB3N;
- break;
+ case 3:
+ op = OP_STR_MB3N;
+ break;
- default:
- op = OP_EXACTMBN;
- break;
- }
+ default:
+ op = OP_STR_MBN;
+ break;
}
+
return op;
}
static int
-compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)
+is_strict_real_node(Node* node)
+{
+ switch (NODE_TYPE(node)) {
+ case NODE_STRING:
+ {
+ StrNode* sn = STR_(node);
+ return (sn->end != sn->s);
+ }
+ break;
+
+ case NODE_CCLASS:
+ case NODE_CTYPE:
+ return 1;
+ break;
+
+ default:
+ return 0;
+ break;
+ }
+}
+
+static int
+compile_quant_body_with_empty_check(QuantNode* qn, regex_t* reg, ScanEnv* env)
{
int r;
- int saved_num_null_check = reg->num_null_check;
+ int saved_num_empty_check;
+ int emptiness;
+ Node* body;
- if (empty_info != BODY_IS_NOT_EMPTY) {
+ body = NODE_BODY((Node* )qn);
+ emptiness = qn->emptiness;
+ saved_num_empty_check = reg->num_empty_check;
+
+ if (emptiness != BODY_IS_NOT_EMPTY) {
r = add_op(reg, OP_EMPTY_CHECK_START);
if (r != 0) return r;
- COP(reg)->empty_check_start.mem = reg->num_null_check; /* NULL CHECK ID */
- reg->num_null_check++;
+ COP(reg)->empty_check_start.mem = reg->num_empty_check; /* NULL CHECK ID */
+ reg->num_empty_check++;
}
- r = compile_tree(node, reg, env);
+ r = compile_tree(body, reg, env);
if (r != 0) return r;
- if (empty_info != BODY_IS_NOT_EMPTY) {
- if (empty_info == BODY_IS_EMPTY)
+ if (emptiness != BODY_IS_NOT_EMPTY) {
+ if (emptiness == BODY_IS_EMPTY_POSSIBILITY)
r = add_op(reg, OP_EMPTY_CHECK_END);
- else if (empty_info == BODY_IS_EMPTY_MEM)
- r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);
- else if (empty_info == BODY_IS_EMPTY_REC)
+ else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM) {
+ if (NODE_IS_EMPTY_STATUS_CHECK(qn) != 0)
+ r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);
+ else
+ r = add_op(reg, OP_EMPTY_CHECK_END);
+ }
+#ifdef USE_CALL
+ else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_REC)
r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
+#endif
if (r != 0) return r;
- COP(reg)->empty_check_end.mem = saved_num_null_check; /* NULL CHECK ID */
+ COP(reg)->empty_check_end.mem = saved_num_empty_check; /* NULL CHECK ID */
}
return r;
}
@@ -660,14 +762,13 @@ compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env)
static int
add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, int str_len,
- regex_t* reg ARG_UNUSED, int ignore_case)
+ regex_t* reg ARG_UNUSED)
{
return 1;
}
static int
-add_compile_string(UChar* s, int mb_len, int str_len,
- regex_t* reg, int ignore_case)
+add_compile_string(UChar* s, int mb_len, int str_len, regex_t* reg)
{
int op;
int r;
@@ -675,14 +776,14 @@ add_compile_string(UChar* s, int mb_len, int str_len,
UChar* p;
UChar* end;
- op = select_str_opcode(mb_len, str_len, ignore_case);
+ op = select_str_opcode(mb_len, str_len);
r = add_op(reg, op);
if (r != 0) return r;
byte_len = mb_len * str_len;
end = s + byte_len;
- if (op == OP_EXACTMBN) {
+ if (op == OP_STR_MBN) {
p = onigenc_strdup(reg->enc, s, end);
CHECK_NULL_RETURN_MEMERR(p);
@@ -690,11 +791,11 @@ add_compile_string(UChar* s, int mb_len, int str_len,
COP(reg)->exact_len_n.n = str_len;
COP(reg)->exact_len_n.s = p;
}
- else if (IS_NEED_STR_LEN_OP_EXACT(op)) {
+ else if (IS_NEED_STR_LEN_OP(op)) {
p = onigenc_strdup(reg->enc, s, end);
CHECK_NULL_RETURN_MEMERR(p);
- if (op == OP_EXACTN_IC)
+ if (op == OP_STR_N_IC)
COP(reg)->exact_n.n = byte_len;
else
COP(reg)->exact_n.n = str_len;
@@ -702,8 +803,8 @@ add_compile_string(UChar* s, int mb_len, int str_len,
COP(reg)->exact_n.s = p;
}
else {
+ xmemset(COP(reg)->exact.s, 0, sizeof(COP(reg)->exact.s));
xmemcpy(COP(reg)->exact.s, s, (size_t )byte_len);
- COP(reg)->exact.s[byte_len] = '\0';
}
return 0;
@@ -712,7 +813,7 @@ add_compile_string(UChar* s, int mb_len, int str_len,
static int
compile_length_string_node(Node* node, regex_t* reg)
{
- int rlen, r, len, prev_len, slen, ambig;
+ int rlen, r, len, prev_len, slen;
UChar *p, *prev;
StrNode* sn;
OnigEncoding enc = reg->enc;
@@ -721,7 +822,7 @@ compile_length_string_node(Node* node, regex_t* reg)
if (sn->end <= sn->s)
return 0;
- ambig = NODE_STRING_IS_AMBIG(node);
+ if (NODE_STRING_IS_CASE_FOLD_MATCH(node) != 0) return 1;
p = prev = sn->s;
prev_len = enclen(enc, p);
@@ -735,7 +836,7 @@ compile_length_string_node(Node* node, regex_t* reg)
slen++;
}
else {
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string_length(prev, prev_len, slen, reg);
rlen += r;
prev = p;
slen = 1;
@@ -744,25 +845,59 @@ compile_length_string_node(Node* node, regex_t* reg)
p += len;
}
- r = add_compile_string_length(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string_length(prev, prev_len, slen, reg);
rlen += r;
return rlen;
}
static int
-compile_length_string_raw_node(StrNode* sn, regex_t* reg)
+compile_length_string_crude_node(StrNode* sn, regex_t* reg)
{
if (sn->end <= sn->s)
return 0;
return add_compile_string_length(sn->s, 1 /* sb */, (int )(sn->end - sn->s),
- reg, 0);
+ reg);
+}
+
+static int
+compile_ambig_string_node(Node* node, regex_t* reg)
+{
+ int r;
+ int len;
+ int byte_len;
+ UChar* p;
+ StrNode* sn;
+ OnigEncoding enc = reg->enc;
+
+ sn = STR_(node);
+ len = enclen(enc, sn->s);
+ byte_len = (int )(sn->end - sn->s);
+ if (len == byte_len) {
+ r = add_op(reg, OP_STR_1_IC);
+ if (r != 0) return r;
+
+ xmemset(COP(reg)->exact.s, 0, sizeof(COP(reg)->exact.s));
+ xmemcpy(COP(reg)->exact.s, sn->s, (size_t )byte_len);
+ }
+ else {
+ r = add_op(reg, OP_STR_N_IC);
+ if (r != 0) return r;
+
+ p = onigenc_strdup(enc, sn->s, sn->end);
+ CHECK_NULL_RETURN_MEMERR(p);
+
+ COP(reg)->exact_n.s = p;
+ COP(reg)->exact_n.n = byte_len;
+ }
+
+ return 0;
}
static int
compile_string_node(Node* node, regex_t* reg)
{
- int r, len, prev_len, slen, ambig;
+ int r, len, prev_len, slen;
UChar *p, *prev, *end;
StrNode* sn;
OnigEncoding enc = reg->enc;
@@ -772,7 +907,9 @@ compile_string_node(Node* node, regex_t* reg)
return 0;
end = sn->end;
- ambig = NODE_STRING_IS_AMBIG(node);
+ if (NODE_STRING_IS_CASE_FOLD_MATCH(node) != 0) {
+ return compile_ambig_string_node(node, reg);
+ }
p = prev = sn->s;
prev_len = enclen(enc, p);
@@ -785,7 +922,7 @@ compile_string_node(Node* node, regex_t* reg)
slen++;
}
else {
- r = add_compile_string(prev, prev_len, slen, reg, ambig);
+ r = add_compile_string(prev, prev_len, slen, reg);
if (r != 0) return r;
prev = p;
@@ -796,16 +933,16 @@ compile_string_node(Node* node, regex_t* reg)
p += len;
}
- return add_compile_string(prev, prev_len, slen, reg, ambig);
+ return add_compile_string(prev, prev_len, slen, reg);
}
static int
-compile_string_raw_node(StrNode* sn, regex_t* reg)
+compile_string_crude_node(StrNode* sn, regex_t* reg)
{
if (sn->end <= sn->s)
return 0;
- return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg, 0);
+ return add_compile_string(sn->s, 1 /* sb */, (int )(sn->end - sn->s), reg);
}
static void*
@@ -869,15 +1006,27 @@ compile_cclass_node(CClassNode* cc, regex_t* reg)
return 0;
}
+static void
+set_addr_in_repeat_range(regex_t* reg)
+{
+ int i;
+
+ for (i = 0; i < reg->num_repeat; i++) {
+ RepeatRange* p = reg->repeat_range + i;
+ int offset = p->u.offset;
+ p->u.pcode = reg->ops + offset;
+ }
+}
+
static int
-entry_repeat_range(regex_t* reg, int id, int lower, int upper)
+entry_repeat_range(regex_t* reg, int id, int lower, int upper, int ops_index)
{
#define REPEAT_RANGE_ALLOC 4
- OnigRepeatRange* p;
+ RepeatRange* p;
if (reg->repeat_range_alloc == 0) {
- p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
+ p = (RepeatRange* )xmalloc(sizeof(RepeatRange) * REPEAT_RANGE_ALLOC);
CHECK_NULL_RETURN_MEMERR(p);
reg->repeat_range = p;
reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
@@ -885,7 +1034,7 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
else if (reg->repeat_range_alloc <= id) {
int n;
n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
- p = (OnigRepeatRange* )xrealloc(reg->repeat_range, sizeof(OnigRepeatRange) * n);
+ p = (RepeatRange* )xrealloc(reg->repeat_range, sizeof(RepeatRange) * n);
CHECK_NULL_RETURN_MEMERR(p);
reg->repeat_range = p;
reg->repeat_range_alloc = n;
@@ -894,13 +1043,14 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
p = reg->repeat_range;
}
- p[id].lower = lower;
- p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
+ p[id].lower = lower;
+ p[id].upper = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper);
+ p[id].u.offset = ops_index;
return 0;
}
static int
-compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
+compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,
regex_t* reg, ScanEnv* env)
{
int r;
@@ -910,24 +1060,16 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
if (r != 0) return r;
COP(reg)->repeat.id = num_repeat;
- COP(reg)->repeat.addr = SIZE_INC_OP + target_len + SIZE_OP_REPEAT_INC;
+ COP(reg)->repeat.addr = SIZE_INC + target_len + OPSIZE_REPEAT_INC;
- r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
+ r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper,
+ COP_CURR_OFFSET(reg) + OPSIZE_REPEAT);
if (r != 0) return r;
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_quant_body_with_empty_check(qn, reg, env);
if (r != 0) return r;
- if (
-#ifdef USE_CALL
- NODE_IS_IN_MULTI_ENTRY(qn) ||
-#endif
- NODE_IS_IN_REAL_REPEAT(qn)) {
- r = add_op(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
- }
- else {
- r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
- }
+ r = add_op(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
if (r != 0) return r;
COP(reg)->repeat_inc.id = num_repeat;
@@ -937,7 +1079,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
static int
is_anychar_infinite_greedy(QuantNode* qn)
{
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
+ if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) &&
NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))
return 1;
else
@@ -951,8 +1093,8 @@ static int
compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
{
int len, mod_tlen;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- enum BodyEmpty empty_info = qn->empty_info;
+ int infinite = IS_INFINITE_REPEAT(qn->upper);
+ enum BodyEmptyType emptiness = qn->emptiness;
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
@@ -963,22 +1105,21 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
if (qn->lower <= 1 ||
int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) {
if (IS_NOT_NULL(qn->next_head_exact))
- return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
+ return OPSIZE_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
else
- return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
+ return OPSIZE_ANYCHAR_STAR + tlen * qn->lower;
}
}
- if (empty_info == BODY_IS_NOT_EMPTY)
- mod_tlen = tlen;
- else
- mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
+ mod_tlen = tlen;
+ if (emptiness != BODY_IS_NOT_EMPTY)
+ mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;
if (infinite &&
(qn->lower <= 1 ||
int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
- len = SIZE_OP_JUMP;
+ len = OPSIZE_JUMP;
}
else {
len = tlen * qn->lower;
@@ -987,36 +1128,36 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
if (qn->greedy) {
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact))
- len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
+ len += OPSIZE_PUSH_OR_JUMP_EXACT1 + mod_tlen + OPSIZE_JUMP;
else
#endif
if (IS_NOT_NULL(qn->next_head_exact))
- len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
+ len += OPSIZE_PUSH_IF_PEEK_NEXT + mod_tlen + OPSIZE_JUMP;
else
- len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
+ len += OPSIZE_PUSH + mod_tlen + OPSIZE_JUMP;
}
else
- len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
+ len += OPSIZE_JUMP + mod_tlen + OPSIZE_PUSH;
}
else if (qn->upper == 0) {
- if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
- len = SIZE_OP_JUMP + tlen;
+ if (qn->include_referred != 0) { /* /(?<n>..){0}/ */
+ len = OPSIZE_JUMP + tlen;
}
else
len = 0;
}
else if (!infinite && qn->greedy &&
(qn->upper == 1 ||
- int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
+ int_multiply_cmp(tlen + OPSIZE_PUSH, qn->upper,
QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
len = tlen * qn->lower;
- len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
+ len += (OPSIZE_PUSH + tlen) * (qn->upper - qn->lower);
}
else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
- len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
+ len = OPSIZE_PUSH + OPSIZE_JUMP + tlen;
}
else {
- len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OP_REPEAT;
+ len = OPSIZE_REPEAT_INC + mod_tlen + OPSIZE_REPEAT;
}
return len;
@@ -1026,8 +1167,8 @@ static int
compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
{
int i, r, mod_tlen;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- enum BodyEmpty empty_info = qn->empty_info;
+ int infinite = IS_INFINITE_REPEAT(qn->upper);
+ enum BodyEmptyType emptiness = qn->emptiness;
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
@@ -1055,10 +1196,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
}
}
- if (empty_info == BODY_IS_NOT_EMPTY)
- mod_tlen = tlen;
- else
- mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
+ mod_tlen = tlen;
+ if (emptiness != BODY_IS_NOT_EMPTY)
+ mod_tlen += OPSIZE_EMPTY_CHECK_START + OPSIZE_EMPTY_CHECK_END;
if (infinite &&
(qn->lower <= 1 ||
@@ -1071,16 +1211,16 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
if (qn->greedy) {
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
if (IS_NOT_NULL(qn->head_exact))
- COP(reg)->jump.addr = SIZE_OP_PUSH_OR_JUMP_EXACT1 + SIZE_INC_OP;
+ COP(reg)->jump.addr = OPSIZE_PUSH_OR_JUMP_EXACT1 + SIZE_INC;
else
#endif
if (IS_NOT_NULL(qn->next_head_exact))
- COP(reg)->jump.addr = SIZE_OP_PUSH_IF_PEEK_NEXT + SIZE_INC_OP;
+ COP(reg)->jump.addr = OPSIZE_PUSH_IF_PEEK_NEXT + SIZE_INC;
else
- COP(reg)->jump.addr = SIZE_OP_PUSH + SIZE_INC_OP;
+ COP(reg)->jump.addr = OPSIZE_PUSH + SIZE_INC;
}
else {
- COP(reg)->jump.addr = SIZE_OP_JUMP + SIZE_INC_OP;
+ COP(reg)->jump.addr = OPSIZE_JUMP + SIZE_INC;
}
}
else {
@@ -1093,36 +1233,36 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
if (IS_NOT_NULL(qn->head_exact)) {
r = add_op(reg, OP_PUSH_OR_JUMP_EXACT1);
if (r != 0) return r;
- COP(reg)->push_or_jump_exact1.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
+ COP(reg)->push_or_jump_exact1.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
COP(reg)->push_or_jump_exact1.c = STR_(qn->head_exact)->s[0];
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_quant_body_with_empty_check(qn, reg, env);
if (r != 0) return r;
- addr = -(mod_tlen + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1);
+ addr = -(mod_tlen + (int )OPSIZE_PUSH_OR_JUMP_EXACT1);
}
else
#endif
if (IS_NOT_NULL(qn->next_head_exact)) {
r = add_op(reg, OP_PUSH_IF_PEEK_NEXT);
if (r != 0) return r;
- COP(reg)->push_if_peek_next.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
+ COP(reg)->push_if_peek_next.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
COP(reg)->push_if_peek_next.c = STR_(qn->next_head_exact)->s[0];
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_quant_body_with_empty_check(qn, reg, env);
if (r != 0) return r;
- addr = -(mod_tlen + (int )SIZE_OP_PUSH_IF_PEEK_NEXT);
+ addr = -(mod_tlen + (int )OPSIZE_PUSH_IF_PEEK_NEXT);
}
else {
r = add_op(reg, OP_PUSH);
if (r != 0) return r;
- COP(reg)->push.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
+ COP(reg)->push.addr = SIZE_INC + mod_tlen + OPSIZE_JUMP;
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_quant_body_with_empty_check(qn, reg, env);
if (r != 0) return r;
- addr = -(mod_tlen + (int )SIZE_OP_PUSH);
+ addr = -(mod_tlen + (int )OPSIZE_PUSH);
}
r = add_op(reg, OP_JUMP);
@@ -1132,9 +1272,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
else {
r = add_op(reg, OP_JUMP);
if (r != 0) return r;
- COP(reg)->jump.addr = mod_tlen + SIZE_INC_OP;
+ COP(reg)->jump.addr = mod_tlen + SIZE_INC;
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_quant_body_with_empty_check(qn, reg, env);
if (r != 0) return r;
r = add_op(reg, OP_PUSH);
@@ -1143,10 +1283,10 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
}
}
else if (qn->upper == 0) {
- if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
+ if (qn->include_referred != 0) { /* /(?<n>..){0}/ */
r = add_op(reg, OP_JUMP);
if (r != 0) return r;
- COP(reg)->jump.addr = tlen + SIZE_INC_OP;
+ COP(reg)->jump.addr = tlen + SIZE_INC;
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
}
@@ -1157,7 +1297,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
}
else if (! infinite && qn->greedy &&
(qn->upper == 1 ||
- int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper,
+ int_multiply_cmp(tlen + OPSIZE_PUSH, qn->upper,
QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) {
int n = qn->upper - qn->lower;
@@ -1165,7 +1305,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
if (r != 0) return r;
for (i = 0; i < n; i++) {
- int v = onig_positive_int_multiply(n - i, tlen + SIZE_OP_PUSH);
+ int v = onig_positive_int_multiply(n - i, tlen + OPSIZE_PUSH);
if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
r = add_op(reg, OP_PUSH);
@@ -1179,16 +1319,16 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
else if (! qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
r = add_op(reg, OP_PUSH);
if (r != 0) return r;
- COP(reg)->push.addr = SIZE_INC_OP + SIZE_OP_JUMP;
+ COP(reg)->push.addr = SIZE_INC + OPSIZE_JUMP;
r = add_op(reg, OP_JUMP);
if (r != 0) return r;
- COP(reg)->jump.addr = tlen + SIZE_INC_OP;
+ COP(reg)->jump.addr = tlen + SIZE_INC;
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
}
else {
- r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);
+ r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env);
}
return r;
}
@@ -1240,40 +1380,40 @@ compile_length_bag_node(BagNode* node, regex_t* reg)
#ifdef USE_CALL
if (node->m.regnum == 0 && NODE_IS_CALLED(node)) {
- len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
+ len = tlen + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;
return len;
}
if (NODE_IS_CALLED(node)) {
- len = SIZE_OP_MEMORY_START_PUSH + tlen
- + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
+ len = OPSIZE_MEM_START_PUSH + tlen
+ + OPSIZE_CALL + OPSIZE_JUMP + OPSIZE_RETURN;
+ if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+ ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);
else
len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);
}
else if (NODE_IS_RECURSION(node)) {
- len = SIZE_OP_MEMORY_START_PUSH;
- len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
+ len = OPSIZE_MEM_START_PUSH;
+ len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)
+ ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_REC);
}
else
#endif
{
- if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
- len = SIZE_OP_MEMORY_START_PUSH;
+ if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum))
+ len = OPSIZE_MEM_START_PUSH;
else
- len = SIZE_OP_MEMORY_START;
+ len = OPSIZE_MEM_START;
- len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)
- ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
+ len += tlen + (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum)
+ ? OPSIZE_MEM_END_PUSH : OPSIZE_MEM_END);
}
break;
case BAG_STOP_BACKTRACK:
- if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
+ if (NODE_IS_STRICT_REAL_REPEAT(node)) {
int v;
QuantNode* qn;
@@ -1283,10 +1423,10 @@ compile_length_bag_node(BagNode* node, regex_t* reg)
v = onig_positive_int_multiply(qn->lower, tlen);
if (v < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
- len = v + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
+ len = v + OPSIZE_PUSH + tlen + OPSIZE_POP_OUT + OPSIZE_JUMP;
}
else {
- len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END;
+ len = OPSIZE_ATOMIC_START + tlen + OPSIZE_ATOMIC_END;
}
break;
@@ -1298,8 +1438,8 @@ compile_length_bag_node(BagNode* node, regex_t* reg)
len = compile_length_tree(cond, reg);
if (len < 0) return len;
- len += SIZE_OP_PUSH;
- len += SIZE_OP_ATOMIC_START + SIZE_OP_ATOMIC_END;
+ len += OPSIZE_PUSH;
+ len += OPSIZE_ATOMIC_START + OPSIZE_ATOMIC_END;
if (IS_NOT_NULL(Then)) {
tlen = compile_length_tree(Then, reg);
@@ -1307,8 +1447,9 @@ compile_length_bag_node(BagNode* node, regex_t* reg)
len += tlen;
}
+ len += OPSIZE_JUMP + OPSIZE_ATOMIC_END;
+
if (IS_NOT_NULL(Else)) {
- len += SIZE_OP_JUMP;
tlen = compile_length_tree(Else, reg);
if (tlen < 0) return tlen;
len += tlen;
@@ -1331,24 +1472,25 @@ static int
compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
{
int r;
- int len;
#ifdef USE_CALL
if (NODE_IS_CALLED(node)) {
+ int len;
+
r = add_op(reg, OP_CALL);
if (r != 0) return r;
- node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + SIZE_OP_JUMP;
+ node->m.called_addr = COP_CURR_OFFSET(reg) + 1 + OPSIZE_JUMP;
NODE_STATUS_ADD(node, ADDR_FIXED);
COP(reg)->call.addr = (int )node->m.called_addr;
if (node->m.regnum == 0) {
len = compile_length_tree(NODE_BAG_BODY(node), reg);
- len += SIZE_OP_RETURN;
+ len += OPSIZE_RETURN;
r = add_op(reg, OP_JUMP);
if (r != 0) return r;
- COP(reg)->jump.addr = len + SIZE_INC_OP;
+ COP(reg)->jump.addr = len + SIZE_INC;
r = compile_tree(NODE_BAG_BODY(node), reg, env);
if (r != 0) return r;
@@ -1358,25 +1500,24 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
}
else {
len = compile_length_tree(NODE_BAG_BODY(node), reg);
- len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
+ len += (OPSIZE_MEM_START_PUSH + OPSIZE_RETURN);
+ if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
+ ? OPSIZE_MEM_END_PUSH_REC : OPSIZE_MEM_END_PUSH);
else
- len += (NODE_IS_RECURSION(node)
- ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
+ len += (NODE_IS_RECURSION(node) ? OPSIZE_MEM_END_REC : OPSIZE_MEM_END);
r = add_op(reg, OP_JUMP);
if (r != 0) return r;
- COP(reg)->jump.addr = len + SIZE_INC_OP;
+ COP(reg)->jump.addr = len + SIZE_INC;
}
}
#endif
- if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum))
- r = add_op(reg, OP_MEMORY_START_PUSH);
+ if (MEM_STATUS_AT0(reg->push_mem_start, node->m.regnum))
+ r = add_op(reg, OP_MEM_START_PUSH);
else
- r = add_op(reg, OP_MEMORY_START);
+ r = add_op(reg, OP_MEM_START);
if (r != 0) return r;
COP(reg)->memory_start.num = node->m.regnum;
@@ -1384,11 +1525,11 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
if (r != 0) return r;
#ifdef USE_CALL
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
+ if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
r = add_op(reg, (NODE_IS_RECURSION(node)
- ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
+ ? OP_MEM_END_PUSH_REC : OP_MEM_END_PUSH));
else
- r = add_op(reg, (NODE_IS_RECURSION(node) ? OP_MEMORY_END_REC : OP_MEMORY_END));
+ r = add_op(reg, (NODE_IS_RECURSION(node) ? OP_MEM_END_REC : OP_MEM_END));
if (r != 0) return r;
COP(reg)->memory_end.num = node->m.regnum;
@@ -1397,10 +1538,10 @@ compile_bag_memory_node(BagNode* node, regex_t* reg, ScanEnv* env)
r = add_op(reg, OP_RETURN);
}
#else
- if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum))
- r = add_op(reg, OP_MEMORY_END_PUSH);
+ if (MEM_STATUS_AT0(reg->push_mem_end, node->m.regnum))
+ r = add_op(reg, OP_MEM_END_PUSH);
else
- r = add_op(reg, OP_MEMORY_END);
+ r = add_op(reg, OP_MEM_END);
if (r != 0) return r;
COP(reg)->memory_end.num = node->m.regnum;
#endif
@@ -1423,7 +1564,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
break;
case BAG_STOP_BACKTRACK:
- if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
+ if (NODE_IS_STRICT_REAL_REPEAT(node)) {
QuantNode* qn = QUANT_(NODE_BAG_BODY(node));
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
if (r != 0) return r;
@@ -1433,7 +1574,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
r = add_op(reg, OP_PUSH);
if (r != 0) return r;
- COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_POP_OUT + SIZE_OP_JUMP;
+ COP(reg)->push.addr = SIZE_INC + len + OPSIZE_POP_OUT + OPSIZE_JUMP;
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
if (r != 0) return r;
@@ -1442,7 +1583,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
r = add_op(reg, OP_JUMP);
if (r != 0) return r;
- COP(reg)->jump.addr = -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT);
+ COP(reg)->jump.addr = -((int )OPSIZE_PUSH + len + (int )OPSIZE_POP_OUT);
}
else {
r = add_op(reg, OP_ATOMIC_START);
@@ -1455,7 +1596,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
case BAG_IF_ELSE:
{
- int cond_len, then_len, jump_len;
+ int cond_len, then_len, else_len, jump_len;
Node* cond = NODE_BAG_BODY(node);
Node* Then = node->te.Then;
Node* Else = node->te.Else;
@@ -1472,12 +1613,11 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
else
then_len = 0;
- jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END;
- if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP;
+ jump_len = cond_len + then_len + OPSIZE_ATOMIC_END + OPSIZE_JUMP;
r = add_op(reg, OP_PUSH);
if (r != 0) return r;
- COP(reg)->push.addr = SIZE_INC_OP + jump_len;
+ COP(reg)->push.addr = SIZE_INC + jump_len;
r = compile_tree(cond, reg, env);
if (r != 0) return r;
@@ -1490,11 +1630,20 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
}
if (IS_NOT_NULL(Else)) {
- int else_len = compile_length_tree(Else, reg);
- r = add_op(reg, OP_JUMP);
- if (r != 0) return r;
- COP(reg)->jump.addr = else_len + SIZE_INC_OP;
+ else_len = compile_length_tree(Else, reg);
+ if (else_len < 0) return else_len;
+ }
+ else
+ else_len = 0;
+
+ r = add_op(reg, OP_JUMP);
+ if (r != 0) return r;
+ COP(reg)->jump.addr = OPSIZE_ATOMIC_END + else_len + SIZE_INC;
+ r = add_op(reg, OP_ATOMIC_END);
+ if (r != 0) return r;
+
+ if (IS_NOT_NULL(Else)) {
r = compile_tree(Else, reg, env);
}
}
@@ -1517,16 +1666,16 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)
switch (node->type) {
case ANCR_PREC_READ:
- len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END;
+ len = OPSIZE_PREC_READ_START + tlen + OPSIZE_PREC_READ_END;
break;
case ANCR_PREC_READ_NOT:
- len = SIZE_OP_PREC_READ_NOT_START + tlen + SIZE_OP_PREC_READ_NOT_END;
+ len = OPSIZE_PREC_READ_NOT_START + tlen + OPSIZE_PREC_READ_NOT_END;
break;
case ANCR_LOOK_BEHIND:
- len = SIZE_OP_LOOK_BEHIND + tlen;
+ len = OPSIZE_LOOK_BEHIND + tlen;
break;
case ANCR_LOOK_BEHIND_NOT:
- len = SIZE_OP_LOOK_BEHIND_NOT_START + tlen + SIZE_OP_LOOK_BEHIND_NOT_END;
+ len = OPSIZE_LOOK_BEHIND_NOT_START + tlen + OPSIZE_LOOK_BEHIND_NOT_END;
break;
case ANCR_WORD_BOUNDARY:
@@ -1535,7 +1684,7 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg)
case ANCR_WORD_BEGIN:
case ANCR_WORD_END:
#endif
- len = SIZE_OP_WORD_BOUNDARY;
+ len = OPSIZE_WORD_BOUNDARY;
break;
case ANCR_TEXT_SEGMENT_BOUNDARY:
@@ -1619,7 +1768,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
r = add_op(reg, OP_PREC_READ_NOT_START);
if (r != 0) return r;
- COP(reg)->prec_read_not_start.addr = SIZE_INC_OP + len + SIZE_OP_PREC_READ_NOT_END;
+ COP(reg)->prec_read_not_start.addr = SIZE_INC + len + OPSIZE_PREC_READ_NOT_END;
r = compile_tree(NODE_ANCHOR_BODY(node), reg, env);
if (r != 0) return r;
r = add_op(reg, OP_PREC_READ_NOT_END);
@@ -1649,7 +1798,7 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env)
len = compile_length_tree(NODE_ANCHOR_BODY(node), reg);
r = add_op(reg, OP_LOOK_BEHIND_NOT_START);
if (r != 0) return r;
- COP(reg)->look_behind_not_start.addr = SIZE_INC_OP + len + SIZE_OP_LOOK_BEHIND_NOT_END;
+ COP(reg)->look_behind_not_start.addr = SIZE_INC + len + OPSIZE_LOOK_BEHIND_NOT_END;
if (node->char_len < 0) {
r = get_char_len_node(NODE_ANCHOR_BODY(node), reg, &n);
@@ -1735,25 +1884,25 @@ compile_length_gimmick_node(GimmickNode* node, regex_t* reg)
switch (node->type) {
case GIMMICK_FAIL:
- len = SIZE_OP_FAIL;
+ len = OPSIZE_FAIL;
break;
case GIMMICK_SAVE:
- len = SIZE_OP_PUSH_SAVE_VAL;
+ len = OPSIZE_PUSH_SAVE_VAL;
break;
case GIMMICK_UPDATE_VAR:
- len = SIZE_OP_UPDATE_VAR;
+ len = OPSIZE_UPDATE_VAR;
break;
#ifdef USE_CALLOUT
case GIMMICK_CALLOUT:
switch (node->detail_type) {
case ONIG_CALLOUT_OF_CONTENTS:
- len = SIZE_OP_CALLOUT_CONTENTS;
+ len = OPSIZE_CALLOUT_CONTENTS;
break;
case ONIG_CALLOUT_OF_NAME:
- len = SIZE_OP_CALLOUT_NAME;
+ len = OPSIZE_CALLOUT_NAME;
break;
default:
@@ -1792,13 +1941,13 @@ compile_length_tree(Node* node, regex_t* reg)
r += compile_length_tree(NODE_CAR(node), reg);
n++;
} while (IS_NOT_NULL(node = NODE_CDR(node)));
- r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
+ r += (OPSIZE_PUSH + OPSIZE_JUMP) * (n - 1);
}
break;
case NODE_STRING:
- if (NODE_STRING_IS_RAW(node))
- r = compile_length_string_raw_node(STR_(node), reg);
+ if (NODE_STRING_IS_CRUDE(node))
+ r = compile_length_string_crude_node(STR_(node), reg);
else
r = compile_length_string_node(node, reg);
break;
@@ -1812,12 +1961,12 @@ compile_length_tree(Node* node, regex_t* reg)
break;
case NODE_BACKREF:
- r = SIZE_OP_BACKREF;
+ r = OPSIZE_BACKREF;
break;
#ifdef USE_CALL
case NODE_CALL:
- r = SIZE_OP_CALL;
+ r = OPSIZE_CALL;
break;
#endif
@@ -1864,7 +2013,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
do {
len += compile_length_tree(NODE_CAR(x), reg);
if (IS_NOT_NULL(NODE_CDR(x))) {
- len += SIZE_OP_PUSH + SIZE_OP_JUMP;
+ len += OPSIZE_PUSH + OPSIZE_JUMP;
}
} while (IS_NOT_NULL(x = NODE_CDR(x)));
pos = COP_CURR_OFFSET(reg) + 1 + len; /* goal position */
@@ -1875,7 +2024,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH;
r = add_op(reg, push);
if (r != 0) break;
- COP(reg)->push.addr = SIZE_INC_OP + len + SIZE_OP_JUMP;
+ COP(reg)->push.addr = SIZE_INC + len + OPSIZE_JUMP;
}
r = compile_tree(NODE_CAR(node), reg, env);
if (r != 0) break;
@@ -1890,8 +2039,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env)
break;
case NODE_STRING:
- if (NODE_STRING_IS_RAW(node))
- r = compile_string_raw_node(STR_(node), reg);
+ if (NODE_STRING_IS_CRUDE(node))
+ r = compile_string_crude_node(STR_(node), reg);
else
r = compile_string_node(node, reg);
break;
@@ -2061,8 +2210,9 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
Node** ptarget = &(NODE_BODY(node));
Node* old = *ptarget;
r = noname_disable_map(ptarget, map, counter);
+ if (r != 0) return r;
if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) {
- onig_reduce_nested_quantifier(node, *ptarget);
+ r = onig_reduce_nested_quantifier(node);
}
}
break;
@@ -2274,11 +2424,11 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
}
}
- loc = env->capture_history;
- MEM_STATUS_CLEAR(env->capture_history);
+ loc = env->cap_history;
+ MEM_STATUS_CLEAR(env->cap_history);
for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
if (MEM_STATUS_AT(loc, i)) {
- MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val);
+ MEM_STATUS_ON_SIMPLE(env->cap_history, map[i].new_val);
}
}
@@ -2654,7 +2804,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg)
len = NODE_STRING_LEN(x);
if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y);
- if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) {
+ if (NODE_STRING_IS_CASE_FOLD_MATCH(x) || NODE_STRING_IS_CASE_FOLD_MATCH(y)) {
/* tiny version */
return 0;
}
@@ -2714,7 +2864,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg)
break;
if (exact == 0 ||
- ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_RAW(node)) {
+ ! IS_IGNORECASE(reg->options) || NODE_STRING_IS_CRUDE(node)) {
n = node;
}
}
@@ -2842,9 +2992,9 @@ tree_min_len(Node* node, ScanEnv* env)
if (NODE_IS_RECURSION(node)) break;
backs = BACKREFS_P(br);
- len = tree_min_len(mem_env[backs[0]].node, env);
+ len = tree_min_len(mem_env[backs[0]].mem_node, env);
for (i = 1; i < br->back_num; i++) {
- tmin = tree_min_len(mem_env[backs[i]].node, env);
+ tmin = tree_min_len(mem_env[backs[i]].mem_node, env);
if (len > tmin) len = tmin;
}
}
@@ -3013,7 +3163,7 @@ tree_max_len(Node* node, ScanEnv* env)
}
backs = BACKREFS_P(br);
for (i = 0; i < br->back_num; i++) {
- tmax = tree_max_len(mem_env[backs[i]].node, env);
+ tmax = tree_max_len(mem_env[backs[i]].mem_node, env);
if (len < tmax) len = tmax;
}
}
@@ -3035,7 +3185,7 @@ tree_max_len(Node* node, ScanEnv* env)
if (qn->upper != 0) {
len = tree_max_len(NODE_BODY(node), env);
if (len != 0) {
- if (! IS_REPEAT_INFINITE(qn->upper))
+ if (! IS_INFINITE_REPEAT(qn->upper))
len = distance_multiply(len, qn->upper);
else
len = INFINITE_LEN;
@@ -3150,7 +3300,7 @@ check_backrefs(Node* node, ScanEnv* env)
if (backs[i] > env->num_mem)
return ONIGERR_INVALID_BACKREF;
- NODE_STATUS_ADD(mem_env[backs[i]].node, BACKREF);
+ NODE_STATUS_ADD(mem_env[backs[i]].mem_node, BACKREF);
}
r = 0;
}
@@ -3164,6 +3314,204 @@ check_backrefs(Node* node, ScanEnv* env)
return r;
}
+static int
+set_empty_repeat_node_trav(Node* node, Node* empty, ScanEnv* env)
+{
+ int r;
+
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
+ do {
+ r = set_empty_repeat_node_trav(NODE_CAR(node), empty, env);
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_ANCHOR:
+ {
+ AnchorNode* an = ANCHOR_(node);
+
+ if (! ANCHOR_HAS_BODY(an)) {
+ r = 0;
+ break;
+ }
+
+ switch (an->type) {
+ case ANCR_PREC_READ:
+ case ANCR_LOOK_BEHIND:
+ empty = NULL_NODE;
+ break;
+ default:
+ break;
+ }
+ r = set_empty_repeat_node_trav(NODE_BODY(node), empty, env);
+ }
+ break;
+
+ case NODE_QUANT:
+ {
+ QuantNode* qn = QUANT_(node);
+
+ if (qn->emptiness != BODY_IS_NOT_EMPTY) empty = node;
+ r = set_empty_repeat_node_trav(NODE_BODY(node), empty, env);
+ }
+ break;
+
+ case NODE_BAG:
+ if (IS_NOT_NULL(NODE_BODY(node))) {
+ r = set_empty_repeat_node_trav(NODE_BODY(node), empty, env);
+ if (r != 0) return r;
+ }
+ {
+ BagNode* en = BAG_(node);
+
+ if (en->type == BAG_MEMORY) {
+ if (NODE_IS_BACKREF(node)) {
+ if (IS_NOT_NULL(empty))
+ SCANENV_MEMENV(env)[en->m.regnum].empty_repeat_node = empty;
+ }
+ }
+ else if (en->type == BAG_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = set_empty_repeat_node_trav(en->te.Then, empty, env);
+ if (r != 0) return r;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ r = set_empty_repeat_node_trav(en->te.Else, empty, env);
+ }
+ }
+ }
+ break;
+
+ default:
+ r = 0;
+ break;
+ }
+
+ return r;
+}
+
+static int
+is_ancestor_node(Node* node, Node* me)
+{
+ Node* parent;
+
+ while ((parent = NODE_PARENT(me)) != NULL_NODE) {
+ if (parent == node) return 1;
+ me = parent;
+ }
+ return 0;
+}
+
+static void
+set_empty_status_check_trav(Node* node, ScanEnv* env)
+{
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
+ do {
+ set_empty_status_check_trav(NODE_CAR(node), env);
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_ANCHOR:
+ {
+ AnchorNode* an = ANCHOR_(node);
+
+ if (! ANCHOR_HAS_BODY(an)) break;
+ set_empty_status_check_trav(NODE_BODY(node), env);
+ }
+ break;
+
+ case NODE_QUANT:
+ set_empty_status_check_trav(NODE_BODY(node), env);
+ break;
+
+ case NODE_BAG:
+ if (IS_NOT_NULL(NODE_BODY(node)))
+ set_empty_status_check_trav(NODE_BODY(node), env);
+ {
+ BagNode* en = BAG_(node);
+
+ if (en->type == BAG_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ set_empty_status_check_trav(en->te.Then, env);
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ set_empty_status_check_trav(en->te.Else, env);
+ }
+ }
+ }
+ break;
+
+ case NODE_BACKREF:
+ {
+ int i;
+ int* backs;
+ MemEnv* mem_env = SCANENV_MEMENV(env);
+ BackRefNode* br = BACKREF_(node);
+ backs = BACKREFS_P(br);
+ for (i = 0; i < br->back_num; i++) {
+ Node* ernode = mem_env[backs[i]].empty_repeat_node;
+ if (IS_NOT_NULL(ernode)) {
+ if (! is_ancestor_node(ernode, node)) {
+ MEM_STATUS_LIMIT_ON(env->reg->empty_status_mem, backs[i]);
+ NODE_STATUS_ADD(ernode, EMPTY_STATUS_CHECK);
+ NODE_STATUS_ADD(mem_env[backs[i]].mem_node, EMPTY_STATUS_CHECK);
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+set_parent_node_trav(Node* node, Node* parent)
+{
+ NODE_PARENT(node) = parent;
+
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ case NODE_ALT:
+ do {
+ set_parent_node_trav(NODE_CAR(node), node);
+ } while (IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_ANCHOR:
+ if (! ANCHOR_HAS_BODY(ANCHOR_(node))) break;
+ set_parent_node_trav(NODE_BODY(node), node);
+ break;
+
+ case NODE_QUANT:
+ set_parent_node_trav(NODE_BODY(node), node);
+ break;
+
+ case NODE_BAG:
+ if (IS_NOT_NULL(NODE_BODY(node)))
+ set_parent_node_trav(NODE_BODY(node), node);
+ {
+ BagNode* en = BAG_(node);
+
+ if (en->type == BAG_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then))
+ set_parent_node_trav(en->te.Then, node);
+ if (IS_NOT_NULL(en->te.Else)) {
+ set_parent_node_trav(en->te.Else, node);
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+}
+
#ifdef USE_CALL
@@ -3269,6 +3617,9 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head)
if ((eret & RECURSION_MUST) == 0)
r &= ~RECURSION_MUST;
}
+ else {
+ r &= ~RECURSION_MUST;
+ }
}
else {
r = infinite_recursive_call_check(NODE_BODY(node), env, head);
@@ -3443,7 +3794,7 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)
r = recursive_call_check_trav(NODE_BODY(node), env, state);
if (QUANT_(node)->upper == 0) {
if (r == FOUND_CALLED_NODE)
- QUANT_(node)->is_refered = 1;
+ QUANT_(node)->include_referred = 1;
}
break;
@@ -3466,8 +3817,10 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)
if (! NODE_IS_RECURSION(node)) {
NODE_STATUS_ADD(node, MARK1);
r = recursive_call_check(NODE_BODY(node));
- if (r != 0)
+ if (r != 0) {
NODE_STATUS_ADD(node, RECURSION);
+ MEM_STATUS_ON(env->backtrack_mem, en->m.regnum);
+ }
NODE_STATUS_REMOVE(node, MARK1);
}
@@ -3508,6 +3861,96 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state)
#endif
+static void
+remove_from_list(Node* prev, Node* a)
+{
+ if (NODE_CDR(prev) != a) return ;
+
+ NODE_CDR(prev) = NODE_CDR(a);
+ NODE_CDR(a) = NULL_NODE;
+}
+
+static int
+reduce_string_list(Node* node)
+{
+ int r = 0;
+
+ switch (NODE_TYPE(node)) {
+ case NODE_LIST:
+ {
+ Node* prev;
+ Node* curr;
+ Node* prev_node;
+ Node* next_node;
+
+ prev = NULL_NODE;
+ do {
+ next_node = NODE_CDR(node);
+ curr = NODE_CAR(node);
+ if (NODE_TYPE(curr) == NODE_STRING) {
+ if (IS_NULL(prev) || STR_(curr)->flag != STR_(prev)->flag) {
+ prev = curr;
+ prev_node = node;
+ }
+ else {
+ r = node_str_node_cat(prev, curr);
+ if (r != 0) return r;
+ remove_from_list(prev_node, node);
+ onig_node_free(node);
+ }
+ }
+ else {
+ prev = NULL_NODE;
+ prev_node = node;
+ }
+
+ node = next_node;
+ } while (r == 0 && IS_NOT_NULL(node));
+ }
+ break;
+
+ case NODE_ALT:
+ do {
+ r = reduce_string_list(NODE_CAR(node));
+ } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
+ break;
+
+ case NODE_ANCHOR:
+ if (IS_NULL(NODE_BODY(node)))
+ break;
+ /* fall */
+ case NODE_QUANT:
+ r = reduce_string_list(NODE_BODY(node));
+ break;
+
+ case NODE_BAG:
+ {
+ BagNode* en = BAG_(node);
+
+ r = reduce_string_list(NODE_BODY(node));
+ if (r != 0) return r;
+
+ if (en->type == BAG_IF_ELSE) {
+ if (IS_NOT_NULL(en->te.Then)) {
+ r = reduce_string_list(en->te.Then);
+ if (r != 0) return r;
+ }
+ if (IS_NOT_NULL(en->te.Else)) {
+ r = reduce_string_list(en->te.Else);
+ if (r != 0) return r;
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return r;
+}
+
+
#define IN_ALT (1<<0)
#define IN_NOT (1<<1)
#define IN_REAL_REPEAT (1<<2)
@@ -3530,7 +3973,7 @@ divide_look_behind_alternatives(Node* node)
head = NODE_ANCHOR_BODY(an);
np = NODE_CAR(head);
- swap_node(node, head);
+ node_swap(node, head);
NODE_CAR(node) = head;
NODE_BODY(head) = np;
@@ -3552,7 +3995,7 @@ divide_look_behind_alternatives(Node* node)
}
static int
-setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
+tune_look_behind(Node* node, regex_t* reg, ScanEnv* env)
{
int r, len;
AnchorNode* an = ANCHOR_(node);
@@ -3573,7 +4016,7 @@ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
}
static int
-next_setup(Node* node, Node* next_node, regex_t* reg)
+tune_next(Node* node, Node* next_node, regex_t* reg)
{
NodeType type;
@@ -3581,7 +4024,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
type = NODE_TYPE(node);
if (type == NODE_QUANT) {
QuantNode* qn = QUANT_(node);
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
+ if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {
#ifdef USE_QUANT_PEEK_NEXT
Node* n = get_head_value_node(next_node, 1, reg);
/* '\0': for UTF-16BE etc... */
@@ -3591,7 +4034,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
#endif
/* automatic posseivation a*b ==> (?>a*)b */
if (qn->lower <= 1) {
- if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) {
+ if (is_strict_real_node(NODE_BODY(node))) {
Node *x, *y;
x = get_head_value_node(NODE_BODY(node), 0, reg);
if (IS_NOT_NULL(x)) {
@@ -3599,8 +4042,8 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);
CHECK_NULL_RETURN_MEMERR(en);
- NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT);
- swap_node(node, en);
+ NODE_STATUS_ADD(en, STRICT_REAL_REPEAT);
+ node_swap(node, en);
NODE_BODY(node) = en;
}
}
@@ -3620,23 +4063,57 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
static int
-update_string_node_case_fold(regex_t* reg, Node *node)
+is_all_code_len_1_items(int n, OnigCaseFoldCodeItem items[])
{
- UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ int i;
+
+ for (i = 0; i < n; i++) {
+ OnigCaseFoldCodeItem* item = items + i;
+ if (item->code_len != 1) return 0;
+ }
+
+ return 1;
+}
+
+static int
+get_min_max_byte_len_case_fold_items(int n, OnigCaseFoldCodeItem items[], int* rmin, int* rmax)
+{
+ int i, len, minlen, maxlen;
+
+ minlen = INT_MAX;
+ maxlen = 0;
+ for (i = 0; i < n; i++) {
+ OnigCaseFoldCodeItem* item = items + i;
+
+ len = item->byte_len;
+ if (len < minlen) minlen = len;
+ if (len > maxlen) maxlen = len;
+ }
+
+ *rmin = minlen;
+ *rmax = maxlen;
+ return 0;
+}
+
+static int
+conv_string_case_fold(OnigEncoding enc, OnigCaseFoldType case_fold_flag,
+ UChar* s, UChar* end, UChar** rs, UChar** rend, int* rcase_min_len)
+{
+ UChar *p, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
UChar *sbuf, *ebuf, *sp;
- int r, i, len, sbuf_size;
- StrNode* sn = STR_(node);
+ int i, n, len, sbuf_size;
- end = sn->end;
- sbuf_size = (int )(end - sn->s) * 2;
+ *rs = NULL;
+ sbuf_size = (int )(end - s) * 2;
sbuf = (UChar* )xmalloc(sbuf_size);
CHECK_NULL_RETURN_MEMERR(sbuf);
ebuf = sbuf + sbuf_size;
+ n = 0;
sp = sbuf;
- p = sn->s;
+ p = s;
while (p < end) {
- len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
+ len = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, buf);
for (i = 0; i < len; i++) {
if (sp >= ebuf) {
sbuf = (UChar* )xrealloc(sbuf, sbuf_size * 2);
@@ -3648,364 +4125,310 @@ update_string_node_case_fold(regex_t* reg, Node *node)
*sp++ = buf[i];
}
+ n++;
}
- r = onig_node_str_set(node, sbuf, sp);
- if (r != 0) {
- xfree(sbuf);
- return r;
- }
-
- xfree(sbuf);
+ *rs = sbuf;
+ *rend = sp;
+ *rcase_min_len = n;
return 0;
}
static int
-expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* reg)
+make_code_list_to_string(Node** rnode, OnigEncoding enc,
+ int n, OnigCodePoint codes[])
{
- int r;
- Node *node;
+ int r, i, len;
+ Node* node;
+ UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- node = onig_node_new_str(s, end);
- if (IS_NULL(node)) return ONIGERR_MEMORY;
+ *rnode = NULL_NODE;
+ node = onig_node_new_str(NULL, NULL);
+ CHECK_NULL_RETURN_MEMERR(node);
- r = update_string_node_case_fold(reg, node);
- if (r != 0) {
- onig_node_free(node);
- return r;
+ for (i = 0; i < n; i++) {
+ len = ONIGENC_CODE_TO_MBC(enc, codes[i], buf);
+ if (len < 0) {
+ r = len;
+ goto err;
+ }
+
+ r = onig_node_str_cat(node, buf, buf + len);
+ if (r != 0) goto err;
}
- NODE_STRING_SET_AMBIG(node);
- NODE_STRING_SET_DONT_GET_OPT_INFO(node);
*rnode = node;
return 0;
+
+ err:
+ onig_node_free(node);
+ return r;
}
static int
-expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p,
- int slen, UChar *end, regex_t* reg, Node **rnode)
+unravel_cf_node_add(Node** rlist, Node* add)
{
- int r, i, j;
- int len;
- int varlen;
- Node *anode, *var_anode, *snode, *xnode, *an;
- UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
+ Node *list;
- *rnode = var_anode = NULL_NODE;
-
- varlen = 0;
- for (i = 0; i < item_num; i++) {
- if (items[i].byte_len != slen) {
- varlen = 1;
- break;
- }
+ list = *rlist;
+ if (IS_NULL(list)) {
+ list = onig_node_new_list(add, NULL);
+ CHECK_NULL_RETURN_MEMERR(list);
+ *rlist = list;
+ }
+ else {
+ Node* r = node_list_add(list, add);
+ CHECK_NULL_RETURN_MEMERR(r);
}
- if (varlen != 0) {
- *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
+ return 0;
+}
+
+static int
+unravel_cf_string_add(Node** rlist, Node** rsn, UChar* s, UChar* end,
+ unsigned int flag, int case_min_len)
+{
+ int r;
+ Node *sn, *list;
- xnode = onig_node_new_list(NULL, NULL);
- if (IS_NULL(xnode)) goto mem_err;
- NODE_CAR(var_anode) = xnode;
+ list = *rlist;
+ sn = *rsn;
- anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(anode)) goto mem_err;
- NODE_CAR(xnode) = anode;
+ if (IS_NOT_NULL(sn) && STR_(sn)->flag == flag) {
+ if (NODE_STRING_IS_CASE_FOLD_MATCH(sn))
+ r = node_str_cat_case_fold(sn, s, end, case_min_len);
+ else
+ r = onig_node_str_cat(sn, s, end);
}
else {
- *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(anode)) return ONIGERR_MEMORY;
+ sn = onig_node_new_str(s, end);
+ CHECK_NULL_RETURN_MEMERR(sn);
+
+ STR_(sn)->flag = flag;
+ STR_(sn)->case_min_len = case_min_len;
+ r = unravel_cf_node_add(&list, sn);
}
- snode = onig_node_new_str(p, p + slen);
- if (IS_NULL(snode)) goto mem_err;
+ if (r == 0) {
+ *rlist = list;
+ *rsn = sn;
+ }
+ return r;
+}
- NODE_CAR(anode) = snode;
+static int
+unravel_cf_string_fold_add(Node** rlist, Node** rsn, OnigEncoding enc,
+ OnigCaseFoldType case_fold_flag, UChar* s, UChar* end)
+{
+ int r;
+ int case_min_len;
+ UChar *rs, *rend;
- for (i = 0; i < item_num; i++) {
- snode = onig_node_new_str(NULL, NULL);
- if (IS_NULL(snode)) goto mem_err;
+ r = conv_string_case_fold(enc, case_fold_flag, s, end,
+ &rs, &rend, &case_min_len);
+ if (r != 0) return r;
- for (j = 0; j < items[i].code_len; j++) {
- len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
- if (len < 0) {
- r = len;
- goto mem_err2;
- }
+ r = unravel_cf_string_add(rlist, rsn, rs, rend,
+ NODE_STRING_CASE_FOLD_MATCH, case_min_len);
+ xfree(rs);
- r = onig_node_str_cat(snode, buf, buf + len);
- if (r != 0) goto mem_err2;
- }
+ return r;
+}
- an = onig_node_new_alt(NULL_NODE, NULL_NODE);
- if (IS_NULL(an)) {
- goto mem_err2;
- }
+static int
+unravel_cf_string_alt_or_cc_add(Node** rlist, int n,
+ OnigCaseFoldCodeItem items[], int byte_len, OnigEncoding enc,
+ OnigCaseFoldType case_fold_flag, UChar* s, UChar* end)
+{
+ int r, i;
+ Node* node;
- if (items[i].byte_len != slen && IS_NOT_NULL(var_anode)) {
- Node *rem;
- UChar *q = p + items[i].byte_len;
+ if (is_all_code_len_1_items(n, items)) {
+ OnigCodePoint codes[14];/* least ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM + 1 */
- if (q < end) {
- r = expand_case_fold_make_rem_string(&rem, q, end, reg);
- if (r != 0) {
- onig_node_free(an);
- goto mem_err2;
- }
+ codes[0] = ONIGENC_MBC_TO_CODE(enc, s, end);
+ for (i = 0; i < n; i++) {
+ OnigCaseFoldCodeItem* item = items + i;
+ codes[i+1] = item->code[0];
+ }
+ r = onig_new_cclass_with_code_list(&node, enc, n + 1, codes);
+ if (r != 0) return r;
+ }
+ else {
+ Node *snode, *alt, *curr;
- xnode = onig_node_list_add(NULL_NODE, snode);
- if (IS_NULL(xnode)) {
- onig_node_free(an);
- onig_node_free(rem);
- goto mem_err2;
- }
- if (IS_NULL(onig_node_list_add(xnode, rem))) {
- onig_node_free(an);
- onig_node_free(xnode);
- onig_node_free(rem);
- goto mem_err;
- }
+ snode = onig_node_new_str(s, end);
+ CHECK_NULL_RETURN_MEMERR(snode);
+ node = curr = onig_node_new_alt(snode, NULL_NODE);
+ if (IS_NULL(curr)) {
+ onig_node_free(snode);
+ return ONIGERR_MEMORY;
+ }
- NODE_CAR(an) = xnode;
+ r = 0;
+ for (i = 0; i < n; i++) {
+ OnigCaseFoldCodeItem* item = items + i;
+ r = make_code_list_to_string(&snode, enc, item->code_len, item->code);
+ if (r != 0) {
+ onig_node_free(node);
+ return r;
}
- else {
- NODE_CAR(an) = snode;
+
+ alt = onig_node_new_alt(snode, NULL_NODE);
+ if (IS_NULL(alt)) {
+ onig_node_free(snode);
+ onig_node_free(node);
+ return ONIGERR_MEMORY;
}
- NODE_CDR(var_anode) = an;
- var_anode = an;
- }
- else {
- NODE_CAR(an) = snode;
- NODE_CDR(anode) = an;
- anode = an;
+ NODE_CDR(curr) = alt;
+ curr = alt;
}
}
- return varlen;
-
- mem_err2:
- onig_node_free(snode);
-
- mem_err:
- onig_node_free(*rnode);
-
- return ONIGERR_MEMORY;
+ r = unravel_cf_node_add(rlist, node);
+ if (r != 0) onig_node_free(node);
+ return r;
}
static int
-is_good_case_fold_items_for_search(OnigEncoding enc, int slen,
- int n, OnigCaseFoldCodeItem items[])
+unravel_cf_look_behind_add(Node** rlist, Node** rsn,
+ int n, OnigCaseFoldCodeItem items[], OnigEncoding enc,
+ UChar* s, int one_len)
{
- int i, len;
- UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
+ int r, i, found;
+ found = 0;
for (i = 0; i < n; i++) {
OnigCaseFoldCodeItem* item = items + i;
+ if (item->byte_len == one_len) {
+ if (item->code_len == 1) {
+ found = 1;
+ }
+ }
+ }
- if (item->code_len != 1) return 0;
- if (item->byte_len != slen) return 0;
- len = ONIGENC_CODE_TO_MBC(enc, item->code[0], buf);
- if (len != slen) return 0;
+ if (found == 0) {
+ r = unravel_cf_string_add(rlist, rsn, s, s + one_len, 0 /* flag */, 0);
}
+ else {
+ Node* node;
+ OnigCodePoint codes[14];/* least ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM + 1 */
- return 1;
-}
+ found = 0;
+ codes[found++] = ONIGENC_MBC_TO_CODE(enc, s, s + one_len);
+ for (i = 0; i < n; i++) {
+ OnigCaseFoldCodeItem* item = items + i;
+ if (item->byte_len == one_len) {
+ if (item->code_len == 1) {
+ codes[found++] = item->code[0];
+ }
+ }
+ }
+ r = onig_new_cclass_with_code_list(&node, enc, found, codes);
+ if (r != 0) return r;
+
+ r = unravel_cf_node_add(rlist, node);
+ if (r != 0) onig_node_free(node);
+
+ *rsn = NULL_NODE;
+ }
-#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
+ return r;
+}
static int
-expand_case_fold_string(Node* node, regex_t* reg, int state)
-{
- int r, n, len, alt_num;
- int fold_len;
- int prev_is_ambig, prev_is_good, is_good, is_in_look_behind;
- UChar *start, *end, *p;
- UChar* foldp;
- Node *top_root, *root, *snode, *prev_node;
+unravel_case_fold_string(Node* node, regex_t* reg, int state)
+{
+ int r, n, one_len, min_len, max_len, in_look_behind;
+ UChar *start, *end, *p, *q;
+ StrNode* snode;
+ Node *sn, *list;
+ OnigEncoding enc;
OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
- UChar buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
- StrNode* sn;
- if (NODE_STRING_IS_AMBIG(node)) return 0;
+ if (NODE_STRING_IS_CASE_EXPANDED(node)) return 0;
- sn = STR_(node);
+ snode = STR_(node);
- start = sn->s;
- end = sn->end;
+ start = snode->s;
+ end = snode->end;
if (start >= end) return 0;
- is_in_look_behind = (state & IN_LOOK_BEHIND) != 0;
+ in_look_behind = (state & IN_LOOK_BEHIND) != 0;
+ enc = reg->enc;
- r = 0;
- top_root = root = prev_node = snode = NULL_NODE;
- alt_num = 1;
+ list = sn = NULL_NODE;
p = start;
while (p < end) {
- n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
- p, end, items);
+ n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag, p, end,
+ items);
if (n < 0) {
r = n;
goto err;
}
- len = enclen(reg->enc, p);
- is_good = is_good_case_fold_items_for_search(reg->enc, len, n, items);
-
- if (is_in_look_behind ||
- (IS_NOT_NULL(snode) ||
- (is_good
- /* expand single char case: ex. /(?i:a)/ */
- && !(p == start && p + len >= end)))) {
- if (IS_NULL(snode)) {
- if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- prev_node = snode = onig_node_new_str(NULL, NULL);
- if (IS_NULL(snode)) goto mem_err;
- if (IS_NOT_NULL(root)) {
- if (IS_NULL(onig_node_list_add(root, snode))) {
- onig_node_free(snode);
- goto mem_err;
- }
- }
-
- prev_is_ambig = -1; /* -1: new */
- prev_is_good = 0; /* escape compiler warning */
- }
- else {
- prev_is_ambig = NODE_STRING_IS_AMBIG(snode);
- prev_is_good = NODE_STRING_IS_GOOD_AMBIG(snode);
- }
-
- if (n != 0) {
- foldp = p;
- fold_len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag,
- &foldp, end, buf);
- foldp = buf;
- }
- else {
- foldp = p; fold_len = len;
- }
-
- if ((prev_is_ambig == 0 && n != 0) ||
- (prev_is_ambig > 0 && (n == 0 || prev_is_good != is_good))) {
- if (IS_NULL(root) /* && IS_NOT_NULL(prev_node) */) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- prev_node = snode = onig_node_new_str(foldp, foldp + fold_len);
- if (IS_NULL(snode)) goto mem_err;
- if (IS_NULL(onig_node_list_add(root, snode))) {
- onig_node_free(snode);
- goto mem_err;
- }
- }
- else {
- r = onig_node_str_cat(snode, foldp, foldp + fold_len);
- if (r != 0) goto err;
- }
-
- if (n != 0) NODE_STRING_SET_AMBIG(snode);
- if (is_good != 0) NODE_STRING_SET_GOOD_AMBIG(snode);
+ one_len = enclen(enc, p);
+ if (n == 0) {
+ q = p + one_len;
+ r = unravel_cf_string_add(&list, &sn, p, q, 0 /* flag */, 0);
+ if (r != 0) goto err;
}
else {
- alt_num *= (n + 1);
- if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
-
- if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(prev_node);
- goto mem_err;
- }
+ if (in_look_behind != 0) {
+ q = p + one_len;
+ r = unravel_cf_look_behind_add(&list, &sn, n, items, enc, p, one_len);
+ if (r != 0) goto err;
}
-
- r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
- if (r < 0) goto mem_err;
- if (r == 1) {
- if (IS_NULL(root)) {
- top_root = prev_node;
+ else {
+ get_min_max_byte_len_case_fold_items(n, items, &min_len, &max_len);
+ q = p + max_len;
+ if (one_len == max_len && min_len == max_len) {
+ r = unravel_cf_string_alt_or_cc_add(&list, n, items, max_len, enc,
+ reg->case_fold_flag, p, q);
+ if (r != 0) goto err;
+ sn = NULL_NODE;
}
else {
- if (IS_NULL(onig_node_list_add(root, prev_node))) {
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- root = NODE_CAR(prev_node);
- }
- else { /* r == 0 */
- if (IS_NOT_NULL(root)) {
- if (IS_NULL(onig_node_list_add(root, prev_node))) {
- onig_node_free(prev_node);
- goto mem_err;
- }
+ r = unravel_cf_string_fold_add(&list, &sn, enc, reg->case_fold_flag,
+ p, q);
+ if (r != 0) goto err;
}
}
-
- snode = NULL_NODE;
}
- p += len;
+ p = q;
}
- if (p < end) {
- Node *srem;
-
- r = expand_case_fold_make_rem_string(&srem, p, end, reg);
- if (r != 0) goto mem_err;
-
- if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
- top_root = root = onig_node_list_add(NULL_NODE, prev_node);
- if (IS_NULL(root)) {
- onig_node_free(srem);
- onig_node_free(prev_node);
- goto mem_err;
- }
- }
-
- if (IS_NULL(root)) {
- prev_node = srem;
+ if (IS_NOT_NULL(list)) {
+ if (node_list_len(list) == 1) {
+ node_swap(node, NODE_CAR(list));
}
else {
- if (IS_NULL(onig_node_list_add(root, srem))) {
- onig_node_free(srem);
- goto mem_err;
- }
+ node_swap(node, list);
}
+ onig_node_free(list);
+ }
+ else {
+ node_swap(node, sn);
+ onig_node_free(sn);
}
-
- /* ending */
- top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
- swap_node(node, top_root);
- onig_node_free(top_root);
return 0;
- mem_err:
- r = ONIGERR_MEMORY;
-
err:
- onig_node_free(top_root);
+ if (IS_NOT_NULL(list))
+ onig_node_free(list);
+ else if (IS_NOT_NULL(sn))
+ onig_node_free(sn);
+
return r;
}
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
-static enum BodyEmpty
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
+static enum BodyEmptyType
quantifiers_memory_node_info(Node* node)
{
- int r = BODY_IS_EMPTY;
+ int r = BODY_IS_EMPTY_POSSIBILITY;
switch (NODE_TYPE(node)) {
case NODE_LIST:
@@ -4022,7 +4445,7 @@ quantifiers_memory_node_info(Node* node)
#ifdef USE_CALL
case NODE_CALL:
if (NODE_IS_RECURSION(node)) {
- return BODY_IS_EMPTY_REC; /* tiny version */
+ return BODY_IS_EMPTY_POSSIBILITY_REC; /* tiny version */
}
else
r = quantifiers_memory_node_info(NODE_BODY(node));
@@ -4044,9 +4467,9 @@ quantifiers_memory_node_info(Node* node)
switch (en->type) {
case BAG_MEMORY:
if (NODE_IS_RECURSION(node)) {
- return BODY_IS_EMPTY_REC;
+ return BODY_IS_EMPTY_POSSIBILITY_REC;
}
- return BODY_IS_EMPTY_MEM;
+ return BODY_IS_EMPTY_POSSIBILITY_MEM;
break;
case BAG_OPTION:
@@ -4083,7 +4506,7 @@ quantifiers_memory_node_info(Node* node)
return r;
}
-#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
+#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
#ifdef USE_CALL
@@ -4092,7 +4515,7 @@ quantifiers_memory_node_info(Node* node)
__inline
#endif
static int
-setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
+tune_call_node_call(CallNode* cn, ScanEnv* env, int state)
{
MemEnv* mem_env = SCANENV_MEMENV(env);
@@ -4112,7 +4535,7 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
}
set_call_attr:
- NODE_CALL_BODY(cn) = mem_env[cn->group_num].node;
+ NODE_CALL_BODY(cn) = mem_env[cn->group_num].mem_node;
if (IS_NULL(NODE_CALL_BODY(cn))) {
onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
cn->name, cn->name_end);
@@ -4143,23 +4566,23 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state)
}
static void
-setup_call2_call(Node* node)
+tune_call2_call(Node* node)
{
switch (NODE_TYPE(node)) {
case NODE_LIST:
case NODE_ALT:
do {
- setup_call2_call(NODE_CAR(node));
+ tune_call2_call(NODE_CAR(node));
} while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
case NODE_QUANT:
- setup_call2_call(NODE_BODY(node));
+ tune_call2_call(NODE_BODY(node));
break;
case NODE_ANCHOR:
if (ANCHOR_HAS_BODY(ANCHOR_(node)))
- setup_call2_call(NODE_BODY(node));
+ tune_call2_call(NODE_BODY(node));
break;
case NODE_BAG:
@@ -4169,19 +4592,19 @@ setup_call2_call(Node* node)
if (en->type == BAG_MEMORY) {
if (! NODE_IS_MARK1(node)) {
NODE_STATUS_ADD(node, MARK1);
- setup_call2_call(NODE_BODY(node));
+ tune_call2_call(NODE_BODY(node));
NODE_STATUS_REMOVE(node, MARK1);
}
}
else if (en->type == BAG_IF_ELSE) {
- setup_call2_call(NODE_BODY(node));
+ tune_call2_call(NODE_BODY(node));
if (IS_NOT_NULL(en->te.Then))
- setup_call2_call(en->te.Then);
+ tune_call2_call(en->te.Then);
if (IS_NOT_NULL(en->te.Else))
- setup_call2_call(en->te.Else);
+ tune_call2_call(en->te.Else);
}
else {
- setup_call2_call(NODE_BODY(node));
+ tune_call2_call(NODE_BODY(node));
}
}
break;
@@ -4197,7 +4620,7 @@ setup_call2_call(Node* node)
NODE_STATUS_ADD(called, CALLED);
BAG_(called)->m.entry_count++;
- setup_call2_call(called);
+ tune_call2_call(called);
}
NODE_STATUS_REMOVE(node, MARK1);
}
@@ -4209,7 +4632,7 @@ setup_call2_call(Node* node)
}
static int
-setup_call(Node* node, ScanEnv* env, int state)
+tune_call(Node* node, ScanEnv* env, int state)
{
int r;
@@ -4217,7 +4640,7 @@ setup_call(Node* node, ScanEnv* env, int state)
case NODE_LIST:
case NODE_ALT:
do {
- r = setup_call(NODE_CAR(node), env, state);
+ r = tune_call(NODE_CAR(node), env, state);
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
@@ -4225,12 +4648,12 @@ setup_call(Node* node, ScanEnv* env, int state)
if (QUANT_(node)->upper == 0)
state |= IN_ZERO_REPEAT;
- r = setup_call(NODE_BODY(node), env, state);
+ r = tune_call(NODE_BODY(node), env, state);
break;
case NODE_ANCHOR:
if (ANCHOR_HAS_BODY(ANCHOR_(node)))
- r = setup_call(NODE_BODY(node), env, state);
+ r = tune_call(NODE_BODY(node), env, state);
else
r = 0;
break;
@@ -4244,20 +4667,20 @@ setup_call(Node* node, ScanEnv* env, int state)
NODE_STATUS_ADD(node, IN_ZERO_REPEAT);
BAG_(node)->m.entry_count--;
}
- r = setup_call(NODE_BODY(node), env, state);
+ r = tune_call(NODE_BODY(node), env, state);
}
else if (en->type == BAG_IF_ELSE) {
- r = setup_call(NODE_BODY(node), env, state);
+ r = tune_call(NODE_BODY(node), env, state);
if (r != 0) return r;
if (IS_NOT_NULL(en->te.Then)) {
- r = setup_call(en->te.Then, env, state);
+ r = tune_call(en->te.Then, env, state);
if (r != 0) return r;
}
if (IS_NOT_NULL(en->te.Else))
- r = setup_call(en->te.Else, env, state);
+ r = tune_call(en->te.Else, env, state);
}
else
- r = setup_call(NODE_BODY(node), env, state);
+ r = tune_call(NODE_BODY(node), env, state);
}
break;
@@ -4267,7 +4690,7 @@ setup_call(Node* node, ScanEnv* env, int state)
CALL_(node)->entry_count--;
}
- r = setup_call_node_call(CALL_(node), env, state);
+ r = tune_call_node_call(CALL_(node), env, state);
break;
default:
@@ -4279,7 +4702,7 @@ setup_call(Node* node, ScanEnv* env, int state)
}
static int
-setup_call2(Node* node)
+tune_call2(Node* node)
{
int r = 0;
@@ -4287,23 +4710,23 @@ setup_call2(Node* node)
case NODE_LIST:
case NODE_ALT:
do {
- r = setup_call2(NODE_CAR(node));
+ r = tune_call2(NODE_CAR(node));
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
case NODE_QUANT:
if (QUANT_(node)->upper != 0)
- r = setup_call2(NODE_BODY(node));
+ r = tune_call2(NODE_BODY(node));
break;
case NODE_ANCHOR:
if (ANCHOR_HAS_BODY(ANCHOR_(node)))
- r = setup_call2(NODE_BODY(node));
+ r = tune_call2(NODE_BODY(node));
break;
case NODE_BAG:
if (! NODE_IS_IN_ZERO_REPEAT(node))
- r = setup_call2(NODE_BODY(node));
+ r = tune_call2(NODE_BODY(node));
{
BagNode* en = BAG_(node);
@@ -4311,18 +4734,18 @@ setup_call2(Node* node)
if (r != 0) return r;
if (en->type == BAG_IF_ELSE) {
if (IS_NOT_NULL(en->te.Then)) {
- r = setup_call2(en->te.Then);
+ r = tune_call2(en->te.Then);
if (r != 0) return r;
}
if (IS_NOT_NULL(en->te.Else))
- r = setup_call2(en->te.Else);
+ r = tune_call2(en->te.Else);
}
}
break;
case NODE_CALL:
if (! NODE_IS_IN_ZERO_REPEAT(node)) {
- setup_call2_call(node);
+ tune_call2_call(node);
}
break;
@@ -4335,7 +4758,7 @@ setup_call2(Node* node)
static void
-setup_called_state_call(Node* node, int state)
+tune_called_state_call(Node* node, int state)
{
switch (NODE_TYPE(node)) {
case NODE_ALT:
@@ -4343,7 +4766,7 @@ setup_called_state_call(Node* node, int state)
/* fall */
case NODE_LIST:
do {
- setup_called_state_call(NODE_CAR(node), state);
+ tune_called_state_call(NODE_CAR(node), state);
} while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
@@ -4351,12 +4774,12 @@ setup_called_state_call(Node* node, int state)
{
QuantNode* qn = QUANT_(node);
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
state |= IN_REAL_REPEAT;
if (qn->lower != qn->upper)
state |= IN_VAR_REPEAT;
- setup_called_state_call(NODE_QUANT_BODY(qn), state);
+ tune_called_state_call(NODE_QUANT_BODY(qn), state);
}
break;
@@ -4371,7 +4794,7 @@ setup_called_state_call(Node* node, int state)
/* fall */
case ANCR_PREC_READ:
case ANCR_LOOK_BEHIND:
- setup_called_state_call(NODE_ANCHOR_BODY(an), state);
+ tune_called_state_call(NODE_ANCHOR_BODY(an), state);
break;
default:
break;
@@ -4387,31 +4810,33 @@ setup_called_state_call(Node* node, int state)
if (NODE_IS_MARK1(node)) {
if ((~en->m.called_state & state) != 0) {
en->m.called_state |= state;
- setup_called_state_call(NODE_BODY(node), state);
+ tune_called_state_call(NODE_BODY(node), state);
}
}
else {
NODE_STATUS_ADD(node, MARK1);
en->m.called_state |= state;
- setup_called_state_call(NODE_BODY(node), state);
+ tune_called_state_call(NODE_BODY(node), state);
NODE_STATUS_REMOVE(node, MARK1);
}
}
else if (en->type == BAG_IF_ELSE) {
+ state |= IN_ALT;
+ tune_called_state_call(NODE_BODY(node), state);
if (IS_NOT_NULL(en->te.Then)) {
- setup_called_state_call(en->te.Then, state);
+ tune_called_state_call(en->te.Then, state);
}
if (IS_NOT_NULL(en->te.Else))
- setup_called_state_call(en->te.Else, state);
+ tune_called_state_call(en->te.Else, state);
}
else {
- setup_called_state_call(NODE_BODY(node), state);
+ tune_called_state_call(NODE_BODY(node), state);
}
}
break;
case NODE_CALL:
- setup_called_state_call(NODE_BODY(node), state);
+ tune_called_state_call(NODE_BODY(node), state);
break;
default:
@@ -4420,7 +4845,7 @@ setup_called_state_call(Node* node, int state)
}
static void
-setup_called_state(Node* node, int state)
+tune_called_state(Node* node, int state)
{
switch (NODE_TYPE(node)) {
case NODE_ALT:
@@ -4428,13 +4853,13 @@ setup_called_state(Node* node, int state)
/* fall */
case NODE_LIST:
do {
- setup_called_state(NODE_CAR(node), state);
+ tune_called_state(NODE_CAR(node), state);
} while (IS_NOT_NULL(node = NODE_CDR(node)));
break;
#ifdef USE_CALL
case NODE_CALL:
- setup_called_state_call(node, state);
+ tune_called_state_call(node, state);
break;
#endif
@@ -4451,14 +4876,15 @@ setup_called_state(Node* node, int state)
/* fall */
case BAG_OPTION:
case BAG_STOP_BACKTRACK:
- setup_called_state(NODE_BODY(node), state);
+ tune_called_state(NODE_BODY(node), state);
break;
case BAG_IF_ELSE:
- setup_called_state(NODE_BODY(node), state);
+ state |= IN_ALT;
+ tune_called_state(NODE_BODY(node), state);
if (IS_NOT_NULL(en->te.Then))
- setup_called_state(en->te.Then, state);
+ tune_called_state(en->te.Then, state);
if (IS_NOT_NULL(en->te.Else))
- setup_called_state(en->te.Else, state);
+ tune_called_state(en->te.Else, state);
break;
}
}
@@ -4468,12 +4894,12 @@ setup_called_state(Node* node, int state)
{
QuantNode* qn = QUANT_(node);
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
state |= IN_REAL_REPEAT;
if (qn->lower != qn->upper)
state |= IN_VAR_REPEAT;
- setup_called_state(NODE_QUANT_BODY(qn), state);
+ tune_called_state(NODE_QUANT_BODY(qn), state);
}
break;
@@ -4488,7 +4914,7 @@ setup_called_state(Node* node, int state)
/* fall */
case ANCR_PREC_READ:
case ANCR_LOOK_BEHIND:
- setup_called_state(NODE_ANCHOR_BODY(an), state);
+ tune_called_state(NODE_ANCHOR_BODY(an), state);
break;
default:
break;
@@ -4509,13 +4935,13 @@ setup_called_state(Node* node, int state)
#endif /* USE_CALL */
-static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env);
+static int tune_tree(Node* node, regex_t* reg, int state, ScanEnv* env);
#ifdef __GNUC__
__inline
#endif
static int
-setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
+tune_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
{
/* allowed node types in look-behind */
#define ALLOWED_TYPE_IN_LB \
@@ -4543,10 +4969,10 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
switch (an->type) {
case ANCR_PREC_READ:
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env);
+ r = tune_tree(NODE_ANCHOR_BODY(an), reg, state, env);
break;
case ANCR_PREC_READ_NOT:
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
+ r = tune_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env);
break;
case ANCR_LOOK_BEHIND:
@@ -4555,9 +4981,9 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
ALLOWED_BAG_IN_LB, ALLOWED_ANCHOR_IN_LB);
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env);
+ r = tune_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_LOOK_BEHIND), env);
if (r != 0) return r;
- r = setup_look_behind(node, reg, env);
+ r = tune_look_behind(node, reg, env);
}
break;
@@ -4567,10 +4993,10 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
ALLOWED_BAG_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
if (r < 0) return r;
if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
- r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND),
- env);
+ r = tune_tree(NODE_ANCHOR_BODY(an), reg, (state|IN_NOT|IN_LOOK_BEHIND),
+ env);
if (r != 0) return r;
- r = setup_look_behind(node, reg, env);
+ r = tune_look_behind(node, reg, env);
}
break;
@@ -4586,7 +5012,7 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env)
__inline
#endif
static int
-setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
+tune_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
{
int r;
OnigLen d;
@@ -4600,44 +5026,37 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
NODE_STATUS_ADD(node, IN_MULTI_ENTRY);
}
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 1) {
d = tree_min_len(body, env);
if (d == 0) {
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
- qn->empty_info = quantifiers_memory_node_info(body);
- if (qn->empty_info == BODY_IS_EMPTY_REC) {
- if (NODE_TYPE(body) == NODE_BAG &&
- BAG_(body)->type == BAG_MEMORY) {
- MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum);
- }
- }
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
+ qn->emptiness = quantifiers_memory_node_info(body);
#else
- qn->empty_info = BODY_IS_EMPTY;
+ qn->emptiness = BODY_IS_EMPTY_POSSIBILITY;
#endif
}
}
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
state |= IN_REAL_REPEAT;
if (qn->lower != qn->upper)
state |= IN_VAR_REPEAT;
- r = setup_tree(body, reg, state, env);
+ r = tune_tree(body, reg, state, env);
if (r != 0) return r;
/* expand string */
#define EXPAND_STRING_MAX_LENGTH 100
if (NODE_TYPE(body) == NODE_STRING) {
- if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
+ if (!IS_INFINITE_REPEAT(qn->lower) && qn->lower == qn->upper &&
qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
int len = NODE_STRING_LEN(body);
- StrNode* sn = STR_(body);
if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) {
int i, n = qn->lower;
- onig_node_conv_to_str_node(node, STR_(body)->flag);
+ node_conv_to_str_node(node, STR_(body)->flag);
for (i = 0; i < n; i++) {
- r = onig_node_str_cat(node, sn->s, sn->end);
+ r = node_str_node_cat(node, body);
if (r != 0) return r;
}
onig_node_free(body);
@@ -4646,7 +5065,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
}
}
- if (qn->greedy && (qn->empty_info == BODY_IS_NOT_EMPTY)) {
+ if (qn->greedy && (qn->emptiness == BODY_IS_NOT_EMPTY)) {
if (NODE_TYPE(body) == NODE_QUANT) {
QuantNode* tqn = QUANT_(body);
if (IS_NOT_NULL(tqn->head_exact)) {
@@ -4662,8 +5081,8 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
return r;
}
-/* setup_tree does the following work.
- 1. check empty loop. (set qn->empty_info)
+/* tune_tree does the following work.
+ 1. check empty loop. (set qn->emptiness)
2. expand ignore-case in char class.
3. set memory status bit flags. (reg->mem_stats)
4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
@@ -4671,7 +5090,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
6. expand repeated string.
*/
static int
-setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
+tune_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
{
int r = 0;
@@ -4680,9 +5099,9 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
{
Node* prev = NULL_NODE;
do {
- r = setup_tree(NODE_CAR(node), reg, state, env);
+ r = tune_tree(NODE_CAR(node), reg, state, env);
if (IS_NOT_NULL(prev) && r == 0) {
- r = next_setup(prev, NODE_CAR(node), reg);
+ r = tune_next(prev, NODE_CAR(node), reg);
}
prev = NODE_CAR(node);
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
@@ -4691,13 +5110,13 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
case NODE_ALT:
do {
- r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env);
+ r = tune_tree(NODE_CAR(node), reg, (state | IN_ALT), env);
} while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node)));
break;
case NODE_STRING:
- if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) {
- r = expand_case_fold_string(node, reg, state);
+ if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_CRUDE(node)) {
+ r = unravel_case_fold_string(node, reg, state);
}
break;
@@ -4710,12 +5129,18 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
for (i = 0; i < br->back_num; i++) {
if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
MEM_STATUS_ON(env->backrefed_mem, p[i]);
- MEM_STATUS_ON(env->bt_mem_start, p[i]);
+#if 0
#ifdef USE_BACKREF_WITH_LEVEL
if (NODE_IS_NEST_LEVEL(node)) {
- MEM_STATUS_ON(env->bt_mem_end, p[i]);
+ MEM_STATUS_ON(env->backtrack_mem, p[i]);
}
#endif
+#else
+ /* More precisely, it should be checked whether alt/repeat exists before
+ the subject capture node, and then this backreference position
+ exists before (or in) the capture node. */
+ MEM_STATUS_ON(env->backtrack_mem, p[i]);
+#endif
}
}
break;
@@ -4729,7 +5154,7 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
{
OnigOptionType options = reg->options;
reg->options = BAG_(node)->o.options;
- r = setup_tree(NODE_BODY(node), reg, state, env);
+ r = tune_tree(NODE_BODY(node), reg, state, env);
reg->options = options;
}
break;
@@ -4741,46 +5166,46 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0
|| NODE_IS_RECURSION(node)) {
- MEM_STATUS_ON(env->bt_mem_start, en->m.regnum);
+ MEM_STATUS_ON(env->backtrack_mem, en->m.regnum);
}
- r = setup_tree(NODE_BODY(node), reg, state, env);
+ r = tune_tree(NODE_BODY(node), reg, state, env);
break;
case BAG_STOP_BACKTRACK:
{
Node* target = NODE_BODY(node);
- r = setup_tree(target, reg, state, env);
+ r = tune_tree(target, reg, state, env);
if (NODE_TYPE(target) == NODE_QUANT) {
QuantNode* tqn = QUANT_(target);
- if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
+ if (IS_INFINITE_REPEAT(tqn->upper) && tqn->lower <= 1 &&
tqn->greedy != 0) { /* (?>a*), a*+ etc... */
- if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target)))
- NODE_STATUS_ADD(node, STOP_BT_SIMPLE_REPEAT);
+ if (is_strict_real_node(NODE_BODY(target)))
+ NODE_STATUS_ADD(node, STRICT_REAL_REPEAT);
}
}
}
break;
case BAG_IF_ELSE:
- r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env);
+ r = tune_tree(NODE_BODY(node), reg, (state | IN_ALT), env);
if (r != 0) return r;
if (IS_NOT_NULL(en->te.Then)) {
- r = setup_tree(en->te.Then, reg, (state | IN_ALT), env);
+ r = tune_tree(en->te.Then, reg, (state | IN_ALT), env);
if (r != 0) return r;
}
if (IS_NOT_NULL(en->te.Else))
- r = setup_tree(en->te.Else, reg, (state | IN_ALT), env);
+ r = tune_tree(en->te.Else, reg, (state | IN_ALT), env);
break;
}
}
break;
case NODE_QUANT:
- r = setup_quant(node, reg, state, env);
+ r = tune_quant(node, reg, state, env);
break;
case NODE_ANCHOR:
- r = setup_anchor(node, reg, state, env);
+ r = tune_anchor(node, reg, state, env);
break;
#ifdef USE_CALL
@@ -4879,7 +5304,7 @@ typedef struct {
} MinMax;
typedef struct {
- MinMax mmd;
+ MinMax mm;
OnigEncoding enc;
OnigOptionType options;
OnigCaseFoldType case_fold_flag;
@@ -4892,17 +5317,16 @@ typedef struct {
} OptAnc;
typedef struct {
- MinMax mmd; /* position */
+ MinMax mm; /* position */
OptAnc anc;
int reach_end;
int case_fold;
- int good_case_fold;
int len;
UChar s[OPT_EXACT_MAXLEN];
} OptStr;
typedef struct {
- MinMax mmd; /* position */
+ MinMax mm; /* position */
OptAnc anc;
int value; /* weighted value */
UChar map[CHAR_MAP_SIZE];
@@ -5119,11 +5543,10 @@ is_full_opt_exact(OptStr* e)
static void
clear_opt_exact(OptStr* e)
{
- clear_mml(&e->mmd);
+ clear_mml(&e->mm);
clear_opt_anc_info(&e->anc);
e->reach_end = 0;
e->case_fold = 0;
- e->good_case_fold = 0;
e->len = 0;
e->s[0] = '\0';
}
@@ -5147,11 +5570,6 @@ concat_opt_exact(OptStr* to, OptStr* add, OnigEncoding enc)
to->case_fold = 1;
}
- else {
- if (to->good_case_fold != 0) {
- if (add->good_case_fold == 0) return 0;
- }
- }
}
r = 0;
@@ -5206,7 +5624,7 @@ alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)
return ;
}
- if (! is_equal_mml(&to->mmd, &add->mmd)) {
+ if (! is_equal_mml(&to->mm, &add->mm)) {
clear_opt_exact(to);
return ;
}
@@ -5228,8 +5646,6 @@ alt_merge_opt_exact(OptStr* to, OptStr* add, OptEnv* env)
to->len = i;
if (add->case_fold != 0)
to->case_fold = 1;
- if (add->good_case_fold == 0)
- to->good_case_fold = 0;
alt_merge_opt_anc_info(&to->anc, &add->anc);
if (! to->reach_end) to->anc.right = 0;
@@ -5262,10 +5678,7 @@ select_opt_exact(OnigEncoding enc, OptStr* now, OptStr* alt)
if (now->case_fold == 0) vn *= 2;
if (alt->case_fold == 0) va *= 2;
- if (now->good_case_fold != 0) vn *= 4;
- if (alt->good_case_fold != 0) va *= 4;
-
- if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
+ if (comp_distance_value(&now->mm, &alt->mm, vn, va) > 0)
copy_opt_exact(now, alt);
}
@@ -5349,7 +5762,7 @@ select_opt_map(OptMap* now, OptMap* alt)
vn = z / now->value;
va = z / alt->value;
- if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0)
+ if (comp_distance_value(&now->mm, &alt->mm, vn, va) > 0)
copy_opt_map(now, alt);
}
@@ -5363,17 +5776,14 @@ comp_opt_exact_or_map(OptStr* e, OptMap* m)
if (m->value <= 0) return -1;
if (e->case_fold != 0) {
- if (e->good_case_fold != 0)
- case_value = 2;
- else
- case_value = 1;
+ case_value = 1;
}
else
case_value = 3;
ae = COMP_EM_BASE * e->len * case_value;
am = COMP_EM_BASE * 5 * 2 / m->value;
- return comp_distance_value(&e->mmd, &m->mmd, ae, am);
+ return comp_distance_value(&e->mm, &m->mm, ae, am);
}
static void
@@ -5381,14 +5791,14 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
{
int i, val;
- /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
+ /* if (! is_equal_mml(&to->mm, &add->mm)) return ; */
if (to->value == 0) return ;
- if (add->value == 0 || to->mmd.max < add->mmd.min) {
+ if (add->value == 0 || to->mm.max < add->mm.min) {
clear_opt_map(to);
return ;
}
- alt_merge_mml(&to->mmd, &add->mmd);
+ alt_merge_mml(&to->mm, &add->mm);
val = 0;
for (i = 0; i < CHAR_MAP_SIZE; i++) {
@@ -5406,9 +5816,9 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add)
static void
set_bound_node_opt_info(OptNode* opt, MinMax* plen)
{
- copy_mml(&(opt->sb.mmd), plen);
- copy_mml(&(opt->spr.mmd), plen);
- copy_mml(&(opt->map.mmd), plen);
+ copy_mml(&(opt->sb.mm), plen);
+ copy_mml(&(opt->spr.mm), plen);
+ copy_mml(&(opt->map.mm), plen);
}
static void
@@ -5443,7 +5853,7 @@ concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)
}
if (add->map.value > 0 && to->len.max == 0) {
- if (add->map.mmd.max == 0)
+ if (add->map.mm.max == 0)
add->map.anc.left |= to->anc.left;
}
@@ -5468,10 +5878,7 @@ concat_left_node_opt_info(OnigEncoding enc, OptNode* to, OptNode* add)
if (to->spr.len > 0) {
if (add->len.max > 0) {
- if (to->spr.len > (int )add->len.max)
- to->spr.len = add->len.max;
-
- if (to->spr.mmd.max == 0)
+ if (to->spr.mm.max == 0)
select_opt_exact(enc, &to->sb, &to->spr);
else
select_opt_exact(enc, &to->sm, &to->spr);
@@ -5511,7 +5918,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
r = 0;
enc = env->enc;
clear_node_opt_info(opt);
- set_bound_node_opt_info(opt, &env->mmd);
+ set_bound_node_opt_info(opt, &env->mm);
switch (NODE_TYPE(node)) {
case NODE_LIST:
@@ -5523,7 +5930,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
do {
r = optimize_nodes(NODE_CAR(nd), &xo, &nenv);
if (r == 0) {
- add_mml(&nenv.mmd, &xo.len);
+ add_mml(&nenv.mm, &xo.len);
concat_left_node_opt_info(enc, opt, &xo);
}
} while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd)));
@@ -5548,9 +5955,8 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
{
StrNode* sn = STR_(node);
int slen = (int )(sn->end - sn->s);
- /* int is_raw = NODE_STRING_IS_RAW(node); */
- if (! NODE_STRING_IS_AMBIG(node)) {
+ if (! NODE_STRING_IS_CASE_FOLD_MATCH(node)) {
concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
if (slen > 0) {
add_char_opt_map(&opt->map, *(sn->s), enc);
@@ -5558,28 +5964,20 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
set_mml(&opt->len, slen, slen);
}
else {
- int max;
+ int max, min;
- if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) {
- int n = onigenc_strlen(enc, sn->s, sn->end);
- max = ONIGENC_MBC_MAXLEN_DIST(enc) * n;
- }
- else {
- concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
- opt->sb.case_fold = 1;
- if (NODE_STRING_IS_GOOD_AMBIG(node))
- opt->sb.good_case_fold = 1;
-
- if (slen > 0) {
- r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,
- enc, env->case_fold_flag);
- if (r != 0) break;
- }
+ concat_opt_exact_str(&opt->sb, sn->s, sn->end, enc);
+ opt->sb.case_fold = 1;
- max = slen;
+ if (slen > 0) {
+ r = add_char_amb_opt_map(&opt->map, sn->s, sn->end,
+ enc, env->case_fold_flag);
+ if (r != 0) break;
}
- set_mml(&opt->len, slen, max);
+ max = slen;
+ min = sn->case_min_len * ONIGENC_MBC_MINLEN(enc);
+ set_mml(&opt->len, min, max);
}
}
break;
@@ -5589,7 +5987,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
int z;
CClassNode* cc = CCLASS_(node);
- /* no need to check ignore case. (set in setup_tree()) */
+ /* no need to check ignore case. (set in tune_tree()) */
if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
OnigLen min = ONIGENC_MBC_MINLEN(enc);
@@ -5699,11 +6097,11 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
break;
}
backs = BACKREFS_P(br);
- min = tree_min_len(mem_env[backs[0]].node, env->scan_env);
- max = tree_max_len(mem_env[backs[0]].node, env->scan_env);
+ min = tree_min_len(mem_env[backs[0]].mem_node, env->scan_env);
+ max = tree_max_len(mem_env[backs[0]].mem_node, env->scan_env);
for (i = 1; i < br->back_num; i++) {
- tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env);
- tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env);
+ tmin = tree_min_len(mem_env[backs[i]].mem_node, env->scan_env);
+ tmax = tree_max_len(mem_env[backs[i]].mem_node, env->scan_env);
if (min > tmin) min = tmin;
if (max < tmax) max = tmax;
}
@@ -5752,8 +6150,8 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
opt->sm.reach_end = 0;
}
- if (IS_REPEAT_INFINITE(qn->upper)) {
- if (env->mmd.max == 0 &&
+ if (IS_INFINITE_REPEAT(qn->upper)) {
+ if (env->mm.max == 0 &&
NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))
add_opt_anc_info(&opt->anc, ANCR_ANYCHAR_INF_ML);
@@ -5821,7 +6219,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
copy_opt_env(&nenv, env);
r = optimize_nodes(NODE_BAG_BODY(en), &xo, &nenv);
if (r == 0) {
- add_mml(&nenv.mmd, &xo.len);
+ add_mml(&nenv.mm, &xo.len);
concat_left_node_opt_info(enc, opt, &xo);
if (IS_NOT_NULL(en->te.Then)) {
r = optimize_nodes(en->te.Then, &xo, &nenv);
@@ -5870,15 +6268,6 @@ set_optimize_exact(regex_t* reg, OptStr* e)
if (e->case_fold) {
reg->optimize = OPTIMIZE_STR_CASE_FOLD;
- if (e->good_case_fold != 0) {
- if (e->len >= 2) {
- r = set_sunday_quick_search_or_bmh_skip_table(reg, 1,
- reg->exact, reg->exact_end,
- reg->map, &(reg->map_offset));
- if (r != 0) return r;
- reg->optimize = OPTIMIZE_STR_CASE_FOLD_FAST;
- }
- }
}
else {
int allow_reverse;
@@ -5901,11 +6290,17 @@ set_optimize_exact(regex_t* reg, OptStr* e)
}
}
- reg->dmin = e->mmd.min;
- reg->dmax = e->mmd.max;
+ reg->dist_min = e->mm.min;
+ reg->dist_max = e->mm.max;
+
+ if (reg->dist_min != INFINITE_LEN) {
+ int n;
+ if (e->case_fold != 0)
+ n = 1;
+ else
+ n = (int )(reg->exact_end - reg->exact);
- if (reg->dmin != INFINITE_LEN) {
- reg->threshold_len = reg->dmin + (int )(reg->exact_end - reg->exact);
+ reg->threshold_len = reg->dist_min + n;
}
return 0;
@@ -5920,11 +6315,11 @@ set_optimize_map(regex_t* reg, OptMap* m)
reg->map[i] = m->map[i];
reg->optimize = OPTIMIZE_MAP;
- reg->dmin = m->mmd.min;
- reg->dmax = m->mmd.max;
+ reg->dist_min = m->mm.min;
+ reg->dist_max = m->mm.max;
- if (reg->dmin != INFINITE_LEN) {
- reg->threshold_len = reg->dmin + 1;
+ if (reg->dist_min != INFINITE_LEN) {
+ reg->threshold_len = reg->dist_min + 1;
}
}
@@ -5950,7 +6345,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
env.options = reg->options;
env.case_fold_flag = reg->case_fold_flag;
env.scan_env = scan_env;
- clear_mml(&env.mmd);
+ clear_mml(&env.mm);
r = optimize_nodes(node, &opt, &env);
if (r != 0) return r;
@@ -5966,8 +6361,8 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
ANCR_PREC_READ_NOT);
if (reg->anchor & (ANCR_END_BUF | ANCR_SEMI_END_BUF)) {
- reg->anchor_dmin = opt.len.min;
- reg->anchor_dmax = opt.len.max;
+ reg->anc_dist_min = opt.len.min;
+ reg->anc_dist_max = opt.len.max;
}
if (opt.sb.len > 0 || opt.sm.len > 0) {
@@ -6002,8 +6397,8 @@ clear_optimize_info(regex_t* reg)
{
reg->optimize = OPTIMIZE_NONE;
reg->anchor = 0;
- reg->anchor_dmin = 0;
- reg->anchor_dmax = 0;
+ reg->anc_dist_min = 0;
+ reg->anc_dist_max = 0;
reg->sub_anchor = 0;
reg->exact_end = (UChar* )NULL;
reg->map_offset = 0;
@@ -6122,12 +6517,12 @@ print_optimize_info(FILE* f, regex_t* reg)
{
static const char* on[] = { "NONE", "STR",
"STR_FAST", "STR_FAST_STEP_FORWARD",
- "STR_CASE_FOLD_FAST", "STR_CASE_FOLD", "MAP" };
+ "STR_CASE_FOLD", "MAP" };
fprintf(f, "optimize: %s\n", on[reg->optimize]);
fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
if ((reg->anchor & ANCR_END_BUF_MASK) != 0)
- print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
+ print_distance_range(f, reg->anc_dist_min, reg->anc_dist_max);
fprintf(f, "\n");
if (reg->optimize) {
@@ -6275,7 +6670,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
Node* root;
ScanEnv scan_env;
#ifdef USE_CALL
- UnsetAddrList uslist;
+ UnsetAddrList uslist = {0};
#endif
root = 0;
@@ -6299,13 +6694,17 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
reg->string_pool_end = 0;
reg->num_mem = 0;
reg->num_repeat = 0;
- reg->num_null_check = 0;
+ reg->num_empty_check = 0;
reg->repeat_range_alloc = 0;
- reg->repeat_range = (OnigRepeatRange* )NULL;
+ reg->repeat_range = (RepeatRange* )NULL;
+ reg->empty_status_mem = 0;
r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env);
if (r != 0) goto err;
+ r = reduce_string_list(root);
+ if (r != 0) goto err;
+
/* mixed use named group and no-named group */
if (scan_env.num_named > 0 &&
IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
@@ -6326,38 +6725,65 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
r = unset_addr_list_init(&uslist, scan_env.num_call);
if (r != 0) goto err;
scan_env.unset_addr_list = &uslist;
- r = setup_call(root, &scan_env, 0);
+ r = tune_call(root, &scan_env, 0);
if (r != 0) goto err_unset;
- r = setup_call2(root);
+ r = tune_call2(root);
if (r != 0) goto err_unset;
r = recursive_call_check_trav(root, &scan_env, 0);
if (r < 0) goto err_unset;
r = infinite_recursive_call_check_trav(root, &scan_env);
if (r != 0) goto err_unset;
- setup_called_state(root, 0);
+ tune_called_state(root, 0);
}
reg->num_call = scan_env.num_call;
#endif
- r = setup_tree(root, reg, 0, &scan_env);
+#ifdef ONIG_DEBUG_PARSE
+ fprintf(stderr, "MAX PARSE DEPTH: %d\n", scan_env.max_parse_depth);
+ fprintf(stderr, "TREE (parsed)\n");
+ print_tree(stderr, root);
+ fprintf(stderr, "\n");
+#endif
+
+ r = tune_tree(root, reg, 0, &scan_env);
if (r != 0) goto err_unset;
+ if (scan_env.backref_num != 0) {
+ set_parent_node_trav(root, NULL_NODE);
+ r = set_empty_repeat_node_trav(root, NULL_NODE, &scan_env);
+ if (r != 0) goto err_unset;
+ set_empty_status_check_trav(root, &scan_env);
+ }
+
#ifdef ONIG_DEBUG_PARSE
+ fprintf(stderr, "TREE (after tune)\n");
print_tree(stderr, root);
+ fprintf(stderr, "\n");
#endif
- reg->capture_history = scan_env.capture_history;
- reg->bt_mem_start = scan_env.bt_mem_start;
- reg->bt_mem_start |= reg->capture_history;
- if (IS_FIND_CONDITION(reg->options))
- MEM_STATUS_ON_ALL(reg->bt_mem_end);
+ reg->capture_history = scan_env.cap_history;
+ reg->push_mem_start = scan_env.backtrack_mem | scan_env.cap_history;
+
+#ifdef USE_CALLOUT
+ if (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0) {
+ reg->push_mem_end = reg->push_mem_start;
+ }
else {
- reg->bt_mem_end = scan_env.bt_mem_end;
- reg->bt_mem_end |= reg->capture_history;
+ if (MEM_STATUS_IS_ALL_ON(reg->push_mem_start))
+ reg->push_mem_end = scan_env.backrefed_mem | scan_env.cap_history;
+ else
+ reg->push_mem_end = reg->push_mem_start &
+ (scan_env.backrefed_mem | scan_env.cap_history);
}
- reg->bt_mem_start |= reg->bt_mem_end;
+#else
+ if (MEM_STATUS_IS_ALL_ON(reg->push_mem_start))
+ reg->push_mem_end = scan_env.backrefed_mem | scan_env.cap_history;
+ else
+ reg->push_mem_end = reg->push_mem_start &
+ (scan_env.backrefed_mem | scan_env.cap_history);
+#endif
clear_optimize_info(reg);
#ifndef ONIG_DONT_OPTIMIZE
@@ -6391,14 +6817,20 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
}
#endif
- if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)
+ set_addr_in_repeat_range(reg);
+
+ if ((reg->push_mem_end != 0)
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+ || (reg->num_repeat != 0)
+ || (reg->num_empty_check != 0)
+#endif
#ifdef USE_CALLOUT
|| (IS_NOT_NULL(reg->extp) && reg->extp->callout_num != 0)
#endif
)
reg->stack_pop_level = STACK_POP_LEVEL_ALL;
else {
- if (reg->bt_mem_start != 0)
+ if (reg->push_mem_start != 0)
reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
else
reg->stack_pop_level = STACK_POP_LEVEL_FREE;
@@ -6531,11 +6963,14 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
- if (r != 0) goto err;
+ if (r != 0) {
+ xfree(*reg);
+ *reg = NULL;
+ return r;
+ }
r = onig_compile(*reg, pattern, pattern_end, einfo);
if (r != 0) {
- err:
onig_free(*reg);
*reg = NULL;
}
@@ -6672,6 +7107,7 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
}
else {
len = ONIGENC_CODE_TO_MBCLEN(enc, code);
+ if (len < 0) return 0;
}
return onig_is_code_in_cc_len(len, code, cc);
}
@@ -6679,12 +7115,14 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
#ifdef ONIG_DEBUG_PARSE
+#ifdef USE_CALL
static void
p_string(FILE* f, int len, UChar* s)
{
fputs(":", f);
while (len-- > 0) { fputc(*s++, f); }
}
+#endif
static void
Indent(FILE* f, int indent)
@@ -6704,7 +7142,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
Indent(f, indent);
if (IS_NULL(node)) {
fprintf(f, "ERROR: null node!!!\n");
- exit (0);
+ exit(0);
}
type = NODE_TYPE(node);
@@ -6728,28 +7166,22 @@ print_indent_tree(FILE* f, Node* node, int indent)
case NODE_STRING:
{
+ char* str;
char* mode;
- char* dont;
- char* good;
- if (NODE_STRING_IS_RAW(node))
- mode = "-raw";
- else if (NODE_STRING_IS_AMBIG(node))
- mode = "-ambig";
+ if (NODE_STRING_IS_CRUDE(node))
+ mode = "-crude";
+ else if (NODE_STRING_IS_CASE_FOLD_MATCH(node))
+ mode = "-case_fold_match";
else
mode = "";
- if (NODE_STRING_IS_GOOD_AMBIG(node))
- good = "-good";
- else
- good = "";
-
- if (NODE_STRING_IS_DONT_GET_OPT_INFO(node))
- dont = " (dont-opt)";
+ if (STR_(node)->s == STR_(node)->end)
+ str = "empty-string";
else
- dont = "";
+ str = "string";
- fprintf(f, "<string%s%s%s:%p>", mode, good, dont, node);
+ fprintf(f, "<%s%s:%p>", str, mode, node);
for (p = STR_(node)->s; p < STR_(node)->end; p++) {
if (*p >= 0x20 && *p < 0x7f)
fputc(*p, f);
@@ -6871,6 +7303,34 @@ print_indent_tree(FILE* f, Node* node, int indent)
case NODE_BAG:
fprintf(f, "<bag:%p> ", node);
+ if (BAG_(node)->type == BAG_IF_ELSE) {
+ Node* Then;
+ Node* Else;
+ BagNode* bn;
+
+ bn = BAG_(node);
+ fprintf(f, "if-else\n");
+ print_indent_tree(f, NODE_BODY(node), indent + add);
+
+ Then = bn->te.Then;
+ Else = bn->te.Else;
+ if (IS_NULL(Then)) {
+ Indent(f, indent + add);
+ fprintf(f, "THEN empty\n");
+ }
+ else
+ print_indent_tree(f, Then, indent + add);
+
+ if (IS_NULL(Else)) {
+ Indent(f, indent + add);
+ fprintf(f, "ELSE empty\n");
+ }
+ else
+ print_indent_tree(f, Else, indent + add);
+
+ break;
+ }
+
switch (BAG_(node)->type) {
case BAG_OPTION:
fprintf(f, "option:%d", BAG_(node)->o.options);
@@ -6881,8 +7341,7 @@ print_indent_tree(FILE* f, Node* node, int indent)
case BAG_STOP_BACKTRACK:
fprintf(f, "stop-bt");
break;
- case BAG_IF_ELSE:
- fprintf(f, "if-else");
+ default:
break;
}
fprintf(f, "\n");
diff --git a/src/regenc.c b/src/regenc.c
index 6376565..16ac313 100644
--- a/src/regenc.c
+++ b/src/regenc.c
@@ -2,7 +2,7 @@
regenc.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -182,7 +182,8 @@ onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc,
p += enclen(enc, p);
}
else {
- if (prev) *prev = (const UChar* )NULL; /* Sorry */
+ if (prev)
+ *prev = onigenc_get_prev_char_head(enc, start, p);
}
return p;
}
@@ -208,20 +209,6 @@ onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)
return (UChar* )s;
}
-#if 0
-extern int
-onigenc_mbc_enc_len_end(OnigEncoding enc, const UChar* p, const UChar* end)
-{
- int len;
- int n;
-
- len = ONIGENC_MBC_ENC_LEN(enc, p);
- n = (int )(end - p);
-
- return (n < len ? n : len);
-}
-#endif
-
extern UChar*
onigenc_step(OnigEncoding enc, const UChar* p, const UChar* end, int n)
{
@@ -705,18 +692,6 @@ onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p,
return 1; /* return byte length of converted char to lower */
}
-#if 0
-extern int
-onigenc_ascii_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
-{
- const UChar* p = *pp;
-
- (*pp)++;
- return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
-}
-#endif
-
extern int
onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED)
{
@@ -833,39 +808,6 @@ onigenc_mbn_mbc_case_fold(OnigEncoding enc, OnigCaseFoldType flag ARG_UNUSED,
}
}
-#if 0
-extern int
-onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
-{
- const UChar* p = *pp;
-
- if (ONIGENC_IS_MBC_ASCII(p)) {
- (*pp)++;
- return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p);
- }
-
- (*pp) += enclen(enc, p);
- return FALSE;
-}
-#endif
-
-extern int
-onigenc_mb2_code_to_mbclen(OnigCodePoint code)
-{
- if ((code & 0xff00) != 0) return 2;
- else return 1;
-}
-
-extern int
-onigenc_mb4_code_to_mbclen(OnigCodePoint code)
-{
- if ((code & 0xff000000) != 0) return 4;
- else if ((code & 0xff0000) != 0) return 3;
- else if ((code & 0xff00) != 0) return 2;
- else return 1;
-}
-
extern int
onigenc_mb2_code_to_mbc(OnigEncoding enc, OnigCodePoint code, UChar *buf)
{
diff --git a/src/regenc.h b/src/regenc.h
index bd2819e..db35841 100644
--- a/src/regenc.h
+++ b/src/regenc.h
@@ -4,7 +4,7 @@
regenc.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -163,13 +163,11 @@ extern int onigenc_length_check_is_valid_mbc_string P_((OnigEncoding enc, const
/* methods for multi byte encoding */
extern OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end));
extern int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower));
-extern int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code));
extern int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
extern int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
extern int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end));
extern int onigenc_is_mbc_word_ascii P_((OnigEncoding enc, UChar* s, const UChar* end));
extern int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
-extern int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code));
extern int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf));
extern int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
extern struct PropertyNameCtype* onigenc_euc_jp_lookup_property_name P_((register const char *str, register size_t len));
diff --git a/src/regerror.c b/src/regerror.c
index 7564827..b57a276 100644
--- a/src/regerror.c
+++ b/src/regerror.c
@@ -2,7 +2,7 @@
regerror.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -257,6 +257,23 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
}
+extern int
+onig_is_error_code_needs_param(int code)
+{
+ switch (code) {
+ case ONIGERR_UNDEFINED_NAME_REFERENCE:
+ case ONIGERR_UNDEFINED_GROUP_REFERENCE:
+ case ONIGERR_MULTIPLEX_DEFINED_NAME:
+ case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
+ case ONIGERR_INVALID_GROUP_NAME:
+ case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
+ case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
#define MAX_ERROR_PAR_LEN 30
diff --git a/src/regexec.c b/src/regexec.c
index 6618996..ce498c6 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -2,7 +2,7 @@
regexec.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -39,6 +39,20 @@
#define CHECK_INTERRUPT_IN_MATCH
+#define STACK_MEM_START(reg, i) \
+ (MEM_STATUS_AT((reg)->push_mem_start, (i)) != 0 ? \
+ STACK_AT(mem_start_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_start_stk[i])))
+
+#define STACK_MEM_END(reg, i) \
+ (MEM_STATUS_AT((reg)->push_mem_end, (i)) != 0 ? \
+ STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )(mem_end_stk[i])))
+
+static int forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start, UChar* range, UChar** low, UChar** high, UChar** low_prev);
+
+static int
+search_in_range(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, /* match range */ const UChar* data_range, /* subject string range */ OnigRegion* region, OnigOptionType option, OnigMatchParam* mp);
+
+
#ifdef USE_CALLOUT
typedef struct {
int last_match_at_call_counter;
@@ -129,7 +143,7 @@ typedef struct {
} MatchArg;
-#ifdef ONIG_DEBUG
+#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
/* arguments type */
typedef enum {
@@ -149,102 +163,108 @@ typedef struct {
} OpInfoType;
static OpInfoType OpInfo[] = {
- { OP_FINISH, "finish" },
- { OP_END, "end" },
- { OP_EXACT1, "exact1" },
- { OP_EXACT2, "exact2" },
- { OP_EXACT3, "exact3" },
- { OP_EXACT4, "exact4" },
- { OP_EXACT5, "exact5" },
- { OP_EXACTN, "exactn" },
- { OP_EXACTMB2N1, "exactmb2-n1" },
- { OP_EXACTMB2N2, "exactmb2-n2" },
- { OP_EXACTMB2N3, "exactmb2-n3" },
- { OP_EXACTMB2N, "exactmb2-n" },
- { OP_EXACTMB3N, "exactmb3n" },
- { OP_EXACTMBN, "exactmbn" },
- { OP_EXACT1_IC, "exact1-ic" },
- { OP_EXACTN_IC, "exactn-ic" },
- { OP_CCLASS, "cclass" },
- { OP_CCLASS_MB, "cclass-mb" },
- { OP_CCLASS_MIX, "cclass-mix" },
- { OP_CCLASS_NOT, "cclass-not" },
- { OP_CCLASS_MB_NOT, "cclass-mb-not" },
- { OP_CCLASS_MIX_NOT, "cclass-mix-not" },
- { OP_ANYCHAR, "anychar" },
- { OP_ANYCHAR_ML, "anychar-ml" },
- { OP_ANYCHAR_STAR, "anychar*" },
- { OP_ANYCHAR_ML_STAR, "anychar-ml*" },
- { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next" },
- { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next" },
- { OP_WORD, "word" },
- { OP_WORD_ASCII, "word-ascii" },
- { OP_NO_WORD, "not-word" },
- { OP_NO_WORD_ASCII, "not-word-ascii" },
- { OP_WORD_BOUNDARY, "word-boundary" },
- { OP_NO_WORD_BOUNDARY, "not-word-boundary" },
- { OP_WORD_BEGIN, "word-begin" },
- { OP_WORD_END, "word-end" },
- { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary" },
- { OP_BEGIN_BUF, "begin-buf" },
- { OP_END_BUF, "end-buf" },
- { OP_BEGIN_LINE, "begin-line" },
- { OP_END_LINE, "end-line" },
- { OP_SEMI_END_BUF, "semi-end-buf" },
- { OP_BEGIN_POSITION, "begin-position" },
- { OP_BACKREF1, "backref1" },
- { OP_BACKREF2, "backref2" },
- { OP_BACKREF_N, "backref-n" },
- { OP_BACKREF_N_IC, "backref-n-ic" },
- { OP_BACKREF_MULTI, "backref_multi" },
- { OP_BACKREF_MULTI_IC, "backref_multi-ic" },
- { OP_BACKREF_WITH_LEVEL, "backref_with_level" },
- { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c" },
- { OP_BACKREF_CHECK, "backref_check" },
- { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level" },
- { OP_MEMORY_START_PUSH, "mem-start-push" },
- { OP_MEMORY_START, "mem-start" },
- { OP_MEMORY_END_PUSH, "mem-end-push" },
- { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec" },
- { OP_MEMORY_END, "mem-end" },
- { OP_MEMORY_END_REC, "mem-end-rec" },
- { OP_FAIL, "fail" },
- { OP_JUMP, "jump" },
- { OP_PUSH, "push" },
- { OP_PUSH_SUPER, "push-super" },
- { OP_POP_OUT, "pop-out" },
+ { OP_FINISH, "finish"},
+ { OP_END, "end"},
+ { OP_STR_1, "str_1"},
+ { OP_STR_2, "str_2"},
+ { OP_STR_3, "str_3"},
+ { OP_STR_4, "str_4"},
+ { OP_STR_5, "str_5"},
+ { OP_STR_N, "str_n"},
+ { OP_STR_MB2N1, "str_mb2-n1"},
+ { OP_STR_MB2N2, "str_mb2-n2"},
+ { OP_STR_MB2N3, "str_mb2-n3"},
+ { OP_STR_MB2N, "str_mb2-n"},
+ { OP_STR_MB3N, "str_mb3n"},
+ { OP_STR_MBN, "str_mbn"},
+ { OP_STR_1_IC, "str_1-ic"},
+ { OP_STR_N_IC, "str_n-ic"},
+ { OP_CCLASS, "cclass"},
+ { OP_CCLASS_MB, "cclass-mb"},
+ { OP_CCLASS_MIX, "cclass-mix"},
+ { OP_CCLASS_NOT, "cclass-not"},
+ { OP_CCLASS_MB_NOT, "cclass-mb-not"},
+ { OP_CCLASS_MIX_NOT, "cclass-mix-not"},
+ { OP_ANYCHAR, "anychar"},
+ { OP_ANYCHAR_ML, "anychar-ml"},
+ { OP_ANYCHAR_STAR, "anychar*"},
+ { OP_ANYCHAR_ML_STAR, "anychar-ml*"},
+ { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next"},
+ { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next"},
+ { OP_WORD, "word"},
+ { OP_WORD_ASCII, "word-ascii"},
+ { OP_NO_WORD, "not-word"},
+ { OP_NO_WORD_ASCII, "not-word-ascii"},
+ { OP_WORD_BOUNDARY, "word-boundary"},
+ { OP_NO_WORD_BOUNDARY, "not-word-boundary"},
+ { OP_WORD_BEGIN, "word-begin"},
+ { OP_WORD_END, "word-end"},
+ { OP_TEXT_SEGMENT_BOUNDARY, "text-segment-boundary"},
+ { OP_BEGIN_BUF, "begin-buf"},
+ { OP_END_BUF, "end-buf"},
+ { OP_BEGIN_LINE, "begin-line"},
+ { OP_END_LINE, "end-line"},
+ { OP_SEMI_END_BUF, "semi-end-buf"},
+ { OP_BEGIN_POSITION, "begin-position"},
+ { OP_BACKREF1, "backref1"},
+ { OP_BACKREF2, "backref2"},
+ { OP_BACKREF_N, "backref-n"},
+ { OP_BACKREF_N_IC, "backref-n-ic"},
+ { OP_BACKREF_MULTI, "backref_multi"},
+ { OP_BACKREF_MULTI_IC, "backref_multi-ic"},
+ { OP_BACKREF_WITH_LEVEL, "backref_with_level"},
+ { OP_BACKREF_WITH_LEVEL_IC, "backref_with_level-c"},
+ { OP_BACKREF_CHECK, "backref_check"},
+ { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level"},
+ { OP_MEM_START_PUSH, "mem-start-push"},
+ { OP_MEM_START, "mem-start"},
+ { OP_MEM_END_PUSH, "mem-end-push"},
+#ifdef USE_CALL
+ { OP_MEM_END_PUSH_REC, "mem-end-push-rec"},
+#endif
+ { OP_MEM_END, "mem-end"},
+#ifdef USE_CALL
+ { OP_MEM_END_REC, "mem-end-rec"},
+#endif
+ { OP_FAIL, "fail"},
+ { OP_JUMP, "jump"},
+ { OP_PUSH, "push"},
+ { OP_PUSH_SUPER, "push-super"},
+ { OP_POP_OUT, "pop-out"},
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1" },
+ { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1"},
+#endif
+ { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next"},
+ { OP_REPEAT, "repeat"},
+ { OP_REPEAT_NG, "repeat-ng"},
+ { OP_REPEAT_INC, "repeat-inc"},
+ { OP_REPEAT_INC_NG, "repeat-inc-ng"},
+ { OP_EMPTY_CHECK_START, "empty-check-start"},
+ { OP_EMPTY_CHECK_END, "empty-check-end"},
+ { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst"},
+#ifdef USE_CALL
+ { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push"},
+#endif
+ { OP_PREC_READ_START, "push-pos"},
+ { OP_PREC_READ_END, "pop-pos"},
+ { OP_PREC_READ_NOT_START, "prec-read-not-start"},
+ { OP_PREC_READ_NOT_END, "prec-read-not-end"},
+ { OP_ATOMIC_START, "atomic-start"},
+ { OP_ATOMIC_END, "atomic-end"},
+ { OP_LOOK_BEHIND, "look-behind"},
+ { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start"},
+ { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end"},
+ { OP_PUSH_SAVE_VAL, "push-save-val"},
+ { OP_UPDATE_VAR, "update-var"},
+#ifdef USE_CALL
+ { OP_CALL, "call"},
+ { OP_RETURN, "return"},
#endif
- { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next" },
- { OP_REPEAT, "repeat" },
- { OP_REPEAT_NG, "repeat-ng" },
- { OP_REPEAT_INC, "repeat-inc" },
- { OP_REPEAT_INC_NG, "repeat-inc-ng" },
- { OP_REPEAT_INC_SG, "repeat-inc-sg" },
- { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg" },
- { OP_EMPTY_CHECK_START, "empty-check-start" },
- { OP_EMPTY_CHECK_END, "empty-check-end" },
- { OP_EMPTY_CHECK_END_MEMST, "empty-check-end-memst" },
- { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push" },
- { OP_PREC_READ_START, "push-pos" },
- { OP_PREC_READ_END, "pop-pos" },
- { OP_PREC_READ_NOT_START, "prec-read-not-start" },
- { OP_PREC_READ_NOT_END, "prec-read-not-end" },
- { OP_ATOMIC_START, "atomic-start" },
- { OP_ATOMIC_END, "atomic-end" },
- { OP_LOOK_BEHIND, "look-behind" },
- { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start" },
- { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end" },
- { OP_CALL, "call" },
- { OP_RETURN, "return" },
- { OP_PUSH_SAVE_VAL, "push-save-val" },
- { OP_UPDATE_VAR, "update-var" },
#ifdef USE_CALLOUT
- { OP_CALLOUT_CONTENTS, "callout-contents" },
- { OP_CALLOUT_NAME, "callout-name" },
+ { OP_CALLOUT_CONTENTS, "callout-contents"},
+ { OP_CALLOUT_NAME, "callout-name"},
#endif
- { -1, "" }
+ { -1, ""}
};
static char*
@@ -320,32 +340,32 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
fprintf(f, "%s", op2name(opcode));
switch (opcode) {
- case OP_EXACT1:
+ case OP_STR_1:
p_string(f, 1, p->exact.s); break;
- case OP_EXACT2:
+ case OP_STR_2:
p_string(f, 2, p->exact.s); break;
- case OP_EXACT3:
+ case OP_STR_3:
p_string(f, 3, p->exact.s); break;
- case OP_EXACT4:
+ case OP_STR_4:
p_string(f, 4, p->exact.s); break;
- case OP_EXACT5:
+ case OP_STR_5:
p_string(f, 5, p->exact.s); break;
- case OP_EXACTN:
+ case OP_STR_N:
len = p->exact_n.n;
p_string(f, len, p->exact_n.s); break;
- case OP_EXACTMB2N1:
+ case OP_STR_MB2N1:
p_string(f, 2, p->exact.s); break;
- case OP_EXACTMB2N2:
+ case OP_STR_MB2N2:
p_string(f, 4, p->exact.s); break;
- case OP_EXACTMB2N3:
+ case OP_STR_MB2N3:
p_string(f, 3, p->exact.s); break;
- case OP_EXACTMB2N:
+ case OP_STR_MB2N:
len = p->exact_n.n;
p_len_string(f, len, 2, p->exact_n.s); break;
- case OP_EXACTMB3N:
+ case OP_STR_MB3N:
len = p->exact_n.n;
p_len_string(f, len, 3, p->exact_n.s); break;
- case OP_EXACTMBN:
+ case OP_STR_MBN:
{
int mb_len;
@@ -357,11 +377,11 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
while (n-- > 0) { fputc(*q++, f); }
}
break;
- case OP_EXACT1_IC:
+ case OP_STR_1_IC:
len = enclen(enc, p->exact.s);
p_string(f, len, p->exact.s);
break;
- case OP_EXACTN_IC:
+ case OP_STR_N_IC:
len = p->exact_n.n;
p_len_string(f, len, 1, p->exact_n.s);
break;
@@ -375,13 +395,13 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
case OP_CCLASS_MB_NOT:
{
OnigCodePoint ncode;
- OnigCodePoint* codes;
+ OnigCodePoint* codes;
codes = (OnigCodePoint* )p->cclass_mb.mb;
GET_CODE_POINT(ncode, codes);
codes++;
GET_CODE_POINT(code, codes);
- fprintf(f, ":%u:%u", code, ncode);
+ fprintf(f, ":%d:0x%x", ncode, code);
}
break;
case OP_CCLASS_MIX:
@@ -447,15 +467,18 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
}
break;
- case OP_MEMORY_START:
- case OP_MEMORY_START_PUSH:
+ case OP_MEM_START:
+ case OP_MEM_START_PUSH:
mem = p->memory_start.num;
fprintf(f, ":%d", mem);
break;
- case OP_MEMORY_END_PUSH:
- case OP_MEMORY_END_PUSH_REC:
- case OP_MEMORY_END:
- case OP_MEMORY_END_REC:
+
+ case OP_MEM_END:
+ case OP_MEM_END_PUSH:
+#ifdef USE_CALL
+ case OP_MEM_END_REC:
+ case OP_MEM_END_PUSH_REC:
+#endif
mem = p->memory_end.num;
fprintf(f, ":%d", mem);
break;
@@ -499,8 +522,6 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
case OP_REPEAT_INC:
case OP_REPEAT_INC_NG:
- case OP_REPEAT_INC_SG:
- case OP_REPEAT_INC_NG_SG:
mem = p->repeat.id;
fprintf(f, ":%d", mem);
break;
@@ -511,7 +532,9 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
break;
case OP_EMPTY_CHECK_END:
case OP_EMPTY_CHECK_END_MEMST:
+#ifdef USE_CALL
case OP_EMPTY_CHECK_END_MEMST_PUSH:
+#endif
mem = p->empty_check_end.mem;
fprintf(f, ":%d", mem);
break;
@@ -534,10 +557,12 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
p_rel_addr(f, addr, p, start);
break;
+#ifdef USE_CALL
case OP_CALL:
addr = p->call.addr;
fprintf(f, ":{/%d}", addr);
break;
+#endif
case OP_PUSH_SAVE_VAL:
{
@@ -607,7 +632,9 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
case OP_ATOMIC_START:
case OP_ATOMIC_END:
case OP_LOOK_BEHIND_NOT_END:
+#ifdef USE_CALL
case OP_RETURN:
+#endif
break;
default:
@@ -615,7 +642,7 @@ print_compiled_byte_code(FILE* f, regex_t* reg, int index,
break;
}
}
-#endif /* ONIG_DEBUG */
+#endif /* defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) */
#ifdef ONIG_DEBUG_COMPILE
extern void
@@ -625,8 +652,8 @@ onig_print_compiled_byte_code_list(FILE* f, regex_t* reg)
Operation* start = reg->ops;
Operation* end = reg->ops + reg->ops_used;
- fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n",
- reg->bt_mem_start, reg->bt_mem_end);
+ fprintf(f, "push_mem_start: 0x%x, push_mem_end: 0x%x\n",
+ reg->push_mem_start, reg->push_mem_end);
fprintf(f, "code-length: %d\n", reg->ops_used);
bp = start;
@@ -943,7 +970,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
result = ONIGERR_INVALID_ARGUMENT;\
}\
best_len = result;\
- goto finish;\
+ goto match_at_end;\
break;\
}\
} while(0)
@@ -965,21 +992,31 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
/* handled by normal-POP */
#define STK_MEM_START 0x0010
#define STK_MEM_END 0x8030
-#define STK_REPEAT_INC 0x0050
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+#define STK_REPEAT_INC (0x0040 | STK_MASK_POP_HANDLED)
+#else
+#define STK_REPEAT_INC 0x0040
+#endif
#ifdef USE_CALLOUT
#define STK_CALLOUT 0x0070
#endif
/* avoided by normal-POP */
#define STK_VOID 0x0000 /* for fill a blank */
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+#define STK_EMPTY_CHECK_START (0x3000 | STK_MASK_POP_HANDLED)
+#else
#define STK_EMPTY_CHECK_START 0x3000
+#endif
#define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */
#define STK_MEM_END_MARK 0x8100
#define STK_TO_VOID_START 0x1200 /* mark for "(?>...)" */
-#define STK_REPEAT 0x0300
+/* #define STK_REPEAT 0x0300 */
#define STK_CALL_FRAME 0x0400
#define STK_RETURN 0x0500
#define STK_SAVE_VAL 0x0600
+#define STK_PREC_READ_START 0x0700
+#define STK_PREC_READ_END 0x0800
/* stack type check mask */
#define STK_MASK_POP_USED STK_ALT_FLAG
@@ -1000,11 +1037,10 @@ typedef struct _StackType {
UChar* pstr_prev; /* previous char position of pstr */
} state;
struct {
- int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */
- Operation* pcode; /* byte code position (head of repeated target) */
- } repeat;
- struct {
- StackIndex si; /* index of stack */
+ int count;
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+ StackIndex prev_index; /* index of stack */
+#endif
} repeat_inc;
struct {
UChar *pstr; /* start/end position */
@@ -1013,7 +1049,10 @@ typedef struct _StackType {
StackIndex prev_end; /* prev. info (for backtrack "(...)*" ) */
} mem;
struct {
- UChar *pstr; /* start position */
+ UChar *pstr; /* start position */
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+ StackIndex prev_index; /* index of stack */
+#endif
} empty_check;
#ifdef USE_CALL
struct {
@@ -1059,29 +1098,64 @@ struct OnigCalloutArgsStruct {
#endif
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+/* PTR_NUM_SIZE matches the StackIndex slots that UPDATE_FOR_STACK_REALLOC carves from alloc_base, in the order: repeat_stk, empty_check_stk, mem_start_stk, mem_end_stk. */
+#define PTR_NUM_SIZE(reg) ((reg)->num_repeat + (reg)->num_empty_check + ((reg)->num_mem + 1) * 2)
+#define UPDATE_FOR_STACK_REALLOC do{\
+ repeat_stk = (StackIndex* )alloc_base;\
+ empty_check_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
+ mem_start_stk = (StackIndex* )(empty_check_stk + reg->num_empty_check);\
+ mem_end_stk = mem_start_stk + num_mem + 1;\
+} while(0)
+/* SAVE stores the current repeat_stk[sid] in the entry at stk, LOAD makes that entry the current one, and POP_REPEAT_INC (an else-if arm appended to an existing if/else chain) restores the saved index. */
+#define SAVE_REPEAT_STK_VAR(sid) stk->u.repeat_inc.prev_index = repeat_stk[sid]
+#define LOAD_TO_REPEAT_STK_VAR(sid) repeat_stk[sid] = GET_STACK_INDEX(stk)
+#define POP_REPEAT_INC else if (stk->type == STK_REPEAT_INC) {repeat_stk[stk->zid] = stk->u.repeat_inc.prev_index;}
+/* Same save/load/restore protocol for empty_check_stk[] and STK_EMPTY_CHECK_START entries. */
+#define SAVE_EMPTY_CHECK_STK_VAR(sid) stk->u.empty_check.prev_index = empty_check_stk[sid]
+#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid) empty_check_stk[sid] = GET_STACK_INDEX(stk)
+#define POP_EMPTY_CHECK_START else if (stk->type == STK_EMPTY_CHECK_START) {empty_check_stk[stk->zid] = stk->u.empty_check.prev_index;}
+
+#else
+/* Option off: only mem_start_stk and mem_end_stk are derived from alloc_base, and the save/load/pop hooks below expand to nothing. */
+#define PTR_NUM_SIZE(reg) (((reg)->num_mem + 1) * 2)
+#define UPDATE_FOR_STACK_REALLOC do{\
+ mem_start_stk = (StackIndex* )alloc_base;\
+ mem_end_stk = mem_start_stk + num_mem + 1;\
+} while(0)
+
+#define SAVE_REPEAT_STK_VAR(sid)
+#define LOAD_TO_REPEAT_STK_VAR(sid)
+#define POP_REPEAT_INC
+
+#define SAVE_EMPTY_CHECK_STK_VAR(sid)
+#define LOAD_TO_EMPTY_CHECK_STK_VAR(sid)
+#define POP_EMPTY_CHECK_START
+
+#endif /* USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
+#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option);\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
- (msa).match_stack_limit = (mp)->match_stack_limit;\
- (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
- (msa).mp = mp;\
+ (msa).match_stack_limit = (mpv)->match_stack_limit;\
+ (msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\
+ (msa).mp = mpv;\
(msa).best_len = ONIG_MISMATCH;\
- (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
+ (msa).ptr_num = PTR_NUM_SIZE(reg);\
} while(0)
#else
-#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \
+#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mpv) do { \
(msa).stack_p = (void* )0;\
(msa).options = (arg_option);\
(msa).region = (arg_region);\
(msa).start = (arg_start);\
- (msa).match_stack_limit = (mp)->match_stack_limit;\
- (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\
- (msa).mp = mp;\
- (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \
+ (msa).match_stack_limit = (mpv)->match_stack_limit;\
+ (msa).retry_limit_in_match = (mpv)->retry_limit_in_match;\
+ (msa).mp = mpv;\
+ (msa).ptr_num = PTR_NUM_SIZE(reg);\
} while(0)
#endif
@@ -1136,12 +1210,6 @@ struct OnigCalloutArgsStruct {
};\
} while(0)
-#define UPDATE_FOR_STACK_REALLOC do{\
- repeat_stk = (StackIndex* )alloc_base;\
- mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\
- mem_end_stk = mem_start_stk + num_mem + 1;\
-} while(0)
-
static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE;
extern unsigned int
@@ -1162,7 +1230,9 @@ onig_set_match_stack_limit_size(unsigned int size)
static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH;
#define CHECK_RETRY_LIMIT_IN_MATCH do {\
- if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\
+ if (retry_in_match_counter++ > retry_limit_in_match) {\
+ MATCH_AT_ERROR_RETURN(ONIGERR_RETRY_LIMIT_IN_MATCH_OVER);\
+ }\
} while (0)
#else
@@ -1544,27 +1614,31 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
-#define STACK_PUSH_POS(s,sprev) \
- STACK_PUSH(STK_TO_VOID_START,(Operation* )0,s,sprev)
+#define STACK_PUSH_PREC_READ_START(s,sprev) \
+ STACK_PUSH(STK_PREC_READ_START,(Operation* )0,s,sprev)
#define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \
STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev)
#define STACK_PUSH_TO_VOID_START STACK_PUSH_TYPE(STK_TO_VOID_START)
#define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \
STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev)
+#if 0
#define STACK_PUSH_REPEAT(sid, pat) do {\
STACK_ENSURE(1);\
stk->type = STK_REPEAT;\
stk->zid = (sid);\
- stk->u.repeat.pcode = (pat);\
- stk->u.repeat.count = 0;\
+ stk->u.repeat.pcode = (pat);\
STACK_INC;\
} while(0)
+#endif
-#define STACK_PUSH_REPEAT_INC(sindex) do {\
+#define STACK_PUSH_REPEAT_INC(sid, ct) do {\
STACK_ENSURE(1);\
stk->type = STK_REPEAT_INC;\
- stk->u.repeat_inc.si = (sindex);\
+ stk->zid = (sid);\
+ stk->u.repeat_inc.count = (ct);\
+ SAVE_REPEAT_STK_VAR(sid);\
+ LOAD_TO_REPEAT_STK_VAR(sid);\
STACK_INC;\
} while(0)
@@ -1637,6 +1711,8 @@ stack_double(int is_alloca, char** arg_alloc_base,
stk->type = STK_EMPTY_CHECK_START;\
stk->zid = (cnum);\
stk->u.empty_check.pstr = (s);\
+ SAVE_EMPTY_CHECK_STK_VAR(cnum);\
+ LOAD_TO_EMPTY_CHECK_STK_VAR(cnum);\
STACK_INC;\
} while(0)
@@ -1774,7 +1850,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_BASE_CHECK(p, at) \
if ((p) < stk_base) {\
fprintf(stderr, "at %s\n", at);\
- goto stack_error;\
+ MATCH_AT_ERROR_RETURN(ONIGERR_STACK_BUG);\
}
#else
#define STACK_BASE_CHECK(p, at)
@@ -1825,13 +1901,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
else if (stk->type == STK_MEM_END) {\
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
+ POP_REPEAT_INC \
+ POP_EMPTY_CHECK_START \
POP_CALLOUT_CASE\
}\
}\
@@ -1850,13 +1925,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
- else if (stk->type == STK_REPEAT_INC) {\
- STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
- }\
else if (stk->type == STK_MEM_END) {\
mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
+ POP_REPEAT_INC \
+ POP_EMPTY_CHECK_START \
/* Don't call callout here because negation of total success by (?!..) (?<!..) */\
}\
}\
@@ -1887,65 +1961,99 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while(0)
-#define STACK_EMPTY_CHECK(isnull,sid,s) do {\
- StackType* k = stk;\
+#define STACK_GET_PREC_READ_START(k) do {\
+ int level = 0;\
+ k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \
- if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->zid == (sid)) {\
- (isnull) = (k->u.empty_check.pstr == (s));\
+ STACK_BASE_CHECK(k, "STACK_GET_PREC_READ_START");\
+ if (IS_TO_VOID_TARGET(k)) {\
+ k->type = STK_VOID;\
+ }\
+ else if (k->type == STK_PREC_READ_START) {\
+ if (level == 0) {\
break;\
}\
+ level--;\
+ }\
+ else if (k->type == STK_PREC_READ_END) {\
+ level++;\
}\
}\
} while(0)
+/* Walk back from stk to the nearest STK_EMPTY_CHECK_START entry whose zid equals (sid) and leave k pointing at it. */
+#define EMPTY_CHECK_START_SEARCH(sid, k) do {\
+ k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "EMPTY_CHECK_START_SEARCH"); \
+ if (k->type == STK_EMPTY_CHECK_START) {\
+ if (k->zid == (sid)) break;\
+ }\
+ }\
+} while(0)
+/* GET_EMPTY_CHECK_START(sid, k): set k to the STK_EMPTY_CHECK_START entry for (sid). */
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+/* With reg->num_call == 0 the entry is read through empty_check_stk[sid]; otherwise the stack is searched. NOTE(review): presumably the cached index is not reliable across calls -- confirm. */
+#define GET_EMPTY_CHECK_START(sid, k) do {\
+ if (reg->num_call == 0) {\
+ k = STACK_AT(empty_check_stk[sid]);\
+ }\
+ else {\
+ EMPTY_CHECK_START_SEARCH(sid, k);\
+ }\
+} while(0)
+#else
+/* No empty_check_stk[] in this configuration: always search the stack. */
+#define GET_EMPTY_CHECK_START(sid, k) EMPTY_CHECK_START_SEARCH(sid, k)
+
+#endif
+
+/* Set (isnull) to non-zero when the pstr recorded in the empty-check start entry for (sid) equals (s). */
+#define STACK_EMPTY_CHECK(isnull, sid, s) do {\
+ StackType* k;\
+ GET_EMPTY_CHECK_START(sid, k);\
+ (isnull) = (k->u.empty_check.pstr == (s));\
+} while(0)
+
#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
(addr) = 0;\
}\
else {\
- if (MEM_STATUS_AT((reg)->bt_mem_end, k->zid))\
+ if (MEM_STATUS_AT((reg)->push_mem_end, k->zid))\
(addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\
else\
(addr) = (UChar* )k->u.mem.prev_end;\
}\
} while (0)
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
-#define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\
- StackType* k = stk;\
- while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM"); \
- if (k->type == STK_EMPTY_CHECK_START) {\
- if (k->zid == (sid)) {\
- if (k->u.empty_check.pstr != (s)) {\
- (isnull) = 0;\
- break;\
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
+#define STACK_EMPTY_CHECK_MEM(isnull, sid, s, reg) do {\
+ StackType* k;\
+ GET_EMPTY_CHECK_START(sid, k);\
+ if (k->u.empty_check.pstr != (s)) {\
+ (isnull) = 0;\
+ }\
+ else {\
+ UChar* endp;\
+ (isnull) = 1;\
+ while (k < stk) {\
+ if (k->type == STK_MEM_START &&\
+ MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid)) {\
+ STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
+ if (endp == 0) {\
+ (isnull) = 0; break;\
}\
- else {\
- UChar* endp;\
- (isnull) = 1;\
- while (k < stk) {\
- if (k->type == STK_MEM_START) {\
- STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
- if (endp == 0) {\
- (isnull) = 0; break;\
- }\
- else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
- (isnull) = 0; break;\
- }\
- else if (endp != s) {\
- (isnull) = -1; /* empty, but position changed */ \
- }\
- }\
- k++;\
- }\
- break;\
+ else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
+ (isnull) = 0; break;\
+ }\
+ else if (endp != s) {\
+ (isnull) = -1; /* empty, but position changed */ \
}\
}\
+ k++;\
}\
}\
} while(0)
@@ -1968,7 +2076,8 @@ stack_double(int is_alloca, char** arg_alloc_base,
(isnull) = 1;\
while (k < stk) {\
if (k->type == STK_MEM_START) {\
- if (level == 0) {\
+ if (level == 0 && \
+ MEM_STATUS_LIMIT_AT((reg)->empty_status_mem, k->zid) !=0) {\
STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
if (endp == 0) {\
(isnull) = 0; break;\
@@ -2023,26 +2132,47 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
}\
} while(0)
-#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
+#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
-#define STACK_GET_REPEAT(sid, k) do {\
- int level = 0;\
- k = stk;\
+#define STACK_GET_REPEAT_COUNT_SEARCH(sid, c) do {\
+ StackType* k = stk;\
while (1) {\
- k--;\
- STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
- if (k->type == STK_REPEAT) {\
- if (level == 0) {\
- if (k->zid == (sid)) {\
- break;\
+ (k)--;\
+ STACK_BASE_CHECK(k, "STACK_GET_REPEAT_COUNT_SEARCH");\
+ if ((k)->type == STK_REPEAT_INC) {\
+ if ((k)->zid == (sid)) {\
+ (c) = (k)->u.repeat_inc.count;\
+ break;\
+ }\
+ }\
+ else if ((k)->type == STK_RETURN) {\
+ int level = -1;\
+ while (1) {\
+ (k)--;\
+ if ((k)->type == STK_CALL_FRAME) {\
+ level++;\
+ if (level == 0) break;\
}\
+ else if ((k)->type == STK_RETURN) level--;\
}\
}\
- else if (k->type == STK_CALL_FRAME) level--;\
- else if (k->type == STK_RETURN) level++;\
}\
} while(0)
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+/* Store in (c) the count held by the STK_REPEAT_INC entry for (sid): read through repeat_stk[sid] when reg->num_call == 0, otherwise found by STACK_GET_REPEAT_COUNT_SEARCH. */
+#define STACK_GET_REPEAT_COUNT(sid, c) do {\
+ if (reg->num_call == 0) {\
+ (c) = (STACK_AT(repeat_stk[sid]))->u.repeat_inc.count;\
+ }\
+ else {\
+ STACK_GET_REPEAT_COUNT_SEARCH(sid, c);\
+ }\
+} while(0)
+#else /* no repeat_stk[] in this configuration: always use the search */
+#define STACK_GET_REPEAT_COUNT(sid, c) STACK_GET_REPEAT_COUNT_SEARCH(sid, c)
+#endif
+
#define STACK_RETURN(addr) do {\
int level = 0;\
StackType* k = stk;\
@@ -2444,6 +2574,8 @@ typedef struct {
#define MATCH_DEBUG_OUT(offset)
#endif
+#define MATCH_AT_ERROR_RETURN(err_code) best_len = err_code; goto match_at_end
+
/* match data(str - end) from position (sstart). */
/* if sstart == str then set sprev to NULL. */
@@ -2463,20 +2595,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
static const void *opcode_to_label[] = {
&&L_FINISH,
&&L_END,
- &&L_EXACT1,
- &&L_EXACT2,
- &&L_EXACT3,
- &&L_EXACT4,
- &&L_EXACT5,
- &&L_EXACTN,
- &&L_EXACTMB2N1,
- &&L_EXACTMB2N2,
- &&L_EXACTMB2N3,
- &&L_EXACTMB2N,
- &&L_EXACTMB3N,
- &&L_EXACTMBN,
- &&L_EXACT1_IC,
- &&L_EXACTN_IC,
+ &&L_STR_1,
+ &&L_STR_2,
+ &&L_STR_3,
+ &&L_STR_4,
+ &&L_STR_5,
+ &&L_STR_N,
+ &&L_STR_MB2N1,
+ &&L_STR_MB2N2,
+ &&L_STR_MB2N3,
+ &&L_STR_MB2N,
+ &&L_STR_MB3N,
+ &&L_STR_MBN,
+ &&L_STR_1_IC,
+ &&L_STR_N_IC,
&&L_CCLASS,
&&L_CCLASS_MB,
&&L_CCLASS_MIX,
@@ -2514,12 +2646,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_BACKREF_WITH_LEVEL_IC,
&&L_BACKREF_CHECK,
&&L_BACKREF_CHECK_WITH_LEVEL,
- &&L_MEMORY_START,
- &&L_MEMORY_START_PUSH,
- &&L_MEMORY_END_PUSH,
- &&L_MEMORY_END_PUSH_REC,
- &&L_MEMORY_END,
- &&L_MEMORY_END_REC,
+ &&L_MEM_START,
+ &&L_MEM_START_PUSH,
+ &&L_MEM_END_PUSH,
+#ifdef USE_CALL
+ &&L_MEM_END_PUSH_REC,
+#endif
+ &&L_MEM_END,
+#ifdef USE_CALL
+ &&L_MEM_END_REC,
+#endif
&&L_FAIL,
&&L_JUMP,
&&L_PUSH,
@@ -2533,12 +2669,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_REPEAT_NG,
&&L_REPEAT_INC,
&&L_REPEAT_INC_NG,
- &&L_REPEAT_INC_SG,
- &&L_REPEAT_INC_NG_SG,
&&L_EMPTY_CHECK_START,
&&L_EMPTY_CHECK_END,
&&L_EMPTY_CHECK_END_MEMST,
+#ifdef USE_CALL
&&L_EMPTY_CHECK_END_MEMST_PUSH,
+#endif
&&L_PREC_READ_START,
&&L_PREC_READ_END,
&&L_PREC_READ_NOT_START,
@@ -2548,10 +2684,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
&&L_LOOK_BEHIND,
&&L_LOOK_BEHIND_NOT_START,
&&L_LOOK_BEHIND_NOT_END,
- &&L_CALL,
- &&L_RETURN,
&&L_PUSH_SAVE_VAL,
&&L_UPDATE_VAR,
+#ifdef USE_CALL
+ &&L_CALL,
+ &&L_RETURN,
+#endif
#ifdef USE_CALLOUT
&&L_CALLOUT_CONTENTS,
&&L_CALLOUT_NAME,
@@ -2569,15 +2707,17 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
char *alloc_base;
StackType *stk_base, *stk, *stk_end;
StackType *stkp; /* used as any purpose. */
- StackIndex si;
- StackIndex *repeat_stk;
StackIndex *mem_start_stk, *mem_end_stk;
UChar* keep;
+
+#ifdef USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR
+ StackIndex *repeat_stk;
+ StackIndex *empty_check_stk;
+#endif
#ifdef USE_RETRY_LIMIT_IN_MATCH
unsigned long retry_limit_in_match;
unsigned long retry_in_match_counter;
#endif
-
#ifdef USE_CALLOUT
int of;
#endif
@@ -2663,15 +2803,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
rmt[0].rm_eo = (regoff_t )(s - str);
for (i = 1; i <= num_mem; i++) {
if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (MEM_STATUS_AT(reg->bt_mem_start, i))
- rmt[i].rm_so = (regoff_t )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
- else
- rmt[i].rm_so = (regoff_t )((UChar* )((void* )(mem_start_stk[i])) - str);
-
- rmt[i].rm_eo = (regoff_t )((MEM_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[i]))
- - str);
+ rmt[i].rm_so = (regoff_t )(STACK_MEM_START(reg, i) - str);
+ rmt[i].rm_eo = (regoff_t )(STACK_MEM_END(reg, i) - str);
}
else {
rmt[i].rm_so = rmt[i].rm_eo = ONIG_REGION_NOTPOS;
@@ -2684,14 +2817,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
region->end[0] = (int )(s - str);
for (i = 1; i <= num_mem; i++) {
if (mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (MEM_STATUS_AT(reg->bt_mem_start, i))
- region->beg[i] = (int )(STACK_AT(mem_start_stk[i])->u.mem.pstr - str);
- else
- region->beg[i] = (int )((UChar* )((void* )mem_start_stk[i]) - str);
-
- region->end[i] = (int )((MEM_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[i])) - str);
+ region->beg[i] = (int )(STACK_MEM_START(reg, i) - str);
+ region->end[i] = (int )(STACK_MEM_END(reg, i) - str);
}
else {
region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
@@ -2719,10 +2846,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
stkp = stk_base;
r = make_capture_history_tree(region->history_root, &stkp,
stk, (UChar* )str, reg);
- if (r < 0) {
- best_len = r; /* error code */
- goto finish;
- }
+ if (r < 0) MATCH_AT_ERROR_RETURN(r);
}
#endif /* USE_CAPTURE_HISTORY */
#ifdef USE_POSIX_API_REGION_OPTION
@@ -2747,9 +2871,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
/* default behavior: return first-matching result. */
- goto finish;
+ goto match_at_end;
- CASE_OP(EXACT1)
+ CASE_OP(STR_1)
DATA_ENSURE(1);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2757,7 +2881,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
NEXT_OUT;
- CASE_OP(EXACT1_IC)
+ CASE_OP(STR_1_IC)
{
int len;
UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
@@ -2778,7 +2902,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
NEXT_OUT;
- CASE_OP(EXACT2)
+ CASE_OP(STR_2)
DATA_ENSURE(2);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2789,7 +2913,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACT3)
+ CASE_OP(STR_3)
DATA_ENSURE(3);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2802,7 +2926,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACT4)
+ CASE_OP(STR_4)
DATA_ENSURE(4);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2817,7 +2941,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACT5)
+ CASE_OP(STR_5)
DATA_ENSURE(5);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2834,7 +2958,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTN)
+ CASE_OP(STR_N)
tlen = p->exact_n.n;
DATA_ENSURE(tlen);
ps = p->exact_n.s;
@@ -2845,7 +2969,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTN_IC)
+ CASE_OP(STR_N_IC)
{
int len;
UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
@@ -2863,6 +2987,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
DATA_ENSURE(0);
q = lowbuf;
while (len-- > 0) {
+ if (ps >= endp) goto fail;
if (*ps != *q) goto fail;
ps++; q++;
}
@@ -2872,7 +2997,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTMB2N1)
+ CASE_OP(STR_MB2N1)
DATA_ENSURE(2);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2882,7 +3007,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
NEXT_OUT;
- CASE_OP(EXACTMB2N2)
+ CASE_OP(STR_MB2N2)
DATA_ENSURE(4);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2897,7 +3022,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTMB2N3)
+ CASE_OP(STR_MB2N3)
DATA_ENSURE(6);
ps = p->exact.s;
if (*ps != *s) goto fail;
@@ -2916,7 +3041,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTMB2N)
+ CASE_OP(STR_MB2N)
tlen = p->exact_n.n;
DATA_ENSURE(tlen * 2);
ps = p->exact_n.s;
@@ -2930,7 +3055,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTMB3N)
+ CASE_OP(STR_MB3N)
tlen = p->exact_n.n;
DATA_ENSURE(tlen * 3);
ps = p->exact_n.s;
@@ -2946,7 +3071,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(EXACTMBN)
+ CASE_OP(STR_MBN)
tlen = p->exact_len_n.len; /* mb byte len */
tlen2 = p->exact_len_n.n; /* number of chars */
tlen2 *= tlen;
@@ -2968,6 +3093,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
NEXT_OUT;
CASE_OP(CCLASS_MB)
+ DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
cclass_mb:
@@ -2976,7 +3102,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
UChar *ss;
int mb_len;
- DATA_ENSURE(1);
mb_len = enclen(encode, s);
DATA_ENSURE(mb_len);
ss = s;
@@ -3265,7 +3390,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
break;
#endif
default:
- goto bytecode_error;
+ MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
break;
}
@@ -3365,46 +3490,50 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
INC_OP;
JUMP_OUT;
- CASE_OP(MEMORY_START_PUSH)
+ CASE_OP(MEM_START_PUSH)
mem = p->memory_start.num;
STACK_PUSH_MEM_START(mem, s);
INC_OP;
JUMP_OUT;
- CASE_OP(MEMORY_START)
+ CASE_OP(MEM_START)
mem = p->memory_start.num;
mem_start_stk[mem] = (StackIndex )((void* )s);
INC_OP;
JUMP_OUT;
- CASE_OP(MEMORY_END_PUSH)
+ CASE_OP(MEM_END_PUSH)
mem = p->memory_end.num;
STACK_PUSH_MEM_END(mem, s);
INC_OP;
JUMP_OUT;
- CASE_OP(MEMORY_END)
+ CASE_OP(MEM_END)
mem = p->memory_end.num;
mem_end_stk[mem] = (StackIndex )((void* )s);
INC_OP;
JUMP_OUT;
#ifdef USE_CALL
- CASE_OP(MEMORY_END_PUSH_REC)
- mem = p->memory_end.num;
- STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
- si = GET_STACK_INDEX(stkp);
- STACK_PUSH_MEM_END(mem, s);
- mem_start_stk[mem] = si;
- INC_OP;
- JUMP_OUT;
+ CASE_OP(MEM_END_PUSH_REC)
+ {
+ StackIndex si;
+
+ mem = p->memory_end.num;
+ STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
+ si = GET_STACK_INDEX(stkp);
+ STACK_PUSH_MEM_END(mem, s);
+ mem_start_stk[mem] = si;
+ INC_OP;
+ JUMP_OUT;
+ }
- CASE_OP(MEMORY_END_REC)
+ CASE_OP(MEM_END_REC)
mem = p->memory_end.num;
mem_end_stk[mem] = (StackIndex )((void* )s);
STACK_GET_MEM_START(mem, stkp);
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
+ if (MEM_STATUS_AT(reg->push_mem_start, mem))
mem_start_stk[mem] = GET_STACK_INDEX(stkp);
else
mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr);
@@ -3432,20 +3561,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
+ pstart = STACK_MEM_START(reg, mem);
+ pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP(pstart, s, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP(s, pstart, n);
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
}
INC_OP;
JUMP_OUT;
@@ -3459,20 +3584,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
+ pstart = STACK_MEM_START(reg, mem);
+ pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP_IC(case_fold_flag, pstart, &s, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP_IC(case_fold_flag, pstart, &s, n);
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
}
INC_OP;
JUMP_OUT;
@@ -3489,24 +3610,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
+ pstart = STACK_MEM_START(reg, mem);
+ pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE(pstart, swork, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
-
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE(swork, pstart, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
break; /* success */
}
if (i == tlen) goto fail;
@@ -3526,24 +3642,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
- if (MEM_STATUS_AT(reg->bt_mem_start, mem))
- pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
- else
- pstart = (UChar* )((void* )mem_start_stk[mem]);
-
- pend = (MEM_STATUS_AT(reg->bt_mem_end, mem)
- ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
- : (UChar* )((void* )mem_end_stk[mem]));
+ pstart = STACK_MEM_START(reg, mem);
+ pend = STACK_MEM_END(reg, mem);
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
-
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
break; /* success */
}
if (i == tlen) goto fail;
@@ -3560,6 +3671,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int len;
int level;
MemNumType* mems;
+ UChar* ssave;
n = 0;
backref_with_level:
@@ -3567,10 +3679,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
tlen = p->backref_general.num;
mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
- sprev = s;
+ ssave = s;
if (backref_match_at_nested_level(reg, stk, stk_base, n,
case_fold_flag, level, (int )tlen, mems, &s, end)) {
- if (sprev < end) {
+ if (ssave != s) {
+ sprev = ssave;
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
}
@@ -3643,12 +3756,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_PUSH:
case OP_REPEAT_INC:
case OP_REPEAT_INC_NG:
- case OP_REPEAT_INC_SG:
- case OP_REPEAT_INC_NG_SG:
INC_OP;
break;
default:
- goto unexpected_bytecode_error;
+ MATCH_AT_ERROR_RETURN(ONIGERR_UNEXPECTED_BYTECODE);
break;
}
#else
@@ -3658,7 +3769,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
JUMP_OUT;
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
CASE_OP(EMPTY_CHECK_END_MEMST)
{
int is_empty;
@@ -3683,7 +3794,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int is_empty;
mem = p->empty_check_end.mem; /* mem: null check id */
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg);
#else
STACK_EMPTY_CHECK_REC(is_empty, mem, s);
@@ -3751,7 +3862,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
addr = p->push_if_peek_next.addr;
c = p->push_if_peek_next.c;
- if (c == *s) {
+ if (DATA_ENSURE_CHECK1 && c == *s) {
STACK_PUSH_ALT(p + addr, s, sprev);
INC_OP;
JUMP_OUT;
@@ -3764,10 +3875,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem = p->repeat.id; /* mem: OP_REPEAT ID */
addr = p->repeat.addr;
- STACK_ENSURE(1);
- repeat_stk[mem] = GET_STACK_INDEX(stk);
- STACK_PUSH_REPEAT(mem, p + 1);
-
+ STACK_PUSH_REPEAT_INC(mem, 0);
if (reg->repeat_range[mem].lower == 0) {
STACK_PUSH_ALT(p + addr, s, sprev);
}
@@ -3778,10 +3886,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
mem = p->repeat.id; /* mem: OP_REPEAT ID */
addr = p->repeat.addr;
- STACK_ENSURE(1);
- repeat_stk[mem] = GET_STACK_INDEX(stk);
- STACK_PUSH_REPEAT(mem, p + 1);
-
+ STACK_PUSH_REPEAT_INC(mem, 0);
if (reg->repeat_range[mem].lower == 0) {
STACK_PUSH_ALT(p + 1, s, sprev);
p += addr;
@@ -3792,73 +3897,52 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
CASE_OP(REPEAT_INC)
mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
-
- repeat_inc:
- stkp->u.repeat.count++;
- if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
+ STACK_GET_REPEAT_COUNT(mem, n);
+ n++;
+ if (n >= reg->repeat_range[mem].upper) {
/* end of repeat. Nothing to do. */
INC_OP;
}
- else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
+ else if (n >= reg->repeat_range[mem].lower) {
INC_OP;
STACK_PUSH_ALT(p, s, sprev);
- p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
+ p = reg->repeat_range[mem].u.pcode;
}
else {
- p = stkp->u.repeat.pcode;
+ p = reg->repeat_range[mem].u.pcode;
}
- STACK_PUSH_REPEAT_INC(si);
+ STACK_PUSH_REPEAT_INC(mem, n);
CHECK_INTERRUPT_JUMP_OUT;
- CASE_OP(REPEAT_INC_SG)
- mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
- STACK_GET_REPEAT(mem, stkp);
- si = GET_STACK_INDEX(stkp);
- goto repeat_inc;
-
CASE_OP(REPEAT_INC_NG)
mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
- si = repeat_stk[mem];
- stkp = STACK_AT(si);
-
- repeat_inc_ng:
- stkp->u.repeat.count++;
- if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
- if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
- Operation* pcode = stkp->u.repeat.pcode;
-
- STACK_PUSH_REPEAT_INC(si);
- STACK_PUSH_ALT(pcode, s, sprev);
+ STACK_GET_REPEAT_COUNT(mem, n);
+ n++;
+ STACK_PUSH_REPEAT_INC(mem, n);
+ if (n == reg->repeat_range[mem].upper) {
+ INC_OP;
+ }
+ else {
+ if (n >= reg->repeat_range[mem].lower) {
+ STACK_PUSH_ALT(reg->repeat_range[mem].u.pcode, s, sprev);
INC_OP;
}
else {
- p = stkp->u.repeat.pcode;
- STACK_PUSH_REPEAT_INC(si);
+ p = reg->repeat_range[mem].u.pcode;
}
}
- else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
- STACK_PUSH_REPEAT_INC(si);
- INC_OP;
- }
CHECK_INTERRUPT_JUMP_OUT;
- CASE_OP(REPEAT_INC_NG_SG)
- mem = p->repeat_inc.id; /* mem: OP_REPEAT ID */
- STACK_GET_REPEAT(mem, stkp);
- si = GET_STACK_INDEX(stkp);
- goto repeat_inc_ng;
-
CASE_OP(PREC_READ_START)
- STACK_PUSH_POS(s, sprev);
+ STACK_PUSH_PREC_READ_START(s, sprev);
INC_OP;
JUMP_OUT;
CASE_OP(PREC_READ_END)
- STACK_EXEC_TO_VOID(stkp);
+ STACK_GET_PREC_READ_START(stkp);
s = stkp->u.state.pstr;
sprev = stkp->u.state.pstr_prev;
+ STACK_PUSH(STK_PREC_READ_END,0,0,0);
INC_OP;
JUMP_OUT;
@@ -3997,14 +4081,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
OnigCalloutFunc func;
OnigCalloutArgs args;
- of = ONIG_CALLOUT_OF_NAME;
- name_id = p->callout_name.id;
- mem = p->callout_name.num;
+ of = ONIG_CALLOUT_OF_NAME;
+ mem = p->callout_name.num;
callout_common_entry:
e = onig_reg_callout_list_at(reg, mem);
in = e->in;
if (of == ONIG_CALLOUT_OF_NAME) {
+ name_id = p->callout_name.id;
func = onig_get_callout_start_func(reg, mem);
}
else {
@@ -4027,7 +4111,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
call_result = ONIGERR_INVALID_ARGUMENT;
}
best_len = call_result;
- goto finish;
+ goto match_at_end;
break;
}
}
@@ -4053,7 +4137,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
#endif
CASE_OP(FINISH)
- goto finish;
+ goto match_at_end;
#ifdef ONIG_DEBUG_STATISTICS
fail:
@@ -4074,37 +4158,472 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
JUMP_OUT;
DEFAULT_OP
- goto bytecode_error;
+ MATCH_AT_ERROR_RETURN(ONIGERR_UNDEFINED_BYTECODE);
} BYTECODE_INTERPRETER_END;
- finish:
+ match_at_end:
STACK_SAVE;
return best_len;
+}
-#ifdef ONIG_DEBUG
- stack_error:
- STACK_SAVE;
- return ONIGERR_STACK_BUG;
-#endif
+typedef struct {
+ regex_t* reg;
+ OnigRegion* region;
+} RR;
+
+struct OnigRegSetStruct {
+ RR* rs;
+ int n;
+ int alloc;
+ OnigEncoding enc;
+ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
+ OnigLen anc_dmin; /* (SEMI_)END_BUF anchor distance */
+ OnigLen anc_dmax; /* (SEMI_)END_BUF anchor distance */
+ int all_low_high;
+ int anychar_inf;
+};
- bytecode_error:
- STACK_SAVE;
- return ONIGERR_UNDEFINED_BYTECODE;
+enum SearchRangeStatus {
+ SRS_DEAD = 0,
+ SRS_LOW_HIGH = 1,
+ SRS_ALL_RANGE = 2
+};
-#if defined(ONIG_DEBUG) && !defined(USE_DIRECT_THREADED_CODE)
- unexpected_bytecode_error:
- STACK_SAVE;
- return ONIGERR_UNEXPECTED_BYTECODE;
-#endif
+typedef struct {
+ int state; /* value of enum SearchRangeStatus */
+ UChar* low;
+ UChar* high;
+ UChar* low_prev;
+ UChar* sch_range;
+} SearchRange;
+
+#define REGSET_MATCH_AND_RETURN_CHECK(upper_range) \
+ r = match_at(reg, str, end, (upper_range), s, prev, msas + i); \
+ if (r != ONIG_MISMATCH) {\
+ if (r >= 0) {\
+ goto match;\
+ }\
+ else goto finish; /* error */ \
+ }
-#ifdef USE_RETRY_LIMIT_IN_MATCH
- retry_limit_in_match_over:
- STACK_SAVE;
- return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER;
+static inline int
+regset_search_body_position_lead(OnigRegSet* set,
+ const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range, /* match start range */
+ const UChar* orig_range, /* data range */
+ OnigOptionType option, MatchArg* msas, int* rmatch_pos)
+{
+ int r, n, i;
+ UChar *s, *prev;
+ UChar *low, *high, *low_prev;
+ UChar* sch_range;
+ regex_t* reg;
+ OnigEncoding enc;
+ SearchRange* sr;
+
+ n = set->n;
+ enc = set->enc;
+
+ s = (UChar* )start;
+ if (s > str)
+ prev = onigenc_get_prev_char_head(enc, str, s);
+ else
+ prev = (UChar* )NULL;
+
+ sr = (SearchRange* )xmalloc(sizeof(*sr) * n);
+ CHECK_NULL_RETURN_MEMERR(sr);
+
+ for (i = 0; i < n; i++) {
+ reg = set->rs[i].reg;
+
+ sr[i].state = SRS_DEAD;
+ if (reg->optimize != OPTIMIZE_NONE) {
+ if (reg->dist_max != INFINITE_LEN) {
+ if (end - range > reg->dist_max)
+ sch_range = (UChar* )range + reg->dist_max;
+ else
+ sch_range = (UChar* )end;
+
+ if (forward_search(reg, str, end, s, sch_range, &low, &high, &low_prev)) {
+ sr[i].state = SRS_LOW_HIGH;
+ sr[i].low = low;
+ sr[i].high = high;
+ sr[i].low_prev = low_prev;
+ sr[i].sch_range = sch_range;
+ }
+ }
+ else {
+ sch_range = (UChar* )end;
+ if (forward_search(reg, str, end, s, sch_range,
+ &low, &high, (UChar** )NULL)) {
+ goto total_active;
+ }
+ }
+ }
+ else {
+ total_active:
+ sr[i].state = SRS_ALL_RANGE;
+ sr[i].low = s;
+ sr[i].high = (UChar* )range;
+ sr[i].low_prev = prev;
+ }
+ }
+
+#define ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN 500
+
+ if (set->all_low_high != 0
+ && range - start > ACTIVATE_ALL_LOW_HIGH_SEARCH_THRESHOLD_LEN) {
+ do {
+ int try_count = 0;
+ for (i = 0; i < n; i++) {
+ if (sr[i].state == SRS_DEAD) continue;
+
+ if (s < sr[i].low) continue;
+ if (s >= sr[i].high) {
+ if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
+ &low, &high, &low_prev) != 0) {
+ sr[i].low = low;
+ sr[i].high = high;
+ sr[i].low_prev = low_prev;
+ if (s < low) continue;
+ }
+ else {
+ sr[i].state = SRS_DEAD;
+ continue;
+ }
+ }
+
+ reg = set->rs[i].reg;
+ REGSET_MATCH_AND_RETURN_CHECK(orig_range);
+ try_count++;
+ } /* for (i) */
+
+ if (s >= range) break;
+
+ if (try_count == 0) {
+ low = (UChar* )range;
+ for (i = 0; i < n; i++) {
+ if (sr[i].state == SRS_LOW_HIGH && low > sr[i].low) {
+ low = sr[i].low;
+ low_prev = sr[i].low_prev;
+ }
+ }
+ if (low == range) break;
+
+ s = low;
+ prev = low_prev;
+ }
+ else {
+ prev = s;
+ s += enclen(enc, s);
+ }
+ } while (1);
+ }
+ else {
+ int prev_is_newline = 1;
+ do {
+ for (i = 0; i < n; i++) {
+ if (sr[i].state == SRS_DEAD) continue;
+ if (sr[i].state == SRS_LOW_HIGH) {
+ if (s < sr[i].low) continue;
+ if (s >= sr[i].high) {
+ if (forward_search(set->rs[i].reg, str, end, s, sr[i].sch_range,
+ &low, &high, &low_prev) != 0) {
+ sr[i].low = low;
+ sr[i].high = high;
+ /* sr[i].low_prev = low_prev; */
+ if (s < low) continue;
+ }
+ else {
+ sr[i].state = SRS_DEAD;
+ continue;
+ }
+ }
+ }
+
+ reg = set->rs[i].reg;
+ if ((reg->anchor & ANCR_ANYCHAR_INF) == 0 || prev_is_newline != 0) {
+ REGSET_MATCH_AND_RETURN_CHECK(orig_range);
+ }
+ }
+
+ if (s >= range) break;
+
+ if (set->anychar_inf != 0)
+ prev_is_newline = ONIGENC_IS_MBC_NEWLINE(set->enc, s, end);
+
+ prev = s;
+ s += enclen(enc, s);
+ } while (1);
+ }
+
+ xfree(sr);
+ return ONIG_MISMATCH;
+
+ finish:
+ xfree(sr);
+ return r;
+
+ match:
+ xfree(sr);
+ *rmatch_pos = (int )(s - str);
+ return i;
+}
+
+static inline int
+regset_search_body_regex_lead(OnigRegSet* set,
+ const UChar* str, const UChar* end,
+ const UChar* start, const UChar* orig_range, OnigRegSetLead lead,
+ OnigOptionType option, OnigMatchParam* mps[], int* rmatch_pos)
+{
+ int r;
+ int i;
+ int n;
+ int match_index;
+ const UChar* ep;
+ regex_t* reg;
+ OnigRegion* region;
+
+ n = set->n;
+
+ match_index = ONIG_MISMATCH;
+ ep = orig_range;
+ for (i = 0; i < n; i++) {
+ reg = set->rs[i].reg;
+ region = set->rs[i].region;
+ r = search_in_range(reg, str, end, start, ep, orig_range, region, option, mps[i]);
+ if (r > 0) {
+ if (str + r < ep) {
+ match_index = i;
+ *rmatch_pos = r;
+ if (lead == ONIG_REGSET_PRIORITY_TO_REGEX_ORDER)
+ break;
+
+ ep = str + r;
+ }
+ }
+ else if (r == 0) {
+ match_index = i;
+ *rmatch_pos = r;
+ break;
+ }
+ }
+
+ return match_index;
+}
+
+extern int
+onig_regset_search_with_param(OnigRegSet* set,
+ const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range,
+ OnigRegSetLead lead, OnigOptionType option, OnigMatchParam* mps[],
+ int* rmatch_pos)
+{
+ int r;
+ int i;
+ UChar *s, *prev;
+ regex_t* reg;
+ OnigEncoding enc;
+ OnigRegion* region;
+ MatchArg* msas;
+ const UChar *orig_start = start;
+ const UChar *orig_range = range;
+
+ if (set->n == 0)
+ return ONIG_MISMATCH;
+
+ if (IS_POSIX_REGION(option))
+ return ONIGERR_INVALID_ARGUMENT;
+
+ r = 0;
+ enc = set->enc;
+ msas = (MatchArg* )NULL;
+
+ for (i = 0; i < set->n; i++) {
+ reg = set->rs[i].reg;
+ region = set->rs[i].region;
+ ADJUST_MATCH_PARAM(reg, mps[i]);
+ if (IS_NOT_NULL(region)) {
+ r = onig_region_resize_clear(region, reg->num_mem + 1);
+ if (r != 0) goto finish_no_msa;
+ }
+ }
+
+ if (start > end || start < str) goto mismatch_no_msa;
+ if (str < end) {
+ /* forward search only */
+ if (range <= start)
+ return ONIGERR_INVALID_ARGUMENT;
+ }
+
+ if (ONIG_IS_OPTION_ON(option, ONIG_OPTION_CHECK_VALIDITY_OF_STRING)) {
+ if (! ONIGENC_IS_VALID_MBC_STRING(enc, str, end)) {
+ r = ONIGERR_INVALID_WIDE_CHAR_VALUE;
+ goto finish_no_msa;
+ }
+ }
+
+ if (set->anchor != OPTIMIZE_NONE && str < end) {
+ UChar *min_semi_end, *max_semi_end;
+
+ if ((set->anchor & ANCR_BEGIN_POSITION) != 0) {
+ /* search start-position only */
+ begin_position:
+ range = start + 1;
+ }
+ else if ((set->anchor & ANCR_BEGIN_BUF) != 0) {
+ /* search str-position only */
+ if (start != str) goto mismatch_no_msa;
+ range = str + 1;
+ }
+ else if ((set->anchor & ANCR_END_BUF) != 0) {
+ min_semi_end = max_semi_end = (UChar* )end;
+
+ end_buf:
+ if ((OnigLen )(max_semi_end - str) < set->anc_dmin)
+ goto mismatch_no_msa;
+
+ if ((OnigLen )(min_semi_end - start) > set->anc_dmax) {
+ start = min_semi_end - set->anc_dmax;
+ if (start < end)
+ start = onigenc_get_right_adjust_char_head(enc, str, start);
+ }
+ if ((OnigLen )(max_semi_end - (range - 1)) < set->anc_dmin) {
+ range = max_semi_end - set->anc_dmin + 1;
+ }
+ if (start > range) goto mismatch_no_msa;
+ }
+ else if ((set->anchor & ANCR_SEMI_END_BUF) != 0) {
+ UChar* pre_end = ONIGENC_STEP_BACK(enc, str, end, 1);
+
+ max_semi_end = (UChar* )end;
+ if (ONIGENC_IS_MBC_NEWLINE(enc, pre_end, end)) {
+ min_semi_end = pre_end;
+
+#ifdef USE_CRNL_AS_LINE_TERMINATOR
+ pre_end = ONIGENC_STEP_BACK(enc, str, pre_end, 1);
+ if (IS_NOT_NULL(pre_end) &&
+ ONIGENC_IS_MBC_CRNL(enc, pre_end, end)) {
+ min_semi_end = pre_end;
+ }
#endif
+ if (min_semi_end > str && start <= min_semi_end) {
+ goto end_buf;
+ }
+ }
+ else {
+ min_semi_end = (UChar* )end;
+ goto end_buf;
+ }
+ }
+ else if ((set->anchor & ANCR_ANYCHAR_INF_ML) != 0) {
+ goto begin_position;
+ }
+ }
+ else if (str == end) { /* empty string */
+ start = end = str;
+ s = (UChar* )start;
+ prev = (UChar* )NULL;
+
+ msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
+ CHECK_NULL_RETURN_MEMERR(msas);
+ for (i = 0; i < set->n; i++) {
+ reg = set->rs[i].reg;
+ MATCH_ARG_INIT(msas[i], reg, option, set->rs[i].region, start, mps[i]);
+ }
+ for (i = 0; i < set->n; i++) {
+ reg = set->rs[i].reg;
+ if (reg->threshold_len == 0) {
+ REGSET_MATCH_AND_RETURN_CHECK(end);
+ }
+ }
+
+ goto mismatch;
+ }
+
+ if (lead == ONIG_REGSET_POSITION_LEAD) {
+ msas = (MatchArg* )xmalloc(sizeof(*msas) * set->n);
+ CHECK_NULL_RETURN_MEMERR(msas);
+
+ for (i = 0; i < set->n; i++) {
+ MATCH_ARG_INIT(msas[i], set->rs[i].reg, option, set->rs[i].region,
+ orig_start, mps[i]);
+ }
+
+ r = regset_search_body_position_lead(set, str, end, start, range,
+ orig_range, option, msas, rmatch_pos);
+ }
+ else {
+ r = regset_search_body_regex_lead(set, str, end, start, orig_range,
+ lead, option, mps, rmatch_pos);
+ }
+ if (r < 0) goto finish;
+ else goto match2;
+
+ mismatch:
+ r = ONIG_MISMATCH;
+ finish:
+ for (i = 0; i < set->n; i++) {
+ if (IS_NOT_NULL(msas))
+ MATCH_ARG_FREE(msas[i]);
+ if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
+ IS_NOT_NULL(set->rs[i].region)) {
+ onig_region_clear(set->rs[i].region);
+ }
+ }
+ if (IS_NOT_NULL(msas)) xfree(msas);
+ return r;
+
+ mismatch_no_msa:
+ r = ONIG_MISMATCH;
+ finish_no_msa:
+ return r;
+
+ match:
+ *rmatch_pos = (int )(s - str);
+ match2:
+ for (i = 0; i < set->n; i++) {
+ if (IS_NOT_NULL(msas))
+ MATCH_ARG_FREE(msas[i]);
+ if (IS_FIND_NOT_EMPTY(set->rs[i].reg->options) &&
+ IS_NOT_NULL(set->rs[i].region)) {
+ onig_region_clear(set->rs[i].region);
+ }
+ }
+ if (IS_NOT_NULL(msas)) xfree(msas);
+ return r; /* regex index */
}
+extern int
+onig_regset_search(OnigRegSet* set, const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range,
+ OnigRegSetLead lead, OnigOptionType option, int* rmatch_pos)
+{
+ int r;
+ int i;
+ OnigMatchParam* mp;
+ OnigMatchParam** mps;
+
+ mps = (OnigMatchParam** )xmalloc((sizeof(OnigMatchParam*) + sizeof(OnigMatchParam)) * set->n);
+ CHECK_NULL_RETURN_MEMERR(mps);
+
+ mp = (OnigMatchParam* )(mps + set->n);
+
+ for (i = 0; i < set->n; i++) {
+ onig_initialize_match_param(mp + i);
+ mps[i] = mp + i;
+ }
+
+ r = onig_regset_search_with_param(set, str, end, start, range, lead, option, mps,
+ rmatch_pos);
+ for (i = 0; i < set->n; i++)
+ onig_free_match_param_content(mp + i);
+
+ xfree(mps);
+
+ return r;
+}
static UChar*
slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
@@ -4146,9 +4665,11 @@ str_lower_case_match(OnigEncoding enc, int case_fold_flag,
UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
while (t < tend) {
+ if (p >= end) return 0;
lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
q = lowbuf;
while (lowlen > 0) {
+ if (t >= tend) return 0;
if (*t++ != *q++) return 0;
lowlen--;
}
@@ -4162,16 +4683,11 @@ slow_search_ic(OnigEncoding enc, int case_fold_flag,
UChar* target, UChar* target_end,
const UChar* text, const UChar* text_end, UChar* text_range)
{
- UChar *s, *end;
-
- end = (UChar* )text_end;
- end -= target_end - target - 1;
- if (end > text_range)
- end = text_range;
+ UChar *s;
s = (UChar* )text;
- while (s < end) {
+ while (s < text_range) {
if (str_lower_case_match(enc, case_fold_flag, target, target_end,
s, text_end))
return s;
@@ -4325,60 +4841,6 @@ sunday_quick_search(regex_t* reg, const UChar* target, const UChar* target_end,
}
static UChar*
-sunday_quick_search_case_fold(regex_t* reg,
- const UChar* target, const UChar* target_end,
- const UChar* text, const UChar* text_end,
- const UChar* text_range)
-{
- const UChar *s, *se, *end;
- const UChar *tail;
- int skip, tlen1;
- int map_offset;
- int case_fold_flag;
- OnigEncoding enc;
-
-#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr,
- "sunday_quick_search_case_fold: text: %p, text_end: %p, text_range: %p\n", text, text_end, text_range);
-#endif
-
- enc = reg->enc;
- case_fold_flag = reg->case_fold_flag;
-
- tail = target_end - 1;
- tlen1 = (int )(tail - target);
- end = text_range;
- if (end + tlen1 > text_end)
- end = text_end - tlen1;
-
- map_offset = reg->map_offset;
- s = text;
-
- while (s < end) {
- if (str_lower_case_match(enc, case_fold_flag, target, target_end,
- s, text_end))
- return (UChar* )s;
-
- se = s + tlen1;
- if (se + map_offset >= text_end) break;
- skip = reg->map[*(se + map_offset)];
-#if 0
- p = s;
- do {
- s += enclen(enc, s);
- } while ((s - p) < skip && s < end);
-#else
- /* This is faster than prev code for long text. ex: /(?i)Twain/ */
- s += skip;
- if (s < end)
- s = onigenc_get_right_adjust_char_head(enc, text, s);
-#endif
- }
-
- return (UChar* )NULL;
-}
-
-static UChar*
map_search(OnigEncoding enc, UChar map[],
const UChar* text, const UChar* text_range)
{
@@ -4458,25 +4920,26 @@ onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end,
}
static int
-forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
- UChar* range, UChar** low, UChar** high, UChar** low_prev)
+forward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* start,
+ UChar* range, UChar** low, UChar** high, UChar** low_prev)
{
UChar *p, *pprev = (UChar* )NULL;
#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "forward_search_range: str: %p, end: %p, s: %p, range: %p\n",
- str, end, s, range);
+ fprintf(stderr, "forward_search: str: %p, end: %p, start: %p, range: %p\n",
+ str, end, start, range);
#endif
- p = s;
- if (reg->dmin > 0) {
+ p = start;
+ if (reg->dist_min != 0) {
+ if (end - p <= reg->dist_min)
+ return 0; /* fail */
+
if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
- p += reg->dmin;
+ p += reg->dist_min;
}
else {
- UChar *q = p + reg->dmin;
-
- if (q >= end) return 0; /* fail */
+ UChar *q = p + reg->dist_min;
while (p < q) p += enclen(reg->enc, p);
}
}
@@ -4491,11 +4954,6 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
reg->exact, reg->exact_end, p, end, range);
break;
- case OPTIMIZE_STR_CASE_FOLD_FAST:
- p = sunday_quick_search_case_fold(reg, reg->exact, reg->exact_end, p, end,
- range);
- break;
-
case OPTIMIZE_STR_FAST:
p = sunday_quick_search(reg, reg->exact, reg->exact_end, p, end, range);
break;
@@ -4511,7 +4969,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
}
if (p && p < range) {
- if (p - reg->dmin < s) {
+ if (p - start < reg->dist_min) {
retry_gate:
pprev = p;
p += enclen(reg->enc, p);
@@ -4524,8 +4982,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
switch (reg->sub_anchor) {
case ANCR_BEGIN_LINE:
if (!ON_STR_BEGIN(p)) {
- prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : str), p);
+ prev = onigenc_get_prev_char_head(reg->enc, (pprev ? pprev : str), p);
if (!ONIGENC_IS_MBC_NEWLINE(reg->enc, prev, end))
goto retry_gate;
}
@@ -4546,35 +5003,34 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
#endif
)
goto retry_gate;
+
break;
}
}
- if (reg->dmax == 0) {
+ if (reg->dist_max == 0) {
*low = p;
if (low_prev) {
- if (*low > s)
- *low_prev = onigenc_get_prev_char_head(reg->enc, s, p);
+ if (*low > start)
+ *low_prev = onigenc_get_prev_char_head(reg->enc, start, p);
else
*low_prev = onigenc_get_prev_char_head(reg->enc,
(pprev ? pprev : str), p);
}
+ *high = p;
}
else {
- if (reg->dmax != INFINITE_LEN) {
- if (p - str < reg->dmax) {
+ if (reg->dist_max != INFINITE_LEN) {
+ if (p - str < reg->dist_max) {
*low = (UChar* )str;
if (low_prev)
*low_prev = onigenc_get_prev_char_head(reg->enc, str, *low);
}
else {
- *low = p - reg->dmax;
- if (*low > s) {
- *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
+ *low = p - reg->dist_max;
+ if (*low > start) {
+ *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, start,
*low, (const UChar** )low_prev);
- if (low_prev && IS_NULL(*low_prev))
- *low_prev = onigenc_get_prev_char_head(reg->enc,
- (pprev ? pprev : s), *low);
}
else {
if (low_prev)
@@ -4583,14 +5039,18 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
}
}
}
+ /* no needs to adjust *high, *high is used as range check only */
+ if (p - str < reg->dist_min)
+ *high = (UChar* )str;
+ else
+ *high = p - reg->dist_min;
}
- /* no needs to adjust *high, *high is used as range check only */
- *high = p - reg->dmin;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
- "forward_search_range success: low: %d, high: %d, dmin: %d, dmax: %d\n",
- (int )(*low - str), (int )(*high - str), reg->dmin, reg->dmax);
+ "forward_search success: low: %d, high: %d, dmin: %u, dmax: %u\n",
+ (int )(*low - str), (int )(*high - str),
+ reg->dist_min, reg->dist_max);
#endif
return 1; /* success */
}
@@ -4600,15 +5060,11 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
static int
-backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
- UChar* s, const UChar* range, UChar* adjrange,
- UChar** low, UChar** high)
+backward_search(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
+ const UChar* range, UChar* adjrange, UChar** low, UChar** high)
{
UChar *p;
- if (range == 0) goto fail;
-
- range += reg->dmin;
p = s;
retry:
@@ -4620,7 +5076,6 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
break;
case OPTIMIZE_STR_CASE_FOLD:
- case OPTIMIZE_STR_CASE_FOLD_FAST:
p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
reg->exact, reg->exact_end,
range, adjrange, end, p);
@@ -4675,15 +5130,27 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
}
}
- /* no needs to adjust *high, *high is used as range check only */
- if (reg->dmax != INFINITE_LEN) {
- *low = p - reg->dmax;
- *high = p - reg->dmin;
+ if (reg->dist_max != INFINITE_LEN) {
+ if (p - str < reg->dist_max)
+ *low = (UChar* )str;
+ else
+ *low = p - reg->dist_max;
+
+ if (reg->dist_min != 0) {
+ if (p - str < reg->dist_min)
+ *high = (UChar* )str;
+ else
+ *high = p - reg->dist_min;
+ }
+ else {
+ *high = p;
+ }
+
*high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high);
}
#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
+ fprintf(stderr, "backward_search: low: %d, high: %d\n",
(int )(*low - str), (int )(*high - str));
#endif
return 1; /* success */
@@ -4691,7 +5158,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
fail:
#ifdef ONIG_DEBUG_SEARCH
- fprintf(stderr, "backward_search_range: fail.\n");
+ fprintf(stderr, "backward_search: fail.\n");
#endif
return 0; /* fail */
}
@@ -4704,24 +5171,35 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end,
{
int r;
OnigMatchParam mp;
+ const UChar* data_range;
onig_initialize_match_param(&mp);
- r = onig_search_with_param(reg, str, end, start, range, region, option, &mp);
+
+ /* The following is an expanded code of onig_search_with_param() */
+ if (range > start)
+ data_range = range;
+ else
+ data_range = end;
+
+ r = search_in_range(reg, str, end, start, range, data_range, region,
+ option, &mp);
+
onig_free_match_param_content(&mp);
return r;
}
-extern int
-onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
- const UChar* start, const UChar* range, OnigRegion* region,
- OnigOptionType option, OnigMatchParam* mp)
+static int
+search_in_range(regex_t* reg, const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range, /* match start range */
+ const UChar* data_range, /* subject string range */
+ OnigRegion* region,
+ OnigOptionType option, OnigMatchParam* mp)
{
int r;
UChar *s, *prev;
MatchArg msa;
const UChar *orig_start = start;
- const UChar *orig_range = range;
#ifdef ONIG_DEBUG_SEARCH
fprintf(stderr,
@@ -4804,17 +5282,21 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
min_semi_end = max_semi_end = (UChar* )end;
end_buf:
- if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin)
+ if ((OnigLen )(max_semi_end - str) < reg->anc_dist_min)
goto mismatch_no_msa;
if (range > start) {
- if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) {
- start = min_semi_end - reg->anchor_dmax;
+ if (reg->anc_dist_max != INFINITE_LEN &&
+ min_semi_end - start > reg->anc_dist_max) {
+ start = min_semi_end - reg->anc_dist_max;
if (start < end)
start = onigenc_get_right_adjust_char_head(reg->enc, str, start);
}
- if ((OnigLen )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
- range = max_semi_end - reg->anchor_dmin + 1;
+ if (max_semi_end - (range - 1) < reg->anc_dist_min) {
+ if (max_semi_end - str + 1 < reg->anc_dist_min)
+ goto mismatch_no_msa;
+ else
+ range = max_semi_end - reg->anc_dist_min + 1;
}
if (start > range) goto mismatch_no_msa;
@@ -4822,12 +5304,17 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
Backward search is used. */
}
else {
- if ((OnigLen )(min_semi_end - range) > reg->anchor_dmax) {
- range = min_semi_end - reg->anchor_dmax;
+ if (reg->anc_dist_max != INFINITE_LEN &&
+ min_semi_end - range > reg->anc_dist_max) {
+ range = min_semi_end - reg->anc_dist_max;
}
- if ((OnigLen )(max_semi_end - start) < reg->anchor_dmin) {
- start = max_semi_end - reg->anchor_dmin;
- start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
+ if (max_semi_end - start < reg->anc_dist_min) {
+ if (max_semi_end - str < reg->anc_dist_min)
+ goto mismatch_no_msa;
+ else {
+ start = max_semi_end - reg->anc_dist_min;
+ start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start);
+ }
}
if (range > start) goto mismatch_no_msa;
}
@@ -4895,29 +5382,33 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
if (reg->optimize != OPTIMIZE_NONE) {
UChar *sch_range, *low, *high, *low_prev;
- sch_range = (UChar* )range;
- if (reg->dmax != 0) {
- if (reg->dmax == INFINITE_LEN)
+ if (reg->dist_max != 0) {
+ if (reg->dist_max == INFINITE_LEN)
sch_range = (UChar* )end;
else {
- sch_range += reg->dmax;
- if (sch_range > end) sch_range = (UChar* )end;
+ if ((end - range) < reg->dist_max)
+ sch_range = (UChar* )end;
+ else {
+ sch_range = (UChar* )range + reg->dist_max;
+ }
}
}
+ else
+ sch_range = (UChar* )range;
if ((end - start) < reg->threshold_len)
goto mismatch;
- if (reg->dmax != INFINITE_LEN) {
+ if (reg->dist_max != INFINITE_LEN) {
do {
- if (! forward_search_range(reg, str, end, s, sch_range,
- &low, &high, &low_prev)) goto mismatch;
+ if (! forward_search(reg, str, end, s, sch_range, &low, &high,
+ &low_prev)) goto mismatch;
if (s < low) {
s = low;
prev = low_prev;
}
while (s <= high) {
- MATCH_AND_RETURN_CHECK(orig_range);
+ MATCH_AND_RETURN_CHECK(data_range);
prev = s;
s += enclen(reg->enc, s);
}
@@ -4925,12 +5416,12 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
goto mismatch;
}
else { /* check only. */
- if (! forward_search_range(reg, str, end, s, sch_range,
- &low, &high, (UChar** )NULL)) goto mismatch;
+ if (! forward_search(reg, str, end, s, sch_range, &low, &high,
+ (UChar** )NULL)) goto mismatch;
if ((reg->anchor & ANCR_ANYCHAR_INF) != 0) {
do {
- MATCH_AND_RETURN_CHECK(orig_range);
+ MATCH_AND_RETURN_CHECK(data_range);
prev = s;
s += enclen(reg->enc, s);
@@ -4947,13 +5438,13 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
}
do {
- MATCH_AND_RETURN_CHECK(orig_range);
+ MATCH_AND_RETURN_CHECK(data_range);
prev = s;
s += enclen(reg->enc, s);
} while (s < range);
if (s == range) { /* because empty match with /$/. */
- MATCH_AND_RETURN_CHECK(orig_range);
+ MATCH_AND_RETURN_CHECK(data_range);
}
}
else { /* backward search */
@@ -4964,19 +5455,30 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
if (reg->optimize != OPTIMIZE_NONE) {
UChar *low, *high, *adjrange, *sch_start;
+ const UChar *min_range;
+
+ if ((end - range) < reg->threshold_len) goto mismatch;
if (range < end)
adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range);
else
adjrange = (UChar* )end;
- if (reg->dmax != INFINITE_LEN &&
- (end - range) >= reg->threshold_len) {
+ if (end - range > reg->dist_min)
+ min_range = range + reg->dist_min;
+ else
+ min_range = end;
+
+ if (reg->dist_max != INFINITE_LEN) {
do {
- sch_start = s + reg->dmax;
- if (sch_start > end) sch_start = (UChar* )end;
- if (backward_search_range(reg, str, end, sch_start, range, adjrange,
- &low, &high) <= 0)
+ if (end - s > reg->dist_max)
+ sch_start = s + reg->dist_max;
+ else {
+ sch_start = onigenc_get_prev_char_head(reg->enc, str, end);
+ }
+
+ if (backward_search(reg, str, end, sch_start, min_range, adjrange,
+ &low, &high) <= 0)
goto mismatch;
if (s > high)
@@ -4991,22 +5493,10 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
goto mismatch;
}
else { /* check only. */
- if ((end - range) < reg->threshold_len) goto mismatch;
+ sch_start = onigenc_get_prev_char_head(reg->enc, str, end);
- sch_start = s;
- if (reg->dmax != 0) {
- if (reg->dmax == INFINITE_LEN)
- sch_start = (UChar* )end;
- else {
- sch_start += reg->dmax;
- if (sch_start > end) sch_start = (UChar* )end;
- else
- sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
- start, sch_start);
- }
- }
- if (backward_search_range(reg, str, end, sch_start, range, adjrange,
- &low, &high) <= 0) goto mismatch;
+ if (backward_search(reg, str, end, sch_start, min_range, adjrange,
+ &low, &high) <= 0) goto mismatch;
}
}
@@ -5062,6 +5552,22 @@ onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
}
extern int /* onig_search_with_param: thin wrapper around search_in_range().
+ * Only data_range is computed here; every other argument is forwarded
+ * unchanged and the callee's return value is passed straight back.
+ */
+onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end,
+ const UChar* start, const UChar* range, OnigRegion* region,
+ OnigOptionType option, OnigMatchParam* mp)
+{
+ const UChar* data_range;
+ /* data_range is range when range lies after start, otherwise end. */
+ if (range > start)
+ data_range = range;
+ else
+ data_range = end;
+
+ return search_in_range(reg, str, end, start, range, data_range, region,
+ option, mp);
+}
+
+extern int
onig_scan(regex_t* reg, const UChar* str, const UChar* end,
OnigRegion* region, OnigOptionType option,
int (*scan_callback)(int, int, OnigRegion*, void*),
@@ -5163,6 +5669,202 @@ onig_copy_encoding(OnigEncoding to, OnigEncoding from)
*to = *from;
}
+extern int /* onig_regset_new: allocate a regex set and add regs[0..n-1] to it.
+ * *rset is cleared first and receives the new set only on success (return 0).
+ * A failed allocation of the entry array yields ONIGERR_MEMORY; when
+ * onig_regset_add() fails, its error code is returned after everything
+ * allocated here has been released. The regex objects are not freed here.
+ */
+onig_regset_new(OnigRegSet** rset, int n, regex_t* regs[])
+{
+#define REGSET_INITIAL_ALLOC_SIZE 10
+
+ int i;
+ int r;
+ int alloc;
+ OnigRegSet* set;
+ RR* rs;
+
+ *rset = 0;
+
+ set = (OnigRegSet* )xmalloc(sizeof(*set));
+ CHECK_NULL_RETURN_MEMERR(set); /* presumably returns ONIGERR_MEMORY on NULL; macro body not visible here */
+ /* Reserve room for at least REGSET_INITIAL_ALLOC_SIZE entries. */
+ alloc = n > REGSET_INITIAL_ALLOC_SIZE ? n : REGSET_INITIAL_ALLOC_SIZE;
+ rs = (RR* )xmalloc(sizeof(set->rs[0]) * alloc);
+ if (IS_NULL(rs)) {
+ xfree(set);
+ return ONIGERR_MEMORY;
+ }
+
+ set->rs = rs;
+ set->n = 0;
+ set->alloc = alloc;
+
+ for (i = 0; i < n; i++) {
+ regex_t* reg = regs[i];
+
+ r = onig_regset_add(set, reg);
+ if (r != 0) { /* undo: free the regions created so far, then the array and the set */
+ for (i = 0; i < set->n; i++) { /* reusing i is harmless: this path returns below */
+ OnigRegion* region = set->rs[i].region;
+ if (IS_NOT_NULL(region))
+ onig_region_free(region, 1);
+ }
+ xfree(set->rs);
+ xfree(set);
+ return r;
+ }
+ }
+
+ *rset = set;
+ return 0;
+}
+
+static void /* update_regset_by_reg: fold one regex's search hints into the set.
+ * Reads reg->enc, anchor, anc_dist_min, anc_dist_max, optimize and dist_max
+ * and writes the corresponding summary fields of set.
+ */
+update_regset_by_reg(OnigRegSet* set, regex_t* reg)
+{
+ if (set->n == 1) { /* single member: copy its values outright */
+ set->enc = reg->enc;
+ set->anchor = reg->anchor;
+ set->anc_dmin = reg->anc_dist_min;
+ set->anc_dmax = reg->anc_dist_max;
+ set->all_low_high =
+ (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN) ? 0 : 1;
+ set->anychar_inf = (reg->anchor & ANCR_ANYCHAR_INF) != 0 ? 1 : 0;
+ }
+ else { /* otherwise merge into the values already stored in set */
+ int anchor;
+ /* Keep only the anchor bits that reg shares with the set. */
+ anchor = set->anchor & reg->anchor;
+ if (anchor != 0) { /* widen the distance window: smaller min, larger max */
+ OnigLen anc_dmin;
+ OnigLen anc_dmax;
+
+ anc_dmin = set->anc_dmin;
+ anc_dmax = set->anc_dmax;
+ if (anc_dmin > reg->anc_dist_min) anc_dmin = reg->anc_dist_min;
+ if (anc_dmax < reg->anc_dist_max) anc_dmax = reg->anc_dist_max;
+ set->anc_dmin = anc_dmin;
+ set->anc_dmax = anc_dmax;
+ }
+
+ set->anchor = anchor;
+ /* A member with no optimization or an unbounded dist_max clears all_low_high. */
+ if (reg->optimize == OPTIMIZE_NONE || reg->dist_max == INFINITE_LEN)
+ set->all_low_high = 0;
+ /* A member carrying ANCR_ANYCHAR_INF sets anychar_inf. */
+ if ((reg->anchor & ANCR_ANYCHAR_INF) != 0)
+ set->anychar_inf = 1;
+ }
+}
+
+extern int /* onig_regset_add: append reg to set, creating a region for it.
+ * Returns 0 on success and ONIGERR_INVALID_ARGUMENT when reg is rejected by
+ * the checks below; allocation failures are reported through
+ * CHECK_NULL_RETURN_MEMERR (presumably ONIGERR_MEMORY; macro not visible here).
+ */
+onig_regset_add(OnigRegSet* set, regex_t* reg)
+{
+ OnigRegion* region;
+ /* Regexes whose options satisfy IS_FIND_LONGEST are not accepted. */
+ if (IS_FIND_LONGEST(reg->options))
+ return ONIGERR_INVALID_ARGUMENT;
+ /* Once the set is non-empty, reg must use the encoding stored in set->enc. */
+ if (set->n != 0 && reg->enc != set->enc)
+ return ONIGERR_INVALID_ARGUMENT;
+ /* Entry array full: double its capacity. */
+ if (set->n >= set->alloc) {
+ RR* nrs;
+ int new_alloc;
+
+ new_alloc = set->alloc * 2;
+ nrs = (RR* )xrealloc(set->rs, sizeof(set->rs[0]) * new_alloc); /* kept in nrs until checked, so set->rs is not overwritten on failure */
+ CHECK_NULL_RETURN_MEMERR(nrs);
+
+ set->rs = nrs;
+ set->alloc = new_alloc;
+ }
+ /* Each member gets its own region object. */
+ region = onig_region_new();
+ CHECK_NULL_RETURN_MEMERR(region);
+
+ set->rs[set->n].reg = reg;
+ set->rs[set->n].region = region;
+ set->n++;
+ /* n is incremented first, so update_regset_by_reg() sees n == 1 for the first member. */
+ update_regset_by_reg(set, reg);
+ return 0;
+}
+
+extern int /* onig_regset_replace: replace the regex at index at, or remove that
+ * entry when reg is NULL. Returns 0 on success and ONIGERR_INVALID_ARGUMENT
+ * for an out-of-range index or a rejected reg. The regex previously stored at
+ * that index is not freed by this function.
+ */
+onig_regset_replace(OnigRegSet* set, int at, regex_t* reg)
+{
+ int i;
+
+ if (at < 0 || at >= set->n)
+ return ONIGERR_INVALID_ARGUMENT;
+
+ if (IS_NULL(reg)) { /* NULL reg: delete the entry and free its region */
+ onig_region_free(set->rs[at].region, 1);
+ for (i = at; i < set->n - 1; i++) { /* shift the following entries down by one */
+ set->rs[i].reg = set->rs[i+1].reg;
+ set->rs[i].region = set->rs[i+1].region;
+ }
+ set->n--;
+ }
+ else {
+ if (IS_FIND_LONGEST(reg->options))
+ return ONIGERR_INVALID_ARGUMENT;
+ /* The encoding check is skipped while the set holds a single member. */
+ if (set->n > 1 && reg->enc != set->enc)
+ return ONIGERR_INVALID_ARGUMENT;
+
+ set->rs[at].reg = reg; /* only the regex pointer changes; the entry's region is kept */
+ }
+ /* Re-run the per-regex summary update for every remaining member.
+ * NOTE(review): with n > 1 each call takes the merge branch of
+ * update_regset_by_reg(), so the previous summary values are merged into
+ * rather than reset first; confirm that this is intended.
+ */
+ for (i = 0; i < set->n; i++)
+ update_regset_by_reg(set, set->rs[i].reg);
+
+ return 0;
+}
+
+extern void /* onig_regset_free: release the set and everything it holds.
+ * Every member regex is passed to onig_free() and every non-NULL region to
+ * onig_region_free(region, 1); then the entry array and the set itself are
+ * freed. set is dereferenced unconditionally, so it must not be NULL.
+ */
+onig_regset_free(OnigRegSet* set)
+{
+ int i;
+
+ for (i = 0; i < set->n; i++) {
+ regex_t* reg;
+ OnigRegion* region;
+
+ reg = set->rs[i].reg;
+ region = set->rs[i].region;
+ onig_free(reg);
+ if (IS_NOT_NULL(region))
+ onig_region_free(region, 1);
+ }
+
+ xfree(set->rs);
+ xfree(set);
+}
+
+extern int /* onig_regset_number_of_regex: returns set->n, the number of entries currently in the set. */
+onig_regset_number_of_regex(OnigRegSet* set)
+{
+ return set->n;
+}
+
+extern regex_t* /* onig_regset_get_regex: returns the regex stored at index at, or a null pointer when at is outside 0..n-1. */
+onig_regset_get_regex(OnigRegSet* set, int at)
+{
+ if (at < 0 || at >= set->n)
+ return (regex_t* )0;
+
+ return set->rs[at].reg;
+}
+
+extern OnigRegion* /* onig_regset_get_region: returns the region stored at index at, or a null pointer when at is outside 0..n-1. */
+onig_regset_get_region(OnigRegSet* set, int at)
+{
+ if (at < 0 || at >= set->n)
+ return (OnigRegion* )0;
+
+ return set->rs[at].region;
+}
+
+
#ifdef USE_DIRECT_THREADED_CODE
extern int
onig_init_for_match_at(regex_t* reg)
@@ -5355,35 +6057,25 @@ onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, i
const UChar* str;
StackType* stk_base;
int i;
+ StackIndex* mem_start_stk;
+ StackIndex* mem_end_stk;
i = mem_num;
reg = a->regex;
str = a->string;
stk_base = a->stk_base;
+ mem_start_stk = a->mem_start_stk;
+ mem_end_stk = a->mem_end_stk;
if (i > 0) {
if (a->mem_end_stk[i] != INVALID_STACK_INDEX) {
- if (MEM_STATUS_AT(reg->bt_mem_start, i))
- *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str);
- else
- *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str);
-
- *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i)
- ? STACK_AT(a->mem_end_stk[i])->u.mem.pstr
- : (UChar* )((void* )a->mem_end_stk[i])) - str);
+ *begin = (int )(STACK_MEM_START(reg, i) - str);
+ *end = (int )(STACK_MEM_END(reg, i) - str);
}
else {
*begin = *end = ONIG_REGION_NOTPOS;
}
}
- else if (i == 0) {
-#if 0
- *begin = a->start - str;
- *end = a->current - str;
-#else
- return ONIGERR_INVALID_ARGUMENT;
-#endif
- }
else
return ONIGERR_INVALID_ARGUMENT;
@@ -5421,14 +6113,6 @@ onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUS
return ONIG_MISMATCH;
}
-#if 0
-extern int
-onig_builtin_success(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED)
-{
- return ONIG_CALLOUT_SUCCESS;
-}
-#endif
-
extern int
onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
{
@@ -5443,6 +6127,9 @@ onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
if (n >= 0) {
n = ONIGERR_INVALID_CALLOUT_BODY;
}
+ else if (onig_is_error_code_needs_param(n)) {
+ n = ONIGERR_INVALID_CALLOUT_BODY;
+ }
return n;
}
diff --git a/src/regext.c b/src/regext.c
index fa4b360..c46f630 100644
--- a/src/regext.c
+++ b/src/regext.c
@@ -2,7 +2,7 @@
regext.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,6 +29,7 @@
#include "regint.h"
+#if 0
static void
conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
{
@@ -158,6 +159,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e
return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
}
+#endif
extern int
onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
@@ -169,9 +171,7 @@ onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
if (ci->pattern_enc != ci->target_enc) {
- r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end,
- &cpat, &cpat_end);
- if (r != 0) return r;
+ return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
}
else {
cpat = (UChar* )pattern;
diff --git a/src/reggnu.c b/src/reggnu.c
index a124ae8..8a45078 100644
--- a/src/reggnu.c
+++ b/src/reggnu.c
@@ -2,7 +2,7 @@
reggnu.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/regint.h b/src/regint.h
index 56767e8..cc540da 100644
--- a/src/regint.h
+++ b/src/regint.h
@@ -4,7 +4,7 @@
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -47,23 +47,18 @@
#endif
#endif
-#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
- (defined(__ppc__) && defined(__APPLE__)) || \
- defined(__x86_64) || defined(__x86_64__) || \
- defined(__mc68020__)
-#define PLATFORM_UNALIGNED_WORD_ACCESS
-#endif
-
+#ifndef ONIG_DISABLE_DIRECT_THREADING
#ifdef __GNUC__
#define USE_GOTO_LABELS_AS_VALUES
#endif
+#endif
/* config */
/* spec. config */
#define USE_CALL
#define USE_CALLOUT
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
-#define USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
+#define USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
#define USE_RETRY_LIMIT_IN_MATCH
@@ -82,6 +77,8 @@
#define USE_VARIABLE_META_CHARS
#define USE_POSIX_API_REGION_OPTION
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
+/* #define USE_REPEAT_AND_EMPTY_CHECK_LOCAL_VAR */
+
#include "regenc.h"
@@ -197,49 +194,16 @@ typedef unsigned int uintptr_t;
#define CHAR_MAP_SIZE 256
#define INFINITE_LEN ONIG_INFINITE_DISTANCE
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
-
-#define PLATFORM_GET_INC(val,p,type) do{\
- val = *(type* )p;\
- (p) += sizeof(type);\
-} while(0)
-
-#else
-
-#define PLATFORM_GET_INC(val,p,type) do{\
- xmemcpy(&val, (p), sizeof(type));\
- (p) += sizeof(type);\
-} while(0)
-
-/* sizeof(OnigCodePoint) */
-#ifdef SIZEOF_SIZE_T
-# define WORD_ALIGNMENT_SIZE SIZEOF_SIZE_T
-#else
-# define WORD_ALIGNMENT_SIZE SIZEOF_LONG
-#endif
-
-#define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\
- (pad_size) = WORD_ALIGNMENT_SIZE - ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
- if ((pad_size) == WORD_ALIGNMENT_SIZE) (pad_size) = 0;\
-} while (0)
-
-#define ALIGNMENT_RIGHT(addr) do {\
- (addr) += (WORD_ALIGNMENT_SIZE - 1);\
- (addr) -= ((uintptr_t )(addr) % WORD_ALIGNMENT_SIZE);\
-} while (0)
-
-#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
-
#ifdef USE_CALLOUT
typedef struct {
- int flag;
- OnigCalloutOf of;
- int in;
- int name_id;
- const UChar* tag_start;
- const UChar* tag_end;
+ int flag;
+ OnigCalloutOf of;
+ int in;
+ int name_id;
+ const UChar* tag_start;
+ const UChar* tag_end;
OnigCalloutType type;
OnigCalloutFunc start_func;
OnigCalloutFunc end_func;
@@ -272,7 +236,6 @@ enum OptimizeType {
OPTIMIZE_STR, /* Slow Search */
OPTIMIZE_STR_FAST, /* Sunday quick search / BMH */
OPTIMIZE_STR_FAST_STEP_FORWARD, /* Sunday quick search / BMH */
- OPTIMIZE_STR_CASE_FOLD_FAST, /* Sunday quick search / BMH (ignore case) */
OPTIMIZE_STR_CASE_FOLD, /* Slow Search (ignore case) */
OPTIMIZE_MAP /* char map */
};
@@ -288,6 +251,8 @@ typedef unsigned int MemStatusType;
#define MEM_STATUS_AT0(stats,n) \
((n) > 0 && (n) < (int )MEM_STATUS_BITS_NUM ? ((stats) & ((MemStatusType )1 << n)) : ((stats) & 1))
+#define MEM_STATUS_IS_ALL_ON(stats) (((stats) & 1) != 0) /* true when bit 0 of stats is set */
+
#define MEM_STATUS_ON(stats,n) do {\
if ((n) < (int )MEM_STATUS_BITS_NUM) {\
if ((n) != 0)\
@@ -302,8 +267,14 @@ typedef unsigned int MemStatusType;
(stats) |= ((MemStatusType )1 << (n));\
} while (0)
+#define MEM_STATUS_LIMIT_AT(stats,n) /* bit n of stats, or 0 when n >= MEM_STATUS_BITS_NUM */ \
+ ((n) < (int )MEM_STATUS_BITS_NUM ? ((stats) & ((MemStatusType )1 << n)) : 0)
+#define MEM_STATUS_LIMIT_ON(stats,n) /* set bit n of stats; does nothing for n == 0 or n >= MEM_STATUS_BITS_NUM */ do {\
+ if ((n) < (int )MEM_STATUS_BITS_NUM && (n) != 0) {\
+ (stats) |= ((MemStatusType )1 << (n));\
+ }\
+} while (0)
-#define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1)
#define IS_CODE_WORD_ASCII(enc,code) \
(ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code))
@@ -348,22 +319,18 @@ typedef unsigned int MemStatusType;
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
-#define REPEAT_INFINITE -1
-#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
+#define INFINITE_REPEAT -1
+#define IS_INFINITE_REPEAT(n) ((n) == INFINITE_REPEAT)
/* bitset */
#define BITS_PER_BYTE 8
#define SINGLE_BYTE_SIZE (1 << BITS_PER_BYTE)
-#define BITS_IN_ROOM (sizeof(Bits) * BITS_PER_BYTE)
+#define BITS_IN_ROOM 32 /* 4 * BITS_PER_BYTE */
#define BITSET_SIZE (SINGLE_BYTE_SIZE / BITS_IN_ROOM)
-#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
-typedef unsigned int Bits;
-#else
-typedef unsigned char Bits;
-#endif
-typedef Bits BitSet[BITSET_SIZE];
-typedef Bits* BitSetRef;
+typedef uint32_t Bits;
+typedef Bits BitSet[BITSET_SIZE];
+typedef Bits* BitSetRef;
#define SIZE_BITSET sizeof(BitSet)
@@ -372,8 +339,8 @@ typedef Bits* BitSetRef;
for (i = 0; i < (int )BITSET_SIZE; i++) { (bs)[i] = 0; } \
} while (0)
-#define BS_ROOM(bs,pos) (bs)[pos / BITS_IN_ROOM]
-#define BS_BIT(pos) (1 << (pos % BITS_IN_ROOM))
+#define BS_ROOM(bs,pos) (bs)[(unsigned int )(pos) >> 5]
+#define BS_BIT(pos) (1u << ((unsigned int )(pos) & 0x1f))
#define BITSET_AT(bs, pos) (BS_ROOM(bs,pos) & BS_BIT(pos))
#define BITSET_SET_BIT(bs, pos) BS_ROOM(bs,pos) |= BS_BIT(pos)
@@ -389,11 +356,13 @@ typedef struct _BBuf {
#define BB_INIT(buf,size) bbuf_init((BBuf* )(buf), (size))
+/*
#define BB_SIZE_INC(buf,inc) do{\
(buf)->alloc += (inc);\
(buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\
if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\
} while (0)
+*/
#define BB_EXPAND(buf,low) do{\
do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )low);\
@@ -491,39 +460,34 @@ typedef struct _BBuf {
/* operation code */
enum OpCode {
- OP_FINISH = 0, /* matching process terminator (no more alternative) */
- OP_END = 1, /* pattern code terminator (success end) */
-
- OP_EXACT1 = 2, /* single byte, N = 1 */
- OP_EXACT2, /* single byte, N = 2 */
- OP_EXACT3, /* single byte, N = 3 */
- OP_EXACT4, /* single byte, N = 4 */
- OP_EXACT5, /* single byte, N = 5 */
- OP_EXACTN, /* single byte */
- OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
- OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
- OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
- OP_EXACTMB2N, /* mb-length = 2 */
- OP_EXACTMB3N, /* mb-length = 3 */
- OP_EXACTMBN, /* other length */
-
- OP_EXACT1_IC, /* single byte, N = 1, ignore case */
- OP_EXACTN_IC, /* single byte, ignore case */
-
+ OP_FINISH = 0, /* matching process terminator (no more alternative) */
+ OP_END = 1, /* pattern code terminator (success end) */
+ OP_STR_1 = 2, /* single byte, N = 1 */
+ OP_STR_2, /* single byte, N = 2 */
+ OP_STR_3, /* single byte, N = 3 */
+ OP_STR_4, /* single byte, N = 4 */
+ OP_STR_5, /* single byte, N = 5 */
+ OP_STR_N, /* single byte */
+ OP_STR_MB2N1, /* mb-length = 2 N = 1 */
+ OP_STR_MB2N2, /* mb-length = 2 N = 2 */
+ OP_STR_MB2N3, /* mb-length = 2 N = 3 */
+ OP_STR_MB2N, /* mb-length = 2 */
+ OP_STR_MB3N, /* mb-length = 3 */
+ OP_STR_MBN, /* other length */
+ OP_STR_1_IC, /* single byte, N = 1, ignore case */
+ OP_STR_N_IC, /* single byte, ignore case */
OP_CCLASS,
OP_CCLASS_MB,
OP_CCLASS_MIX,
OP_CCLASS_NOT,
OP_CCLASS_MB_NOT,
OP_CCLASS_MIX_NOT,
-
OP_ANYCHAR, /* "." */
OP_ANYCHAR_ML, /* "." multi-line */
OP_ANYCHAR_STAR, /* ".*" */
OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
OP_ANYCHAR_STAR_PEEK_NEXT,
OP_ANYCHAR_ML_STAR_PEEK_NEXT,
-
OP_WORD,
OP_WORD_ASCII,
OP_NO_WORD,
@@ -532,16 +496,13 @@ enum OpCode {
OP_NO_WORD_BOUNDARY,
OP_WORD_BEGIN,
OP_WORD_END,
-
OP_TEXT_SEGMENT_BOUNDARY,
-
OP_BEGIN_BUF,
OP_END_BUF,
OP_BEGIN_LINE,
OP_END_LINE,
OP_SEMI_END_BUF,
OP_BEGIN_POSITION,
-
OP_BACKREF1,
OP_BACKREF2,
OP_BACKREF_N,
@@ -552,34 +513,35 @@ enum OpCode {
OP_BACKREF_WITH_LEVEL_IC, /* \k<xxx+n>, \k<xxx-n> */
OP_BACKREF_CHECK, /* (?(n)), (?('name')) */
OP_BACKREF_CHECK_WITH_LEVEL, /* (?(n-level)), (?('name-level')) */
-
- OP_MEMORY_START,
- OP_MEMORY_START_PUSH, /* push back-tracker to stack */
- OP_MEMORY_END_PUSH, /* push back-tracker to stack */
- OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
- OP_MEMORY_END,
- OP_MEMORY_END_REC, /* push marker to stack */
-
+ OP_MEM_START,
+ OP_MEM_START_PUSH, /* push back-tracker to stack */
+ OP_MEM_END_PUSH, /* push back-tracker to stack */
+#ifdef USE_CALL
+ OP_MEM_END_PUSH_REC, /* push back-tracker to stack */
+#endif
+ OP_MEM_END,
+#ifdef USE_CALL
+ OP_MEM_END_REC, /* push marker to stack */
+#endif
OP_FAIL, /* pop stack and move */
OP_JUMP,
OP_PUSH,
OP_PUSH_SUPER,
OP_POP_OUT,
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
- OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
+ OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
#endif
- OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
- OP_REPEAT, /* {n,m} */
- OP_REPEAT_NG, /* {n,m}? (non greedy) */
+ OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
+ OP_REPEAT, /* {n,m} */
+ OP_REPEAT_NG, /* {n,m}? (non greedy) */
OP_REPEAT_INC,
- OP_REPEAT_INC_NG, /* non greedy */
- OP_REPEAT_INC_SG, /* search and get in stack */
- OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
+ OP_REPEAT_INC_NG, /* non greedy */
OP_EMPTY_CHECK_START, /* null loop checker start */
OP_EMPTY_CHECK_END, /* null loop checker end */
OP_EMPTY_CHECK_END_MEMST, /* null loop checker end (with capture status) */
+#ifdef USE_CALL
OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
-
+#endif
OP_PREC_READ_START, /* (?=...) start */
OP_PREC_READ_END, /* (?=...) end */
OP_PREC_READ_NOT_START, /* (?!...) start */
@@ -589,11 +551,12 @@ enum OpCode {
OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
OP_LOOK_BEHIND_NOT_START, /* (?<!...) start */
OP_LOOK_BEHIND_NOT_END, /* (?<!...) end */
-
- OP_CALL, /* \g<name> */
- OP_RETURN,
OP_PUSH_SAVE_VAL,
OP_UPDATE_VAR,
+#ifdef USE_CALL
+ OP_CALL, /* \g<name> */
+ OP_RETURN,
+#endif
#ifdef USE_CALLOUT
OP_CALLOUT_CONTENTS, /* (?{...}) (?{{...}}) */
OP_CALLOUT_NAME, /* (*name) (*name[tag](args...)) */
@@ -601,8 +564,8 @@ enum OpCode {
};
enum SaveType {
- SAVE_KEEP = 0, /* SAVE S */
- SAVE_S = 1,
+ SAVE_KEEP = 0, /* SAVE S */
+ SAVE_S = 1,
SAVE_RIGHT_RANGE = 2,
};
@@ -642,116 +605,57 @@ typedef int ModeType;
#define SIZE_UPDATE_VAR_TYPE sizeof(UpdateVarType)
#define SIZE_MODE sizeof(ModeType)
-#define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType)
-#define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType)
-#define GET_LENGTH_INC(len,p) PLATFORM_GET_INC(len, p, LengthType)
-#define GET_MEMNUM_INC(num,p) PLATFORM_GET_INC(num, p, MemNumType)
-#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
-#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
-#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
-#define GET_SAVE_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, SaveType)
-#define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType)
-#define GET_MODE_INC(mode,p) PLATFORM_GET_INC(mode, p, ModeType)
-
/* code point's address must be aligned address. */
#define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p))
-#define GET_BYTE_INC(byte,p) do{\
- byte = *(p);\
- (p)++;\
-} while(0)
/* op-code + arg size */
-#if 0
-#define SIZE_OP_ANYCHAR_STAR SIZE_OPCODE
-#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1)
-#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
-#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
-#define SIZE_OP_PUSH_SUPER (SIZE_OPCODE + SIZE_RELADDR)
-#define SIZE_OP_POP_OUT SIZE_OPCODE
-#ifdef USE_OP_PUSH_OR_JUMP_EXACT
-#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
-#endif
-#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
-#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_WORD_BOUNDARY (SIZE_OPCODE + SIZE_MODE)
-#define SIZE_OP_PREC_READ_START SIZE_OPCODE
-#define SIZE_OP_PREC_READ_NOT_START (SIZE_OPCODE + SIZE_RELADDR)
-#define SIZE_OP_PREC_READ_END SIZE_OPCODE
-#define SIZE_OP_PREC_READ_NOT_END SIZE_OPCODE
-#define SIZE_OP_FAIL SIZE_OPCODE
-#define SIZE_OP_MEMORY_START (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_MEMORY_START_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_MEMORY_END_PUSH (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_MEMORY_END_PUSH_REC (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_MEMORY_END (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_ATOMIC_START SIZE_OPCODE
-#define SIZE_OP_ATOMIC_END SIZE_OPCODE
-#define SIZE_OP_EMPTY_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_EMPTY_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH)
-#define SIZE_OP_LOOK_BEHIND_NOT_START (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH)
-#define SIZE_OP_LOOK_BEHIND_NOT_END SIZE_OPCODE
-#define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR)
-#define SIZE_OP_RETURN SIZE_OPCODE
-#define SIZE_OP_PUSH_SAVE_VAL (SIZE_OPCODE + SIZE_SAVE_TYPE + SIZE_MEMNUM)
-#define SIZE_OP_UPDATE_VAR (SIZE_OPCODE + SIZE_UPDATE_VAR_TYPE + SIZE_MEMNUM)
-
-#ifdef USE_CALLOUT
-#define SIZE_OP_CALLOUT_CONTENTS (SIZE_OPCODE + SIZE_MEMNUM)
-#define SIZE_OP_CALLOUT_NAME (SIZE_OPCODE + SIZE_MEMNUM + SIZE_MEMNUM)
-#endif
-
-#else /* if 0 */
/* for relative address increment to go next op. */
-#define SIZE_INC_OP 1
-
-#define SIZE_OP_ANYCHAR_STAR 1
-#define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT 1
-#define SIZE_OP_JUMP 1
-#define SIZE_OP_PUSH 1
-#define SIZE_OP_PUSH_SUPER 1
-#define SIZE_OP_POP_OUT 1
+#define SIZE_INC 1
+
+#define OPSIZE_ANYCHAR_STAR 1
+#define OPSIZE_ANYCHAR_STAR_PEEK_NEXT 1
+#define OPSIZE_JUMP 1
+#define OPSIZE_PUSH 1
+#define OPSIZE_PUSH_SUPER 1
+#define OPSIZE_POP_OUT 1
#ifdef USE_OP_PUSH_OR_JUMP_EXACT
-#define SIZE_OP_PUSH_OR_JUMP_EXACT1 1
-#endif
-#define SIZE_OP_PUSH_IF_PEEK_NEXT 1
-#define SIZE_OP_REPEAT 1
-#define SIZE_OP_REPEAT_INC 1
-#define SIZE_OP_REPEAT_INC_NG 1
-#define SIZE_OP_WORD_BOUNDARY 1
-#define SIZE_OP_PREC_READ_START 1
-#define SIZE_OP_PREC_READ_NOT_START 1
-#define SIZE_OP_PREC_READ_END 1
-#define SIZE_OP_PREC_READ_NOT_END 1
-#define SIZE_OP_BACKREF 1
-#define SIZE_OP_FAIL 1
-#define SIZE_OP_MEMORY_START 1
-#define SIZE_OP_MEMORY_START_PUSH 1
-#define SIZE_OP_MEMORY_END_PUSH 1
-#define SIZE_OP_MEMORY_END_PUSH_REC 1
-#define SIZE_OP_MEMORY_END 1
-#define SIZE_OP_MEMORY_END_REC 1
-#define SIZE_OP_ATOMIC_START 1
-#define SIZE_OP_ATOMIC_END 1
-#define SIZE_OP_EMPTY_CHECK_START 1
-#define SIZE_OP_EMPTY_CHECK_END 1
-#define SIZE_OP_LOOK_BEHIND 1
-#define SIZE_OP_LOOK_BEHIND_NOT_START 1
-#define SIZE_OP_LOOK_BEHIND_NOT_END 1
-#define SIZE_OP_CALL 1
-#define SIZE_OP_RETURN 1
-#define SIZE_OP_PUSH_SAVE_VAL 1
-#define SIZE_OP_UPDATE_VAR 1
+#define OPSIZE_PUSH_OR_JUMP_EXACT1 1
+#endif
+#define OPSIZE_PUSH_IF_PEEK_NEXT 1
+#define OPSIZE_REPEAT 1
+#define OPSIZE_REPEAT_INC 1
+#define OPSIZE_REPEAT_INC_NG 1
+#define OPSIZE_WORD_BOUNDARY 1
+#define OPSIZE_PREC_READ_START 1
+#define OPSIZE_PREC_READ_NOT_START 1
+#define OPSIZE_PREC_READ_END 1
+#define OPSIZE_PREC_READ_NOT_END 1
+#define OPSIZE_BACKREF 1
+#define OPSIZE_FAIL 1
+#define OPSIZE_MEM_START 1
+#define OPSIZE_MEM_START_PUSH 1
+#define OPSIZE_MEM_END_PUSH 1
+#define OPSIZE_MEM_END_PUSH_REC 1
+#define OPSIZE_MEM_END 1
+#define OPSIZE_MEM_END_REC 1
+#define OPSIZE_ATOMIC_START 1
+#define OPSIZE_ATOMIC_END 1
+#define OPSIZE_EMPTY_CHECK_START 1
+#define OPSIZE_EMPTY_CHECK_END 1
+#define OPSIZE_LOOK_BEHIND 1
+#define OPSIZE_LOOK_BEHIND_NOT_START 1
+#define OPSIZE_LOOK_BEHIND_NOT_END 1
+#define OPSIZE_CALL 1
+#define OPSIZE_RETURN 1
+#define OPSIZE_PUSH_SAVE_VAL 1
+#define OPSIZE_UPDATE_VAR 1
#ifdef USE_CALLOUT
-#define SIZE_OP_CALLOUT_CONTENTS 1
-#define SIZE_OP_CALLOUT_NAME 1
+#define OPSIZE_CALLOUT_CONTENTS 1
+#define OPSIZE_CALLOUT_NAME 1
#endif
-#endif /* if 0 */
#define MC_ESC(syn) (syn)->meta_char_table.esc
@@ -882,7 +786,7 @@ typedef struct {
} repeat; /* REPEAT, REPEAT_NG */
struct {
MemNumType id;
- } repeat_inc; /* REPEAT_INC, REPEAT_INC_SG, REPEAT_INC_NG, REPEAT_INC_NG_SG */
+ } repeat_inc; /* REPEAT_INC, REPEAT_INC_NG */
struct {
MemNumType mem;
} empty_check_start;
@@ -933,48 +837,58 @@ typedef struct {
#endif
} RegexExt;
+typedef struct { /* RepeatRange: element type of re_pattern_buffer.repeat_range */
+ int lower; /* presumably the minimum repeat count; TODO confirm against users */
+ int upper; /* presumably the maximum repeat count; TODO confirm against users */
+ union {
+ Operation* pcode; /* address of repeated body */
+ int offset; /* shares storage with pcode; NOTE(review): presumably the body location as an offset, confirm */
+ } u;
+} RepeatRange;
+
struct re_pattern_buffer {
/* common members of BBuf(bytes-buffer) */
Operation* ops;
#ifdef USE_DIRECT_THREADED_CODE
enum OpCode* ocs;
#endif
- Operation* ops_curr;
- unsigned int ops_used; /* used space for ops */
- unsigned int ops_alloc; /* allocated space for ops */
+ Operation* ops_curr;
+ unsigned int ops_used; /* used space for ops */
+ unsigned int ops_alloc; /* allocated space for ops */
unsigned char* string_pool;
unsigned char* string_pool_end;
- int num_mem; /* used memory(...) num counted from 1 */
- int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
- int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */
- int num_call; /* number of subexp call */
- unsigned int capture_history; /* (?@...) flag (1-31) */
- unsigned int bt_mem_start; /* need backtrack flag */
- unsigned int bt_mem_end; /* need backtrack flag */
- int stack_pop_level;
- int repeat_range_alloc;
- OnigRepeatRange* repeat_range;
-
- OnigEncoding enc;
- OnigOptionType options;
- OnigSyntaxType* syntax;
- OnigCaseFoldType case_fold_flag;
- void* name_table;
+ int num_mem; /* used memory(...) num counted from 1 */
+ int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
+ int num_empty_check; /* OP_EMPTY_CHECK_START/END id counter */
+ int num_call; /* number of subexp call */
+ MemStatusType capture_history; /* (?@...) flag (1-31) */
+ MemStatusType push_mem_start; /* need backtrack flag */
+ MemStatusType push_mem_end; /* need backtrack flag */
+ MemStatusType empty_status_mem;
+ int stack_pop_level;
+ int repeat_range_alloc;
+ RepeatRange* repeat_range;
+
+ OnigEncoding enc;
+ OnigOptionType options;
+ OnigSyntaxType* syntax;
+ OnigCaseFoldType case_fold_flag;
+ void* name_table;
/* optimization info (string search, char-map and anchors) */
int optimize; /* optimize flag */
int threshold_len; /* search str-length for apply optimize */
int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
- OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */
- OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */
+ OnigLen anc_dist_min; /* (SEMI_)END_BUF anchor distance */
+ OnigLen anc_dist_max; /* (SEMI_)END_BUF anchor distance */
int sub_anchor; /* start-anchor for exact or map */
unsigned char *exact;
unsigned char *exact_end;
unsigned char map[CHAR_MAP_SIZE]; /* used as BMH skip or char-map */
int map_offset;
- OnigLen dmin; /* min-distance of exact or map */
- OnigLen dmax; /* max-distance of exact or map */
+ OnigLen dist_min; /* min-distance of exact or map */
+ OnigLen dist_max; /* max-distance of exact or map */
RegexExt* extp;
};
diff --git a/src/regparse.c b/src/regparse.c
index f1deea3..fed53f7 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -2,7 +2,7 @@
regparse.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -77,6 +77,7 @@ OnigSyntaxType OnigSyntaxOniguruma = {
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
+ ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC |
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
, ONIG_OPTION_NONE
@@ -198,6 +199,24 @@ onig_set_parse_depth_limit(unsigned int depth)
return 0;
}
+#ifdef ONIG_DEBUG_PARSE /* INC_PARSE_DEPTH(d): increment d and make the enclosing
+ * function return ONIGERR_PARSE_DEPTH_LIMIT_OVER once d exceeds
+ * ParseDepthLimit. This debug variant also records the largest d seen in
+ * env->max_parse_depth, so a variable named env must be in scope at the
+ * point of expansion.
+ */
+#define INC_PARSE_DEPTH(d) do {\
+ (d)++;\
+ if (env->max_parse_depth < (d)) env->max_parse_depth = d;\
+ if ((d) > ParseDepthLimit) \
+ return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
+} while (0)
+#else /* same limit check without the max_parse_depth bookkeeping */
+#define INC_PARSE_DEPTH(d) do {\
+ (d)++;\
+ if ((d) > ParseDepthLimit) \
+ return ONIGERR_PARSE_DEPTH_LIMIT_OVER;\
+} while (0)
+#endif
+ /* DEC_PARSE_DEPTH(d): decrement d by one. */
+#define DEC_PARSE_DEPTH(d) (d)--
+
+
static int
bbuf_init(BBuf* buf, int size)
{
@@ -243,7 +262,8 @@ bbuf_clone(BBuf** rto, BBuf* from)
return 0;
}
-static int backref_rel_to_abs(int rel_no, ScanEnv* env)
+static int
+backref_rel_to_abs(int rel_no, ScanEnv* env)
{
if (rel_no > 0) {
return env->num_mem + rel_no;
@@ -291,15 +311,6 @@ bitset_set_range(BitSetRef bs, int from, int to)
}
}
-#if 0
-static void
-bitset_set_all(BitSetRef bs)
-{
- int i;
- for (i = 0; i < BITSET_SIZE; i++) { bs[i] = ~((Bits )0); }
-}
-#endif
-
static void
bitset_invert(BitSetRef bs)
{
@@ -362,24 +373,6 @@ save_entry(ScanEnv* env, enum SaveType type, int* id)
{
int nid = env->save_num;
-#if 0
- if (IS_NULL(env->saves)) {
- int n = 10;
- env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n);
- CHECK_NULL_RETURN_MEMERR(env->saves);
- env->save_alloc_num = n;
- }
- else if (env->save_alloc_num <= nid) {
- int n = env->save_alloc_num * 2;
- SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n);
- CHECK_NULL_RETURN_MEMERR(p);
- env->saves = p;
- env->save_alloc_num = n;
- }
-
- env->saves[nid].type = type;
-#endif
-
env->save_num++;
*id = nid;
return 0;
@@ -475,14 +468,14 @@ static int
str_end_hash(st_str_end_key* x)
{
UChar *p;
- int val = 0;
+ unsigned val = 0;
p = x->s;
while (p < x->end) {
- val = val * 997 + (int )*p++;
+ val = val * 997 + (unsigned )*p++;
}
- return val + (val >> 5);
+ return (int) (val + (val >> 5));
}
extern hash_table_type*
@@ -565,15 +558,15 @@ static int
callout_name_table_hash(st_callout_name_key* x)
{
UChar *p;
- int val = 0;
+ unsigned int val = 0;
p = x->s;
while (p < x->end) {
- val = val * 997 + (int )*p++;
+ val = val * 997 + (unsigned int )*p++;
}
/* use intptr_t for escape warning in Windows */
- return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type;
+ return (int )(val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type);
}
extern hash_table_type*
@@ -1093,6 +1086,35 @@ onig_name_to_group_numbers(regex_t* reg, const UChar* name,
return e->back_num;
}
+static int /* name_to_group_numbers: look up a group name in env->reg.
+ * On a hit, returns the entry's back_num and, when back_num is non-zero,
+ * points *nums at the entry's group number(s). On a miss, records the name
+ * as the error string in env and returns ONIGERR_UNDEFINED_NAME_REFERENCE.
+ */
+name_to_group_numbers(ScanEnv* env, const UChar* name, const UChar* name_end,
+ int** nums)
+{
+ regex_t* reg;
+ NameEntry* e;
+
+ reg = env->reg;
+ e = name_find(reg, name, name_end);
+
+ if (IS_NULL(e)) {
+ onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
+ (UChar* )name, (UChar* )name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+
+ switch (e->back_num) {
+ case 0: /* no group numbers: *nums is left untouched */
+ break;
+ case 1: /* exactly one: it is stored inline in back_ref1 */
+ *nums = &(e->back_ref1);
+ break;
+ default: /* otherwise the numbers are in the back_refs array */
+ *nums = e->back_refs;
+ break;
+ }
+ return e->back_num;
+}
+
extern int
onig_name_to_backref_number(regex_t* reg, const UChar* name,
const UChar* name_end, OnigRegion *region)
@@ -1869,8 +1891,8 @@ callout_tag_table_new(CalloutTagTable** rt)
}
static int
-callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,
- CalloutTagVal entry_val)
+callout_tag_entry_raw(ScanEnv* env, CalloutTagTable* t, UChar* name,
+ UChar* name_end, CalloutTagVal entry_val)
{
int r;
CalloutTagVal val;
@@ -1879,8 +1901,11 @@ callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,
return ONIGERR_INVALID_CALLOUT_TAG_NAME;
val = callout_tag_find(t, name, name_end);
- if (val >= 0)
+ if (val >= 0) {
+ onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
+ name, name_end);
return ONIGERR_MULTIPLEX_DEFINED_NAME;
+ }
r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
if (r < 0) return r;
@@ -1909,7 +1934,7 @@ ext_ensure_tag_table(regex_t* reg)
}
static int
-callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,
+callout_tag_entry(ScanEnv* env, regex_t* reg, UChar* name, UChar* name_end,
CalloutTagVal entry_val)
{
int r;
@@ -1921,7 +1946,7 @@ callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,
ext = onig_get_regex_ext(reg);
CHECK_NULL_RETURN_MEMERR(ext);
- r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);
+ r = callout_tag_entry_raw(env, ext->tag_table, name, name_end, entry_val);
e = onig_reg_callout_list_at(reg, (int )entry_val);
CHECK_NULL_RETURN_MEMERR(e);
@@ -1939,9 +1964,8 @@ callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,
static void
scan_env_clear(ScanEnv* env)
{
- MEM_STATUS_CLEAR(env->capture_history);
- MEM_STATUS_CLEAR(env->bt_mem_start);
- MEM_STATUS_CLEAR(env->bt_mem_end);
+ MEM_STATUS_CLEAR(env->cap_history);
+ MEM_STATUS_CLEAR(env->backtrack_mem);
MEM_STATUS_CLEAR(env->backrefed_mem);
env->error = (UChar* )NULL;
env->error_end = (UChar* )NULL;
@@ -1960,6 +1984,10 @@ scan_env_clear(ScanEnv* env)
xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static));
env->parse_depth = 0;
+#ifdef ONIG_DEBUG_PARSE
+ env->max_parse_depth = 0;
+#endif
+ env->backref_num = 0;
env->keep_num = 0;
env->save_num = 0;
env->save_alloc_num = 0;
@@ -1991,11 +2019,8 @@ scan_env_add_mem_entry(ScanEnv* env)
}
for (i = env->num_mem + 1; i < alloc; i++) {
- p[i].node = NULL_NODE;
-#if 0
- p[i].in = 0;
- p[i].recursion = 0;
-#endif
+ p[i].mem_node = NULL_NODE;
+ p[i].empty_repeat_node = NULL_NODE;
}
env->mem_env_dynamic = p;
@@ -2011,7 +2036,7 @@ static int
scan_env_set_mem_node(ScanEnv* env, int num, Node* node)
{
if (env->num_mem >= num)
- SCANENV_MEMENV(env)[num].node = node;
+ SCANENV_MEMENV(env)[num].mem_node = node;
else
return ONIGERR_PARSER_BUG;
return 0;
@@ -2149,7 +2174,7 @@ node_new_ctype(int type, int not, OnigOptionType options)
static Node*
node_new_anychar(void)
{
- Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE);
+ Node* node = node_new_ctype(CTYPE_ANYCHAR, FALSE, ONIG_OPTION_NONE);
return node;
}
@@ -2209,24 +2234,6 @@ onig_node_new_list(Node* left, Node* right)
}
extern Node*
-onig_node_list_add(Node* list, Node* x)
-{
- Node *n;
-
- n = onig_node_new_list(x, NULL);
- if (IS_NULL(n)) return NULL_NODE;
-
- if (IS_NOT_NULL(list)) {
- while (IS_NOT_NULL(NODE_CDR(list)))
- list = NODE_CDR(list);
-
- NODE_CDR(list) = n;
- }
-
- return n;
-}
-
-extern Node*
onig_node_new_alt(Node* left, Node* right)
{
Node* node = node_new();
@@ -2324,7 +2331,7 @@ node_new_backref(int back_num, int* backrefs, int by_name,
for (i = 0; i < back_num; i++) {
if (backrefs[i] <= env->num_mem &&
- IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) {
+ IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].mem_node)) {
NODE_STATUS_ADD(node, RECURSION); /* /...(\1).../ */
break;
}
@@ -2344,6 +2351,8 @@ node_new_backref(int back_num, int* backrefs, int by_name,
for (i = 0; i < back_num; i++)
p[i] = backrefs[i];
}
+
+ env->backref_num++;
return node;
}
@@ -2391,13 +2400,13 @@ node_new_quantifier(int lower, int upper, int by_number)
CHECK_NULL_RETURN(node);
NODE_SET_TYPE(node, NODE_QUANT);
- QUANT_(node)->lower = lower;
- QUANT_(node)->upper = upper;
- QUANT_(node)->greedy = 1;
- QUANT_(node)->empty_info = BODY_IS_NOT_EMPTY;
- QUANT_(node)->head_exact = NULL_NODE;
- QUANT_(node)->next_head_exact = NULL_NODE;
- QUANT_(node)->is_refered = 0;
+ QUANT_(node)->lower = lower;
+ QUANT_(node)->upper = upper;
+ QUANT_(node)->greedy = 1;
+ QUANT_(node)->emptiness = BODY_IS_NOT_EMPTY;
+ QUANT_(node)->head_exact = NULL_NODE;
+ QUANT_(node)->next_head_exact = NULL_NODE;
+ QUANT_(node)->include_referred = 0;
if (by_number != 0)
NODE_STATUS_ADD(node, BY_NUMBER);
@@ -2683,7 +2692,7 @@ make_text_segment(Node** node, ScanEnv* env)
ns[1] = NULL_NODE;
r = ONIGERR_MEMORY;
- ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, 0);
+ ns[0] = onig_node_new_anchor(ANCR_NO_TEXT_SEGMENT_BOUNDARY, FALSE);
if (IS_NULL(ns[0])) goto err;
r = node_new_true_anychar(&ns[1], env);
@@ -2694,7 +2703,7 @@ make_text_segment(Node** node, ScanEnv* env)
ns[0] = x;
ns[1] = NULL_NODE;
- x = node_new_quantifier(0, REPEAT_INFINITE, 1);
+ x = node_new_quantifier(0, INFINITE_REPEAT, TRUE);
if (IS_NULL(x)) goto err;
NODE_BODY(x) = ns[0];
@@ -2763,7 +2772,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
ns[0] = x;
- x = node_new_quantifier(lower, upper, 0);
+ x = node_new_quantifier(lower, upper, FALSE);
if (IS_NULL(x)) goto err0;
NODE_BODY(x) = ns[0];
@@ -2792,7 +2801,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
x = make_alt(2, ns);
if (IS_NULL(x)) goto err0;
- if (is_range_cutter != 0)
+ if (is_range_cutter != FALSE)
NODE_STATUS_ADD(x, SUPER);
*node = x;
@@ -2882,7 +2891,10 @@ make_range_clear(Node** node, ScanEnv* env)
ns[0] = NULL_NODE; ns[1] = x;
- r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env);
+#define ID_NOT_USED_DONT_CARE_ME 0
+
+ r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT,
+ ID_NOT_USED_DONT_CARE_ME, env);
if (r != 0) goto err;
x = make_alt(2, ns);
@@ -3001,7 +3013,7 @@ make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* qua
id1 = GIMMICK_(ns[0])->id;
r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive,
- 0, env);
+ FALSE, env);
if (r != 0) goto err;
ns[2] = ns[3] = NULL_NODE;
@@ -3044,7 +3056,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
if (expr == NULL_NODE) {
/* default expr \O* */
- quant = node_new_quantifier(0, REPEAT_INFINITE, 0);
+ quant = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
if (IS_NULL(quant)) goto err0;
r = node_new_true_anychar(&body, env);
@@ -3086,7 +3098,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
if (r != 0) goto err;
possessive = 1;
- r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE,
+ r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, INFINITE_REPEAT,
possessive, is_range_cutter, env);
if (r != 0) goto err;
@@ -3171,16 +3183,6 @@ node_str_cat_char(Node* node, UChar c)
}
extern void
-onig_node_conv_to_str_node(Node* node, int flag)
-{
- NODE_SET_TYPE(node, NODE_STRING);
- STR_(node)->flag = flag;
- STR_(node)->capacity = 0;
- STR_(node)->s = STR_(node)->buf;
- STR_(node)->end = STR_(node)->buf;
-}
-
-extern void
onig_node_str_clear(Node* node)
{
if (STR_(node)->capacity != 0 &&
@@ -3188,10 +3190,11 @@ onig_node_str_clear(Node* node)
xfree(STR_(node)->s);
}
- STR_(node)->capacity = 0;
STR_(node)->flag = 0;
STR_(node)->s = STR_(node)->buf;
STR_(node)->end = STR_(node)->buf;
+ STR_(node)->capacity = 0;
+ STR_(node)->case_min_len = 0;
}
static Node*
@@ -3201,10 +3204,12 @@ node_new_str(const UChar* s, const UChar* end)
CHECK_NULL_RETURN(node);
NODE_SET_TYPE(node, NODE_STRING);
- STR_(node)->capacity = 0;
STR_(node)->flag = 0;
STR_(node)->s = STR_(node)->buf;
STR_(node)->end = STR_(node)->buf;
+ STR_(node)->capacity = 0;
+ STR_(node)->case_min_len = 0;
+
if (onig_node_str_cat(node, s, end)) {
onig_node_free(node);
return NULL;
@@ -3219,11 +3224,11 @@ onig_node_new_str(const UChar* s, const UChar* end)
}
static Node*
-node_new_str_raw(UChar* s, UChar* end)
+node_new_str_crude(UChar* s, UChar* end)
{
Node* node = node_new_str(s, end);
CHECK_NULL_RETURN(node);
- NODE_STRING_SET_RAW(node);
+ NODE_STRING_SET_CRUDE(node);
return node;
}
@@ -3234,12 +3239,20 @@ node_new_empty(void)
}
static Node*
-node_new_str_raw_char(UChar c)
+node_new_str_crude_char(UChar c)
{
+ int i;
UChar p[1];
+ Node* node;
p[0] = c;
- return node_new_str_raw(p, p + 1);
+ node = node_new_str_crude(p, p + 1);
+
+ /* clear buf tail */
+ for (i = 1; i < NODE_STRING_BUF_SIZE; i++)
+ STR_(node)->buf[i] = '\0';
+
+ return node;
}
static Node*
@@ -3256,8 +3269,8 @@ str_node_split_last_char(Node* node, OnigEncoding enc)
if (p && p > sn->s) { /* can be split. */
rn = node_new_str(p, sn->end);
CHECK_NULL_RETURN(rn);
- if (NODE_STRING_IS_RAW(node))
- NODE_STRING_SET_RAW(rn);
+ if (NODE_STRING_IS_CRUDE(node))
+ NODE_STRING_SET_CRUDE(rn);
sn->end = (UChar* )p;
}
@@ -3275,28 +3288,10 @@ str_node_can_be_split(Node* node, OnigEncoding enc)
return 0;
}
-#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
static int
-node_str_head_pad(StrNode* sn, int num, UChar val)
-{
- UChar buf[NODE_STRING_BUF_SIZE];
- int i, len;
-
- len = sn->end - sn->s;
- onig_strcpy(buf, sn->s, sn->end);
- onig_strcpy(&(sn->s[num]), buf, buf + len);
- sn->end += num;
-
- for (i = 0; i < num; i++) {
- sn->s[i] = val;
- }
-}
-#endif
-
-extern int
-onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
+scan_number(UChar** src, const UChar* end, OnigEncoding enc)
{
- unsigned int num, val;
+ int num, val;
OnigCodePoint c;
UChar* p = *src;
PFETCH_READY;
@@ -3305,8 +3300,8 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
while (! PEND) {
PFETCH(c);
if (IS_CODE_DIGIT_ASCII(enc, c)) {
- val = (unsigned int )DIGITVAL(c);
- if ((INT_MAX_LIMIT - val) / 10UL < num)
+ val = (int )DIGITVAL(c);
+ if ((INT_MAX - val) / 10 < num)
return -1; /* overflow */
num = num * 10 + val;
@@ -3321,26 +3316,27 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
}
static int
-scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,
- int maxlen, OnigEncoding enc)
+scan_hexadecimal_number(UChar** src, UChar* end, int minlen, int maxlen,
+ OnigEncoding enc, OnigCodePoint* rcode)
{
+ OnigCodePoint code;
OnigCodePoint c;
- unsigned int num, val;
+ unsigned int val;
int n;
UChar* p = *src;
PFETCH_READY;
- num = 0;
+ code = 0;
n = 0;
while (! PEND && n < maxlen) {
PFETCH(c);
if (IS_CODE_XDIGIT_ASCII(enc, c)) {
n++;
- val = (unsigned int )XDIGITVAL(enc,c);
- if ((INT_MAX_LIMIT - val) / 16UL < num)
+ val = (unsigned int )XDIGITVAL(enc, c);
+ if ((UINT_MAX - val) / 16UL < code)
return ONIGERR_TOO_BIG_NUMBER; /* overflow */
- num = (num << 4) + XDIGITVAL(enc,c);
+ code = (code << 4) + val;
}
else {
PUNFETCH;
@@ -3351,36 +3347,46 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int minlen,
if (n < minlen)
return ONIGERR_INVALID_CODE_POINT_VALUE;
+ *rcode = code;
*src = p;
- return num;
+ return ONIG_NORMAL;
}
static int
-scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen,
- OnigEncoding enc)
+scan_octal_number(UChar** src, UChar* end, int minlen, int maxlen,
+ OnigEncoding enc, OnigCodePoint* rcode)
{
+ OnigCodePoint code;
OnigCodePoint c;
- unsigned int num, val;
+ unsigned int val;
+ int n;
UChar* p = *src;
PFETCH_READY;
- num = 0;
- while (! PEND && maxlen-- != 0) {
+ code = 0;
+ n = 0;
+ while (! PEND && n < maxlen) {
PFETCH(c);
if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') {
- val = ODIGITVAL(c);
- if ((INT_MAX_LIMIT - val) / 8UL < num)
- return -1; /* overflow */
+ n++;
+ val = (unsigned int )ODIGITVAL(c);
+ if ((UINT_MAX - val) / 8UL < code)
+ return ONIGERR_TOO_BIG_NUMBER; /* overflow */
- num = (num << 3) + val;
+ code = (code << 3) + val;
}
else {
PUNFETCH;
break;
}
}
+
+ if (n < minlen)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+
+ *rcode = code;
*src = p;
- return num;
+ return ONIG_NORMAL;
}
@@ -3877,19 +3883,19 @@ quantifier_type_num(QuantNode* q)
if (q->greedy) {
if (q->lower == 0) {
if (q->upper == 1) return 0;
- else if (IS_REPEAT_INFINITE(q->upper)) return 1;
+ else if (IS_INFINITE_REPEAT(q->upper)) return 1;
}
else if (q->lower == 1) {
- if (IS_REPEAT_INFINITE(q->upper)) return 2;
+ if (IS_INFINITE_REPEAT(q->upper)) return 2;
}
}
else {
if (q->lower == 0) {
if (q->upper == 1) return 3;
- else if (IS_REPEAT_INFINITE(q->upper)) return 4;
+ else if (IS_INFINITE_REPEAT(q->upper)) return 4;
}
else if (q->lower == 1) {
- if (IS_REPEAT_INFINITE(q->upper)) return 5;
+ if (IS_INFINITE_REPEAT(q->upper)) return 5;
}
}
return -1;
@@ -3915,68 +3921,70 @@ static enum ReduceType ReduceTypeTable[6][6] = {
{RQ_ASIS, RQ_PQ_Q, RQ_DEL, RQ_AQ, RQ_AQ, RQ_DEL} /* '+?' */
};
-extern void
-onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
+extern int
+onig_reduce_nested_quantifier(Node* pnode)
{
int pnum, cnum;
QuantNode *p, *c;
+ Node* cnode;
+
+ cnode = NODE_BODY(pnode);
p = QUANT_(pnode);
c = QUANT_(cnode);
pnum = quantifier_type_num(p);
cnum = quantifier_type_num(c);
if (pnum < 0 || cnum < 0) {
- if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {
- if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {
- int n = onig_positive_int_multiply(p->lower, c->lower);
- if (n >= 0) {
- p->lower = p->upper = n;
- NODE_BODY(pnode) = NODE_BODY(cnode);
- goto remove_cnode;
- }
- }
+ if (p->lower == p->upper && c->lower == c->upper) {
+ int n = onig_positive_int_multiply(p->lower, c->lower);
+ if (n < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
+
+ p->lower = p->upper = n;
+ NODE_BODY(pnode) = NODE_BODY(cnode);
+ goto remove_cnode;
}
- return ;
+ return 0;
}
switch(ReduceTypeTable[cnum][pnum]) {
case RQ_DEL:
*pnode = *cnode;
+ goto remove_cnode;
break;
case RQ_A:
NODE_BODY(pnode) = NODE_BODY(cnode);
- p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
+ p->lower = 0; p->upper = INFINITE_REPEAT; p->greedy = 1;
+ goto remove_cnode;
break;
case RQ_AQ:
NODE_BODY(pnode) = NODE_BODY(cnode);
- p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
+ p->lower = 0; p->upper = INFINITE_REPEAT; p->greedy = 0;
+ goto remove_cnode;
break;
case RQ_QQ:
NODE_BODY(pnode) = NODE_BODY(cnode);
p->lower = 0; p->upper = 1; p->greedy = 0;
+ goto remove_cnode;
break;
case RQ_P_QQ:
- NODE_BODY(pnode) = cnode;
p->lower = 0; p->upper = 1; p->greedy = 0;
- c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
- return ;
+ c->lower = 1; c->upper = INFINITE_REPEAT; c->greedy = 1;
break;
case RQ_PQ_Q:
- NODE_BODY(pnode) = cnode;
p->lower = 0; p->upper = 1; p->greedy = 1;
- c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
- return ;
+ c->lower = 1; c->upper = INFINITE_REPEAT; c->greedy = 0;
break;
case RQ_ASIS:
- NODE_BODY(pnode) = cnode;
- return ;
break;
}
+ return 0;
+
remove_cnode:
NODE_BODY(cnode) = NULL_NODE;
onig_node_free(cnode);
+ return 0;
}
static int
@@ -3995,7 +4003,7 @@ node_new_general_newline(Node** node, ScanEnv* env)
alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen);
if (alen < 0) return alen;
- crnl = node_new_str_raw(buf, buf + dlen + alen);
+ crnl = node_new_str_crude(buf, buf + dlen + alen);
CHECK_NULL_RETURN_MEMERR(crnl);
ncc = node_new_cclass();
@@ -4023,7 +4031,7 @@ node_new_general_newline(Node** node, ScanEnv* env)
if (r != 0) goto err1;
}
- x = node_new_bag_if_else(crnl, 0, ncc);
+ x = node_new_bag_if_else(crnl, NULL_NODE, ncc);
if (IS_NULL(x)) goto err1;
*node = x;
@@ -4032,7 +4040,7 @@ node_new_general_newline(Node** node, ScanEnv* env)
enum TokenSyms {
TK_EOT = 0, /* end of token */
- TK_RAW_BYTE = 1,
+ TK_CRUDE_BYTE = 1,
TK_CHAR,
TK_STRING,
TK_CODE_POINT,
@@ -4047,7 +4055,7 @@ enum TokenSyms {
TK_ALT,
TK_SUBEXP_OPEN,
TK_SUBEXP_CLOSE,
- TK_CC_OPEN,
+ TK_OPEN_CC,
TK_QUOTE_OPEN,
TK_CHAR_PROPERTY, /* \p{...}, \P{...} */
TK_KEEP, /* \K */
@@ -4059,9 +4067,9 @@ enum TokenSyms {
/* in cc */
TK_CC_CLOSE,
TK_CC_RANGE,
- TK_POSIX_BRACKET_OPEN,
- TK_CC_AND, /* && */
- TK_CC_CC_OPEN /* [ */
+ TK_CC_POSIX_BRACKET_OPEN,
+ TK_CC_AND, /* && */
+ TK_CC_OPEN_CC /* [ */
};
typedef struct {
@@ -4071,7 +4079,7 @@ typedef struct {
UChar* backp;
union {
UChar* s;
- int c;
+ UChar byte;
OnigCodePoint code;
int anchor;
int subtype;
@@ -4106,7 +4114,7 @@ typedef struct {
static int
-fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
+fetch_interval(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
{
int low, up, syn_allow, non_low = 0;
int r = 0;
@@ -4131,7 +4139,7 @@ fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
}
}
- low = onig_scan_unsigned_number(&p, end, env->enc);
+ low = scan_number(&p, end, env->enc);
if (low < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
if (low > ONIG_MAX_REPEAT_NUM)
return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
@@ -4150,7 +4158,7 @@ fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
PFETCH(c);
if (c == ',') {
UChar* prev = p;
- up = onig_scan_unsigned_number(&p, end, env->enc);
+ up = scan_number(&p, end, env->enc);
if (up < 0) return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
if (up > ONIG_MAX_REPEAT_NUM)
return ONIGERR_TOO_BIG_NUMBER_FOR_REPEAT_RANGE;
@@ -4158,7 +4166,7 @@ fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
if (p == prev) {
if (non_low != 0)
goto invalid;
- up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
+ up = INFINITE_REPEAT; /* {n,} : {n,infinite} */
}
}
else {
@@ -4173,12 +4181,12 @@ fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
if (PEND) goto invalid;
PFETCH(c);
if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) {
- if (c != MC_ESC(env->syntax)) goto invalid;
+ if (c != MC_ESC(env->syntax) || PEND) goto invalid;
PFETCH(c);
}
if (c != '}') goto invalid;
- if (!IS_REPEAT_INFINITE(up) && low > up) {
+ if (!IS_INFINITE_REPEAT(up) && low > up) {
/* {n,m}+ supported case */
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL))
return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
@@ -4396,7 +4404,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
PFETCH(c);
if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err;
PUNFETCH;
- level = onig_scan_unsigned_number(&p, end, enc);
+ level = scan_number(&p, end, enc);
if (level < 0) return ONIGERR_TOO_BIG_NUMBER;
*rlevel = (level * flag);
exist_level = 1;
@@ -4417,7 +4425,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
end:
if (r == 0) {
if (*num_type != IS_NOT_NUM) {
- *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
+ *rback_num = scan_number(&pnum_head, name_end, enc);
if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
else if (*rback_num == 0) {
if (*num_type == IS_REL_NUM)
@@ -4445,7 +4453,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end,
static int
fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
UChar** rname_end, ScanEnv* env, int* rback_num,
- enum REF_NUM* num_type, int ref)
+ enum REF_NUM* num_type, int is_ref)
{
int r, sign;
int digit_count;
@@ -4475,7 +4483,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
return ONIGERR_EMPTY_GROUP_NAME;
if (IS_CODE_DIGIT_ASCII(enc, c)) {
- if (ref == 1)
+ if (is_ref == TRUE)
*num_type = IS_ABS_NUM;
else {
r = ONIGERR_INVALID_GROUP_NAME;
@@ -4483,7 +4491,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
digit_count++;
}
else if (c == '-') {
- if (ref == 1) {
+ if (is_ref == TRUE) {
*num_type = IS_REL_NUM;
sign = -1;
pnum_head = p;
@@ -4493,7 +4501,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
}
}
else if (c == '+') {
- if (ref == 1) {
+ if (is_ref == TRUE) {
*num_type = IS_REL_NUM;
sign = 1;
pnum_head = p;
@@ -4543,7 +4551,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end,
}
if (*num_type != IS_NOT_NUM) {
- *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc);
+ *rback_num = scan_number(&pnum_head, name_end, enc);
if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER;
else if (*rback_num == 0) {
if (*num_type == IS_REL_NUM) {
@@ -4675,7 +4683,8 @@ str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to,
static int
fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
- int num;
+ int r;
+ OnigCodePoint code;
OnigCodePoint c, c2;
OnigSyntaxType* syn = env->syntax;
OnigEncoding enc = env->enc;
@@ -4691,7 +4700,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
PFETCH(c);
tok->type = TK_CHAR;
tok->base = 0;
- tok->u.c = c;
+ tok->u.code = c;
tok->escaped = 0;
if (c == ']') {
@@ -4708,7 +4717,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
PFETCH(c);
tok->escaped = 1;
- tok->u.c = c;
+ tok->u.code = c;
switch (c) {
case 'w':
tok->type = TK_CHAR_TYPE;
@@ -4781,8 +4790,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
prev = p;
if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
PINC;
- num = scan_unsigned_octal_number(&p, end, 11, enc);
- if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ r = scan_octal_number(&p, end, 0, 11, enc, &code);
+ if (r < 0) return r;
if (!PEND) {
c2 = PPEEK;
if (IS_CODE_DIGIT_ASCII(enc, c2))
@@ -4793,7 +4802,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
PINC;
tok->type = TK_CODE_POINT;
tok->base = 8;
- tok->u.code = (OnigCodePoint )num;
+ tok->u.code = code;
}
else {
/* can't read nothing or invalid format */
@@ -4808,13 +4817,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
prev = p;
if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
PINC;
- num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
- if (num < 0) {
- if (num == ONIGERR_TOO_BIG_NUMBER)
- return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
- else
- return num;
- }
+ r = scan_hexadecimal_number(&p, end, 0, 8, enc, &code);
+ if (r < 0) return r;
if (!PEND) {
c2 = PPEEK;
if (IS_CODE_XDIGIT_ASCII(enc, c2))
@@ -4825,7 +4829,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
PINC;
tok->type = TK_CODE_POINT;
tok->base = 16;
- tok->u.code = (OnigCodePoint )num;
+ tok->u.code = code;
}
else {
/* can't read nothing or invalid format */
@@ -4833,14 +4837,14 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
}
else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
- num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
- if (num < 0) return num;
+ r = scan_hexadecimal_number(&p, end, 0, 2, enc, &code);
+ if (r < 0) return r;
if (p == prev) { /* can't read nothing. */
- num = 0; /* but, it's not error */
+ code = 0; /* but, it's not error */
}
- tok->type = TK_RAW_BYTE;
+ tok->type = TK_CRUDE_BYTE;
tok->base = 16;
- tok->u.c = num;
+ tok->u.byte = (UChar )code;
}
break;
@@ -4849,14 +4853,14 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
prev = p;
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
- num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
- if (num < 0) return num;
+ r = scan_hexadecimal_number(&p, end, 4, 4, enc, &code);
+ if (r < 0) return r;
if (p == prev) { /* can't read nothing. */
- num = 0; /* but, it's not error */
+ code = 0; /* but, it's not error */
}
tok->type = TK_CODE_POINT;
tok->base = 16;
- tok->u.code = (OnigCodePoint )num;
+ tok->u.code = code;
}
break;
@@ -4865,22 +4869,23 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
PUNFETCH;
prev = p;
- num = scan_unsigned_octal_number(&p, end, 3, enc);
- if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;
+ r = scan_octal_number(&p, end, 0, 3, enc, &code);
+ if (r < 0) return r;
+ if (code >= 256) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
- num = 0; /* but, it's not error */
+ code = 0; /* but, it's not error */
}
- tok->type = TK_RAW_BYTE;
+ tok->type = TK_CRUDE_BYTE;
tok->base = 8;
- tok->u.c = num;
+ tok->u.byte = (UChar )code;
}
break;
default:
PUNFETCH;
- num = fetch_escaped_value(&p, end, env, &c2);
- if (num < 0) return num;
- if (tok->u.c != c2) {
+ r = fetch_escaped_value(&p, end, env, &c2);
+ if (r < 0) return r;
+ if (tok->u.code != c2) {
tok->u.code = c2;
tok->type = TK_CODE_POINT;
}
@@ -4894,7 +4899,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
PINC;
if (str_exist_check_with_esc(send, 2, p, end,
(OnigCodePoint )']', enc, syn)) {
- tok->type = TK_POSIX_BRACKET_OPEN;
+ tok->type = TK_CC_POSIX_BRACKET_OPEN;
}
else {
PUNFETCH;
@@ -4904,7 +4909,7 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
else {
cc_in_cc:
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_CCLASS_SET_OP)) {
- tok->type = TK_CC_CC_OPEN;
+ tok->type = TK_CC_OPEN_CC;
}
else {
CC_ESC_WARN(env, (UChar* )"[");
@@ -4927,7 +4932,8 @@ fetch_token_in_cc(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
static int
fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
- int r, num;
+ int r;
+ OnigCodePoint code;
OnigCodePoint c;
OnigEncoding enc = env->enc;
OnigSyntaxType* syn = env->syntax;
@@ -4952,14 +4958,14 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->backp = p;
PFETCH(c);
- tok->u.c = c;
+ tok->u.code = c;
tok->escaped = 1;
switch (c) {
case '*':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
tok->type = TK_REPEAT;
tok->u.repeat.lower = 0;
- tok->u.repeat.upper = REPEAT_INFINITE;
+ tok->u.repeat.upper = INFINITE_REPEAT;
goto greedy_check;
break;
@@ -4967,7 +4973,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
tok->type = TK_REPEAT;
tok->u.repeat.lower = 1;
- tok->u.repeat.upper = REPEAT_INFINITE;
+ tok->u.repeat.upper = INFINITE_REPEAT;
goto greedy_check;
break;
@@ -5003,7 +5009,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '{':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) break;
- r = fetch_interval_quantifier(&p, end, tok, env);
+ r = fetch_interval(&p, end, tok, env);
if (r < 0) return r; /* error */
if (r == 0) goto greedy_check2;
else if (r == 2) { /* {n} */
@@ -5191,8 +5197,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
prev = p;
if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) {
PINC;
- num = scan_unsigned_octal_number(&p, end, 11, enc);
- if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
+ r = scan_octal_number(&p, end, 0, 11, enc, &code);
+ if (r < 0) return r;
if (!PEND) {
if (IS_CODE_DIGIT_ASCII(enc, PPEEK))
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
@@ -5201,7 +5207,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
PINC;
tok->type = TK_CODE_POINT;
- tok->u.code = (OnigCodePoint )num;
+ tok->u.code = code;
}
else {
/* can't read nothing or invalid format */
@@ -5216,13 +5222,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
prev = p;
if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_BRACE_HEX8)) {
PINC;
- num = scan_unsigned_hexadecimal_number(&p, end, 0, 8, enc);
- if (num < 0) {
- if (num == ONIGERR_TOO_BIG_NUMBER)
- return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE;
- else
- return num;
- }
+ r = scan_hexadecimal_number(&p, end, 0, 8, enc, &code);
+ if (r < 0) return r;
if (!PEND) {
if (IS_CODE_XDIGIT_ASCII(enc, PPEEK))
return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE;
@@ -5231,7 +5232,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if ((p > prev + enclen(enc, prev)) && !PEND && PPEEK_IS('}')) {
PINC;
tok->type = TK_CODE_POINT;
- tok->u.code = (OnigCodePoint )num;
+ tok->u.code = code;
}
else {
/* can't read nothing or invalid format */
@@ -5239,14 +5240,14 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
}
else if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_X_HEX2)) {
- num = scan_unsigned_hexadecimal_number(&p, end, 0, 2, enc);
- if (num < 0) return num;
+ r = scan_hexadecimal_number(&p, end, 0, 2, enc, &code);
+ if (r < 0) return r;
if (p == prev) { /* can't read nothing. */
- num = 0; /* but, it's not error */
+ code = 0; /* but, it's not error */
}
- tok->type = TK_RAW_BYTE;
+ tok->type = TK_CRUDE_BYTE;
tok->base = 16;
- tok->u.c = num;
+ tok->u.byte = (UChar )code;
}
break;
@@ -5255,14 +5256,14 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
prev = p;
if (IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_U_HEX4)) {
- num = scan_unsigned_hexadecimal_number(&p, end, 4, 4, enc);
- if (num < 0) return num;
+ r = scan_hexadecimal_number(&p, end, 4, 4, enc, &code);
+ if (r < 0) return r;
if (p == prev) { /* can't read nothing. */
- num = 0; /* but, it's not error */
+ code = 0; /* but, it's not error */
}
tok->type = TK_CODE_POINT;
tok->base = 16;
- tok->u.code = (OnigCodePoint )num;
+ tok->u.code = code;
}
break;
@@ -5270,21 +5271,21 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '5': case '6': case '7': case '8': case '9':
PUNFETCH;
prev = p;
- num = onig_scan_unsigned_number(&p, end, enc);
- if (num < 0 || num > ONIG_MAX_BACKREF_NUM) {
+ r = scan_number(&p, end, enc);
+ if (r < 0 || r > ONIG_MAX_BACKREF_NUM) {
goto skip_backref;
}
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) &&
- (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */
+ (r <= env->num_mem || r <= 9)) { /* This spec. from GNU regex */
if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
- if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node))
+ if (r > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[r].mem_node))
return ONIGERR_INVALID_BACKREF;
}
tok->type = TK_BACKREF;
tok->u.backref.num = 1;
- tok->u.backref.ref1 = num;
+ tok->u.backref.ref1 = r;
tok->u.backref.by_name = 0;
#ifdef USE_BACKREF_WITH_LEVEL
tok->u.backref.exist_level = 0;
@@ -5304,14 +5305,14 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '0':
if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) {
prev = p;
- num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc);
- if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER;
+ r = scan_octal_number(&p, end, 0, (c == '0' ? 2:3), enc, &code);
+ if (r < 0 || r >= 256) return ONIGERR_TOO_BIG_NUMBER;
if (p == prev) { /* can't read nothing. */
- num = 0; /* but, it's not error */
+ code = 0; /* but, it's not error */
}
- tok->type = TK_RAW_BYTE;
+ tok->type = TK_CRUDE_BYTE;
tok->base = 8;
- tok->u.c = num;
+ tok->u.byte = (UChar )code;
}
else if (c != '0') {
PINC;
@@ -5336,7 +5337,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (r == 1) tok->u.backref.exist_level = 1;
else tok->u.backref.exist_level = 0;
#else
- r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1);
+ r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, TRUE);
#endif
if (r < 0) return r;
@@ -5349,7 +5350,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
if (back_num > env->num_mem ||
- IS_NULL(SCANENV_MEMENV(env)[back_num].node))
+ IS_NULL(SCANENV_MEMENV(env)[back_num].mem_node))
return ONIGERR_INVALID_BACKREF;
}
tok->type = TK_BACKREF;
@@ -5358,17 +5359,15 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.backref.ref1 = back_num;
}
else {
- num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
+ int num = name_to_group_numbers(env, prev, name_end, &backs);
if (num <= 0) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
return ONIGERR_UNDEFINED_NAME_REFERENCE;
}
if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
int i;
for (i = 0; i < num; i++) {
if (backs[i] > env->num_mem ||
- IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))
+ IS_NULL(SCANENV_MEMENV(env)[backs[i]].mem_node))
return ONIGERR_INVALID_BACKREF;
}
}
@@ -5401,7 +5400,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
prev = p;
r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env,
- &gnum, &num_type, 1);
+ &gnum, &num_type, TRUE);
if (r < 0) return r;
if (num_type != IS_NOT_NUM) {
@@ -5462,10 +5461,9 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
OnigCodePoint c2;
PUNFETCH;
- num = fetch_escaped_value(&p, end, env, &c2);
- if (num < 0) return num;
- /* set_raw: */
- if (tok->u.c != c2) {
+ r = fetch_escaped_value(&p, end, env, &c2);
+ if (r < 0) return r;
+ if (tok->u.code != c2) {
tok->type = TK_CODE_POINT;
tok->u.code = c2;
}
@@ -5477,7 +5475,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
}
else {
- tok->u.c = c;
+ tok->u.code = c;
tok->escaped = 0;
#ifdef USE_VARIABLE_META_CHARS
@@ -5514,7 +5512,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
#endif
tok->type = TK_REPEAT;
tok->u.repeat.lower = 0;
- tok->u.repeat.upper = REPEAT_INFINITE;
+ tok->u.repeat.upper = INFINITE_REPEAT;
goto greedy_check;
break;
@@ -5525,7 +5523,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
#endif
tok->type = TK_REPEAT;
tok->u.repeat.lower = 1;
- tok->u.repeat.upper = REPEAT_INFINITE;
+ tok->u.repeat.upper = INFINITE_REPEAT;
goto greedy_check;
break;
@@ -5542,7 +5540,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '{':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACE_INTERVAL)) break;
- r = fetch_interval_quantifier(&p, end, tok, env);
+ r = fetch_interval(&p, end, tok, env);
if (r < 0) return r; /* error */
if (r == 0) goto greedy_check2;
else if (r == 2) { /* {n} */
@@ -5590,8 +5588,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
PINC;
name = p;
- r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env, &gnum,
- &num_type, 0);
+ r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
+ &gnum, &num_type, FALSE);
if (r < 0) return r;
tok->type = TK_CALL;
@@ -5608,7 +5606,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.call.gnum = 0;
tok->u.call.name = p;
PINC;
- if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;
+ if (! PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;
tok->u.call.name_end = p;
break;
@@ -5623,7 +5621,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
name = p;
r = fetch_name((OnigCodePoint )'(', &p, end, &name_end, env,
- &gnum, &num_type, 1);
+ &gnum, &num_type, TRUE);
if (r < 0) return r;
if (num_type == IS_NOT_NUM) {
@@ -5679,7 +5677,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
case '[':
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_BRACKET_CC)) break;
- tok->type = TK_CC_OPEN;
+ tok->type = TK_OPEN_CC;
break;
case ']':
@@ -5890,6 +5888,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
int c, r;
int ascii_mode;
+ int is_single;
const OnigCodePoint *ranges;
OnigCodePoint limit;
OnigCodePoint sb_out;
@@ -5911,6 +5910,7 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
}
r = 0;
+ is_single = ONIGENC_IS_SINGLEBYTE(enc);
limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE;
switch (ctype) {
@@ -5927,19 +5927,25 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
case ONIGENC_CTYPE_ALNUM:
if (not != 0) {
for (c = 0; c < (int )limit; c++) {
- if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
- BITSET_SET_BIT(cc->bs, c);
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
+ if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
}
for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
- BITSET_SET_BIT(cc->bs, c);
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
+ BITSET_SET_BIT(cc->bs, c);
}
- ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
+ if (is_single == 0)
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
else {
for (c = 0; c < (int )limit; c++) {
- if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
- BITSET_SET_BIT(cc->bs, c);
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1) {
+ if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ BITSET_SET_BIT(cc->bs, c);
+ }
}
}
break;
@@ -5949,21 +5955,25 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env)
case ONIGENC_CTYPE_WORD:
if (not != 0) {
for (c = 0; c < (int )limit; c++) {
- if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */
+ /* check invalid code point */
+ if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
&& ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
}
for (c = limit; c < SINGLE_BYTE_SIZE; c++) {
- if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0)
+ if (is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
BITSET_SET_BIT(cc->bs, c);
}
+ if (ascii_mode != 0 && is_single == 0)
+ ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
else {
for (c = 0; c < (int )limit; c++) {
- if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
+ if ((is_single != 0 || ONIGENC_CODE_TO_MBCLEN(enc, c) == 1)
+ && ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype))
BITSET_SET_BIT(cc->bs, c);
}
- if (ascii_mode == 0)
+ if (ascii_mode == 0 && is_single == 0)
ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf);
}
break;
@@ -6055,10 +6065,12 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
{
int r;
OnigCodePoint c;
- OnigEncoding enc = env->enc;
- UChar *prev, *start, *p = *src;
+ OnigEncoding enc;
+ UChar *prev, *start, *p;
- r = 0;
+ p = *src;
+ enc = env->enc;
+ r = ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
start = prev = p;
while (!PEND) {
@@ -6066,18 +6078,20 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env)
PFETCH_S(c);
if (c == '}') {
r = ONIGENC_PROPERTY_NAME_TO_CTYPE(enc, start, prev);
- if (r < 0) break;
+ if (r >= 0) {
+ *src = p;
+ }
+ else {
+ onig_scan_env_set_error_string(env, r, *src, prev);
+ }
- *src = p;
return r;
}
else if (c == '(' || c == ')' || c == '{' || c == '|') {
- r = ONIGERR_INVALID_CHAR_PROPERTY_NAME;
break;
}
}
- onig_scan_env_set_error_string(env, r, *src, prev);
return r;
}
@@ -6093,7 +6107,7 @@ parse_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* en
*np = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(*np);
cc = CCLASS_(*np);
- r = add_ctype_to_cc(cc, ctype, 0, env);
+ r = add_ctype_to_cc(cc, ctype, FALSE, env);
if (r != 0) return r;
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
@@ -6101,67 +6115,67 @@ parse_char_property(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* en
}
-enum CCSTATE {
- CCS_VALUE,
- CCS_RANGE,
- CCS_COMPLETE,
- CCS_START
-};
+typedef enum {
+ CS_VALUE,
+ CS_RANGE,
+ CS_COMPLETE,
+ CS_START
+} CSTATE;
-enum CCVALTYPE {
- CCV_SB,
- CCV_CODE_POINT,
- CCV_CLASS
-};
+typedef enum {
+ CV_UNDEF,
+ CV_SB,
+ CV_MB,
+ CV_CPROP
+} CVAL;
static int
-next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type,
- enum CCSTATE* state, ScanEnv* env)
+cc_cprop_next(CClassNode* cc, OnigCodePoint* pcode, CVAL* val, CSTATE* state,
+ ScanEnv* env)
{
int r;
- if (*state == CCS_RANGE)
+ if (*state == CS_RANGE)
return ONIGERR_CHAR_CLASS_VALUE_AT_END_OF_RANGE;
- if (*state == CCS_VALUE && *type != CCV_CLASS) {
- if (*type == CCV_SB)
- BITSET_SET_BIT(cc->bs, (int )(*vs));
- else if (*type == CCV_CODE_POINT) {
- r = add_code_range(&(cc->mbuf), env, *vs, *vs);
+ if (*state == CS_VALUE) {
+ if (*val == CV_SB)
+ BITSET_SET_BIT(cc->bs, (int )(*pcode));
+ else if (*val == CV_MB) {
+ r = add_code_range(&(cc->mbuf), env, *pcode, *pcode);
if (r < 0) return r;
}
}
- *state = CCS_VALUE;
- *type = CCV_CLASS;
+ *state = CS_VALUE;
+ *val = CV_CPROP;
return 0;
}
static int
-next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
- int* from_israw, int to_israw,
- enum CCVALTYPE intype, enum CCVALTYPE* type,
- enum CCSTATE* state, ScanEnv* env)
+cc_char_next(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
+ int* from_raw, int to_raw, CVAL intype, CVAL* type,
+ CSTATE* state, ScanEnv* env)
{
int r;
switch (*state) {
- case CCS_VALUE:
- if (*type == CCV_SB) {
+ case CS_VALUE:
+ if (*type == CV_SB) {
if (*from > 0xff)
return ONIGERR_INVALID_CODE_POINT_VALUE;
BITSET_SET_BIT(cc->bs, (int )(*from));
}
- else if (*type == CCV_CODE_POINT) {
+ else if (*type == CV_MB) {
r = add_code_range(&(cc->mbuf), env, *from, *from);
if (r < 0) return r;
}
break;
- case CCS_RANGE:
+ case CS_RANGE:
if (intype == *type) {
- if (intype == CCV_SB) {
+ if (intype == CV_SB) {
if (*from > 0xff || to > 0xff)
return ONIGERR_INVALID_CODE_POINT_VALUE;
@@ -6190,21 +6204,21 @@ next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to,
if (r < 0) return r;
}
ccs_range_end:
- *state = CCS_COMPLETE;
+ *state = CS_COMPLETE;
break;
- case CCS_COMPLETE:
- case CCS_START:
- *state = CCS_VALUE;
+ case CS_COMPLETE:
+ case CS_START:
+ *state = CS_VALUE;
break;
default:
break;
}
- *from_israw = to_israw;
- *from = to;
- *type = intype;
+ *from_raw = to_raw;
+ *from = to;
+ *type = intype;
return 0;
}
@@ -6232,26 +6246,25 @@ code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped,
}
static int
-parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
+parse_cc(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
{
int r, neg, len, fetched, and_start;
- OnigCodePoint v, vs;
+ OnigCodePoint in_code, curr_code;
UChar *p;
Node* node;
CClassNode *cc, *prev_cc;
CClassNode work_cc;
-
- enum CCSTATE state;
- enum CCVALTYPE val_type, in_type;
- int val_israw, in_israw;
+ int curr_raw, in_raw;
+ CSTATE state;
+ CVAL in_type;
+ CVAL curr_type;
*np = NULL_NODE;
- env->parse_depth++;
- if (env->parse_depth > ParseDepthLimit)
- return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
+ INC_PARSE_DEPTH(env->parse_depth);
+
prev_cc = (CClassNode* )NULL;
r = fetch_token_in_cc(tok, src, end, env);
- if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
+ if (r == TK_CHAR && tok->u.code == (OnigCodePoint )'^' && tok->escaped == 0) {
neg = 1;
r = fetch_token_in_cc(tok, src, end, env);
}
@@ -6274,47 +6287,44 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
cc = CCLASS_(node);
and_start = 0;
- state = CCS_START;
+ state = CS_START;
+ curr_type = CV_UNDEF;
+
p = *src;
while (r != TK_CC_CLOSE) {
fetched = 0;
switch (r) {
case TK_CHAR:
any_char_in:
- len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.c);
- if (len > 1) {
- in_type = CCV_CODE_POINT;
- }
- else if (len < 0) {
+ len = ONIGENC_CODE_TO_MBCLEN(env->enc, tok->u.code);
+ if (len < 0) {
r = len;
goto err;
}
- else {
- /* sb_char: */
- in_type = CCV_SB;
- }
- v = (OnigCodePoint )tok->u.c;
- in_israw = 0;
+ in_type = (len == 1) ? CV_SB : CV_MB;
+ in_code = tok->u.code;
+ in_raw = 0;
goto val_entry2;
break;
- case TK_RAW_BYTE:
+ case TK_CRUDE_BYTE:
/* tok->base != 0 : octal or hexadec. */
if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
+ int i, j;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
UChar* psave = p;
- int i, base = tok->base;
+ int base = tok->base;
- buf[0] = tok->u.c;
+ buf[0] = tok->u.byte;
for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
r = fetch_token_in_cc(tok, &p, end, env);
if (r < 0) goto err;
- if (r != TK_RAW_BYTE || tok->base != base) {
+ if (r != TK_CRUDE_BYTE || tok->base != base) {
fetched = 1;
break;
}
- buf[i] = tok->u.c;
+ buf[i] = tok->u.byte;
}
if (i < ONIGENC_MBC_MINLEN(env->enc)) {
@@ -6322,6 +6332,9 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
goto err;
}
+ /* clear buf tail */
+ for (j = i; j < ONIGENC_CODE_TO_MBC_MAXLEN; j++) buf[j] = '\0';
+
len = enclen(env->enc, buf);
if (i < len) {
r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
@@ -6336,58 +6349,63 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
if (i == 1) {
- v = (OnigCodePoint )buf[0];
- goto raw_single;
+ in_code = (OnigCodePoint )buf[0];
+ goto crude_single;
}
else {
- v = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
- in_type = CCV_CODE_POINT;
+ in_code = ONIGENC_MBC_TO_CODE(env->enc, buf, bufe);
+ in_type = CV_MB;
}
}
else {
- v = (OnigCodePoint )tok->u.c;
- raw_single:
- in_type = CCV_SB;
+ in_code = (OnigCodePoint )tok->u.byte;
+ crude_single:
+ in_type = CV_SB;
}
- in_israw = 1;
+ in_raw = 1;
goto val_entry2;
break;
case TK_CODE_POINT:
- v = tok->u.code;
- in_israw = 1;
+ in_code = tok->u.code;
+ in_raw = 1;
val_entry:
- len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
+ len = ONIGENC_CODE_TO_MBCLEN(env->enc, in_code);
if (len < 0) {
- r = len;
- goto err;
+ if (state != CS_RANGE ||
+ ! IS_SYNTAX_BV(env->syntax,
+ ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC) ||
+ in_code < 0x100 || ONIGENC_MBC_MAXLEN(env->enc) == 1) {
+ r = len;
+ goto err;
+ }
}
- in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
+ in_type = (len == 1 ? CV_SB : CV_MB);
val_entry2:
- r = next_state_val(cc, &vs, v, &val_israw, in_israw, in_type, &val_type,
- &state, env);
+ r = cc_char_next(cc, &curr_code, in_code, &curr_raw, in_raw, in_type,
+ &curr_type, &state, env);
if (r != 0) goto err;
break;
- case TK_POSIX_BRACKET_OPEN:
+ case TK_CC_POSIX_BRACKET_OPEN:
r = parse_posix_bracket(cc, &p, end, env);
if (r < 0) goto err;
if (r == 1) { /* is not POSIX bracket */
CC_ESC_WARN(env, (UChar* )"[");
p = tok->backp;
- v = (OnigCodePoint )tok->u.c;
- in_israw = 0;
+ in_code = tok->u.code;
+ in_raw = 0;
goto val_entry;
}
- goto next_class;
+ goto next_cprop;
break;
case TK_CHAR_TYPE:
r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env);
if (r != 0) goto err;
- next_class:
- r = next_state_class(cc, &vs, &val_type, &state, env);
+ next_cprop:
+ r = cc_cprop_next(cc, &curr_code, &curr_type, &state, env);
if (r != 0) goto err;
break;
@@ -6400,19 +6418,20 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env);
if (r != 0) goto err;
- goto next_class;
+ goto next_cprop;
}
break;
case TK_CC_RANGE:
- if (state == CCS_VALUE) {
+ if (state == CS_VALUE) {
r = fetch_token_in_cc(tok, &p, end, env);
if (r < 0) goto err;
+
fetched = 1;
if (r == TK_CC_CLOSE) { /* allow [x-] */
range_end_val:
- v = (OnigCodePoint )'-';
- in_israw = 0;
+ in_code = (OnigCodePoint )'-';
+ in_raw = 0;
goto val_entry;
}
else if (r == TK_CC_AND) {
@@ -6420,20 +6439,21 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
goto range_end_val;
}
- if (val_type == CCV_CLASS) {
+ if (curr_type == CV_CPROP) {
r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS;
goto err;
}
- state = CCS_RANGE;
+ state = CS_RANGE;
}
- else if (state == CCS_START) {
+ else if (state == CS_START) {
/* [-xa] is allowed */
- v = (OnigCodePoint )tok->u.c;
- in_israw = 0;
+ in_code = tok->u.code;
+ in_raw = 0;
r = fetch_token_in_cc(tok, &p, end, env);
if (r < 0) goto err;
+
fetched = 1;
/* [--x] or [a&&-x] is warned. */
if (r == TK_CC_RANGE || and_start != 0)
@@ -6441,15 +6461,17 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
goto val_entry;
}
- else if (state == CCS_RANGE) {
+ else if (state == CS_RANGE) {
CC_ESC_WARN(env, (UChar* )"-");
- goto any_char_in; /* [!--x] is allowed */
+ goto any_char_in; /* [!--] is allowed */
}
- else { /* CCS_COMPLETE */
+ else { /* CS_COMPLETE */
r = fetch_token_in_cc(tok, &p, end, env);
if (r < 0) goto err;
+
fetched = 1;
- if (r == TK_CC_CLOSE) goto range_end_val; /* allow [a-b-] */
+ if (r == TK_CC_CLOSE)
+ goto range_end_val; /* allow [a-b-] */
else if (r == TK_CC_AND) {
CC_ESC_WARN(env, (UChar* )"-");
goto range_end_val;
@@ -6464,12 +6486,19 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
break;
- case TK_CC_CC_OPEN: /* [ */
+ case TK_CC_OPEN_CC: /* [ */
{
Node *anode;
CClassNode* acc;
- r = parse_char_class(&anode, tok, &p, end, env);
+ if (state == CS_VALUE) {
+ r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
+ &state, env);
+ if (r != 0) goto err;
+ }
+ state = CS_COMPLETE;
+
+ r = parse_cc(&anode, tok, &p, end, env);
if (r != 0) {
onig_node_free(anode);
goto cc_open_err;
@@ -6485,14 +6514,14 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
case TK_CC_AND: /* && */
{
- if (state == CCS_VALUE) {
- r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
- &val_type, &state, env);
+ if (state == CS_VALUE) {
+ r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
+ &state, env);
if (r != 0) goto err;
}
/* initialize local variables */
and_start = 1;
- state = CCS_START;
+ state = CS_START;
if (IS_NOT_NULL(prev_cc)) {
r = and_cclass(prev_cc, cc, env->enc);
@@ -6525,9 +6554,9 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
}
- if (state == CCS_VALUE) {
- r = next_state_val(cc, &vs, 0, &val_israw, 0, val_type,
- &val_type, &state, env);
+ if (state == CS_VALUE) {
+ r = cc_char_next(cc, &curr_code, 0, &curr_raw, 0, curr_type, &curr_type,
+ &state, env);
if (r != 0) goto err;
}
@@ -6560,7 +6589,7 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
}
}
*src = p;
- env->parse_depth--;
+ DEC_PARSE_DEPTH(env->parse_depth);
return 0;
err:
@@ -6569,8 +6598,8 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
return r;
}
-static int parse_subexp(Node** top, PToken* tok, int term,
- UChar** src, UChar* end, ScanEnv* env, int group_head);
+static int parse_alts(Node** top, PToken* tok, int term,
+ UChar** src, UChar* end, ScanEnv* env, int group_head);
#ifdef USE_CALLOUT
@@ -6673,7 +6702,7 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv
}
if (tag_start != tag_end) {
- r = callout_tag_entry(env->reg, tag_start, tag_end, num);
+ r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
if (r != ONIG_NORMAL) return r;
}
@@ -6741,7 +6770,8 @@ parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long*
static int
parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
- unsigned int types[], OnigValue vals[], ScanEnv* env)
+ int max_arg_num, unsigned int types[], OnigValue vals[],
+ ScanEnv* env)
{
#define MAX_CALLOUT_ARG_BYTE_LENGTH 128
@@ -6760,9 +6790,9 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
+ c = 0;
n = 0;
while (n < ONIG_CALLOUT_MAX_ARGS_NUM) {
- c = 0;
cn = 0;
esc = 0;
eesc = 0;
@@ -6795,7 +6825,7 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
size_t clen;
add_char:
- if (skip_mode == 0) {
+ if (skip_mode == FALSE) {
clen = p - e;
if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH)
return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */
@@ -6809,7 +6839,10 @@ parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end,
}
if (cn != 0) {
- if (skip_mode == 0) {
+ if (max_arg_num >= 0 && n >= max_arg_num)
+ return ONIGERR_INVALID_CALLOUT_ARG;
+
+ if (skip_mode == FALSE) {
if ((types[n] & ONIG_TYPE_LONG) != 0) {
int fixed = 0;
if (cn > 0) {
@@ -6941,7 +6974,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
/* read for single check only */
save = p;
- arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env);
+ arg_num = parse_callout_args(TRUE, '}', &p, end, -1, NULL, NULL, env);
if (arg_num < 0) return arg_num;
is_not_single = PPEEK_IS(cterm) ? 0 : 1;
@@ -6955,7 +6988,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
types[i] = get_callout_arg_type_by_name_id(name_id, i);
}
- arg_num = parse_callout_args(0, '}', &p, end, types, vals, env);
+ arg_num = parse_callout_args(FALSE, '}', &p, end, max_arg_num, types, vals, env);
if (arg_num < 0) return arg_num;
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
@@ -6994,7 +7027,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
}
if (tag_start != tag_end) {
- r = callout_tag_entry(env->reg, tag_start, tag_end, num);
+ r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
if (r != ONIG_NORMAL) return r;
}
@@ -7055,17 +7088,17 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
group:
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_subexp(np, tok, term, &p, end, env, 0);
+ r = parse_alts(np, tok, term, &p, end, env, FALSE);
if (r < 0) return r;
*src = p;
return 1; /* group */
break;
case '=':
- *np = onig_node_new_anchor(ANCR_PREC_READ, 0);
+ *np = onig_node_new_anchor(ANCR_PREC_READ, FALSE);
break;
case '!': /* preceding read */
- *np = onig_node_new_anchor(ANCR_PREC_READ_NOT, 0);
+ *np = onig_node_new_anchor(ANCR_PREC_READ_NOT, FALSE);
break;
case '>': /* (?>...) stop backtrack */
*np = node_new_bag(BAG_STOP_BACKTRACK);
@@ -7083,9 +7116,9 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS;
PFETCH(c);
if (c == '=')
- *np = onig_node_new_anchor(ANCR_LOOK_BEHIND, 0);
+ *np = onig_node_new_anchor(ANCR_LOOK_BEHIND, FALSE);
else if (c == '!')
- *np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, 0);
+ *np = onig_node_new_anchor(ANCR_LOOK_BEHIND_NOT, FALSE);
else {
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
UChar *name;
@@ -7101,7 +7134,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
named_group2:
name = p;
r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num,
- &num_type, 0);
+ &num_type, FALSE);
if (r < 0) return r;
num = scan_env_add_mem_entry(env);
@@ -7115,7 +7148,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
CHECK_NULL_RETURN_MEMERR(*np);
BAG_(*np)->m.regnum = num;
if (list_capture != 0)
- MEM_STATUS_ON_SIMPLE(env->capture_history, num);
+ MEM_STATUS_ON_SIMPLE(env->cap_history, num);
env->num_named++;
}
else {
@@ -7150,7 +7183,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_subexp(&absent, tok, term, &p, end, env, 1);
+ r = parse_alts(&absent, tok, term, &p, end, env, TRUE);
if (r < 0) {
onig_node_free(absent);
return r;
@@ -7237,7 +7270,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (r == 1) exist_level = 1;
#else
r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('),
- &p, end, &name_end, env, &back_num, &num_type, 1);
+ &p, end, &name_end, env, &back_num, &num_type, TRUE);
#endif
if (r < 0) {
if (is_enclosed == 0) {
@@ -7257,11 +7290,11 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
if (back_num > env->num_mem ||
- IS_NULL(SCANENV_MEMENV(env)[back_num].node))
+ IS_NULL(SCANENV_MEMENV(env)[back_num].mem_node))
return ONIGERR_INVALID_BACKREF;
}
- condition = node_new_backref_checker(1, &back_num, 0,
+ condition = node_new_backref_checker(1, &back_num, FALSE,
#ifdef USE_BACKREF_WITH_LEVEL
exist_level, level,
#endif
@@ -7271,22 +7304,20 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
int num;
int* backs;
- num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
+ num = name_to_group_numbers(env, prev, name_end, &backs);
if (num <= 0) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
return ONIGERR_UNDEFINED_NAME_REFERENCE;
}
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
int i;
for (i = 0; i < num; i++) {
if (backs[i] > env->num_mem ||
- IS_NULL(SCANENV_MEMENV(env)[backs[i]].node))
+ IS_NULL(SCANENV_MEMENV(env)[backs[i]].mem_node))
return ONIGERR_INVALID_BACKREF;
}
}
- condition = node_new_backref_checker(num, backs, 1,
+ condition = node_new_backref_checker(num, backs, TRUE,
#ifdef USE_BACKREF_WITH_LEVEL
exist_level, level,
#endif
@@ -7328,7 +7359,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
condition_is_checker = 0;
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_subexp(&condition, tok, term, &p, end, env, 0);
+ r = parse_alts(&condition, tok, term, &p, end, env, FALSE);
if (r < 0) {
onig_node_free(condition);
return r;
@@ -7371,7 +7402,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
onig_node_free(condition);
return r;
}
- r = parse_subexp(&target, tok, term, &p, end, env, 1);
+ r = parse_alts(&target, tok, term, &p, end, env, TRUE);
if (r < 0) {
onig_node_free(condition);
onig_node_free(target);
@@ -7414,6 +7445,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
}
break;
+#ifdef USE_CAPTURE_HISTORY
case '@':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
@@ -7435,12 +7467,13 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY;
}
BAG_(*np)->m.regnum = num;
- MEM_STATUS_ON_SIMPLE(env->capture_history, num);
+ MEM_STATUS_ON_SIMPLE(env->cap_history, num);
}
else {
return ONIGERR_UNDEFINED_GROUP_OPTION;
}
break;
+#endif
#ifdef USE_POSIXLINE_OPTION
case 'p':
@@ -7470,7 +7503,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
case 'm':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {
- OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0));
+ OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? TRUE : FALSE));
}
else if (IS_SYNTAX_OP2(env->syntax,
ONIG_SYN_OP2_OPTION_ONIGURUMA|ONIG_SYN_OP2_OPTION_RUBY)) {
@@ -7506,16 +7539,16 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (! ONIGENC_IS_UNICODE_ENCODING(enc))
return ONIGERR_UNDEFINED_GROUP_OPTION;
- OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, 0);
- OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, 1);
+ OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, FALSE);
+ OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, TRUE);
break;
#ifdef USE_UNICODE_WORD_BREAK
case 'w':
if (! ONIGENC_IS_UNICODE_ENCODING(enc))
return ONIGERR_UNDEFINED_GROUP_OPTION;
- OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, 0);
- OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, 1);
+ OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_WORD, FALSE);
+ OPTION_NEGATE(option, ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER, TRUE);
break;
#endif
default:
@@ -7545,7 +7578,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
env->options = option;
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_subexp(&target, tok, term, &p, end, env, 0);
+ r = parse_alts(&target, tok, term, &p, end, env, FALSE);
env->options = prev;
if (r < 0) {
onig_node_free(target);
@@ -7592,7 +7625,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
CHECK_NULL_RETURN_MEMERR(*np);
r = fetch_token(tok, &p, end, env);
if (r < 0) return r;
- r = parse_subexp(&target, tok, term, &p, end, env, 0);
+ r = parse_alts(&target, tok, term, &p, end, env, FALSE);
if (r < 0) {
onig_node_free(target);
return r;
@@ -7602,7 +7635,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (NODE_TYPE(*np) == NODE_BAG) {
if (BAG_(*np)->type == BAG_MEMORY) {
- /* Don't move this to previous of parse_subexp() */
+ /* Don't move this to previous of parse_alts() */
r = scan_env_set_mem_node(env, BAG_(*np)->m.regnum, *np);
if (r != 0) return r;
}
@@ -7622,7 +7655,7 @@ static const char* ReduceQStr[] = {
};
static int
-set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
+assign_quantifier_body(Node* qnode, Node* target, int group, ScanEnv* env)
{
QuantNode* qn;
@@ -7688,15 +7721,17 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
if (targetq_num >= 0 && nestq_num < 0) {
if (targetq_num == 1 || targetq_num == 2) { /* * or + */
/* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
- if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
+ if (! IS_INFINITE_REPEAT(qn->upper) && qn->upper > 1 && qn->greedy) {
qn->upper = (qn->lower == 0 ? 1 : qn->lower);
}
}
}
else {
+ int r;
+
NODE_BODY(qnode) = target;
- onig_reduce_nested_quantifier(qnode, target);
- goto q_exit;
+ r = onig_reduce_nested_quantifier(qnode);
+ return r;
}
}
break;
@@ -7706,7 +7741,6 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
}
NODE_BODY(qnode) = target;
- q_exit:
return 0;
}
@@ -7736,6 +7770,38 @@ clear_not_flag_cclass(CClassNode* cc, OnigEncoding enc)
}
#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
+#define ADD_CODE_INTO_CC(cc, code, enc) do {\
+ if (ONIGENC_MBC_MINLEN(enc) > 1 || ONIGENC_CODE_TO_MBCLEN(enc, code) != 1) {\
+ add_code_range_to_buf(&((cc)->mbuf), code, code);\
+ }\
+ else {\
+ BITSET_SET_BIT((cc)->bs, code);\
+ }\
+} while (0)
+
+extern int
+onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc,
+ int n, OnigCodePoint codes[])
+{
+ int i;
+ Node* node;
+ CClassNode* cc;
+
+ *rnode = NULL_NODE;
+
+ node = node_new_cclass();
+ CHECK_NULL_RETURN_MEMERR(node);
+
+ cc = CCLASS_(node);
+
+ for (i = 0; i < n; i++) {
+ ADD_CODE_INTO_CC(cc, codes[i], enc);
+ }
+
+ *rnode = node;
+ return 0;
+}
+
typedef struct {
ScanEnv* env;
CClassNode* cc;
@@ -7749,37 +7815,31 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)
IApplyCaseFoldArg* iarg;
ScanEnv* env;
CClassNode* cc;
- BitSetRef bs;
iarg = (IApplyCaseFoldArg* )arg;
env = iarg->env;
cc = iarg->cc;
- bs = cc->bs;
if (to_len == 1) {
int is_in = onig_is_code_in_cc(env->enc, from, cc);
#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
if ((is_in != 0 && !IS_NCCLASS_NOT(cc)) ||
(is_in == 0 && IS_NCCLASS_NOT(cc))) {
- if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
- add_code_range(&(cc->mbuf), env, *to, *to);
- }
- else {
- BITSET_SET_BIT(bs, *to);
- }
+ ADD_CODE_INTO_CC(cc, *to, env->enc);
}
#else
if (is_in != 0) {
- if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) {
+ if (ONIGENC_MBC_MINLEN(env->enc) > 1 ||
+ ONIGENC_CODE_TO_MBCLEN(env->enc, *to) != 1) {
if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc);
add_code_range(&(cc->mbuf), env, *to, *to);
}
else {
if (IS_NCCLASS_NOT(cc)) {
- BITSET_CLEAR_BIT(bs, *to);
+ BITSET_CLEAR_BIT(cc->bs, *to);
}
else
- BITSET_SET_BIT(bs, *to);
+ BITSET_SET_BIT(cc->bs, *to);
}
}
#endif /* CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS */
@@ -7787,34 +7847,65 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg)
else {
int r, i, len;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
- Node *snode = NULL_NODE;
if (onig_is_code_in_cc(env->enc, from, cc)
#ifdef CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS
&& !IS_NCCLASS_NOT(cc)
#endif
) {
+ int n, j, m, index;
+ Node* list_node;
+ Node* ns[3];
+
+ n = 0;
for (i = 0; i < to_len; i++) {
- len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
- if (i == 0) {
- snode = onig_node_new_str(buf, buf + len);
- CHECK_NULL_RETURN_MEMERR(snode);
-
- /* char-class expanded multi-char only
- compare with string folded at match time. */
- NODE_STRING_SET_AMBIG(snode);
+ OnigCodePoint code;
+ Node* csnode;
+ CClassNode* cs_cc;
+
+ index = onigenc_unicode_fold1_key(&to[i]);
+ if (index >= 0) {
+ csnode = node_new_cclass();
+ cs_cc = CCLASS_(csnode);
+ if (IS_NULL(csnode)) {
+ err_free_ns:
+ for (j = 0; j < n; j++) onig_node_free(ns[j]);
+ return ONIGERR_MEMORY;
+ }
+ m = FOLDS1_UNFOLDS_NUM(index);
+ for (j = 0; j < m; j++) {
+ code = FOLDS1_UNFOLDS(index)[j];
+ ADD_CODE_INTO_CC(cs_cc, code, env->enc);
+ }
+ ADD_CODE_INTO_CC(cs_cc, to[i], env->enc);
+ ns[n++] = csnode;
}
else {
- r = onig_node_str_cat(snode, buf, buf + len);
- if (r < 0) {
- onig_node_free(snode);
- return r;
+ len = ONIGENC_CODE_TO_MBC(env->enc, to[i], buf);
+ if (n == 0 || NODE_TYPE(ns[n-1]) != NODE_STRING) {
+ csnode = onig_node_new_str(buf, buf + len);
+ if (IS_NULL(csnode)) goto err_free_ns;
+
+ NODE_STRING_SET_CASE_EXPANDED(csnode);
+ ns[n++] = csnode;
+ }
+ else {
+ r = onig_node_str_cat(ns[n-1], buf, buf + len);
+ if (r < 0) goto err_free_ns;
}
}
}
- *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE);
- CHECK_NULL_RETURN_MEMERR(*(iarg->ptail));
+ if (n == 1)
+ list_node = ns[0];
+ else
+ list_node = make_list(n, ns);
+
+ *(iarg->ptail) = onig_node_new_alt(list_node, NULL_NODE);
+ if (IS_NULL(*(iarg->ptail))) {
+ onig_node_free(list_node);
+ return ONIGERR_MEMORY;
+ }
iarg->ptail = &(NODE_CDR((*(iarg->ptail))));
}
}
@@ -7826,14 +7917,18 @@ static int
parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
ScanEnv* env, int group_head)
{
- int r, len, group = 0;
+ int r, len, group;
Node* qn;
Node** tp;
+ unsigned int parse_depth;
+ group = 0;
*np = NULL;
if (tok->type == (enum TokenSyms )term)
goto end_of_token;
+ parse_depth = env->parse_depth;
+
switch (tok->type) {
case TK_ALT:
case TK_EOT:
@@ -7866,7 +7961,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
env->options = BAG_(*np)->o.options;
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
- r = parse_subexp(&target, tok, term, src, end, env, 0);
+ r = parse_alts(&target, tok, term, src, end, env, FALSE);
env->options = prev;
if (r < 0) {
onig_node_free(target);
@@ -7881,7 +7976,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_UNMATCHED_CLOSE_SUBEXP))
return ONIGERR_UNMATCHED_CLOSE_PARENTHESIS;
- if (tok->escaped) goto tk_raw_byte;
+ if (tok->escaped) goto tk_crude_byte;
else goto tk_byte;
break;
@@ -7906,44 +8001,37 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
}
break;
- case TK_RAW_BYTE:
- tk_raw_byte:
+ case TK_CRUDE_BYTE:
+ tk_crude_byte:
{
- *np = node_new_str_raw_char((UChar )tok->u.c);
+ *np = node_new_str_crude_char(tok->u.byte);
CHECK_NULL_RETURN_MEMERR(*np);
len = 1;
while (1) {
if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
- if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */
+ if (len == enclen(env->enc, STR_(*np)->s)) {
r = fetch_token(tok, src, end, env);
- NODE_STRING_CLEAR_RAW(*np);
- goto string_end;
+ goto tk_crude_byte_end;
}
}
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
- if (r != TK_RAW_BYTE) {
- /* Don't use this, it is wrong for little endian encodings. */
-#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
- int rem;
- if (len < ONIGENC_MBC_MINLEN(env->enc)) {
- rem = ONIGENC_MBC_MINLEN(env->enc) - len;
- (void )node_str_head_pad(STR_(*np), rem, (UChar )0);
- if (len + rem == enclen(env->enc, STR_(*np)->s)) {
- NODE_STRING_CLEAR_RAW(*np);
- goto string_end;
- }
- }
-#endif
+ if (r != TK_CRUDE_BYTE)
return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
- }
- r = node_str_cat_char(*np, (UChar )tok->u.c);
+ r = node_str_cat_char(*np, tok->u.byte);
if (r < 0) return r;
len++;
}
+
+ tk_crude_byte_end:
+ if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, STR_(*np)->s, STR_(*np)->end))
+ return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+
+ NODE_STRING_CLEAR_CRUDE(*np);
+ goto string_end;
}
break;
@@ -7953,7 +8041,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
len = ONIGENC_CODE_TO_MBC(env->enc, tok->u.code, buf);
if (len < 0) return len;
#ifdef NUMBERED_CHAR_IS_NOT_CASE_AMBIG
- *np = node_new_str_raw(buf, buf + len);
+ *np = node_new_str_crude(buf, buf + len);
#else
*np = node_new_str(buf, buf + len);
#endif
@@ -7996,7 +8084,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
*np = node_new_cclass();
CHECK_NULL_RETURN_MEMERR(*np);
cc = CCLASS_(*np);
- add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env);
+ add_ctype_to_cc(cc, tok->u.prop.ctype, FALSE, env);
if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc);
}
break;
@@ -8013,11 +8101,11 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (r != 0) return r;
break;
- case TK_CC_OPEN:
+ case TK_OPEN_CC:
{
CClassNode* cc;
- r = parse_char_class(np, tok, src, end, env);
+ r = parse_cc(np, tok, src, end, env);
if (r != 0) return r;
cc = CCLASS_(*np);
@@ -8055,7 +8143,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
case TK_ANYCHAR_ANYTIME:
*np = node_new_anychar();
CHECK_NULL_RETURN_MEMERR(*np);
- qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
+ qn = node_new_quantifier(0, INFINITE_REPEAT, FALSE);
CHECK_NULL_RETURN_MEMERR(qn);
NODE_BODY(qn) = *np;
*np = qn;
@@ -8158,6 +8246,8 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (is_invalid_quantifier_target(*tp))
return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
+ INC_PARSE_DEPTH(parse_depth);
+
qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
r == TK_INTERVAL);
CHECK_NULL_RETURN_MEMERR(qn);
@@ -8169,9 +8259,10 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
else {
target = *tp;
}
- r = set_quantifier(qn, target, group, env);
+ r = assign_quantifier_body(qn, target, group, env);
if (r < 0) {
onig_node_free(qn);
+ *tp = NULL_NODE;
return r;
}
@@ -8224,6 +8315,8 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
Node *node, **headp;
*top = NULL;
+ INC_PARSE_DEPTH(env->parse_depth);
+
r = parse_exp(&node, tok, term, src, end, env, group_head);
if (r < 0) {
onig_node_free(node);
@@ -8234,7 +8327,7 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
*top = node;
}
else {
- *top = node_new_list(node, NULL);
+ *top = node_new_list(node, NULL);
if (IS_NULL(*top)) {
onig_node_free(node);
return ONIGERR_MEMORY;
@@ -8242,7 +8335,7 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
headp = &(NODE_CDR(*top));
while (r != TK_EOT && r != term && r != TK_ALT) {
- r = parse_exp(&node, tok, term, src, end, env, 0);
+ r = parse_exp(&node, tok, term, src, end, env, FALSE);
if (r < 0) {
onig_node_free(node);
return r;
@@ -8260,21 +8353,20 @@ parse_branch(Node** top, PToken* tok, int term, UChar** src, UChar* end,
}
}
+ DEC_PARSE_DEPTH(env->parse_depth);
return r;
}
/* term_tok: TK_EOT or TK_SUBEXP_CLOSE */
static int
-parse_subexp(Node** top, PToken* tok, int term, UChar** src, UChar* end,
- ScanEnv* env, int group_head)
+parse_alts(Node** top, PToken* tok, int term, UChar** src, UChar* end,
+ ScanEnv* env, int group_head)
{
int r;
Node *node, **headp;
*top = NULL;
- env->parse_depth++;
- if (env->parse_depth > ParseDepthLimit)
- return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
+ INC_PARSE_DEPTH(env->parse_depth);
r = parse_branch(&node, tok, term, src, end, env, group_head);
if (r < 0) {
@@ -8296,7 +8388,7 @@ parse_subexp(Node** top, PToken* tok, int term, UChar** src, UChar* end,
while (r == TK_ALT) {
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
- r = parse_branch(&node, tok, term, src, end, env, 0);
+ r = parse_branch(&node, tok, term, src, end, env, FALSE);
if (r < 0) {
onig_node_free(node);
return r;
@@ -8323,7 +8415,7 @@ parse_subexp(Node** top, PToken* tok, int term, UChar** src, UChar* end,
return ONIGERR_PARSER_BUG;
}
- env->parse_depth--;
+ DEC_PARSE_DEPTH(env->parse_depth);
return r;
}
@@ -8335,7 +8427,7 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env)
r = fetch_token(&tok, src, end, env);
if (r < 0) return r;
- r = parse_subexp(top, &tok, TK_EOT, src, end, env, 0);
+ r = parse_alts(top, &tok, TK_EOT, src, end, env, FALSE);
if (r < 0) return r;
return 0;
diff --git a/src/regparse.h b/src/regparse.h
index b7a2867..1525ccb 100644
--- a/src/regparse.h
+++ b/src/regparse.h
@@ -4,7 +4,7 @@
regparse.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
#include "regint.h"
#define NODE_STRING_MARGIN 16
-#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
+#define NODE_STRING_BUF_SIZE 20 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 6
/* node type */
@@ -66,27 +66,32 @@ enum GimmickType {
#endif
};
-enum BodyEmpty {
- BODY_IS_NOT_EMPTY = 0,
- BODY_IS_EMPTY = 1,
- BODY_IS_EMPTY_MEM = 2,
- BODY_IS_EMPTY_REC = 3
+enum BodyEmptyType {
+ BODY_IS_NOT_EMPTY = 0,
+ BODY_IS_EMPTY_POSSIBILITY = 1,
+ BODY_IS_EMPTY_POSSIBILITY_MEM = 2,
+ BODY_IS_EMPTY_POSSIBILITY_REC = 3
};
+struct _Node;
+
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
UChar* s;
UChar* end;
unsigned int flag;
- int capacity; /* (allocated size - 1) or 0: use buf[] */
UChar buf[NODE_STRING_BUF_SIZE];
+ int capacity; /* (allocated size - 1) or 0: use buf[] */
+ int case_min_len;
} StrNode;
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
unsigned int flags;
BitSet bs;
@@ -96,20 +101,22 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
struct _Node* body;
int lower;
int upper;
int greedy;
- enum BodyEmpty empty_info;
+ enum BodyEmptyType emptiness;
struct _Node* head_exact;
struct _Node* next_head_exact;
- int is_refered; /* include called node. don't eliminate even if {0} */
+ int include_referred; /* include called node. don't eliminate even if {0} */
} QuantNode;
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
struct _Node* body;
enum BagType type;
@@ -152,6 +159,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
struct _Node* body; /* to BagNode : BAG_MEMORY */
int by_number;
@@ -166,6 +174,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
int back_num;
int back_static[NODE_BACKREFS_SIZE];
@@ -176,6 +185,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
struct _Node* body;
int type;
@@ -186,6 +196,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
struct _Node* car;
struct _Node* cdr;
@@ -194,6 +205,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
int ctype;
int not;
@@ -204,6 +216,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
enum GimmickType type;
int detail_type;
@@ -216,6 +229,7 @@ typedef struct _Node {
struct {
NodeType node_type;
int status;
+ struct _Node* parent;
struct _Node* body;
} base;
@@ -252,10 +266,6 @@ typedef struct _Node {
#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL)
#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
-#define NODE_IS_SIMPLE_TYPE(node) \
- ((NODE_TYPE2BIT(NODE_TYPE(node)) & \
- (NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0)
-
#define NODE_TYPE(node) ((node)->u.base.node_type)
#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
@@ -284,26 +294,21 @@ typedef struct _Node {
#define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML)
#define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF)
-#define NODE_STRING_RAW (1<<0) /* by backslashed number */
-#define NODE_STRING_AMBIG (1<<1)
-#define NODE_STRING_GOOD_AMBIG (1<<2)
-#define NODE_STRING_DONT_GET_OPT_INFO (1<<3)
+#define NODE_STRING_CRUDE (1<<0)
+#define NODE_STRING_CASE_EXPANDED (1<<1)
+#define NODE_STRING_CASE_FOLD_MATCH (1<<2)
#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
-#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW
-#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW
-#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= NODE_STRING_AMBIG
-#define NODE_STRING_SET_GOOD_AMBIG(node) (node)->u.str.flag |= NODE_STRING_GOOD_AMBIG
-#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
- (node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO
-#define NODE_STRING_IS_RAW(node) \
- (((node)->u.str.flag & NODE_STRING_RAW) != 0)
-#define NODE_STRING_IS_AMBIG(node) \
- (((node)->u.str.flag & NODE_STRING_AMBIG) != 0)
-#define NODE_STRING_IS_GOOD_AMBIG(node) \
- (((node)->u.str.flag & NODE_STRING_GOOD_AMBIG) != 0)
-#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
- (((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0)
+#define NODE_STRING_SET_CRUDE(node) (node)->u.str.flag |= NODE_STRING_CRUDE
+#define NODE_STRING_CLEAR_CRUDE(node) (node)->u.str.flag &= ~NODE_STRING_CRUDE
+#define NODE_STRING_SET_CASE_EXPANDED(node) (node)->u.str.flag |= NODE_STRING_CASE_EXPANDED
+#define NODE_STRING_SET_CASE_FOLD_MATCH(node) (node)->u.str.flag |= NODE_STRING_CASE_FOLD_MATCH
+#define NODE_STRING_IS_CRUDE(node) \
+ (((node)->u.str.flag & NODE_STRING_CRUDE) != 0)
+#define NODE_STRING_IS_CASE_EXPANDED(node) \
+ (((node)->u.str.flag & NODE_STRING_CASE_EXPANDED) != 0)
+#define NODE_STRING_IS_CASE_FOLD_MATCH(node) \
+ (((node)->u.str.flag & NODE_STRING_CASE_FOLD_MATCH) != 0)
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
@@ -314,7 +319,7 @@ typedef struct _Node {
#define NODE_ST_CLEN_FIXED (1<<2)
#define NODE_ST_MARK1 (1<<3)
#define NODE_ST_MARK2 (1<<4)
-#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5)
+#define NODE_ST_STRICT_REAL_REPEAT (1<<5)
#define NODE_ST_RECURSION (1<<6)
#define NODE_ST_CALLED (1<<7)
#define NODE_ST_ADDR_FIXED (1<<8)
@@ -330,6 +335,7 @@ typedef struct _Node {
#define NODE_ST_FIXED_OPTION (1<<18)
#define NODE_ST_PROHIBIT_RECURSION (1<<19)
#define NODE_ST_SUPER (1<<20)
+#define NODE_ST_EMPTY_STATUS_CHECK (1<<21)
#define NODE_STATUS(node) (((Node* )node)->u.base.status)
@@ -357,9 +363,12 @@ typedef struct _Node {
#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
#define NODE_IS_PROHIBIT_RECURSION(node) \
((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
-#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
- ((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0)
+#define NODE_IS_STRICT_REAL_REPEAT(node) \
+ ((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0)
+#define NODE_IS_EMPTY_STATUS_CHECK(node) \
+ ((NODE_STATUS(node) & NODE_ST_EMPTY_STATUS_CHECK) != 0)
+#define NODE_PARENT(node) ((node)->u.base.parent)
#define NODE_BODY(node) ((node)->u.base.body)
#define NODE_QUANT_BODY(node) ((node)->body)
#define NODE_BAG_BODY(node) ((node)->body)
@@ -372,11 +381,8 @@ typedef struct _Node {
(senv)->mem_env_dynamic : (senv)->mem_env_static)
typedef struct {
- Node* node;
-#if 0
- int in;
- int recursion;
-#endif
+ Node* mem_node;
+ Node* empty_repeat_node;
} MemEnv;
typedef struct {
@@ -388,9 +394,8 @@ typedef struct {
OnigCaseFoldType case_fold_flag;
OnigEncoding enc;
OnigSyntaxType* syntax;
- MemStatusType capture_history;
- MemStatusType bt_mem_start;
- MemStatusType bt_mem_end;
+ MemStatusType cap_history;
+ MemStatusType backtrack_mem; /* backtrack/recursion */
MemStatusType backrefed_mem;
UChar* pattern;
UChar* pattern_end;
@@ -408,7 +413,10 @@ typedef struct {
MemEnv mem_env_static[SCANENV_MEMENV_SIZE];
MemEnv* mem_env_dynamic;
unsigned int parse_depth;
-
+#ifdef ONIG_DEBUG_PARSE
+ unsigned int max_parse_depth;
+#endif
+ int backref_num;
int keep_num;
int save_num;
int save_alloc_num;
@@ -429,9 +437,7 @@ extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
-extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
-extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
-extern void onig_node_conv_to_str_node P_((Node* node, int raw));
+extern int onig_reduce_nested_quantifier P_((Node* pnode));
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
@@ -439,13 +445,13 @@ extern Node* onig_node_new_bag P_((enum BagType type));
extern Node* onig_node_new_anchor P_((int type, int ascii_mode));
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));
-extern Node* onig_node_list_add P_((Node* list, Node* x));
extern Node* onig_node_new_alt P_((Node* left, Node* right));
extern void onig_node_str_clear P_((Node* node));
extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
extern int onig_free_shared_cclass_table P_((void));
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
+extern int onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc, int n, OnigCodePoint codes[]);
extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node);
#ifdef USE_CALLOUT
diff --git a/src/regposerr.c b/src/regposerr.c
index e389531..e1747c5 100644
--- a/src/regposerr.c
+++ b/src/regposerr.c
@@ -2,7 +2,7 @@
regposerr.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/regposix.c b/src/regposix.c
index 09e16ac..b3e78ff 100644
--- a/src/regposix.c
+++ b/src/regposix.c
@@ -2,7 +2,7 @@
regposix.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/regsyntax.c b/src/regsyntax.c
index d4420cc..513c7f7 100644
--- a/src/regsyntax.c
+++ b/src/regsyntax.c
@@ -2,7 +2,7 @@
regsyntax.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/regtrav.c b/src/regtrav.c
index 58a17f5..8307695 100644
--- a/src/regtrav.c
+++ b/src/regtrav.c
@@ -2,7 +2,7 @@
regtrav.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2004 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/regversion.c b/src/regversion.c
index 594a52c..de993d3 100644
--- a/src/regversion.c
+++ b/src/regversion.c
@@ -2,7 +2,7 @@
regversion.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/src/sjis.c b/src/sjis.c
index 4f90b72..1fd92d9 100644
--- a/src/sjis.c
+++ b/src/sjis.c
@@ -2,7 +2,7 @@
sjis.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -149,10 +149,6 @@ code_to_mbc(OnigCodePoint code, UChar *buf)
if ((code & 0xff00) != 0) *p++ = (UChar )(((code >> 8) & 0xff));
*p++ = (UChar )(code & 0xff);
-#if 0
- if (enclen(ONIG_ENCODING_SJIS, buf) != (p - buf))
- return REGERR_INVALID_CODE_POINT_VALUE;
-#endif
return (int )(p - buf);
}
@@ -179,31 +175,6 @@ mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED,
}
}
-#if 0
-static int
-is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_SJIS, flag, pp, end);
-
-}
-#endif
-
-#if 0
-static int
-is_code_ctype(OnigCodePoint code, unsigned int ctype)
-{
- if (code < 128)
- return ONIGENC_IS_ASCII_CODE_CTYPE(code, ctype);
- else {
- if (CTYPE_IS_WORD_GRAPH_PRINT(ctype)) {
- return (code_to_mbclen(code) > 1 ? TRUE : FALSE);
- }
- }
-
- return FALSE;
-}
-#endif
-
static UChar*
left_adjust_char_head(const UChar* start, const UChar* s)
{
diff --git a/src/sjis_prop.c b/src/sjis_prop.c
index 3a88a38..e33fbb2 100644
--- a/src/sjis_prop.c
+++ b/src/sjis_prop.c
@@ -1,5 +1,5 @@
/* ANSI-C code produced by gperf version 3.1 */
-/* Command-line: /usr/local/bin/gperf -pt -T -L ANSI-C -N onigenc_sjis_lookup_property_name --output-file gperf2.tmp sjis_prop.gperf */
+/* Command-line: gperf -pt -T -L ANSI-C -N onigenc_sjis_lookup_property_name --output-file gperf2.tmp sjis_prop.gperf */
/* Computed positions: -k'1,3' */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
diff --git a/src/unicode.c b/src/unicode.c
index 5820319..474436a 100644
--- a/src/unicode.c
+++ b/src/unicode.c
@@ -2,7 +2,7 @@
unicode.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -356,16 +356,15 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
for (fn = 0; fn < 2; fn++) {
int index;
cs[fn][0] = FOLDS2_FOLD(buk->index)[fn];
+ ncs[fn] = 1;
index = onigenc_unicode_fold1_key(&cs[fn][0]);
if (index >= 0) {
int m = FOLDS1_UNFOLDS_NUM(index);
for (i = 0; i < m; i++) {
cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
}
- ncs[fn] = m + 1;
+ ncs[fn] += m;
}
- else
- ncs[fn] = 1;
}
for (i = 0; i < ncs[0]; i++) {
@@ -393,16 +392,15 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
for (fn = 0; fn < 3; fn++) {
int index;
cs[fn][0] = FOLDS3_FOLD(buk->index)[fn];
+ ncs[fn] = 1;
index = onigenc_unicode_fold1_key(&cs[fn][0]);
if (index >= 0) {
int m = FOLDS1_UNFOLDS_NUM(index);
for (i = 0; i < m; i++) {
cs[fn][i+1] = FOLDS1_UNFOLDS(index)[i];
}
- ncs[fn] = m + 1;
+ ncs[fn] += m;
}
- else
- ncs[fn] = 1;
}
for (i = 0; i < ncs[0]; i++) {
diff --git a/src/unicode_egcb_data.c b/src/unicode_egcb_data.c
index 6a74c77..3c49422 100644
--- a/src/unicode_egcb_data.c
+++ b/src/unicode_egcb_data.c
@@ -1,6 +1,6 @@
/* unicode_egcb_data.c: Generated by make_unicode_egcb_data.py. */
/*-
- * Copyright (c) 2017-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2017-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +25,7 @@
* SUCH DAMAGE.
*/
-#define GRAPHEME_BREAK_PROPERTY_VERSION 12_1_0
+#define GRAPHEME_BREAK_PROPERTY_VERSION 120100
/*
CR
diff --git a/src/unicode_fold1_key.c b/src/unicode_fold1_key.c
index b84b528..171a0fa 100644
--- a/src/unicode_fold1_key.c
+++ b/src/unicode_fold1_key.c
@@ -1,7 +1,7 @@
/* This file was converted by gperf_fold_key_conv.py
from gperf output file. */
/* ANSI-C code produced by gperf version 3.1 */
-/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold1_key unicode_fold1_key.gperf */
+/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold1_key unicode_fold1_key.gperf */
/* Computed positions: -k'1-3' */
@@ -9,7 +9,7 @@
/* This gperf source file was generated by make_unicode_fold_data.py */
/*-
- * Copyright (c) 2017-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2017-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -2983,7 +2983,7 @@ onigenc_unicode_fold1_key(OnigCodePoint codes[])
4026
};
- if (0 == 0)
+
{
int key = hash(codes);
diff --git a/src/unicode_fold2_key.c b/src/unicode_fold2_key.c
index 2310f0a..c39b19d 100644
--- a/src/unicode_fold2_key.c
+++ b/src/unicode_fold2_key.c
@@ -1,7 +1,7 @@
/* This file was converted by gperf_fold_key_conv.py
from gperf output file. */
/* ANSI-C code produced by gperf version 3.1 */
-/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf */
+/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold2_key unicode_fold2_key.gperf */
/* Computed positions: -k'3,6' */
@@ -9,7 +9,7 @@
/* This gperf source file was generated by make_unicode_fold_data.py */
/*-
- * Copyright (c) 2017-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2017-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -211,7 +211,7 @@ onigenc_unicode_fold2_key(OnigCodePoint codes[])
129
};
- if (0 == 0)
+
{
int key = hash(codes);
diff --git a/src/unicode_fold3_key.c b/src/unicode_fold3_key.c
index 0e02a62..295c447 100644
--- a/src/unicode_fold3_key.c
+++ b/src/unicode_fold3_key.c
@@ -1,7 +1,7 @@
/* This file was converted by gperf_fold_key_conv.py
from gperf output file. */
/* ANSI-C code produced by gperf version 3.1 */
-/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf */
+/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1 -N onigenc_unicode_fold3_key unicode_fold3_key.gperf */
/* Computed positions: -k'3,6,9' */
@@ -9,7 +9,7 @@
/* This gperf source file was generated by make_unicode_fold_data.py */
/*-
- * Copyright (c) 2017-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2017-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -121,7 +121,7 @@ onigenc_unicode_fold3_key(OnigCodePoint codes[])
0
};
- if (0 == 0)
+
{
int key = hash(codes);
diff --git a/src/unicode_fold_data.c b/src/unicode_fold_data.c
index 0dbf9ae..68694b0 100644
--- a/src/unicode_fold_data.c
+++ b/src/unicode_fold_data.c
@@ -1,7 +1,7 @@
/* This file was generated by make_unicode_fold_data.py. */
#include "regenc.h"
-#define UNICODE_CASEFOLD_VERSION 12_1_0
+#define UNICODE_CASEFOLD_VERSION 120100
OnigCodePoint OnigUnicodeFolds1[] = {
diff --git a/src/unicode_property_data.c b/src/unicode_property_data.c
index 5c1c8a9..0083dd6 100644
--- a/src/unicode_property_data.c
+++ b/src/unicode_property_data.c
@@ -1,5 +1,5 @@
/* ANSI-C code produced by gperf version 3.1 */
-/* Command-line: /usr/local/bin/gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf1.tmp unicode_property_data.gperf */
+/* Command-line: gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf1.tmp unicode_property_data.gperf */
/* Computed positions: -k'1-3,5-6,12,16,$' */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
@@ -29580,7 +29580,8 @@ unicode_lookup_property_name (register const char *str, register size_t len)
-#define UNICODE_PROPERTY_VERSION 12_1_0
+#define UNICODE_PROPERTY_VERSION 120100
+#define UNICODE_EMOJI_VERSION 1201
#define PROPERTY_NAME_MAX_SIZE 59
#define CODE_RANGES_NUM 568
diff --git a/src/unicode_property_data_posix.c b/src/unicode_property_data_posix.c
index eddc108..e299e85 100644
--- a/src/unicode_property_data_posix.c
+++ b/src/unicode_property_data_posix.c
@@ -1,5 +1,5 @@
/* ANSI-C code produced by gperf version 3.1 */
-/* Command-line: /usr/local/bin/gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf2.tmp unicode_property_data_posix.gperf */
+/* Command-line: gperf -T -C -c -t -j1 -L ANSI-C --ignore-case --pic -Q unicode_prop_name_pool -N unicode_lookup_property_name --output-file gperf2.tmp unicode_property_data_posix.gperf */
/* Computed positions: -k'1,3' */
#if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) \
diff --git a/src/unicode_unfold_key.c b/src/unicode_unfold_key.c
index b2228e0..51a037b 100644
--- a/src/unicode_unfold_key.c
+++ b/src/unicode_unfold_key.c
@@ -1,7 +1,7 @@
/* This file was converted by gperf_unfold_key_conv.py
from gperf output file. */
/* ANSI-C code produced by gperf version 3.1 */
-/* Command-line: /usr/local/bin/gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N onigenc_unicode_unfold_key unicode_unfold_key.gperf */
+/* Command-line: gperf -n -C -T -c -t -j1 -L ANSI-C -F,-1,0 -N onigenc_unicode_unfold_key unicode_unfold_key.gperf */
/* Computed positions: -k'1-3' */
@@ -9,7 +9,7 @@
/* This gperf source file was generated by make_unicode_fold_data.py */
/*-
- * Copyright (c) 2017-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2017-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -3288,7 +3288,7 @@ onigenc_unicode_unfold_key(OnigCodePoint code)
{0x1e907, 4005, 1}
};
- if (0 == 0)
+
{
int key = hash(&code);
diff --git a/src/unicode_wb_data.c b/src/unicode_wb_data.c
index 7778157..8e1a267 100644
--- a/src/unicode_wb_data.c
+++ b/src/unicode_wb_data.c
@@ -1,6 +1,6 @@
/* unicode_wb_data.c: Generated by make_unicode_wb_data.py. */
/*-
- * Copyright (c) 2019 K.Kosako <kkosako0 AT gmail DOT com>
+ * Copyright (c) 2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -25,7 +25,7 @@
* SUCH DAMAGE.
*/
-#define WORD_BREAK_PROPERTY_VERSION 12_1_0
+#define WORD_BREAK_PROPERTY_VERSION 120100
/*
ALetter
diff --git a/src/utf16_be.c b/src/utf16_be.c
index 22bf74d..d99af71 100644
--- a/src/utf16_be.c
+++ b/src/utf16_be.c
@@ -2,7 +2,7 @@
utf16_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -103,7 +103,25 @@ utf16be_mbc_enc_len(const UChar* p)
static int
is_valid_mbc_string(const UChar* s, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_UTF16_BE, s, end);
+ while (s < end) {
+ int len = utf16be_mbc_enc_len(s);
+ if (len == 4) {
+ if (s + 2 >= end)
+ return FALSE;
+ if (! UTF16_IS_SURROGATE_SECOND(*(s+2)))
+ return FALSE;
+ }
+ else
+ if (UTF16_IS_SURROGATE_SECOND(*s))
+ return FALSE;
+
+ s += len;
+ }
+
+ if (s != end)
+ return FALSE;
+ else
+ return TRUE;
}
static int
@@ -146,7 +164,15 @@ utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
static int
utf16be_code_to_mbclen(OnigCodePoint code)
{
- return (code > 0xffff ? 4 : 2);
+ if (code > 0xffff) {
+ if (code > 0x10ffff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else
+ return 4;
+ }
+ else {
+ return 2;
+ }
}
static int
@@ -201,39 +227,6 @@ utf16be_mbc_case_fold(OnigCaseFoldType flag,
pp, end, fold);
}
-#if 0
-static int
-utf16be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- const UChar* p = *pp;
-
- (*pp) += EncLen_UTF16[*p];
-
- if (*p == 0) {
- int c, v;
-
- p++;
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- return TRUE;
- }
-
- c = *p;
- v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c, (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
-
- if ((v | BIT_CTYPE_LOWER) != 0) {
- /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
- if (c >= 0xaa && c <= 0xba)
- return FALSE;
- else
- return TRUE;
- }
- return (v != 0 ? TRUE : FALSE);
- }
-
- return FALSE;
-}
-#endif
-
static UChar*
utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
{
@@ -243,7 +236,8 @@ utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
s--;
}
- if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1)
+ if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1 &&
+ UTF16_IS_SURROGATE_FIRST(*(s-2)))
s -= 2;
return (UChar* )s;
diff --git a/src/utf16_le.c b/src/utf16_le.c
index 4b231c6..c6edd94 100644
--- a/src/utf16_le.c
+++ b/src/utf16_le.c
@@ -2,7 +2,7 @@
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -95,7 +95,15 @@ static const int EncLen_UTF16[] = {
static int
utf16le_code_to_mbclen(OnigCodePoint code)
{
- return (code > 0xffff ? 4 : 2);
+ if (code > 0xffff) {
+ if (code > 0x10ffff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else
+ return 4;
+ }
+ else {
+ return 2;
+ }
}
static int
@@ -110,7 +118,16 @@ is_valid_mbc_string(const UChar* p, const UChar* end)
const UChar* end1 = end - 1;
while (p < end1) {
- p += utf16le_mbc_enc_len(p);
+ int len = utf16le_mbc_enc_len(p);
+ if (len == 4) {
+ if (p + 3 < end && ! UTF16_IS_SURROGATE_SECOND(*(p + 3)))
+ return FALSE;
+ }
+ else
+ if (UTF16_IS_SURROGATE_SECOND(*(p + 1)))
+ return FALSE;
+
+ p += len;
}
if (p != end)
@@ -210,39 +227,6 @@ utf16le_mbc_case_fold(OnigCaseFoldType flag,
fold);
}
-#if 0
-static int
-utf16le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp,
- const UChar* end)
-{
- const UChar* p = *pp;
-
- (*pp) += EncLen_UTF16[*(p+1)];
-
- if (*(p+1) == 0) {
- int c, v;
-
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- return TRUE;
- }
-
- c = *p;
- v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
- (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
- if (c >= 0xaa && c <= 0xba)
- return FALSE;
- else
- return TRUE;
- }
- return (v != 0 ? TRUE : FALSE);
- }
-
- return FALSE;
-}
-#endif
-
static UChar*
utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
{
@@ -252,7 +236,8 @@ utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
s--;
}
- if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)
+ if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1 &&
+ UTF16_IS_SURROGATE_FIRST(*(s-1)))
s -= 2;
return (UChar* )s;
diff --git a/src/utf32_be.c b/src/utf32_be.c
index dd17d3b..67e50a2 100644
--- a/src/utf32_be.c
+++ b/src/utf32_be.c
@@ -2,7 +2,7 @@
utf32_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -119,39 +119,6 @@ utf32be_mbc_case_fold(OnigCaseFoldType flag,
fold);
}
-#if 0
-static int
-utf32be_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- const UChar* p = *pp;
-
- (*pp) += 4;
-
- if (*(p+2) == 0 && *(p+1) == 0 && *p == 0) {
- int c, v;
-
- p += 3;
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- return TRUE;
- }
-
- c = *p;
- v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
- (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
- if (c >= 0xaa && c <= 0xba)
- return FALSE;
- else
- return TRUE;
- }
- return (v != 0 ? TRUE : FALSE);
- }
-
- return FALSE;
-}
-#endif
-
static UChar*
utf32be_left_adjust_char_head(const UChar* start, const UChar* s)
{
diff --git a/src/utf32_le.c b/src/utf32_le.c
index d9fe3c6..2ae2275 100644
--- a/src/utf32_le.c
+++ b/src/utf32_le.c
@@ -2,7 +2,7 @@
utf32_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -120,38 +120,6 @@ utf32le_mbc_case_fold(OnigCaseFoldType flag,
fold);
}
-#if 0
-static int
-utf32le_is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end)
-{
- const UChar* p = *pp;
-
- (*pp) += 4;
-
- if (*(p+1) == 0 && *(p+2) == 0 && *(p+3) == 0) {
- int c, v;
-
- if (*p == 0xdf && (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
- return TRUE;
- }
-
- c = *p;
- v = ONIGENC_IS_UNICODE_ISO_8859_1_BIT_CTYPE(c,
- (BIT_CTYPE_UPPER | BIT_CTYPE_LOWER));
- if ((v | BIT_CTYPE_LOWER) != 0) {
- /* 0xaa, 0xb5, 0xba are lower case letter, but can't convert. */
- if (c >= 0xaa && c <= 0xba)
- return FALSE;
- else
- return TRUE;
- }
- return (v != 0 ? TRUE : FALSE);
- }
-
- return FALSE;
-}
-#endif
-
static UChar*
utf32le_left_adjust_char_head(const UChar* start, const UChar* s)
{
diff --git a/src/utf8.c b/src/utf8.c
index 70c1503..1178d09 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -2,7 +2,7 @@
utf8.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -97,33 +97,6 @@ is_valid_mbc_string(const UChar* p, const UChar* end)
return TRUE;
}
-#if 0
-static int
-is_mbc_newline(const UChar* p, const UChar* end)
-{
- if (p < end) {
- if (*p == 0x0a) return 1;
-
-#ifdef USE_UNICODE_ALL_LINE_TERMINATORS
-#ifndef USE_CRNL_AS_LINE_TERMINATOR
- if (*p == 0x0d) return 1;
-#endif
- if (p + 1 < end) {
- if (*(p+1) == 0x85 && *p == 0xc2) /* U+0085 */
- return 1;
- if (p + 2 < end) {
- if ((*(p+2) == 0xa8 || *(p+2) == 0xa9)
- && *(p+1) == 0x80 && *p == 0xe2) /* U+2028, U+2029 */
- return 1;
- }
- }
-#endif
- }
-
- return 0;
-}
-#endif
-
static OnigCodePoint
mbc_to_code(const UChar* p, const UChar* end)
{