summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore3
-rw-r--r--CMakeLists.txt41
-rw-r--r--HISTORY14
-rw-r--r--Makefile.am6
-rw-r--r--README.md71
-rwxr-xr-xbuild_harnesses.sh31
-rw-r--r--configure.ac2
-rw-r--r--contributed/libfuzzer-onig.cpp2
-rw-r--r--debian/watch2
-rw-r--r--doc/API26
-rw-r--r--doc/API.ja20
-rw-r--r--doc/UNICODE_PROPERTIES2
-rw-r--r--harnesses/ascii_compatible.dict111
-rw-r--r--harnesses/deluxe-encode-harness.c239
-rw-r--r--harnesses/dict_conv.py72
-rw-r--r--harnesses/encode-harness.c170
-rw-r--r--harnesses/syntax-harness.c120
-rw-r--r--index.html4
-rw-r--r--index_ja.html4
-rw-r--r--sample/bug_fix.c56
-rw-r--r--sample/crnl.c2
-rw-r--r--sample/encode.c142
-rw-r--r--sample/listcap.c2
-rw-r--r--sample/names.c3
-rw-r--r--sample/posix.c5
-rw-r--r--sample/scan.c2
-rw-r--r--sample/simple.c3
-rw-r--r--sample/sql.c4
-rw-r--r--sample/syntax.c2
-rw-r--r--sample/user_property.c5
-rw-r--r--src/gb18030.c6
-rw-r--r--src/oniguruma.h11
-rw-r--r--src/regcomp.c156
-rw-r--r--src/regenc.c2
-rw-r--r--src/regerror.c17
-rw-r--r--src/regexec.c130
-rw-r--r--src/regext.c6
-rw-r--r--src/regint.h6
-rw-r--r--src/regparse.c190
-rw-r--r--src/regparse.h22
-rw-r--r--src/utf16_be.c35
-rw-r--r--src/utf16_le.c26
-rw-r--r--test/test_utf8.c13
-rw-r--r--test/testu.c15
44 files changed, 1351 insertions, 450 deletions
diff --git a/.gitignore b/.gitignore
index 6af6a82..227b7df 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@ Makefile.in
m4/*.m4
/coverage
/coverage.info
+/fuzzers
# src/
/src/CaseFolding.txt
@@ -62,3 +63,5 @@ m4/*.m4
/sample/count
/sample/bug_fix
/sample/log*
+
+/harnesses/utf16*.dict
diff --git a/CMakeLists.txt b/CMakeLists.txt
index f3eca6b..c59bfe3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,28 +1,19 @@
cmake_minimum_required(VERSION 3.1)
-project(oniguruma VERSION 6.9.2)
+project(oniguruma
+ VERSION 6.9.3
+ LANGUAGES C)
set(PACKAGE onig)
set(PACKAGE_VERSION ${PROJECT_VERSION})
option(BUILD_SHARED_LIBS "Build shared libraries" ON)
option(ENABLE_POSIX_API "Include POSIX API" ON)
-
-set(USE_CRNL_AS_LINE_TERMINATOR 0)
-set(VERSION ${PACKAGE_VERSION})
-
if(MSVC)
- # Force to always compile with W4
- if(CMAKE_CXX_FLAGS MATCHES "/W[0-4]")
- string(REGEX REPLACE "/W[0-4]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
- else()
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
- endif()
-elseif(CMAKE_COMPILER_IS_GNUCXX)
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")
-elseif(CMAKE_COMPILER_IS_GNUCC)
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall")
+ option(MSVC_STATIC_RUNTIME "Build with static runtime" OFF)
endif()
+set(USE_CRNL_AS_LINE_TERMINATOR 0)
+set(VERSION ${PACKAGE_VERSION})
include(CheckCSourceCompiles)
include(CheckIncludeFiles)
@@ -73,6 +64,26 @@ target_include_directories(onig PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src>
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>)
+if(MSVC)
+  target_compile_options(onig PRIVATE
+    #/W4
+  )
+  if(MSVC_STATIC_RUNTIME)
+    # Link the static C runtime. Only the Debug configuration takes the debug
+    # CRT (/MTd); the optimized configurations, RelWithDebInfo included, use /MT.
+    target_compile_options(onig PRIVATE
+      $<$<CONFIG:Release>:/MT>
+      $<$<CONFIG:Debug>:/MTd>
+      $<$<CONFIG:MinSizeRel>:/MT>
+      $<$<CONFIG:RelWithDebInfo>:/MT>
+    )
+    target_compile_definitions(onig PUBLIC -DONIG_STATIC)
+  endif()
+elseif(CMAKE_COMPILER_IS_GNUCC)
+  # GCC: enable the common warning set for the library sources only.
+  target_compile_options(onig PRIVATE
+    -Wall
+  )
+endif()
+
+
# Installation (https://github.com/forexample/package-example)
# Introduce variables:
diff --git a/HISTORY b/HISTORY
index 3649e4e..0380cb4 100644
--- a/HISTORY
+++ b/HISTORY
@@ -1,5 +1,19 @@
History
+2019/08/06: Version 6.9.3 (security fix release)
+
+2019/07/30: add ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE
+2019/07/29: add STK_PREC_READ_START/END stack type
+2019/07/29: Fix #147: Stack Exhaustion Problem caused by some parsing functions
+2019/07/11: add a dictionary file for libfuzzer
+2019/07/07: add harnesses directory
+2019/07/05-2019/07/29: fix many problems found by libfuzzer programs
+2019/06/27: deprecate onig_new_deluxe()
+2019/06/27: Fix CVE-2019-13224: don't allow different encodings for onig_new_deluxe()
+2019/06/27: Fix CVE-2019-13225: problem in converting if-then-else pattern
+
+2019/05/07: Version 6.9.2 (same as Release Candidate 3)
+
2019/04/23: Release Candidate 3 for 6.9.2
2019/04/23: add doc/SYNTAX.md into distribution file
2019/04/09: Release Candidate 2 for 6.9.2
diff --git a/Makefile.am b/Makefile.am
index 6045eae..a0bbc7b 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -39,6 +39,12 @@ pkgconfig_DATA = oniguruma.pc
all-test:
cd test; make test
+sanitize:
+ make clean
+ ./configure CC=clang CFLAGS="-O -g -fsanitize=address"
+ make
+ make all-test
+
cov:
make lcov-clear
cd test; make CFLAGS="--coverage" test
diff --git a/README.md b/README.md
index 873f86d..6a4783b 100644
--- a/README.md
+++ b/README.md
@@ -27,46 +27,55 @@ Supported character encodings:
* doc/SYNTAX.md: contributed by seanofw
-New feature of version 6.9.2
------------------------------------
+Version 6.9.3 (security fix release)
+------------------------------------
+* Fixed CVE-2019-13224
+* Fixed CVE-2019-13225
+* Fixed many problems (found by libfuzzer programs)
+
+
+Version 6.9.2 (Reiwa)
+---------------------
+
+* add doc/SYNTAX.md
* Update Unicode version 12.1.0
-* NEW: Unicode Text Segment mode option (?y{g}) (?y{w})
+* NEW: Unicode Text Segment mode option (?y{g}) (?y{w}) (*original)
g: Extended Grapheme Cluster mode / w: Word mode
(Unicode Standard Annex #29 [http://unicode.org/reports/tr29/])
-New feature of version 6.9.1
---------------------------
+Version 6.9.1
+-------------
* Speed improvement (* especially UTF-8)
-New feature of version 6.9.0
---------------------------
+Version 6.9.0
+-------------
* Update Unicode version 11.0.0
* NEW: add Emoji properties
-New feature of version 6.8.2
---------------------------
+Version 6.8.2
+-------------
* Fix: #80 UChar in header causes issue
* NEW API: onig_set_callout_user_data_of_match_param() (* omission in 6.8.0)
* add doc/CALLOUTS.API and doc/CALLOUTS.API.ja
-New feature of version 6.8.1
---------------------------
+Version 6.8.1
+-------------
* Update shared library version to 5.0.0 for API incompatible changes from 6.7.1
-New feature of version 6.8.0
---------------------------
+Version 6.8.0
+-------------
* Retry-limit-in-match function enabled by default
* NEW: configure option --enable-posix-api=no (* enabled by default)
@@ -77,14 +86,14 @@ New feature of version 6.8.0
* Examples of Callouts program: [callout.c](sample/callout.c), [count.c](sample/count.c), [echo.c](sample/echo.c)
-New feature of version 6.7.1
---------------------------
+Version 6.7.1
+-------------
* NEW: Mechanism of retry-limit-in-match (* disabled by default)
-New feature of version 6.7.0
---------------------------
+Version 6.7.0
+-------------
* NEW: hexadecimal codepoint \uHHHH
* NEW: add ONIG_SYNTAX_ONIGURUMA (== ONIG_SYNTAX_DEFAULT)
@@ -92,8 +101,8 @@ New feature of version 6.7.0
* Reduced size of object file
-New feature of version 6.6.0
---------------------------
+Version 6.6.0
+-------------
* NEW: ASCII only mode options for character type/property (?WDSP)
* NEW: Extended Grapheme Cluster boundary \y, \Y
@@ -101,8 +110,8 @@ New feature of version 6.6.0
* Range-clear (Absent-clear) operator restores previous range in retractions.
-New feature of version 6.5.0
---------------------------
+Version 6.5.0
+-------------
* NEW: \K (keep)
* NEW: \R (general newline) \N (no newline)
@@ -114,16 +123,16 @@ New feature of version 6.5.0
* NEW: Absent stopper (?~|absent) (*original)
-New feature of version 6.4.0
---------------------------
+Version 6.4.0
+-------------
* Fix fatal problem of endless repeat on Windows
* NEW: call zero (call the total regexp) \g<0>
* NEW: relative backref/call by positive number \k<+n>, \g<+n>
-New feature of version 6.3.0
---------------------------
+Version 6.3.0
+-------------
* NEW: octal codepoint \o{.....}
* Fixed CVE-2017-9224
@@ -134,20 +143,20 @@ New feature of version 6.3.0
* Fixed CVE-2017-9229
-New feature of version 6.1.2
---------------------------
+Version 6.1.2
+-------------
* allow word bound, word begin and word end in look-behind.
* NEW option: ONIG_OPTION_CHECK_VALIDITY_OF_STRING
-New feature of version 6.1
---------------------------
+Version 6.1
+-----------
* improved doc/RE
* NEW API: onig_scan()
-New feature of version 6.0
---------------------------
+Version 6.0
+-----------
* Update Unicode 8.0 Property/Case-folding
* NEW API: onig_unicode_define_user_property()
diff --git a/build_harnesses.sh b/build_harnesses.sh
new file mode 100755
index 0000000..54dc9ff
--- /dev/null
+++ b/build_harnesses.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+# Rebuild libonig with AddressSanitizer, then build the fuzzing harnesses
+# into ./fuzzers.
+
+make clean
+autoreconf -vfi
+
+# build the library with ASAN
+#NO_LINK="-fsanitize=fuzzer-no-link"
+NO_LINK=""
+./configure CC=clang LD=clang CFLAGS="-g -fsanitize=address -fno-omit-frame-pointer $NO_LINK" LDFLAGS="-g -fsanitize=address -fno-omit-frame-pointer $NO_LINK"
+make -j4
+
+# $OUT is quoted at every use so a checkout path containing spaces still works.
+OUT="$(pwd)/fuzzers"
+mkdir -p "$OUT"
+LIBFUZZER_FLAGS="-fsanitize=fuzzer,address -fno-omit-frame-pointer"
+#LIBS="src/.libs/libonig.a"
+LIBS="src/.libs/libonig.a /usr/local/lib/libLLVMFuzzerMain.a"
+
+CFLAGS="-Isrc -g $LIBFUZZER_FLAGS"
+
+# Libfuzzer builds
+# ($LIBS and $CFLAGS stay unquoted on purpose: each holds several words.)
+clang++ contributed/libfuzzer-onig.cpp $LIBS $CFLAGS -o "$OUT/libfuzzer-onig"
+clang harnesses/syntax-harness.c $LIBS $CFLAGS -o "$OUT/syntax-libfuzzer"
+clang harnesses/encode-harness.c $LIBS $CFLAGS -o "$OUT/encode-libfuzzer"
+clang harnesses/deluxe-encode-harness.c $LIBS $CFLAGS -o "$OUT/deluxe-encode-libfuzzer"
+
+clang -DUTF16_BE harnesses/encode-harness.c $LIBS $CFLAGS -o "$OUT/utf16-be-libfuzzer"
+clang -DUTF16_LE harnesses/encode-harness.c $LIBS $CFLAGS -o "$OUT/utf16-le-libfuzzer"
+clang -DWITH_READ_MAIN harnesses/encode-harness.c src/.libs/libonig.a $CFLAGS -o "$OUT/main-encode"
+clang -DWITH_READ_MAIN -DUTF16_LE harnesses/encode-harness.c src/.libs/libonig.a $CFLAGS -o "$OUT/main-utf16-le"
+clang -DWITH_READ_MAIN -DUTF16_BE harnesses/encode-harness.c src/.libs/libonig.a $CFLAGS -o "$OUT/main-utf16-be"
+clang -DWITH_READ_MAIN harnesses/deluxe-encode-harness.c $LIBS $CFLAGS -o "$OUT/main-deluxe-encode"
diff --git a/configure.ac b/configure.ac
index 010a0d8..62c9fa5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,5 +1,5 @@
dnl Process this file with autoconf to produce a configure script.
-AC_INIT(onig, 6.9.2)
+AC_INIT(onig, 6.9.3)
AC_CONFIG_MACRO_DIR([m4])
diff --git a/contributed/libfuzzer-onig.cpp b/contributed/libfuzzer-onig.cpp
index e137b73..526c826 100644
--- a/contributed/libfuzzer-onig.cpp
+++ b/contributed/libfuzzer-onig.cpp
@@ -29,6 +29,8 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
#ifdef FULL_TEST
onig_initialize(&enc, 1);
+ onig_set_retry_limit_in_match(120);
+ onig_set_parse_depth_limit(120);
#endif
if (onig_new(&reg, Data, Data + Size, ONIG_OPTION_DEFAULT, enc,
diff --git a/debian/watch b/debian/watch
index 8a7b475..2f0e85f 100644
--- a/debian/watch
+++ b/debian/watch
@@ -4,4 +4,4 @@ dversionmangle=s/\+(debian|dfsg|ds|deb)\d*$//,\
uversionmangle=s/(\d)[_\.\-\+]?((RC|rc|pre|dev|beta|alpha)\d*)$/$1~$2/;s/RC/rc/;s/\-/\./g;s/\_/\./g,\
filenamemangle=s/(?:.*?)?(?:rel|v|oniguruma|ONIGURUMA)?[\-\_]?(\d\S+)\.(tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))/oniguruma-$1.$2/ \
https://github.com/kkos/oniguruma/tags \
-(?:.*?/)?(?:rel|v|oniguruma|ONIGURUMA)?[\-\_]?(\d\S+)\.(?:tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz))) \
+(?:.*?/)?(?:rel|v|oniguruma|ONIGURUMA)?[\-\_]?(\d\S+)\.(?:tgz|tbz|txz|(?:tar\.(?:gz|bz2|xz)))
diff --git a/doc/API b/doc/API
index 2309e5e..049db02 100644
--- a/doc/API
+++ b/doc/API
@@ -1,4 +1,4 @@
-Oniguruma API Version 6.9.2 2019/03/25
+Oniguruma API Version 6.9.3 2019/07/06
#include <oniguruma.h>
@@ -168,6 +168,9 @@ Oniguruma API Version 6.9.2 2019/03/25
# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
OnigCompileInfo* ci, OnigErrorInfo* einfo)
+ This function is deprecated, and it does not allow the case where
+ the encodings of pattern and target are different.
+
Create a regex object.
This function is deluxe version of onig_new().
@@ -299,6 +302,7 @@ Oniguruma API Version 6.9.2 2019/03/25
const UChar* range, OnigRegion* region, OnigOptionType option)
Search string and return search result and matching region.
+ Do not pass invalid byte string in the regex character encoding.
normal return: match position offset (i.e. p - str >= 0)
not found: ONIG_MISMATCH (< 0)
@@ -323,15 +327,19 @@ Oniguruma API Version 6.9.2 2019/03/25
const UChar* start, const UChar* range, OnigRegion* region,
OnigOptionType option, OnigMatchParam* mp)
- arguments
- 1-7: same as onig_search()
- 8 mp: match parameter values (match_stack_limit, retry_limit_in_match)
+ Search string and return search result and matching region.
+ Do not pass invalid byte string in the regex character encoding.
+
+ arguments
+ 1-7: same as onig_search()
+ 8 mp: match parameter values (match_stack_limit, retry_limit_in_match)
# int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at,
OnigRegion* region, OnigOptionType option)
Match string and return result and matching region.
+ Do not pass invalid byte string in the regex character encoding.
normal return: match length (>= 0)
not match: ONIG_MISMATCH ( < 0)
@@ -353,6 +361,9 @@ Oniguruma API Version 6.9.2 2019/03/25
const UChar* at, OnigRegion* region,
OnigOptionType option, OnigMatchParam* mp)
+ Match string and return result and matching region.
+ Do not pass invalid byte string in the regex character encoding.
+
arguments
1-6: same as onig_match()
7 mp: match parameter values (match_stack_limit, retry_limit_in_match)
@@ -364,6 +375,7 @@ Oniguruma API Version 6.9.2 2019/03/25
void* callback_arg)
Scan string and callback with matching region.
+ Do not pass invalid byte string in the regex character encoding.
normal return: number of matching times
error: error code
@@ -611,14 +623,20 @@ Oniguruma API Version 6.9.2 2019/03/25
# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end)
+
+ Return number of characters in the string.
+
+
# int onigenc_strlen_null(OnigEncoding enc, const UChar* s)
Return number of characters in the string.
+ Do not pass invalid byte string in the character encoding.
# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
Return number of bytes in the string.
+ Do not pass invalid byte string in the character encoding.
# int onig_set_default_syntax(OnigSyntaxType* syntax)
diff --git a/doc/API.ja b/doc/API.ja
index 164d0b8..5871558 100644
--- a/doc/API.ja
+++ b/doc/API.ja
@@ -1,4 +1,4 @@
-鬼車インターフェース Version 6.9.2 2019/03/29
+鬼車インターフェース Version 6.9.3 2019/07/06
#include <oniguruma.h>
@@ -167,6 +167,9 @@
# int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
OnigCompileInfo* ci, OnigErrorInfo* einfo)
+ この関数は廃止予定。
+ パターンと対象文字列の文字エンコーディングが異なる場合を許さなくなった。
+
正規表現オブジェクト(regex)を作成する。
この関数は、onig_new()のデラックス版。
@@ -298,6 +301,7 @@
const UChar* range, OnigRegion* region, OnigOptionType option)
正規表現で文字列を検索し、検索結果とマッチ領域を返す。
+ 正規表現オブジェクトの文字エンコーディングで、検索文字列として不正な文字列を渡してはいけない。
正常終了戻り値: マッチ位置 (p - str >= 0)
検索失敗: ONIG_MISMATCH (< 0)
@@ -322,6 +326,9 @@
const UChar* start, const UChar* range, OnigRegion* region,
OnigOptionType option, OnigMatchParam* mp)
+ 正規表現で文字列を検索し、検索結果とマッチ領域を返す。
+ 正規表現オブジェクトの文字エンコーディングで、検索文字列として不正な文字列を渡してはいけない。
+
引数
1-7: onig_search()と同じ
8 mp: マッチパラメタ値 (match_stack_limit, retry_limit_in_match)
@@ -331,6 +338,7 @@
const UChar* at, OnigRegion* region, OnigOptionType option)
文字列の指定位置でマッチングを行い、結果とマッチ領域を返す。
+ 正規表現オブジェクトの文字エンコーディングで、検索文字列として不正な文字列を渡してはいけない。
正常終了戻り値: マッチしたバイト長 (>= 0)
not match: ONIG_MISMATCH ( < 0)
@@ -352,6 +360,9 @@
const UChar* at, OnigRegion* region,
OnigOptionType option, OnigMatchParam* mp)
+ 文字列の指定位置でマッチングを行い、結果とマッチ領域を返す。
+ 正規表現オブジェクトの文字エンコーディングで、検索文字列として不正な文字列を渡してはいけない。
+
引数
1-6: onig_match()と同じ
7 mp: マッチパラメタ値 (match_stack_limit, retry_limit_in_match)
@@ -363,6 +374,7 @@
void* callback_arg)
正規表現で文字列をスキャンして、マッチングする毎にコールバック関数を呼び出す。
+ 正規表現オブジェクトの文字エンコーディングで、検索文字列として不正な文字列を渡してはいけない。
正常終了: マッチ回数 (0回も含める)
エラー: エラーコード (< 0)
@@ -616,14 +628,20 @@
# int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end)
+
+ 文字列の文字数を返す。
+
+
# int onigenc_strlen_null(OnigEncoding enc, const UChar* s)
文字列の文字数を返す。
+ 文字エンコーディングに対して、不正な文字列を渡してはいけない。
# int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s)
文字列のバイト数を返す。
+ 文字エンコーディングに対して、不正な文字列を渡してはいけない。
# int onig_set_default_syntax(OnigSyntaxType* syntax)
diff --git a/doc/UNICODE_PROPERTIES b/doc/UNICODE_PROPERTIES
index 1148b4d..ff2a6ce 100644
--- a/doc/UNICODE_PROPERTIES
+++ b/doc/UNICODE_PROPERTIES
@@ -1,4 +1,4 @@
-Unicode Properties (from Unicode Version: 12.0.0)
+Unicode Properties (from Unicode Version: 12.1.0)
15: ASCII_Hex_Digit
16: Adlam
diff --git a/harnesses/ascii_compatible.dict b/harnesses/ascii_compatible.dict
new file mode 100644
index 0000000..820bf47
--- /dev/null
+++ b/harnesses/ascii_compatible.dict
@@ -0,0 +1,111 @@
+# First-pass fuzzing dictionary for Oniguruma by Mark Griffin
+"\\o{17777777777}"
+"\\777"
+"\\u"
+"\\uFFFF"
+"\\xFF"
+"\\x{70000000}"
+"\\C-"
+"\\M-\\C-"
+"\\X"
+"\\p{"
+"\\p{^"
+"}"
+"]"
+")"
+"\\n"
+"\\r"
+"\\R"
+"\\W"
+"\\w"
+"\\s"
+"\\S"
+"\\d"
+"\\O"
+"\\X"
+"\\b"
+"\\y"
+"\\Y"
+"\\A"
+"\\z"
+"\\K"
+"\\G"
+"\\p{Print}"
+"\\p{ASCII}"
+"\\p{Alnum}"
+"{0,2}"
+"{3,}"
+"{,3}"
+"{5}"
+"{4,2}"
+"??"
+"*?"
+"+?"
+"*+"
+"{1,3}+"
+"(?>"
+"\\B"
+"(?y{"
+"[abcd1-9]"
+"[\\w\\d"
+"[\\p{Alphabetic}"
+"[\\P{Arabic}"
+"[\\x{ffff}"
+"[a-w&&"
+"[^"
+"[:graph:]"
+"[^:cntrl:]"
+"(?i:"
+"(?i)"
+"(?m:"
+"(?x:"
+"(?W:"
+"(?y-:"
+"(?y{w}:"
+"(?P:"
+"(?#"
+"(?:"
+"(?="
+"(?!"
+"(?<="
+"(?<!"
+"(?>"
+"(?<name>"
+"(?{"
+"(?{....}[x])"
+"(?{.}[x]>)"
+"(?{{{.}}})"
+"(?~"
+"(?~a)"
+"(?~|a|.*)"
+"(?~|(?:a|b))"
+"(?~|)"
+"(?(.) |.)"
+"(?('-n'))"
+"(?(n+0))"
+"(?(n+1))"
+"(?(n-1))"
+"(?(<name+0>))"
+"(?(<name+1>))"
+"(?(<name-1>))"
+"(*ERROR{-2000})"
+"(*COUNT[tag]{X})"
+"\\1"
+"\\2"
+"\\k<name>"
+"\\k<1>"
+"\\k<2>"
+"\\k<-1>"
+"\\k<-2>"
+"\\k<name+0>"
+"\\k<name+1>"
+"\\k<name-1>"
+"\\g<-1>"
+"\\g<name>"
+"name"
+"(?<name>a|b\\g<name>c)"
+"(?-i:\\g<name>)"
+"\\N{name}"
+"\\p{Hiragana}"
+"\\p{Katakana}"
+"\\p{Emoji}"
diff --git a/harnesses/deluxe-encode-harness.c b/harnesses/deluxe-encode-harness.c
new file mode 100644
index 0000000..e1f84a5
--- /dev/null
+++ b/harnesses/deluxe-encode-harness.c
@@ -0,0 +1,239 @@
+/*
+ * deluxe-encode-harness.c
+ * contributed by Mark Griffin
+ */
+#include <stdio.h>
+#include "oniguruma.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#define DEFAULT_LIMIT 120
+typedef unsigned char uint8_t;
+
+static int
+search(regex_t* reg, unsigned char* str, unsigned char* end)
+{
+ int r;
+ unsigned char *start, *range;
+ OnigRegion *region;
+
+ region = onig_region_new();
+
+ start = str;
+ range = end;
+ r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
+ if (r >= 0) {
+ int i;
+
+ fprintf(stdout, "match at %d (%s)\n", r,
+ ONIGENC_NAME(onig_get_encoding(reg)));
+ for (i = 0; i < region->num_regs; i++) {
+ fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
+ }
+ }
+ else if (r == ONIG_MISMATCH) {
+ fprintf(stdout, "search fail (%s)\n",
+ ONIGENC_NAME(onig_get_encoding(reg)));
+ }
+ else { /* error */
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+ onig_error_code_to_str((UChar* )s, r);
+ fprintf(stdout, "ERROR: %s\n", s);
+ fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg)));
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+ return -1;
+ }
+
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+ return 0;
+}
+
+static int
+exec(OnigEncoding enc, OnigOptionType options,
+ char* apattern, char* apattern_end, char* astr, char* astr_end)
+{
+ int r;
+ regex_t* reg;
+ OnigErrorInfo einfo;
+ UChar* pattern = (UChar* )apattern;
+ UChar* str = (UChar* )astr;
+ UChar* pattern_end = (UChar* )apattern_end;
+ unsigned char *end = (unsigned char* )astr_end;
+
+ onig_initialize(&enc, 1);
+ onig_set_retry_limit_in_match(DEFAULT_LIMIT);
+ onig_set_parse_depth_limit(DEFAULT_LIMIT);
+
+ r = onig_new(&reg, pattern, pattern_end,
+ options, enc, ONIG_SYNTAX_DEFAULT, &einfo);
+ if (r != ONIG_NORMAL) {
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+ onig_error_code_to_str((UChar* )s, r, &einfo);
+ fprintf(stdout, "ERROR: %s\n", s);
+ onig_end();
+ return -1;
+ }
+
+ r = search(reg, str, end);
+
+ onig_free(reg);
+ onig_end();
+ return 0;
+}
+
+static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN;
+
+static int
+exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,
+ OnigOptionType options, char* apattern, char* apattern_end,
+ char* astr, char* astr_end)
+{
+ int r;
+ regex_t* reg;
+ OnigCompileInfo ci;
+ OnigErrorInfo einfo;
+ UChar* pattern = (UChar* )apattern;
+ UChar* str = (UChar* )astr;
+ UChar* pattern_end = (UChar* )apattern_end;
+ unsigned char* end = (unsigned char* )astr_end;
+
+ onig_initialize(&str_enc, 1);
+ onig_set_retry_limit_in_match(DEFAULT_LIMIT);
+ onig_set_parse_depth_limit(DEFAULT_LIMIT);
+
+ ci.num_of_elements = 5;
+ ci.pattern_enc = pattern_enc;
+ ci.target_enc = str_enc;
+ ci.syntax = ONIG_SYNTAX_DEFAULT;
+ ci.option = options;
+ ci.case_fold_flag = CF;
+
+ r = onig_new_deluxe(&reg, pattern, pattern_end, &ci, &einfo);
+ if (r != ONIG_NORMAL) {
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+ onig_error_code_to_str((UChar* )s, r, &einfo);
+ fprintf(stdout, "ERROR: %s\n", s);
+ onig_end();
+ return -1;
+ }
+
+ if (onigenc_is_valid_mbc_string(str_enc, str, end) != 0) {
+ r = search(reg, str, end);
+ }
+
+ onig_free(reg);
+ onig_end();
+ return 0;
+}
+
+#define PATTERN_SIZE 48
+#define NUM_CONTROL_BYTES 1
+#define MIN_STR_SIZE 2
+int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
+{
+ int r;
+ size_t remaining_size;
+ unsigned char *data;
+ unsigned char pat_encoding_choice;
+ unsigned char str_encoding_choice;
+ unsigned char *pattern;
+ unsigned char *str;
+ unsigned char *pattern_end;
+ unsigned char *str_end;
+ unsigned int num_encodings;
+ OnigEncodingType *pattern_enc;
+ OnigEncodingType *str_enc;
+
+ OnigEncodingType *encodings[] = {
+ ONIG_ENCODING_ASCII,
+ ONIG_ENCODING_ISO_8859_1,
+ ONIG_ENCODING_ISO_8859_2,
+ ONIG_ENCODING_ISO_8859_3,
+ ONIG_ENCODING_ISO_8859_4,
+ ONIG_ENCODING_ISO_8859_5,
+ ONIG_ENCODING_ISO_8859_6,
+ ONIG_ENCODING_ISO_8859_7,
+ ONIG_ENCODING_ISO_8859_8,
+ ONIG_ENCODING_ISO_8859_9,
+ ONIG_ENCODING_ISO_8859_10,
+ ONIG_ENCODING_ISO_8859_11,
+ ONIG_ENCODING_ISO_8859_13,
+ ONIG_ENCODING_ISO_8859_14,
+ ONIG_ENCODING_ISO_8859_15,
+ ONIG_ENCODING_ISO_8859_16,
+ ONIG_ENCODING_UTF8,
+ ONIG_ENCODING_UTF16_BE,
+ ONIG_ENCODING_UTF16_LE,
+ ONIG_ENCODING_UTF32_BE,
+ ONIG_ENCODING_UTF32_LE,
+ ONIG_ENCODING_EUC_JP,
+ ONIG_ENCODING_EUC_TW,
+ ONIG_ENCODING_EUC_KR,
+ ONIG_ENCODING_EUC_CN,
+ ONIG_ENCODING_SJIS,
+ //ONIG_ENCODING_KOI8,
+ ONIG_ENCODING_KOI8_R,
+ ONIG_ENCODING_CP1251,
+ ONIG_ENCODING_BIG5,
+ ONIG_ENCODING_GB18030,
+ };
+
+ if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE))
+ return 0;
+ if (Size > 0x1000)
+ return 0;
+
+ remaining_size = Size;
+ data = (unsigned char *)(Data);
+
+ // pull off bytes to switch off
+ pat_encoding_choice = data[0];
+ data++;
+ remaining_size--;
+ str_encoding_choice = data[0];
+ data++;
+ remaining_size--;
+
+ // copy first PATTERN_SIZE bytes off to be the pattern
+ pattern = (unsigned char *)malloc(PATTERN_SIZE+4);
+ memset(pattern, 0, PATTERN_SIZE+4);
+ memcpy(pattern, data, PATTERN_SIZE);
+ pattern_end = pattern + PATTERN_SIZE;
+ data += PATTERN_SIZE;
+ remaining_size -= PATTERN_SIZE;
+
+ str = (unsigned char*)malloc(remaining_size+4);
+ memset(str, 0, remaining_size+4);
+ memcpy(str, data, remaining_size);
+ str_end = str + remaining_size;
+
+ num_encodings = sizeof(encodings) / sizeof(encodings[0]);
+ pattern_enc = encodings[pat_encoding_choice % num_encodings];
+ str_enc = encodings[str_encoding_choice % num_encodings];
+
+ r = exec_deluxe(pattern_enc, str_enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, (char *)str, (char *)str_end);
+
+ free(pattern);
+ free(str);
+
+ return r;
+}
+
+
+#ifdef WITH_READ_MAIN
+
+#include <unistd.h>
+
+/* Stand-alone driver: read one input from stdin (fd 0) and run it through
+ * LLVMFuzzerTestOneInput() once. Returns 1 if the read fails, 0 otherwise. */
+extern int main(int argc, char* argv[])
+{
+  ssize_t n;  /* read() returns ssize_t; -1 signals an error */
+  uint8_t Data[10000];
+
+  n = read(0, Data, sizeof(Data));
+  if (n < 0) {
+    /* do not hand a wrapped-around length to the harness */
+    perror("read");
+    return 1;
+  }
+  fprintf(stdout, "n: %zd\n", n);
+  LLVMFuzzerTestOneInput(Data, (size_t )n);
+
+  return 0;
+}
+#endif /* WITH_READ_MAIN */
diff --git a/harnesses/dict_conv.py b/harnesses/dict_conv.py
new file mode 100644
index 0000000..f721293
--- /dev/null
+++ b/harnesses/dict_conv.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+# dict_conv.py (Python3 script)
+
+import sys
+
+ENC_UTF16_BE = 1
+ENC_UTF16_LE = 2
+
+# Append c to s, adding the four-character text \x00 as padding:
+# before c for ENC_UTF16_BE, after c for ENC_UTF16_LE. Returns the new string.
+def add_char(enc, s, c):
+ if enc == ENC_UTF16_BE:
+ s += "\\x00"
+
+ s += c
+ if enc == ENC_UTF16_LE:
+ s += "\\x00"
+
+ return s
+
+def conv(enc, s):
+    """Convert the body of one dictionary entry for encoding ``enc``.
+
+    Each plain character, and each two-character escape (a backslash
+    followed by a backslash or a double quote), is passed to add_char()
+    as one unit. Returns the converted string.
+
+    Raises ValueError for any other escape, including a lone backslash
+    at the end of the entry.
+    """
+    n = len(s)
+    r = ""
+    i = 0
+    while i < n:
+        c = s[i]
+        if c == '\\':
+            # Slice instead of index: a trailing backslash yields '' and is
+            # reported as an unknown escape rather than an IndexError.
+            c = s[i+1:i+2]
+            if c == '\\' or c == '"':
+                r = add_char(enc, r, "\\" + c)
+                i += 2
+                continue
+            else:
+                raise ValueError("Unknown escape {0}".format(s))
+
+        r = add_char(enc, r, c)
+        i += 1
+
+    return r
+
+def main(enc):
+    """Read dictionary lines from stdin and print them converted for ``enc``.
+
+    Comment lines (starting with '#') and blank lines are copied through
+    unchanged; double-quoted entries are converted with conv().
+
+    Raises ValueError for any other line.
+    """
+    print("# This file was generated by dict_conv.py.")
+    for line in sys.stdin:
+        s = line.strip()
+        # Check for the empty string first so s[0] is never evaluated on it.
+        if not s or s[0] == '#':
+            print(s)
+            continue
+
+        # Require both quotes: a lone '"' is not a valid entry.
+        if len(s) >= 2 and s[0] == '"' and s[-1] == '"':
+            s = conv(enc, s[1:-1])
+            print("\"{0}\"".format(s))
+        else:
+            raise ValueError("Invalid format {0}".format(s))
+
+def usage(argv):
+ raise RuntimeError("Usage: python {0} utf16_be/utf16_le".format(argv[0]))
+
+
+if __name__ == "__main__":
+ argv = sys.argv
+ argc = len(argv)
+
+ if argc >= 2:
+ s = argv[1]
+ if s == 'utf16_be':
+ enc = ENC_UTF16_BE
+ elif s == 'utf16_le':
+ enc = ENC_UTF16_LE
+ else:
+ usage(argv)
+ else:
+ usage(argv)
+
+ main(enc)
diff --git a/harnesses/encode-harness.c b/harnesses/encode-harness.c
new file mode 100644
index 0000000..e57fd4f
--- /dev/null
+++ b/harnesses/encode-harness.c
@@ -0,0 +1,170 @@
+/*
+ * encode-harness.c
+ * contributed by Mark Griffin
+ */
+#include <stdio.h>
+#include "oniguruma.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#define PARSE_DEPTH_LIMIT 120
+#define RETRY_LIMIT 4000
+
+typedef unsigned char uint8_t;
+
+static int
+search(regex_t* reg, unsigned char* str, unsigned char* end)
+{
+ int r;
+ unsigned char *start, *range;
+ OnigRegion *region;
+
+ region = onig_region_new();
+
+ start = str;
+ range = end;
+ r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
+ if (r >= 0) {
+ int i;
+
+ fprintf(stdout, "match at %d (%s)\n", r,
+ ONIGENC_NAME(onig_get_encoding(reg)));
+ for (i = 0; i < region->num_regs; i++) {
+ fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
+ }
+ }
+ else if (r == ONIG_MISMATCH) {
+ fprintf(stdout, "search fail (%s)\n",
+ ONIGENC_NAME(onig_get_encoding(reg)));
+ }
+ else { /* error */
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+ onig_error_code_to_str((UChar* )s, r);
+ fprintf(stdout, "ERROR: %s\n", s);
+ fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg)));
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+ return -1;
+ }
+
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+ return 0;
+}
+
+static int
+exec(OnigEncoding enc, OnigOptionType options,
+ char* apattern, char* apattern_end, char* astr, UChar* end)
+{
+ int r;
+ regex_t* reg;
+ OnigErrorInfo einfo;
+ UChar* pattern = (UChar* )apattern;
+ UChar* str = (UChar* )astr;
+ UChar* pattern_end = (UChar* )apattern_end;
+
+ onig_initialize(&enc, 1);
+ onig_set_retry_limit_in_match(RETRY_LIMIT);
+ onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT);
+
+ r = onig_new(&reg, pattern, pattern_end,
+ options, enc, ONIG_SYNTAX_DEFAULT, &einfo);
+ if (r != ONIG_NORMAL) {
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+ onig_error_code_to_str((UChar* )s, r, &einfo);
+ fprintf(stdout, "ERROR: %s\n", s);
+ onig_end();
+ return -1;
+ }
+
+ if (onigenc_is_valid_mbc_string(enc, str, end) != 0) {
+ r = search(reg, str, end);
+ }
+
+ onig_free(reg);
+ onig_end();
+ return 0;
+}
+
+#define PATTERN_SIZE 32
+#define NUM_CONTROL_BYTES 1
+#define MIN_STR_SIZE 1
+int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
+{
+ if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE))
+ return 0;
+ if (Size > 0x1000)
+ return 0;
+
+ unsigned char *pattern_end;
+ unsigned char *str_null_end;
+
+ size_t remaining_size = Size;
+ unsigned char *data = (unsigned char *)(Data);
+
+ // pull off one byte to switch off
+ unsigned char encoding_choice = data[0];
+ data++;
+ remaining_size--;
+
+ // copy first PATTERN_SIZE bytes off to be the pattern
+ unsigned char *pattern = (unsigned char *)malloc(PATTERN_SIZE+4);
+ memset(pattern, 0, PATTERN_SIZE+4);
+ memcpy(pattern, data, PATTERN_SIZE);
+ pattern_end = pattern + PATTERN_SIZE;
+ data += PATTERN_SIZE;
+ remaining_size -= PATTERN_SIZE;
+
+ unsigned char *str = (unsigned char*)malloc(remaining_size+4);
+ memset(str, 0, remaining_size+4);
+ memcpy(str, data, remaining_size);
+ str_null_end = str + remaining_size;
+
+ int r;
+ OnigEncodingType *encodings[] = {
+ ONIG_ENCODING_SJIS,
+ ONIG_ENCODING_EUC_JP,
+ ONIG_ENCODING_CP1251,
+ ONIG_ENCODING_ISO_8859_1,
+ ONIG_ENCODING_UTF8,
+ ONIG_ENCODING_KOI8_R,
+ ONIG_ENCODING_BIG5
+ };
+
+ OnigEncodingType *enc;
+
+#ifdef UTF16_BE
+ enc = ONIG_ENCODING_UTF16_BE;
+#else
+#ifdef UTF16_LE
+ enc = ONIG_ENCODING_UTF16_LE;
+#else
+ int num_encodings = sizeof(encodings)/sizeof(encodings[0]);
+ enc = encodings[encoding_choice % num_encodings];
+#endif
+#endif
+
+ r = exec(enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end,
+ (char *)str, str_null_end);
+
+ free(pattern);
+ free(str);
+
+ return r;
+}
+
+#ifdef WITH_READ_MAIN
+
+#include <unistd.h>
+
+/* Stand-alone driver: read one input from stdin (fd 0) and run it through
+ * LLVMFuzzerTestOneInput() once. Returns 1 if the read fails, 0 otherwise. */
+extern int main(int argc, char* argv[])
+{
+  ssize_t n;  /* read() returns ssize_t; -1 signals an error */
+  uint8_t Data[10000];
+
+  n = read(0, Data, sizeof(Data));
+  if (n < 0) {
+    /* do not hand a wrapped-around length to the harness */
+    perror("read");
+    return 1;
+  }
+  fprintf(stdout, "n: %zd\n", n);
+  LLVMFuzzerTestOneInput(Data, (size_t )n);
+
+  return 0;
+}
+#endif /* WITH_READ_MAIN */
diff --git a/harnesses/syntax-harness.c b/harnesses/syntax-harness.c
new file mode 100644
index 0000000..0fb3587
--- /dev/null
+++ b/harnesses/syntax-harness.c
@@ -0,0 +1,120 @@
+/*
+ * syntax-harness.c
+ * contributed by Mark Griffin
+ */
+#include <stdio.h>
+#include <string.h>
+#include "oniguruma.h"
+
+#include <stdlib.h>
+
+#define DEFAULT_LIMIT 120
+typedef unsigned char uint8_t;
+
+extern int exec(OnigSyntaxType* syntax, char* apattern, char* astr)
+{
+ int r;
+ unsigned char *start, *range, *end;
+ regex_t* reg;
+ OnigErrorInfo einfo;
+ OnigRegion *region;
+ UChar* pattern = (UChar* )apattern;
+ UChar* str = (UChar* )astr;
+
+ r = onig_new(&reg, pattern, pattern + strlen((char* )pattern),
+ ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo);
+ if (r != ONIG_NORMAL) {
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+ onig_error_code_to_str((UChar* )s, r, &einfo);
+ fprintf(stdout, "ERROR: %s\n", s);
+ return -1;
+ }
+
+ region = onig_region_new();
+
+ end = str + strlen((char* )str);
+ start = str;
+ range = end;
+ r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
+ if (r >= 0) {
+ int i;
+
+ fprintf(stdout, "match at %d\n", r);
+ for (i = 0; i < region->num_regs; i++) {
+ fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
+ }
+ }
+ else if (r == ONIG_MISMATCH) {
+ fprintf(stdout, "search fail\n");
+ }
+ else { /* error */
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+ onig_error_code_to_str((UChar* )s, r);
+ fprintf(stdout, "ERROR: %s\n", s);
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+ onig_free(reg);
+ return -1;
+ }
+
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+ onig_free(reg);
+ return 0;
+}
+
+#define PATTERN_SIZE 64
+#define NUM_CONTROL_BYTES 1
+#define MIN_STR_SIZE 1
+int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
+{
+ if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE))
+ return 0;
+ if (Size > 0x1000)
+ return 0;
+ size_t remaining_size = Size;
+ unsigned char *data = (unsigned char *)(Data);
+
+ // pull off one byte to switch syntax choice
+ unsigned char syntax_choice = data[0];
+ data++;
+ remaining_size--;
+
+ // copy first PATTERN_SIZE bytes off to be the pattern
+ unsigned char *pattern = (unsigned char *)malloc(PATTERN_SIZE+1);
+ memset(pattern, 0, PATTERN_SIZE+1);
+ memcpy(pattern, data, PATTERN_SIZE);
+ data += PATTERN_SIZE;
+ remaining_size -= PATTERN_SIZE;
+
+ unsigned char *str = (unsigned char*)malloc(remaining_size+1);
+ memset(str, 0, remaining_size+1);
+ memcpy(str, data, remaining_size);
+
+ OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII };
+ onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0]));
+
+ onig_set_retry_limit_in_match(DEFAULT_LIMIT);
+ onig_set_parse_depth_limit(DEFAULT_LIMIT);
+
+ OnigSyntaxType *syntaxes[] = {
+ ONIG_SYNTAX_POSIX_EXTENDED,
+ ONIG_SYNTAX_EMACS,
+ ONIG_SYNTAX_GREP,
+ ONIG_SYNTAX_GNU_REGEX,
+ ONIG_SYNTAX_JAVA,
+ ONIG_SYNTAX_PERL_NG,
+ ONIG_SYNTAX_RUBY,
+ ONIG_SYNTAX_ONIGURUMA,
+ };
+ OnigSyntaxType *syntax = syntaxes[syntax_choice % 8];
+
+ int r;
+ r = exec(syntax, (char *)pattern, (char *)str);
+ // r = exec(ONIG_SYNTAX_JAVA, "\\p{XDigit}\\P{XDigit}[a-c&&b-g]", "bgc");
+
+ onig_end();
+
+ free(pattern);
+ free(str);
+
+ return 0;
+}
diff --git a/index.html b/index.html
index 5ad8231..58ba66d 100644
--- a/index.html
+++ b/index.html
@@ -8,7 +8,7 @@
<h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>)
<p>
-(c) K.Kosako, updated at: 2018/12/06
+(c) K.Kosako, updated at: 2019/08/05
</p>
<dl>
@@ -16,6 +16,8 @@
<dt><b>What's new</b>
</font>
<ul>
+<li>2019/08/06: Version 6.9.3 released.</li>
+<li>2019/05/07: Version 6.9.2 released.</li>
<li>2018/12/11: Version 6.9.1 released.</li>
<li>2018/09/03: Version 6.9.0 released.</li>
<li>2018/04/17: Version 6.8.2 released.</li>
diff --git a/index_ja.html b/index_ja.html
index 0ada788..6b75c6c 100644
--- a/index_ja.html
+++ b/index_ja.html
@@ -8,7 +8,7 @@
<h1>鬼車</h1>
<p>
-(c) K.Kosako, 最終更新: 2018/12/06
+(c) K.Kosako, 最終更新: 2019/08/05
</p>
<dl>
@@ -16,6 +16,8 @@
<dt><b>更新情報</b>
</font>
<ul>
+<li>2019/08/06: Version 6.9.3 リリース</li>
+<li>2019/05/07: Version 6.9.2 リリース</li>
<li>2018/12/11: Version 6.9.1 リリース</li>
<li>2018/09/03: Version 6.9.0 リリース</li>
<li>2018/04/17: Version 6.8.2 リリース</li>
diff --git a/sample/bug_fix.c b/sample/bug_fix.c
index 81c2784..3f60c5b 100644
--- a/sample/bug_fix.c
+++ b/sample/bug_fix.c
@@ -4,8 +4,6 @@
#include <stdio.h>
#include "oniguruma.h"
-static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN;
-
static int
search(regex_t* reg, unsigned char* str, unsigned char* end)
{
@@ -36,6 +34,7 @@ search(regex_t* reg, unsigned char* str, unsigned char* end)
onig_error_code_to_str((UChar* )s, r);
fprintf(stderr, "ERROR: %s\n", s);
fprintf(stderr, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg)));
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
return -1;
}
@@ -44,45 +43,6 @@ search(regex_t* reg, unsigned char* str, unsigned char* end)
}
static int
-exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,
- OnigOptionType options, char* apattern, char* astr)
-{
- int r;
- unsigned char *end;
- regex_t* reg;
- OnigCompileInfo ci;
- OnigErrorInfo einfo;
- UChar* pattern = (UChar* )apattern;
- UChar* str = (UChar* )astr;
-
- onig_initialize(&str_enc, 1);
-
- ci.num_of_elements = 5;
- ci.pattern_enc = pattern_enc;
- ci.target_enc = str_enc;
- ci.syntax = ONIG_SYNTAX_DEFAULT;
- ci.option = options;
- ci.case_fold_flag = CF;
-
- r = onig_new_deluxe(&reg, pattern,
- pattern + onigenc_str_bytelen_null(pattern_enc, pattern),
- &ci, &einfo);
- if (r != ONIG_NORMAL) {
- char s[ONIG_MAX_ERROR_MESSAGE_LEN];
- onig_error_code_to_str((UChar* )s, r, &einfo);
- fprintf(stderr, "ERROR: %s\n", s);
- return -1;
- }
-
- end = str + onigenc_str_bytelen_null(str_enc, str);
- r = search(reg, str, end);
-
- onig_free(reg);
- onig_end();
- return 0;
-}
-
-static int
exec(OnigEncoding enc, OnigOptionType options, char* apattern, char* astr)
{
int r;
@@ -92,8 +52,6 @@ exec(OnigEncoding enc, OnigOptionType options, char* apattern, char* astr)
UChar* pattern = (UChar* )apattern;
UChar* str = (UChar* )astr;
- onig_initialize(&enc, 1);
-
r = onig_new(&reg, pattern,
pattern + onigenc_str_bytelen_null(enc, pattern),
options, enc, ONIG_SYNTAX_DEFAULT, &einfo);
@@ -108,7 +66,6 @@ exec(OnigEncoding enc, OnigOptionType options, char* apattern, char* astr)
r = search(reg, str, end);
onig_free(reg);
- onig_end();
return 0;
}
@@ -116,16 +73,21 @@ exec(OnigEncoding enc, OnigOptionType options, char* apattern, char* astr)
extern int main(int argc, char* argv[])
{
+ OnigEncoding use_encs[1];
+
+ use_encs[0] = ONIG_ENCODING_UTF8;
+ onig_initialize(use_encs, 1);
+
/* fix ignore case in look-behind
commit: 3340ec2cc5627172665303fe248c9793354d2251 */
- exec_deluxe(ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8,
- ONIG_OPTION_IGNORECASE,
- "(?<=\305\211)a", "\312\274na"); /* \u{0149}a \u{02bc}na */
+ exec(ONIG_ENCODING_UTF8, ONIG_OPTION_IGNORECASE,
+ "(?<=\305\211)a", "\312\274na"); /* \u{0149}a \u{02bc}na */
exec(ONIG_ENCODING_UTF8, ONIG_OPTION_NONE, "(\\2)(\\1)", "aa"); /* fail. */
exec(ONIG_ENCODING_UTF8, ONIG_OPTION_FIND_LONGEST,
"a*", "aa aaa aaaa aaaaa "); /* match 12-17 */
+ onig_end();
return 0;
}
diff --git a/sample/crnl.c b/sample/crnl.c
index 3ad1210..bfa563e 100644
--- a/sample/crnl.c
+++ b/sample/crnl.c
@@ -65,6 +65,8 @@ x(int no, char* pattern_arg, char* str_arg,
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str(s, r);
fprintf(stderr, "ERROR: %s\n", s);
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+ onig_free(reg);
return -1;
}
diff --git a/sample/encode.c b/sample/encode.c
index 8a03ab8..c5d4771 100644
--- a/sample/encode.c
+++ b/sample/encode.c
@@ -34,6 +34,7 @@ search(regex_t* reg, unsigned char* str, unsigned char* end)
onig_error_code_to_str((UChar* )s, r);
fprintf(stderr, "ERROR: %s\n", s);
fprintf(stderr, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg)));
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
return -1;
}
@@ -72,55 +73,6 @@ exec(OnigEncoding enc, OnigOptionType options,
return 0;
}
-static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN;
-
-#if 0
-static void
-set_case_fold(OnigCaseFoldType cf)
-{
- CF = cf;
-}
-#endif
-
-static int
-exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,
- OnigOptionType options, char* apattern, char* astr)
-{
- int r;
- unsigned char *end;
- regex_t* reg;
- OnigCompileInfo ci;
- OnigErrorInfo einfo;
- UChar* pattern = (UChar* )apattern;
- UChar* str = (UChar* )astr;
-
- onig_initialize(&str_enc, 1);
-
- ci.num_of_elements = 5;
- ci.pattern_enc = pattern_enc;
- ci.target_enc = str_enc;
- ci.syntax = ONIG_SYNTAX_DEFAULT;
- ci.option = options;
- ci.case_fold_flag = CF;
-
- r = onig_new_deluxe(&reg, pattern,
- pattern + onigenc_str_bytelen_null(pattern_enc, pattern),
- &ci, &einfo);
- if (r != ONIG_NORMAL) {
- char s[ONIG_MAX_ERROR_MESSAGE_LEN];
- onig_error_code_to_str((UChar* )s, r, &einfo);
- fprintf(stderr, "ERROR: %s\n", s);
- return -1;
- }
-
- end = str + onigenc_str_bytelen_null(str_enc, str);
- r = search(reg, str, end);
-
- onig_free(reg);
- onig_end();
- return 0;
-}
-
extern int main(int argc, char* argv[])
{
int r;
@@ -196,39 +148,6 @@ extern int main(int argc, char* argv[])
r = exec(ONIG_ENCODING_ISO_8859_1, ONIG_OPTION_IGNORECASE,
"is", "iss");
- r = exec_deluxe(ONIG_ENCODING_ASCII, ONIG_ENCODING_UTF16_BE,
- ONIG_OPTION_NONE, "a+",
- "\000b\000a\000a\000a\000c\000c\000\000");
-
- r = exec_deluxe(ONIG_ENCODING_ASCII, ONIG_ENCODING_UTF16_LE,
- ONIG_OPTION_NONE, "a+",
- "b\000a\000a\000a\000a\000c\000\000\000");
-
- r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_LE,
- ONIG_OPTION_NONE,
- "\000b\000a\000a\000a\000c\000c\000\000",
- "x\000b\000a\000a\000a\000c\000c\000\000\000");
-
- r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
- ONIG_OPTION_IGNORECASE,
- "\337", "\000S\000S\000\000");
-
- r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
- ONIG_OPTION_IGNORECASE,
- "SS", "\000\337\000\000");
-
- r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_LE,
- ONIG_OPTION_IGNORECASE,
- "\337", "S\000S\000\000\000");
-
- r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF32_BE,
- ONIG_OPTION_IGNORECASE,
- "SS", "\000\000\000\337\000\000\000\000");
-
- r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF32_LE,
- ONIG_OPTION_IGNORECASE,
- "\337", "S\000\000\000S\000\000\000\000\000\000\000");
-
r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_NONE,
"\000[\000[\000:\000a\000l\000n\000u\000m\000:\000]\000]\000+\000\000",
"\000#\002\120\000a\000Z\012\077\012\076\012\075\000\000");
@@ -242,44 +161,34 @@ extern int main(int argc, char* argv[])
r = exec(ONIG_ENCODING_GB18030, ONIG_OPTION_IGNORECASE,
"(Aa\\d)+", "BaA5Aa0234");
- r = exec_deluxe(ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_UTF16_BE,
- ONIG_OPTION_NONE,
- "^\\P{Hiragana}\\p{^Hiragana}(\\p{Hiragana}+)$",
- "\060\100\060\240\060\101\060\102\060\226\060\237\000\000");
-
- r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
- ONIG_OPTION_IGNORECASE,
- "\000[\000\337\000]\000\000", "\000S\000S\000\000");
+ r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
+ "\000[\000\337\000]\000\000", "\000S\000S\000\000");
- r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
- ONIG_OPTION_IGNORECASE,
- "\000[\000\337\000]\000\000", "\000s\000S\000\000");
+ r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
+ "\000[\000\337\000]\000\000", "\000s\000S\000\000");
- r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
- ONIG_OPTION_IGNORECASE,
- "\000^\000[\000\001\000-\377\375\000]\000$\000\000",
- "\000s\000S\000\000");
+ r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
+ "\000^\000[\000\001\000-\377\375\000]\000$\000\000",
+ "\000s\000S\000\000");
- r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
- ONIG_OPTION_IGNORECASE,
- "\000S\000S\000\000",
- "\000S\000T\000\337\000\000");
+ r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
+ "\000S\000S\000\000",
+ "\000S\000T\000\337\000\000");
- r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
- ONIG_OPTION_IGNORECASE,
- "\000S\000T\000S\000S\000\000",
- "\000S\000t\000s\000S\000\000");
+ r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
+ "\000S\000T\000S\000S\000\000",
+ "\000S\000t\000s\000S\000\000");
{
UChar pat[] = { 0x1f, 0xfc, 0x00, 0x00 };
UChar str1[] = { 0x21, 0x26, 0x1f, 0xbe, 0x00, 0x00 };
UChar str2[] = { 0x1f, 0xf3, 0x00, 0x00 };
- r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
- ONIG_OPTION_IGNORECASE, (char* )pat, (char* )str1);
+ r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
+ (char* )pat, (char* )str1);
- r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
- ONIG_OPTION_IGNORECASE, (char* )pat, (char* )str2);
+ r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
+ (char* )pat, (char* )str2);
}
#if 0
@@ -287,17 +196,14 @@ extern int main(int argc, char* argv[])
set_case_fold(ONIGENC_CASE_FOLD_TURKISH_AZERI);
- r = exec_deluxe(ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8,
- ONIG_OPTION_IGNORECASE,
- "Ii", "\304\261\304\260");
+ r = exec(ONIG_ENCODING_UTF8, ONIG_ENCODING_UTF8, ONIG_OPTION_IGNORECASE,
+ "Ii", "\304\261\304\260");
- r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
- ONIG_OPTION_IGNORECASE,
- "\000I\000i\000\000", "\001\061\001\060\000\000");
+ r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
+ "\000I\000i\000\000", "\001\061\001\060\000\000");
- r = exec_deluxe(ONIG_ENCODING_UTF16_BE, ONIG_ENCODING_UTF16_BE,
- ONIG_OPTION_IGNORECASE,
- "\001\061\001\060\000\000", "\000I\000i\000\000");
+ r = exec(ONIG_ENCODING_UTF16_BE, ONIG_OPTION_IGNORECASE,
+ "\001\061\001\060\000\000", "\000I\000i\000\000");
set_case_fold(ONIGENC_CASE_FOLD_MIN);
#endif
diff --git a/sample/listcap.c b/sample/listcap.c
index e0fe23a..a73f7d4 100644
--- a/sample/listcap.c
+++ b/sample/listcap.c
@@ -69,6 +69,8 @@ extern int ex(unsigned char* str, unsigned char* pattern,
else { /* error */
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r);
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+ onig_free(reg);
return -1;
}
diff --git a/sample/names.c b/sample/names.c
index a838056..9b1eb24 100644
--- a/sample/names.c
+++ b/sample/names.c
@@ -65,6 +65,9 @@ extern int main(int argc, char* argv[])
else { /* error */
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r);
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+ onig_free(reg);
+ onig_end();
return -1;
}
diff --git a/sample/posix.c b/sample/posix.c
index 35ccb68..c555936 100644
--- a/sample/posix.c
+++ b/sample/posix.c
@@ -49,6 +49,7 @@ extern int main(int argc, char* argv[])
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);
regfree(&reg);
+ onig_end();
return -1;
}
x(&reg, pattern, (UChar* )"aaabbbbd");
@@ -60,6 +61,7 @@ extern int main(int argc, char* argv[])
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);
regfree(&reg);
+ onig_end();
return -1;
}
x(&reg, pattern, (UChar* )"a+b{2,7}d?|uuu");
@@ -71,6 +73,7 @@ extern int main(int argc, char* argv[])
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);
regfree(&reg);
+ onig_end();
return -1;
}
x(&reg, pattern, (UChar* )"aaaabbbbbbd");
@@ -83,6 +86,7 @@ extern int main(int argc, char* argv[])
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);
regfree(&reg);
+ onig_end();
return -1;
}
x(&reg, pattern, (UChar* )"aaabbbbd)");
@@ -93,6 +97,7 @@ extern int main(int argc, char* argv[])
regerror(r, &reg, buf, sizeof(buf));
fprintf(stderr, "ERROR: %s\n", buf);
regfree(&reg);
+ onig_end();
return -1;
}
x(&reg, pattern, (UChar* )"a\nb\n");
diff --git a/sample/scan.c b/sample/scan.c
index ad5ae74..4039e46 100644
--- a/sample/scan.c
+++ b/sample/scan.c
@@ -36,6 +36,7 @@ scan(regex_t* reg, unsigned char* str, unsigned char* end)
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((OnigUChar* )s, r);
fprintf(stderr, "ERROR: %s\n", s);
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
return -1;
}
@@ -63,6 +64,7 @@ exec(OnigEncoding enc, OnigOptionType options, char* apattern, char* astr)
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((OnigUChar* )s, r, &einfo);
fprintf(stderr, "ERROR: %s\n", s);
+ onig_end();
return -1;
}
diff --git a/sample/simple.c b/sample/simple.c
index 95110b8..5a14042 100644
--- a/sample/simple.c
+++ b/sample/simple.c
@@ -49,6 +49,9 @@ extern int main(int argc, char* argv[])
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r);
fprintf(stderr, "ERROR: %s\n", s);
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+ onig_free(reg);
+ onig_end();
return -1;
}
diff --git a/sample/sql.c b/sample/sql.c
index 8e95f70..1602ac9 100644
--- a/sample/sql.c
+++ b/sample/sql.c
@@ -42,6 +42,7 @@ extern int main(int argc, char* argv[])
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r, &einfo);
fprintf(stderr, "ERROR: %s\n", s);
+ onig_end();
return -1;
}
@@ -66,6 +67,9 @@ extern int main(int argc, char* argv[])
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r);
fprintf(stderr, "ERROR: %s\n", s);
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+ onig_free(reg);
+ onig_end();
return -1;
}
diff --git a/sample/syntax.c b/sample/syntax.c
index e292079..e034608 100644
--- a/sample/syntax.c
+++ b/sample/syntax.c
@@ -45,6 +45,8 @@ extern int exec(OnigSyntaxType* syntax, char* apattern, char* astr)
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r);
fprintf(stderr, "ERROR: %s\n", s);
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+ onig_free(reg);
return -1;
}
diff --git a/sample/user_property.c b/sample/user_property.c
index 8b2abd2..d52adc0 100644
--- a/sample/user_property.c
+++ b/sample/user_property.c
@@ -40,6 +40,7 @@ main(int argc, char* argv[])
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r);
fprintf(stderr, "ERROR: %s\n", s);
+ onig_end();
return -1;
}
@@ -52,6 +53,7 @@ main(int argc, char* argv[])
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r, &einfo);
fprintf(stderr, "onig_new: ERROR: %s\n", s);
+ onig_end();
return -1;
}
@@ -76,6 +78,9 @@ main(int argc, char* argv[])
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r);
fprintf(stderr, "ERROR: %s\n", s);
+ onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+ onig_free(reg);
+ onig_end();
return -1;
}
diff --git a/src/gb18030.c b/src/gb18030.c
index 7654432..8d415b0 100644
--- a/src/gb18030.c
+++ b/src/gb18030.c
@@ -2,7 +2,7 @@
gb18030.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2005-2018 KUBO Takehiro <kubo AT jiubao DOT org>
+ * Copyright (c) 2005-2019 KUBO Takehiro <kubo AT jiubao DOT org>
* K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
@@ -67,11 +67,11 @@ gb18030_mbc_enc_len(const UChar* p)
{
if (GB18030_MAP[*p] != CM)
return 1;
+
p++;
if (GB18030_MAP[*p] == C4)
return 4;
- if (GB18030_MAP[*p] == C1)
- return 1; /* illegal sequence */
+
return 2;
}
diff --git a/src/oniguruma.h b/src/oniguruma.h
index f6aa5ba..90cf2d9 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -36,9 +36,9 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
#define ONIGURUMA_VERSION_MINOR 9
-#define ONIGURUMA_VERSION_TEENY 2
+#define ONIGURUMA_VERSION_TEENY 3
-#define ONIGURUMA_VERSION_INT 60902
+#define ONIGURUMA_VERSION_INT 60903
#ifndef P_
#if defined(__STDC__) || defined(_WIN32)
@@ -52,6 +52,7 @@ extern "C" {
# define PV_(args) args
#endif
+#ifndef ONIG_STATIC
#ifndef ONIG_EXTERN
#if defined(_WIN32) && !defined(__GNUC__)
#if defined(ONIGURUMA_EXPORT)
@@ -65,6 +66,9 @@ extern "C" {
#ifndef ONIG_EXTERN
#define ONIG_EXTERN extern
#endif
+#else
+#define ONIG_EXTERN extern
+#endif
/* PART: character encoding */
@@ -517,6 +521,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (1U<<21) /* [..\w..] etc.. */
#define ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC (1U<<22)
#define ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (1U<<23) /* [0-9-a]=[0-9\-a] */
+#define ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC (1U<<26)
/* syntax (behavior) warning */
#define ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (1U<<24) /* [,-,] */
#define ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (1U<<25) /* (?:a*)+ */
@@ -766,6 +771,8 @@ int onig_init P_((void));
ONIG_EXTERN
int onig_error_code_to_str PV_((OnigUChar* s, int err_code, ...));
ONIG_EXTERN
+int onig_is_error_code_needs_param PV_((int code));
+ONIG_EXTERN
void onig_set_warn_func P_((OnigWarnFunc f));
ONIG_EXTERN
void onig_set_verb_warn_func P_((OnigWarnFunc f));
diff --git a/src/regcomp.c b/src/regcomp.c
index c2c04a4..b96c793 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -599,12 +599,34 @@ select_str_opcode(int mb_len, int str_len, int ignore_case)
}
static int
-compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)
+is_strict_real_node(Node* node)
+{
+  /* Returns 1 for a character class node, a character type node, or a
+     string node whose start and end pointers differ (i.e. it is not
+     empty); returns 0 for every other node type. */
+  int type = NODE_TYPE(node);
+
+  if (type == NODE_CCLASS || type == NODE_CTYPE)
+    return 1;
+
+  if (type == NODE_STRING) {
+    StrNode* sn = STR_(node);
+    return (sn->end != sn->s);
+  }
+
+  return 0;
+}
+
+static int
+compile_tree_empty_check(Node* node, regex_t* reg, int emptiness, ScanEnv* env)
{
int r;
int saved_num_null_check = reg->num_null_check;
- if (empty_info != BODY_IS_NOT_EMPTY) {
+ if (emptiness != BODY_IS_NOT_EMPTY) {
r = add_op(reg, OP_EMPTY_CHECK_START);
if (r != 0) return r;
COP(reg)->empty_check_start.mem = reg->num_null_check; /* NULL CHECK ID */
@@ -614,12 +636,12 @@ compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env)
r = compile_tree(node, reg, env);
if (r != 0) return r;
- if (empty_info != BODY_IS_NOT_EMPTY) {
- if (empty_info == BODY_IS_EMPTY)
+ if (emptiness != BODY_IS_NOT_EMPTY) {
+ if (emptiness == BODY_IS_EMPTY_POSSIBILITY)
r = add_op(reg, OP_EMPTY_CHECK_END);
- else if (empty_info == BODY_IS_EMPTY_MEM)
+ else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_MEM)
r = add_op(reg, OP_EMPTY_CHECK_END_MEMST);
- else if (empty_info == BODY_IS_EMPTY_REC)
+ else if (emptiness == BODY_IS_EMPTY_POSSIBILITY_REC)
r = add_op(reg, OP_EMPTY_CHECK_END_MEMST_PUSH);
if (r != 0) return r;
@@ -895,12 +917,12 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper)
}
p[id].lower = lower;
- p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
+ p[id].upper = (IS_INFINITE_REPEAT(upper) ? 0x7fffffff : upper);
return 0;
}
static int
-compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
+compile_range_repeat_node(QuantNode* qn, int target_len, int emptiness,
regex_t* reg, ScanEnv* env)
{
int r;
@@ -915,7 +937,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
if (r != 0) return r;
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
if (r != 0) return r;
if (
@@ -937,7 +959,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info,
static int
is_anychar_infinite_greedy(QuantNode* qn)
{
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
+ if (qn->greedy && IS_INFINITE_REPEAT(qn->upper) &&
NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn)))
return 1;
else
@@ -951,8 +973,8 @@ static int
compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
{
int len, mod_tlen;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- enum BodyEmpty empty_info = qn->empty_info;
+ int infinite = IS_INFINITE_REPEAT(qn->upper);
+ enum BodyEmptyType emptiness = qn->emptiness;
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
@@ -969,10 +991,9 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg)
}
}
- if (empty_info == BODY_IS_NOT_EMPTY)
- mod_tlen = tlen;
- else
- mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
+ mod_tlen = tlen;
+ if (emptiness != BODY_IS_NOT_EMPTY)
+ mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;
if (infinite &&
(qn->lower <= 1 ||
@@ -1026,8 +1047,8 @@ static int
compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
{
int i, r, mod_tlen;
- int infinite = IS_REPEAT_INFINITE(qn->upper);
- enum BodyEmpty empty_info = qn->empty_info;
+ int infinite = IS_INFINITE_REPEAT(qn->upper);
+ enum BodyEmptyType emptiness = qn->emptiness;
int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg);
if (tlen < 0) return tlen;
@@ -1055,10 +1076,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
}
}
- if (empty_info == BODY_IS_NOT_EMPTY)
- mod_tlen = tlen;
- else
- mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END);
+ mod_tlen = tlen;
+ if (emptiness != BODY_IS_NOT_EMPTY)
+ mod_tlen += SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END;
if (infinite &&
(qn->lower <= 1 ||
@@ -1096,7 +1116,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
COP(reg)->push_or_jump_exact1.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
COP(reg)->push_or_jump_exact1.c = STR_(qn->head_exact)->s[0];
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
if (r != 0) return r;
addr = -(mod_tlen + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1);
@@ -1109,7 +1129,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
COP(reg)->push_if_peek_next.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
COP(reg)->push_if_peek_next.c = STR_(qn->next_head_exact)->s[0];
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
if (r != 0) return r;
addr = -(mod_tlen + (int )SIZE_OP_PUSH_IF_PEEK_NEXT);
@@ -1119,7 +1139,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
if (r != 0) return r;
COP(reg)->push.addr = SIZE_INC_OP + mod_tlen + SIZE_OP_JUMP;
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
if (r != 0) return r;
addr = -(mod_tlen + (int )SIZE_OP_PUSH);
@@ -1134,7 +1154,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
if (r != 0) return r;
COP(reg)->jump.addr = mod_tlen + SIZE_INC_OP;
- r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env);
+ r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, emptiness, env);
if (r != 0) return r;
r = add_op(reg, OP_PUSH);
@@ -1188,7 +1208,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env)
r = compile_tree(NODE_QUANT_BODY(qn), reg, env);
}
else {
- r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env);
+ r = compile_range_repeat_node(qn, mod_tlen, emptiness, reg, env);
}
return r;
}
@@ -1273,7 +1293,7 @@ compile_length_bag_node(BagNode* node, regex_t* reg)
break;
case BAG_STOP_BACKTRACK:
- if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
+ if (NODE_IS_STRICT_REAL_REPEAT(node)) {
int v;
QuantNode* qn;
@@ -1307,8 +1327,9 @@ compile_length_bag_node(BagNode* node, regex_t* reg)
len += tlen;
}
+ len += SIZE_OP_JUMP + SIZE_OP_ATOMIC_END;
+
if (IS_NOT_NULL(Else)) {
- len += SIZE_OP_JUMP;
tlen = compile_length_tree(Else, reg);
if (tlen < 0) return tlen;
len += tlen;
@@ -1423,7 +1444,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
break;
case BAG_STOP_BACKTRACK:
- if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) {
+ if (NODE_IS_STRICT_REAL_REPEAT(node)) {
QuantNode* qn = QUANT_(NODE_BAG_BODY(node));
r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env);
if (r != 0) return r;
@@ -1455,7 +1476,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
case BAG_IF_ELSE:
{
- int cond_len, then_len, jump_len;
+ int cond_len, then_len, else_len, jump_len;
Node* cond = NODE_BAG_BODY(node);
Node* Then = node->te.Then;
Node* Else = node->te.Else;
@@ -1472,8 +1493,7 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
else
then_len = 0;
- jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END;
- if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP;
+ jump_len = cond_len + then_len + SIZE_OP_ATOMIC_END + SIZE_OP_JUMP;
r = add_op(reg, OP_PUSH);
if (r != 0) return r;
@@ -1490,11 +1510,20 @@ compile_bag_node(BagNode* node, regex_t* reg, ScanEnv* env)
}
if (IS_NOT_NULL(Else)) {
- int else_len = compile_length_tree(Else, reg);
- r = add_op(reg, OP_JUMP);
- if (r != 0) return r;
- COP(reg)->jump.addr = else_len + SIZE_INC_OP;
+ else_len = compile_length_tree(Else, reg);
+ if (else_len < 0) return else_len;
+ }
+ else
+ else_len = 0;
+
+ r = add_op(reg, OP_JUMP);
+ if (r != 0) return r;
+ COP(reg)->jump.addr = SIZE_OP_ATOMIC_END + else_len + SIZE_INC_OP;
+ r = add_op(reg, OP_ATOMIC_END);
+ if (r != 0) return r;
+
+ if (IS_NOT_NULL(Else)) {
r = compile_tree(Else, reg, env);
}
}
@@ -3035,7 +3064,7 @@ tree_max_len(Node* node, ScanEnv* env)
if (qn->upper != 0) {
len = tree_max_len(NODE_BODY(node), env);
if (len != 0) {
- if (! IS_REPEAT_INFINITE(qn->upper))
+ if (! IS_INFINITE_REPEAT(qn->upper))
len = distance_multiply(len, qn->upper);
else
len = INFINITE_LEN;
@@ -3581,7 +3610,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
type = NODE_TYPE(node);
if (type == NODE_QUANT) {
QuantNode* qn = QUANT_(node);
- if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
+ if (qn->greedy && IS_INFINITE_REPEAT(qn->upper)) {
#ifdef USE_QUANT_PEEK_NEXT
Node* n = get_head_value_node(next_node, 1, reg);
/* '\0': for UTF-16BE etc... */
@@ -3591,7 +3620,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
#endif
/* automatic posseivation a*b ==> (?>a*)b */
if (qn->lower <= 1) {
- if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) {
+ if (is_strict_real_node(NODE_BODY(node))) {
Node *x, *y;
x = get_head_value_node(NODE_BODY(node), 0, reg);
if (IS_NOT_NULL(x)) {
@@ -3599,7 +3628,7 @@ next_setup(Node* node, Node* next_node, regex_t* reg)
if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) {
Node* en = onig_node_new_bag(BAG_STOP_BACKTRACK);
CHECK_NULL_RETURN_MEMERR(en);
- NODE_STATUS_ADD(en, STOP_BT_SIMPLE_REPEAT);
+ NODE_STATUS_ADD(en, STRICT_REAL_REPEAT);
swap_node(node, en);
NODE_BODY(node) = en;
}
@@ -4001,11 +4030,11 @@ expand_case_fold_string(Node* node, regex_t* reg, int state)
return r;
}
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
-static enum BodyEmpty
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
+static enum BodyEmptyType
quantifiers_memory_node_info(Node* node)
{
- int r = BODY_IS_EMPTY;
+ int r = BODY_IS_EMPTY_POSSIBILITY;
switch (NODE_TYPE(node)) {
case NODE_LIST:
@@ -4022,7 +4051,7 @@ quantifiers_memory_node_info(Node* node)
#ifdef USE_CALL
case NODE_CALL:
if (NODE_IS_RECURSION(node)) {
- return BODY_IS_EMPTY_REC; /* tiny version */
+ return BODY_IS_EMPTY_POSSIBILITY_REC; /* tiny version */
}
else
r = quantifiers_memory_node_info(NODE_BODY(node));
@@ -4044,9 +4073,9 @@ quantifiers_memory_node_info(Node* node)
switch (en->type) {
case BAG_MEMORY:
if (NODE_IS_RECURSION(node)) {
- return BODY_IS_EMPTY_REC;
+ return BODY_IS_EMPTY_POSSIBILITY_REC;
}
- return BODY_IS_EMPTY_MEM;
+ return BODY_IS_EMPTY_POSSIBILITY_MEM;
break;
case BAG_OPTION:
@@ -4083,7 +4112,7 @@ quantifiers_memory_node_info(Node* node)
return r;
}
-#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
+#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
#ifdef USE_CALL
@@ -4351,7 +4380,7 @@ setup_called_state_call(Node* node, int state)
{
QuantNode* qn = QUANT_(node);
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
state |= IN_REAL_REPEAT;
if (qn->lower != qn->upper)
state |= IN_VAR_REPEAT;
@@ -4468,7 +4497,7 @@ setup_called_state(Node* node, int state)
{
QuantNode* qn = QUANT_(node);
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
state |= IN_REAL_REPEAT;
if (qn->lower != qn->upper)
state |= IN_VAR_REPEAT;
@@ -4600,24 +4629,24 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
NODE_STATUS_ADD(node, IN_MULTI_ENTRY);
}
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 1) {
d = tree_min_len(body, env);
if (d == 0) {
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
- qn->empty_info = quantifiers_memory_node_info(body);
- if (qn->empty_info == BODY_IS_EMPTY_REC) {
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
+ qn->emptiness = quantifiers_memory_node_info(body);
+ if (qn->emptiness == BODY_IS_EMPTY_POSSIBILITY_REC) {
if (NODE_TYPE(body) == NODE_BAG &&
BAG_(body)->type == BAG_MEMORY) {
MEM_STATUS_ON(env->bt_mem_end, BAG_(body)->m.regnum);
}
}
#else
- qn->empty_info = BODY_IS_EMPTY;
+ qn->emptiness = BODY_IS_EMPTY_POSSIBILITY;
#endif
}
}
- if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2)
+ if (IS_INFINITE_REPEAT(qn->upper) || qn->upper >= 2)
state |= IN_REAL_REPEAT;
if (qn->lower != qn->upper)
state |= IN_VAR_REPEAT;
@@ -4628,7 +4657,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
/* expand string */
#define EXPAND_STRING_MAX_LENGTH 100
if (NODE_TYPE(body) == NODE_STRING) {
- if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper &&
+ if (!IS_INFINITE_REPEAT(qn->lower) && qn->lower == qn->upper &&
qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) {
int len = NODE_STRING_LEN(body);
StrNode* sn = STR_(body);
@@ -4646,7 +4675,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
}
}
- if (qn->greedy && (qn->empty_info == BODY_IS_NOT_EMPTY)) {
+ if (qn->greedy && (qn->emptiness == BODY_IS_NOT_EMPTY)) {
if (NODE_TYPE(body) == NODE_QUANT) {
QuantNode* tqn = QUANT_(body);
if (IS_NOT_NULL(tqn->head_exact)) {
@@ -4663,7 +4692,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
}
/* setup_tree does the following work.
- 1. check empty loop. (set qn->empty_info)
+ 1. check empty loop. (set qn->emptiness)
2. expand ignore-case in char class.
3. set memory status bit flags. (reg->mem_stats)
4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
@@ -4752,10 +4781,10 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
r = setup_tree(target, reg, state, env);
if (NODE_TYPE(target) == NODE_QUANT) {
QuantNode* tqn = QUANT_(target);
- if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
+ if (IS_INFINITE_REPEAT(tqn->upper) && tqn->lower <= 1 &&
tqn->greedy != 0) { /* (?>a*), a*+ etc... */
- if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target)))
- NODE_STATUS_ADD(node, STOP_BT_SIMPLE_REPEAT);
+ if (is_strict_real_node(NODE_BODY(target)))
+ NODE_STATUS_ADD(node, STRICT_REAL_REPEAT);
}
}
}
@@ -5752,7 +5781,7 @@ optimize_nodes(Node* node, OptNode* opt, OptEnv* env)
opt->sm.reach_end = 0;
}
- if (IS_REPEAT_INFINITE(qn->upper)) {
+ if (IS_INFINITE_REPEAT(qn->upper)) {
if (env->mmd.max == 0 &&
NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) {
if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env)))
@@ -6672,6 +6701,7 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
}
else {
len = ONIGENC_CODE_TO_MBCLEN(enc, code);
+ if (len < 0) return 0;
}
return onig_is_code_in_cc_len(len, code, cc);
}
diff --git a/src/regenc.c b/src/regenc.c
index 6376565..9fab721 100644
--- a/src/regenc.c
+++ b/src/regenc.c
@@ -853,6 +853,8 @@ onigenc_mbn_is_mbc_ambiguous(OnigEncoding enc, OnigCaseFoldType flag,
extern int
onigenc_mb2_code_to_mbclen(OnigCodePoint code)
{
+ if ((code & (~0xffff)) != 0) return ONIGERR_INVALID_CODE_POINT_VALUE;
+
if ((code & 0xff00) != 0) return 2;
else return 1;
}
diff --git a/src/regerror.c b/src/regerror.c
index 7564827..e6d1806 100644
--- a/src/regerror.c
+++ b/src/regerror.c
@@ -257,6 +257,23 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end,
}
+extern int
+onig_is_error_code_needs_param(int code)
+{
+ switch (code) {
+ case ONIGERR_UNDEFINED_NAME_REFERENCE:
+ case ONIGERR_UNDEFINED_GROUP_REFERENCE:
+ case ONIGERR_MULTIPLEX_DEFINED_NAME:
+ case ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL:
+ case ONIGERR_INVALID_GROUP_NAME:
+ case ONIGERR_INVALID_CHAR_IN_GROUP_NAME:
+ case ONIGERR_INVALID_CHAR_PROPERTY_NAME:
+ return 1;
+ default:
+ return 0;
+ }
+}
+
/* for ONIG_MAX_ERROR_MESSAGE_LEN */
#define MAX_ERROR_PAR_LEN 30
diff --git a/src/regexec.c b/src/regexec.c
index 6618996..f957b75 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -980,6 +980,8 @@ onig_region_copy(OnigRegion* to, OnigRegion* from)
#define STK_CALL_FRAME 0x0400
#define STK_RETURN 0x0500
#define STK_SAVE_VAL 0x0600
+#define STK_PREC_READ_START 0x0700
+#define STK_PREC_READ_END 0x0800
/* stack type check mask */
#define STK_MASK_POP_USED STK_ALT_FLAG
@@ -1544,8 +1546,8 @@ stack_double(int is_alloca, char** arg_alloc_base,
#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev)
#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev)
-#define STACK_PUSH_POS(s,sprev) \
- STACK_PUSH(STK_TO_VOID_START,(Operation* )0,s,sprev)
+#define STACK_PUSH_PREC_READ_START(s,sprev) \
+ STACK_PUSH(STK_PREC_READ_START,(Operation* )0,s,sprev)
#define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \
STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev)
#define STACK_PUSH_TO_VOID_START STACK_PUSH_TYPE(STK_TO_VOID_START)
@@ -1887,6 +1889,27 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while(0)
+#define STACK_GET_PREC_READ_START(k) do {\
+ int level = 0;\
+ k = stk;\
+ while (1) {\
+ k--;\
+ STACK_BASE_CHECK(k, "STACK_GET_PREC_READ_START");\
+ if (IS_TO_VOID_TARGET(k)) {\
+ k->type = STK_VOID;\
+ }\
+ else if (k->type == STK_PREC_READ_START) {\
+ if (level == 0) {\
+ break;\
+ }\
+ level--;\
+ }\
+ else if (k->type == STK_PREC_READ_END) {\
+ level++;\
+ }\
+ }\
+} while(0)
+
#define STACK_EMPTY_CHECK(isnull,sid,s) do {\
StackType* k = stk;\
while (1) {\
@@ -1913,7 +1936,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while (0)
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
#define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\
StackType* k = stk;\
while (1) {\
@@ -1927,9 +1950,10 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
else {\
UChar* endp;\
+ int level = 0;\
(isnull) = 1;\
while (k < stk) {\
- if (k->type == STK_MEM_START) {\
+ if (k->type == STK_MEM_START && level == 0) {\
STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
if (endp == 0) {\
(isnull) = 0; break;\
@@ -1941,6 +1965,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
(isnull) = -1; /* empty, but position changed */ \
}\
}\
+ else if (k->type == STK_PREC_READ_START) {\
+ level++;\
+ }\
+ else if (k->type == STK_PREC_READ_END) {\
+ level--;\
+ }\
k++;\
}\
break;\
@@ -1965,10 +1995,11 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
else {\
UChar* endp;\
+ int prec_level = 0;\
(isnull) = 1;\
while (k < stk) {\
if (k->type == STK_MEM_START) {\
- if (level == 0) {\
+ if (level == 0 && prec_level == 0) {\
STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
if (endp == 0) {\
(isnull) = 0; break;\
@@ -1987,6 +2018,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
else if (k->type == STK_EMPTY_CHECK_END) {\
if (k->zid == (sid)) level--;\
}\
+ else if (k->type == STK_PREC_READ_START) {\
+ prec_level++;\
+ }\
+ else if (k->type == STK_PREC_READ_END) {\
+ prec_level--;\
+ }\
k++;\
}\
break;\
@@ -2023,7 +2060,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
}\
} while(0)
-#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
+#endif /* USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT */
#define STACK_GET_REPEAT(sid, k) do {\
int level = 0;\
@@ -2968,6 +3005,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
NEXT_OUT;
CASE_OP(CCLASS_MB)
+ DATA_ENSURE(1);
if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail;
cclass_mb:
@@ -3441,11 +3479,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP(pstart, s, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP(s, pstart, n);
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
}
INC_OP;
JUMP_OUT;
@@ -3468,11 +3508,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- STRING_CMP_IC(case_fold_flag, pstart, &s, n);
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ STRING_CMP_IC(case_fold_flag, pstart, &s, n);
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
}
INC_OP;
JUMP_OUT;
@@ -3498,15 +3540,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE(pstart, swork, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
-
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE(swork, pstart, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
break; /* success */
}
if (i == tlen) goto fail;
@@ -3535,15 +3578,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
? STACK_AT(mem_end_stk[mem])->u.mem.pstr
: (UChar* )((void* )mem_end_stk[mem]));
n = (int )(pend - pstart);
- DATA_ENSURE(n);
- sprev = s;
- swork = s;
- STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
- if (is_fail) continue;
- s = swork;
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
-
+ if (n != 0) {
+ DATA_ENSURE(n);
+ sprev = s;
+ swork = s;
+ STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, is_fail);
+ if (is_fail) continue;
+ s = swork;
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
break; /* success */
}
if (i == tlen) goto fail;
@@ -3560,6 +3604,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int len;
int level;
MemNumType* mems;
+ UChar* ssave;
n = 0;
backref_with_level:
@@ -3567,10 +3612,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
tlen = p->backref_general.num;
mems = tlen == 1 ? &(p->backref_general.n1) : p->backref_general.ns;
- sprev = s;
+ ssave = s;
if (backref_match_at_nested_level(reg, stk, stk_base, n,
case_fold_flag, level, (int )tlen, mems, &s, end)) {
- if (sprev < end) {
+ if (ssave != s) {
+ sprev = ssave;
while (sprev + (len = enclen(encode, sprev)) < s)
sprev += len;
}
@@ -3658,7 +3704,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
}
JUMP_OUT;
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
CASE_OP(EMPTY_CHECK_END_MEMST)
{
int is_empty;
@@ -3683,7 +3729,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int is_empty;
mem = p->empty_check_end.mem; /* mem: null check id */
-#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
+#ifdef USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT
STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg);
#else
STACK_EMPTY_CHECK_REC(is_empty, mem, s);
@@ -3851,14 +3897,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
goto repeat_inc_ng;
CASE_OP(PREC_READ_START)
- STACK_PUSH_POS(s, sprev);
+ STACK_PUSH_PREC_READ_START(s, sprev);
INC_OP;
JUMP_OUT;
CASE_OP(PREC_READ_END)
- STACK_EXEC_TO_VOID(stkp);
+ STACK_GET_PREC_READ_START(stkp);
s = stkp->u.state.pstr;
sprev = stkp->u.state.pstr_prev;
+ STACK_PUSH(STK_PREC_READ_END,0,0,0);
INC_OP;
JUMP_OUT;
@@ -5443,6 +5490,9 @@ onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
if (n >= 0) {
n = ONIGERR_INVALID_CALLOUT_BODY;
}
+ else if (onig_is_error_code_needs_param(n)) {
+ n = ONIGERR_INVALID_CALLOUT_BODY;
+ }
return n;
}
diff --git a/src/regext.c b/src/regext.c
index fa4b360..965c793 100644
--- a/src/regext.c
+++ b/src/regext.c
@@ -29,6 +29,7 @@
#include "regint.h"
+#if 0
static void
conv_ext0be32(const UChar* s, const UChar* end, UChar* conv)
{
@@ -158,6 +159,7 @@ conv_encoding(OnigEncoding from, OnigEncoding to, const UChar* s, const UChar* e
return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
}
+#endif
extern int
onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
@@ -169,9 +171,7 @@ onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
if (ci->pattern_enc != ci->target_enc) {
- r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end,
- &cpat, &cpat_end);
- if (r != 0) return r;
+ return ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION;
}
else {
cpat = (UChar* )pattern;
diff --git a/src/regint.h b/src/regint.h
index 56767e8..38389a1 100644
--- a/src/regint.h
+++ b/src/regint.h
@@ -63,7 +63,7 @@
#define USE_CALL
#define USE_CALLOUT
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
-#define USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
+#define USE_STUBBORN_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
#define USE_RETRY_LIMIT_IN_MATCH
@@ -348,8 +348,8 @@ typedef unsigned int MemStatusType;
#define DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag) \
((case_fold_flag) & ~INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR)
-#define REPEAT_INFINITE -1
-#define IS_REPEAT_INFINITE(n) ((n) == REPEAT_INFINITE)
+#define INFINITE_REPEAT -1
+#define IS_INFINITE_REPEAT(n) ((n) == INFINITE_REPEAT)
/* bitset */
#define BITS_PER_BYTE 8
diff --git a/src/regparse.c b/src/regparse.c
index f1deea3..7f8b1a9 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -77,6 +77,7 @@ OnigSyntaxType OnigSyntaxOniguruma = {
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP |
ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME |
ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY |
+ ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC |
ONIG_SYN_WARN_CC_OP_NOT_ESCAPED |
ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT )
, ONIG_OPTION_NONE
@@ -1093,6 +1094,35 @@ onig_name_to_group_numbers(regex_t* reg, const UChar* name,
return e->back_num;
}
+static int
+name_to_group_numbers(ScanEnv* env, const UChar* name, const UChar* name_end,
+ int** nums)
+{
+ regex_t* reg;
+ NameEntry* e;
+
+ reg = env->reg;
+ e = name_find(reg, name, name_end);
+
+ if (IS_NULL(e)) {
+ onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
+ (UChar* )name, (UChar* )name_end);
+ return ONIGERR_UNDEFINED_NAME_REFERENCE;
+ }
+
+ switch (e->back_num) {
+ case 0:
+ break;
+ case 1:
+ *nums = &(e->back_ref1);
+ break;
+ default:
+ *nums = e->back_refs;
+ break;
+ }
+ return e->back_num;
+}
+
extern int
onig_name_to_backref_number(regex_t* reg, const UChar* name,
const UChar* name_end, OnigRegion *region)
@@ -1869,8 +1899,8 @@ callout_tag_table_new(CalloutTagTable** rt)
}
static int
-callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,
- CalloutTagVal entry_val)
+callout_tag_entry_raw(ScanEnv* env, CalloutTagTable* t, UChar* name,
+ UChar* name_end, CalloutTagVal entry_val)
{
int r;
CalloutTagVal val;
@@ -1879,8 +1909,11 @@ callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end,
return ONIGERR_INVALID_CALLOUT_TAG_NAME;
val = callout_tag_find(t, name, name_end);
- if (val >= 0)
+ if (val >= 0) {
+ onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINED_NAME,
+ name, name_end);
return ONIGERR_MULTIPLEX_DEFINED_NAME;
+ }
r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val);
if (r < 0) return r;
@@ -1909,7 +1942,7 @@ ext_ensure_tag_table(regex_t* reg)
}
static int
-callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,
+callout_tag_entry(ScanEnv* env, regex_t* reg, UChar* name, UChar* name_end,
CalloutTagVal entry_val)
{
int r;
@@ -1921,7 +1954,7 @@ callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end,
ext = onig_get_regex_ext(reg);
CHECK_NULL_RETURN_MEMERR(ext);
- r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val);
+ r = callout_tag_entry_raw(env, ext->tag_table, name, name_end, entry_val);
e = onig_reg_callout_list_at(reg, (int )entry_val);
CHECK_NULL_RETURN_MEMERR(e);
@@ -2391,10 +2424,10 @@ node_new_quantifier(int lower, int upper, int by_number)
CHECK_NULL_RETURN(node);
NODE_SET_TYPE(node, NODE_QUANT);
- QUANT_(node)->lower = lower;
- QUANT_(node)->upper = upper;
- QUANT_(node)->greedy = 1;
- QUANT_(node)->empty_info = BODY_IS_NOT_EMPTY;
+ QUANT_(node)->lower = lower;
+ QUANT_(node)->upper = upper;
+ QUANT_(node)->greedy = 1;
+ QUANT_(node)->emptiness = BODY_IS_NOT_EMPTY;
QUANT_(node)->head_exact = NULL_NODE;
QUANT_(node)->next_head_exact = NULL_NODE;
QUANT_(node)->is_refered = 0;
@@ -2694,7 +2727,7 @@ make_text_segment(Node** node, ScanEnv* env)
ns[0] = x;
ns[1] = NULL_NODE;
- x = node_new_quantifier(0, REPEAT_INFINITE, 1);
+ x = node_new_quantifier(0, INFINITE_REPEAT, 1);
if (IS_NULL(x)) goto err;
NODE_BODY(x) = ns[0];
@@ -3044,7 +3077,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
if (expr == NULL_NODE) {
/* default expr \O* */
- quant = node_new_quantifier(0, REPEAT_INFINITE, 0);
+ quant = node_new_quantifier(0, INFINITE_REPEAT, 0);
if (IS_NULL(quant)) goto err0;
r = node_new_true_anychar(&body, env);
@@ -3086,7 +3119,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter,
if (r != 0) goto err;
possessive = 1;
- r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE,
+ r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, INFINITE_REPEAT,
possessive, is_range_cutter, env);
if (r != 0) goto err;
@@ -3236,10 +3269,18 @@ node_new_empty(void)
static Node*
node_new_str_raw_char(UChar c)
{
+ int i;
UChar p[1];
+ Node* node;
p[0] = c;
- return node_new_str_raw(p, p + 1);
+ node = node_new_str_raw(p, p + 1);
+
+ /* clear buf tail */
+ for (i = 1; i < NODE_STRING_BUF_SIZE; i++)
+ STR_(node)->buf[i] = '\0';
+
+ return node;
}
static Node*
@@ -3275,24 +3316,6 @@ str_node_can_be_split(Node* node, OnigEncoding enc)
return 0;
}
-#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
-static int
-node_str_head_pad(StrNode* sn, int num, UChar val)
-{
- UChar buf[NODE_STRING_BUF_SIZE];
- int i, len;
-
- len = sn->end - sn->s;
- onig_strcpy(buf, sn->s, sn->end);
- onig_strcpy(&(sn->s[num]), buf, buf + len);
- sn->end += num;
-
- for (i = 0; i < num; i++) {
- sn->s[i] = val;
- }
-}
-#endif
-
extern int
onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc)
{
@@ -3877,19 +3900,19 @@ quantifier_type_num(QuantNode* q)
if (q->greedy) {
if (q->lower == 0) {
if (q->upper == 1) return 0;
- else if (IS_REPEAT_INFINITE(q->upper)) return 1;
+ else if (IS_INFINITE_REPEAT(q->upper)) return 1;
}
else if (q->lower == 1) {
- if (IS_REPEAT_INFINITE(q->upper)) return 2;
+ if (IS_INFINITE_REPEAT(q->upper)) return 2;
}
}
else {
if (q->lower == 0) {
if (q->upper == 1) return 3;
- else if (IS_REPEAT_INFINITE(q->upper)) return 4;
+ else if (IS_INFINITE_REPEAT(q->upper)) return 4;
}
else if (q->lower == 1) {
- if (IS_REPEAT_INFINITE(q->upper)) return 5;
+ if (IS_INFINITE_REPEAT(q->upper)) return 5;
}
}
return -1;
@@ -3926,8 +3949,8 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
pnum = quantifier_type_num(p);
cnum = quantifier_type_num(c);
if (pnum < 0 || cnum < 0) {
- if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) {
- if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) {
+ if ((p->lower == p->upper) && ! IS_INFINITE_REPEAT(p->upper)) {
+ if ((c->lower == c->upper) && ! IS_INFINITE_REPEAT(c->upper)) {
int n = onig_positive_int_multiply(p->lower, c->lower);
if (n >= 0) {
p->lower = p->upper = n;
@@ -3946,11 +3969,11 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
break;
case RQ_A:
NODE_BODY(pnode) = NODE_BODY(cnode);
- p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1;
+ p->lower = 0; p->upper = INFINITE_REPEAT; p->greedy = 1;
break;
case RQ_AQ:
NODE_BODY(pnode) = NODE_BODY(cnode);
- p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0;
+ p->lower = 0; p->upper = INFINITE_REPEAT; p->greedy = 0;
break;
case RQ_QQ:
NODE_BODY(pnode) = NODE_BODY(cnode);
@@ -3959,13 +3982,13 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode)
case RQ_P_QQ:
NODE_BODY(pnode) = cnode;
p->lower = 0; p->upper = 1; p->greedy = 0;
- c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1;
+ c->lower = 1; c->upper = INFINITE_REPEAT; c->greedy = 1;
return ;
break;
case RQ_PQ_Q:
NODE_BODY(pnode) = cnode;
p->lower = 0; p->upper = 1; p->greedy = 1;
- c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0;
+ c->lower = 1; c->upper = INFINITE_REPEAT; c->greedy = 0;
return ;
break;
case RQ_ASIS:
@@ -4158,7 +4181,7 @@ fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
if (p == prev) {
if (non_low != 0)
goto invalid;
- up = REPEAT_INFINITE; /* {n,} : {n,infinite} */
+ up = INFINITE_REPEAT; /* {n,} : {n,infinite} */
}
}
else {
@@ -4178,7 +4201,7 @@ fetch_interval_quantifier(UChar** src, UChar* end, PToken* tok, ScanEnv* env)
}
if (c != '}') goto invalid;
- if (!IS_REPEAT_INFINITE(up) && low > up) {
+ if (!IS_INFINITE_REPEAT(up) && low > up) {
/* {n,m}+ supported case */
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_PLUS_POSSESSIVE_INTERVAL))
return ONIGERR_UPPER_SMALLER_THAN_LOWER_IN_REPEAT_RANGE;
@@ -4959,7 +4982,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_ASTERISK_ZERO_INF)) break;
tok->type = TK_REPEAT;
tok->u.repeat.lower = 0;
- tok->u.repeat.upper = REPEAT_INFINITE;
+ tok->u.repeat.upper = INFINITE_REPEAT;
goto greedy_check;
break;
@@ -4967,7 +4990,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_PLUS_ONE_INF)) break;
tok->type = TK_REPEAT;
tok->u.repeat.lower = 1;
- tok->u.repeat.upper = REPEAT_INFINITE;
+ tok->u.repeat.upper = INFINITE_REPEAT;
goto greedy_check;
break;
@@ -5358,10 +5381,8 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.backref.ref1 = back_num;
}
else {
- num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
+ num = name_to_group_numbers(env, prev, name_end, &backs);
if (num <= 0) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
return ONIGERR_UNDEFINED_NAME_REFERENCE;
}
if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) {
@@ -5514,7 +5535,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
#endif
tok->type = TK_REPEAT;
tok->u.repeat.lower = 0;
- tok->u.repeat.upper = REPEAT_INFINITE;
+ tok->u.repeat.upper = INFINITE_REPEAT;
goto greedy_check;
break;
@@ -5525,7 +5546,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
#endif
tok->type = TK_REPEAT;
tok->u.repeat.lower = 1;
- tok->u.repeat.upper = REPEAT_INFINITE;
+ tok->u.repeat.upper = INFINITE_REPEAT;
goto greedy_check;
break;
@@ -5608,7 +5629,7 @@ fetch_token(PToken* tok, UChar** src, UChar* end, ScanEnv* env)
tok->u.call.gnum = 0;
tok->u.call.name = p;
PINC;
- if (! PPEEK_IS(')')) return ONIGERR_INVALID_GROUP_NAME;
+ if (! PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION;
tok->u.call.name_end = p;
break;
@@ -6249,6 +6270,7 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
env->parse_depth++;
if (env->parse_depth > ParseDepthLimit)
return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
+
prev_cc = (CClassNode* )NULL;
r = fetch_token_in_cc(tok, src, end, env);
if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) {
@@ -6301,10 +6323,11 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
case TK_RAW_BYTE:
/* tok->base != 0 : octal or hexadec. */
if (! ONIGENC_IS_SINGLEBYTE(env->enc) && tok->base != 0) {
+ int i, j;
UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
UChar* bufe = buf + ONIGENC_CODE_TO_MBC_MAXLEN;
UChar* psave = p;
- int i, base = tok->base;
+ int base = tok->base;
buf[0] = tok->u.c;
for (i = 1; i < ONIGENC_MBC_MAXLEN(env->enc); i++) {
@@ -6322,6 +6345,9 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
goto err;
}
+ /* clear buf tail */
+ for (j = i; j < ONIGENC_CODE_TO_MBC_MAXLEN; j++) buf[j] = '\0';
+
len = enclen(env->enc, buf);
if (i < len) {
r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
@@ -6359,8 +6385,13 @@ parse_char_class(Node** np, PToken* tok, UChar** src, UChar* end, ScanEnv* env)
val_entry:
len = ONIGENC_CODE_TO_MBCLEN(env->enc, v);
if (len < 0) {
- r = len;
- goto err;
+ if (state != CCS_RANGE ||
+ ! IS_SYNTAX_BV(env->syntax,
+ ONIG_SYN_ALLOW_INVALID_CODE_END_OF_RANGE_IN_CC) ||
+ v < 0x100 || ONIGENC_MBC_MAXLEN(env->enc) == 1) {
+ r = len;
+ goto err;
+ }
}
in_type = (len == 1 ? CCV_SB : CCV_CODE_POINT);
val_entry2:
@@ -6673,7 +6704,7 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv
}
if (tag_start != tag_end) {
- r = callout_tag_entry(env->reg, tag_start, tag_end, num);
+ r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
if (r != ONIG_NORMAL) return r;
}
@@ -6994,7 +7025,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
}
if (tag_start != tag_end) {
- r = callout_tag_entry(env->reg, tag_start, tag_end, num);
+ r = callout_tag_entry(env, env->reg, tag_start, tag_end, num);
if (r != ONIG_NORMAL) return r;
}
@@ -7271,10 +7302,8 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
int num;
int* backs;
- num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs);
+ num = name_to_group_numbers(env, prev, name_end, &backs);
if (num <= 0) {
- onig_scan_env_set_error_string(env,
- ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end);
return ONIGERR_UNDEFINED_NAME_REFERENCE;
}
if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) {
@@ -7414,6 +7443,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
}
break;
+#ifdef USE_CAPTURE_HISTORY
case '@':
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) {
if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) {
@@ -7441,6 +7471,7 @@ parse_bag(Node** np, PToken* tok, int term, UChar** src, UChar* end,
return ONIGERR_UNDEFINED_GROUP_OPTION;
}
break;
+#endif
#ifdef USE_POSIXLINE_OPTION
case 'p':
@@ -7688,7 +7719,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env)
if (targetq_num >= 0 && nestq_num < 0) {
if (targetq_num == 1 || targetq_num == 2) { /* * or + */
/* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */
- if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) {
+ if (! IS_INFINITE_REPEAT(qn->upper) && qn->upper > 1 && qn->greedy) {
qn->upper = (qn->lower == 0 ? 1 : qn->lower);
}
}
@@ -7826,14 +7857,18 @@ static int
parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
ScanEnv* env, int group_head)
{
- int r, len, group = 0;
+ int r, len, group;
Node* qn;
Node** tp;
+ unsigned int parse_depth;
+ group = 0;
*np = NULL;
if (tok->type == (enum TokenSyms )term)
goto end_of_token;
+ parse_depth = env->parse_depth;
+
switch (tok->type) {
case TK_ALT:
case TK_EOT:
@@ -7914,36 +7949,29 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
len = 1;
while (1) {
if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
- if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */
+ if (len == enclen(env->enc, STR_(*np)->s)) {
r = fetch_token(tok, src, end, env);
- NODE_STRING_CLEAR_RAW(*np);
- goto string_end;
+ goto tk_raw_byte_end;
}
}
r = fetch_token(tok, src, end, env);
if (r < 0) return r;
- if (r != TK_RAW_BYTE) {
- /* Don't use this, it is wrong for little endian encodings. */
-#ifdef USE_PAD_TO_SHORT_BYTE_CHAR
- int rem;
- if (len < ONIGENC_MBC_MINLEN(env->enc)) {
- rem = ONIGENC_MBC_MINLEN(env->enc) - len;
- (void )node_str_head_pad(STR_(*np), rem, (UChar )0);
- if (len + rem == enclen(env->enc, STR_(*np)->s)) {
- NODE_STRING_CLEAR_RAW(*np);
- goto string_end;
- }
- }
-#endif
+ if (r != TK_RAW_BYTE)
return ONIGERR_TOO_SHORT_MULTI_BYTE_STRING;
- }
r = node_str_cat_char(*np, (UChar )tok->u.c);
if (r < 0) return r;
len++;
}
+
+ tk_raw_byte_end:
+ if (! ONIGENC_IS_VALID_MBC_STRING(env->enc, STR_(*np)->s, STR_(*np)->end))
+ return ONIGERR_INVALID_WIDE_CHAR_VALUE;
+
+ NODE_STRING_CLEAR_RAW(*np);
+ goto string_end;
}
break;
@@ -8055,7 +8083,7 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
case TK_ANYCHAR_ANYTIME:
*np = node_new_anychar();
CHECK_NULL_RETURN_MEMERR(*np);
- qn = node_new_quantifier(0, REPEAT_INFINITE, 0);
+ qn = node_new_quantifier(0, INFINITE_REPEAT, 0);
CHECK_NULL_RETURN_MEMERR(qn);
NODE_BODY(qn) = *np;
*np = qn;
@@ -8158,6 +8186,10 @@ parse_exp(Node** np, PToken* tok, int term, UChar** src, UChar* end,
if (is_invalid_quantifier_target(*tp))
return ONIGERR_TARGET_OF_REPEAT_OPERATOR_INVALID;
+ parse_depth++;
+ if (parse_depth > ParseDepthLimit)
+ return ONIGERR_PARSE_DEPTH_LIMIT_OVER;
+
qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper,
r == TK_INTERVAL);
CHECK_NULL_RETURN_MEMERR(qn);
diff --git a/src/regparse.h b/src/regparse.h
index b7a2867..231f7b5 100644
--- a/src/regparse.h
+++ b/src/regparse.h
@@ -66,11 +66,11 @@ enum GimmickType {
#endif
};
-enum BodyEmpty {
- BODY_IS_NOT_EMPTY = 0,
- BODY_IS_EMPTY = 1,
- BODY_IS_EMPTY_MEM = 2,
- BODY_IS_EMPTY_REC = 3
+enum BodyEmptyType {
+ BODY_IS_NOT_EMPTY = 0,
+ BODY_IS_EMPTY_POSSIBILITY = 1,
+ BODY_IS_EMPTY_POSSIBILITY_MEM = 2,
+ BODY_IS_EMPTY_POSSIBILITY_REC = 3
};
typedef struct {
@@ -101,7 +101,7 @@ typedef struct {
int lower;
int upper;
int greedy;
- enum BodyEmpty empty_info;
+ enum BodyEmptyType emptiness;
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
@@ -252,10 +252,6 @@ typedef struct _Node {
#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL)
#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
-#define NODE_IS_SIMPLE_TYPE(node) \
- ((NODE_TYPE2BIT(NODE_TYPE(node)) & \
- (NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0)
-
#define NODE_TYPE(node) ((node)->u.base.node_type)
#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
@@ -314,7 +310,7 @@ typedef struct _Node {
#define NODE_ST_CLEN_FIXED (1<<2)
#define NODE_ST_MARK1 (1<<3)
#define NODE_ST_MARK2 (1<<4)
-#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5)
+#define NODE_ST_STRICT_REAL_REPEAT (1<<5)
#define NODE_ST_RECURSION (1<<6)
#define NODE_ST_CALLED (1<<7)
#define NODE_ST_ADDR_FIXED (1<<8)
@@ -357,8 +353,8 @@ typedef struct _Node {
#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
#define NODE_IS_PROHIBIT_RECURSION(node) \
((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
-#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
- ((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0)
+#define NODE_IS_STRICT_REAL_REPEAT(node) \
+ ((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0)
#define NODE_BODY(node) ((node)->u.base.body)
#define NODE_QUANT_BODY(node) ((node)->body)
diff --git a/src/utf16_be.c b/src/utf16_be.c
index 22bf74d..b66d868 100644
--- a/src/utf16_be.c
+++ b/src/utf16_be.c
@@ -2,7 +2,7 @@
utf16_be.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -103,7 +103,25 @@ utf16be_mbc_enc_len(const UChar* p)
static int
is_valid_mbc_string(const UChar* s, const UChar* end)
{
- return onigenc_length_check_is_valid_mbc_string(ONIG_ENCODING_UTF16_BE, s, end);
+ while (s < end) {
+ int len = utf16be_mbc_enc_len(s);
+ if (len == 4) {
+ if (s + 2 >= end)
+ return FALSE;
+ if (! UTF16_IS_SURROGATE_SECOND(*(s+2)))
+ return FALSE;
+ }
+ else
+ if (UTF16_IS_SURROGATE_SECOND(*s))
+ return FALSE;
+
+ s += len;
+ }
+
+ if (s != end)
+ return FALSE;
+ else
+ return TRUE;
}
static int
@@ -146,7 +164,15 @@ utf16be_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED)
static int
utf16be_code_to_mbclen(OnigCodePoint code)
{
- return (code > 0xffff ? 4 : 2);
+ if (code > 0xffff) {
+ if (code > 0x10ffff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else
+ return 4;
+ }
+ else {
+ return 2;
+ }
}
static int
@@ -243,7 +269,8 @@ utf16be_left_adjust_char_head(const UChar* start, const UChar* s)
s--;
}
- if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1)
+ if (UTF16_IS_SURROGATE_SECOND(*s) && s > start + 1 &&
+ UTF16_IS_SURROGATE_FIRST(*(s-2)))
s -= 2;
return (UChar* )s;
diff --git a/src/utf16_le.c b/src/utf16_le.c
index 4b231c6..cdc74b0 100644
--- a/src/utf16_le.c
+++ b/src/utf16_le.c
@@ -2,7 +2,7 @@
utf16_le.c - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -95,7 +95,15 @@ static const int EncLen_UTF16[] = {
static int
utf16le_code_to_mbclen(OnigCodePoint code)
{
- return (code > 0xffff ? 4 : 2);
+ if (code > 0xffff) {
+ if (code > 0x10ffff)
+ return ONIGERR_INVALID_CODE_POINT_VALUE;
+ else
+ return 4;
+ }
+ else {
+ return 2;
+ }
}
static int
@@ -110,7 +118,16 @@ is_valid_mbc_string(const UChar* p, const UChar* end)
const UChar* end1 = end - 1;
while (p < end1) {
- p += utf16le_mbc_enc_len(p);
+ int len = utf16le_mbc_enc_len(p);
+ if (len == 4) {
+ if (p + 3 < end && ! UTF16_IS_SURROGATE_SECOND(*(p + 3)))
+ return FALSE;
+ }
+ else
+ if (UTF16_IS_SURROGATE_SECOND(*(p + 1)))
+ return FALSE;
+
+ p += len;
}
if (p != end)
@@ -252,7 +269,8 @@ utf16le_left_adjust_char_head(const UChar* start, const UChar* s)
s--;
}
- if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1)
+ if (UTF16_IS_SURROGATE_SECOND(*(s+1)) && s > start + 1 &&
+ UTF16_IS_SURROGATE_FIRST(*(s-1)))
s -= 2;
return (UChar* )s;
diff --git a/test/test_utf8.c b/test/test_utf8.c
index bab6b0d..2338526 100644
--- a/test/test_utf8.c
+++ b/test/test_utf8.c
@@ -1202,10 +1202,23 @@ extern int main(int argc, char* argv[])
x2("a{3,2}b", "aab", 0, 3);
x2("a{3,2}?", "", 0, 0); /* == (?:a{3,2})?*/
x2("a{2,3}+a", "aaa", 0, 3); /* == (?:a{2,3})+*/
+ x2("[\\x{0}-\\x{7fffffff}]", "a", 0, 1);
+ x2("[\\x{7f}-\\x{7fffffff}]", "\xe5\xae\xb6", 0, 3);
+
+ n(" \xfd", ""); /* https://bugs.php.net/bug.php?id=77370 */
+ /* can't use \xfc00.. because compiler error: hex escape sequence out of range */
+ n("()0\\xfc00000\\xfc00000\\xfc00000\xfc", ""); /* https://bugs.php.net/bug.php?id=77371 */
+ x2("000||0\xfa", "0", 0, 0); /* https://bugs.php.net/bug.php?id=77381 */
+ e("(?i)000000000000000000000\xf0", "", ONIGERR_INVALID_CODE_POINT_VALUE); /* https://bugs.php.net/bug.php?id=77382 */
+ n("0000\\\xf5", "0"); /* https://bugs.php.net/bug.php?id=77385 */
+ n("(?i)FFF00000000000000000\xfd", ""); /* https://bugs.php.net/bug.php?id=77394 */
+
x2("\\p{Common}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */
x2("\\p{In_Enclosed_CJK_Letters_and_Months}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */
+ e("\\x{7fffffff}", "", ONIGERR_TOO_BIG_WIDE_CHAR_VALUE);
+ e("[\\x{7fffffff}]", "", ONIGERR_INVALID_CODE_POINT_VALUE);
e("\\u040", "@", ONIGERR_INVALID_CODE_POINT_VALUE);
e("(?<abc>\\g<abc>)", "zzzz", ONIGERR_NEVER_ENDING_RECURSION);
e("(?<=(?>abc))", "abc", ONIGERR_INVALID_LOOK_BEHIND_PATTERN);
diff --git a/test/testu.c b/test/testu.c
index 4b053e5..397da95 100644
--- a/test/testu.c
+++ b/test/testu.c
@@ -116,28 +116,13 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not)
#else
regex_t* reg;
- OnigCompileInfo ci;
OnigErrorInfo einfo;
uconv(pattern, cpat, ulen(pattern));
uconv(str, cstr, ulen(str));
-#if 0
r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + ulen(pattern)),
ONIG_OPTION_DEFAULT, ENC, ONIG_SYNTAX_DEFAULT, &einfo);
-#else
- ci.num_of_elements = 5;
- ci.pattern_enc = ENC;
- ci.target_enc = ENC;
- ci.syntax = ONIG_SYNTAX_DEFAULT;
- ci.option = ONIG_OPTION_DEFAULT;
- ci.case_fold_flag = ONIGENC_CASE_FOLD_DEFAULT;
-
- r = onig_new_deluxe(&reg, (UChar* )pattern,
- (UChar* )(pattern + ulen(pattern)),
- &ci, &einfo);
-#endif
-
if (r) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
onig_error_code_to_str((UChar* )s, r, &einfo);