From 35e13e4c9637f5bf7bef6039c8c813207780a174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Mon, 20 Nov 2023 20:10:50 +0100 Subject: New upstream version 6.9.9 --- test/CMakeLists.txt | 55 ++++++++++++++++++++++++++++++++++ test/test.sh | 17 +++++++++++ test/test_options.c | 6 +++- test/test_syntax.c | 44 +++++++++++++++++++++++++++ test/test_utf8.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 207 insertions(+), 1 deletion(-) create mode 100644 test/CMakeLists.txt create mode 100755 test/test.sh (limited to 'test') diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt new file mode 100644 index 0000000..a508f6a --- /dev/null +++ b/test/CMakeLists.txt @@ -0,0 +1,55 @@ +if(MSVC) + if(MSVC_VERSION LESS "1900") + # < VS2015, no "/utf-8" option, can not build test + return() + endif() +endif() + +enable_testing() + +add_executable(test_utf8 test_utf8.c) +target_link_libraries(test_utf8 onig) +if(MSVC) + target_compile_options(test_utf8 PRIVATE /utf-8) +endif(MSVC) + +add_executable(test_syntax test_syntax.c) +target_link_libraries(test_syntax onig) +if(MSVC) + target_compile_options(test_syntax PRIVATE /utf-8) +endif(MSVC) + +add_executable(test_options test_options.c) +target_link_libraries(test_options onig) +if(MSVC) + target_compile_options(test_options PRIVATE /utf-8) +endif(MSVC) + +if(NOT MSVC) + # EUC + add_executable(testc testc.c) + target_link_libraries(testc onig) + if (CMAKE_C_COMPILER_ID MATCHES "Clang|GNU") + target_compile_options(testc PRIVATE -Wall -Wno-invalid-source-encoding) + endif() +endif(NOT MSVC) + +if(ENABLE_POSIX_API) + add_executable(testp testp.c) + target_link_libraries(testp onig) +endif() + +add_executable(testcu testu.c) +target_link_libraries(testcu onig) + +add_executable(test_regset test_regset.c) +target_link_libraries(test_regset onig) +if(MSVC) + target_compile_options(test_regset PRIVATE /utf-8) +endif(MSVC) + +add_executable(test_back test_back.c) +target_link_libraries(test_back onig) +if(MSVC) + target_compile_options(test_back PRIVATE /utf-8) +endif(MSVC) diff --git a/test/test.sh b/test/test.sh new file mode 100755 index 0000000..08c52a8 --- /dev/null +++ b/test/test.sh @@ -0,0 +1,17 @@ +#!/bin/sh + +echo "[Oniguruma API, UTF-8 check]" +./test_utf8 | grep RESULT +echo "[Oniguruma API, SYNTAX check]" +./test_syntax | grep RESULT +echo "[Oniguruma API, Options check]" +./test_options | grep RESULT +echo "[Oniguruma API, EUC-JP check]" +./testc | grep RESULT +echo "[Oniguruma API, UTF-16 check]" +./testcu | grep RESULT +echo "" +echo "[Oniguruma API, regset check]" +./test_regset +echo "[Oniguruma API, backward search check]" +./test_back | grep RESULT diff --git a/test/test_options.c b/test/test_options.c index 7010f0f..ce92ee1 100644 --- a/test/test_options.c +++ b/test/test_options.c @@ -197,6 +197,11 @@ extern int main(int argc, char* argv[]) n(ONIG_OPTION_NOT_END_STRING, "ab\\Z", "ab"); n(ONIG_OPTION_NOT_END_STRING, "ab\\Z", "ab\n"); + x2(ONIG_OPTION_NONE, "a|abc", "abc", 0, 1); + x2(ONIG_OPTION_NONE, "(a|abc)\\Z", "abc", 0, 3); + x2(ONIG_OPTION_MATCH_WHOLE_STRING, "a|abc", "abc", 0, 3); + x2(ONIG_OPTION_MATCH_WHOLE_STRING, "a|abc", "a", 0, 1); + x2(ONIG_OPTION_WORD_IS_ASCII, "\\w", "@g", 1, 2); n(ONIG_OPTION_WORD_IS_ASCII, "\\w", "あ"); x2(ONIG_OPTION_NONE, "\\d", "1", 0, 3); @@ -219,6 +224,5 @@ extern int main(int argc, char* argv[]) onig_region_free(region, 1); onig_end(); - return ((nfail == 0 && nerror == 0) ? 0 : -1); } diff --git a/test/test_syntax.c b/test/test_syntax.c index b501ccd..b07fe67 100644 --- a/test/test_syntax.c +++ b/test/test_syntax.c @@ -259,6 +259,30 @@ static int test_python_single_multi() return 0; } +static int test_BRE_anchors() +{ + x2("a\\^b", "a^b", 0, 3); + x2("a^b", "a^b", 0, 3); + x2("a\\$b", "a$b", 0, 3); + x2("a$b", "a$b", 0, 3); + + x2("^ab", "ab", 0, 2); + x2("(^ab)", "(^ab)", 0, 5); + x2("\\(^ab\\)", "ab", 0, 2); + x2("\\\\(^ab\\\\)", "\\(^ab\\)", 0, 7); + n("\\\\\\(^ab\\\\\\)", "\\ab\\"); + x2("^\\\\\\(ab\\\\\\)", "\\ab\\", 0, 4); + + x2("ab$", "ab", 0, 2); + x2("(ab$)", "(ab$)", 0, 5); + x2("\\(ab$\\)", "ab", 0, 2); + x2("\\\\(ab$\\\\)", "\\(ab$\\)", 0, 7); + n("\\\\\\(ab$\\\\\\)", "\\ab\\"); + x2("\\\\\\(ab\\\\\\)$", "\\ab\\", 0, 4); + + return 0; +} + extern int main(int argc, char* argv[]) { OnigEncoding use_encs[1]; @@ -309,6 +333,26 @@ extern int main(int argc, char* argv[]) x2("\\U00000041", "A", 0, 1); e("\\U0041", "A", ONIGERR_INVALID_CODE_POINT_VALUE); + Syntax = ONIG_SYNTAX_POSIX_BASIC; + test_BRE_anchors(); + + Syntax = ONIG_SYNTAX_GREP; + test_BRE_anchors(); + x2("zz\\|^ab", "ab", 0, 2); + x2("ab$\\|zz", "ab", 0, 2); + + Syntax = ONIG_SYNTAX_PERL_NG; + + x2("(?i)test", "test", 0, 4); + x2("(?-i)test", "test", 0, 4); + x2("(?i)test", "TEST", 0, 4); + n("(?-i)test", "teSt"); + x2("(?i)te(?-i)st", "TEst", 0, 4); + n("(?i)te(?-i)st", "TesT"); + + x2("(abc)(?-1)", "abcabc", 0, 6); + x2("(?+1)(abc)", "abcabc", 0, 6); + x2("(abc)(?1)", "abcabc", 0, 6); fprintf(stdout, "\nRESULT SUCC: %4d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", diff --git a/test/test_utf8.c b/test/test_utf8.c index 75392e8..77149d7 100644 --- a/test/test_utf8.c +++ b/test/test_utf8.c @@ -226,6 +226,27 @@ extern int main(int argc, char* argv[]) x2("[*[:xdigit:]+]", "-@^+", 3, 4); n("[[:upper]]", "A"); x2("[[:upper]]", ":", 0, 1); + n("[[:upper:]]", "a"); + x2("[[:^upper:]]", "a", 0, 1); + n("[[:lower:]]", "A"); + x2("[[:^lower:]]", "A", 0, 1); + + // Issue #253 + e("[[:::]", ":[", ONIGERR_PREMATURE_END_OF_CHAR_CLASS); + e("[[:\\]:]", ":]", ONIGERR_PREMATURE_END_OF_CHAR_CLASS); + e("[[:\\[:]", ":[", ONIGERR_PREMATURE_END_OF_CHAR_CLASS); + e("[[:\\]]", ":]", ONIGERR_PREMATURE_END_OF_CHAR_CLASS); + e("[[:u:]]", "", ONIGERR_INVALID_POSIX_BRACKET_TYPE); + e("[[:upp:]]", "", ONIGERR_INVALID_POSIX_BRACKET_TYPE); + e("[[:uppers:]]", "", ONIGERR_INVALID_POSIX_BRACKET_TYPE); + x2("[[:upper\\] :]]", "]", 0, 1); + + x2("[[::]]", ":", 0, 1); + x2("[[:::]]", ":", 0, 1); + x2("[[:\\]:]]*", ":]", 0, 2); + x2("[[:\\[:]]*", ":[", 0, 2); + x2("[[:\\]]]*", ":]", 0, 2); + x2("[\\044-\\047]", "\046", 0, 1); x2("[\\x5a-\\x5c]", "\x5b", 0, 1); x2("[\\x6A-\\x6D]", "\x6c", 0, 1); @@ -1623,6 +1644,8 @@ extern int main(int argc, char* argv[]) e("(?Ii)|(?Ii)", "", ONIGERR_INVALID_GROUP_OPTION); x2("a*", "aabcaaa", 0, 2); x2("(?L)a*", "aabcaaa", 4, 7); + x2("(?L)a{4}|a{3}|b*", "baaaaabbb", 1, 5); + x2("(?L)a{3}|a{4}|b*", "baaaaabbb", 1, 5); e("x(?L)xxxxx", "", ONIGERR_INVALID_GROUP_OPTION); e("(?-L)x", "", ONIGERR_INVALID_GROUP_OPTION); x3("(..)\\1", "abab", 0, 2, 1); @@ -1630,6 +1653,69 @@ extern int main(int argc, char* argv[]) e("(?-C)", "", ONIGERR_INVALID_GROUP_OPTION); e("(?C)(.)(.)(.)(?.)\\1", "abcdd", ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); + // Issue #264 + n("(?iI)s", "\xc5\xbf"); + n("(?iI)[s]", "\xc5\xbf"); // FAIL + n("(?iI:s)", "\xc5\xbf"); + n("(?iI:[s])", "\xc5\xbf"); // FAIL + x2("(?iI)(?:[[:word:]])", "\xc5\xbf", 0, 2); + n("(?iI)(?W:[[:word:]])", "\xc5\xbf"); // FAIL + n("(?iI)(?W:\\w)", "\xc5\xbf"); + n("(?iI)(?W:[\\w])", "\xc5\xbf"); // FAIL + n("(?iI)(?W:\\p{Word})", "\xc5\xbf"); + n("(?iI)(?W:[\\p{Word}])", "\xc5\xbf"); // FAIL + + x2("(?iW:[[:word:]])", "\xc5\xbf", 0, 2); + x2("(?iW:[\\p{Word}])", "\xc5\xbf", 0, 2); + x2("(?iW:[\\w])", "\xc5\xbf", 0, 2); + n("(?iW:\\p{Word})", "\xc5\xbf"); + n("(?iW:\\w)", "\xc5\xbf"); + x2("(?i)\\p{Word}", "\xc5\xbf", 0, 2); + x2("(?i)\\w", "\xc5\xbf", 0, 2); + + x2("(?iW:[[:^word:]])", "\xc5\xbf", 0, 2); + x2("(?iW:[\\P{Word}])", "\xc5\xbf", 0, 2); + x2("(?iW:[\\W])", "\xc5\xbf", 0, 2); + x2("(?iW:\\P{Word})", "\xc5\xbf", 0, 2); + x2("(?iW:\\W)", "\xc5\xbf", 0, 2); + n("(?i)\\P{Word}", "\xc5\xbf"); + n("(?i)\\W", "\xc5\xbf"); + + x2("(?iW:[[:^word:]])", "s", 0, 1); + x2("(?iW:[\\P{Word}])", "s", 0, 1); + x2("(?iW:[\\W])", "s", 0, 1); + n("(?iW:\\P{Word})", "s"); + n("(?iW:\\W)", "s"); + n("(?i)\\P{Word}", "s"); + n("(?i)\\W", "s"); + + x2("[[:punct:]]", ":", 0, 1); + x2("[[:punct:]]", "$", 0, 1); + x2("[[:punct:]]+", "$+<=>^`|~", 0, 9); + n("[[:punct:]]", "a"); + n("[[:punct:]]", "7"); + x2("\\p{PosixPunct}+", "$¦", 0, 3); + + x2("\\A.*\\R", "\n", 0, 1); + x2("\\A\\O*\\R", "\n", 0, 1); + x2("\\A\\n*\\R", "\n", 0, 1); + x2("\\A\\R*\\R", "\n", 0, 1); + x2("\\At*\\R", "\n", 0, 1); + + x2("\\A.{0,99}\\R", "\n", 0, 1); + x2("\\A\\O{0,99}\\R", "\n", 0, 1); + x2("\\A\\n{0,99}\\R", "\n", 0, 1); + x2("\\A\\R{0,99}\\R", "\n", 0, 1); + x2("\\At{0,99}\\R", "\n", 0, 1); + + x2("\\A.*\\n", "\n", 0, 1); // \n + x2("\\A.{0,99}\\n", "\n", 0, 1); + x2("\\A.*\\O", "\n", 0, 1); // \O + x2("\\A.{0,99}\\O", "\n", 0, 1); + x2("\\A.*\\s", "\n", 0, 1); // \s + x2("\\A.{0,99}\\s", "\n", 0, 1); + + n("a(b|)+d", "abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbcd"); /* https://www.haijin-boys.com/discussions/5079 */ n(" \xfd", ""); /* https://bugs.php.net/bug.php?id=77370 */ /* can't use \xfc00.. because compiler error: hex escape sequence out of range */ -- cgit v1.2.3