summary refs log tree commit diff
path: root/test/test_utf8.c
diff options
context:
space:
mode:
Diffstat (limited to 'test/test_utf8.c')
-rw-r--r-- test/test_utf8.c 106
1 files changed, 99 insertions, 7 deletions
diff --git a/test/test_utf8.c b/test/test_utf8.c
index 02aa06b..bab6b0d 100644
--- a/test/test_utf8.c
+++ b/test/test_utf8.c
@@ -1,5 +1,6 @@
/*
- * This program was generated by testconv.rb.
+ * test_utf8.c
+ * Copyright (c) 2019 K.Kosako
*/
#include "config.h"
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
@@ -29,7 +30,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not,
OnigErrorInfo einfo;
r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)),
- ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo);
+ ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo);
if (r) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
@@ -54,14 +55,14 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not,
}
r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)),
- (UChar* )str, (UChar* )(str + SLEN(str)),
- region, ONIG_OPTION_NONE);
+ (UChar* )str, (UChar* )(str + SLEN(str)),
+ region, ONIG_OPTION_NONE);
if (r < ONIG_MISMATCH) {
char s[ONIG_MAX_ERROR_MESSAGE_LEN];
if (error_no == 0) {
onig_error_code_to_str((UChar* )s, r);
- fprintf(err_file, "ERROR: %s\n", s);
+ fprintf(err_file, "ERROR: %s /%s/\n", s, pattern);
nerror++;
}
else {
@@ -101,7 +102,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not,
}
else {
fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str,
- from, to, region->beg[mem], region->end[mem]);
+ from, to, region->beg[mem], region->end[mem]);
nfail++;
}
}
@@ -141,7 +142,9 @@ extern int main(int argc, char* argv[])
x2("", "", 0, 0);
x2("^", "", 0, 0);
+ x2("^a", "\na", 1, 2);
x2("$", "", 0, 0);
+ x2("$\\O", "bb\n", 2, 3);
x2("\\G", "", 0, 0);
x2("\\A", "", 0, 0);
x2("\\Z", "", 0, 0);
@@ -176,6 +179,7 @@ extern int main(int argc, char* argv[])
n("\\D", "4");
x2("\\b", "z ", 0, 0);
x2("\\b", " z", 1, 1);
+ x2("\\b", " z ", 2, 2);
x2("\\B", "zz ", 1, 1);
x2("\\B", "z ", 2, 2);
x2("\\B", " z", 0, 0);
@@ -576,6 +580,7 @@ extern int main(int argc, char* argv[])
x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5);
x2("()*\\1", "", 0, 0);
x2("(?:()|())*\\1\\2", "", 0, 0);
+ x2("(?:a*|b*)*c", "abadc", 4, 5);
x3("(?:\\1a|())*", "a", 0, 0, 1);
x2("x((.)*)*x", "0x1x2x3", 1, 6);
x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9);
@@ -585,6 +590,10 @@ extern int main(int argc, char* argv[])
n("[0-9-a]", ":"); // PR#44
x3("(\\(((?:[^(]|\\g<1>)*)\\))", "(abc)(abc)", 1, 4, 2); // PR#43
x2("\\o{101}", "A", 0, 1);
+ x2("\\A(a|b\\g<1>c)\\k<1+3>\\z", "bbacca", 0, 6);
+ n("\\A(a|b\\g<1>c)\\k<1+3>\\z", "bbaccb");
+ x2("(?i)\\A(a|b\\g<1>c)\\k<1+2>\\z", "bBACcbac", 0, 8);
+ x2("(?i)(?<X>aa)|(?<X>bb)\\k<X>", "BBbb", 0, 4);
x2("(?:\\k'+1'B|(A)C)*", "ACAB", 0, 4); // relative backref by positive number
x2("\\g<+2>(abc)(ABC){0}", "ABCabc", 0, 6); // relative call by positive number
x2("A\\g'0'|B()", "AAAAB", 0, 5);
@@ -626,6 +635,47 @@ extern int main(int argc, char* argv[])
x2("(?:()|()|())*\\3\\1", "abc", 0, 0);
x2("(|(?:a(?:\\g'1')*))b|", "abc", 0, 2);
x2("^(\"|)(.*)\\1$", "XX", 0, 2);
+ x2("(abc|def|ghi|jkl|mno|pqr|stu){0,10}?\\z", "admno", 2, 5);
+ x2("(abc|(def|ghi|jkl|mno|pqr){0,7}?){5}\\z", "adpqrpqrpqr", 2, 11); // cover OP_REPEAT_INC_NG_SG
+ x2("(?!abc).*\\z", "abcde", 1, 5); // cover OP_PREC_READ_NOT_END
+ x2("(.{2,})?", "abcde", 0, 5); // up coverage
+ x2("((a|b|c|d|e|f|g|h|i|j|k|l|m|n)+)?", "abcde", 0, 5); // up coverage
+ x2("((a|b|c|d|e|f|g|h|i|j|k|l|m|n){3,})?", "abcde", 0, 5); // up coverage
+ x2("((?:a(?:b|c|d|e|f|g|h|i|j|k|l|m|n))+)?", "abacadae", 0, 8); // up coverage
+ x2("((?:a(?:b|c|d|e|f|g|h|i|j|k|l|m|n))+?)?z", "abacadaez", 0, 9); // up coverage
+ x2("\\A((a|b)\?\?)?z", "bz", 0, 2); // up coverage
+ x2("((?<x>abc){0}a\\g<x>d)+", "aabcd", 0, 5); // up coverage
+ x2("((?(abc)true|false))+", "false", 0, 5); // up coverage
+ x2("((?i:abc)d)+", "abcdABCd", 0, 8); // up coverage
+ x2("((?<!abc)def)+", "bcdef", 2, 5); // up coverage
+ x2("(\\ba)+", "aaa", 0, 1); // up coverage
+ x2("()(?<x>ab)(?(<x>)a|b)", "aba", 0, 3); // up coverage
+ x2("(?<=a.b)c", "azbc", 3, 4); // up coverage
+ n("(?<=(?:abcde){30})z", "abc"); // up coverage
+ x2("(?<=(?(a)a|bb))z", "aaz", 2, 3); // up coverage
+ x2("[a]*\\W", "aa@", 0, 3); // up coverage
+ x2("[a]*[b]", "aab", 0, 3); // up coverage
+ n("a*\\W", "aaa"); // up coverage
+ n("(?W)a*\\W", "aaa"); // up coverage
+ x2("(?<=ab(?<=ab))", "ab", 2, 2); // up coverage
+ x2("(?<x>a)(?<x>b)(\\k<x>)+", "abbaab", 0, 6); // up coverage
+ x2("()(\\1)(\\2)", "abc", 0, 0); // up coverage
+ x2("((?(a)b|c))(\\1)", "abab", 0, 4); // up coverage
+ x2("(?<x>$|b\\g<x>)", "bbb", 0, 3); // up coverage
+ x2("(?<x>(?(a)a|b)|c\\g<x>)", "cccb", 0, 4); // up coverage
+ x2("(a)(?(1)a*|b*)+", "aaaa", 0, 4); // up coverage
+ x2("[[^abc]&&cde]*", "de", 0, 2); // up coverage
+ n("(a){10}{10}", "aa"); // up coverage
+ x2("(?:a?)+", "aa", 0, 2); // up coverage
+ x2("(?:a?)*?", "a", 0, 0); // up coverage
+ x2("(?:a*)*?", "a", 0, 0); // up coverage
+ x2("(?:a+?)*", "a", 0, 1); // up coverage
+ x2("\\h", "5", 0, 1); // up coverage
+ x2("\\H", "z", 0, 1); // up coverage
+ x2("[\\h]", "5", 0, 1); // up coverage
+ x2("[\\H]", "z", 0, 1); // up coverage
+ x2("[\\o{101}]", "A", 0, 1); // up coverage
+ x2("[\\u0041]", "A", 0, 1); // up coverage
x2("(?~)", "", 0, 0);
x2("(?~)", "A", 0, 0);
@@ -957,6 +1007,8 @@ extern int main(int argc, char* argv[])
x2("a<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 44);
x2(".<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 44);
x2("\\n?\\z", "こんにちは", 15, 15);
+ x2("(?m).*", "青赤黄", 0, 9);
+ x2("(?m).*a", "青赤黄a", 0, 10);
x2("\\p{Hiragana}", "ぴ", 0, 3);
n("\\P{Hiragana}", "ぴ");
@@ -1095,6 +1147,31 @@ extern int main(int argc, char* argv[])
// a + COMBINING GRAVE ACCENT (U+0300)
x2("h\\Xllo", "ha\xCC\x80llo", 0, 7);
+ // Text Segment: Extended Grapheme Cluster <-> Word Boundary
+ x2("(?y{g})\\yabc\\y", "abc", 0, 3);
+ x2("(?y{g})\\y\\X\\y", "abc", 0, 1);
+ x2("(?y{w})\\yabc\\y", "abc", 0, 3); // WB1, WB2
+ x2("(?y{w})\\X", "\r\n", 0, 2); // WB3
+ x2("(?y{w})\\X", "\x0cz", 0, 1); // WB3a
+ x2("(?y{w})\\X", "q\x0c", 0, 1); // WB3b
+ x2("(?y{w})\\X", "\xE2\x80\x8D\xE2\x9D\x87", 0, 6); // WB3c
+ x2("(?y{w})\\X", "\x20\x20", 0, 2); // WB3d
+ x2("(?y{w})\\X", "a\xE2\x80\x8D", 0, 4); // WB4
+ x2("(?y{w})\\y\\X\\y", "abc", 0, 3); // WB5
+ x2("(?y{w})\\y\\X\\y", "v\xCE\x87w", 0, 4); // WB6, WB7
+ x2("(?y{w})\\y\\X\\y", "\xD7\x93\x27", 0, 3); // WB7a
+ x2("(?y{w})\\y\\X\\y", "\xD7\x93\x22\xD7\x93", 0, 5); // WB7b, WB7c
+ x2("(?y{w})\\X", "14 45", 0, 2); // WB8
+ x2("(?y{w})\\X", "a14", 0, 3); // WB9
+ x2("(?y{w})\\X", "832e", 0, 4); // WB10
+ x2("(?y{w})\\X", "8\xEF\xBC\x8C\xDB\xB0", 0, 6); // WB11, WB12
+ x2("(?y{w})\\y\\X\\y", "ケン", 0, 6); // WB13
+ x2("(?y{w})\\y\\X\\y", "ケン\xE2\x80\xAFタ", 0, 12); // WB13a, WB13b
+ x2("(?y{w})\\y\\X\\y", "\x21\x23", 0, 1); // WB999
+ x2("(?y{w})\\y\\X\\y", "山ア", 0, 3);
+ x2("(?y{w})\\X", "3.14", 0, 4);
+ x2("(?y{w})\\X", "3 14", 0, 1);
+
x2("\\x40", "@", 0, 1);
x2("\\x1", "\x01", 0, 1);
x2("\\x{1}", "\x01", 0, 1);
@@ -1104,6 +1181,7 @@ extern int main(int argc, char* argv[])
x2("c.*\\b", "abc", 2, 3);
x2("\\b.*abc.*\\b", "abc", 0, 3);
+ x2("((?()0+)+++(((0\\g<0>)0)|())++++((?(1)(0\\g<0>))++++++0*())++++((?(1)(0\\g<1>)+)++++++++++*())++++((?(1)((0)\\g<0>)+)++())+0++*+++(((0\\g<0>))*())++++((?(1)(0\\g<0>)+)++++++++++*|)++++*+++((?(1)((0)\\g<0>)+)+++++++++())++*|)++++((?()0))|", "abcde", 0, 0); // #139
n("(*FAIL)", "abcdefg");
n("abcd(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)", "abcdefg");
@@ -1113,6 +1191,20 @@ extern int main(int argc, char* argv[])
x2("(?(?{....})123|456)", "123", 0, 3);
x2("(?(*FAIL)123|456)", "456", 0, 3);
+ x2("\\g'0'++{,0}", "abcdefgh", 0, 0);
+ x2("\\g'0'++{,0}?", "abcdefgh", 0, 0);
+ x2("\\g'0'++{,0}b", "abcdefgh", 1, 2);
+ x2("\\g'0'++{,0}?def", "abcdefgh", 3, 6);
+ n("a{2,3}?", "a");
+ n("a{3,2}a", "aaa");
+ x2("a{3,2}b", "aaab", 0, 4);
+ x2("a{3,2}b", "aaaab", 1, 5);
+ x2("a{3,2}b", "aab", 0, 3);
+ x2("a{3,2}?", "", 0, 0); /* == (?:a{3,2})?*/
+ x2("a{2,3}+a", "aaa", 0, 3); /* == (?:a{2,3})+*/
+
+ x2("\\p{Common}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */
+ x2("\\p{In_Enclosed_CJK_Letters_and_Months}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */
e("\\u040", "@", ONIGERR_INVALID_CODE_POINT_VALUE);
e("(?<abc>\\g<abc>)", "zzzz", ONIGERR_NEVER_ENDING_RECURSION);
@@ -1120,7 +1212,7 @@ extern int main(int argc, char* argv[])
e("(*FOO)", "abcdefg", ONIGERR_UNDEFINED_CALLOUT_NAME);
fprintf(stdout,
- "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n",
+ "\nRESULT SUCC: %4d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n",
nsucc, nfail, nerror, onig_version());
onig_region_free(region, 1);