diff options
Diffstat (limited to 'test/test_utf8.c')
-rw-r--r-- | test/test_utf8.c | 106 |
1 files changed, 99 insertions, 7 deletions
diff --git a/test/test_utf8.c b/test/test_utf8.c index 02aa06b..bab6b0d 100644 --- a/test/test_utf8.c +++ b/test/test_utf8.c @@ -1,5 +1,6 @@ /* - * This program was generated by testconv.rb. + * test_utf8.c + * Copyright (c) 2019 K.Kosako */ #include "config.h" #ifdef ONIG_ESCAPE_UCHAR_COLLISION @@ -29,7 +30,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not, OnigErrorInfo einfo; r = onig_new(&reg, (UChar* )pattern, (UChar* )(pattern + SLEN(pattern)), - ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo); + ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo); if (r) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; @@ -54,14 +55,14 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not, } r = onig_search(reg, (UChar* )str, (UChar* )(str + SLEN(str)), - (UChar* )str, (UChar* )(str + SLEN(str)), - region, ONIG_OPTION_NONE); + (UChar* )str, (UChar* )(str + SLEN(str)), + region, ONIG_OPTION_NONE); if (r < ONIG_MISMATCH) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; if (error_no == 0) { onig_error_code_to_str((UChar* )s, r); - fprintf(err_file, "ERROR: %s\n", s); + fprintf(err_file, "ERROR: %s /%s/\n", s, pattern); nerror++; } else { @@ -101,7 +102,7 @@ static void xx(char* pattern, char* str, int from, int to, int mem, int not, } else { fprintf(stdout, "FAIL: /%s/ '%s' %d-%d : %d-%d\n", pattern, str, - from, to, region->beg[mem], region->end[mem]); + from, to, region->beg[mem], region->end[mem]); nfail++; } } @@ -141,7 +142,9 @@ extern int main(int argc, char* argv[]) x2("", "", 0, 0); x2("^", "", 0, 0); + x2("^a", "\na", 1, 2); x2("$", "", 0, 0); + x2("$\\O", "bb\n", 2, 3); x2("\\G", "", 0, 0); x2("\\A", "", 0, 0); x2("\\Z", "", 0, 0); @@ -176,6 +179,7 @@ extern int main(int argc, char* argv[]) n("\\D", "4"); x2("\\b", "z ", 0, 0); x2("\\b", " z", 1, 1); + x2("\\b", " z ", 2, 2); x2("\\B", "zz ", 1, 1); x2("\\B", "z ", 2, 2); x2("\\B", " z", 0, 0); @@ -576,6 +580,7 @@ extern int main(int argc, char* 
argv[]) x2("(?<pare>\\(([^\\(\\)]++|\\g<pare>)*+\\))", "((a))", 0, 5); x2("()*\\1", "", 0, 0); x2("(?:()|())*\\1\\2", "", 0, 0); + x2("(?:a*|b*)*c", "abadc", 4, 5); x3("(?:\\1a|())*", "a", 0, 0, 1); x2("x((.)*)*x", "0x1x2x3", 1, 6); x2("x((.)*)*x(?i:\\1)\\Z", "0x1x2x1X2", 1, 9); @@ -585,6 +590,10 @@ extern int main(int argc, char* argv[]) n("[0-9-a]", ":"); // PR#44 x3("(\\(((?:[^(]|\\g<1>)*)\\))", "(abc)(abc)", 1, 4, 2); // PR#43 x2("\\o{101}", "A", 0, 1); + x2("\\A(a|b\\g<1>c)\\k<1+3>\\z", "bbacca", 0, 6); + n("\\A(a|b\\g<1>c)\\k<1+3>\\z", "bbaccb"); + x2("(?i)\\A(a|b\\g<1>c)\\k<1+2>\\z", "bBACcbac", 0, 8); + x2("(?i)(?<X>aa)|(?<X>bb)\\k<X>", "BBbb", 0, 4); x2("(?:\\k'+1'B|(A)C)*", "ACAB", 0, 4); // relative backref by postitive number x2("\\g<+2>(abc)(ABC){0}", "ABCabc", 0, 6); // relative call by positive number x2("A\\g'0'|B()", "AAAAB", 0, 5); @@ -626,6 +635,47 @@ extern int main(int argc, char* argv[]) x2("(?:()|()|())*\\3\\1", "abc", 0, 0); x2("(|(?:a(?:\\g'1')*))b|", "abc", 0, 2); x2("^(\"|)(.*)\\1$", "XX", 0, 2); + x2("(abc|def|ghi|jkl|mno|pqr|stu){0,10}?\\z", "admno", 2, 5); + x2("(abc|(def|ghi|jkl|mno|pqr){0,7}?){5}\\z", "adpqrpqrpqr", 2, 11); // cover OP_REPEAT_INC_NG_SG + x2("(?!abc).*\\z", "abcde", 1, 5); // cover OP_PREC_READ_NOT_END + x2("(.{2,})?", "abcde", 0, 5); // up coverage + x2("((a|b|c|d|e|f|g|h|i|j|k|l|m|n)+)?", "abcde", 0, 5); // up coverage + x2("((a|b|c|d|e|f|g|h|i|j|k|l|m|n){3,})?", "abcde", 0, 5); // up coverage + x2("((?:a(?:b|c|d|e|f|g|h|i|j|k|l|m|n))+)?", "abacadae", 0, 8); // up coverage + x2("((?:a(?:b|c|d|e|f|g|h|i|j|k|l|m|n))+?)?z", "abacadaez", 0, 9); // up coverage + x2("\\A((a|b)\?\?)?z", "bz", 0, 2); // up coverage + x2("((?<x>abc){0}a\\g<x>d)+", "aabcd", 0, 5); // up coverage + x2("((?(abc)true|false))+", "false", 0, 5); // up coverage + x2("((?i:abc)d)+", "abcdABCd", 0, 8); // up coverage + x2("((?<!abc)def)+", "bcdef", 2, 5); // up coverage + x2("(\\ba)+", "aaa", 0, 1); // up coverage + x2("()(?<x>ab)(?(<x>)a|b)", "aba", 
0, 3); // up coverage + x2("(?<=a.b)c", "azbc", 3, 4); // up coverage + n("(?<=(?:abcde){30})z", "abc"); // up coverage + x2("(?<=(?(a)a|bb))z", "aaz", 2, 3); // up coverage + x2("[a]*\\W", "aa@", 0, 3); // up coverage + x2("[a]*[b]", "aab", 0, 3); // up coverage + n("a*\\W", "aaa"); // up coverage + n("(?W)a*\\W", "aaa"); // up coverage + x2("(?<=ab(?<=ab))", "ab", 2, 2); // up coverage + x2("(?<x>a)(?<x>b)(\\k<x>)+", "abbaab", 0, 6); // up coverage + x2("()(\\1)(\\2)", "abc", 0, 0); // up coverage + x2("((?(a)b|c))(\\1)", "abab", 0, 4); // up coverage + x2("(?<x>$|b\\g<x>)", "bbb", 0, 3); // up coverage + x2("(?<x>(?(a)a|b)|c\\g<x>)", "cccb", 0, 4); // up coverage + x2("(a)(?(1)a*|b*)+", "aaaa", 0, 4); // up coverage + x2("[[^abc]&&cde]*", "de", 0, 2); // up coverage + n("(a){10}{10}", "aa"); // up coverage + x2("(?:a?)+", "aa", 0, 2); // up coverage + x2("(?:a?)*?", "a", 0, 0); // up coverage + x2("(?:a*)*?", "a", 0, 0); // up coverage + x2("(?:a+?)*", "a", 0, 1); // up coverage + x2("\\h", "5", 0, 1); // up coverage + x2("\\H", "z", 0, 1); // up coverage + x2("[\\h]", "5", 0, 1); // up coverage + x2("[\\H]", "z", 0, 1); // up coverage + x2("[\\o{101}]", "A", 0, 1); // up coverage + x2("[\\u0041]", "A", 0, 1); // up coverage x2("(?~)", "", 0, 0); x2("(?~)", "A", 0, 0); @@ -957,6 +1007,8 @@ extern int main(int argc, char* argv[]) x2("a<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 44); x2(".<b>バージョンのダウンロード<\\/b>", "a<b>バージョンのダウンロード</b>", 0, 44); x2("\\n?\\z", "こんにちは", 15, 15); + x2("(?m).*", "青赤黄", 0, 9); + x2("(?m).*a", "青赤黄a", 0, 10); x2("\\p{Hiragana}", "ぴ", 0, 3); n("\\P{Hiragana}", "ぴ"); @@ -1095,6 +1147,31 @@ extern int main(int argc, char* argv[]) // a + COMBINING GRAVE ACCENT (U+0300) x2("h\\Xllo", "ha\xCC\x80llo", 0, 7); + // Text Segment: Extended Grapheme Cluster <-> Word Boundary + x2("(?y{g})\\yabc\\y", "abc", 0, 3); + x2("(?y{g})\\y\\X\\y", "abc", 0, 1); + x2("(?y{w})\\yabc\\y", "abc", 0, 3); // WB1, WB2 + x2("(?y{w})\\X", "\r\n", 0, 2); // WB3 
+ x2("(?y{w})\\X", "\x0cz", 0, 1); // WB3a + x2("(?y{w})\\X", "q\x0c", 0, 1); // WB3b + x2("(?y{w})\\X", "\xE2\x80\x8D\xE2\x9D\x87", 0, 6); // WB3c + x2("(?y{w})\\X", "\x20\x20", 0, 2); // WB3d + x2("(?y{w})\\X", "a\xE2\x80\x8D", 0, 4); // WB4 + x2("(?y{w})\\y\\X\\y", "abc", 0, 3); // WB5 + x2("(?y{w})\\y\\X\\y", "v\xCE\x87w", 0, 4); // WB6, WB7 + x2("(?y{w})\\y\\X\\y", "\xD7\x93\x27", 0, 3); // WB7a + x2("(?y{w})\\y\\X\\y", "\xD7\x93\x22\xD7\x93", 0, 5); // WB7b, WB7c + x2("(?y{w})\\X", "14 45", 0, 2); // WB8 + x2("(?y{w})\\X", "a14", 0, 3); // WB9 + x2("(?y{w})\\X", "832e", 0, 4); // WB10 + x2("(?y{w})\\X", "8\xEF\xBC\x8C\xDB\xB0", 0, 6); // WB11, WB12 + x2("(?y{w})\\y\\X\\y", "ケン", 0, 6); // WB13 + x2("(?y{w})\\y\\X\\y", "ケン\xE2\x80\xAFタ", 0, 12); // WB13a, WB13b + x2("(?y{w})\\y\\X\\y", "\x21\x23", 0, 1); // WB999 + x2("(?y{w})\\y\\X\\y", "山ア", 0, 3); + x2("(?y{w})\\X", "3.14", 0, 4); + x2("(?y{w})\\X", "3 14", 0, 1); + x2("\\x40", "@", 0, 1); x2("\\x1", "\x01", 0, 1); x2("\\x{1}", "\x01", 0, 1); @@ -1104,6 +1181,7 @@ extern int main(int argc, char* argv[]) x2("c.*\\b", "abc", 2, 3); x2("\\b.*abc.*\\b", "abc", 0, 3); + x2("((?()0+)+++(((0\\g<0>)0)|())++++((?(1)(0\\g<0>))++++++0*())++++((?(1)(0\\g<1>)+)++++++++++*())++++((?(1)((0)\\g<0>)+)++())+0++*+++(((0\\g<0>))*())++++((?(1)(0\\g<0>)+)++++++++++*|)++++*+++((?(1)((0)\\g<0>)+)+++++++++())++*|)++++((?()0))|", "abcde", 0, 0); // #139 n("(*FAIL)", "abcdefg"); n("abcd(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)(*FAIL)", "abcdefg"); @@ -1113,6 +1191,20 @@ extern int main(int argc, char* argv[]) x2("(?(?{....})123|456)", "123", 0, 3); x2("(?(*FAIL)123|456)", "456", 0, 3); + 
x2("\\g'0'++{,0}", "abcdefgh", 0, 0); + x2("\\g'0'++{,0}?", "abcdefgh", 0, 0); + x2("\\g'0'++{,0}b", "abcdefgh", 1, 2); + x2("\\g'0'++{,0}?def", "abcdefgh", 3, 6); + n("a{2,3}?", "a"); + n("a{3,2}a", "aaa"); + x2("a{3,2}b", "aaab", 0, 4); + x2("a{3,2}b", "aaaab", 1, 5); + x2("a{3,2}b", "aab", 0, 3); + x2("a{3,2}?", "", 0, 0); /* == (?:a{3,2})?*/ + x2("a{2,3}+a", "aaa", 0, 3); /* == (?:a{2,3})+*/ + + x2("\\p{Common}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */ + x2("\\p{In_Enclosed_CJK_Letters_and_Months}", "\xe3\x8b\xbf", 0, 3); /* U+32FF */ e("\\u040", "@", ONIGERR_INVALID_CODE_POINT_VALUE); e("(?<abc>\\g<abc>)", "zzzz", ONIGERR_NEVER_ENDING_RECURSION); @@ -1120,7 +1212,7 @@ extern int main(int argc, char* argv[]) e("(*FOO)", "abcdefg", ONIGERR_UNDEFINED_CALLOUT_NAME); fprintf(stdout, - "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", + "\nRESULT SUCC: %4d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", nsucc, nfail, nerror, onig_version()); onig_region_free(region, 1); |