/* * test_regset.c --- test for regset API * Copyright (c) 2019 K.Kosako */ #include #include #include #include #include "oniguruma.h" static int nsucc = 0; static int nfail = 0; static int nerror = 0; static int make_regset(int line_no, int n, char* pat[], OnigRegSet** rset, int error_no) { int r; int i; OnigRegSet* set; regex_t* reg; OnigErrorInfo einfo; *rset = NULL; r = onig_regset_new(&set, 0, NULL); if (r != 0) return r; for (i = 0; i < n; i++) { r = onig_new(®, (UChar* )pat[i], (UChar* )(pat[i] + strlen(pat[i])), ONIG_OPTION_DEFAULT, ONIG_ENCODING_UTF8, ONIG_SYNTAX_DEFAULT, &einfo); if (r != 0) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; if (error_no == 0) { onig_error_code_to_str((UChar* )s, r, &einfo); fprintf(stderr, "ERROR: %d: %s /%s/\n", line_no, s, pat[i]); nerror++; } else { if (r == error_no) { fprintf(stdout, "OK(ERROR): %d: /%s/ %d\n", line_no, pat[i], r); nsucc++; } else { fprintf(stdout, "FAIL(ERROR): %d: /%s/ %d, %d\n", line_no, pat[i], error_no, r); nfail++; } } return r; } r = onig_regset_add(set, reg); if (r != 0) { onig_regset_free(set); fprintf(stderr, "ERROR: %d: onig_regset_add(): /%s/\n", line_no, pat[i]); nerror++; return r; } } *rset = set; return 0; } static double get_sec(clock_t start, clock_t end) { double t; t = (double )(end - start) / CLOCKS_PER_SEC; return t; } /* use clock(), because clock_gettime() doesn't exist in Windows and old Unix. */ static int time_test(int repeat, int n, char* ps[], char* s, char* end, double* rt_set, double* rt_reg) { int r; int i; int match_pos; OnigRegSet* set; clock_t ts1, ts2; double t_set, t_reg; r = make_regset(0, n, ps, &set, 0); if (r != 0) return r; ts1 = clock(); for (i = 0; i < repeat; i++) { r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end, ONIG_REGSET_POSITION_LEAD, ONIG_OPTION_NONE, &match_pos); if (r < 0) { fprintf(stderr, "FAIL onig_regset_search(POSITION_LEAD): %d\n", r); return r; } } ts2 = clock(); t_set = get_sec(ts1, ts2); ts1 = clock(); for (i = 0; i < repeat; i++) { r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end, ONIG_REGSET_REGEX_LEAD, ONIG_OPTION_NONE, &match_pos); if (r < 0) { fprintf(stderr, "FAIL onig_regset_search(REGEX_LEAD): %d\n", r); return r; } } ts2 = clock(); t_reg = get_sec(ts1, ts2); onig_regset_free(set); *rt_set = t_set; *rt_reg = t_reg; return 0; } static void fisher_yates_shuffle(int n, char* ps[], char* cps[]) { #define GET_RAND(n) (rand()%(n+1)) #define SWAP(a,b) { char* tmp = a; a = b; b = tmp; } int i; for (i = 0; i < n; i++) cps[i] = ps[i]; for (i = n - 1; i > 0; i--) { int x = GET_RAND(i); SWAP(cps[i], cps[x]); } } static void time_compare(int n, char* ps[], char* s, char* end) { int r; int i; int repeat; double t_set, t_reg; double total_set, total_reg; char** cps; cps = (char** )malloc(sizeof(char*) * n); if (cps == 0) return ; repeat = 100 / n; total_set = total_reg = 0.0; for (i = 0; i < n; i++) { fisher_yates_shuffle(n, ps, cps); r = time_test(repeat, n, cps, s, end, &t_set, &t_reg); if (r != 0) return ; total_set += t_set; total_reg += t_reg; } free(cps); fprintf(stdout, "POS lead: %6.2lfmsec. REG lead: %6.2lfmsec.\n", total_set * 1000.0, total_reg * 1000.0); } static OnigRegSetLead XX_LEAD = ONIG_REGSET_POSITION_LEAD; static void xx(int line_no, int n, char* ps[], char* s, int from, int to, int mem, int not, int error_no) { int r; int match_pos; int match_index; OnigRegSet* set; char *end; r = make_regset(line_no, n, ps, &set, error_no); if (r != 0) return ; end = s + strlen(s); r = onig_regset_search(set, (UChar* )s, (UChar* )end, (UChar* )s, (UChar* )end, XX_LEAD, ONIG_OPTION_NONE, &match_pos); if (r < 0) { if (r == ONIG_MISMATCH) { if (not) { fprintf(stdout, "OK(N): %d\n", line_no); nsucc++; } else { fprintf(stdout, "FAIL: %d\n", line_no); nfail++; } } else { if (error_no == 0) { char buf[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str((UChar* )buf, r); fprintf(stderr, "ERROR: %d: %s\n", line_no, buf); nerror++; } else { if (r == error_no) { fprintf(stdout, "OK(ERROR): %d: %d\n", line_no, r); nsucc++; } else { fprintf(stdout, "FAIL ERROR NO: %d: %d, %d\n", line_no, error_no, r); nfail++; } } } } else { if (not) { fprintf(stdout, "FAIL(N): %d\n", line_no); nfail++; } else { OnigRegion* region; match_index = r; region = onig_regset_get_region(set, match_index); if (region == 0) { fprintf(stderr, "ERROR: %d: can't get region.\n", line_no); nerror++; return ; } if (region->beg[mem] == from && region->end[mem] == to) { fprintf(stdout, "OK: %d\n", line_no); nsucc++; } else { char buf[1000]; int len; len = region->end[mem] - region->beg[mem]; strncpy(buf, s + region->beg[mem], len); buf[len] = '\0'; fprintf(stdout, "FAIL: %d: %d-%d : %d-%d (%s)\n", line_no, from, to, region->beg[mem], region->end[mem], buf); nfail++; } } } onig_regset_free(set); } static void x2(int line_no, int n, char* ps[], char* s, int from, int to) { xx(line_no, n, ps, s, from, to, 0, 0, 0); } static void x3(int line_no, int n, char* ps[], char* s, int from, int to, int mem) { xx(line_no, n, ps, s, from, to, mem, 0, 0); } static void n(int line_no, int n, char* ps[], char* s) { xx(line_no, n, ps, s, 0, 0, 0, 1, 0); } #define ASIZE(a) sizeof(a)/sizeof(a[0]) #define X2(ps,s,from,to) x2(__LINE__,ASIZE(ps),ps,s,from,to) #define X3(ps,s,from,to,mem) x3(__LINE__,ASIZE(ps),ps,s,from,to,mem) #define N(ps,s) n(__LINE__,ASIZE(ps),ps,s) #define NZERO(s) n(__LINE__,0,(char** )0,s) #ifndef _WIN32 /* getdelim() doesn't exist in Windows */ static int get_all_content_of_file(char* path, char** rs, char** rend) { size_t len; size_t n; char* line; FILE* fp; fp = fopen(path, "r"); if (fp == 0) return -1; n = 0; line = NULL; len = getdelim(&line, &n, EOF, fp); fclose(fp); if (len < 0) return -2; *rs = line; *rend = line + len; return 0; } #endif #define TEXT_PATH "kofu-utf8.txt" /* --- To get kofu.txt --- $ wget https://www.aozora.gr.jp/cards/000148/files/774_ruby_1640.zip $ unzip 774_ruby_1640.zip $ nkf -Lu -w8 kofu.txt > kofu-utf8.txt (convert encoding to utf-8 with BOM and line terminator to be Unix-form) */ static char* p1[] = { "abc", "(bca)", "(cab)" }; static char* p2[] = { "小説", "9", "夏目漱石", }; static char* p3[] = { "^いる。", "^校正", "^底本", "^ 翌日", }; static char* p4[] = { "《[^》]{5}》", "《[^》]{6}》", "《[^》]{7}》", "《[^》]{8}》", "《[^》]{9}》", "《[^》]{10}》", "《[^》]{11}》", "《[^》]{12}》", "《[^》]{13}》", "《[^》]{14}》", "《[^》]{15}》", "《[^》]{16}》", "《[^》]{17}》", "《[^》]{18}》", "《[^》]{19}》", "《[^》]{20}》", }; static char* p5[] = { "小室圭", "bbbbbb", "ドナルド・トランプ", "筑摩書房", "松原", "aaaaaaaaa", "bbbbbbbbb", "ccccc", "ddddddddddd", "eee", "ffffffffffff", "gggggggggg", "hhhhhhhhhhhhhh", "iiiiiii", }; static char* p6[] = { "^.{1000,}", "松原", "小室圭", "ドナルド・トランプ", "筑摩書房", }; static char* p7[] = { "0+", "1+", "2+", "3+", "4+", "5+", "6+", "7+", "8+", "9+", }; static char* p8[] = {"a", ".*"}; extern int main(int argc, char* argv[]) { #ifndef _WIN32 int file_exist; #endif int r; char *s, *end; OnigEncoding use_encs[1]; use_encs[0] = ONIG_ENCODING_UTF8; onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); srand(12345); XX_LEAD = ONIG_REGSET_POSITION_LEAD; NZERO(" abab bccab ca"); X2(p1, " abab bccab ca", 8, 11); X3(p1, " abab bccab ca", 8, 11, 1); N(p2, " XXXX AAA 1223 012345678bbb"); X2(p2, "0123456789", 9, 10); X2(p7, "abcde 555 qwert", 6, 9); X2(p8, "", 0, 0); XX_LEAD = ONIG_REGSET_REGEX_LEAD; NZERO(" abab bccab ca"); X2(p1, " abab bccab ca", 8, 11); X3(p1, " abab bccab ca", 8, 11, 1); N(p2, " XXXX AAA 1223 012345678bbb"); X2(p2, "0123456789", 9, 10); X2(p7, "abcde 555 qwert", 6, 9); #ifndef _WIN32 r = get_all_content_of_file(TEXT_PATH, &s, &end); if (r == 0) { fprintf(stdout, "FILE: %s, size: %d\n", TEXT_PATH, (int )(end - s)); file_exist = 1; } else { fprintf(stdout, "Ignore %s\n", TEXT_PATH); file_exist = 0; } if (file_exist != 0) { X2(p2, s, 10, 22); X2(p3, s, 496079, 496088); X2(p4, s, 1294, 1315); } #endif fprintf(stdout, "\nRESULT SUCC: %4d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", nsucc, nfail, nerror, onig_version()); #ifndef _WIN32 if (file_exist != 0) { fprintf(stdout, "\n"); time_compare(ASIZE(p2), p2, s, end); time_compare(ASIZE(p3), p3, s, end); time_compare(ASIZE(p4), p4, s, end); time_compare(ASIZE(p5), p5, s, end); time_compare(ASIZE(p6), p6, s, end); fprintf(stdout, "\n"); free(s); } #endif onig_end(); return ((nfail == 0 && nerror == 0) ? 0 : -1); }