From 6b986090d954dbac91bbb3c43ce7c3328c91a780 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Mon, 20 Apr 2020 20:33:51 +0200 Subject: New upstream version 6.9.5 --- harnesses/ascii_compatible.dict | 2 + harnesses/base.c | 499 ++++++++++++++++++++++++++++++++++++++ harnesses/deluxe-encode-harness.c | 204 ---------------- harnesses/deluxe.c | 206 ++++++++++++++++ harnesses/encode-harness.c | 365 ---------------------------- harnesses/fuzzer.options | 2 + harnesses/makefile | 35 ++- harnesses/regset-harness.c | 379 ----------------------------- harnesses/regset.c | 392 ++++++++++++++++++++++++++++++ 9 files changed, 1118 insertions(+), 966 deletions(-) create mode 100644 harnesses/base.c delete mode 100644 harnesses/deluxe-encode-harness.c create mode 100644 harnesses/deluxe.c delete mode 100644 harnesses/encode-harness.c create mode 100644 harnesses/fuzzer.options delete mode 100644 harnesses/regset-harness.c create mode 100644 harnesses/regset.c (limited to 'harnesses') diff --git a/harnesses/ascii_compatible.dict b/harnesses/ascii_compatible.dict index e6e00db..a3e978b 100644 --- a/harnesses/ascii_compatible.dict +++ b/harnesses/ascii_compatible.dict @@ -109,3 +109,5 @@ "\\N{name}" "\\p{Katakana}" "\\p{Emoji}" +"ss" +"SS" diff --git a/harnesses/base.c b/harnesses/base.c new file mode 100644 index 0000000..a88e6f2 --- /dev/null +++ b/harnesses/base.c @@ -0,0 +1,499 @@ +/* + * base.c contributed by Mark Griffin + * Copyright (c) 2019-2020 K.Kosako + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "oniguruma.h" + +#define PARSE_DEPTH_LIMIT 8 +#define RETRY_LIMIT 5000 +#define CALL_MAX_NEST_LEVEL 8 +//#define EXEC_PRINT_INTERVAL 500000 +//#define DUMP_DATA_INTERVAL 100000 +//#define STAT_PATH "fuzzer.stat_log" + +typedef unsigned char uint8_t; + +#ifdef DUMP_INPUT +static void +dump_input(unsigned char* data, size_t len) +{ + static FILE* DumpFp; + static char end[] = { 'E', 'N', 'D' }; + + 
if (DumpFp == 0) + DumpFp = fopen("dump-input", "w"); + + fseek(DumpFp, 0, SEEK_SET); + fwrite(data, sizeof(unsigned char), len, DumpFp); + fwrite(end, sizeof(char), sizeof(end), DumpFp); + fflush(DumpFp); +} +#endif + +#ifdef DUMP_DATA_INTERVAL +static void +dump_file(char* path, unsigned char* data, size_t len) +{ + FILE* fp; + + fp = fopen(path, "w"); + fwrite(data, sizeof(unsigned char), len, fp); + fclose(fp); +} +#endif + +#ifdef STANDALONE +#include <ctype.h> + +static void +dump_data(FILE* fp, unsigned char* data, int len) +{ + int i; + + fprintf(fp, "{\n"); + for (i = 0; i < len; i++) { + unsigned char c = data[i]; + + if (isprint((int )c)) { + if (c == '\\') + fprintf(fp, " '\\\\'"); + else + fprintf(fp, " '%c'", c); + } + else { + fprintf(fp, "0x%02x", (int )c); + } + + if (i == len - 1) { + fprintf(fp, "\n"); + } + else { + if (i % 8 == 7) + fprintf(fp, ",\n"); + else + fprintf(fp, ", "); + } + } + fprintf(fp, "};\n"); +} + +#else + +static void +output_current_time(FILE* fp) +{ + char d[64]; + time_t t; + + t = time(NULL); + strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); + + fprintf(fp, "%s", d); +} + +#endif + +static int +search(regex_t* reg, unsigned char* str, unsigned char* end, int backward) +{ + int r; + unsigned char *start, *range; + OnigRegion *region; + + region = onig_region_new(); + + if (backward != 0) { + start = end; + range = str; + } + else { + start = str; + range = end; + } + + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { +#ifdef STANDALONE + int i; + + fprintf(stdout, "match at %d (%s)\n", r, + ONIGENC_NAME(onig_get_encoding(reg))); + for (i = 0; i < region->num_regs; i++) { + fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } +#endif + } + else if (r == ONIG_MISMATCH) { +#ifdef STANDALONE + fprintf(stdout, "search fail (%s)\n", + ONIGENC_NAME(onig_get_encoding(reg))); +#endif + } + else { /* error */ +#ifdef STANDALONE + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + 
onig_error_code_to_str((UChar* )s, r); + fprintf(stdout, "ERROR: %s\n", s); + fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); +#endif + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + + if (r == ONIGERR_STACK_BUG || + r == ONIGERR_UNDEFINED_BYTECODE || + r == ONIGERR_UNEXPECTED_BYTECODE) + return -2; + + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + return 0; +} + +static long INPUT_COUNT; +static long EXEC_COUNT; +static long EXEC_COUNT_INTERVAL; +static long REGEX_SUCCESS_COUNT; +static long VALID_STRING_COUNT; + +static int +exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, + char* apattern, char* apattern_end, char* astr, UChar* end, int backward) +{ + int r; + regex_t* reg; + OnigErrorInfo einfo; + UChar* pattern = (UChar* )apattern; + UChar* str = (UChar* )astr; + UChar* pattern_end = (UChar* )apattern_end; + + EXEC_COUNT++; + EXEC_COUNT_INTERVAL++; + + onig_initialize(&enc, 1); + onig_set_retry_limit_in_search(RETRY_LIMIT); +#ifdef PARSE_DEPTH_LIMIT + onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); +#endif + onig_set_subexp_call_max_nest_level(CALL_MAX_NEST_LEVEL); + + r = onig_new(&reg, pattern, pattern_end, + options, enc, syntax, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); +#ifdef STANDALONE + fprintf(stdout, "ERROR: %s\n", s); +#endif + onig_end(); + + if (r == ONIGERR_PARSER_BUG || + r == ONIGERR_STACK_BUG || + r == ONIGERR_UNDEFINED_BYTECODE || + r == ONIGERR_UNEXPECTED_BYTECODE) { + return -2; + } + else + return -1; + } + REGEX_SUCCESS_COUNT++; + + r = search(reg, pattern, pattern_end, backward); + if (r == -2) return -2; + + if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { + VALID_STRING_COUNT++; + r = search(reg, str, end, backward); + if (r == -2) return -2; + } + + onig_free(reg); + onig_end(); + return 0; +} + +static int +alloc_exec(OnigEncoding enc, 
OnigOptionType options, OnigSyntaxType* syntax, + int backward, int pattern_size, size_t remaining_size, unsigned char *data) +{ + int r; + unsigned char *pattern_end; + unsigned char *str_null_end; + + // copy first PATTERN_SIZE bytes off to be the pattern + unsigned char *pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1); + memcpy(pattern, data, pattern_size); + pattern_end = pattern + pattern_size; + data += pattern_size; + remaining_size -= pattern_size; + +#if defined(UTF16_BE) || defined(UTF16_LE) + if (remaining_size % 2 == 1) remaining_size--; +#endif + + unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1); + memcpy(str, data, remaining_size); + str_null_end = str + remaining_size; + + r = exec(enc, options, syntax, + (char *)pattern, (char *)pattern_end, + (char *)str, str_null_end, backward); + + free(pattern); + free(str); + return r; +} + +#define OPTIONS_MASK (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP) + + +#ifdef SYNTAX_TEST +#define NUM_CONTROL_BYTES 6 +#else +#define NUM_CONTROL_BYTES 5 +#endif + +int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ +#if !defined(UTF16_BE) && !defined(UTF16_LE) + static OnigEncoding encodings[] = { + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_ASCII, + ONIG_ENCODING_EUC_JP, + ONIG_ENCODING_EUC_TW, + ONIG_ENCODING_EUC_KR, + ONIG_ENCODING_EUC_CN, + ONIG_ENCODING_SJIS, + ONIG_ENCODING_KOI8_R, + ONIG_ENCODING_CP1251, + ONIG_ENCODING_BIG5, + ONIG_ENCODING_GB18030, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, 
+ ONIG_ENCODING_UTF8, + ONIG_ENCODING_ISO_8859_1, + ONIG_ENCODING_ISO_8859_2, + ONIG_ENCODING_ISO_8859_3, + ONIG_ENCODING_ISO_8859_4, + ONIG_ENCODING_ISO_8859_5, + ONIG_ENCODING_ISO_8859_6, + ONIG_ENCODING_ISO_8859_7, + ONIG_ENCODING_ISO_8859_8, + ONIG_ENCODING_ISO_8859_9, + ONIG_ENCODING_ISO_8859_10, + ONIG_ENCODING_ISO_8859_11, + ONIG_ENCODING_ISO_8859_13, + ONIG_ENCODING_ISO_8859_14, + ONIG_ENCODING_ISO_8859_15, + ONIG_ENCODING_ISO_8859_16 + }; + unsigned char encoding_choice; +#endif + +#ifdef SYNTAX_TEST + static OnigSyntaxType* syntaxes[] = { + ONIG_SYNTAX_POSIX_EXTENDED, + ONIG_SYNTAX_EMACS, + ONIG_SYNTAX_GREP, + ONIG_SYNTAX_GNU_REGEX, + ONIG_SYNTAX_JAVA, + ONIG_SYNTAX_PERL_NG, + ONIG_SYNTAX_ONIGURUMA + }; + +#ifdef STANDALONE + static char* syntax_names[] = { + "Posix Extended", + "Emacs", + "Grep", + "GNU Regex", + "Java", + "Perl+NG", + "Oniguruma" + }; +#endif + + unsigned char syntax_choice; +#endif + + int r; + int backward; + int pattern_size; + size_t remaining_size; + unsigned char *data; + unsigned char pattern_size_choice; + OnigOptionType options; + OnigEncoding enc; + OnigSyntaxType* syntax; + +#ifndef STANDALONE + static FILE* STAT_FP; +#endif + + INPUT_COUNT++; + +#ifdef DUMP_DATA_INTERVAL + if (INPUT_COUNT % DUMP_DATA_INTERVAL == 0) { + char path[20]; + sprintf(path, "dump-%ld", INPUT_COUNT); + dump_file(path, (unsigned char* )Data, Size); + } +#endif + + if (Size < NUM_CONTROL_BYTES) return 0; + + remaining_size = Size; + data = (unsigned char* )(Data); + +#ifdef UTF16_BE + enc = ONIG_ENCODING_UTF16_BE; +#else +#ifdef UTF16_LE + enc = ONIG_ENCODING_UTF16_LE; +#else + encoding_choice = data[0]; + data++; + remaining_size--; + + int num_encodings = sizeof(encodings)/sizeof(encodings[0]); + enc = encodings[encoding_choice % num_encodings]; +#endif +#endif + +#ifdef SYNTAX_TEST + syntax_choice = data[0]; + data++; + remaining_size--; + + int num_syntaxes = sizeof(syntaxes)/sizeof(syntaxes[0]); + syntax = syntaxes[syntax_choice % num_syntaxes]; 
+#else + syntax = ONIG_SYNTAX_DEFAULT; +#endif + + if ((data[1] & 0xc0) == 0) + options = (data[0] | (data[1] << 8)) & OPTIONS_MASK; + else + options = data[0] & ONIG_OPTION_IGNORECASE; + + data++; + remaining_size--; + data++; + remaining_size--; + + pattern_size_choice = data[0]; + data++; + remaining_size--; + + backward = (data[0] == 0xbb); + data++; + remaining_size--; + + if (remaining_size == 0) + pattern_size = 0; + else { + pattern_size = (int )pattern_size_choice % remaining_size; +#if defined(UTF16_BE) || defined(UTF16_LE) + if (pattern_size % 2 == 1) pattern_size--; +#endif + } + +#ifdef STANDALONE + dump_data(stdout, data, pattern_size); +#ifdef SYNTAX_TEST + fprintf(stdout, + "enc: %s, syntax: %s, options: %u, pattern_size: %d, back:%d\n", + ONIGENC_NAME(enc), + syntax_names[syntax_choice % num_syntaxes], + options, + pattern_size, backward); +#else + fprintf(stdout, "enc: %s, options: %u, pattern_size: %d, back:%d\n", + ONIGENC_NAME(enc), options, pattern_size, backward); +#endif +#endif + +#ifdef DUMP_INPUT + dump_input((unsigned char* )Data, Size); +#endif + + r = alloc_exec(enc, options, syntax, backward, pattern_size, + remaining_size, data); + if (r == -2) exit(-2); + +#ifndef STANDALONE +#ifdef EXEC_PRINT_INTERVAL + if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { + float fexec, freg, fvalid; + + if (STAT_FP == 0) { +#ifdef STAT_PATH + STAT_FP = fopen(STAT_PATH, "a"); +#else + STAT_FP = stdout; +#endif + } + + output_current_time(STAT_FP); + + if (INPUT_COUNT != 0) { // overflow check + fexec = (float )EXEC_COUNT / INPUT_COUNT; + freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; + fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; + + fprintf(STAT_FP, ": %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f\n", + EXEC_COUNT, fexec, freg, fvalid); + fflush(STAT_FP); + } + else { + fprintf(STAT_FP, ": ignore (input count overflow)\n"); + } + + EXEC_COUNT_INTERVAL = 0; + } + else if (EXEC_COUNT == 1) { + output_current_time(stdout); + fprintf(stdout, ": 
------------ START ------------\n"); + } +#endif +#endif + + return r; +} + +#ifdef STANDALONE + +extern int main(int argc, char* argv[]) +{ + size_t n; + uint8_t Data[10000]; + + n = read(0, Data, sizeof(Data)); + fprintf(stdout, "n: %ld\n", n); + LLVMFuzzerTestOneInput(Data, n); + + return 0; +} +#endif /* STANDALONE */ diff --git a/harnesses/deluxe-encode-harness.c b/harnesses/deluxe-encode-harness.c deleted file mode 100644 index aabe916..0000000 --- a/harnesses/deluxe-encode-harness.c +++ /dev/null @@ -1,204 +0,0 @@ -/* - * deluxe-encode-harness.c - * contributed by Mark Griffin - */ -#include -#include "oniguruma.h" - -#include -#include - -#define DEFAULT_LIMIT 120 -typedef unsigned char uint8_t; - -static int -search(regex_t* reg, unsigned char* str, unsigned char* end) -{ - int r; - unsigned char *start, *range; - OnigRegion *region; - - region = onig_region_new(); - - start = str; - range = end; - r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); - if (r >= 0) { - int i; - - fprintf(stdout, "match at %d (%s)\n", r, - ONIGENC_NAME(onig_get_encoding(reg))); - for (i = 0; i < region->num_regs; i++) { - fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); - } - } - else if (r == ONIG_MISMATCH) { - fprintf(stdout, "search fail (%s)\n", - ONIGENC_NAME(onig_get_encoding(reg))); - } - else { /* error */ - char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str((UChar* )s, r); - fprintf(stdout, "ERROR: %s\n", s); - fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); - onig_region_free(region, 1 /* 1:free self, 0:free contents only */); - return -1; - } - - onig_region_free(region, 1 /* 1:free self, 0:free contents only */); - return 0; -} - -static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN; - -static int -exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc, - OnigOptionType options, char* apattern, char* apattern_end, - char* astr, char* astr_end) -{ - int r; - regex_t* reg; - OnigCompileInfo ci; - 
OnigErrorInfo einfo; - UChar* pattern = (UChar* )apattern; - UChar* str = (UChar* )astr; - UChar* pattern_end = (UChar* )apattern_end; - unsigned char* end = (unsigned char* )astr_end; - - onig_initialize(&str_enc, 1); - onig_set_retry_limit_in_match(DEFAULT_LIMIT); - onig_set_parse_depth_limit(DEFAULT_LIMIT); - - ci.num_of_elements = 5; - ci.pattern_enc = pattern_enc; - ci.target_enc = str_enc; - ci.syntax = ONIG_SYNTAX_DEFAULT; - ci.option = options; - ci.case_fold_flag = CF; - - r = onig_new_deluxe(&reg, pattern, pattern_end, &ci, &einfo); - if (r != ONIG_NORMAL) { - char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str((UChar* )s, r, &einfo); - fprintf(stdout, "ERROR: %s\n", s); - onig_end(); - return -1; - } - - if (onigenc_is_valid_mbc_string(str_enc, str, end) != 0) { - r = search(reg, str, end); - } - - onig_free(reg); - onig_end(); - return 0; -} - -#define PATTERN_SIZE 48 -#define NUM_CONTROL_BYTES 1 -#define MIN_STR_SIZE 2 -int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) -{ - int r; - size_t remaining_size; - unsigned char *data; - unsigned char pat_encoding_choice; - unsigned char str_encoding_choice; - unsigned char *pattern; - unsigned char *str; - unsigned char *pattern_end; - unsigned char *str_end; - unsigned int num_encodings; - OnigEncodingType *pattern_enc; - OnigEncodingType *str_enc; - - OnigEncodingType *encodings[] = { - ONIG_ENCODING_ASCII, - ONIG_ENCODING_ISO_8859_1, - ONIG_ENCODING_ISO_8859_2, - ONIG_ENCODING_ISO_8859_3, - ONIG_ENCODING_ISO_8859_4, - ONIG_ENCODING_ISO_8859_5, - ONIG_ENCODING_ISO_8859_6, - ONIG_ENCODING_ISO_8859_7, - ONIG_ENCODING_ISO_8859_8, - ONIG_ENCODING_ISO_8859_9, - ONIG_ENCODING_ISO_8859_10, - ONIG_ENCODING_ISO_8859_11, - ONIG_ENCODING_ISO_8859_13, - ONIG_ENCODING_ISO_8859_14, - ONIG_ENCODING_ISO_8859_15, - ONIG_ENCODING_ISO_8859_16, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF16_BE, - ONIG_ENCODING_UTF16_LE, - ONIG_ENCODING_UTF32_BE, - ONIG_ENCODING_UTF32_LE, - ONIG_ENCODING_EUC_JP, - 
ONIG_ENCODING_EUC_TW, - ONIG_ENCODING_EUC_KR, - ONIG_ENCODING_EUC_CN, - ONIG_ENCODING_SJIS, - //ONIG_ENCODING_KOI8, - ONIG_ENCODING_KOI8_R, - ONIG_ENCODING_CP1251, - ONIG_ENCODING_BIG5, - ONIG_ENCODING_GB18030, - }; - - if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) - return 0; - if (Size > 0x1000) - return 0; - - remaining_size = Size; - data = (unsigned char *)(Data); - - // pull off bytes to switch off - pat_encoding_choice = data[0]; - data++; - remaining_size--; - str_encoding_choice = data[0]; - data++; - remaining_size--; - - // copy first PATTERN_SIZE bytes off to be the pattern - pattern = (unsigned char *)malloc(PATTERN_SIZE); - memcpy(pattern, data, PATTERN_SIZE); - pattern_end = pattern + PATTERN_SIZE; - data += PATTERN_SIZE; - remaining_size -= PATTERN_SIZE; - - str = (unsigned char*)malloc(remaining_size); - memcpy(str, data, remaining_size); - str_end = str + remaining_size; - - num_encodings = sizeof(encodings) / sizeof(encodings[0]); - pattern_enc = encodings[pat_encoding_choice % num_encodings]; - str_enc = encodings[str_encoding_choice % num_encodings]; - - r = exec_deluxe(pattern_enc, str_enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, (char *)str, (char *)str_end); - - free(pattern); - free(str); - - return r; -} - - -#ifdef WITH_READ_MAIN - -#include - -extern int main(int argc, char* argv[]) -{ - size_t n; - uint8_t Data[10000]; - - n = read(0, Data, sizeof(Data)); - fprintf(stdout, "n: %ld\n", n); - LLVMFuzzerTestOneInput(Data, n); - - return 0; -} -#endif /* WITH_READ_MAIN */ diff --git a/harnesses/deluxe.c b/harnesses/deluxe.c new file mode 100644 index 0000000..5441de9 --- /dev/null +++ b/harnesses/deluxe.c @@ -0,0 +1,206 @@ +/* + * deluxe.c + * contributed by Mark Griffin + */ +#include +#include "oniguruma.h" + +#include +#include + +#define RETRY_LIMIT 10000 +#define DEPTH_LIMIT 10 + +typedef unsigned char uint8_t; + +static int +search(regex_t* reg, unsigned char* str, unsigned char* end) +{ + int r; + 
unsigned char *start, *range; + OnigRegion *region; + + region = onig_region_new(); + + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { + int i; + + fprintf(stdout, "match at %d (%s)\n", r, + ONIGENC_NAME(onig_get_encoding(reg))); + for (i = 0; i < region->num_regs; i++) { + fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } + } + else if (r == ONIG_MISMATCH) { + fprintf(stdout, "search fail (%s)\n", + ONIGENC_NAME(onig_get_encoding(reg))); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(stdout, "ERROR: %s\n", s); + fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + return 0; +} + +static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN; + +static int +exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc, + OnigOptionType options, char* apattern, char* apattern_end, + char* astr, char* astr_end) +{ + int r; + regex_t* reg; + OnigCompileInfo ci; + OnigErrorInfo einfo; + UChar* pattern = (UChar* )apattern; + UChar* str = (UChar* )astr; + UChar* pattern_end = (UChar* )apattern_end; + unsigned char* end = (unsigned char* )astr_end; + + onig_initialize(&str_enc, 1); + onig_set_retry_limit_in_search(RETRY_LIMIT); + onig_set_parse_depth_limit(DEPTH_LIMIT); + + ci.num_of_elements = 5; + ci.pattern_enc = pattern_enc; + ci.target_enc = str_enc; + ci.syntax = ONIG_SYNTAX_DEFAULT; + ci.option = options; + ci.case_fold_flag = CF; + + r = onig_new_deluxe(&reg, pattern, pattern_end, &ci, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stdout, "ERROR: %s\n", s); + onig_end(); + return -1; + } + + if (onigenc_is_valid_mbc_string(str_enc, str, end) != 0) { + r = 
search(reg, str, end); + } + + onig_free(reg); + onig_end(); + return 0; +} + +#define PATTERN_SIZE 48 +#define NUM_CONTROL_BYTES 1 +#define MIN_STR_SIZE 2 +int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ + int r; + size_t remaining_size; + unsigned char *data; + unsigned char pat_encoding_choice; + unsigned char str_encoding_choice; + unsigned char *pattern; + unsigned char *str; + unsigned char *pattern_end; + unsigned char *str_end; + unsigned int num_encodings; + OnigEncodingType *pattern_enc; + OnigEncodingType *str_enc; + + OnigEncodingType *encodings[] = { + ONIG_ENCODING_ASCII, + ONIG_ENCODING_ISO_8859_1, + ONIG_ENCODING_ISO_8859_2, + ONIG_ENCODING_ISO_8859_3, + ONIG_ENCODING_ISO_8859_4, + ONIG_ENCODING_ISO_8859_5, + ONIG_ENCODING_ISO_8859_6, + ONIG_ENCODING_ISO_8859_7, + ONIG_ENCODING_ISO_8859_8, + ONIG_ENCODING_ISO_8859_9, + ONIG_ENCODING_ISO_8859_10, + ONIG_ENCODING_ISO_8859_11, + ONIG_ENCODING_ISO_8859_13, + ONIG_ENCODING_ISO_8859_14, + ONIG_ENCODING_ISO_8859_15, + ONIG_ENCODING_ISO_8859_16, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF16_BE, + ONIG_ENCODING_UTF16_LE, + ONIG_ENCODING_UTF32_BE, + ONIG_ENCODING_UTF32_LE, + ONIG_ENCODING_EUC_JP, + ONIG_ENCODING_EUC_TW, + ONIG_ENCODING_EUC_KR, + ONIG_ENCODING_EUC_CN, + ONIG_ENCODING_SJIS, + //ONIG_ENCODING_KOI8, + ONIG_ENCODING_KOI8_R, + ONIG_ENCODING_CP1251, + ONIG_ENCODING_BIG5, + ONIG_ENCODING_GB18030, + }; + + if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) + return 0; + if (Size > 0x1000) + return 0; + + remaining_size = Size; + data = (unsigned char *)(Data); + + // pull off bytes to switch off + pat_encoding_choice = data[0]; + data++; + remaining_size--; + str_encoding_choice = data[0]; + data++; + remaining_size--; + + // copy first PATTERN_SIZE bytes off to be the pattern + pattern = (unsigned char *)malloc(PATTERN_SIZE); + memcpy(pattern, data, PATTERN_SIZE); + pattern_end = pattern + PATTERN_SIZE; + data += PATTERN_SIZE; + remaining_size -= PATTERN_SIZE; + + str = 
(unsigned char*)malloc(remaining_size); + memcpy(str, data, remaining_size); + str_end = str + remaining_size; + + num_encodings = sizeof(encodings) / sizeof(encodings[0]); + pattern_enc = encodings[pat_encoding_choice % num_encodings]; + str_enc = encodings[str_encoding_choice % num_encodings]; + + r = exec_deluxe(pattern_enc, str_enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, (char *)str, (char *)str_end); + + free(pattern); + free(str); + + return r; +} + + +#ifdef STANDALONE + +#include <unistd.h> + +extern int main(int argc, char* argv[]) +{ + size_t n; + uint8_t Data[10000]; + + n = read(0, Data, sizeof(Data)); + fprintf(stdout, "n: %ld\n", n); + LLVMFuzzerTestOneInput(Data, n); + + return 0; +} +#endif /* STANDALONE */ diff --git a/harnesses/encode-harness.c b/harnesses/encode-harness.c deleted file mode 100644 index 5db0512..0000000 --- a/harnesses/encode-harness.c +++ /dev/null @@ -1,365 +0,0 @@ -/* - * encode-harness.c - * contributed by Mark Griffin - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "oniguruma.h" - - -//#define PARSE_DEPTH_LIMIT 120 -#define RETRY_LIMIT 3500 - -typedef unsigned char uint8_t; - -static int -search(regex_t* reg, unsigned char* str, unsigned char* end) -{ - int r; - unsigned char *start, *range; - OnigRegion *region; - - region = onig_region_new(); - - start = str; - range = end; - r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); - if (r >= 0) { -#ifdef WITH_READ_MAIN - int i; - - fprintf(stdout, "match at %d (%s)\n", r, - ONIGENC_NAME(onig_get_encoding(reg))); - for (i = 0; i < region->num_regs; i++) { - fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); - } -#endif - } - else if (r == ONIG_MISMATCH) { -#ifdef WITH_READ_MAIN - fprintf(stdout, "search fail (%s)\n", - ONIGENC_NAME(onig_get_encoding(reg))); -#endif - } - else { /* error */ -#ifdef WITH_READ_MAIN - char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - - onig_error_code_to_str((UChar* )s, 
r); - fprintf(stdout, "ERROR: %s\n", s); - fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); -#endif - onig_region_free(region, 1 /* 1:free self, 0:free contents only */); - - if (r == ONIGERR_STACK_BUG || - r == ONIGERR_UNDEFINED_BYTECODE || - r == ONIGERR_UNEXPECTED_BYTECODE) - return -2; - - return -1; - } - - onig_region_free(region, 1 /* 1:free self, 0:free contents only */); - return 0; -} - -static long INPUT_COUNT; -static long EXEC_COUNT; -static long EXEC_COUNT_INTERVAL; -static long REGEX_SUCCESS_COUNT; -static long VALID_STRING_COUNT; - -static int -exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, - char* apattern, char* apattern_end, char* astr, UChar* end) -{ - int r; - regex_t* reg; - OnigErrorInfo einfo; - UChar* pattern = (UChar* )apattern; - UChar* str = (UChar* )astr; - UChar* pattern_end = (UChar* )apattern_end; - - EXEC_COUNT++; - EXEC_COUNT_INTERVAL++; - - onig_initialize(&enc, 1); - onig_set_retry_limit_in_match(RETRY_LIMIT); - //onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); - - r = onig_new(&reg, pattern, pattern_end, - options, enc, syntax, &einfo); - if (r != ONIG_NORMAL) { - char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str((UChar* )s, r, &einfo); -#ifdef WITH_READ_MAIN - fprintf(stdout, "ERROR: %s\n", s); -#endif - onig_end(); - - if (r == ONIGERR_PARSER_BUG || - r == ONIGERR_STACK_BUG || - r == ONIGERR_UNDEFINED_BYTECODE || - r == ONIGERR_UNEXPECTED_BYTECODE) { - return -2; - } - else - return -1; - } - REGEX_SUCCESS_COUNT++; - - r = search(reg, pattern, pattern_end); - if (r == -2) return -2; - - if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { - VALID_STRING_COUNT++; - r = search(reg, str, end); - if (r == -2) return -2; - } - - onig_free(reg); - onig_end(); - return 0; -} - -#if 0 -static void -output_data(char* path, const uint8_t * data, size_t size) -{ - int fd; - ssize_t n; - - fd = open(path, O_CREAT|O_RDWR, S_IRUSR|S_IRGRP|S_IROTH); - if (fd == -1) { - fprintf(stderr, "ERROR: 
output_data(): can't open(%s)\n", path); - return ; - } - - n = write(fd, (const void* )data, size); - if (n != size) { - fprintf(stderr, "ERROR: output_data(): n: %ld, size: %ld\n", n, size); - } - close(fd); -} -#endif - - -static int -alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, - int pattern_size, size_t remaining_size, unsigned char *data) -{ - int r; - unsigned char *pattern_end; - unsigned char *str_null_end; - - // copy first PATTERN_SIZE bytes off to be the pattern - unsigned char *pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1); - memcpy(pattern, data, pattern_size); - pattern_end = pattern + pattern_size; - data += pattern_size; - remaining_size -= pattern_size; - -#if defined(UTF16_BE) || defined(UTF16_LE) - if (remaining_size % 2 == 1) remaining_size--; -#endif - - unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1); - memcpy(str, data, remaining_size); - str_null_end = str + remaining_size; - - r = exec(enc, options, syntax, - (char *)pattern, (char *)pattern_end, - (char *)str, str_null_end); - - free(pattern); - free(str); - return r; -} - - -#define EXEC_PRINT_INTERVAL 10000000 -#define MAX_PATTERN_SIZE 150 - -#ifdef SYNTAX_TEST -#define NUM_CONTROL_BYTES 3 -#else -#define NUM_CONTROL_BYTES 2 -#endif - -int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) -{ -#if !defined(UTF16_BE) && !defined(UTF16_LE) - static OnigEncoding encodings[] = { - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_SJIS, - //ONIG_ENCODING_EUC_JP, - ONIG_ENCODING_ISO_8859_1, - ONIG_ENCODING_BIG5, - ONIG_ENCODING_GB18030, - ONIG_ENCODING_EUC_TW - }; - unsigned char encoding_choice; -#endif - -#ifdef SYNTAX_TEST - static OnigSyntaxType* syntaxes[] = { - ONIG_SYNTAX_POSIX_EXTENDED, - ONIG_SYNTAX_EMACS, - ONIG_SYNTAX_GREP, - ONIG_SYNTAX_GNU_REGEX, - ONIG_SYNTAX_JAVA, - ONIG_SYNTAX_PERL_NG, - ONIG_SYNTAX_ONIGURUMA - }; - unsigned char syntax_choice; 
-#endif - - int r; - int pattern_size; - size_t remaining_size; - unsigned char *data; - unsigned char options_choice; - OnigOptionType options; - OnigEncoding enc; - OnigSyntaxType* syntax; - - INPUT_COUNT++; - if (Size < NUM_CONTROL_BYTES) return 0; - - remaining_size = Size; - data = (unsigned char* )(Data); - -#ifdef UTF16_BE - enc = ONIG_ENCODING_UTF16_BE; -#else -#ifdef UTF16_LE - enc = ONIG_ENCODING_UTF16_LE; -#else - encoding_choice = data[0]; - data++; - remaining_size--; - - int num_encodings = sizeof(encodings)/sizeof(encodings[0]); - enc = encodings[encoding_choice % num_encodings]; -#endif -#endif - -#ifdef SYNTAX_TEST - syntax_choice = data[0]; - data++; - remaining_size--; - - int num_syntaxes = sizeof(syntaxes)/sizeof(syntaxes[0]); - syntax = syntaxes[syntax_choice % num_syntaxes]; -#else - syntax = ONIG_SYNTAX_DEFAULT; -#endif - - options_choice = data[0]; - options = (options_choice % 2 == 0) ? ONIG_OPTION_NONE : ONIG_OPTION_IGNORECASE; - data++; - remaining_size--; - -#ifdef WITH_READ_MAIN -#ifdef SYNTAX_TEST - fprintf(stdout, "enc: %s, syntax: %d, options: %u\n", - ONIGENC_NAME(enc), (int )(syntax_choice % num_syntaxes), options); -#else - fprintf(stdout, "enc: %s, options: %u\n", ONIGENC_NAME(enc), options); -#endif -#endif - -#ifdef WITH_READ_MAIN - int max_pattern_size; - - if (remaining_size == 0) - max_pattern_size = 0; - else { - max_pattern_size = remaining_size - 1; - if (max_pattern_size > MAX_PATTERN_SIZE) - max_pattern_size = MAX_PATTERN_SIZE; - -#if defined(UTF16_BE) || defined(UTF16_LE) - if (max_pattern_size % 2 == 1) max_pattern_size--; -#endif - } - - for (pattern_size = 0; pattern_size <= max_pattern_size; ) { - fprintf(stdout, "pattern_size: %d\n", pattern_size); - r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data); - if (r == -2) { - //output_data("parser-bug", Data, Size); - exit(-2); - } - -#if defined(UTF16_BE) || defined(UTF16_LE) - pattern_size += 2; -#else - pattern_size++; -#endif - } - -#else /* 
WITH_READ_MAIN */ - - if (remaining_size == 0) - pattern_size = 0; - else { - pattern_size = INPUT_COUNT % remaining_size; - if (pattern_size > MAX_PATTERN_SIZE) - pattern_size = MAX_PATTERN_SIZE; - -#if defined(UTF16_BE) || defined(UTF16_LE) - if (pattern_size % 2 == 1) pattern_size--; -#endif - } - - r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data); - if (r == -2) { - //output_data("parser-bug", Data, Size); - exit(-2); - } -#endif /* else WITH_READ_MAIN */ - - if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { - char d[64]; - time_t t; - float fexec, freg, fvalid; - - t = time(NULL); - strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); - - fexec = (float )EXEC_COUNT / INPUT_COUNT; - freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; - fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; - - fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f\n", - d, EXEC_COUNT, fexec, freg, fvalid); - - EXEC_COUNT_INTERVAL = 0; - } - return r; -} - -#ifdef WITH_READ_MAIN - -extern int main(int argc, char* argv[]) -{ - size_t n; - uint8_t Data[10000]; - - n = read(0, Data, sizeof(Data)); - fprintf(stdout, "n: %ld\n", n); - LLVMFuzzerTestOneInput(Data, n); - - return 0; -} -#endif /* WITH_READ_MAIN */ diff --git a/harnesses/fuzzer.options b/harnesses/fuzzer.options new file mode 100644 index 0000000..ab44744 --- /dev/null +++ b/harnesses/fuzzer.options @@ -0,0 +1,2 @@ +[libfuzzer] +dict = ascii_compatible.dict diff --git a/harnesses/makefile b/harnesses/makefile index dfd84de..b324295 100644 --- a/harnesses/makefile +++ b/harnesses/makefile @@ -1,54 +1,53 @@ # makefile for harness SRC = ../src CFLAGS = -I$(SRC) -Wall -g -fsanitize=fuzzer,address -fno-omit-frame-pointer -CFLAGS_M = -I$(SRC) -Wall -g -fsanitize=fuzzer-no-link,address -fno-omit-frame-pointer -DWITH_READ_MAIN +CFLAGS_M = -I$(SRC) -Wall -g -fsanitize=fuzzer-no-link,address -fno-omit-frame-pointer -DSTANDALONE ONIG_LIB = $(SRC)/.libs/libonig.a LIBS = $(ONIG_LIB) -TARGETS = 
encode-libfuzzer syntax-libfuzzer \ - utf16-be-libfuzzer utf16-le-libfuzzer main-encode main-syntax \ - main-utf16-be main-utf16-le main-regset regset-libfuzzer +TARGETS = fuzzer-encode fuzzer-syntax fuzzer-utf16-be fuzzer-utf16-le \ + fuzzer-regset \ + read-encode read-syntax read-utf16-be read-utf16-le read-regset -OTHER_TARGETS = libfuzzer-onig libfuzzer-onig-full \ - deluxe-encode-libfuzzer main-deluxe-encode +OTHER_TARGETS = libfuzzer-onig libfuzzer-onig-full fuzzer-deluxe read-deluxe default: $(TARGETS) -encode-libfuzzer: encode-harness.c $(ONIG_LIB) +fuzzer-encode: base.c $(ONIG_LIB) clang $(CFLAGS) $< $(LIBS) -o $@ -syntax-libfuzzer: encode-harness.c $(ONIG_LIB) +fuzzer-syntax: base.c $(ONIG_LIB) clang -DSYNTAX_TEST $(CFLAGS) $< $(LIBS) -o $@ -deluxe-encode-libfuzzer: deluxe-encode-harness.c $(ONIG_LIB) +fuzzer-deluxe: deluxe.c $(ONIG_LIB) clang $(CFLAGS) $< $(LIBS) -o $@ -utf16-be-libfuzzer: encode-harness.c $(ONIG_LIB) +fuzzer-utf16-be: base.c $(ONIG_LIB) clang -DUTF16_BE $(CFLAGS) $< $(LIBS) -o $@ -utf16-le-libfuzzer: encode-harness.c $(ONIG_LIB) +fuzzer-utf16-le: base.c $(ONIG_LIB) clang -DUTF16_LE $(CFLAGS) $< $(LIBS) -o $@ -regset-libfuzzer: regset-harness.c $(ONIG_LIB) +fuzzer-regset: regset.c $(ONIG_LIB) clang $(CFLAGS) $< $(LIBS) -o $@ -main-encode: encode-harness.c $(ONIG_LIB) +read-encode: base.c $(ONIG_LIB) clang $(CFLAGS_M) $< $(LIBS) -o $@ -main-syntax: encode-harness.c $(ONIG_LIB) +read-syntax: base.c $(ONIG_LIB) clang -DSYNTAX_TEST $(CFLAGS_M) $< $(LIBS) -o $@ -main-deluxe-encode: deluxe-encode-harness.c $(ONIG_LIB) +read-deluxe: deluxe.c $(ONIG_LIB) clang $(CFLAGS_M) $< $(LIBS) -o $@ -main-utf16-be: encode-harness.c $(ONIG_LIB) +read-utf16-be: base.c $(ONIG_LIB) clang -DUTF16_BE $(CFLAGS_M) $< $(LIBS) -o $@ -main-utf16-le: encode-harness.c $(ONIG_LIB) +read-utf16-le: base.c $(ONIG_LIB) clang -DUTF16_LE $(CFLAGS_M) $< $(LIBS) -o $@ -main-regset: regset-harness.c $(ONIG_LIB) +read-regset: regset.c $(ONIG_LIB) clang $(CFLAGS_M) $< $(LIBS) -o 
$@ libfuzzer-onig: libfuzzer-onig.cpp $(ONIG_LIB) diff --git a/harnesses/regset-harness.c b/harnesses/regset-harness.c deleted file mode 100644 index b4b7e20..0000000 --- a/harnesses/regset-harness.c +++ /dev/null @@ -1,379 +0,0 @@ -/* - * regset-harness.c - * Copyright (c) 2019 K.Kosako - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include "oniguruma.h" - - -#define RETRY_LIMIT 500 - -#ifdef WITH_READ_MAIN -//#define CHECK_EACH_REGEX_SEARCH_TIME -#endif - -#define MAX_REG_NUM 256 - -typedef unsigned char uint8_t; -static OnigEncoding ENC; - -#ifdef CHECK_EACH_REGEX_SEARCH_TIME -static double -get_sec(struct timespec* ts, struct timespec* te) -{ - double t; - - t = (te->tv_sec - ts->tv_sec) + - (double )(te->tv_nsec - ts->tv_nsec) / 1000000000.0; - return t; -} - -static int -check_each_regex_search_time(OnigRegSet* set, unsigned char* str, unsigned char* end) -{ - int n; - int i; - int r; - OnigRegion* region; - - n = onig_regset_number_of_regex(set); - region = onig_region_new(); - - for (i = 0; i < n; i++) { - regex_t* reg; - unsigned char* start; - unsigned char* range; - struct timespec ts1, ts2; - double t; - - reg = onig_regset_get_regex(set, i); - start = str; - range = end; - - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts1); - - r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); - - clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts2); - t = get_sec(&ts1, &ts2); - - fprintf(stdout, "regex search time %d: %6.2lfmsec.\n", i, t * 1000.0); - } - - onig_region_free(region, 1); - return 0; -} -#endif - -static int -search(OnigRegSet* set, OnigRegSetLead lead, unsigned char* str, unsigned char* end) -{ - int r; - int match_pos; - unsigned char *start, *range; - - start = str; - range = end; - r = onig_regset_search(set, str, end, start, range, lead, - ONIG_OPTION_NONE, &match_pos); - if (r >= 0) { -#ifdef WITH_READ_MAIN - int i; - int match_index; - OnigRegion* region; - - match_index = r; - fprintf(stdout, 
"match reg index: %d, pos: %d (%s)\n", - match_index, match_pos, ONIGENC_NAME(ENC)); - region = onig_regset_get_region(set, match_index); - if (region == 0) { - fprintf(stdout, "ERROR: can't get region.\n"); - return -1; - } - - for (i = 0; i < region->num_regs; i++) { - fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); - } -#endif - } - else if (r == ONIG_MISMATCH) { -#ifdef WITH_READ_MAIN - fprintf(stdout, "search fail (%s)\n", ONIGENC_NAME(ENC)); -#endif - } - else { /* error */ -#ifdef WITH_READ_MAIN - char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - - onig_error_code_to_str((UChar* )s, r); - fprintf(stdout, "ERROR: %s\n", s); - fprintf(stdout, " (%s)\n", ONIGENC_NAME(ENC)); -#endif - return -1; - } - - return 0; -} - -static long INPUT_COUNT; -static long EXEC_COUNT; -static long EXEC_COUNT_INTERVAL; -static long REGEX_SUCCESS_COUNT; -static long VALID_STRING_COUNT; - -static int -exec(OnigEncoding enc, int reg_num, int init_reg_num, - UChar* pat[], UChar* pat_end[], - OnigRegSetLead lead, UChar* str, UChar* end) -{ - int r; - int i, j; - OnigRegSet* set; - regex_t* reg; - OnigOptionType options; - OnigErrorInfo einfo; - regex_t* regs[MAX_REG_NUM]; - - EXEC_COUNT++; - EXEC_COUNT_INTERVAL++; - - options = (EXEC_COUNT % 4 == 0) ? 
ONIG_OPTION_IGNORECASE : ONIG_OPTION_NONE; - - onig_initialize(&enc, 1); - onig_set_retry_limit_in_match(RETRY_LIMIT); - - for (i = 0; i < init_reg_num; i++) { - r = onig_new(®s[i], pat[i], pat_end[i], options, ENC, - ONIG_SYNTAX_DEFAULT, &einfo); - if (r != 0) { -#ifdef WITH_READ_MAIN - char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - - onig_error_code_to_str((UChar* )s, r, &einfo); - fprintf(stdout, "ERROR: index: %d, %s\n", i, s); -#endif - - for (j = 0; j < i; j++) onig_free(regs[j]); - - onig_end(); - - if (r == ONIGERR_PARSER_BUG || - r == ONIGERR_STACK_BUG || - r == ONIGERR_UNDEFINED_BYTECODE || - r == ONIGERR_UNEXPECTED_BYTECODE) { - return -2; - } - else - return -1; - } - } - - r = onig_regset_new(&set, init_reg_num, regs); - if (r != 0) { - for (i = 0; i < init_reg_num; i++) { - onig_free(regs[i]); - } - onig_end(); - return -1; - } - - for (i = init_reg_num; i < reg_num; i++) { - r = onig_new(®, pat[i], pat_end[i], options, ENC, - ONIG_SYNTAX_DEFAULT, &einfo); - if (r != 0) { -#ifdef WITH_READ_MAIN - char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - - onig_error_code_to_str((UChar* )s, r, &einfo); - fprintf(stdout, "ERROR: index: %d, %s\n", i, s); -#endif - onig_regset_free(set); - onig_end(); - - if (r == ONIGERR_PARSER_BUG || - r == ONIGERR_STACK_BUG || - r == ONIGERR_UNDEFINED_BYTECODE || - r == ONIGERR_UNEXPECTED_BYTECODE) { - return -2; - } - else - return -1; - } - - r = onig_regset_add(set, reg); - if (r != 0) { - onig_regset_free(set); - onig_end(); - fprintf(stdout, "ERROR: onig_regset_add(): %d\n", i); - return r; - } - } - - REGEX_SUCCESS_COUNT++; - - if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { - VALID_STRING_COUNT++; - r = search(set, lead, str, end); -#ifdef CHECK_EACH_REGEX_SEARCH_TIME - r = check_each_regex_search_time(set, str, end); -#endif - } - - onig_regset_free(set); - onig_end(); - return 0; -} - -#define MAX_PATTERN_SIZE 30 -#define NUM_CONTROL_BYTES 3 - -#define EXEC_PRINT_INTERVAL 2000000 - -static int MaxRegNum; -static int MaxInitRegNum; 
- -extern int -LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) -{ - int r, i; - int pattern_size; - unsigned char *str_null_end; - size_t remaining_size; - unsigned char *data; - unsigned int reg_num; - unsigned int init_reg_num; - unsigned char* pat[256]; - unsigned char* pat_end[256]; - int len; - unsigned int lead_num; - OnigRegSetLead lead; - - INPUT_COUNT++; - - if (Size < NUM_CONTROL_BYTES) return 0; - - remaining_size = Size; - data = (unsigned char* )(Data); - - reg_num = data[0]; - data++; - remaining_size--; - - init_reg_num = data[0]; - data++; - remaining_size--; - - lead_num = data[0]; - data++; - remaining_size--; - lead = (lead_num % 2 == 0 ? ONIG_REGSET_POSITION_LEAD : ONIG_REGSET_REGEX_LEAD); - - if (remaining_size < reg_num * 2) { - reg_num = reg_num % 15; // zero is OK. - } - - init_reg_num %= (reg_num + 1); - - if (MaxRegNum < reg_num) - MaxRegNum = reg_num; - - if (MaxInitRegNum < init_reg_num) - MaxInitRegNum = init_reg_num; - - if (reg_num == 0) - pattern_size = 1; - else - pattern_size = remaining_size / (reg_num * 2); - - if (pattern_size > MAX_PATTERN_SIZE) - pattern_size = MAX_PATTERN_SIZE; - - len = pattern_size * reg_num; - if (len == 0) len = 1; - - for (i = 0; i < reg_num; i++) { - pat[i] = (unsigned char* )malloc(pattern_size); - memcpy(pat[i], data, pattern_size); - pat_end[i] = pat[i] + pattern_size; - data += pattern_size; - remaining_size -= pattern_size; - } - - unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1); - memcpy(str, data, remaining_size); - str_null_end = str + remaining_size; - -#ifdef WITH_READ_MAIN - fprintf(stdout, "reg num: %d, pattern size: %d, lead: %s\n", - reg_num, pattern_size, - lead == ONIG_REGSET_POSITION_LEAD ? 
"position" : "regex"); - - if (reg_num != 0) { - unsigned char* p; - i = 0; - p = pat[0]; - while (p < pat_end[0]) { - fprintf(stdout, " 0x%02x", (int )*p++); - i++; - if (i % 8 == 0) fprintf(stdout, "\n"); - } - fprintf(stdout, "\n"); - } -#endif - - ENC = ONIG_ENCODING_UTF8; - - r = exec(ENC, reg_num, init_reg_num, pat, pat_end, lead, str, str_null_end); - - for (i = 0; i < reg_num; i++) { - free(pat[i]); - } - free(str); - - if (r == -2) { - //output_data("parser-bug", Data, Size); - exit(-2); - } - - if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { - char d[64]; - time_t t; - float fexec, freg, fvalid; - - t = time(NULL); - strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); - - fexec = (float )EXEC_COUNT / INPUT_COUNT; - freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; - fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; - - fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f MAX REG:%d-%d\n", - d, EXEC_COUNT, fexec, freg, fvalid, MaxRegNum, MaxInitRegNum); - - EXEC_COUNT_INTERVAL = 0; - } - return r; -} - -#ifdef WITH_READ_MAIN - -extern int main(int argc, char* argv[]) -{ - size_t n; - uint8_t Data[10000]; - - n = read(0, Data, sizeof(Data)); - fprintf(stdout, "n: %ld\n", n); - LLVMFuzzerTestOneInput(Data, n); - - return 0; -} -#endif /* WITH_READ_MAIN */ diff --git a/harnesses/regset.c b/harnesses/regset.c new file mode 100644 index 0000000..a8dd181 --- /dev/null +++ b/harnesses/regset.c @@ -0,0 +1,392 @@ +/* + * regset.c + * Copyright (c) 2019 K.Kosako + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "oniguruma.h" + + +#define RETRY_LIMIT 5000 + +#ifdef STANDALONE +//#define CHECK_EACH_REGEX_SEARCH_TIME +#endif + +#define MAX_REG_NUM 256 + +typedef unsigned char uint8_t; +static OnigEncoding ENC; + +static void +output_current_time(FILE* fp) +{ + char d[64]; + time_t t; + + t = time(NULL); + strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); + + fprintf(fp, "%s", d); +} + +#ifdef 
CHECK_EACH_REGEX_SEARCH_TIME +static double +get_sec(struct timespec* ts, struct timespec* te) +{ + double t; + + t = (te->tv_sec - ts->tv_sec) + + (double )(te->tv_nsec - ts->tv_nsec) / 1000000000.0; + return t; +} + +static int +check_each_regex_search_time(OnigRegSet* set, unsigned char* str, unsigned char* end) +{ + int n; + int i; + int r; + OnigRegion* region; + + n = onig_regset_number_of_regex(set); + region = onig_region_new(); + + for (i = 0; i < n; i++) { + regex_t* reg; + unsigned char* start; + unsigned char* range; + struct timespec ts1, ts2; + double t; + + reg = onig_regset_get_regex(set, i); + start = str; + range = end; + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts1); + + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts2); + t = get_sec(&ts1, &ts2); + + fprintf(stdout, "regex search time %d: %6.2lfmsec.\n", i, t * 1000.0); + } + + onig_region_free(region, 1); + return 0; +} +#endif + +static int +search(OnigRegSet* set, OnigRegSetLead lead, unsigned char* str, unsigned char* end) +{ + int r; + int match_pos; + unsigned char *start, *range; + + start = str; + range = end; + r = onig_regset_search(set, str, end, start, range, lead, + ONIG_OPTION_NONE, &match_pos); + if (r >= 0) { +#ifdef STANDALONE + int i; + int match_index; + OnigRegion* region; + + match_index = r; + fprintf(stdout, "match reg index: %d, pos: %d (%s)\n", + match_index, match_pos, ONIGENC_NAME(ENC)); + region = onig_regset_get_region(set, match_index); + if (region == 0) { + fprintf(stdout, "ERROR: can't get region.\n"); + return -1; + } + + for (i = 0; i < region->num_regs; i++) { + fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } +#endif + } + else if (r == ONIG_MISMATCH) { +#ifdef STANDALONE + fprintf(stdout, "search fail (%s)\n", ONIGENC_NAME(ENC)); +#endif + } + else { /* error */ +#ifdef STANDALONE + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + onig_error_code_to_str((UChar* )s, r); + 
fprintf(stdout, "ERROR: %s\n", s); + fprintf(stdout, " (%s)\n", ONIGENC_NAME(ENC)); +#endif + return -1; + } + + return 0; +} + +static long INPUT_COUNT; +static long EXEC_COUNT; +static long EXEC_COUNT_INTERVAL; +static long REGEX_SUCCESS_COUNT; +static long VALID_STRING_COUNT; + +static int +exec(OnigEncoding enc, int reg_num, int init_reg_num, + UChar* pat[], UChar* pat_end[], + OnigRegSetLead lead, UChar* str, UChar* end) +{ + int r; + int i, j; + OnigRegSet* set; + regex_t* reg; + OnigOptionType options; + OnigErrorInfo einfo; + regex_t* regs[MAX_REG_NUM]; + + EXEC_COUNT++; + EXEC_COUNT_INTERVAL++; + + options = (EXEC_COUNT % 4 == 0) ? ONIG_OPTION_IGNORECASE : ONIG_OPTION_NONE; + + onig_initialize(&enc, 1); + onig_set_retry_limit_in_search(RETRY_LIMIT); + + for (i = 0; i < init_reg_num; i++) { + r = onig_new(®s[i], pat[i], pat_end[i], options, ENC, + ONIG_SYNTAX_DEFAULT, &einfo); + if (r != 0) { +#ifdef STANDALONE + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stdout, "ERROR: index: %d, %s\n", i, s); +#endif + + for (j = 0; j < i; j++) onig_free(regs[j]); + + onig_end(); + + if (r == ONIGERR_PARSER_BUG || + r == ONIGERR_STACK_BUG || + r == ONIGERR_UNDEFINED_BYTECODE || + r == ONIGERR_UNEXPECTED_BYTECODE) { + return -2; + } + else + return -1; + } + } + + r = onig_regset_new(&set, init_reg_num, regs); + if (r != 0) { + for (i = 0; i < init_reg_num; i++) { + onig_free(regs[i]); + } + onig_end(); + return -1; + } + + for (i = init_reg_num; i < reg_num; i++) { + r = onig_new(®, pat[i], pat_end[i], options, ENC, + ONIG_SYNTAX_DEFAULT, &einfo); + if (r != 0) { +#ifdef STANDALONE + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stdout, "ERROR: index: %d, %s\n", i, s); +#endif + onig_regset_free(set); + onig_end(); + + if (r == ONIGERR_PARSER_BUG || + r == ONIGERR_STACK_BUG || + r == ONIGERR_UNDEFINED_BYTECODE || + r == ONIGERR_UNEXPECTED_BYTECODE) { + return -2; + } 
+ else + return -1; + } + + r = onig_regset_add(set, reg); + if (r != 0) { + onig_regset_free(set); + onig_end(); + fprintf(stdout, "ERROR: onig_regset_add(): %d\n", i); + return r; + } + } + + REGEX_SUCCESS_COUNT++; + + if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { + VALID_STRING_COUNT++; + r = search(set, lead, str, end); +#ifdef CHECK_EACH_REGEX_SEARCH_TIME + r = check_each_regex_search_time(set, str, end); +#endif + } + + onig_regset_free(set); + onig_end(); + return 0; +} + +#define MAX_PATTERN_SIZE 30 +#define NUM_CONTROL_BYTES 3 + +#define EXEC_PRINT_INTERVAL 2000000 + +static int MaxRegNum; +static int MaxInitRegNum; + +extern int +LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ + int r, i; + int pattern_size; + unsigned char *str_null_end; + size_t remaining_size; + unsigned char *data; + unsigned int reg_num; + unsigned int init_reg_num; + unsigned char* pat[256]; + unsigned char* pat_end[256]; + int len; + unsigned int lead_num; + OnigRegSetLead lead; + + INPUT_COUNT++; + + if (Size < NUM_CONTROL_BYTES) return 0; + + remaining_size = Size; + data = (unsigned char* )(Data); + + reg_num = data[0]; + data++; + remaining_size--; + + init_reg_num = data[0]; + data++; + remaining_size--; + + lead_num = data[0]; + data++; + remaining_size--; + lead = (lead_num % 2 == 0 ? ONIG_REGSET_POSITION_LEAD : ONIG_REGSET_REGEX_LEAD); + + if (remaining_size < reg_num * 2) { + reg_num = reg_num % 15; // zero is OK. 
+ } + + init_reg_num %= (reg_num + 1); + + if (MaxRegNum < reg_num) + MaxRegNum = reg_num; + + if (MaxInitRegNum < init_reg_num) + MaxInitRegNum = init_reg_num; + + if (reg_num == 0) + pattern_size = 1; + else + pattern_size = remaining_size / (reg_num * 2); + + if (pattern_size > MAX_PATTERN_SIZE) + pattern_size = MAX_PATTERN_SIZE; + + len = pattern_size * reg_num; + if (len == 0) len = 1; + + for (i = 0; i < reg_num; i++) { + pat[i] = (unsigned char* )malloc(pattern_size); + memcpy(pat[i], data, pattern_size); + pat_end[i] = pat[i] + pattern_size; + data += pattern_size; + remaining_size -= pattern_size; + } + + unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1); + memcpy(str, data, remaining_size); + str_null_end = str + remaining_size; + +#ifdef STANDALONE + fprintf(stdout, "reg num: %d, pattern size: %d, lead: %s\n", + reg_num, pattern_size, + lead == ONIG_REGSET_POSITION_LEAD ? "position" : "regex"); + + if (reg_num != 0) { + unsigned char* p; + i = 0; + p = pat[0]; + while (p < pat_end[0]) { + fprintf(stdout, " 0x%02x", (int )*p++); + i++; + if (i % 8 == 0) fprintf(stdout, "\n"); + } + fprintf(stdout, "\n"); + } +#endif + + ENC = ONIG_ENCODING_UTF8; + + r = exec(ENC, reg_num, init_reg_num, pat, pat_end, lead, str, str_null_end); + + for (i = 0; i < reg_num; i++) { + free(pat[i]); + } + free(str); + + if (r == -2) { + //output_data("parser-bug", Data, Size); + exit(-2); + } + + if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { + float fexec, freg, fvalid; + + fexec = (float )EXEC_COUNT / INPUT_COUNT; + freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; + fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; + + output_current_time(stdout); + fprintf(stdout, ": %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f MAX REG:%d-%d\n", + EXEC_COUNT, fexec, freg, fvalid, MaxRegNum, MaxInitRegNum); + + EXEC_COUNT_INTERVAL = 0; + } + else if (EXEC_COUNT == 1) { + output_current_time(stdout); + fprintf(stdout, ": ------------ START ------------\n"); + } 
+ + return r; +} + +#ifdef STANDALONE + +extern int main(int argc, char* argv[]) +{ + size_t n; + uint8_t Data[10000]; + + n = read(0, Data, sizeof(Data)); + fprintf(stdout, "n: %ld\n", n); + LLVMFuzzerTestOneInput(Data, n); + + return 0; +} +#endif /* STANDALONE */ -- cgit v1.2.3