From 4216de6a3336cbc6dddb572cb7e6ab6193bf3729 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Fri, 29 Nov 2019 11:26:35 +0100 Subject: New upstream version 6.9.4 --- harnesses/ascii_compatible.dict | 20 +- harnesses/deluxe-encode-harness.c | 39 +--- harnesses/encode-harness.c | 291 ++++++++++++++++++++++++----- harnesses/libfuzzer-onig.cpp | 45 +++++ harnesses/makefile | 69 +++++++ harnesses/regset-harness.c | 379 ++++++++++++++++++++++++++++++++++++++ harnesses/syntax-harness.c | 120 ------------ 7 files changed, 748 insertions(+), 215 deletions(-) create mode 100644 harnesses/libfuzzer-onig.cpp create mode 100644 harnesses/makefile create mode 100644 harnesses/regset-harness.c delete mode 100644 harnesses/syntax-harness.c (limited to 'harnesses') diff --git a/harnesses/ascii_compatible.dict b/harnesses/ascii_compatible.dict index 820bf47..e6e00db 100644 --- a/harnesses/ascii_compatible.dict +++ b/harnesses/ascii_compatible.dict @@ -1,10 +1,7 @@ # First-pass fuzzing dictionary for Oniguruma by Mark Griffin -"\\o{17777777777}" -"\\777" -"\\u" -"\\uFFFF" -"\\xFF" -"\\x{70000000}" +"\\o{34}" +"\\123" +"\\x{40}" "\\C-" "\\M-\\C-" "\\X" @@ -12,6 +9,8 @@ "\\p{^" "}" "]" +"]" +")" ")" "\\n" "\\r" @@ -47,10 +46,13 @@ "\\B" "(?y{" "[abcd1-9]" +"[\\w]" +"[\\W]" +"[\\s]" +"[\\S]" "[\\w\\d" "[\\p{Alphabetic}" -"[\\P{Arabic}" -"[\\x{ffff}" +"[\\x{03}" "[a-w&&" "[^" "[:graph:]" @@ -88,7 +90,6 @@ "(?())" "(?())" "(?())" -"(*ERROR{-2000})" "(*COUNT[tag]{X})" "\\1" "\\2" @@ -106,6 +107,5 @@ "(?a|b\\gc)" "(?-i:\\g)" "\\N{name}" -"\\p{Hiragana}" "\\p{Katakana}" "\\p{Emoji}" diff --git a/harnesses/deluxe-encode-harness.c b/harnesses/deluxe-encode-harness.c index e1f84a5..aabe916 100644 --- a/harnesses/deluxe-encode-harness.c +++ b/harnesses/deluxe-encode-harness.c @@ -49,39 +49,6 @@ search(regex_t* reg, unsigned char* str, unsigned char* end) return 0; } -static int -exec(OnigEncoding enc, OnigOptionType options, - char* apattern, char* apattern_end, char* astr, char* astr_end) -{ - int r; - regex_t* reg; - OnigErrorInfo einfo; - UChar* pattern = (UChar* )apattern; - UChar* str = (UChar* )astr; - UChar* pattern_end = (UChar* )apattern_end; - unsigned char *end = (unsigned char* )astr_end; - - onig_initialize(&enc, 1); - onig_set_retry_limit_in_match(DEFAULT_LIMIT); - onig_set_parse_depth_limit(DEFAULT_LIMIT); - - r = onig_new(®, pattern, pattern_end, - options, enc, ONIG_SYNTAX_DEFAULT, &einfo); - if (r != ONIG_NORMAL) { - char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str((UChar* )s, r, &einfo); - fprintf(stdout, "ERROR: %s\n", s); - onig_end(); - return -1; - } - - r = search(reg, str, end); - - onig_free(reg); - onig_end(); - return 0; -} - static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN; static int @@ -196,15 +163,13 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) remaining_size--; // copy first PATTERN_SIZE bytes off to be the pattern - pattern = (unsigned char *)malloc(PATTERN_SIZE+4); - memset(pattern, 0, PATTERN_SIZE+4); + pattern = (unsigned char *)malloc(PATTERN_SIZE); memcpy(pattern, data, PATTERN_SIZE); pattern_end = pattern + PATTERN_SIZE; data += PATTERN_SIZE; remaining_size -= PATTERN_SIZE; - str = (unsigned char*)malloc(remaining_size+4); - memset(str, 0, remaining_size+4); + str = (unsigned char*)malloc(remaining_size); memcpy(str, data, remaining_size); str_end = str + remaining_size; diff --git a/harnesses/encode-harness.c b/harnesses/encode-harness.c index e57fd4f..5db0512 100644 --- a/harnesses/encode-harness.c +++ b/harnesses/encode-harness.c @@ -3,13 +3,19 @@ * contributed by Mark Griffin */ #include -#include "oniguruma.h" - +#include #include #include +#include +#include +#include +#include -#define PARSE_DEPTH_LIMIT 120 -#define RETRY_LIMIT 4000 +#include "oniguruma.h" + + +//#define PARSE_DEPTH_LIMIT 120 +#define RETRY_LIMIT 3500 typedef unsigned char uint8_t; @@ -26,6 +32,7 @@ search(regex_t* reg, unsigned char* str, unsigned char* end) range = end; r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); if (r >= 0) { +#ifdef WITH_READ_MAIN int i; fprintf(stdout, "match at %d (%s)\n", r, @@ -33,17 +40,29 @@ search(regex_t* reg, unsigned char* str, unsigned char* end) for (i = 0; i < region->num_regs; i++) { fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); } +#endif } else if (r == ONIG_MISMATCH) { +#ifdef WITH_READ_MAIN fprintf(stdout, "search fail (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); +#endif } else { /* error */ +#ifdef WITH_READ_MAIN char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); fprintf(stdout, "ERROR: %s\n", s); fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); +#endif onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + + if (r == ONIGERR_STACK_BUG || + r == ONIGERR_UNDEFINED_BYTECODE || + r == ONIGERR_UNEXPECTED_BYTECODE) + return -2; + return -1; } @@ -51,8 +70,14 @@ search(regex_t* reg, unsigned char* str, unsigned char* end) return 0; } +static long INPUT_COUNT; +static long EXEC_COUNT; +static long EXEC_COUNT_INTERVAL; +static long REGEX_SUCCESS_COUNT; +static long VALID_STRING_COUNT; + static int -exec(OnigEncoding enc, OnigOptionType options, +exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, char* apattern, char* apattern_end, char* astr, UChar* end) { int r; @@ -62,22 +87,41 @@ exec(OnigEncoding enc, OnigOptionType options, UChar* str = (UChar* )astr; UChar* pattern_end = (UChar* )apattern_end; + EXEC_COUNT++; + EXEC_COUNT_INTERVAL++; + onig_initialize(&enc, 1); onig_set_retry_limit_in_match(RETRY_LIMIT); - onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); + //onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); r = onig_new(®, pattern, pattern_end, - options, enc, ONIG_SYNTAX_DEFAULT, &einfo); + options, enc, syntax, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str((UChar* )s, r, &einfo); +#ifdef WITH_READ_MAIN fprintf(stdout, "ERROR: %s\n", s); +#endif onig_end(); - return -1; + + if (r == ONIGERR_PARSER_BUG || + r == ONIGERR_STACK_BUG || + r == ONIGERR_UNDEFINED_BYTECODE || + r == ONIGERR_UNEXPECTED_BYTECODE) { + return -2; + } + else + return -1; } + REGEX_SUCCESS_COUNT++; + + r = search(reg, pattern, pattern_end); + if (r == -2) return -2; if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { + VALID_STRING_COUNT++; r = search(reg, str, end); + if (r == -2) return -2; } onig_free(reg); @@ -85,52 +129,114 @@ exec(OnigEncoding enc, OnigOptionType options, return 0; } -#define PATTERN_SIZE 32 -#define NUM_CONTROL_BYTES 1 -#define MIN_STR_SIZE 1 -int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +#if 0 +static void +output_data(char* path, const uint8_t * data, size_t size) { - if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) - return 0; - if (Size > 0x1000) - return 0; + int fd; + ssize_t n; + fd = open(path, O_CREAT|O_RDWR, S_IRUSR|S_IRGRP|S_IROTH); + if (fd == -1) { + fprintf(stderr, "ERROR: output_data(): can't open(%s)\n", path); + return ; + } + + n = write(fd, (const void* )data, size); + if (n != size) { + fprintf(stderr, "ERROR: output_data(): n: %ld, size: %ld\n", n, size); + } + close(fd); +} +#endif + + +static int +alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, + int pattern_size, size_t remaining_size, unsigned char *data) +{ + int r; unsigned char *pattern_end; unsigned char *str_null_end; - size_t remaining_size = Size; - unsigned char *data = (unsigned char *)(Data); + // copy first PATTERN_SIZE bytes off to be the pattern + unsigned char *pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1); + memcpy(pattern, data, pattern_size); + pattern_end = pattern + pattern_size; + data += pattern_size; + remaining_size -= pattern_size; - // pull off one byte to switch off - unsigned char encoding_choice = data[0]; - data++; - remaining_size--; +#if defined(UTF16_BE) || defined(UTF16_LE) + if (remaining_size % 2 == 1) remaining_size--; +#endif - // copy first PATTERN_SIZE bytes off to be the pattern - unsigned char *pattern = (unsigned char *)malloc(PATTERN_SIZE+4); - memset(pattern, 0, PATTERN_SIZE+4); - memcpy(pattern, data, PATTERN_SIZE); - pattern_end = pattern + PATTERN_SIZE; - data += PATTERN_SIZE; - remaining_size -= PATTERN_SIZE; - - unsigned char *str = (unsigned char*)malloc(remaining_size+4); - memset(str, 0, remaining_size+4); + unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1); memcpy(str, data, remaining_size); str_null_end = str + remaining_size; - int r; - OnigEncodingType *encodings[] = { - ONIG_ENCODING_SJIS, - ONIG_ENCODING_EUC_JP, - ONIG_ENCODING_CP1251, - ONIG_ENCODING_ISO_8859_1, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_KOI8_R, - ONIG_ENCODING_BIG5 + r = exec(enc, options, syntax, + (char *)pattern, (char *)pattern_end, + (char *)str, str_null_end); + + free(pattern); + free(str); + return r; +} + + +#define EXEC_PRINT_INTERVAL 10000000 +#define MAX_PATTERN_SIZE 150 + +#ifdef SYNTAX_TEST +#define NUM_CONTROL_BYTES 3 +#else +#define NUM_CONTROL_BYTES 2 +#endif + +int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ +#if !defined(UTF16_BE) && !defined(UTF16_LE) + static OnigEncoding encodings[] = { + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_SJIS, + //ONIG_ENCODING_EUC_JP, + ONIG_ENCODING_ISO_8859_1, + ONIG_ENCODING_BIG5, + ONIG_ENCODING_GB18030, + ONIG_ENCODING_EUC_TW + }; + unsigned char encoding_choice; +#endif + +#ifdef SYNTAX_TEST + static OnigSyntaxType* syntaxes[] = { + ONIG_SYNTAX_POSIX_EXTENDED, + ONIG_SYNTAX_EMACS, + ONIG_SYNTAX_GREP, + ONIG_SYNTAX_GNU_REGEX, + ONIG_SYNTAX_JAVA, + ONIG_SYNTAX_PERL_NG, + ONIG_SYNTAX_ONIGURUMA }; + unsigned char syntax_choice; +#endif + + int r; + int pattern_size; + size_t remaining_size; + unsigned char *data; + unsigned char options_choice; + OnigOptionType options; + OnigEncoding enc; + OnigSyntaxType* syntax; - OnigEncodingType *enc; + INPUT_COUNT++; + if (Size < NUM_CONTROL_BYTES) return 0; + + remaining_size = Size; + data = (unsigned char* )(Data); #ifdef UTF16_BE enc = ONIG_ENCODING_UTF16_BE; @@ -138,24 +244,113 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) #ifdef UTF16_LE enc = ONIG_ENCODING_UTF16_LE; #else + encoding_choice = data[0]; + data++; + remaining_size--; + int num_encodings = sizeof(encodings)/sizeof(encodings[0]); enc = encodings[encoding_choice % num_encodings]; #endif #endif - r = exec(enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, - (char *)str, str_null_end); +#ifdef SYNTAX_TEST + syntax_choice = data[0]; + data++; + remaining_size--; - free(pattern); - free(str); + int num_syntaxes = sizeof(syntaxes)/sizeof(syntaxes[0]); + syntax = syntaxes[syntax_choice % num_syntaxes]; +#else + syntax = ONIG_SYNTAX_DEFAULT; +#endif + + options_choice = data[0]; + options = (options_choice % 2 == 0) ? ONIG_OPTION_NONE : ONIG_OPTION_IGNORECASE; + data++; + remaining_size--; + +#ifdef WITH_READ_MAIN +#ifdef SYNTAX_TEST + fprintf(stdout, "enc: %s, syntax: %d, options: %u\n", + ONIGENC_NAME(enc), (int )(syntax_choice % num_syntaxes), options); +#else + fprintf(stdout, "enc: %s, options: %u\n", ONIGENC_NAME(enc), options); +#endif +#endif +#ifdef WITH_READ_MAIN + int max_pattern_size; + + if (remaining_size == 0) + max_pattern_size = 0; + else { + max_pattern_size = remaining_size - 1; + if (max_pattern_size > MAX_PATTERN_SIZE) + max_pattern_size = MAX_PATTERN_SIZE; + +#if defined(UTF16_BE) || defined(UTF16_LE) + if (max_pattern_size % 2 == 1) max_pattern_size--; +#endif + } + + for (pattern_size = 0; pattern_size <= max_pattern_size; ) { + fprintf(stdout, "pattern_size: %d\n", pattern_size); + r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data); + if (r == -2) { + //output_data("parser-bug", Data, Size); + exit(-2); + } + +#if defined(UTF16_BE) || defined(UTF16_LE) + pattern_size += 2; +#else + pattern_size++; +#endif + } + +#else /* WITH_READ_MAIN */ + + if (remaining_size == 0) + pattern_size = 0; + else { + pattern_size = INPUT_COUNT % remaining_size; + if (pattern_size > MAX_PATTERN_SIZE) + pattern_size = MAX_PATTERN_SIZE; + +#if defined(UTF16_BE) || defined(UTF16_LE) + if (pattern_size % 2 == 1) pattern_size--; +#endif + } + + r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data); + if (r == -2) { + //output_data("parser-bug", Data, Size); + exit(-2); + } +#endif /* else WITH_READ_MAIN */ + + if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { + char d[64]; + time_t t; + float fexec, freg, fvalid; + + t = time(NULL); + strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); + + fexec = (float )EXEC_COUNT / INPUT_COUNT; + freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; + fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; + + fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f\n", + d, EXEC_COUNT, fexec, freg, fvalid); + + EXEC_COUNT_INTERVAL = 0; + } return r; } #ifdef WITH_READ_MAIN -#include - extern int main(int argc, char* argv[]) { size_t n; diff --git a/harnesses/libfuzzer-onig.cpp b/harnesses/libfuzzer-onig.cpp new file mode 100644 index 0000000..526c826 --- /dev/null +++ b/harnesses/libfuzzer-onig.cpp @@ -0,0 +1,45 @@ +/* libfuzzer test code for oniguruma + * author: Hanno Böck, license: CC0/public domain + +Usage: +* compile oniguruma with something like + ./configure CC=clang LD=clang CFLAGS="-fsanitize-coverage=edge -fsanitize=address" \ + LDFLAGS="-fsanitize-coverage=edge -fsanitize=address" +* Compile libfuzzer stub and link against static libonig.a and libFuzzer.a: + clang++ libfuzzer-onig.cpp src/.libs/libonig.a libFuzzer.a -o libfuzzer-onig \ + -fsanitize-coverage=edge -fsanitize=address +* Put sample patterns in directory "in/" +* Run + ./libfuzzer-onig in + +Consult libfuzzer docs for further details and how to create libFuzzer.a: +http://llvm.org/docs/LibFuzzer.html + + */ +#include +#include +#include + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ + regex_t *reg; + OnigEncoding enc; + + enc = ONIG_ENCODING_UTF8; + +#ifdef FULL_TEST + onig_initialize(&enc, 1); + onig_set_retry_limit_in_match(120); + onig_set_parse_depth_limit(120); +#endif + + if (onig_new(®, Data, Data + Size, ONIG_OPTION_DEFAULT, enc, + ONIG_SYNTAX_DEFAULT, 0) == 0) + onig_free(reg); + +#ifdef FULL_TEST + onig_end(); +#endif + + return 0; +} diff --git a/harnesses/makefile b/harnesses/makefile new file mode 100644 index 0000000..dfd84de --- /dev/null +++ b/harnesses/makefile @@ -0,0 +1,69 @@ +# makefile for harness +SRC = ../src +CFLAGS = -I$(SRC) -Wall -g -fsanitize=fuzzer,address -fno-omit-frame-pointer +CFLAGS_M = -I$(SRC) -Wall -g -fsanitize=fuzzer-no-link,address -fno-omit-frame-pointer -DWITH_READ_MAIN +ONIG_LIB = $(SRC)/.libs/libonig.a +LIBS = $(ONIG_LIB) + +TARGETS = encode-libfuzzer syntax-libfuzzer \ + utf16-be-libfuzzer utf16-le-libfuzzer main-encode main-syntax \ + main-utf16-be main-utf16-le main-regset regset-libfuzzer + +OTHER_TARGETS = libfuzzer-onig libfuzzer-onig-full \ + deluxe-encode-libfuzzer main-deluxe-encode + + +default: $(TARGETS) + +encode-libfuzzer: encode-harness.c $(ONIG_LIB) + clang $(CFLAGS) $< $(LIBS) -o $@ + +syntax-libfuzzer: encode-harness.c $(ONIG_LIB) + clang -DSYNTAX_TEST $(CFLAGS) $< $(LIBS) -o $@ + +deluxe-encode-libfuzzer: deluxe-encode-harness.c $(ONIG_LIB) + clang $(CFLAGS) $< $(LIBS) -o $@ + +utf16-be-libfuzzer: encode-harness.c $(ONIG_LIB) + clang -DUTF16_BE $(CFLAGS) $< $(LIBS) -o $@ + +utf16-le-libfuzzer: encode-harness.c $(ONIG_LIB) + clang -DUTF16_LE $(CFLAGS) $< $(LIBS) -o $@ + +regset-libfuzzer: regset-harness.c $(ONIG_LIB) + clang $(CFLAGS) $< $(LIBS) -o $@ + +main-encode: encode-harness.c $(ONIG_LIB) + clang $(CFLAGS_M) $< $(LIBS) -o $@ + +main-syntax: encode-harness.c $(ONIG_LIB) + clang -DSYNTAX_TEST $(CFLAGS_M) $< $(LIBS) -o $@ + +main-deluxe-encode: deluxe-encode-harness.c $(ONIG_LIB) + clang $(CFLAGS_M) $< $(LIBS) -o $@ + +main-utf16-be: encode-harness.c $(ONIG_LIB) + clang -DUTF16_BE $(CFLAGS_M) $< $(LIBS) -o $@ + +main-utf16-le: encode-harness.c $(ONIG_LIB) + clang -DUTF16_LE $(CFLAGS_M) $< $(LIBS) -o $@ + +main-regset: regset-harness.c $(ONIG_LIB) + clang $(CFLAGS_M) $< $(LIBS) -o $@ + +libfuzzer-onig: libfuzzer-onig.cpp $(ONIG_LIB) + clang++ $(CFLAGS) $< $(LIBS) -o $@ + +libfuzzer-onig-full: libfuzzer-onig.cpp $(ONIG_LIB) + clang++ -DFULL_TEST $(CFLAGS) $< $(LIBS) -o $@ + + +$(ONIG_LIB): + cd ..; make clean + #cd ..; autoreconf -vfi + cd ..; ./configure CC=clang LD=clang CFLAGS="-g -fsanitize=address -fno-omit-frame-pointer" LDFLAGS="-g -fsanitize=address -fno-omit-frame-pointer" + cd ..; make -j4 + + +clean: + rm -f $(TARGETS) $(OTHER_TARGETS) diff --git a/harnesses/regset-harness.c b/harnesses/regset-harness.c new file mode 100644 index 0000000..b4b7e20 --- /dev/null +++ b/harnesses/regset-harness.c @@ -0,0 +1,379 @@ +/* + * regset-harness.c + * Copyright (c) 2019 K.Kosako + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include "oniguruma.h" + + +#define RETRY_LIMIT 500 + +#ifdef WITH_READ_MAIN +//#define CHECK_EACH_REGEX_SEARCH_TIME +#endif + +#define MAX_REG_NUM 256 + +typedef unsigned char uint8_t; +static OnigEncoding ENC; + +#ifdef CHECK_EACH_REGEX_SEARCH_TIME +static double +get_sec(struct timespec* ts, struct timespec* te) +{ + double t; + + t = (te->tv_sec - ts->tv_sec) + + (double )(te->tv_nsec - ts->tv_nsec) / 1000000000.0; + return t; +} + +static int +check_each_regex_search_time(OnigRegSet* set, unsigned char* str, unsigned char* end) +{ + int n; + int i; + int r; + OnigRegion* region; + + n = onig_regset_number_of_regex(set); + region = onig_region_new(); + + for (i = 0; i < n; i++) { + regex_t* reg; + unsigned char* start; + unsigned char* range; + struct timespec ts1, ts2; + double t; + + reg = onig_regset_get_regex(set, i); + start = str; + range = end; + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts1); + + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts2); + t = get_sec(&ts1, &ts2); + + fprintf(stdout, "regex search time %d: %6.2lfmsec.\n", i, t * 1000.0); + } + + onig_region_free(region, 1); + return 0; +} +#endif + +static int +search(OnigRegSet* set, OnigRegSetLead lead, unsigned char* str, unsigned char* end) +{ + int r; + int match_pos; + unsigned char *start, *range; + + start = str; + range = end; + r = onig_regset_search(set, str, end, start, range, lead, + ONIG_OPTION_NONE, &match_pos); + if (r >= 0) { +#ifdef WITH_READ_MAIN + int i; + int match_index; + OnigRegion* region; + + match_index = r; + fprintf(stdout, "match reg index: %d, pos: %d (%s)\n", + match_index, match_pos, ONIGENC_NAME(ENC)); + region = onig_regset_get_region(set, match_index); + if (region == 0) { + fprintf(stdout, "ERROR: can't get region.\n"); + return -1; + } + + for (i = 0; i < region->num_regs; i++) { + fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } +#endif + } + else if (r == ONIG_MISMATCH) { +#ifdef WITH_READ_MAIN + fprintf(stdout, "search fail (%s)\n", ONIGENC_NAME(ENC)); +#endif + } + else { /* error */ +#ifdef WITH_READ_MAIN + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + onig_error_code_to_str((UChar* )s, r); + fprintf(stdout, "ERROR: %s\n", s); + fprintf(stdout, " (%s)\n", ONIGENC_NAME(ENC)); +#endif + return -1; + } + + return 0; +} + +static long INPUT_COUNT; +static long EXEC_COUNT; +static long EXEC_COUNT_INTERVAL; +static long REGEX_SUCCESS_COUNT; +static long VALID_STRING_COUNT; + +static int +exec(OnigEncoding enc, int reg_num, int init_reg_num, + UChar* pat[], UChar* pat_end[], + OnigRegSetLead lead, UChar* str, UChar* end) +{ + int r; + int i, j; + OnigRegSet* set; + regex_t* reg; + OnigOptionType options; + OnigErrorInfo einfo; + regex_t* regs[MAX_REG_NUM]; + + EXEC_COUNT++; + EXEC_COUNT_INTERVAL++; + + options = (EXEC_COUNT % 4 == 0) ? ONIG_OPTION_IGNORECASE : ONIG_OPTION_NONE; + + onig_initialize(&enc, 1); + onig_set_retry_limit_in_match(RETRY_LIMIT); + + for (i = 0; i < init_reg_num; i++) { + r = onig_new(®s[i], pat[i], pat_end[i], options, ENC, + ONIG_SYNTAX_DEFAULT, &einfo); + if (r != 0) { +#ifdef WITH_READ_MAIN + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stdout, "ERROR: index: %d, %s\n", i, s); +#endif + + for (j = 0; j < i; j++) onig_free(regs[j]); + + onig_end(); + + if (r == ONIGERR_PARSER_BUG || + r == ONIGERR_STACK_BUG || + r == ONIGERR_UNDEFINED_BYTECODE || + r == ONIGERR_UNEXPECTED_BYTECODE) { + return -2; + } + else + return -1; + } + } + + r = onig_regset_new(&set, init_reg_num, regs); + if (r != 0) { + for (i = 0; i < init_reg_num; i++) { + onig_free(regs[i]); + } + onig_end(); + return -1; + } + + for (i = init_reg_num; i < reg_num; i++) { + r = onig_new(®, pat[i], pat_end[i], options, ENC, + ONIG_SYNTAX_DEFAULT, &einfo); + if (r != 0) { +#ifdef WITH_READ_MAIN + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stdout, "ERROR: index: %d, %s\n", i, s); +#endif + onig_regset_free(set); + onig_end(); + + if (r == ONIGERR_PARSER_BUG || + r == ONIGERR_STACK_BUG || + r == ONIGERR_UNDEFINED_BYTECODE || + r == ONIGERR_UNEXPECTED_BYTECODE) { + return -2; + } + else + return -1; + } + + r = onig_regset_add(set, reg); + if (r != 0) { + onig_regset_free(set); + onig_end(); + fprintf(stdout, "ERROR: onig_regset_add(): %d\n", i); + return r; + } + } + + REGEX_SUCCESS_COUNT++; + + if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { + VALID_STRING_COUNT++; + r = search(set, lead, str, end); +#ifdef CHECK_EACH_REGEX_SEARCH_TIME + r = check_each_regex_search_time(set, str, end); +#endif + } + + onig_regset_free(set); + onig_end(); + return 0; +} + +#define MAX_PATTERN_SIZE 30 +#define NUM_CONTROL_BYTES 3 + +#define EXEC_PRINT_INTERVAL 2000000 + +static int MaxRegNum; +static int MaxInitRegNum; + +extern int +LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ + int r, i; + int pattern_size; + unsigned char *str_null_end; + size_t remaining_size; + unsigned char *data; + unsigned int reg_num; + unsigned int init_reg_num; + unsigned char* pat[256]; + unsigned char* pat_end[256]; + int len; + unsigned int lead_num; + OnigRegSetLead lead; + + INPUT_COUNT++; + + if (Size < NUM_CONTROL_BYTES) return 0; + + remaining_size = Size; + data = (unsigned char* )(Data); + + reg_num = data[0]; + data++; + remaining_size--; + + init_reg_num = data[0]; + data++; + remaining_size--; + + lead_num = data[0]; + data++; + remaining_size--; + lead = (lead_num % 2 == 0 ? ONIG_REGSET_POSITION_LEAD : ONIG_REGSET_REGEX_LEAD); + + if (remaining_size < reg_num * 2) { + reg_num = reg_num % 15; // zero is OK. + } + + init_reg_num %= (reg_num + 1); + + if (MaxRegNum < reg_num) + MaxRegNum = reg_num; + + if (MaxInitRegNum < init_reg_num) + MaxInitRegNum = init_reg_num; + + if (reg_num == 0) + pattern_size = 1; + else + pattern_size = remaining_size / (reg_num * 2); + + if (pattern_size > MAX_PATTERN_SIZE) + pattern_size = MAX_PATTERN_SIZE; + + len = pattern_size * reg_num; + if (len == 0) len = 1; + + for (i = 0; i < reg_num; i++) { + pat[i] = (unsigned char* )malloc(pattern_size); + memcpy(pat[i], data, pattern_size); + pat_end[i] = pat[i] + pattern_size; + data += pattern_size; + remaining_size -= pattern_size; + } + + unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1); + memcpy(str, data, remaining_size); + str_null_end = str + remaining_size; + +#ifdef WITH_READ_MAIN + fprintf(stdout, "reg num: %d, pattern size: %d, lead: %s\n", + reg_num, pattern_size, + lead == ONIG_REGSET_POSITION_LEAD ? "position" : "regex"); + + if (reg_num != 0) { + unsigned char* p; + i = 0; + p = pat[0]; + while (p < pat_end[0]) { + fprintf(stdout, " 0x%02x", (int )*p++); + i++; + if (i % 8 == 0) fprintf(stdout, "\n"); + } + fprintf(stdout, "\n"); + } +#endif + + ENC = ONIG_ENCODING_UTF8; + + r = exec(ENC, reg_num, init_reg_num, pat, pat_end, lead, str, str_null_end); + + for (i = 0; i < reg_num; i++) { + free(pat[i]); + } + free(str); + + if (r == -2) { + //output_data("parser-bug", Data, Size); + exit(-2); + } + + if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { + char d[64]; + time_t t; + float fexec, freg, fvalid; + + t = time(NULL); + strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); + + fexec = (float )EXEC_COUNT / INPUT_COUNT; + freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; + fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; + + fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f MAX REG:%d-%d\n", + d, EXEC_COUNT, fexec, freg, fvalid, MaxRegNum, MaxInitRegNum); + + EXEC_COUNT_INTERVAL = 0; + } + return r; +} + +#ifdef WITH_READ_MAIN + +extern int main(int argc, char* argv[]) +{ + size_t n; + uint8_t Data[10000]; + + n = read(0, Data, sizeof(Data)); + fprintf(stdout, "n: %ld\n", n); + LLVMFuzzerTestOneInput(Data, n); + + return 0; +} +#endif /* WITH_READ_MAIN */ diff --git a/harnesses/syntax-harness.c b/harnesses/syntax-harness.c deleted file mode 100644 index 0fb3587..0000000 --- a/harnesses/syntax-harness.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * syntax-harness.c - * contributed by Mark Griffin - */ -#include -#include -#include "oniguruma.h" - -#include - -#define DEFAULT_LIMIT 120 -typedef unsigned char uint8_t; - -extern int exec(OnigSyntaxType* syntax, char* apattern, char* astr) -{ - int r; - unsigned char *start, *range, *end; - regex_t* reg; - OnigErrorInfo einfo; - OnigRegion *region; - UChar* pattern = (UChar* )apattern; - UChar* str = (UChar* )astr; - - r = onig_new(®, pattern, pattern + strlen((char* )pattern), - ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); - if (r != ONIG_NORMAL) { - char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str((UChar* )s, r, &einfo); - fprintf(stdout, "ERROR: %s\n", s); - return -1; - } - - region = onig_region_new(); - - end = str + strlen((char* )str); - start = str; - range = end; - r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); - if (r >= 0) { - int i; - - fprintf(stdout, "match at %d\n", r); - for (i = 0; i < region->num_regs; i++) { - fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); - } - } - else if (r == ONIG_MISMATCH) { - fprintf(stdout, "search fail\n"); - } - else { /* error */ - char s[ONIG_MAX_ERROR_MESSAGE_LEN]; - onig_error_code_to_str((UChar* )s, r); - fprintf(stdout, "ERROR: %s\n", s); - onig_region_free(region, 1 /* 1:free self, 0:free contents only */); - onig_free(reg); - return -1; - } - - onig_region_free(region, 1 /* 1:free self, 0:free contents only */); - onig_free(reg); - return 0; -} - -#define PATTERN_SIZE 64 -#define NUM_CONTROL_BYTES 1 -#define MIN_STR_SIZE 1 -int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) -{ - if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) - return 0; - if (Size > 0x1000) - return 0; - size_t remaining_size = Size; - unsigned char *data = (unsigned char *)(Data); - - // pull off one byte to switch syntax choice - unsigned char syntax_choice = data[0]; - data++; - remaining_size--; - - // copy first PATTERN_SIZE bytes off to be the pattern - unsigned char *pattern = (unsigned char *)malloc(PATTERN_SIZE+1); - memset(pattern, 0, PATTERN_SIZE+1); - memcpy(pattern, data, PATTERN_SIZE); - data += PATTERN_SIZE; - remaining_size -= PATTERN_SIZE; - - unsigned char *str = (unsigned char*)malloc(remaining_size+1); - memset(str, 0, remaining_size+1); - memcpy(str, data, remaining_size); - - OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII }; - onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); - - onig_set_retry_limit_in_match(DEFAULT_LIMIT); - onig_set_parse_depth_limit(DEFAULT_LIMIT); - - OnigSyntaxType *syntaxes[] = { - ONIG_SYNTAX_POSIX_EXTENDED, - ONIG_SYNTAX_EMACS, - ONIG_SYNTAX_GREP, - ONIG_SYNTAX_GNU_REGEX, - ONIG_SYNTAX_JAVA, - ONIG_SYNTAX_PERL_NG, - ONIG_SYNTAX_RUBY, - ONIG_SYNTAX_ONIGURUMA, - }; - OnigSyntaxType *syntax = syntaxes[syntax_choice % 8]; - - int r; - r = exec(syntax, (char *)pattern, (char *)str); - // r = exec(ONIG_SYNTAX_JAVA, "\\p{XDigit}\\P{XDigit}[a-c&&b-g]", "bgc"); - - onig_end(); - - free(pattern); - free(str); - - return 0; -} -- cgit v1.2.3