diff options
Diffstat (limited to 'harnesses')
-rw-r--r-- | harnesses/base.c | 166 | ||||
-rw-r--r-- | harnesses/makefile | 9 |
2 files changed, 117 insertions, 58 deletions
diff --git a/harnesses/base.c b/harnesses/base.c index a88e6f2..1206217 100644 --- a/harnesses/base.c +++ b/harnesses/base.c @@ -10,16 +10,32 @@ #include <sys/stat.h> #include <fcntl.h> #include <time.h> - #include "oniguruma.h" #define PARSE_DEPTH_LIMIT 8 -#define RETRY_LIMIT 5000 #define CALL_MAX_NEST_LEVEL 8 +#define SUBEXP_CALL_LIMIT 500 +#define BASE_RETRY_LIMIT 20000 +#define BASE_LENGTH 2048 +#define MATCH_STACK_LIMIT 10000000 +#define MAX_REM_SIZE 1048576 +#define MAX_SLOW_REM_SIZE 1024 +#define SLOW_RETRY_LIMIT 2000 + //#define EXEC_PRINT_INTERVAL 500000 //#define DUMP_DATA_INTERVAL 100000 //#define STAT_PATH "fuzzer.stat_log" +#define OPTIONS_AT_COMPILE (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP | ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII | ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER | ONIG_OPTION_TEXT_SEGMENT_WORD ) + +#define OPTIONS_AT_RUNTIME (ONIG_OPTION_NOTBOL | ONIG_OPTION_NOTEOL | ONIG_OPTION_CHECK_VALIDITY_OF_STRING | ONIG_OPTION_NOT_BEGIN_STRING | ONIG_OPTION_NOT_END_STRING | ONIG_OPTION_NOT_BEGIN_POSITION) + + +#define ADJUST_LEN(enc, len) do {\ + int mlen = ONIGENC_MBC_MINLEN(enc);\ + if (mlen != 1) { len -= len % mlen; }\ +} while (0) + typedef unsigned char uint8_t; #ifdef DUMP_INPUT @@ -103,14 +119,34 @@ output_current_time(FILE* fp) #endif static int -search(regex_t* reg, unsigned char* str, unsigned char* end, int backward) +search(regex_t* reg, unsigned char* str, unsigned char* end, OnigOptionType options, int backward, int sl) { int r; unsigned char *start, *range; OnigRegion *region; + unsigned int retry_limit; + size_t len; region = onig_region_new(); + len = (size_t )(end - str); + if (len < BASE_LENGTH) { + if (sl >= 2) + retry_limit = (unsigned int )SLOW_RETRY_LIMIT; + else + retry_limit = (unsigned int )BASE_RETRY_LIMIT; + } + else + retry_limit = (unsigned int )(BASE_RETRY_LIMIT * BASE_LENGTH / len); + +#ifdef STANDALONE + fprintf(stdout, "retry limit: %u\n", retry_limit); +#endif + + onig_set_retry_limit_in_search(retry_limit); + onig_set_match_stack_limit_size(MATCH_STACK_LIMIT); + onig_set_subexp_call_limit_in_search(SUBEXP_CALL_LIMIT); + if (backward != 0) { start = end; range = str; @@ -120,7 +156,7 @@ search(regex_t* reg, unsigned char* str, unsigned char* end, int backward) range = end; } - r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + r = onig_search(reg, str, end, start, range, region, (options & OPTIONS_AT_RUNTIME)); if (r >= 0) { #ifdef STANDALONE int i; @@ -168,7 +204,8 @@ static long VALID_STRING_COUNT; static int exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, - char* apattern, char* apattern_end, char* astr, UChar* end, int backward) + char* apattern, char* apattern_end, char* astr, UChar* end, int backward, + int sl) { int r; regex_t* reg; @@ -181,14 +218,13 @@ exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, EXEC_COUNT_INTERVAL++; onig_initialize(&enc, 1); - onig_set_retry_limit_in_search(RETRY_LIMIT); #ifdef PARSE_DEPTH_LIMIT onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); #endif onig_set_subexp_call_max_nest_level(CALL_MAX_NEST_LEVEL); r = onig_new(®, pattern, pattern_end, - options, enc, syntax, &einfo); + (options & OPTIONS_AT_COMPILE), enc, syntax, &einfo); if (r != ONIG_NORMAL) { char s[ONIG_MAX_ERROR_MESSAGE_LEN]; onig_error_code_to_str((UChar* )s, r, &einfo); @@ -208,12 +244,12 @@ exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, } REGEX_SUCCESS_COUNT++; - r = search(reg, pattern, pattern_end, backward); + r = search(reg, pattern, pattern_end, options, backward, sl); if (r == -2) return -2; if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { VALID_STRING_COUNT++; - r = search(reg, str, end, backward); + r = search(reg, str, end, options, backward, sl); if (r == -2) return -2; } @@ -224,43 +260,52 @@ exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, static int alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax, - int backward, int pattern_size, size_t remaining_size, unsigned char *data) + int backward, int pattern_size, size_t rem_size, unsigned char *data) { + extern int onig_detect_can_be_slow_pattern(const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax); + int r; + int sl; + unsigned char *pattern; unsigned char *pattern_end; unsigned char *str_null_end; - // copy first PATTERN_SIZE bytes off to be the pattern - unsigned char *pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1); + pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1); memcpy(pattern, data, pattern_size); pattern_end = pattern + pattern_size; data += pattern_size; - remaining_size -= pattern_size; + rem_size -= pattern_size; + + if (rem_size > MAX_REM_SIZE) rem_size = MAX_REM_SIZE; + + sl = onig_detect_can_be_slow_pattern(pattern, pattern_end, options, enc, syntax); + if (sl > 0) { + if (rem_size > MAX_SLOW_REM_SIZE) + rem_size = MAX_SLOW_REM_SIZE; + } -#if defined(UTF16_BE) || defined(UTF16_LE) - if (remaining_size % 2 == 1) remaining_size--; + ADJUST_LEN(enc, rem_size); +#ifdef STANDALONE + fprintf(stdout, "rem_size: %ld\n", rem_size); #endif - unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1); - memcpy(str, data, remaining_size); - str_null_end = str + remaining_size; + unsigned char *str = (unsigned char*)malloc(rem_size != 0 ? rem_size : 1); + memcpy(str, data, rem_size); + str_null_end = str + rem_size; r = exec(enc, options, syntax, (char *)pattern, (char *)pattern_end, - (char *)str, str_null_end, backward); + (char *)str, str_null_end, backward, sl); free(pattern); free(str); return r; } -#define OPTIONS_MASK (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP) - - #ifdef SYNTAX_TEST -#define NUM_CONTROL_BYTES 6 +#define NUM_CONTROL_BYTES 7 #else -#define NUM_CONTROL_BYTES 5 +#define NUM_CONTROL_BYTES 6 #endif int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) @@ -285,14 +330,14 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) ONIG_ENCODING_CP1251, ONIG_ENCODING_BIG5, ONIG_ENCODING_GB18030, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, - ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF16_BE, + ONIG_ENCODING_UTF16_LE, + ONIG_ENCODING_UTF16_BE, + ONIG_ENCODING_UTF16_LE, + ONIG_ENCODING_UTF32_BE, + ONIG_ENCODING_UTF32_LE, + ONIG_ENCODING_UTF32_BE, + ONIG_ENCODING_UTF32_LE, ONIG_ENCODING_ISO_8859_1, ONIG_ENCODING_ISO_8859_2, ONIG_ENCODING_ISO_8859_3, @@ -341,7 +386,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) int r; int backward; int pattern_size; - size_t remaining_size; + size_t rem_size; unsigned char *data; unsigned char pattern_size_choice; OnigOptionType options; @@ -364,7 +409,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) if (Size < NUM_CONTROL_BYTES) return 0; - remaining_size = Size; + rem_size = Size; data = (unsigned char* )(Data); #ifdef UTF16_BE @@ -375,7 +420,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) #else encoding_choice = data[0]; data++; - remaining_size--; + rem_size--; int num_encodings = sizeof(encodings)/sizeof(encodings[0]); enc = encodings[encoding_choice % num_encodings]; @@ -385,7 +430,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) #ifdef SYNTAX_TEST syntax_choice = data[0]; data++; - remaining_size--; + rem_size--; int num_syntaxes = sizeof(syntaxes)/sizeof(syntaxes[0]); syntax = syntaxes[syntax_choice % num_syntaxes]; @@ -393,31 +438,30 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) syntax = ONIG_SYNTAX_DEFAULT; #endif - if ((data[1] & 0xc0) == 0) - options = (data[0] | (data[1] << 8)) & OPTIONS_MASK; + if ((data[2] & 0xc0) == 0) + options = data[0] | (data[1] << 8) | (data[2] << 16); else options = data[0] & ONIG_OPTION_IGNORECASE; - data++; - remaining_size--; - data++; - remaining_size--; + data++; rem_size--; + data++; rem_size--; + data++; rem_size--; pattern_size_choice = data[0]; - data++; - remaining_size--; + data++; rem_size--; backward = (data[0] == 0xbb); - data++; - remaining_size--; + data++; rem_size--; - if (remaining_size == 0) + if (backward != 0) { + options = options & ~ONIG_OPTION_FIND_LONGEST; + } + + if (rem_size == 0) pattern_size = 0; else { - pattern_size = (int )pattern_size_choice % remaining_size; -#if defined(UTF16_BE) || defined(UTF16_LE) - if (pattern_size % 2 == 1) pattern_size--; -#endif + pattern_size = (int )pattern_size_choice % rem_size; + ADJUST_LEN(enc, pattern_size); } #ifdef STANDALONE @@ -440,7 +484,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) #endif r = alloc_exec(enc, options, syntax, backward, pattern_size, - remaining_size, data); + rem_size, data); if (r == -2) exit(-2); #ifndef STANDALONE @@ -485,15 +529,25 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) #ifdef STANDALONE +#define MAX_INPUT_DATA_SIZE 4194304 + extern int main(int argc, char* argv[]) { + size_t max_size; size_t n; - uint8_t Data[10000]; + uint8_t Data[MAX_INPUT_DATA_SIZE]; - n = read(0, Data, sizeof(Data)); - fprintf(stdout, "n: %ld\n", n); - LLVMFuzzerTestOneInput(Data, n); + if (argc > 1) { + max_size = (size_t )atoi(argv[1]); + } + else { + max_size = sizeof(Data); + } + n = read(0, Data, max_size); + fprintf(stdout, "read size: %ld, max_size: %ld\n", n, max_size); + + LLVMFuzzerTestOneInput(Data, n); return 0; } #endif /* STANDALONE */ diff --git a/harnesses/makefile b/harnesses/makefile index b324295..d4fcfb6 100644 --- a/harnesses/makefile +++ b/harnesses/makefile @@ -1,4 +1,8 @@ # makefile for harness +DEBUG_OUT = +#DEBUG_OUT = -DONIG_DEBUG_PARSE -DONIG_DEBUG_COMPILE +#DEBUG_OUT = -DONIG_DEBUG_PARSE -DONIG_DEBUG_COMPILE -DONIG_DEBUG_MATCH_COUNTER + SRC = ../src CFLAGS = -I$(SRC) -Wall -g -fsanitize=fuzzer,address -fno-omit-frame-pointer CFLAGS_M = -I$(SRC) -Wall -g -fsanitize=fuzzer-no-link,address -fno-omit-frame-pointer -DSTANDALONE @@ -12,7 +16,8 @@ TARGETS = fuzzer-encode fuzzer-syntax fuzzer-utf16-be fuzzer-utf16-le \ OTHER_TARGETS = libfuzzer-onig libfuzzer-onig-full fuzzer-deluxe read-deluxe -default: $(TARGETS) +#default: $(TARGETS) +default: read-syntax fuzzer-encode: base.c $(ONIG_LIB) clang $(CFLAGS) $< $(LIBS) -o $@ @@ -60,7 +65,7 @@ libfuzzer-onig-full: libfuzzer-onig.cpp $(ONIG_LIB) $(ONIG_LIB): cd ..; make clean #cd ..; autoreconf -vfi - cd ..; ./configure CC=clang LD=clang CFLAGS="-g -fsanitize=address -fno-omit-frame-pointer" LDFLAGS="-g -fsanitize=address -fno-omit-frame-pointer" + cd ..; ./configure CC=clang LD=clang CFLAGS="-g -fsanitize=address -fno-omit-frame-pointer $(DEBUG_OUT)" LDFLAGS="-g -fsanitize=address -fno-omit-frame-pointer" cd ..; make -j4 |