diff options
Diffstat (limited to 'harnesses')
-rw-r--r-- | harnesses/ascii_compatible.dict | 111 | ||||
-rw-r--r-- | harnesses/deluxe-encode-harness.c | 239 | ||||
-rw-r--r-- | harnesses/dict_conv.py | 72 | ||||
-rw-r--r-- | harnesses/encode-harness.c | 170 | ||||
-rw-r--r-- | harnesses/syntax-harness.c | 120 |
5 files changed, 712 insertions, 0 deletions
diff --git a/harnesses/ascii_compatible.dict b/harnesses/ascii_compatible.dict new file mode 100644 index 0000000..820bf47 --- /dev/null +++ b/harnesses/ascii_compatible.dict @@ -0,0 +1,111 @@ +# First-pass fuzzing dictionary for Oniguruma by Mark Griffin +"\\o{17777777777}" +"\\777" +"\\u" +"\\uFFFF" +"\\xFF" +"\\x{70000000}" +"\\C-" +"\\M-\\C-" +"\\X" +"\\p{" +"\\p{^" +"}" +"]" +")" +"\\n" +"\\r" +"\\R" +"\\W" +"\\w" +"\\s" +"\\S" +"\\d" +"\\O" +"\\X" +"\\b" +"\\y" +"\\Y" +"\\A" +"\\z" +"\\K" +"\\G" +"\\p{Print}" +"\\p{ASCII}" +"\\p{Alnum}" +"{0,2}" +"{3,}" +"{,3}" +"{5}" +"{4,2}" +"??" +"*?" +"+?" +"*+" +"{1,3}+" +"(?>" +"\\B" +"(?y{" +"[abcd1-9]" +"[\\w\\d" +"[\\p{Alphabetic}" +"[\\P{Arabic}" +"[\\x{ffff}" +"[a-w&&" +"[^" +"[:graph:]" +"[^:cntrl:]" +"(?i:" +"(?i)" +"(?m:" +"(?x:" +"(?W:" +"(?y-:" +"(?y{w}:" +"(?P:" +"(?#" +"(?:" +"(?=" +"(?!" +"(?<=" +"(?<!" +"(?>" +"(?<name>" +"(?{" +"(?{....}[x])" +"(?{.}[x]>)" +"(?{{{.}}})" +"(?~" +"(?~a)" +"(?~|a|.*)" +"(?~|(?:a|b))" +"(?~|)" +"(?(.) |.)" +"(?('-n'))" +"(?(n+0))" +"(?(n+1))" +"(?(n-1))" +"(?(<name+0>))" +"(?(<name+1>))" +"(?(<name-1>))" +"(*ERROR{-2000})" +"(*COUNT[tag]{X})" +"\\1" +"\\2" +"\\k<name>" +"\\k<1>" +"\\k<2>" +"\\k<-1>" +"\\k<-2>" +"\\k<name+0>" +"\\k<name+1>" +"\\k<name-1>" +"\\g<-1>" +"\\g<name>" +"name" +"(?<name>a|b\\g<name>c)" +"(?-i:\\g<name>)" +"\\N{name}" +"\\p{Hiragana}" +"\\p{Katakana}" +"\\p{Emoji}" diff --git a/harnesses/deluxe-encode-harness.c b/harnesses/deluxe-encode-harness.c new file mode 100644 index 0000000..e1f84a5 --- /dev/null +++ b/harnesses/deluxe-encode-harness.c @@ -0,0 +1,239 @@ +/* + * deluxe-encode-harness.c + * contributed by Mark Griffin + */ +#include <stdio.h> +#include "oniguruma.h" + +#include <stdlib.h> +#include <string.h> + +#define DEFAULT_LIMIT 120 +typedef unsigned char uint8_t; + +static int +search(regex_t* reg, unsigned char* str, unsigned char* end) +{ + int r; + unsigned char *start, *range; + OnigRegion *region; + + region = onig_region_new(); + + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { + int i; + + fprintf(stdout, "match at %d (%s)\n", r, + ONIGENC_NAME(onig_get_encoding(reg))); + for (i = 0; i < region->num_regs; i++) { + fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } + } + else if (r == ONIG_MISMATCH) { + fprintf(stdout, "search fail (%s)\n", + ONIGENC_NAME(onig_get_encoding(reg))); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(stdout, "ERROR: %s\n", s); + fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + return 0; +} + +static int +exec(OnigEncoding enc, OnigOptionType options, + char* apattern, char* apattern_end, char* astr, char* astr_end) +{ + int r; + regex_t* reg; + OnigErrorInfo einfo; + UChar* pattern = (UChar* )apattern; + UChar* str = (UChar* )astr; + UChar* pattern_end = (UChar* )apattern_end; + unsigned char *end = (unsigned char* )astr_end; + + onig_initialize(&enc, 1); + onig_set_retry_limit_in_match(DEFAULT_LIMIT); + onig_set_parse_depth_limit(DEFAULT_LIMIT); + + r = onig_new(®, pattern, pattern_end, + options, enc, ONIG_SYNTAX_DEFAULT, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stdout, "ERROR: %s\n", s); + onig_end(); + return -1; + } + + r = search(reg, str, end); + + onig_free(reg); + onig_end(); + return 0; +} + +static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN; + +static int +exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc, + OnigOptionType options, char* apattern, char* apattern_end, + char* astr, char* astr_end) +{ + int r; + regex_t* reg; + OnigCompileInfo ci; + OnigErrorInfo einfo; + UChar* pattern = (UChar* )apattern; + UChar* str = (UChar* )astr; + UChar* pattern_end = (UChar* )apattern_end; + unsigned char* end = (unsigned char* )astr_end; + + onig_initialize(&str_enc, 1); + onig_set_retry_limit_in_match(DEFAULT_LIMIT); + onig_set_parse_depth_limit(DEFAULT_LIMIT); + + ci.num_of_elements = 5; + ci.pattern_enc = pattern_enc; + ci.target_enc = str_enc; + ci.syntax = ONIG_SYNTAX_DEFAULT; + ci.option = options; + ci.case_fold_flag = CF; + + r = onig_new_deluxe(®, pattern, pattern_end, &ci, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stdout, "ERROR: %s\n", s); + onig_end(); + return -1; + } + + if (onigenc_is_valid_mbc_string(str_enc, str, end) != 0) { + r = search(reg, str, end); + } + + onig_free(reg); + onig_end(); + return 0; +} + +#define PATTERN_SIZE 48 +#define NUM_CONTROL_BYTES 1 +#define MIN_STR_SIZE 2 +int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ + int r; + size_t remaining_size; + unsigned char *data; + unsigned char pat_encoding_choice; + unsigned char str_encoding_choice; + unsigned char *pattern; + unsigned char *str; + unsigned char *pattern_end; + unsigned char *str_end; + unsigned int num_encodings; + OnigEncodingType *pattern_enc; + OnigEncodingType *str_enc; + + OnigEncodingType *encodings[] = { + ONIG_ENCODING_ASCII, + ONIG_ENCODING_ISO_8859_1, + ONIG_ENCODING_ISO_8859_2, + ONIG_ENCODING_ISO_8859_3, + ONIG_ENCODING_ISO_8859_4, + ONIG_ENCODING_ISO_8859_5, + ONIG_ENCODING_ISO_8859_6, + ONIG_ENCODING_ISO_8859_7, + ONIG_ENCODING_ISO_8859_8, + ONIG_ENCODING_ISO_8859_9, + ONIG_ENCODING_ISO_8859_10, + ONIG_ENCODING_ISO_8859_11, + ONIG_ENCODING_ISO_8859_13, + ONIG_ENCODING_ISO_8859_14, + ONIG_ENCODING_ISO_8859_15, + ONIG_ENCODING_ISO_8859_16, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_UTF16_BE, + ONIG_ENCODING_UTF16_LE, + ONIG_ENCODING_UTF32_BE, + ONIG_ENCODING_UTF32_LE, + ONIG_ENCODING_EUC_JP, + ONIG_ENCODING_EUC_TW, + ONIG_ENCODING_EUC_KR, + ONIG_ENCODING_EUC_CN, + ONIG_ENCODING_SJIS, + //ONIG_ENCODING_KOI8, + ONIG_ENCODING_KOI8_R, + ONIG_ENCODING_CP1251, + ONIG_ENCODING_BIG5, + ONIG_ENCODING_GB18030, + }; + + if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) + return 0; + if (Size > 0x1000) + return 0; + + remaining_size = Size; + data = (unsigned char *)(Data); + + // pull off bytes to switch off + pat_encoding_choice = data[0]; + data++; + remaining_size--; + str_encoding_choice = data[0]; + data++; + remaining_size--; + + // copy first PATTERN_SIZE bytes off to be the pattern + pattern = (unsigned char *)malloc(PATTERN_SIZE+4); + memset(pattern, 0, PATTERN_SIZE+4); + memcpy(pattern, data, PATTERN_SIZE); + pattern_end = pattern + PATTERN_SIZE; + data += PATTERN_SIZE; + remaining_size -= PATTERN_SIZE; + + str = (unsigned char*)malloc(remaining_size+4); + memset(str, 0, remaining_size+4); + memcpy(str, data, remaining_size); + str_end = str + remaining_size; + + num_encodings = sizeof(encodings) / sizeof(encodings[0]); + pattern_enc = encodings[pat_encoding_choice % num_encodings]; + str_enc = encodings[str_encoding_choice % num_encodings]; + + r = exec_deluxe(pattern_enc, str_enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, (char *)str, (char *)str_end); + + free(pattern); + free(str); + + return r; +} + + +#ifdef WITH_READ_MAIN + +#include <unistd.h> + +extern int main(int argc, char* argv[]) +{ + size_t n; + uint8_t Data[10000]; + + n = read(0, Data, sizeof(Data)); + fprintf(stdout, "n: %ld\n", n); + LLVMFuzzerTestOneInput(Data, n); + + return 0; +} +#endif /* WITH_READ_MAIN */ diff --git a/harnesses/dict_conv.py b/harnesses/dict_conv.py new file mode 100644 index 0000000..f721293 --- /dev/null +++ b/harnesses/dict_conv.py @@ -0,0 +1,72 @@ +# -*- coding: utf-8 -*- +# dict_conv.py (Python3 script) + +import sys + +ENC_UTF16_BE = 1 +ENC_UTF16_LE = 2 + +def add_char(enc, s, c): + if enc == ENC_UTF16_BE: + s += "\\x00" + + s += c + if enc == ENC_UTF16_LE: + s += "\\x00" + + return s + +def conv(enc, s): + n = len(s) + r = "" + i = 0 + while i < n: + c = s[i] + if c == '\\': + c = s[i+1] + if c == '\\' or c == '"': + r = add_char(enc, r, "\\" + c) + i += 2 + continue + else: + raise("Unknown escape {0}".format(s)) + + r = add_char(enc, r, c) + i += 1 + + return r + +def main(enc): + print("# This file was generated by dict_conv.py.") + for line in sys.stdin: + s = line.strip() + if s[0] == '#': + print(s) + continue + + if s[0] == '"' and s[-1] == '"': + s = conv(enc, s[1:-1]) + print("\"{0}\"".format(s)) + else: + raise("Invalid format {0}".format(s)) + +def usage(argv): + raise RuntimeError("Usage: python {0} utf16_be/utf16_le".format(argv[0])) + + +if __name__ == "__main__": + argv = sys.argv + argc = len(argv) + + if argc >= 2: + s = argv[1] + if s == 'utf16_be': + enc = ENC_UTF16_BE + elif s == 'utf16_le': + enc = ENC_UTF16_LE + else: + usage(argv) + else: + usage(argv) + + main(enc) diff --git a/harnesses/encode-harness.c b/harnesses/encode-harness.c new file mode 100644 index 0000000..e57fd4f --- /dev/null +++ b/harnesses/encode-harness.c @@ -0,0 +1,170 @@ +/* + * encode-harness.c + * contributed by Mark Griffin + */ +#include <stdio.h> +#include "oniguruma.h" + +#include <stdlib.h> +#include <string.h> + +#define PARSE_DEPTH_LIMIT 120 +#define RETRY_LIMIT 4000 + +typedef unsigned char uint8_t; + +static int +search(regex_t* reg, unsigned char* str, unsigned char* end) +{ + int r; + unsigned char *start, *range; + OnigRegion *region; + + region = onig_region_new(); + + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { + int i; + + fprintf(stdout, "match at %d (%s)\n", r, + ONIGENC_NAME(onig_get_encoding(reg))); + for (i = 0; i < region->num_regs; i++) { + fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } + } + else if (r == ONIG_MISMATCH) { + fprintf(stdout, "search fail (%s)\n", + ONIGENC_NAME(onig_get_encoding(reg))); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(stdout, "ERROR: %s\n", s); + fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg))); + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + return 0; +} + +static int +exec(OnigEncoding enc, OnigOptionType options, + char* apattern, char* apattern_end, char* astr, UChar* end) +{ + int r; + regex_t* reg; + OnigErrorInfo einfo; + UChar* pattern = (UChar* )apattern; + UChar* str = (UChar* )astr; + UChar* pattern_end = (UChar* )apattern_end; + + onig_initialize(&enc, 1); + onig_set_retry_limit_in_match(RETRY_LIMIT); + onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT); + + r = onig_new(®, pattern, pattern_end, + options, enc, ONIG_SYNTAX_DEFAULT, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stdout, "ERROR: %s\n", s); + onig_end(); + return -1; + } + + if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { + r = search(reg, str, end); + } + + onig_free(reg); + onig_end(); + return 0; +} + +#define PATTERN_SIZE 32 +#define NUM_CONTROL_BYTES 1 +#define MIN_STR_SIZE 1 +int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ + if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) + return 0; + if (Size > 0x1000) + return 0; + + unsigned char *pattern_end; + unsigned char *str_null_end; + + size_t remaining_size = Size; + unsigned char *data = (unsigned char *)(Data); + + // pull off one byte to switch off + unsigned char encoding_choice = data[0]; + data++; + remaining_size--; + + // copy first PATTERN_SIZE bytes off to be the pattern + unsigned char *pattern = (unsigned char *)malloc(PATTERN_SIZE+4); + memset(pattern, 0, PATTERN_SIZE+4); + memcpy(pattern, data, PATTERN_SIZE); + pattern_end = pattern + PATTERN_SIZE; + data += PATTERN_SIZE; + remaining_size -= PATTERN_SIZE; + + unsigned char *str = (unsigned char*)malloc(remaining_size+4); + memset(str, 0, remaining_size+4); + memcpy(str, data, remaining_size); + str_null_end = str + remaining_size; + + int r; + OnigEncodingType *encodings[] = { + ONIG_ENCODING_SJIS, + ONIG_ENCODING_EUC_JP, + ONIG_ENCODING_CP1251, + ONIG_ENCODING_ISO_8859_1, + ONIG_ENCODING_UTF8, + ONIG_ENCODING_KOI8_R, + ONIG_ENCODING_BIG5 + }; + + OnigEncodingType *enc; + +#ifdef UTF16_BE + enc = ONIG_ENCODING_UTF16_BE; +#else +#ifdef UTF16_LE + enc = ONIG_ENCODING_UTF16_LE; +#else + int num_encodings = sizeof(encodings)/sizeof(encodings[0]); + enc = encodings[encoding_choice % num_encodings]; +#endif +#endif + + r = exec(enc, ONIG_OPTION_NONE, (char *)pattern, (char *)pattern_end, + (char *)str, str_null_end); + + free(pattern); + free(str); + + return r; +} + +#ifdef WITH_READ_MAIN + +#include <unistd.h> + +extern int main(int argc, char* argv[]) +{ + size_t n; + uint8_t Data[10000]; + + n = read(0, Data, sizeof(Data)); + fprintf(stdout, "n: %ld\n", n); + LLVMFuzzerTestOneInput(Data, n); + + return 0; +} +#endif /* WITH_READ_MAIN */ diff --git a/harnesses/syntax-harness.c b/harnesses/syntax-harness.c new file mode 100644 index 0000000..0fb3587 --- /dev/null +++ b/harnesses/syntax-harness.c @@ -0,0 +1,120 @@ +/* + * syntax-harness.c + * contributed by Mark Griffin + */ +#include <stdio.h> +#include <string.h> +#include "oniguruma.h" + +#include <stdlib.h> + +#define DEFAULT_LIMIT 120 +typedef unsigned char uint8_t; + +extern int exec(OnigSyntaxType* syntax, char* apattern, char* astr) +{ + int r; + unsigned char *start, *range, *end; + regex_t* reg; + OnigErrorInfo einfo; + OnigRegion *region; + UChar* pattern = (UChar* )apattern; + UChar* str = (UChar* )astr; + + r = onig_new(®, pattern, pattern + strlen((char* )pattern), + ONIG_OPTION_DEFAULT, ONIG_ENCODING_ASCII, syntax, &einfo); + if (r != ONIG_NORMAL) { + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stdout, "ERROR: %s\n", s); + return -1; + } + + region = onig_region_new(); + + end = str + strlen((char* )str); + start = str; + range = end; + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + if (r >= 0) { + int i; + + fprintf(stdout, "match at %d\n", r); + for (i = 0; i < region->num_regs; i++) { + fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } + } + else if (r == ONIG_MISMATCH) { + fprintf(stdout, "search fail\n"); + } + else { /* error */ + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + onig_error_code_to_str((UChar* )s, r); + fprintf(stdout, "ERROR: %s\n", s); + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + onig_free(reg); + return -1; + } + + onig_region_free(region, 1 /* 1:free self, 0:free contents only */); + onig_free(reg); + return 0; +} + +#define PATTERN_SIZE 64 +#define NUM_CONTROL_BYTES 1 +#define MIN_STR_SIZE 1 +int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ + if (Size <= (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE)) + return 0; + if (Size > 0x1000) + return 0; + size_t remaining_size = Size; + unsigned char *data = (unsigned char *)(Data); + + // pull off one byte to switch syntax choice + unsigned char syntax_choice = data[0]; + data++; + remaining_size--; + + // copy first PATTERN_SIZE bytes off to be the pattern + unsigned char *pattern = (unsigned char *)malloc(PATTERN_SIZE+1); + memset(pattern, 0, PATTERN_SIZE+1); + memcpy(pattern, data, PATTERN_SIZE); + data += PATTERN_SIZE; + remaining_size -= PATTERN_SIZE; + + unsigned char *str = (unsigned char*)malloc(remaining_size+1); + memset(str, 0, remaining_size+1); + memcpy(str, data, remaining_size); + + OnigEncoding use_encs[] = { ONIG_ENCODING_ASCII }; + onig_initialize(use_encs, sizeof(use_encs)/sizeof(use_encs[0])); + + onig_set_retry_limit_in_match(DEFAULT_LIMIT); + onig_set_parse_depth_limit(DEFAULT_LIMIT); + + OnigSyntaxType *syntaxes[] = { + ONIG_SYNTAX_POSIX_EXTENDED, + ONIG_SYNTAX_EMACS, + ONIG_SYNTAX_GREP, + ONIG_SYNTAX_GNU_REGEX, + ONIG_SYNTAX_JAVA, + ONIG_SYNTAX_PERL_NG, + ONIG_SYNTAX_RUBY, + ONIG_SYNTAX_ONIGURUMA, + }; + OnigSyntaxType *syntax = syntaxes[syntax_choice % 8]; + + int r; + r = exec(syntax, (char *)pattern, (char *)str); + // r = exec(ONIG_SYNTAX_JAVA, "\\p{XDigit}\\P{XDigit}[a-c&&b-g]", "bgc"); + + onig_end(); + + free(pattern); + free(str); + + return 0; +} |