summaryrefslogtreecommitdiff
path: root/harnesses
diff options
context:
space:
mode:
Diffstat (limited to 'harnesses')
-rw-r--r--harnesses/ascii_compatible.dict111
-rw-r--r--harnesses/deluxe-encode-harness.c204
-rw-r--r--harnesses/dict_conv.py72
-rw-r--r--harnesses/encode-harness.c365
-rw-r--r--harnesses/libfuzzer-onig.cpp45
-rw-r--r--harnesses/makefile69
-rw-r--r--harnesses/regset-harness.c379
7 files changed, 1245 insertions, 0 deletions
diff --git a/harnesses/ascii_compatible.dict b/harnesses/ascii_compatible.dict
new file mode 100644
index 0000000..e6e00db
--- /dev/null
+++ b/harnesses/ascii_compatible.dict
@@ -0,0 +1,111 @@
+# First-pass fuzzing dictionary for Oniguruma by Mark Griffin
+"\\o{34}"
+"\\123"
+"\\x{40}"
+"\\C-"
+"\\M-\\C-"
+"\\X"
+"\\p{"
+"\\p{^"
+"}"
+"]"
+"]"
+")"
+")"
+"\\n"
+"\\r"
+"\\R"
+"\\W"
+"\\w"
+"\\s"
+"\\S"
+"\\d"
+"\\O"
+"\\X"
+"\\b"
+"\\y"
+"\\Y"
+"\\A"
+"\\z"
+"\\K"
+"\\G"
+"\\p{Print}"
+"\\p{ASCII}"
+"\\p{Alnum}"
+"{0,2}"
+"{3,}"
+"{,3}"
+"{5}"
+"{4,2}"
+"??"
+"*?"
+"+?"
+"*+"
+"{1,3}+"
+"(?>"
+"\\B"
+"(?y{"
+"[abcd1-9]"
+"[\\w]"
+"[\\W]"
+"[\\s]"
+"[\\S]"
+"[\\w\\d"
+"[\\p{Alphabetic}"
+"[\\x{03}"
+"[a-w&&"
+"[^"
+"[:graph:]"
+"[^:cntrl:]"
+"(?i:"
+"(?i)"
+"(?m:"
+"(?x:"
+"(?W:"
+"(?y-:"
+"(?y{w}:"
+"(?P:"
+"(?#"
+"(?:"
+"(?="
+"(?!"
+"(?<="
+"(?<!"
+"(?>"
+"(?<name>"
+"(?{"
+"(?{....}[x])"
+"(?{.}[x]>)"
+"(?{{{.}}})"
+"(?~"
+"(?~a)"
+"(?~|a|.*)"
+"(?~|(?:a|b))"
+"(?~|)"
+"(?(.) |.)"
+"(?('-n'))"
+"(?(n+0))"
+"(?(n+1))"
+"(?(n-1))"
+"(?(<name+0>))"
+"(?(<name+1>))"
+"(?(<name-1>))"
+"(*COUNT[tag]{X})"
+"\\1"
+"\\2"
+"\\k<name>"
+"\\k<1>"
+"\\k<2>"
+"\\k<-1>"
+"\\k<-2>"
+"\\k<name+0>"
+"\\k<name+1>"
+"\\k<name-1>"
+"\\g<-1>"
+"\\g<name>"
+"name"
+"(?<name>a|b\\g<name>c)"
+"(?-i:\\g<name>)"
+"\\N{name}"
+"\\p{Katakana}"
+"\\p{Emoji}"
diff --git a/harnesses/deluxe-encode-harness.c b/harnesses/deluxe-encode-harness.c
new file mode 100644
index 0000000..aabe916
--- /dev/null
+++ b/harnesses/deluxe-encode-harness.c
@@ -0,0 +1,204 @@
+/*
+ * deluxe-encode-harness.c
+ * contributed by Mark Griffin
+ */
+#include <stdio.h>
+#include "oniguruma.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#define DEFAULT_LIMIT 120
+typedef unsigned char uint8_t;
+
+/*
+ * Run a single onig_search() over [str, end) and report the outcome on
+ * stdout: the match offset plus every capture range on a match, a
+ * "search fail" line on ONIG_MISMATCH, or the Oniguruma error text for any
+ * other negative result.
+ *
+ * Returns 0 on a match or a mismatch, -1 when onig_search() reports an error.
+ */
+static int
+search(regex_t* reg, unsigned char* str, unsigned char* end)
+{
+  int result;
+  int status = 0;
+  OnigRegion *region = onig_region_new();
+
+  /* search the whole buffer: start at str, search range up to end */
+  result = onig_search(reg, str, end, str, end, region, ONIG_OPTION_NONE);
+
+  if (result == ONIG_MISMATCH) {
+    fprintf(stdout, "search fail (%s)\n",
+            ONIGENC_NAME(onig_get_encoding(reg)));
+  }
+  else if (result < 0) { /* error */
+    char msg[ONIG_MAX_ERROR_MESSAGE_LEN];
+
+    onig_error_code_to_str((UChar* )msg, result);
+    fprintf(stdout, "ERROR: %s\n", msg);
+    fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg)));
+    status = -1;
+  }
+  else {
+    int k;
+
+    fprintf(stdout, "match at %d (%s)\n", result,
+            ONIGENC_NAME(onig_get_encoding(reg)));
+    for (k = 0; k < region->num_regs; k++) {
+      fprintf(stdout, "%d: (%d-%d)\n", k, region->beg[k], region->end[k]);
+    }
+  }
+
+  /* single release point for every outcome */
+  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+  return status;
+}
+
+static OnigCaseFoldType CF = ONIGENC_CASE_FOLD_MIN;
+
+/*
+ * Compile [apattern, apattern_end) with onig_new_deluxe(), using pattern_enc
+ * for the pattern and str_enc for the target, then search [astr, astr_end)
+ * if it is a valid string in str_enc.
+ *
+ * The library is initialized on entry and shut down with onig_end() on every
+ * exit path.  Returns -1 when the pattern fails to compile, 0 otherwise (the
+ * result of the search itself is not propagated).
+ */
+static int
+exec_deluxe(OnigEncoding pattern_enc, OnigEncoding str_enc,
+            OnigOptionType options, char* apattern, char* apattern_end,
+            char* astr, char* astr_end)
+{
+  int rc;
+  regex_t* regex;
+  OnigErrorInfo err_info;
+  OnigCompileInfo cinfo;
+  UChar* subject = (UChar* )astr;
+  UChar* subject_end = (UChar* )astr_end;
+
+  onig_initialize(&str_enc, 1);
+  onig_set_retry_limit_in_match(DEFAULT_LIMIT);
+  onig_set_parse_depth_limit(DEFAULT_LIMIT);
+
+  cinfo.num_of_elements = 5;
+  cinfo.pattern_enc = pattern_enc;
+  cinfo.target_enc = str_enc;
+  cinfo.syntax = ONIG_SYNTAX_DEFAULT;
+  cinfo.option = options;
+  cinfo.case_fold_flag = CF;
+
+  rc = onig_new_deluxe(&regex, (UChar* )apattern, (UChar* )apattern_end,
+                       &cinfo, &err_info);
+  if (rc == ONIG_NORMAL) {
+    /* only search subjects that are well-formed in the target encoding */
+    if (onigenc_is_valid_mbc_string(str_enc, subject, subject_end) != 0) {
+      (void )search(regex, subject, subject_end);
+    }
+
+    onig_free(regex);
+    onig_end();
+    return 0;
+  }
+
+  /* compile failure: report it and shut the library down */
+  {
+    char msg[ONIG_MAX_ERROR_MESSAGE_LEN];
+
+    onig_error_code_to_str((UChar* )msg, rc, &err_info);
+    fprintf(stdout, "ERROR: %s\n", msg);
+  }
+  onig_end();
+  return -1;
+}
+
+#define PATTERN_SIZE 48
+/* two selector bytes are consumed: pattern encoding and subject encoding */
+#define NUM_CONTROL_BYTES 2
+#define MIN_STR_SIZE 2
+/*
+ * libFuzzer entry point for the onig_new_deluxe() harness.
+ *
+ * Input layout:
+ *   byte 0                  selects the pattern encoding
+ *   byte 1                  selects the subject encoding
+ *   next PATTERN_SIZE bytes the pattern
+ *   remaining bytes         the subject string (at least MIN_STR_SIZE)
+ *
+ * Inputs that are too short for this layout, or longer than 0x1000 bytes,
+ * are ignored.  Returns 0 for ignored inputs and allocation failures,
+ * otherwise the value returned by exec_deluxe().
+ */
+int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
+{
+  int r;
+  size_t remaining_size;
+  const unsigned char *data;
+  unsigned char pat_encoding_choice;
+  unsigned char str_encoding_choice;
+  unsigned char *pattern;
+  unsigned char *str;
+  unsigned char *pattern_end;
+  unsigned char *str_end;
+  unsigned int num_encodings;
+  OnigEncodingType *pattern_enc;
+  OnigEncodingType *str_enc;
+
+  OnigEncodingType *encodings[] = {
+    ONIG_ENCODING_ASCII,
+    ONIG_ENCODING_ISO_8859_1,
+    ONIG_ENCODING_ISO_8859_2,
+    ONIG_ENCODING_ISO_8859_3,
+    ONIG_ENCODING_ISO_8859_4,
+    ONIG_ENCODING_ISO_8859_5,
+    ONIG_ENCODING_ISO_8859_6,
+    ONIG_ENCODING_ISO_8859_7,
+    ONIG_ENCODING_ISO_8859_8,
+    ONIG_ENCODING_ISO_8859_9,
+    ONIG_ENCODING_ISO_8859_10,
+    ONIG_ENCODING_ISO_8859_11,
+    ONIG_ENCODING_ISO_8859_13,
+    ONIG_ENCODING_ISO_8859_14,
+    ONIG_ENCODING_ISO_8859_15,
+    ONIG_ENCODING_ISO_8859_16,
+    ONIG_ENCODING_UTF8,
+    ONIG_ENCODING_UTF16_BE,
+    ONIG_ENCODING_UTF16_LE,
+    ONIG_ENCODING_UTF32_BE,
+    ONIG_ENCODING_UTF32_LE,
+    ONIG_ENCODING_EUC_JP,
+    ONIG_ENCODING_EUC_TW,
+    ONIG_ENCODING_EUC_KR,
+    ONIG_ENCODING_EUC_CN,
+    ONIG_ENCODING_SJIS,
+    //ONIG_ENCODING_KOI8,
+    ONIG_ENCODING_KOI8_R,
+    ONIG_ENCODING_CP1251,
+    ONIG_ENCODING_BIG5,
+    ONIG_ENCODING_GB18030,
+  };
+
+  /* need the full header, a full pattern and a minimal subject */
+  if (Size < (NUM_CONTROL_BYTES + PATTERN_SIZE + MIN_STR_SIZE))
+    return 0;
+  if (Size > 0x1000)
+    return 0;
+
+  remaining_size = Size;
+  data = Data;
+
+  // pull off bytes to switch off
+  pat_encoding_choice = data[0];
+  str_encoding_choice = data[1];
+  data += NUM_CONTROL_BYTES;
+  remaining_size -= NUM_CONTROL_BYTES;
+
+  // copy first PATTERN_SIZE bytes off to be the pattern
+  pattern = (unsigned char *)malloc(PATTERN_SIZE);
+  if (pattern == NULL)
+    return 0;
+  memcpy(pattern, data, PATTERN_SIZE);
+  pattern_end = pattern + PATTERN_SIZE;
+  data += PATTERN_SIZE;
+  remaining_size -= PATTERN_SIZE;
+
+  // everything that is left is the subject string
+  str = (unsigned char *)malloc(remaining_size);
+  if (str == NULL) {
+    free(pattern);
+    return 0;
+  }
+  memcpy(str, data, remaining_size);
+  str_end = str + remaining_size;
+
+  num_encodings = sizeof(encodings) / sizeof(encodings[0]);
+  pattern_enc = encodings[pat_encoding_choice % num_encodings];
+  str_enc = encodings[str_encoding_choice % num_encodings];
+
+  r = exec_deluxe(pattern_enc, str_enc, ONIG_OPTION_NONE,
+                  (char *)pattern, (char *)pattern_end,
+                  (char *)str, (char *)str_end);
+
+  free(pattern);
+  free(str);
+
+  return r;
+}
+
+
+#ifdef WITH_READ_MAIN
+
+#include <unistd.h>
+
+/*
+ * Stand-alone driver: read one test case (at most sizeof(Data) bytes) from
+ * stdin and pass it to LLVMFuzzerTestOneInput().
+ *
+ * Returns 0 on success, 1 when reading stdin fails.
+ */
+extern int main(int argc, char* argv[])
+{
+  ssize_t n;
+  uint8_t Data[10000];
+
+  /* read() returns -1 on error; keep it signed so the failure is not
+     mistaken for a huge input length */
+  n = read(0, Data, sizeof(Data));
+  if (n < 0) {
+    perror("read");
+    return 1;
+  }
+
+  fprintf(stdout, "n: %ld\n", (long )n);
+  LLVMFuzzerTestOneInput(Data, (size_t )n);
+
+  return 0;
+}
+#endif /* WITH_READ_MAIN */
diff --git a/harnesses/dict_conv.py b/harnesses/dict_conv.py
new file mode 100644
index 0000000..f721293
--- /dev/null
+++ b/harnesses/dict_conv.py
@@ -0,0 +1,72 @@
+# -*- coding: utf-8 -*-
+# dict_conv.py (Python3 script)
+
+import sys
+
+ENC_UTF16_BE = 1
+ENC_UTF16_LE = 2
+
+def add_char(enc, s, c):
+  """Append dictionary character c to s, padded for the target encoding.
+
+  For ENC_UTF16_BE a literal "\\x00" escape is written before c, for
+  ENC_UTF16_LE after it; any other enc appends c unchanged.
+  Returns the extended string.
+  """
+  lead = "\\x00" if enc == ENC_UTF16_BE else ""
+  trail = "\\x00" if enc == ENC_UTF16_LE else ""
+  return s + lead + c + trail
+
+def conv(enc, s):
+  """Convert the body of one dictionary entry to the encoding enc.
+
+  s is the text between the surrounding double quotes.  Each character is
+  passed through add_char(); the two-character escapes \\\\ and \\" are kept
+  together as a single unit.
+
+  Returns the converted text.
+  Raises RuntimeError for any other escape, including a lone backslash at
+  the end of s.
+  """
+  n = len(s)
+  r = ""
+  i = 0
+  while i < n:
+    c = s[i]
+    if c == '\\':
+      # a trailing backslash has nothing to escape; treat it as unknown
+      nxt = s[i+1] if i + 1 < n else ''
+      if nxt != '\\' and nxt != '"':
+        # raising a plain string is a TypeError in Python 3
+        raise RuntimeError("Unknown escape {0}".format(s))
+
+      r = add_char(enc, r, "\\" + nxt)
+      i += 2
+      continue
+
+    r = add_char(enc, r, c)
+    i += 1
+
+  return r
+
+def main(enc):
+  """Convert a dictionary read from stdin and write the result to stdout.
+
+  Comment lines (starting with '#') and blank lines are copied through;
+  every other line must be a double-quoted entry, whose body is converted
+  with conv().
+
+  Raises RuntimeError for a line that is not a quoted entry.
+  """
+  print("# This file was generated by dict_conv.py.")
+  for line in sys.stdin:
+    s = line.strip()
+    # blank lines used to crash on s[0]; pass them (and comments) through
+    if not s or s[0] == '#':
+      print(s)
+      continue
+
+    # len check: a lone '"' is not an opening plus closing quote
+    if len(s) >= 2 and s[0] == '"' and s[-1] == '"':
+      s = conv(enc, s[1:-1])
+      print("\"{0}\"".format(s))
+    else:
+      # raising a plain string is a TypeError in Python 3
+      raise RuntimeError("Invalid format {0}".format(s))
+
+def usage(argv):
+  """Abort with a RuntimeError carrying the command-line usage text."""
+  message = "Usage: python {0} utf16_be/utf16_le".format(argv[0])
+  raise RuntimeError(message)
+
+
+if __name__ == "__main__":
+  # map the command-line encoding name to its constant
+  encodings = {
+    'utf16_be': ENC_UTF16_BE,
+    'utf16_le': ENC_UTF16_LE,
+  }
+  argv = sys.argv
+
+  # usage() raises, so execution only continues with a known encoding name
+  if len(argv) < 2 or argv[1] not in encodings:
+    usage(argv)
+
+  enc = encodings[argv[1]]
+  main(enc)
diff --git a/harnesses/encode-harness.c b/harnesses/encode-harness.c
new file mode 100644
index 0000000..5db0512
--- /dev/null
+++ b/harnesses/encode-harness.c
@@ -0,0 +1,365 @@
+/*
+ * encode-harness.c
+ * contributed by Mark Griffin
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+
+#include "oniguruma.h"
+
+
+//#define PARSE_DEPTH_LIMIT 120
+#define RETRY_LIMIT 3500
+
+typedef unsigned char uint8_t;
+
+/*
+ * Run a single onig_search() over [str, end).
+ *
+ * With WITH_READ_MAIN defined the outcome (match position and capture
+ * ranges, mismatch, or error text) is printed to stdout; otherwise the
+ * function is silent.
+ *
+ * Returns 0 on a match or a mismatch, -2 when the search fails with
+ * ONIGERR_STACK_BUG, ONIGERR_UNDEFINED_BYTECODE or
+ * ONIGERR_UNEXPECTED_BYTECODE, and -1 for any other error.
+ */
+static int
+search(regex_t* reg, unsigned char* str, unsigned char* end)
+{
+  int rc;
+  int status;
+  OnigRegion *region;
+
+  region = onig_region_new();
+
+  /* search the whole buffer: start at str, search range up to end */
+  rc = onig_search(reg, str, end, str, end, region, ONIG_OPTION_NONE);
+
+  /* classify the result first, report it afterwards */
+  if (rc >= 0 || rc == ONIG_MISMATCH) {
+    status = 0;
+  }
+  else {
+    switch (rc) {
+    case ONIGERR_STACK_BUG:
+    case ONIGERR_UNDEFINED_BYTECODE:
+    case ONIGERR_UNEXPECTED_BYTECODE:
+      status = -2;
+      break;
+    default:
+      status = -1;
+      break;
+    }
+  }
+
+#ifdef WITH_READ_MAIN
+  if (rc >= 0) {
+    int k;
+
+    fprintf(stdout, "match at %d (%s)\n", rc,
+            ONIGENC_NAME(onig_get_encoding(reg)));
+    for (k = 0; k < region->num_regs; k++) {
+      fprintf(stdout, "%d: (%d-%d)\n", k, region->beg[k], region->end[k]);
+    }
+  }
+  else if (rc == ONIG_MISMATCH) {
+    fprintf(stdout, "search fail (%s)\n",
+            ONIGENC_NAME(onig_get_encoding(reg)));
+  }
+  else { /* error */
+    char msg[ONIG_MAX_ERROR_MESSAGE_LEN];
+
+    onig_error_code_to_str((UChar* )msg, rc);
+    fprintf(stdout, "ERROR: %s\n", msg);
+    fprintf(stdout, " (%s)\n", ONIGENC_NAME(onig_get_encoding(reg)));
+  }
+#endif
+
+  onig_region_free(region, 1 /* 1:free self, 0:free contents only */);
+  return status;
+}
+
+static long INPUT_COUNT;
+static long EXEC_COUNT;
+static long EXEC_COUNT_INTERVAL;
+static long REGEX_SUCCESS_COUNT;
+static long VALID_STRING_COUNT;
+
+/*
+ * Compile [apattern, apattern_end) with onig_new() and exercise the regex.
+ *
+ * The compiled regex is first searched against the pattern bytes themselves
+ * and then, if [astr, end) is a valid string in enc, against that string.
+ * The library is initialized on entry; the regex is freed and onig_end() is
+ * called on every exit path.  The EXEC_COUNT, EXEC_COUNT_INTERVAL,
+ * REGEX_SUCCESS_COUNT and VALID_STRING_COUNT counters are updated.
+ *
+ * Returns  0 when the pattern compiled (searches may match or not),
+ *         -1 when compilation failed with an ordinary error,
+ *         -2 when compilation or a search reported one of the "bug" codes
+ *            (parser bug, stack bug, undefined/unexpected bytecode).
+ */
+static int
+exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax,
+     char* apattern, char* apattern_end, char* astr, UChar* end)
+{
+  int r;
+  int result;
+  regex_t* reg;
+  OnigErrorInfo einfo;
+  UChar* pattern = (UChar* )apattern;
+  UChar* str = (UChar* )astr;
+  UChar* pattern_end = (UChar* )apattern_end;
+
+  EXEC_COUNT++;
+  EXEC_COUNT_INTERVAL++;
+
+  onig_initialize(&enc, 1);
+  onig_set_retry_limit_in_match(RETRY_LIMIT);
+  //onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT);
+
+  r = onig_new(&reg, pattern, pattern_end,
+               options, enc, syntax, &einfo);
+  if (r != ONIG_NORMAL) {
+    char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+
+    /* formatted even when nothing is printed, as in the original harness */
+    onig_error_code_to_str((UChar* )s, r, &einfo);
+#ifdef WITH_READ_MAIN
+    fprintf(stdout, "ERROR: %s\n", s);
+#endif
+    onig_end();
+
+    if (r == ONIGERR_PARSER_BUG ||
+        r == ONIGERR_STACK_BUG ||
+        r == ONIGERR_UNDEFINED_BYTECODE ||
+        r == ONIGERR_UNEXPECTED_BYTECODE) {
+      return -2;
+    }
+    else
+      return -1;
+  }
+  REGEX_SUCCESS_COUNT++;
+
+  result = 0;
+
+  /* first use the pattern bytes themselves as the subject */
+  r = search(reg, pattern, pattern_end);
+  if (r == -2) {
+    result = -2;
+  }
+  else if (onigenc_is_valid_mbc_string(enc, str, end) != 0) {
+    VALID_STRING_COUNT++;
+    r = search(reg, str, end);
+    if (r == -2) result = -2;
+  }
+
+  /* release the regex and the library on the -2 paths as well */
+  onig_free(reg);
+  onig_end();
+  return result;
+}
+
+#if 0
+/*
+ * Write size bytes from data to the file at path, creating it read-only
+ * (S_IRUSR|S_IRGRP|S_IROTH) if it does not exist.  Failures to open the
+ * file and short writes are reported on stderr; nothing is returned.
+ *
+ * This helper is compiled out by the surrounding "#if 0"; its only
+ * references in this file are commented-out calls.
+ */
+static void
+output_data(char* path, const uint8_t * data, size_t size)
+{
+ int fd;
+ ssize_t n;
+
+ fd = open(path, O_CREAT|O_RDWR, S_IRUSR|S_IRGRP|S_IROTH);
+ if (fd == -1) {
+ fprintf(stderr, "ERROR: output_data(): can't open(%s)\n", path);
+ return ;
+ }
+
+ n = write(fd, (const void* )data, size);
+ /* a short write is reported but not retried */
+ if (n != size) {
+ fprintf(stderr, "ERROR: output_data(): n: %ld, size: %ld\n", n, size);
+ }
+ close(fd);
+}
+#endif
+
+
+/*
+ * Split data into a pattern (the first pattern_size bytes) and a subject
+ * string (the remaining remaining_size - pattern_size bytes), copy each into
+ * its own heap buffer and run exec() on them.
+ *
+ * In the UTF-16 builds an odd subject length is rounded down by one byte.
+ * Zero-length parts still get a 1-byte allocation.
+ *
+ * Returns the value of exec(), or -1 when pattern_size does not fit in
+ * remaining_size or an allocation fails.
+ */
+static int
+alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax,
+           int pattern_size, size_t remaining_size, unsigned char *data)
+{
+  int r;
+  unsigned char *pattern;
+  unsigned char *pattern_end;
+  unsigned char *str;
+  unsigned char *str_null_end;
+
+  /* the pattern must be carved out of the bytes that are actually there */
+  if (pattern_size < 0 || (size_t )pattern_size > remaining_size)
+    return -1;
+
+  // copy first PATTERN_SIZE bytes off to be the pattern
+  pattern = (unsigned char *)malloc(pattern_size != 0 ? (size_t )pattern_size : 1);
+  if (pattern == NULL)
+    return -1;
+  memcpy(pattern, data, (size_t )pattern_size);
+  pattern_end = pattern + pattern_size;
+  data += pattern_size;
+  remaining_size -= (size_t )pattern_size;
+
+#if defined(UTF16_BE) || defined(UTF16_LE)
+  if (remaining_size % 2 == 1) remaining_size--;
+#endif
+
+  str = (unsigned char *)malloc(remaining_size != 0 ? remaining_size : 1);
+  if (str == NULL) {
+    free(pattern);
+    return -1;
+  }
+  memcpy(str, data, remaining_size);
+  str_null_end = str + remaining_size;
+
+  r = exec(enc, options, syntax,
+           (char *)pattern, (char *)pattern_end,
+           (char *)str, str_null_end);
+
+  free(pattern);
+  free(str);
+  return r;
+}
+
+
+#define EXEC_PRINT_INTERVAL 10000000
+#define MAX_PATTERN_SIZE 150
+
+#ifdef SYNTAX_TEST
+#define NUM_CONTROL_BYTES 3
+#else
+#define NUM_CONTROL_BYTES 2
+#endif
+
+/*
+ * Fuzzer entry point for the onig_new()/onig_search() harness.
+ *
+ * Input layout, in the order the bytes are consumed below:
+ *   - one byte selecting the encoding (not consumed when UTF16_BE or
+ *     UTF16_LE is defined; the encoding is then fixed)
+ *   - one byte selecting the syntax (only when SYNTAX_TEST is defined)
+ *   - one byte selecting the options (even: none, odd: ignore-case)
+ *   - the rest, which alloc_exec() splits into pattern and subject
+ *
+ * Without WITH_READ_MAIN the pattern length is derived from the running
+ * INPUT_COUNT counter, so the split of a given input depends on how many
+ * inputs were processed before it.  With WITH_READ_MAIN every pattern
+ * length from 0 up to the maximum is tried in turn.
+ *
+ * The process exits with status -2 as soon as alloc_exec() returns -2.
+ * Returns 0 for inputs shorter than NUM_CONTROL_BYTES, otherwise the value
+ * of the last alloc_exec() call.
+ */
+int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
+{
+#if !defined(UTF16_BE) && !defined(UTF16_LE)
+ /* UTF-8 is listed three times, so it is selected more often than the rest */
+ static OnigEncoding encodings[] = {
+ ONIG_ENCODING_UTF8,
+ ONIG_ENCODING_UTF8,
+ ONIG_ENCODING_UTF8,
+ ONIG_ENCODING_SJIS,
+ //ONIG_ENCODING_EUC_JP,
+ ONIG_ENCODING_ISO_8859_1,
+ ONIG_ENCODING_BIG5,
+ ONIG_ENCODING_GB18030,
+ ONIG_ENCODING_EUC_TW
+ };
+ unsigned char encoding_choice;
+#endif
+
+#ifdef SYNTAX_TEST
+ static OnigSyntaxType* syntaxes[] = {
+ ONIG_SYNTAX_POSIX_EXTENDED,
+ ONIG_SYNTAX_EMACS,
+ ONIG_SYNTAX_GREP,
+ ONIG_SYNTAX_GNU_REGEX,
+ ONIG_SYNTAX_JAVA,
+ ONIG_SYNTAX_PERL_NG,
+ ONIG_SYNTAX_ONIGURUMA
+ };
+ unsigned char syntax_choice;
+#endif
+
+ int r;
+ int pattern_size;
+ size_t remaining_size;
+ unsigned char *data;
+ unsigned char options_choice;
+ OnigOptionType options;
+ OnigEncoding enc;
+ OnigSyntaxType* syntax;
+
+ INPUT_COUNT++;
+ /* too short to hold the control bytes */
+ if (Size < NUM_CONTROL_BYTES) return 0;
+
+ remaining_size = Size;
+ data = (unsigned char* )(Data);
+
+ /* encoding: fixed in the UTF-16 builds, otherwise chosen by the next byte */
+#ifdef UTF16_BE
+ enc = ONIG_ENCODING_UTF16_BE;
+#else
+#ifdef UTF16_LE
+ enc = ONIG_ENCODING_UTF16_LE;
+#else
+ encoding_choice = data[0];
+ data++;
+ remaining_size--;
+
+ int num_encodings = sizeof(encodings)/sizeof(encodings[0]);
+ enc = encodings[encoding_choice % num_encodings];
+#endif
+#endif
+
+ /* syntax: chosen by the next byte in SYNTAX_TEST builds, default otherwise */
+#ifdef SYNTAX_TEST
+ syntax_choice = data[0];
+ data++;
+ remaining_size--;
+
+ int num_syntaxes = sizeof(syntaxes)/sizeof(syntaxes[0]);
+ syntax = syntaxes[syntax_choice % num_syntaxes];
+#else
+ syntax = ONIG_SYNTAX_DEFAULT;
+#endif
+
+ /* options: even byte -> no options, odd byte -> ignore case */
+ options_choice = data[0];
+ options = (options_choice % 2 == 0) ? ONIG_OPTION_NONE : ONIG_OPTION_IGNORECASE;
+ data++;
+ remaining_size--;
+
+#ifdef WITH_READ_MAIN
+#ifdef SYNTAX_TEST
+ fprintf(stdout, "enc: %s, syntax: %d, options: %u\n",
+ ONIGENC_NAME(enc), (int )(syntax_choice % num_syntaxes), options);
+#else
+ fprintf(stdout, "enc: %s, options: %u\n", ONIGENC_NAME(enc), options);
+#endif
+#endif
+
+#ifdef WITH_READ_MAIN
+ /* stand-alone mode: try every pattern length from 0 to max_pattern_size */
+ int max_pattern_size;
+
+ if (remaining_size == 0)
+ max_pattern_size = 0;
+ else {
+ max_pattern_size = remaining_size - 1;
+ if (max_pattern_size > MAX_PATTERN_SIZE)
+ max_pattern_size = MAX_PATTERN_SIZE;
+
+#if defined(UTF16_BE) || defined(UTF16_LE)
+ /* keep the pattern length even in the UTF-16 builds */
+ if (max_pattern_size % 2 == 1) max_pattern_size--;
+#endif
+ }
+
+ for (pattern_size = 0; pattern_size <= max_pattern_size; ) {
+ fprintf(stdout, "pattern_size: %d\n", pattern_size);
+ r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data);
+ if (r == -2) {
+ //output_data("parser-bug", Data, Size);
+ exit(-2);
+ }
+
+#if defined(UTF16_BE) || defined(UTF16_LE)
+ pattern_size += 2;
+#else
+ pattern_size++;
+#endif
+ }
+
+#else /* WITH_READ_MAIN */
+
+ /* fuzzing mode: one pattern length per input, taken from INPUT_COUNT */
+ if (remaining_size == 0)
+ pattern_size = 0;
+ else {
+ pattern_size = INPUT_COUNT % remaining_size;
+ if (pattern_size > MAX_PATTERN_SIZE)
+ pattern_size = MAX_PATTERN_SIZE;
+
+#if defined(UTF16_BE) || defined(UTF16_LE)
+ if (pattern_size % 2 == 1) pattern_size--;
+#endif
+ }
+
+ r = alloc_exec(enc, options, syntax, pattern_size, remaining_size, data);
+ if (r == -2) {
+ //output_data("parser-bug", Data, Size);
+ exit(-2);
+ }
+#endif /* else WITH_READ_MAIN */
+
+ /* print running statistics once every EXEC_PRINT_INTERVAL exec() calls */
+ if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) {
+ char d[64];
+ time_t t;
+ float fexec, freg, fvalid;
+
+ t = time(NULL);
+ strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t));
+
+ /* all three ratios are relative to the number of inputs seen */
+ fexec = (float )EXEC_COUNT / INPUT_COUNT;
+ freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT;
+ fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT;
+
+ fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f\n",
+ d, EXEC_COUNT, fexec, freg, fvalid);
+
+ EXEC_COUNT_INTERVAL = 0;
+ }
+ return r;
+}
+
+#ifdef WITH_READ_MAIN
+
+/*
+ * Stand-alone driver: read one test case (at most sizeof(Data) bytes) from
+ * stdin and pass it to LLVMFuzzerTestOneInput().
+ *
+ * Returns 0 on success, 1 when reading stdin fails.
+ */
+extern int main(int argc, char* argv[])
+{
+  ssize_t n;
+  uint8_t Data[10000];
+
+  /* read() returns -1 on error; stored in a size_t that would become a huge
+     length and make the harness read far beyond Data */
+  n = read(0, Data, sizeof(Data));
+  if (n < 0) {
+    perror("read");
+    return 1;
+  }
+
+  fprintf(stdout, "n: %ld\n", (long )n);
+  LLVMFuzzerTestOneInput(Data, (size_t )n);
+
+  return 0;
+}
+#endif /* WITH_READ_MAIN */
diff --git a/harnesses/libfuzzer-onig.cpp b/harnesses/libfuzzer-onig.cpp
new file mode 100644
index 0000000..526c826
--- /dev/null
+++ b/harnesses/libfuzzer-onig.cpp
@@ -0,0 +1,45 @@
+/* libfuzzer test code for oniguruma
+ * author: Hanno Böck, license: CC0/public domain
+
+Usage:
+* compile oniguruma with something like
+ ./configure CC=clang LD=clang CFLAGS="-fsanitize-coverage=edge -fsanitize=address" \
+ LDFLAGS="-fsanitize-coverage=edge -fsanitize=address"
+* Compile libfuzzer stub and link against static libonig.a and libFuzzer.a:
+ clang++ libfuzzer-onig.cpp src/.libs/libonig.a libFuzzer.a -o libfuzzer-onig \
+ -fsanitize-coverage=edge -fsanitize=address
+* Put sample patterns in directory "in/"
+* Run
+ ./libfuzzer-onig in
+
+Consult libfuzzer docs for further details and how to create libFuzzer.a:
+http://llvm.org/docs/LibFuzzer.html
+
+ */
+#include <stdint.h>
+#include <string.h>
+#include <oniguruma.h>
+
+/*
+ * libFuzzer entry point: compile the input bytes as a UTF-8 pattern with
+ * the default options and syntax, and free the regex if that succeeds.
+ * With FULL_TEST defined the library is also initialized (with retry and
+ * parse-depth limits of 120) before, and shut down after, each input.
+ * Always returns 0.
+ */
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
+{
+  OnigEncoding enc = ONIG_ENCODING_UTF8;
+  regex_t *reg;
+  int rc;
+
+#ifdef FULL_TEST
+  onig_initialize(&enc, 1);
+  onig_set_retry_limit_in_match(120);
+  onig_set_parse_depth_limit(120);
+#endif
+
+  /* no error-info structure is requested (last argument is 0) */
+  rc = onig_new(&reg, Data, Data + Size, ONIG_OPTION_DEFAULT, enc,
+                ONIG_SYNTAX_DEFAULT, 0);
+  if (rc == 0) {
+    onig_free(reg);
+  }
+
+#ifdef FULL_TEST
+  onig_end();
+#endif
+
+  return 0;
+}
diff --git a/harnesses/makefile b/harnesses/makefile
new file mode 100644
index 0000000..dfd84de
--- /dev/null
+++ b/harnesses/makefile
@@ -0,0 +1,69 @@
+# makefile for harness
+#
+# Builds the fuzzing harnesses against a static, ASan-instrumented libonig.a.
+#   *-libfuzzer targets link the libFuzzer driver (-fsanitize=fuzzer).
+#   main-* targets are built with -DWITH_READ_MAIN and read one input
+#   from stdin instead.
+SRC = ../src
+CFLAGS = -I$(SRC) -Wall -g -fsanitize=fuzzer,address -fno-omit-frame-pointer
+CFLAGS_M = -I$(SRC) -Wall -g -fsanitize=fuzzer-no-link,address -fno-omit-frame-pointer -DWITH_READ_MAIN
+ONIG_LIB = $(SRC)/.libs/libonig.a
+LIBS = $(ONIG_LIB)
+
+TARGETS = encode-libfuzzer syntax-libfuzzer \
+	utf16-be-libfuzzer utf16-le-libfuzzer main-encode main-syntax \
+	main-utf16-be main-utf16-le main-regset regset-libfuzzer
+
+OTHER_TARGETS = libfuzzer-onig libfuzzer-onig-full \
+	deluxe-encode-libfuzzer main-deluxe-encode
+
+# these names are commands, not files
+.PHONY: default clean
+
+default: $(TARGETS)
+
+encode-libfuzzer: encode-harness.c $(ONIG_LIB)
+	clang $(CFLAGS) $< $(LIBS) -o $@
+
+syntax-libfuzzer: encode-harness.c $(ONIG_LIB)
+	clang -DSYNTAX_TEST $(CFLAGS) $< $(LIBS) -o $@
+
+deluxe-encode-libfuzzer: deluxe-encode-harness.c $(ONIG_LIB)
+	clang $(CFLAGS) $< $(LIBS) -o $@
+
+utf16-be-libfuzzer: encode-harness.c $(ONIG_LIB)
+	clang -DUTF16_BE $(CFLAGS) $< $(LIBS) -o $@
+
+utf16-le-libfuzzer: encode-harness.c $(ONIG_LIB)
+	clang -DUTF16_LE $(CFLAGS) $< $(LIBS) -o $@
+
+regset-libfuzzer: regset-harness.c $(ONIG_LIB)
+	clang $(CFLAGS) $< $(LIBS) -o $@
+
+main-encode: encode-harness.c $(ONIG_LIB)
+	clang $(CFLAGS_M) $< $(LIBS) -o $@
+
+main-syntax: encode-harness.c $(ONIG_LIB)
+	clang -DSYNTAX_TEST $(CFLAGS_M) $< $(LIBS) -o $@
+
+main-deluxe-encode: deluxe-encode-harness.c $(ONIG_LIB)
+	clang $(CFLAGS_M) $< $(LIBS) -o $@
+
+main-utf16-be: encode-harness.c $(ONIG_LIB)
+	clang -DUTF16_BE $(CFLAGS_M) $< $(LIBS) -o $@
+
+main-utf16-le: encode-harness.c $(ONIG_LIB)
+	clang -DUTF16_LE $(CFLAGS_M) $< $(LIBS) -o $@
+
+main-regset: regset-harness.c $(ONIG_LIB)
+	clang $(CFLAGS_M) $< $(LIBS) -o $@
+
+libfuzzer-onig: libfuzzer-onig.cpp $(ONIG_LIB)
+	clang++ $(CFLAGS) $< $(LIBS) -o $@
+
+libfuzzer-onig-full: libfuzzer-onig.cpp $(ONIG_LIB)
+	clang++ -DFULL_TEST $(CFLAGS) $< $(LIBS) -o $@
+
+
+# Rebuild the library in the parent directory with clang and ASan.
+# $(MAKE) is used for the recursive builds, and "&&" stops configure from
+# running in the wrong directory if the cd fails.
+$(ONIG_LIB):
+	$(MAKE) -C .. clean
+	#cd ..; autoreconf -vfi
+	cd .. && ./configure CC=clang LD=clang CFLAGS="-g -fsanitize=address -fno-omit-frame-pointer" LDFLAGS="-g -fsanitize=address -fno-omit-frame-pointer"
+	$(MAKE) -C .. -j4
+
+
+clean:
+	rm -f $(TARGETS) $(OTHER_TARGETS)
diff --git a/harnesses/regset-harness.c b/harnesses/regset-harness.c
new file mode 100644
index 0000000..b4b7e20
--- /dev/null
+++ b/harnesses/regset-harness.c
@@ -0,0 +1,379 @@
+/*
+ * regset-harness.c
+ * Copyright (c) 2019 K.Kosako
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+
+#include "oniguruma.h"
+
+
+#define RETRY_LIMIT 500
+
+#ifdef WITH_READ_MAIN
+//#define CHECK_EACH_REGEX_SEARCH_TIME
+#endif
+
+#define MAX_REG_NUM 256
+
+typedef unsigned char uint8_t;
+static OnigEncoding ENC;
+
+#ifdef CHECK_EACH_REGEX_SEARCH_TIME
+/*
+ * Return the elapsed time from *ts to *te in seconds, combining the
+ * whole-second and nanosecond fields of the two timestamps.
+ */
+static double
+get_sec(struct timespec* ts, struct timespec* te)
+{
+  double nsec_diff = (double )(te->tv_nsec - ts->tv_nsec);
+
+  return (te->tv_sec - ts->tv_sec) + nsec_diff / 1000000000.0;
+}
+
+/*
+ * Search [str, end) with every regex of the set individually and print the
+ * process CPU time each search took, in milliseconds.  The search results
+ * themselves are ignored.  Always returns 0.
+ */
+static int
+check_each_regex_search_time(OnigRegSet* set, unsigned char* str, unsigned char* end)
+{
+  int idx;
+  int count = onig_regset_number_of_regex(set);
+  OnigRegion* region = onig_region_new();
+
+  for (idx = 0; idx < count; idx++) {
+    struct timespec t_begin, t_end;
+    regex_t* reg = onig_regset_get_regex(set, idx);
+
+    /* time only the search call itself */
+    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t_begin);
+    (void )onig_search(reg, str, end, str, end, region, ONIG_OPTION_NONE);
+    clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &t_end);
+
+    fprintf(stdout, "regex search time %d: %6.2lfmsec.\n", idx,
+            get_sec(&t_begin, &t_end) * 1000.0);
+  }
+
+  onig_region_free(region, 1);
+  return 0;
+}
+#endif
+
+/*
+ * Run onig_regset_search() over [str, end) with the given lead mode.
+ *
+ * With WITH_READ_MAIN defined the outcome is printed to stdout: the index of
+ * the matching regex, the match position and its capture ranges, a
+ * "search fail" line, or the error text.
+ *
+ * Returns 0 on a match or a mismatch; -1 on a search error, or (only with
+ * WITH_READ_MAIN) when the region of the matching regex cannot be obtained.
+ */
+static int
+search(OnigRegSet* set, OnigRegSetLead lead, unsigned char* str, unsigned char* end)
+{
+  int rc;
+  int match_pos;
+
+  /* search the whole buffer: start at str, search range up to end */
+  rc = onig_regset_search(set, str, end, str, end, lead,
+                          ONIG_OPTION_NONE, &match_pos);
+
+  /* anything negative other than a plain mismatch is an error */
+  if (rc < 0 && rc != ONIG_MISMATCH) {
+#ifdef WITH_READ_MAIN
+    char msg[ONIG_MAX_ERROR_MESSAGE_LEN];
+
+    onig_error_code_to_str((UChar* )msg, rc);
+    fprintf(stdout, "ERROR: %s\n", msg);
+    fprintf(stdout, " (%s)\n", ONIGENC_NAME(ENC));
+#endif
+    return -1;
+  }
+
+#ifdef WITH_READ_MAIN
+  if (rc == ONIG_MISMATCH) {
+    fprintf(stdout, "search fail (%s)\n", ONIGENC_NAME(ENC));
+  }
+  else {
+    int k;
+    OnigRegion* region;
+
+    /* a non-negative result is the index of the regex that matched */
+    fprintf(stdout, "match reg index: %d, pos: %d (%s)\n",
+            rc, match_pos, ONIGENC_NAME(ENC));
+    region = onig_regset_get_region(set, rc);
+    if (region == 0) {
+      fprintf(stdout, "ERROR: can't get region.\n");
+      return -1;
+    }
+
+    for (k = 0; k < region->num_regs; k++) {
+      fprintf(stdout, "%d: (%d-%d)\n", k, region->beg[k], region->end[k]);
+    }
+  }
+#endif
+
+  return 0;
+}
+
+static long INPUT_COUNT;
+static long EXEC_COUNT;
+static long EXEC_COUNT_INTERVAL;
+static long REGEX_SUCCESS_COUNT;
+static long VALID_STRING_COUNT;
+
+/*
+ * Build a regset from reg_num patterns and search [str, end) with it.
+ *
+ * The first init_reg_num patterns are compiled up front and handed to
+ * onig_regset_new(); the remaining ones are compiled and added one at a time
+ * with onig_regset_add().  Every fourth call compiles with
+ * ONIG_OPTION_IGNORECASE.  The subject is searched only if it is a valid
+ * string in enc.  The library is initialized on entry and onig_end() is
+ * called on every exit path.  The EXEC_COUNT, EXEC_COUNT_INTERVAL,
+ * REGEX_SUCCESS_COUNT and VALID_STRING_COUNT counters are updated.
+ *
+ * Returns  0 when the whole set was built (the search result is ignored),
+ *         -1 when a pattern failed to compile with an ordinary error or the
+ *            set could not be created or extended,
+ *         -2 when compilation reported one of the "bug" codes (parser bug,
+ *            stack bug, undefined/unexpected bytecode).
+ */
+static int
+exec(OnigEncoding enc, int reg_num, int init_reg_num,
+     UChar* pat[], UChar* pat_end[],
+     OnigRegSetLead lead, UChar* str, UChar* end)
+{
+  int r;
+  int i, j;
+  OnigRegSet* set;
+  regex_t* reg;
+  OnigOptionType options;
+  OnigErrorInfo einfo;
+  regex_t* regs[MAX_REG_NUM];
+
+  EXEC_COUNT++;
+  EXEC_COUNT_INTERVAL++;
+
+  options = (EXEC_COUNT % 4 == 0) ? ONIG_OPTION_IGNORECASE : ONIG_OPTION_NONE;
+
+  onig_initialize(&enc, 1);
+  onig_set_retry_limit_in_match(RETRY_LIMIT);
+
+  /* phase 1: compile the regexes the set is created with */
+  for (i = 0; i < init_reg_num; i++) {
+    r = onig_new(&regs[i], pat[i], pat_end[i], options, enc,
+                 ONIG_SYNTAX_DEFAULT, &einfo);
+    if (r != 0) {
+#ifdef WITH_READ_MAIN
+      char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+
+      onig_error_code_to_str((UChar* )s, r, &einfo);
+      fprintf(stdout, "ERROR: index: %d, %s\n", i, s);
+#endif
+
+      /* release the regexes compiled so far */
+      for (j = 0; j < i; j++) onig_free(regs[j]);
+
+      onig_end();
+
+      if (r == ONIGERR_PARSER_BUG ||
+          r == ONIGERR_STACK_BUG ||
+          r == ONIGERR_UNDEFINED_BYTECODE ||
+          r == ONIGERR_UNEXPECTED_BYTECODE) {
+        return -2;
+      }
+      else
+        return -1;
+    }
+  }
+
+  r = onig_regset_new(&set, init_reg_num, regs);
+  if (r != 0) {
+    for (i = 0; i < init_reg_num; i++) {
+      onig_free(regs[i]);
+    }
+    onig_end();
+    return -1;
+  }
+
+  /* phase 2: compile the remaining patterns and add them to the set */
+  for (i = init_reg_num; i < reg_num; i++) {
+    r = onig_new(&reg, pat[i], pat_end[i], options, enc,
+                 ONIG_SYNTAX_DEFAULT, &einfo);
+    if (r != 0) {
+#ifdef WITH_READ_MAIN
+      char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+
+      onig_error_code_to_str((UChar* )s, r, &einfo);
+      fprintf(stdout, "ERROR: index: %d, %s\n", i, s);
+#endif
+      onig_regset_free(set);
+      onig_end();
+
+      if (r == ONIGERR_PARSER_BUG ||
+          r == ONIGERR_STACK_BUG ||
+          r == ONIGERR_UNDEFINED_BYTECODE ||
+          r == ONIGERR_UNEXPECTED_BYTECODE) {
+        return -2;
+      }
+      else
+        return -1;
+    }
+
+    r = onig_regset_add(set, reg);
+    if (r != 0) {
+      /* NOTE(review): assumes a failed add leaves reg owned by the caller,
+         as a failed onig_regset_new() does above; confirm against the
+         library before relying on it */
+      onig_free(reg);
+      onig_regset_free(set);
+      onig_end();
+      fprintf(stdout, "ERROR: onig_regset_add(): %d\n", i);
+      /* report as an ordinary failure rather than a raw library code */
+      return -1;
+    }
+  }
+
+  REGEX_SUCCESS_COUNT++;
+
+  if (onigenc_is_valid_mbc_string(enc, str, end) != 0) {
+    VALID_STRING_COUNT++;
+    r = search(set, lead, str, end);
+#ifdef CHECK_EACH_REGEX_SEARCH_TIME
+    r = check_each_regex_search_time(set, str, end);
+#endif
+  }
+
+  onig_regset_free(set);
+  onig_end();
+  return 0;
+}
+
+#define MAX_PATTERN_SIZE 30
+#define NUM_CONTROL_BYTES 3
+
+#define EXEC_PRINT_INTERVAL 2000000
+
+static int MaxRegNum;
+static int MaxInitRegNum;
+
+/*
+ * Fuzzer entry point for the regset harness.
+ *
+ * Input layout:
+ *   byte 0  number of regexes in the set (reduced modulo 15 when the input
+ *           is too short to give each regex at least two bytes)
+ *   byte 1  how many of them are passed to onig_regset_new() up front
+ *           (reduced modulo reg_num + 1)
+ *   byte 2  lead mode: even -> position lead, odd -> regex lead
+ *   rest    reg_num patterns of equal length (at most MAX_PATTERN_SIZE
+ *           bytes each, using at most half of the rest), followed by the
+ *           subject string
+ *
+ * The process exits with status -2 when exec() returns -2.  Returns 0 for
+ * inputs shorter than NUM_CONTROL_BYTES and on allocation failure,
+ * otherwise the value of exec().
+ */
+extern int
+LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
+{
+  int r, i;
+  int pattern_size;
+  unsigned char *str;
+  unsigned char *str_null_end;
+  size_t remaining_size;
+  unsigned char *data;
+  unsigned int reg_num;
+  unsigned int init_reg_num;
+  unsigned char* pat[256];
+  unsigned char* pat_end[256];
+  unsigned int lead_num;
+  OnigRegSetLead lead;
+
+  INPUT_COUNT++;
+
+  if (Size < NUM_CONTROL_BYTES) return 0;
+
+  remaining_size = Size;
+  data = (unsigned char* )(Data);
+
+  reg_num = data[0];
+  data++;
+  remaining_size--;
+
+  init_reg_num = data[0];
+  data++;
+  remaining_size--;
+
+  lead_num = data[0];
+  data++;
+  remaining_size--;
+  lead = (lead_num % 2 == 0 ? ONIG_REGSET_POSITION_LEAD : ONIG_REGSET_REGEX_LEAD);
+
+  if (remaining_size < reg_num * 2) {
+    reg_num = reg_num % 15; // zero is OK.
+  }
+
+  init_reg_num %= (reg_num + 1);
+
+  /* both counts are at most 255 here, so the casts cannot overflow */
+  if (MaxRegNum < (int )reg_num)
+    MaxRegNum = (int )reg_num;
+
+  if (MaxInitRegNum < (int )init_reg_num)
+    MaxInitRegNum = (int )init_reg_num;
+
+  if (reg_num == 0)
+    pattern_size = 1;
+  else {
+    /* clamp in size_t before narrowing to int */
+    size_t per_pattern = remaining_size / (reg_num * 2);
+
+    if (per_pattern > MAX_PATTERN_SIZE)
+      per_pattern = MAX_PATTERN_SIZE;
+    pattern_size = (int )per_pattern;
+  }
+
+  for (i = 0; i < (int )reg_num; i++) {
+    /* pattern_size may be 0; always allocate at least one byte */
+    pat[i] = (unsigned char* )malloc(pattern_size != 0 ? (size_t )pattern_size : 1);
+    if (pat[i] == NULL) {
+      while (i > 0) free(pat[--i]);
+      return 0;
+    }
+    memcpy(pat[i], data, (size_t )pattern_size);
+    pat_end[i] = pat[i] + pattern_size;
+    data += pattern_size;
+    remaining_size -= (size_t )pattern_size;
+  }
+
+  /* whatever is left after the patterns is the subject string */
+  str = (unsigned char* )malloc(remaining_size != 0 ? remaining_size : 1);
+  if (str == NULL) {
+    for (i = 0; i < (int )reg_num; i++) {
+      free(pat[i]);
+    }
+    return 0;
+  }
+  memcpy(str, data, remaining_size);
+  str_null_end = str + remaining_size;
+
+#ifdef WITH_READ_MAIN
+  fprintf(stdout, "reg num: %d, pattern size: %d, lead: %s\n",
+          (int )reg_num, pattern_size,
+          lead == ONIG_REGSET_POSITION_LEAD ? "position" : "regex");
+
+  /* hex dump of the first pattern, eight bytes per line */
+  if (reg_num != 0) {
+    unsigned char* p;
+    i = 0;
+    p = pat[0];
+    while (p < pat_end[0]) {
+      fprintf(stdout, " 0x%02x", (int )*p++);
+      i++;
+      if (i % 8 == 0) fprintf(stdout, "\n");
+    }
+    fprintf(stdout, "\n");
+  }
+#endif
+
+  ENC = ONIG_ENCODING_UTF8;
+
+  r = exec(ENC, (int )reg_num, (int )init_reg_num, pat, pat_end, lead,
+           str, str_null_end);
+
+  for (i = 0; i < (int )reg_num; i++) {
+    free(pat[i]);
+  }
+  free(str);
+
+  if (r == -2) {
+    //output_data("parser-bug", Data, Size);
+    exit(-2);
+  }
+
+  /* print running statistics once every EXEC_PRINT_INTERVAL exec() calls */
+  if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) {
+    char d[64];
+    time_t t;
+    float fexec, freg, fvalid;
+
+    t = time(NULL);
+    strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t));
+
+    fexec = (float )EXEC_COUNT / INPUT_COUNT;
+    freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT;
+    fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT;
+
+    fprintf(stdout, "%s: %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f MAX REG:%d-%d\n",
+            d, EXEC_COUNT, fexec, freg, fvalid, MaxRegNum, MaxInitRegNum);
+
+    EXEC_COUNT_INTERVAL = 0;
+  }
+  return r;
+}
+
+#ifdef WITH_READ_MAIN
+
+/*
+ * Stand-alone driver: read one test case (at most sizeof(Data) bytes) from
+ * stdin and pass it to LLVMFuzzerTestOneInput().
+ *
+ * Returns 0 on success, 1 when reading stdin fails.
+ */
+extern int main(int argc, char* argv[])
+{
+  ssize_t n;
+  uint8_t Data[10000];
+
+  /* read() returns -1 on error; stored in a size_t that would become a huge
+     length and make the harness read far beyond Data */
+  n = read(0, Data, sizeof(Data));
+  if (n < 0) {
+    perror("read");
+    return 1;
+  }
+
+  fprintf(stdout, "n: %ld\n", (long )n);
+  LLVMFuzzerTestOneInput(Data, (size_t )n);
+
+  return 0;
+}
+#endif /* WITH_READ_MAIN */