summary | refs | log | tree | commit | diff
path: root/harnesses/base.c
diff options
context:
space:
mode:
Diffstat (limited to 'harnesses/base.c')
-rw-r--r-- harnesses/base.c 193
1 files changed, 163 insertions, 30 deletions
diff --git a/harnesses/base.c b/harnesses/base.c
index 1206217..70f98f7 100644
--- a/harnesses/base.c
+++ b/harnesses/base.c
@@ -1,6 +1,6 @@
/*
* base.c contributed by Mark Griffin
- * Copyright (c) 2019-2020 K.Kosako
+ * Copyright (c) 2019-2021 K.Kosako
*/
#include <stdio.h>
#include <unistd.h>
@@ -12,23 +12,31 @@
#include <time.h>
#include "oniguruma.h"
-#define PARSE_DEPTH_LIMIT 8
-#define CALL_MAX_NEST_LEVEL 8
-#define SUBEXP_CALL_LIMIT 500
-#define BASE_RETRY_LIMIT 20000
-#define BASE_LENGTH 2048
-#define MATCH_STACK_LIMIT 10000000
-#define MAX_REM_SIZE 1048576
-#define MAX_SLOW_REM_SIZE 1024
-#define SLOW_RETRY_LIMIT 2000
-
-//#define EXEC_PRINT_INTERVAL 500000
-//#define DUMP_DATA_INTERVAL 100000
-//#define STAT_PATH "fuzzer.stat_log"
-
-#define OPTIONS_AT_COMPILE (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP | ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII | ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER | ONIG_OPTION_TEXT_SEGMENT_WORD )
+#define PARSE_DEPTH_LIMIT 8 /* passed to onig_set_parse_depth_limit() in exec() */
+#define MAX_SUBEXP_CALL_NEST_LEVEL 8 /* passed to onig_set_subexp_call_max_nest_level() in exec() */
+#define SUBEXP_CALL_LIMIT 1000 /* subexp call limit set by search() when sl < 2 */
+#define BASE_RETRY_LIMIT 20000
+#define BASE_LENGTH 2048
+#define MATCH_STACK_LIMIT 10000000 /* passed to onig_set_match_stack_limit_size() in search() */
+#define MAX_REM_SIZE 1048576 /* upper bound applied to rem_size in alloc_exec() */
+#define MAX_SLOW_REM_SIZE 1024 /* rem_size cap in alloc_exec() when 0 < sl < 100 */
+#define MAX_SLOW_REM_SIZE2 100 /* rem_size cap in alloc_exec() when sl >= 100 */
+#define SLOW_RETRY_LIMIT 2000
+#define SLOW_SUBEXP_CALL_LIMIT 100 /* subexp call limit set by search() when sl >= 2 */
+#define MAX_SLOW_BACKWARD_REM_SIZE 200 /* rem_size cap for backward search with ONIG_ENCODING_GB18030 */
+
+//#define EXEC_PRINT_INTERVAL 500000
+//#define DUMP_DATA_INTERVAL 100000
+//#define STAT_PATH "fuzzer.stat_log"
+//#define PREV_CONTROL
+
+#ifdef PREV_CONTROL
+#define OPTIONS_AT_COMPILE (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP | ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII | ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER | ONIG_OPTION_TEXT_SEGMENT_WORD)
+#else
+#define OPTIONS_AT_COMPILE (ONIG_OPTION_IGNORECASE | ONIG_OPTION_EXTEND | ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE | ONIG_OPTION_FIND_LONGEST | ONIG_OPTION_FIND_NOT_EMPTY | ONIG_OPTION_NEGATE_SINGLELINE | ONIG_OPTION_DONT_CAPTURE_GROUP | ONIG_OPTION_CAPTURE_GROUP | ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII | ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER | ONIG_OPTION_TEXT_SEGMENT_WORD | ONIG_OPTION_IGNORECASE_IS_ASCII)
+#endif
-#define OPTIONS_AT_RUNTIME (ONIG_OPTION_NOTBOL | ONIG_OPTION_NOTEOL | ONIG_OPTION_CHECK_VALIDITY_OF_STRING | ONIG_OPTION_NOT_BEGIN_STRING | ONIG_OPTION_NOT_END_STRING | ONIG_OPTION_NOT_BEGIN_POSITION)
+#define OPTIONS_AT_RUNTIME (ONIG_OPTION_NOTBOL | ONIG_OPTION_NOTEOL | ONIG_OPTION_CHECK_VALIDITY_OF_STRING | ONIG_OPTION_NOT_BEGIN_STRING | ONIG_OPTION_NOT_END_STRING | ONIG_OPTION_NOT_BEGIN_POSITION | ONIG_OPTION_CALLBACK_EACH_MATCH)
#define ADJUST_LEN(enc, len) do {\
@@ -38,6 +46,64 @@
typedef unsigned char uint8_t;
+
+//#define TEST_PATTERN
+
+#ifdef TEST_PATTERN
+
+#if 1
+unsigned char TestPattern[] = {
+};
+#endif
+
+#endif /* TEST_PATTERN */
+
+#ifdef STANDALONE
+
+/*
+ * Write the name of every option bit that is set in `o` to `fp`.
+ *
+ * Each name is preceded by a single space; no trailing newline is written.
+ * Names are emitted in the fixed order of the table below, independent of
+ * the numeric order of the bits.
+ */
+static void
+print_options(FILE* fp, OnigOptionType o)
+{
+  static const struct {
+    OnigOptionType flag;
+    const char*    label;
+  } names[] = {
+    { ONIG_OPTION_IGNORECASE,               " IGNORECASE" },
+    { ONIG_OPTION_EXTEND,                   " EXTEND" },
+    { ONIG_OPTION_MULTILINE,                " MULTILINE" },
+    { ONIG_OPTION_SINGLELINE,               " SINGLELINE" },
+    { ONIG_OPTION_FIND_LONGEST,             " FIND_LONGEST" },
+    { ONIG_OPTION_FIND_NOT_EMPTY,           " FIND_NOT_EMPTY" },
+    { ONIG_OPTION_NEGATE_SINGLELINE,        " NEGATE_SINGLELINE" },
+    { ONIG_OPTION_DONT_CAPTURE_GROUP,       " DONT_CAPTURE_GROUP" },
+    { ONIG_OPTION_CAPTURE_GROUP,            " CAPTURE_GROUP" },
+    { ONIG_OPTION_NOTBOL,                   " NOTBOL" },
+    { ONIG_OPTION_NOTEOL,                   " NOTEOL" },
+    { ONIG_OPTION_POSIX_REGION,             " POSIX_REGION" },
+    { ONIG_OPTION_CHECK_VALIDITY_OF_STRING, " CHECK_VALIDITY_OF_STRING" },
+    { ONIG_OPTION_IGNORECASE_IS_ASCII,      " IGNORECASE_IS_ASCII" },
+    { ONIG_OPTION_WORD_IS_ASCII,            " WORD_IS_ASCII" },
+    { ONIG_OPTION_DIGIT_IS_ASCII,           " DIGIT_IS_ASCII" },
+    { ONIG_OPTION_SPACE_IS_ASCII,           " SPACE_IS_ASCII" },
+    { ONIG_OPTION_POSIX_IS_ASCII,           " POSIX_IS_ASCII" },
+    { ONIG_OPTION_TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER,
+                                            " TEXT_SEGMENT_EXTENDED_GRAPHEME_CLUSTER" },
+    { ONIG_OPTION_TEXT_SEGMENT_WORD,        " TEXT_SEGMENT_WORD" },
+    /* Label previously read " NOT_BIGIN_STRING" (typo). */
+    { ONIG_OPTION_NOT_BEGIN_STRING,         " NOT_BEGIN_STRING" },
+    { ONIG_OPTION_NOT_END_STRING,           " NOT_END_STRING" },
+    { ONIG_OPTION_NOT_BEGIN_POSITION,       " NOT_BEGIN_POSITION" },
+    { ONIG_OPTION_CALLBACK_EACH_MATCH,      " CALLBACK_EACH_MATCH" }
+  };
+  size_t i;
+
+  for (i = 0; i < sizeof(names) / sizeof(names[0]); i++) {
+    if ((o & names[i].flag) != 0)
+      fputs(names[i].label, fp);
+  }
+}
+
+/*
+ * Render `v` as a NUL-terminated string of '0'/'1' characters, most
+ * significant bit first.
+ *
+ * `s` must have room for sizeof(v) * 8 digits plus the terminator
+ * (33 bytes when unsigned int is 32 bits wide).
+ */
+static void
+to_binary(unsigned int v, char s[/* 33 */])
+{
+  unsigned int mask;
+  int i;
+
+  /* Use an unsigned literal: shifting a signed 1 into the sign bit is
+     undefined behaviour. */
+  mask = 1U << (sizeof(v) * 8 - 1);
+  i = 0;
+  do {
+    s[i++] = ((mask & v) != 0 ? '1' : '0');
+  } while ((mask >>= 1) != 0);
+  s[i] = 0;
+}
+#endif
+
#ifdef DUMP_INPUT
static void
dump_input(unsigned char* data, size_t len)
@@ -104,6 +170,7 @@ dump_data(FILE* fp, unsigned char* data, int len)
#else
+#ifdef EXEC_PRINT_INTERVAL
static void
output_current_time(FILE* fp)
{
@@ -115,10 +182,24 @@ output_current_time(FILE* fp)
fprintf(fp, "%s", d);
}
+#endif
#endif
+/*
+ * No-op progress callout: ignores both arguments and always reports
+ * ONIG_CALLOUT_SUCCESS.
+ */
static int
+progress_callout_func(OnigCalloutArgs* args, void* user_data)
+{
+  const int status = ONIG_CALLOUT_SUCCESS;
+
+  (void)args;       /* unused */
+  (void)user_data;  /* unused */
+
+  return status;
+}
+
+/*
+ * No-op per-match callback: ignores the subject range, match position,
+ * region and user data, and always returns ONIG_NORMAL.
+ */
+static int
+each_match_callback_func(const UChar* str, const UChar* end,
+                         const UChar* match_start, OnigRegion* region,
+                         void* user_data)
+{
+  const int status = ONIG_NORMAL;
+
+  (void)str;          /* unused */
+  (void)end;          /* unused */
+  (void)match_start;  /* unused */
+  (void)region;       /* unused */
+  (void)user_data;    /* unused */
+
+  return status;
+}
+
+static int
search(regex_t* reg, unsigned char* str, unsigned char* end, OnigOptionType options, int backward, int sl)
{
int r;
@@ -145,7 +226,10 @@ search(regex_t* reg, unsigned char* str, unsigned char* end, OnigOptionType opti
onig_set_retry_limit_in_search(retry_limit);
onig_set_match_stack_limit_size(MATCH_STACK_LIMIT);
- onig_set_subexp_call_limit_in_search(SUBEXP_CALL_LIMIT);
+ if (sl >= 2)
+ onig_set_subexp_call_limit_in_search(SLOW_SUBEXP_CALL_LIMIT);
+ else
+ onig_set_subexp_call_limit_in_search(SUBEXP_CALL_LIMIT);
if (backward != 0) {
start = end;
@@ -218,10 +302,12 @@ exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax,
EXEC_COUNT_INTERVAL++;
onig_initialize(&enc, 1);
+ (void)onig_set_progress_callout(progress_callout_func);
#ifdef PARSE_DEPTH_LIMIT
onig_set_parse_depth_limit(PARSE_DEPTH_LIMIT);
#endif
- onig_set_subexp_call_max_nest_level(CALL_MAX_NEST_LEVEL);
+ onig_set_subexp_call_max_nest_level(MAX_SUBEXP_CALL_NEST_LEVEL);
+ onig_set_callback_each_match(each_match_callback_func);
r = onig_new(&reg, pattern, pattern_end,
(options & OPTIONS_AT_COMPILE), enc, syntax, &einfo);
@@ -270,18 +356,38 @@ alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax,
unsigned char *pattern_end;
unsigned char *str_null_end;
+#ifdef TEST_PATTERN
+ pattern = (unsigned char *)malloc(sizeof(TestPattern));
+ memcpy(pattern, TestPattern, sizeof(TestPattern));
+ pattern_end = pattern + sizeof(TestPattern);
+#else
pattern = (unsigned char *)malloc(pattern_size != 0 ? pattern_size : 1);
memcpy(pattern, data, pattern_size);
pattern_end = pattern + pattern_size;
+#endif
+
data += pattern_size;
rem_size -= pattern_size;
if (rem_size > MAX_REM_SIZE) rem_size = MAX_REM_SIZE;
sl = onig_detect_can_be_slow_pattern(pattern, pattern_end, options, enc, syntax);
+#ifdef STANDALONE
+ fprintf(stdout, "sl: %d\n", sl);
+#endif
if (sl > 0) {
- if (rem_size > MAX_SLOW_REM_SIZE)
- rem_size = MAX_SLOW_REM_SIZE;
+ if (sl >= 100) {
+ if (rem_size > MAX_SLOW_REM_SIZE2)
+ rem_size = MAX_SLOW_REM_SIZE2;
+ }
+ else {
+ if (rem_size > MAX_SLOW_REM_SIZE)
+ rem_size = MAX_SLOW_REM_SIZE;
+ }
+ }
+ if (backward != 0 && enc == ONIG_ENCODING_GB18030) {
+ if (rem_size > MAX_SLOW_BACKWARD_REM_SIZE)
+ rem_size = MAX_SLOW_BACKWARD_REM_SIZE;
}
ADJUST_LEN(enc, rem_size);
@@ -302,11 +408,19 @@ alloc_exec(OnigEncoding enc, OnigOptionType options, OnigSyntaxType* syntax,
return r;
}
+#ifdef PREV_CONTROL
#ifdef SYNTAX_TEST
#define NUM_CONTROL_BYTES 7
#else
#define NUM_CONTROL_BYTES 6
#endif
+#else
+#ifdef SYNTAX_TEST
+#define NUM_CONTROL_BYTES 8
+#else
+#define NUM_CONTROL_BYTES 7
+#endif
+#endif
int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
{
@@ -365,6 +479,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
ONIG_SYNTAX_GNU_REGEX,
ONIG_SYNTAX_JAVA,
ONIG_SYNTAX_PERL_NG,
+ ONIG_SYNTAX_PYTHON,
ONIG_SYNTAX_ONIGURUMA
};
@@ -376,6 +491,7 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
"GNU Regex",
"Java",
"Perl+NG",
+ "Python",
"Oniguruma"
};
#endif
@@ -394,8 +510,10 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
OnigSyntaxType* syntax;
#ifndef STANDALONE
+#ifdef EXEC_PRINT_INTERVAL
static FILE* STAT_FP;
#endif
+#endif
INPUT_COUNT++;
@@ -438,14 +556,22 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
syntax = ONIG_SYNTAX_DEFAULT;
#endif
+#ifdef PREV_CONTROL
if ((data[2] & 0xc0) == 0)
options = data[0] | (data[1] << 8) | (data[2] << 16);
+#else
+ if ((data[3] & 0xc0) == 0)
+ options = data[0] | (data[1] << 8) | (data[2] << 16) | (data[3] << 24);
+#endif
else
options = data[0] & ONIG_OPTION_IGNORECASE;
data++; rem_size--;
data++; rem_size--;
data++; rem_size--;
+#ifndef PREV_CONTROL
+ data++; rem_size--;
+#endif
pattern_size_choice = data[0];
data++; rem_size--;
@@ -465,18 +591,25 @@ int LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
}
#ifdef STANDALONE
- dump_data(stdout, data, pattern_size);
+ {
+ char soptions[33];
+
+ dump_data(stdout, data, pattern_size);
+ to_binary(options, soptions);
#ifdef SYNTAX_TEST
- fprintf(stdout,
- "enc: %s, syntax: %s, options: %u, pattern_size: %d, back:%d\n",
- ONIGENC_NAME(enc),
- syntax_names[syntax_choice % num_syntaxes],
- options,
- pattern_size, backward);
+ fprintf(stdout,
+ "enc: %s, syntax: %s, pattern_size: %d, back:%d\noptions: %s\n",
+ ONIGENC_NAME(enc),
+ syntax_names[syntax_choice % num_syntaxes],
+ pattern_size, backward, soptions);
#else
- fprintf(stdout, "enc: %s, options: %u, pattern_size: %d, back:%d\n",
- ONIGENC_NAME(enc), options, pattern_size, backward);
+ fprintf(stdout, "enc: %s, pattern_size: %d, back:%d\noptions: %s\n",
+ ONIGENC_NAME(enc), pattern_size, backward, soptions);
#endif
+
+ print_options(stdout, options);
+ fprintf(stdout, "\n");
+ }
#endif
#ifdef DUMP_INPUT