summaryrefslogtreecommitdiff
path: root/harnesses/regset.c
diff options
context:
space:
mode:
authorJörg Frings-Fürst <debian@jff.email>2020-04-20 20:34:10 +0200
committerJörg Frings-Fürst <debian@jff.email>2020-04-20 20:34:10 +0200
commitf3d6e46ce3762b6f51a166119d3982fd3715507a (patch)
tree0935fb6da7f1d9728b42ddf08395a0e977e1c228 /harnesses/regset.c
parent043fff5b6f2461aeccb1c62cb771826cfe301832 (diff)
parent73c6133c32cddae59813cbadf655cb50a3a7356a (diff)
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'harnesses/regset.c')
-rw-r--r--harnesses/regset.c392
1 files changed, 392 insertions, 0 deletions
diff --git a/harnesses/regset.c b/harnesses/regset.c
new file mode 100644
index 0000000..a8dd181
--- /dev/null
+++ b/harnesses/regset.c
@@ -0,0 +1,392 @@
+/*
+ * regset.c
+ * Copyright (c) 2019 K.Kosako
+ */
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+
+#include "oniguruma.h"
+
+
+#define RETRY_LIMIT 5000
+
+#ifdef STANDALONE
+//#define CHECK_EACH_REGEX_SEARCH_TIME
+#endif
+
+#define MAX_REG_NUM 256
+
+typedef unsigned char uint8_t;
+static OnigEncoding ENC;
+
+static void
+output_current_time(FILE* fp)
+{
+ char d[64];
+ time_t t;
+
+ t = time(NULL);
+ strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t));
+
+ fprintf(fp, "%s", d);
+}
+
+#ifdef CHECK_EACH_REGEX_SEARCH_TIME
+static double
+get_sec(struct timespec* ts, struct timespec* te)
+{
+ double t;
+
+ t = (te->tv_sec - ts->tv_sec) +
+ (double )(te->tv_nsec - ts->tv_nsec) / 1000000000.0;
+ return t;
+}
+
+static int
+check_each_regex_search_time(OnigRegSet* set, unsigned char* str, unsigned char* end)
+{
+ int n;
+ int i;
+ int r;
+ OnigRegion* region;
+
+ n = onig_regset_number_of_regex(set);
+ region = onig_region_new();
+
+ for (i = 0; i < n; i++) {
+ regex_t* reg;
+ unsigned char* start;
+ unsigned char* range;
+ struct timespec ts1, ts2;
+ double t;
+
+ reg = onig_regset_get_regex(set, i);
+ start = str;
+ range = end;
+
+ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts1);
+
+ r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE);
+
+ clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts2);
+ t = get_sec(&ts1, &ts2);
+
+ fprintf(stdout, "regex search time %d: %6.2lfmsec.\n", i, t * 1000.0);
+ }
+
+ onig_region_free(region, 1);
+ return 0;
+}
+#endif
+
+static int
+search(OnigRegSet* set, OnigRegSetLead lead, unsigned char* str, unsigned char* end)
+{
+ int r;
+ int match_pos;
+ unsigned char *start, *range;
+
+ start = str;
+ range = end;
+ r = onig_regset_search(set, str, end, start, range, lead,
+ ONIG_OPTION_NONE, &match_pos);
+ if (r >= 0) {
+#ifdef STANDALONE
+ int i;
+ int match_index;
+ OnigRegion* region;
+
+ match_index = r;
+ fprintf(stdout, "match reg index: %d, pos: %d (%s)\n",
+ match_index, match_pos, ONIGENC_NAME(ENC));
+ region = onig_regset_get_region(set, match_index);
+ if (region == 0) {
+ fprintf(stdout, "ERROR: can't get region.\n");
+ return -1;
+ }
+
+ for (i = 0; i < region->num_regs; i++) {
+ fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]);
+ }
+#endif
+ }
+ else if (r == ONIG_MISMATCH) {
+#ifdef STANDALONE
+ fprintf(stdout, "search fail (%s)\n", ONIGENC_NAME(ENC));
+#endif
+ }
+ else { /* error */
+#ifdef STANDALONE
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+
+ onig_error_code_to_str((UChar* )s, r);
+ fprintf(stdout, "ERROR: %s\n", s);
+ fprintf(stdout, " (%s)\n", ONIGENC_NAME(ENC));
+#endif
+ return -1;
+ }
+
+ return 0;
+}
+
+static long INPUT_COUNT;
+static long EXEC_COUNT;
+static long EXEC_COUNT_INTERVAL;
+static long REGEX_SUCCESS_COUNT;
+static long VALID_STRING_COUNT;
+
+static int
+exec(OnigEncoding enc, int reg_num, int init_reg_num,
+ UChar* pat[], UChar* pat_end[],
+ OnigRegSetLead lead, UChar* str, UChar* end)
+{
+ int r;
+ int i, j;
+ OnigRegSet* set;
+ regex_t* reg;
+ OnigOptionType options;
+ OnigErrorInfo einfo;
+ regex_t* regs[MAX_REG_NUM];
+
+ EXEC_COUNT++;
+ EXEC_COUNT_INTERVAL++;
+
+ options = (EXEC_COUNT % 4 == 0) ? ONIG_OPTION_IGNORECASE : ONIG_OPTION_NONE;
+
+ onig_initialize(&enc, 1);
+ onig_set_retry_limit_in_search(RETRY_LIMIT);
+
+ for (i = 0; i < init_reg_num; i++) {
+ r = onig_new(&regs[i], pat[i], pat_end[i], options, ENC,
+ ONIG_SYNTAX_DEFAULT, &einfo);
+ if (r != 0) {
+#ifdef STANDALONE
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+
+ onig_error_code_to_str((UChar* )s, r, &einfo);
+ fprintf(stdout, "ERROR: index: %d, %s\n", i, s);
+#endif
+
+ for (j = 0; j < i; j++) onig_free(regs[j]);
+
+ onig_end();
+
+ if (r == ONIGERR_PARSER_BUG ||
+ r == ONIGERR_STACK_BUG ||
+ r == ONIGERR_UNDEFINED_BYTECODE ||
+ r == ONIGERR_UNEXPECTED_BYTECODE) {
+ return -2;
+ }
+ else
+ return -1;
+ }
+ }
+
+ r = onig_regset_new(&set, init_reg_num, regs);
+ if (r != 0) {
+ for (i = 0; i < init_reg_num; i++) {
+ onig_free(regs[i]);
+ }
+ onig_end();
+ return -1;
+ }
+
+ for (i = init_reg_num; i < reg_num; i++) {
+ r = onig_new(&reg, pat[i], pat_end[i], options, ENC,
+ ONIG_SYNTAX_DEFAULT, &einfo);
+ if (r != 0) {
+#ifdef STANDALONE
+ char s[ONIG_MAX_ERROR_MESSAGE_LEN];
+
+ onig_error_code_to_str((UChar* )s, r, &einfo);
+ fprintf(stdout, "ERROR: index: %d, %s\n", i, s);
+#endif
+ onig_regset_free(set);
+ onig_end();
+
+ if (r == ONIGERR_PARSER_BUG ||
+ r == ONIGERR_STACK_BUG ||
+ r == ONIGERR_UNDEFINED_BYTECODE ||
+ r == ONIGERR_UNEXPECTED_BYTECODE) {
+ return -2;
+ }
+ else
+ return -1;
+ }
+
+ r = onig_regset_add(set, reg);
+ if (r != 0) {
+ onig_regset_free(set);
+ onig_end();
+ fprintf(stdout, "ERROR: onig_regset_add(): %d\n", i);
+ return r;
+ }
+ }
+
+ REGEX_SUCCESS_COUNT++;
+
+ if (onigenc_is_valid_mbc_string(enc, str, end) != 0) {
+ VALID_STRING_COUNT++;
+ r = search(set, lead, str, end);
+#ifdef CHECK_EACH_REGEX_SEARCH_TIME
+ r = check_each_regex_search_time(set, str, end);
+#endif
+ }
+
+ onig_regset_free(set);
+ onig_end();
+ return 0;
+}
+
+#define MAX_PATTERN_SIZE 30
+#define NUM_CONTROL_BYTES 3
+
+#define EXEC_PRINT_INTERVAL 2000000
+
+static int MaxRegNum;
+static int MaxInitRegNum;
+
+extern int
+LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size)
+{
+ int r, i;
+ int pattern_size;
+ unsigned char *str_null_end;
+ size_t remaining_size;
+ unsigned char *data;
+ unsigned int reg_num;
+ unsigned int init_reg_num;
+ unsigned char* pat[256];
+ unsigned char* pat_end[256];
+ int len;
+ unsigned int lead_num;
+ OnigRegSetLead lead;
+
+ INPUT_COUNT++;
+
+ if (Size < NUM_CONTROL_BYTES) return 0;
+
+ remaining_size = Size;
+ data = (unsigned char* )(Data);
+
+ reg_num = data[0];
+ data++;
+ remaining_size--;
+
+ init_reg_num = data[0];
+ data++;
+ remaining_size--;
+
+ lead_num = data[0];
+ data++;
+ remaining_size--;
+ lead = (lead_num % 2 == 0 ? ONIG_REGSET_POSITION_LEAD : ONIG_REGSET_REGEX_LEAD);
+
+ if (remaining_size < reg_num * 2) {
+ reg_num = reg_num % 15; // zero is OK.
+ }
+
+ init_reg_num %= (reg_num + 1);
+
+ if (MaxRegNum < reg_num)
+ MaxRegNum = reg_num;
+
+ if (MaxInitRegNum < init_reg_num)
+ MaxInitRegNum = init_reg_num;
+
+ if (reg_num == 0)
+ pattern_size = 1;
+ else
+ pattern_size = remaining_size / (reg_num * 2);
+
+ if (pattern_size > MAX_PATTERN_SIZE)
+ pattern_size = MAX_PATTERN_SIZE;
+
+ len = pattern_size * reg_num;
+ if (len == 0) len = 1;
+
+ for (i = 0; i < reg_num; i++) {
+ pat[i] = (unsigned char* )malloc(pattern_size);
+ memcpy(pat[i], data, pattern_size);
+ pat_end[i] = pat[i] + pattern_size;
+ data += pattern_size;
+ remaining_size -= pattern_size;
+ }
+
+ unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1);
+ memcpy(str, data, remaining_size);
+ str_null_end = str + remaining_size;
+
+#ifdef STANDALONE
+ fprintf(stdout, "reg num: %d, pattern size: %d, lead: %s\n",
+ reg_num, pattern_size,
+ lead == ONIG_REGSET_POSITION_LEAD ? "position" : "regex");
+
+ if (reg_num != 0) {
+ unsigned char* p;
+ i = 0;
+ p = pat[0];
+ while (p < pat_end[0]) {
+ fprintf(stdout, " 0x%02x", (int )*p++);
+ i++;
+ if (i % 8 == 0) fprintf(stdout, "\n");
+ }
+ fprintf(stdout, "\n");
+ }
+#endif
+
+ ENC = ONIG_ENCODING_UTF8;
+
+ r = exec(ENC, reg_num, init_reg_num, pat, pat_end, lead, str, str_null_end);
+
+ for (i = 0; i < reg_num; i++) {
+ free(pat[i]);
+ }
+ free(str);
+
+ if (r == -2) {
+ //output_data("parser-bug", Data, Size);
+ exit(-2);
+ }
+
+ if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) {
+ float fexec, freg, fvalid;
+
+ fexec = (float )EXEC_COUNT / INPUT_COUNT;
+ freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT;
+ fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT;
+
+ output_current_time(stdout);
+ fprintf(stdout, ": %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f MAX REG:%d-%d\n",
+ EXEC_COUNT, fexec, freg, fvalid, MaxRegNum, MaxInitRegNum);
+
+ EXEC_COUNT_INTERVAL = 0;
+ }
+ else if (EXEC_COUNT == 1) {
+ output_current_time(stdout);
+ fprintf(stdout, ": ------------ START ------------\n");
+ }
+
+ return r;
+}
+
+#ifdef STANDALONE
+
+extern int main(int argc, char* argv[])
+{
+ size_t n;
+ uint8_t Data[10000];
+
+ n = read(0, Data, sizeof(Data));
+ fprintf(stdout, "n: %ld\n", n);
+ LLVMFuzzerTestOneInput(Data, n);
+
+ return 0;
+}
+#endif /* STANDALONE */