diff options
Diffstat (limited to 'harnesses/regset.c')
-rw-r--r-- | harnesses/regset.c | 392 |
1 files changed, 392 insertions, 0 deletions
diff --git a/harnesses/regset.c b/harnesses/regset.c new file mode 100644 index 0000000..a8dd181 --- /dev/null +++ b/harnesses/regset.c @@ -0,0 +1,392 @@ +/* + * regset.c + * Copyright (c) 2019 K.Kosako + */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <time.h> + +#include "oniguruma.h" + + +#define RETRY_LIMIT 5000 + +#ifdef STANDALONE +//#define CHECK_EACH_REGEX_SEARCH_TIME +#endif + +#define MAX_REG_NUM 256 + +typedef unsigned char uint8_t; +static OnigEncoding ENC; + +static void +output_current_time(FILE* fp) +{ + char d[64]; + time_t t; + + t = time(NULL); + strftime(d, sizeof(d), "%m/%d %H:%M:%S", localtime(&t)); + + fprintf(fp, "%s", d); +} + +#ifdef CHECK_EACH_REGEX_SEARCH_TIME +static double +get_sec(struct timespec* ts, struct timespec* te) +{ + double t; + + t = (te->tv_sec - ts->tv_sec) + + (double )(te->tv_nsec - ts->tv_nsec) / 1000000000.0; + return t; +} + +static int +check_each_regex_search_time(OnigRegSet* set, unsigned char* str, unsigned char* end) +{ + int n; + int i; + int r; + OnigRegion* region; + + n = onig_regset_number_of_regex(set); + region = onig_region_new(); + + for (i = 0; i < n; i++) { + regex_t* reg; + unsigned char* start; + unsigned char* range; + struct timespec ts1, ts2; + double t; + + reg = onig_regset_get_regex(set, i); + start = str; + range = end; + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts1); + + r = onig_search(reg, str, end, start, range, region, ONIG_OPTION_NONE); + + clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts2); + t = get_sec(&ts1, &ts2); + + fprintf(stdout, "regex search time %d: %6.2lfmsec.\n", i, t * 1000.0); + } + + onig_region_free(region, 1); + return 0; +} +#endif + +static int +search(OnigRegSet* set, OnigRegSetLead lead, unsigned char* str, unsigned char* end) +{ + int r; + int match_pos; + unsigned char *start, *range; + + start = str; + range = end; + r = onig_regset_search(set, str, end, start, range, lead, + ONIG_OPTION_NONE, &match_pos); + if (r >= 0) { +#ifdef STANDALONE + int i; + int match_index; + OnigRegion* region; + + match_index = r; + fprintf(stdout, "match reg index: %d, pos: %d (%s)\n", + match_index, match_pos, ONIGENC_NAME(ENC)); + region = onig_regset_get_region(set, match_index); + if (region == 0) { + fprintf(stdout, "ERROR: can't get region.\n"); + return -1; + } + + for (i = 0; i < region->num_regs; i++) { + fprintf(stdout, "%d: (%d-%d)\n", i, region->beg[i], region->end[i]); + } +#endif + } + else if (r == ONIG_MISMATCH) { +#ifdef STANDALONE + fprintf(stdout, "search fail (%s)\n", ONIGENC_NAME(ENC)); +#endif + } + else { /* error */ +#ifdef STANDALONE + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + onig_error_code_to_str((UChar* )s, r); + fprintf(stdout, "ERROR: %s\n", s); + fprintf(stdout, " (%s)\n", ONIGENC_NAME(ENC)); +#endif + return -1; + } + + return 0; +} + +static long INPUT_COUNT; +static long EXEC_COUNT; +static long EXEC_COUNT_INTERVAL; +static long REGEX_SUCCESS_COUNT; +static long VALID_STRING_COUNT; + +static int +exec(OnigEncoding enc, int reg_num, int init_reg_num, + UChar* pat[], UChar* pat_end[], + OnigRegSetLead lead, UChar* str, UChar* end) +{ + int r; + int i, j; + OnigRegSet* set; + regex_t* reg; + OnigOptionType options; + OnigErrorInfo einfo; + regex_t* regs[MAX_REG_NUM]; + + EXEC_COUNT++; + EXEC_COUNT_INTERVAL++; + + options = (EXEC_COUNT % 4 == 0) ? ONIG_OPTION_IGNORECASE : ONIG_OPTION_NONE; + + onig_initialize(&enc, 1); + onig_set_retry_limit_in_search(RETRY_LIMIT); + + for (i = 0; i < init_reg_num; i++) { + r = onig_new(®s[i], pat[i], pat_end[i], options, ENC, + ONIG_SYNTAX_DEFAULT, &einfo); + if (r != 0) { +#ifdef STANDALONE + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stdout, "ERROR: index: %d, %s\n", i, s); +#endif + + for (j = 0; j < i; j++) onig_free(regs[j]); + + onig_end(); + + if (r == ONIGERR_PARSER_BUG || + r == ONIGERR_STACK_BUG || + r == ONIGERR_UNDEFINED_BYTECODE || + r == ONIGERR_UNEXPECTED_BYTECODE) { + return -2; + } + else + return -1; + } + } + + r = onig_regset_new(&set, init_reg_num, regs); + if (r != 0) { + for (i = 0; i < init_reg_num; i++) { + onig_free(regs[i]); + } + onig_end(); + return -1; + } + + for (i = init_reg_num; i < reg_num; i++) { + r = onig_new(®, pat[i], pat_end[i], options, ENC, + ONIG_SYNTAX_DEFAULT, &einfo); + if (r != 0) { +#ifdef STANDALONE + char s[ONIG_MAX_ERROR_MESSAGE_LEN]; + + onig_error_code_to_str((UChar* )s, r, &einfo); + fprintf(stdout, "ERROR: index: %d, %s\n", i, s); +#endif + onig_regset_free(set); + onig_end(); + + if (r == ONIGERR_PARSER_BUG || + r == ONIGERR_STACK_BUG || + r == ONIGERR_UNDEFINED_BYTECODE || + r == ONIGERR_UNEXPECTED_BYTECODE) { + return -2; + } + else + return -1; + } + + r = onig_regset_add(set, reg); + if (r != 0) { + onig_regset_free(set); + onig_end(); + fprintf(stdout, "ERROR: onig_regset_add(): %d\n", i); + return r; + } + } + + REGEX_SUCCESS_COUNT++; + + if (onigenc_is_valid_mbc_string(enc, str, end) != 0) { + VALID_STRING_COUNT++; + r = search(set, lead, str, end); +#ifdef CHECK_EACH_REGEX_SEARCH_TIME + r = check_each_regex_search_time(set, str, end); +#endif + } + + onig_regset_free(set); + onig_end(); + return 0; +} + +#define MAX_PATTERN_SIZE 30 +#define NUM_CONTROL_BYTES 3 + +#define EXEC_PRINT_INTERVAL 2000000 + +static int MaxRegNum; +static int MaxInitRegNum; + +extern int +LLVMFuzzerTestOneInput(const uint8_t * Data, size_t Size) +{ + int r, i; + int pattern_size; + unsigned char *str_null_end; + size_t remaining_size; + unsigned char *data; + unsigned int reg_num; + unsigned int init_reg_num; + unsigned char* pat[256]; + unsigned char* pat_end[256]; + int len; + unsigned int lead_num; + OnigRegSetLead lead; + + INPUT_COUNT++; + + if (Size < NUM_CONTROL_BYTES) return 0; + + remaining_size = Size; + data = (unsigned char* )(Data); + + reg_num = data[0]; + data++; + remaining_size--; + + init_reg_num = data[0]; + data++; + remaining_size--; + + lead_num = data[0]; + data++; + remaining_size--; + lead = (lead_num % 2 == 0 ? ONIG_REGSET_POSITION_LEAD : ONIG_REGSET_REGEX_LEAD); + + if (remaining_size < reg_num * 2) { + reg_num = reg_num % 15; // zero is OK. + } + + init_reg_num %= (reg_num + 1); + + if (MaxRegNum < reg_num) + MaxRegNum = reg_num; + + if (MaxInitRegNum < init_reg_num) + MaxInitRegNum = init_reg_num; + + if (reg_num == 0) + pattern_size = 1; + else + pattern_size = remaining_size / (reg_num * 2); + + if (pattern_size > MAX_PATTERN_SIZE) + pattern_size = MAX_PATTERN_SIZE; + + len = pattern_size * reg_num; + if (len == 0) len = 1; + + for (i = 0; i < reg_num; i++) { + pat[i] = (unsigned char* )malloc(pattern_size); + memcpy(pat[i], data, pattern_size); + pat_end[i] = pat[i] + pattern_size; + data += pattern_size; + remaining_size -= pattern_size; + } + + unsigned char *str = (unsigned char*)malloc(remaining_size != 0 ? remaining_size : 1); + memcpy(str, data, remaining_size); + str_null_end = str + remaining_size; + +#ifdef STANDALONE + fprintf(stdout, "reg num: %d, pattern size: %d, lead: %s\n", + reg_num, pattern_size, + lead == ONIG_REGSET_POSITION_LEAD ? "position" : "regex"); + + if (reg_num != 0) { + unsigned char* p; + i = 0; + p = pat[0]; + while (p < pat_end[0]) { + fprintf(stdout, " 0x%02x", (int )*p++); + i++; + if (i % 8 == 0) fprintf(stdout, "\n"); + } + fprintf(stdout, "\n"); + } +#endif + + ENC = ONIG_ENCODING_UTF8; + + r = exec(ENC, reg_num, init_reg_num, pat, pat_end, lead, str, str_null_end); + + for (i = 0; i < reg_num; i++) { + free(pat[i]); + } + free(str); + + if (r == -2) { + //output_data("parser-bug", Data, Size); + exit(-2); + } + + if (EXEC_COUNT_INTERVAL == EXEC_PRINT_INTERVAL) { + float fexec, freg, fvalid; + + fexec = (float )EXEC_COUNT / INPUT_COUNT; + freg = (float )REGEX_SUCCESS_COUNT / INPUT_COUNT; + fvalid = (float )VALID_STRING_COUNT / INPUT_COUNT; + + output_current_time(stdout); + fprintf(stdout, ": %ld: EXEC:%.2f, REG:%.2f, VALID:%.2f MAX REG:%d-%d\n", + EXEC_COUNT, fexec, freg, fvalid, MaxRegNum, MaxInitRegNum); + + EXEC_COUNT_INTERVAL = 0; + } + else if (EXEC_COUNT == 1) { + output_current_time(stdout); + fprintf(stdout, ": ------------ START ------------\n"); + } + + return r; +} + +#ifdef STANDALONE + +extern int main(int argc, char* argv[]) +{ + size_t n; + uint8_t Data[10000]; + + n = read(0, Data, sizeof(Data)); + fprintf(stdout, "n: %ld\n", n); + LLVMFuzzerTestOneInput(Data, n); + + return 0; +} +#endif /* STANDALONE */ |