summary refs log tree commit diff
path: root/src/regint.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/regint.h')
-rw-r--r-- src/regint.h 293
1 files changed, 216 insertions, 77 deletions
diff --git a/src/regint.h b/src/regint.h
index 256b045..ba8407a 100644
--- a/src/regint.h
+++ b/src/regint.h
@@ -4,7 +4,7 @@
regint.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2018 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -57,29 +57,48 @@
/* config */
/* spec. config */
#define USE_CALL
+#define USE_CALLOUT
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
#define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
+#define USE_RETRY_LIMIT_IN_MATCH
+
/* internal config */
#define USE_OP_PUSH_OR_JUMP_EXACT
#define USE_QUANT_PEEK_NEXT
#define USE_ST_LIBRARY
+#include "regenc.h"
+
+#ifdef __cplusplus
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
+/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */
+#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
+#ifdef HAVE_STDARG_H
+# ifndef HAVE_STDARG_PROTOTYPES
+# define HAVE_STDARG_PROTOTYPES 1
+# endif
+#endif
+
+
#define INIT_MATCH_STACK_SIZE 160
#define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */
+#define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000
#define DEFAULT_PARSE_DEPTH_LIMIT 4096
-#if defined(__GNUC__)
-# define ARG_UNUSED __attribute__ ((unused))
-#else
-# define ARG_UNUSED
-#endif
-
/* */
/* escape other system UChar definition */
-#include "config.h"
#ifdef ONIG_ESCAPE_UCHAR_COLLISION
#undef ONIG_ESCAPE_UCHAR_COLLISION
#endif
@@ -89,15 +108,12 @@
#define USE_VARIABLE_META_CHARS
#define USE_POSIX_API_REGION_OPTION
#define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
-/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */
#define xmalloc malloc
#define xrealloc realloc
#define xcalloc calloc
#define xfree free
-#define CHECK_INTERRUPT_IN_MATCH_AT
-
#define st_init_table onig_st_init_table
#define st_init_table_with_size onig_st_init_table_with_size
#define st_init_numtable onig_st_init_numtable
@@ -118,9 +134,6 @@
/* */
#define onig_st_is_member st_is_member
-#define STATE_CHECK_STRING_THRESHOLD_LEN 7
-#define STATE_CHECK_BUFF_MAX_SIZE 0x4000
-
#define xmemset memset
#define xmemcpy memcpy
#define xmemmove memmove
@@ -140,6 +153,10 @@
#include <stddef.h>
+#ifdef HAVE_LIMITS_H
+#include <limits.h>
+#endif
+
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
@@ -184,8 +201,6 @@ typedef unsigned int uintptr_t;
#endif
#endif
-#include "regenc.h"
-
#ifdef MIN
#undef MIN
#endif
@@ -237,14 +252,93 @@ typedef unsigned int uintptr_t;
#endif /* PLATFORM_UNALIGNED_WORD_ACCESS */
+
+#ifdef USE_CALLOUT
+
+typedef struct { /* one element of RegexExt.callout_list (that array is indexed by callout num) */
+ int flag;
+ OnigCalloutOf of; /* NOTE(review): presumably selects which member of `u` is valid -- confirm */
+ int in;
+ int name_id; /* NOTE(review): presumably the id taken by onig_get_callout_*_by_name_id() -- confirm */
+ const UChar* tag_start;
+ const UChar* tag_end;
+ OnigCalloutType type;
+ OnigCalloutFunc start_func;
+ OnigCalloutFunc end_func;
+ union {
+ struct {
+ const UChar* start;
+ const UChar* end;
+ } content;
+ struct {
+ int num;
+ int passed_num;
+ OnigType types[ONIG_CALLOUT_MAX_ARGS_NUM]; /* fixed capacity: ONIG_CALLOUT_MAX_ARGS_NUM entries */
+ OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM]; /* same capacity as types[] */
+ } arg;
+ } u;
+} CalloutListEntry;
+
+#endif
+
typedef struct {
- int num_keeper;
- int* keepers;
-} RegExt;
+ const UChar* pattern; /* NOTE(review): presumably stored by onig_ext_set_pattern() -- confirm */
+ const UChar* pattern_end;
+#ifdef USE_CALLOUT
+ void* tag_table; /* NOTE(review): presumably what onig_callout_tag_table_free() releases -- confirm */
+ int callout_num;
+ int callout_list_alloc;
+ CalloutListEntry* callout_list; /* index: callout num */
+#endif
+} RegexExt; /* obtained from a regex via REG_EXTP(reg), which casts reg->chain to RegexExt* */
-#define REG_EXTP(reg) (RegExt* )((reg)->chain)
+#define REG_EXTP(reg) ((RegexExt* )((reg)->chain))
#define REG_EXTPL(reg) ((reg)->chain)
+struct re_pattern_buffer { /* compiled-pattern state: byte buffer, id counters, encoding/syntax, search-optimization info */
+ /* common members of BBuf(bytes-buffer) */
+ unsigned char* p; /* compiled pattern */
+ unsigned int used; /* used space for p */
+ unsigned int alloc; /* allocated space for p */
+
+ int num_mem; /* used memory(...) num counted from 1 */
+ int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */
+ int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */
+ int num_comb_exp_check; /* no longer used (combination explosion check) */
+ int num_call; /* number of subexp call */
+ unsigned int capture_history; /* (?@...) flag (1-31) */
+ unsigned int bt_mem_start; /* need backtrack flag */
+ unsigned int bt_mem_end; /* need backtrack flag */
+ int stack_pop_level;
+ int repeat_range_alloc;
+ OnigRepeatRange* repeat_range;
+
+ OnigEncoding enc;
+ OnigOptionType options;
+ OnigSyntaxType* syntax;
+ OnigCaseFoldType case_fold_flag;
+ void* name_table;
+
+ /* optimization info (string search, char-map and anchors) */
+ int optimize; /* optimize flag */
+ int threshold_len; /* search str-length for apply optimize */
+ int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */
+ OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */
+ OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */
+ int sub_anchor; /* start-anchor for exact or map */
+ unsigned char *exact;
+ unsigned char *exact_end;
+ unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */
+ int *int_map; /* BM skip for exact_len > 255 */
+ int *int_map_backward; /* BM skip for backward search */
+ OnigLen dmin; /* min-distance of exact or map */
+ OnigLen dmax; /* max-distance of exact or map */
+
+ /* regex_t link chain */
+ struct re_pattern_buffer* chain; /* escape compile-conflict; REG_EXTP() casts this pointer to RegexExt* */
+};
+
+
/* stack pop level */
enum StackPopLevel {
STACK_POP_LEVEL_FREE = 0,
@@ -253,12 +347,14 @@ enum StackPopLevel {
};
/* optimize flags */
-#define ONIG_OPTIMIZE_NONE 0
-#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */
-#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */
-#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */
-#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */
-#define ONIG_OPTIMIZE_MAP 5 /* char map */
+enum OptimizeType { /* NOTE(review): presumably the values held in re_pattern_buffer.optimize -- confirm */
+ OPTIMIZE_NONE = 0,
+ OPTIMIZE_EXACT = 1, /* Slow Search */
+ OPTIMIZE_EXACT_BM = 2, /* Boyer Moore Search */
+ OPTIMIZE_EXACT_BM_NO_REV = 3, /* BM (but not simple match) */
+ OPTIMIZE_EXACT_IC = 4, /* Slow Search (ignore case) */
+ OPTIMIZE_MAP = 5 /* char map */
+};
/* bit status */
typedef unsigned int MemStatusType;
@@ -467,8 +563,8 @@ typedef struct _BBuf {
#define ANCHOR_NO_WORD_BOUNDARY (1<<11)
#define ANCHOR_WORD_BEGIN (1<<12)
#define ANCHOR_WORD_END (1<<13)
-#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */
-#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */
+#define ANCHOR_ANYCHAR_INF (1<<14)
+#define ANCHOR_ANYCHAR_INF_ML (1<<15)
#define ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<16)
#define ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17)
@@ -557,7 +653,7 @@ enum OpCode {
OP_JUMP,
OP_PUSH,
OP_PUSH_SUPER,
- OP_POP,
+ OP_POP_OUT,
OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
OP_REPEAT, /* {n,m} */
@@ -581,16 +677,14 @@ enum OpCode {
OP_LOOK_BEHIND_NOT_START, /* (?<!...) start */
OP_LOOK_BEHIND_NOT_END, /* (?<!...) end */
- OP_CALL, /* \g<name> */
+ OP_CALL, /* \g<name> */
OP_RETURN,
OP_PUSH_SAVE_VAL,
OP_UPDATE_VAR,
-
- OP_STATE_CHECK_PUSH, /* combination explosion check and push */
- OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
- OP_STATE_CHECK, /* check only */
- OP_STATE_CHECK_ANYCHAR_STAR,
- OP_STATE_CHECK_ANYCHAR_ML_STAR,
+#ifdef USE_CALLOUT
+ OP_CALLOUT_CONTENTS, /* (?{...}) (?{{...}}) */
+ OP_CALLOUT_NAME, /* (*name) (*name[tag](args...)) */
+#endif
/* no need: IS_DYNAMIC_OPTION() == 0 */
OP_SET_OPTION_PUSH, /* set option and push recover option */
@@ -627,7 +721,6 @@ typedef int ModeType;
#define SIZE_ABSADDR sizeof(AbsAddrType)
#define SIZE_LENGTH sizeof(LengthType)
#define SIZE_MEMNUM sizeof(MemNumType)
-#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType)
#define SIZE_REPEATNUM sizeof(RepeatNumType)
#define SIZE_OPTION sizeof(OnigOptionType)
#define SIZE_CODE_POINT sizeof(OnigCodePoint)
@@ -643,7 +736,6 @@ typedef int ModeType;
#define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType)
#define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType)
#define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType)
-#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType)
#define GET_SAVE_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, SaveType)
#define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType)
#define GET_MODE_INC(mode,p) PLATFORM_GET_INC(mode, p, ModeType)
@@ -662,7 +754,7 @@ typedef int ModeType;
#define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR)
#define SIZE_OP_PUSH_SUPER (SIZE_OPCODE + SIZE_RELADDR)
-#define SIZE_OP_POP SIZE_OPCODE
+#define SIZE_OP_POP_OUT SIZE_OPCODE
#define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1)
#define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM)
@@ -693,11 +785,9 @@ typedef int ModeType;
#define SIZE_OP_PUSH_SAVE_VAL (SIZE_OPCODE + SIZE_SAVE_TYPE + SIZE_MEMNUM)
#define SIZE_OP_UPDATE_VAR (SIZE_OPCODE + SIZE_UPDATE_VAR_TYPE + SIZE_MEMNUM)
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
-#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
-#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
-#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR)
-#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM)
+#ifdef USE_CALLOUT
+#define SIZE_OP_CALLOUT_CONTENTS (SIZE_OPCODE + SIZE_MEMNUM)
+#define SIZE_OP_CALLOUT_NAME (SIZE_OPCODE + SIZE_MEMNUM + SIZE_MEMNUM)
#endif
#define MC_ESC(syn) (syn)->meta_char_table.esc
@@ -751,44 +841,14 @@ typedef int ModeType;
#define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT)
#define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT)
-typedef struct {
- void* stack_p;
- int stack_n;
- OnigOptionType options;
- OnigRegion* region;
- int ptr_num;
- const UChar* start; /* search start position (for \G: BEGIN_POSITION) */
-#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
- int best_len; /* for ONIG_OPTION_FIND_LONGEST */
- UChar* best_s;
-#endif
-#ifdef USE_COMBINATION_EXPLOSION_CHECK
- void* state_check_buff;
- int state_check_buff_size;
-#endif
-} OnigMatchArg;
-
-
-typedef struct OnigEndCallListItem {
- struct OnigEndCallListItem* next;
- void (*func)(void);
-} OnigEndCallListItemType;
-
extern void onig_add_end_call(void (*func)(void));
#ifdef ONIG_DEBUG
-typedef struct {
- short int opcode;
- char* name;
- short int arg_type;
-} OnigOpInfoType;
-
-extern OnigOpInfoType OnigOpInfo[];
-
-
-extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, UChar* start, OnigEncoding enc));
+#ifdef ONIG_DEBUG_COMPILE
+extern void onig_print_compiled_byte_code_list(FILE* f, regex_t* reg);
+#endif
#ifdef ONIG_DEBUG_STATISTICS
extern void onig_statistics_init P_((void));
@@ -803,6 +863,85 @@ extern int onig_bbuf_init P_((BBuf* buf, int size));
extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo));
extern void onig_transfer P_((regex_t* to, regex_t* from));
extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc));
+extern RegexExt* onig_get_regex_ext(regex_t* reg);
+extern int onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end);
+
+#ifdef USE_CALLOUT
+
+extern OnigCalloutType onig_get_callout_type_by_name_id(int name_id);
+extern OnigCalloutFunc onig_get_callout_start_func_by_name_id(int id);
+extern OnigCalloutFunc onig_get_callout_end_func_by_name_id(int id);
+extern int onig_callout_tag_table_free(void* table);
+extern void onig_free_reg_callout_list(int n, CalloutListEntry* list);
+extern CalloutListEntry* onig_reg_callout_list_at(regex_t* reg, int num);
+extern OnigCalloutFunc onig_get_callout_start_func(regex_t* reg, int callout_num);
+
+/* for definition of builtin callout */
+#define BC0_P(name, func) do { /* onig_set_callout_of_name(name -> onig_builtin_<func>, ONIG_CALLOUT_IN_PROGRESS, no args); needs `enc` and `id` in the caller's scope; returns id from the caller when id < 0 */ \
+ int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+ id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+ (UChar* )(name), (UChar* )((name) + len),\
+ ONIG_CALLOUT_IN_PROGRESS,\
+ onig_builtin_ ## func, 0, 0, 0, 0, 0);\
+ if (id < 0) return id;\
+} while(0)
+
+#define BC0_R(name, func) do { /* onig_set_callout_of_name(name -> onig_builtin_<func>, ONIG_CALLOUT_IN_RETRACTION, no args); needs `enc` and `id` in the caller's scope; returns id from the caller when id < 0 */ \
+ int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+ id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+ (UChar* )(name), (UChar* )((name) + len),\
+ ONIG_CALLOUT_IN_RETRACTION,\
+ onig_builtin_ ## func, 0, 0, 0, 0, 0);\
+ if (id < 0) return id;\
+} while(0)
+
+#define BC0_B(name, func) do { /* onig_set_callout_of_name(name -> onig_builtin_<func>, ONIG_CALLOUT_IN_BOTH, no args); needs `enc` and `id` in the caller's scope; returns id from the caller when id < 0 */ \
+ int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+ id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+ (UChar* )(name), (UChar* )((name) + len),\
+ ONIG_CALLOUT_IN_BOTH,\
+ onig_builtin_ ## func, 0, 0, 0, 0, 0);\
+ if (id < 0) return id;\
+} while(0)
+
+#define BC_P(name, func, na, ts) do { /* onig_set_callout_of_name(name -> onig_builtin_<func>, ONIG_CALLOUT_IN_PROGRESS) forwarding (na), (ts); needs `enc` and `id` in the caller's scope; returns id from the caller when id < 0 */ \
+ int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+ id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+ (UChar* )(name), (UChar* )((name) + len),\
+ ONIG_CALLOUT_IN_PROGRESS,\
+ onig_builtin_ ## func, 0, (na), (ts), 0, 0);\
+ if (id < 0) return id;\
+} while(0)
+
+#define BC_P_O(name, func, nts, ts, nopts, opts) do { /* onig_set_callout_of_name(name -> onig_builtin_<func>, ONIG_CALLOUT_IN_PROGRESS) forwarding (nts), (ts), (nopts), (opts); needs `enc` and `id` in the caller's scope; returns id from the caller when id < 0 */ \
+ int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+ id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+ (UChar* )(name), (UChar* )((name) + len),\
+ ONIG_CALLOUT_IN_PROGRESS,\
+ onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\
+ if (id < 0) return id;\
+} while(0)
+
+#define BC_B(name, func, na, ts) do { /* onig_set_callout_of_name(name -> onig_builtin_<func>, ONIG_CALLOUT_IN_BOTH) forwarding (na), (ts); needs `enc` and `id` in the caller's scope; returns id from the caller when id < 0 */ \
+ int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+ id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+ (UChar* )(name), (UChar* )((name) + len),\
+ ONIG_CALLOUT_IN_BOTH,\
+ onig_builtin_ ## func, 0, (na), (ts), 0, 0);\
+ if (id < 0) return id;\
+} while(0)
+
+#define BC_B_O(name, func, nts, ts, nopts, opts) do { /* onig_set_callout_of_name(name -> onig_builtin_<func>, ONIG_CALLOUT_IN_BOTH) forwarding (nts), (ts), (nopts), (opts); needs `enc` and `id` in the caller's scope; returns id from the caller when id < 0 */ \
+ int len = onigenc_str_bytelen_null(enc, (UChar* )(name));\
+ id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\
+ (UChar* )(name), (UChar* )((name) + len),\
+ ONIG_CALLOUT_IN_BOTH,\
+ onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\
+ if (id < 0) return id;\
+} while(0)
+
+#endif /* USE_CALLOUT */
+
/* strend hash */
typedef void hash_table_type;