diff options
Diffstat (limited to 'src/regparse.h')
-rw-r--r-- | src/regparse.h | 159 |
1 files changed, 103 insertions, 56 deletions
diff --git a/src/regparse.h b/src/regparse.h index 884f4d5..b7260ea 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -33,48 +33,58 @@ /* node type */ typedef enum { - NODE_STR = 0, - NODE_CCLASS = 1, - NODE_CTYPE = 2, - NODE_BREF = 3, - NODE_QUANT = 4, - NODE_ENCLOSURE = 5, - NODE_ANCHOR = 6, - NODE_LIST = 7, - NODE_ALT = 8, - NODE_CALL = 9 + NODE_STRING = 0, + NODE_CCLASS = 1, + NODE_CTYPE = 2, + NODE_BACKREF = 3, + NODE_QUANT = 4, + NODE_ENCLOSURE = 5, + NODE_ANCHOR = 6, + NODE_LIST = 7, + NODE_ALT = 8, + NODE_CALL = 9, + NODE_GIMMICK = 10 } NodeType; +enum GimmickType { + GIMMICK_FAIL = 0, + GIMMICK_KEEP = 1, + GIMMICK_SAVE = 2, + GIMMICK_UPDATE_VAR = 3, +}; + /* node type bit */ #define NODE_TYPE2BIT(type) (1<<(type)) -#define BIT_NODE_STR NODE_TYPE2BIT(NODE_STR) +#define BIT_NODE_STRING NODE_TYPE2BIT(NODE_STRING) #define BIT_NODE_CCLASS NODE_TYPE2BIT(NODE_CCLASS) #define BIT_NODE_CTYPE NODE_TYPE2BIT(NODE_CTYPE) -#define BIT_NODE_BREF NODE_TYPE2BIT(NODE_BREF) -#define BIT_NODE_QUANT NODE_TYPE2BIT(NODE_QUANT) +#define BIT_NODE_BACKREF NODE_TYPE2BIT(NODE_BACKREF) +#define BIT_NODE_QUANT NODE_TYPE2BIT(NODE_QUANT) #define BIT_NODE_ENCLOSURE NODE_TYPE2BIT(NODE_ENCLOSURE) #define BIT_NODE_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR) #define BIT_NODE_LIST NODE_TYPE2BIT(NODE_LIST) #define BIT_NODE_ALT NODE_TYPE2BIT(NODE_ALT) #define BIT_NODE_CALL NODE_TYPE2BIT(NODE_CALL) +#define BIT_NODE_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK) #define NODE_IS_SIMPLE_TYPE(node) \ ((NODE_TYPE2BIT(NODE_TYPE(node)) & \ - (BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BREF)) != 0) + (BIT_NODE_STRING | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BACKREF)) != 0) #define NODE_TYPE(node) ((node)->u.base.node_type) -#define SET_NODE_TYPE(node, ntype) (node)->u.base.node_type = (ntype) +#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype) #define STR_(node) (&((node)->u.str)) #define CCLASS_(node) (&((node)->u.cclass)) #define CTYPE_(node) (&((node)->u.ctype)) -#define BREF_(node) (&((node)->u.bref)) -#define QUANT_(node) (&((node)->u.quant)) -#define ENCLOSURE_(node) (&((node)->u.enclosure)) +#define BACKREF_(node) (&((node)->u.backref)) +#define QUANT_(node) (&((node)->u.quant)) +#define ENCLOSURE_(node) (&((node)->u.enclosure)) #define ANCHOR_(node) (&((node)->u.anchor)) #define CONS_(node) (&((node)->u.cons)) #define CALL_(node) (&((node)->u.call)) +#define GIMMICK_(node) (&((node)->u.gimmick)) #define NODE_CAR(node) (CONS_(node)->car) #define NODE_CDR(node) (CONS_(node)->cdr) @@ -83,6 +93,9 @@ typedef enum { #define NODE_IS_ANYCHAR(node) \ (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR) +#define CTYPE_OPTION(node, reg) \ + (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options) + #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) @@ -90,24 +103,25 @@ typedef enum { #define ENCLOSURE_MEMORY (1<<0) #define ENCLOSURE_OPTION (1<<1) #define ENCLOSURE_STOP_BACKTRACK (1<<2) +#define ENCLOSURE_IF_ELSE (1<<3) -#define NODE_STR_MARGIN 16 -#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ +#define NODE_STRING_MARGIN 16 +#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ #define NODE_BACKREFS_SIZE 6 #define STRING_RAW (1<<0) /* by backslashed number */ #define STRING_AMBIG (1<<1) #define STRING_DONT_GET_OPT_INFO (1<<2) -#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) -#define NSTRING_SET_RAW(node) (node)->u.str.flag |= STRING_RAW -#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~STRING_RAW -#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= STRING_AMBIG -#define NSTRING_SET_DONT_GET_OPT_INFO(node) \ +#define NODE_STRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) +#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= STRING_RAW +#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~STRING_RAW +#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= STRING_AMBIG +#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \ (node)->u.str.flag |= STRING_DONT_GET_OPT_INFO -#define NSTRING_IS_RAW(node) (((node)->u.str.flag & STRING_RAW) != 0) -#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & STRING_AMBIG) != 0) -#define NSTRING_IS_DONT_GET_OPT_INFO(node) \ +#define NODE_STRING_IS_RAW(node) (((node)->u.str.flag & STRING_RAW) != 0) +#define NODE_STRING_IS_AMBIG(node) (((node)->u.str.flag & STRING_AMBIG) != 0) +#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \ (((node)->u.str.flag & STRING_DONT_GET_OPT_INFO) != 0) #define BACKREFS_P(br) \ @@ -118,7 +132,7 @@ typedef enum { #define QUANT_BODY_IS_EMPTY_MEM 2 #define QUANT_BODY_IS_EMPTY_REC 3 -/* status bits */ +/* node status bits */ #define NST_MIN_FIXED (1<<0) #define NST_MAX_FIXED (1<<1) #define NST_CLEN_FIXED (1<<2) @@ -136,28 +150,37 @@ typedef enum { #define NST_BY_NUMBER (1<<14) /* {n,m} */ #define NST_BY_NAME (1<<15) /* backref by name */ #define NST_BACKREF (1<<16) +#define NST_CHECKER (1<<17) +#define NST_FIXED_OPTION (1<<18) +#define NST_PROHIBIT_RECURSION (1<<19) +#define NST_SUPER (1<<20) #define NODE_STATUS(node) (((Node* )node)->u.base.status) #define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (f)) #define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(f)) -#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NST_BY_NUMBER) != 0) +#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NST_BY_NUMBER) != 0) #define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NST_IN_REAL_REPEAT) != 0) -#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NST_CALLED) != 0) +#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NST_CALLED) != 0) #define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NST_IN_MULTI_ENTRY) != 0) -#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NST_RECURSION) != 0) +#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NST_RECURSION) != 0) #define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NST_IN_ZERO_REPEAT) != 0) -#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0) -#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NST_ADDR_FIXED) != 0) -#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NST_CLEN_FIXED) != 0) -#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NST_MIN_FIXED) != 0) -#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NST_MAX_FIXED) != 0) -#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NST_MARK1) != 0) -#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NST_MARK2) != 0) -#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NST_NEST_LEVEL) != 0) -#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NST_BY_NAME) != 0) -#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NST_BACKREF) != 0) +#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0) +#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NST_ADDR_FIXED) != 0) +#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NST_CLEN_FIXED) != 0) +#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NST_MIN_FIXED) != 0) +#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NST_MAX_FIXED) != 0) +#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NST_MARK1) != 0) +#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NST_MARK2) != 0) +#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NST_NEST_LEVEL) != 0) +#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NST_BY_NAME) != 0) +#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NST_BACKREF) != 0) +#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NST_CHECKER) != 0) +#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NST_FIXED_OPTION) != 0) +#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NST_SUPER) != 0) +#define NODE_IS_PROHIBIT_RECURSION(node) \ + ((NODE_STATUS(node) & NST_PROHIBIT_RECURSION) != 0) #define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \ ((NODE_STATUS(node) & NST_STOP_BT_SIMPLE_REPEAT) != 0) @@ -168,8 +191,6 @@ typedef enum { #define NODE_ANCHOR_BODY(node) ((node)->body) -#define CALLNODE_REFNUM_UNDEF -1 - typedef struct { NodeType node_type; int status; @@ -178,7 +199,7 @@ typedef struct { UChar* end; unsigned int flag; int capa; /* (allocated size - 1) or 0: use buf[] */ - UChar buf[NODE_STR_BUF_SIZE]; + UChar buf[NODE_STRING_BUF_SIZE]; } StrNode; typedef struct { @@ -221,17 +242,22 @@ typedef struct { int called_state; } m; struct { - OnigOptionType option; + OnigOptionType options; } o; + struct { + /* body is condition */ + struct _Node* Then; + struct _Node* Else; + } te; }; /* for multiple call reference */ - OnigLen min_len; /* min length (byte) */ - OnigLen max_len; /* max length (byte) */ - int char_len; /* character length */ - int opt_count; /* referenced count in optimize_node_left() */ + OnigLen min_len; /* min length (byte) */ + OnigLen max_len; /* max length (byte) */ + int char_len; /* character length */ + int opt_count; /* referenced count in optimize_node_left() */ } EnclosureNode; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL typedef struct { int offset; @@ -266,7 +292,7 @@ typedef struct { int back_static[NODE_BACKREFS_SIZE]; int* back_dynamic; int nest_level; -} BRefNode; +} BackRefNode; typedef struct { NodeType node_type; @@ -291,8 +317,18 @@ typedef struct { int ctype; int not; + OnigOptionType options; } CtypeNode; +typedef struct { + NodeType node_type; + int status; + + enum GimmickType type; + int detail_type; + int id; +} GimmickNode; + typedef struct _Node { union { struct { @@ -305,13 +341,14 @@ typedef struct _Node { CClassNode cclass; QuantNode quant; EnclosureNode enclosure; - BRefNode bref; + BackRefNode backref; AnchorNode anchor; ConsAltNode cons; CtypeNode ctype; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL CallNode call; #endif + GimmickNode gimmick; } u; } Node; @@ -332,7 +369,11 @@ typedef struct { } MemEnv; typedef struct { - OnigOptionType option; + enum SaveType type; +} SaveItem; + +typedef struct { + OnigOptionType options; OnigCaseFoldType case_fold_flag; OnigEncoding enc; OnigSyntaxType* syntax; @@ -346,7 +387,7 @@ typedef struct { UChar* error_end; regex_t* reg; /* for reg->names only */ int num_call; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL UnsetAddrList* unset_addr_list; int has_call_zero; #endif @@ -364,6 +405,11 @@ typedef struct { int has_recursion; #endif unsigned int parse_depth; + + int keep_num; + int save_num; + int save_alloc_num; + SaveItem* saves; } ScanEnv; @@ -399,6 +445,7 @@ extern int onig_names_free P_((regex_t* reg)); extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); extern int onig_free_shared_cclass_table P_((void)); extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); +extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node); #ifdef ONIG_DEBUG #ifdef USE_NAMED_GROUP |