diff options
Diffstat (limited to 'src/regparse.h')
-rw-r--r-- | src/regparse.h | 102 |
1 files changed, 54 insertions, 48 deletions
diff --git a/src/regparse.h b/src/regparse.h index b7a2867..1525ccb 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -4,7 +4,7 @@ regparse.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2019 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ #include "regint.h" #define NODE_STRING_MARGIN 16 -#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ +#define NODE_STRING_BUF_SIZE 20 /* sizeof(CClassNode) - sizeof(int)*4 */ #define NODE_BACKREFS_SIZE 6 /* node type */ @@ -66,27 +66,32 @@ enum GimmickType { #endif }; -enum BodyEmpty { - BODY_IS_NOT_EMPTY = 0, - BODY_IS_EMPTY = 1, - BODY_IS_EMPTY_MEM = 2, - BODY_IS_EMPTY_REC = 3 +enum BodyEmptyType { + BODY_IS_NOT_EMPTY = 0, + BODY_IS_EMPTY_POSSIBILITY = 1, + BODY_IS_EMPTY_POSSIBILITY_MEM = 2, + BODY_IS_EMPTY_POSSIBILITY_REC = 3 }; +struct _Node; + typedef struct { NodeType node_type; int status; + struct _Node* parent; UChar* s; UChar* end; unsigned int flag; - int capacity; /* (allocated size - 1) or 0: use buf[] */ UChar buf[NODE_STRING_BUF_SIZE]; + int capacity; /* (allocated size - 1) or 0: use buf[] */ + int case_min_len; } StrNode; typedef struct { NodeType node_type; int status; + struct _Node* parent; unsigned int flags; BitSet bs; @@ -96,20 +101,22 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; struct _Node* body; int lower; int upper; int greedy; - enum BodyEmpty empty_info; + enum BodyEmptyType emptiness; struct _Node* head_exact; struct _Node* next_head_exact; - int is_refered; /* include called node. don't eliminate even if {0} */ + int include_referred; /* include called node. don't eliminate even if {0} */ } QuantNode; typedef struct { NodeType node_type; int status; + struct _Node* parent; struct _Node* body; enum BagType type; @@ -152,6 +159,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; struct _Node* body; /* to BagNode : BAG_MEMORY */ int by_number; @@ -166,6 +174,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; int back_num; int back_static[NODE_BACKREFS_SIZE]; @@ -176,6 +185,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; struct _Node* body; int type; @@ -186,6 +196,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; struct _Node* car; struct _Node* cdr; @@ -194,6 +205,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; int ctype; int not; @@ -204,6 +216,7 @@ typedef struct { typedef struct { NodeType node_type; int status; + struct _Node* parent; enum GimmickType type; int detail_type; @@ -216,6 +229,7 @@ typedef struct _Node { struct { NodeType node_type; int status; + struct _Node* parent; struct _Node* body; } base; @@ -252,10 +266,6 @@ typedef struct _Node { #define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL) #define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK) -#define NODE_IS_SIMPLE_TYPE(node) \ - ((NODE_TYPE2BIT(NODE_TYPE(node)) & \ - (NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0) - #define NODE_TYPE(node) ((node)->u.base.node_type) #define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype) @@ -284,26 +294,21 @@ typedef struct _Node { #define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML) #define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF) -#define NODE_STRING_RAW (1<<0) /* by backslashed number */ -#define NODE_STRING_AMBIG (1<<1) -#define NODE_STRING_GOOD_AMBIG (1<<2) -#define NODE_STRING_DONT_GET_OPT_INFO (1<<3) +#define NODE_STRING_CRUDE (1<<0) +#define NODE_STRING_CASE_EXPANDED (1<<1) +#define NODE_STRING_CASE_FOLD_MATCH (1<<2) #define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s) -#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW -#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW -#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= NODE_STRING_AMBIG -#define NODE_STRING_SET_GOOD_AMBIG(node) (node)->u.str.flag |= NODE_STRING_GOOD_AMBIG -#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \ - (node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO -#define NODE_STRING_IS_RAW(node) \ - (((node)->u.str.flag & NODE_STRING_RAW) != 0) -#define NODE_STRING_IS_AMBIG(node) \ - (((node)->u.str.flag & NODE_STRING_AMBIG) != 0) -#define NODE_STRING_IS_GOOD_AMBIG(node) \ - (((node)->u.str.flag & NODE_STRING_GOOD_AMBIG) != 0) -#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \ - (((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0) +#define NODE_STRING_SET_CRUDE(node) (node)->u.str.flag |= NODE_STRING_CRUDE +#define NODE_STRING_CLEAR_CRUDE(node) (node)->u.str.flag &= ~NODE_STRING_CRUDE +#define NODE_STRING_SET_CASE_EXPANDED(node) (node)->u.str.flag |= NODE_STRING_CASE_EXPANDED +#define NODE_STRING_SET_CASE_FOLD_MATCH(node) (node)->u.str.flag |= NODE_STRING_CASE_FOLD_MATCH +#define NODE_STRING_IS_CRUDE(node) \ + (((node)->u.str.flag & NODE_STRING_CRUDE) != 0) +#define NODE_STRING_IS_CASE_EXPANDED(node) \ + (((node)->u.str.flag & NODE_STRING_CASE_EXPANDED) != 0) +#define NODE_STRING_IS_CASE_FOLD_MATCH(node) \ + (((node)->u.str.flag & NODE_STRING_CASE_FOLD_MATCH) != 0) #define BACKREFS_P(br) \ (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static) @@ -314,7 +319,7 @@ typedef struct _Node { #define NODE_ST_CLEN_FIXED (1<<2) #define NODE_ST_MARK1 (1<<3) #define NODE_ST_MARK2 (1<<4) -#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5) +#define NODE_ST_STRICT_REAL_REPEAT (1<<5) #define NODE_ST_RECURSION (1<<6) #define NODE_ST_CALLED (1<<7) #define NODE_ST_ADDR_FIXED (1<<8) @@ -330,6 +335,7 @@ typedef struct _Node { #define NODE_ST_FIXED_OPTION (1<<18) #define NODE_ST_PROHIBIT_RECURSION (1<<19) #define NODE_ST_SUPER (1<<20) +#define NODE_ST_EMPTY_STATUS_CHECK (1<<21) #define NODE_STATUS(node) (((Node* )node)->u.base.status) @@ -357,9 +363,12 @@ typedef struct _Node { #define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0) #define NODE_IS_PROHIBIT_RECURSION(node) \ ((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0) -#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \ - ((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0) +#define NODE_IS_STRICT_REAL_REPEAT(node) \ + ((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0) +#define NODE_IS_EMPTY_STATUS_CHECK(node) \ + ((NODE_STATUS(node) & NODE_ST_EMPTY_STATUS_CHECK) != 0) +#define NODE_PARENT(node) ((node)->u.base.parent) #define NODE_BODY(node) ((node)->u.base.body) #define NODE_QUANT_BODY(node) ((node)->body) #define NODE_BAG_BODY(node) ((node)->body) @@ -372,11 +381,8 @@ typedef struct _Node { (senv)->mem_env_dynamic : (senv)->mem_env_static) typedef struct { - Node* node; -#if 0 - int in; - int recursion; -#endif + Node* mem_node; + Node* empty_repeat_node; } MemEnv; typedef struct { @@ -388,9 +394,8 @@ typedef struct { OnigCaseFoldType case_fold_flag; OnigEncoding enc; OnigSyntaxType* syntax; - MemStatusType capture_history; - MemStatusType bt_mem_start; - MemStatusType bt_mem_end; + MemStatusType cap_history; + MemStatusType backtrack_mem; /* backtrack/recursion */ MemStatusType backrefed_mem; UChar* pattern; UChar* pattern_end; @@ -408,7 +413,10 @@ typedef struct { MemEnv mem_env_static[SCANENV_MEMENV_SIZE]; MemEnv* mem_env_dynamic; unsigned int parse_depth; - +#ifdef ONIG_DEBUG_PARSE + unsigned int max_parse_depth; +#endif + int backref_num; int keep_num; int save_num; int save_alloc_num; @@ -429,9 +437,7 @@ extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map)); extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n)); extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end)); extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end)); -extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc)); -extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode)); -extern void onig_node_conv_to_str_node P_((Node* node, int raw)); +extern int onig_reduce_nested_quantifier P_((Node* pnode)); extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end)); extern void onig_node_free P_((Node* node)); @@ -439,13 +445,13 @@ extern Node* onig_node_new_bag P_((enum BagType type)); extern Node* onig_node_new_anchor P_((int type, int ascii_mode)); extern Node* onig_node_new_str P_((const UChar* s, const UChar* end)); extern Node* onig_node_new_list P_((Node* left, Node* right)); -extern Node* onig_node_list_add P_((Node* list, Node* x)); extern Node* onig_node_new_alt P_((Node* left, Node* right)); extern void onig_node_str_clear P_((Node* node)); extern int onig_names_free P_((regex_t* reg)); extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); extern int onig_free_shared_cclass_table P_((void)); extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); +extern int onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc, int n, OnigCodePoint codes[]); extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node); #ifdef USE_CALLOUT |