summary refs log tree commit diff
path: root/src/regparse.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/regparse.h')
-rw-r--r-- src/regparse.h 102
1 files changed, 54 insertions, 48 deletions
diff --git a/src/regparse.h b/src/regparse.h
index b7a2867..1525ccb 100644
--- a/src/regparse.h
+++ b/src/regparse.h
@@ -4,7 +4,7 @@
regparse.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2019 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2019 K.Kosako
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
#include "regint.h"
#define NODE_STRING_MARGIN 16
-#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
+#define NODE_STRING_BUF_SIZE 20 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 6
/* node type */
@@ -66,27 +66,32 @@ enum GimmickType {
#endif
};
-enum BodyEmpty {
- BODY_IS_NOT_EMPTY = 0,
- BODY_IS_EMPTY = 1,
- BODY_IS_EMPTY_MEM = 2,
- BODY_IS_EMPTY_REC = 3
+enum BodyEmptyType {
+ BODY_IS_NOT_EMPTY = 0,
+ BODY_IS_EMPTY_POSSIBILITY = 1,
+ BODY_IS_EMPTY_POSSIBILITY_MEM = 2,
+ BODY_IS_EMPTY_POSSIBILITY_REC = 3
};
+struct _Node;
+
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
UChar* s;
UChar* end;
unsigned int flag;
- int capacity; /* (allocated size - 1) or 0: use buf[] */
UChar buf[NODE_STRING_BUF_SIZE];
+ int capacity; /* (allocated size - 1) or 0: use buf[] */
+ int case_min_len;
} StrNode;
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
unsigned int flags;
BitSet bs;
@@ -96,20 +101,22 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
struct _Node* body;
int lower;
int upper;
int greedy;
- enum BodyEmpty empty_info;
+ enum BodyEmptyType emptiness;
struct _Node* head_exact;
struct _Node* next_head_exact;
- int is_refered; /* include called node. don't eliminate even if {0} */
+ int include_referred; /* include called node. don't eliminate even if {0} */
} QuantNode;
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
struct _Node* body;
enum BagType type;
@@ -152,6 +159,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
struct _Node* body; /* to BagNode : BAG_MEMORY */
int by_number;
@@ -166,6 +174,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
int back_num;
int back_static[NODE_BACKREFS_SIZE];
@@ -176,6 +185,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
struct _Node* body;
int type;
@@ -186,6 +196,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
struct _Node* car;
struct _Node* cdr;
@@ -194,6 +205,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
int ctype;
int not;
@@ -204,6 +216,7 @@ typedef struct {
typedef struct {
NodeType node_type;
int status;
+ struct _Node* parent;
enum GimmickType type;
int detail_type;
@@ -216,6 +229,7 @@ typedef struct _Node {
struct {
NodeType node_type;
int status;
+ struct _Node* parent;
struct _Node* body;
} base;
@@ -252,10 +266,6 @@ typedef struct _Node {
#define NODE_BIT_CALL NODE_TYPE2BIT(NODE_CALL)
#define NODE_BIT_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK)
-#define NODE_IS_SIMPLE_TYPE(node) \
- ((NODE_TYPE2BIT(NODE_TYPE(node)) & \
- (NODE_BIT_STRING | NODE_BIT_CCLASS | NODE_BIT_CTYPE | NODE_BIT_BACKREF)) != 0)
-
#define NODE_TYPE(node) ((node)->u.base.node_type)
#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
@@ -284,26 +294,21 @@ typedef struct _Node {
#define ANCR_ANYCHAR_INF_MASK (ANCR_ANYCHAR_INF | ANCR_ANYCHAR_INF_ML)
#define ANCR_END_BUF_MASK (ANCR_END_BUF | ANCR_SEMI_END_BUF)
-#define NODE_STRING_RAW (1<<0) /* by backslashed number */
-#define NODE_STRING_AMBIG (1<<1)
-#define NODE_STRING_GOOD_AMBIG (1<<2)
-#define NODE_STRING_DONT_GET_OPT_INFO (1<<3)
+#define NODE_STRING_CRUDE (1<<0)
+#define NODE_STRING_CASE_EXPANDED (1<<1)
+#define NODE_STRING_CASE_FOLD_MATCH (1<<2)
#define NODE_STRING_LEN(node) (int )((node)->u.str.end - (node)->u.str.s)
-#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= NODE_STRING_RAW
-#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NODE_STRING_RAW
-#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= NODE_STRING_AMBIG
-#define NODE_STRING_SET_GOOD_AMBIG(node) (node)->u.str.flag |= NODE_STRING_GOOD_AMBIG
-#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \
- (node)->u.str.flag |= NODE_STRING_DONT_GET_OPT_INFO
-#define NODE_STRING_IS_RAW(node) \
- (((node)->u.str.flag & NODE_STRING_RAW) != 0)
-#define NODE_STRING_IS_AMBIG(node) \
- (((node)->u.str.flag & NODE_STRING_AMBIG) != 0)
-#define NODE_STRING_IS_GOOD_AMBIG(node) \
- (((node)->u.str.flag & NODE_STRING_GOOD_AMBIG) != 0)
-#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \
- (((node)->u.str.flag & NODE_STRING_DONT_GET_OPT_INFO) != 0)
+#define NODE_STRING_SET_CRUDE(node) (node)->u.str.flag |= NODE_STRING_CRUDE
+#define NODE_STRING_CLEAR_CRUDE(node) (node)->u.str.flag &= ~NODE_STRING_CRUDE
+#define NODE_STRING_SET_CASE_EXPANDED(node) (node)->u.str.flag |= NODE_STRING_CASE_EXPANDED
+#define NODE_STRING_SET_CASE_FOLD_MATCH(node) (node)->u.str.flag |= NODE_STRING_CASE_FOLD_MATCH
+#define NODE_STRING_IS_CRUDE(node) \
+ (((node)->u.str.flag & NODE_STRING_CRUDE) != 0)
+#define NODE_STRING_IS_CASE_EXPANDED(node) \
+ (((node)->u.str.flag & NODE_STRING_CASE_EXPANDED) != 0)
+#define NODE_STRING_IS_CASE_FOLD_MATCH(node) \
+ (((node)->u.str.flag & NODE_STRING_CASE_FOLD_MATCH) != 0)
#define BACKREFS_P(br) \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
@@ -314,7 +319,7 @@ typedef struct _Node {
#define NODE_ST_CLEN_FIXED (1<<2)
#define NODE_ST_MARK1 (1<<3)
#define NODE_ST_MARK2 (1<<4)
-#define NODE_ST_STOP_BT_SIMPLE_REPEAT (1<<5)
+#define NODE_ST_STRICT_REAL_REPEAT (1<<5)
#define NODE_ST_RECURSION (1<<6)
#define NODE_ST_CALLED (1<<7)
#define NODE_ST_ADDR_FIXED (1<<8)
@@ -330,6 +335,7 @@ typedef struct _Node {
#define NODE_ST_FIXED_OPTION (1<<18)
#define NODE_ST_PROHIBIT_RECURSION (1<<19)
#define NODE_ST_SUPER (1<<20)
+#define NODE_ST_EMPTY_STATUS_CHECK (1<<21)
#define NODE_STATUS(node) (((Node* )node)->u.base.status)
@@ -357,9 +363,12 @@ typedef struct _Node {
#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NODE_ST_SUPER) != 0)
#define NODE_IS_PROHIBIT_RECURSION(node) \
((NODE_STATUS(node) & NODE_ST_PROHIBIT_RECURSION) != 0)
-#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
- ((NODE_STATUS(node) & NODE_ST_STOP_BT_SIMPLE_REPEAT) != 0)
+#define NODE_IS_STRICT_REAL_REPEAT(node) \
+ ((NODE_STATUS(node) & NODE_ST_STRICT_REAL_REPEAT) != 0)
+#define NODE_IS_EMPTY_STATUS_CHECK(node) \
+ ((NODE_STATUS(node) & NODE_ST_EMPTY_STATUS_CHECK) != 0)
+#define NODE_PARENT(node) ((node)->u.base.parent)
#define NODE_BODY(node) ((node)->u.base.body)
#define NODE_QUANT_BODY(node) ((node)->body)
#define NODE_BAG_BODY(node) ((node)->body)
@@ -372,11 +381,8 @@ typedef struct _Node {
(senv)->mem_env_dynamic : (senv)->mem_env_static)
typedef struct {
- Node* node;
-#if 0
- int in;
- int recursion;
-#endif
+ Node* mem_node;
+ Node* empty_repeat_node;
} MemEnv;
typedef struct {
@@ -388,9 +394,8 @@ typedef struct {
OnigCaseFoldType case_fold_flag;
OnigEncoding enc;
OnigSyntaxType* syntax;
- MemStatusType capture_history;
- MemStatusType bt_mem_start;
- MemStatusType bt_mem_end;
+ MemStatusType cap_history;
+ MemStatusType backtrack_mem; /* backtrack/recursion */
MemStatusType backrefed_mem;
UChar* pattern;
UChar* pattern_end;
@@ -408,7 +413,10 @@ typedef struct {
MemEnv mem_env_static[SCANENV_MEMENV_SIZE];
MemEnv* mem_env_dynamic;
unsigned int parse_depth;
-
+#ifdef ONIG_DEBUG_PARSE
+ unsigned int max_parse_depth;
+#endif
+ int backref_num;
int keep_num;
int save_num;
int save_alloc_num;
@@ -429,9 +437,7 @@ extern int onig_renumber_name_table P_((regex_t* reg, GroupNumRemap* map));
extern int onig_strncmp P_((const UChar* s1, const UChar* s2, int n));
extern void onig_strcpy P_((UChar* dest, const UChar* src, const UChar* end));
extern void onig_scan_env_set_error_string P_((ScanEnv* env, int ecode, UChar* arg, UChar* arg_end));
-extern int onig_scan_unsigned_number P_((UChar** src, const UChar* end, OnigEncoding enc));
-extern void onig_reduce_nested_quantifier P_((Node* pnode, Node* cnode));
-extern void onig_node_conv_to_str_node P_((Node* node, int raw));
+extern int onig_reduce_nested_quantifier P_((Node* pnode));
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
@@ -439,13 +445,13 @@ extern Node* onig_node_new_bag P_((enum BagType type));
extern Node* onig_node_new_anchor P_((int type, int ascii_mode));
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));
-extern Node* onig_node_list_add P_((Node* list, Node* x));
extern Node* onig_node_new_alt P_((Node* left, Node* right));
extern void onig_node_str_clear P_((Node* node));
extern int onig_names_free P_((regex_t* reg));
extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
extern int onig_free_shared_cclass_table P_((void));
extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
+extern int onig_new_cclass_with_code_list(Node** rnode, OnigEncoding enc, int n, OnigCodePoint codes[]);
extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node);
#ifdef USE_CALLOUT