summary | refs | log | tree | commit | diff
path: root/src/regparse.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/regparse.h')
-rw-r--r-- src/regparse.h 334
1 files changed, 196 insertions, 138 deletions
diff --git a/src/regparse.h b/src/regparse.h
index c9d1fe8..884f4d5 100644
--- a/src/regparse.h
+++ b/src/regparse.h
@@ -4,7 +4,7 @@
regparse.h - Oniguruma (regular expression library)
**********************************************************************/
/*-
- * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
+ * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -32,88 +32,91 @@
#include "regint.h"
/* node type */
-#define NT_STR 0
-#define NT_CCLASS 1
-#define NT_CTYPE 2
-#define NT_CANY 3
-#define NT_BREF 4
-#define NT_QTFR 5
-#define NT_ENCLOSE 6
-#define NT_ANCHOR 7
-#define NT_LIST 8
-#define NT_ALT 9
-#define NT_CALL 10
+typedef enum { /* parse-tree node kinds; each value is also the shift count passed to NODE_TYPE2BIT() */
+ NODE_STR = 0,
+ NODE_CCLASS = 1,
+ NODE_CTYPE = 2, /* also represents "any char" when ctype == CTYPE_ANYCHAR; the separate NT_CANY kind is removed by this patch */
+ NODE_BREF = 3,
+ NODE_QUANT = 4, /* was NT_QTFR */
+ NODE_ENCLOSURE = 5, /* was NT_ENCLOSE */
+ NODE_ANCHOR = 6,
+ NODE_LIST = 7,
+ NODE_ALT = 8,
+ NODE_CALL = 9
+} NodeType;
/* node type bit */
-#define NTYPE2BIT(type) (1<<(type))
-
-#define BIT_NT_STR NTYPE2BIT(NT_STR)
-#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS)
-#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE)
-#define BIT_NT_CANY NTYPE2BIT(NT_CANY)
-#define BIT_NT_BREF NTYPE2BIT(NT_BREF)
-#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR)
-#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE)
-#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR)
-#define BIT_NT_LIST NTYPE2BIT(NT_LIST)
-#define BIT_NT_ALT NTYPE2BIT(NT_ALT)
-#define BIT_NT_CALL NTYPE2BIT(NT_CALL)
-
-#define IS_NODE_TYPE_SIMPLE(type) \
- ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\
- BIT_NT_CANY | BIT_NT_BREF)) != 0)
-
-#define NTYPE(node) ((node)->u.base.type)
-#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype)
-
-#define NSTR(node) (&((node)->u.str))
-#define NCCLASS(node) (&((node)->u.cclass))
-#define NCTYPE(node) (&((node)->u.ctype))
-#define NBREF(node) (&((node)->u.bref))
-#define NQTFR(node) (&((node)->u.qtfr))
-#define NENCLOSE(node) (&((node)->u.enclose))
-#define NANCHOR(node) (&((node)->u.anchor))
-#define NCONS(node) (&((node)->u.cons))
-#define NCALL(node) (&((node)->u.call))
-
-#define NCAR(node) (NCONS(node)->car)
-#define NCDR(node) (NCONS(node)->cdr)
-
+#define NODE_TYPE2BIT(type) (1<<(type))
+
+#define BIT_NODE_STR NODE_TYPE2BIT(NODE_STR)
+#define BIT_NODE_CCLASS NODE_TYPE2BIT(NODE_CCLASS)
+#define BIT_NODE_CTYPE NODE_TYPE2BIT(NODE_CTYPE)
+#define BIT_NODE_BREF NODE_TYPE2BIT(NODE_BREF)
+#define BIT_NODE_QUANT NODE_TYPE2BIT(NODE_QUANT)
+#define BIT_NODE_ENCLOSURE NODE_TYPE2BIT(NODE_ENCLOSURE)
+#define BIT_NODE_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR)
+#define BIT_NODE_LIST NODE_TYPE2BIT(NODE_LIST)
+#define BIT_NODE_ALT NODE_TYPE2BIT(NODE_ALT)
+#define BIT_NODE_CALL NODE_TYPE2BIT(NODE_CALL)
+
+#define NODE_IS_SIMPLE_TYPE(node) \
+ ((NODE_TYPE2BIT(NODE_TYPE(node)) & \
+ (BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BREF)) != 0)
+
+#define NODE_TYPE(node) ((node)->u.base.node_type)
+#define SET_NODE_TYPE(node, ntype) (node)->u.base.node_type = (ntype)
+
+#define STR_(node) (&((node)->u.str))
+#define CCLASS_(node) (&((node)->u.cclass))
+#define CTYPE_(node) (&((node)->u.ctype))
+#define BREF_(node) (&((node)->u.bref))
+#define QUANT_(node) (&((node)->u.quant))
+#define ENCLOSURE_(node) (&((node)->u.enclosure))
+#define ANCHOR_(node) (&((node)->u.anchor))
+#define CONS_(node) (&((node)->u.cons))
+#define CALL_(node) (&((node)->u.call))
+
+#define NODE_CAR(node) (CONS_(node)->car)
+#define NODE_CDR(node) (CONS_(node)->cdr)
+
+#define CTYPE_ANYCHAR (-1) /* sentinel ctype value marking a NODE_CTYPE node as "any character"; parenthesized so it always expands as one operand */
+#define NODE_IS_ANYCHAR(node) \
+ (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR)
#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML)
#define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)
-#define ENCLOSE_MEMORY (1<<0)
-#define ENCLOSE_OPTION (1<<1)
-#define ENCLOSE_STOP_BACKTRACK (1<<2)
+#define ENCLOSURE_MEMORY (1<<0)
+#define ENCLOSURE_OPTION (1<<1)
+#define ENCLOSURE_STOP_BACKTRACK (1<<2)
#define NODE_STR_MARGIN 16
#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */
#define NODE_BACKREFS_SIZE 6
-#define NSTR_RAW (1<<0) /* by backslashed number */
-#define NSTR_AMBIG (1<<1)
-#define NSTR_DONT_GET_OPT_INFO (1<<2)
+#define STRING_RAW (1<<0) /* by backslashed number */
+#define STRING_AMBIG (1<<1)
+#define STRING_DONT_GET_OPT_INFO (1<<2)
#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s)
-#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW
-#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW
-#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG
+#define NSTRING_SET_RAW(node) (node)->u.str.flag |= STRING_RAW
+#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~STRING_RAW
+#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= STRING_AMBIG
#define NSTRING_SET_DONT_GET_OPT_INFO(node) \
- (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO
-#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0)
-#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0)
+ (node)->u.str.flag |= STRING_DONT_GET_OPT_INFO
+#define NSTRING_IS_RAW(node) (((node)->u.str.flag & STRING_RAW) != 0)
+#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & STRING_AMBIG) != 0)
#define NSTRING_IS_DONT_GET_OPT_INFO(node) \
- (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0)
+ (((node)->u.str.flag & STRING_DONT_GET_OPT_INFO) != 0)
#define BACKREFS_P(br) /* yields the active back-reference array: back_dynamic when non-NULL, else the inline back_static; no trailing ';' so it can be used inside expressions */ \
(IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static)
-#define NQ_TARGET_ISNOT_EMPTY 0
-#define NQ_TARGET_IS_EMPTY 1
-#define NQ_TARGET_IS_EMPTY_MEM 2
-#define NQ_TARGET_IS_EMPTY_REC 3
+#define QUANT_BODY_IS_NOT_EMPTY 0
+#define QUANT_BODY_IS_EMPTY 1
+#define QUANT_BODY_IS_EMPTY_MEM 2
+#define QUANT_BODY_IS_EMPTY_REC 3
/* status bits */
#define NST_MIN_FIXED (1<<0)
@@ -121,44 +124,56 @@
#define NST_CLEN_FIXED (1<<2)
#define NST_MARK1 (1<<3)
#define NST_MARK2 (1<<4)
-#define NST_MEM_BACKREFED (1<<5)
-#define NST_STOP_BT_SIMPLE_REPEAT (1<<6)
-#define NST_RECURSION (1<<7)
-#define NST_CALLED (1<<8)
-#define NST_ADDR_FIXED (1<<9)
-#define NST_NAMED_GROUP (1<<10)
-#define NST_NAME_REF (1<<11)
-#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */
+#define NST_STOP_BT_SIMPLE_REPEAT (1<<5)
+#define NST_RECURSION (1<<6)
+#define NST_CALLED (1<<7)
+#define NST_ADDR_FIXED (1<<8)
+#define NST_NAMED_GROUP (1<<9)
+#define NST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */
+#define NST_IN_ZERO_REPEAT (1<<11) /* (....){0} */
+#define NST_IN_MULTI_ENTRY (1<<12)
#define NST_NEST_LEVEL (1<<13)
#define NST_BY_NUMBER (1<<14) /* {n,m} */
+#define NST_BY_NAME (1<<15) /* backref by name */
+#define NST_BACKREF (1<<16)
+
+
+#define NODE_STATUS(node) (((Node* )(node))->u.base.status) /* status word read through the base view; the argument is cast to Node*, and is parenthesized so expression arguments are cast as a whole */
+#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (f)) /* set the NST_* bits in f */
+#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(f)) /* clear the NST_* bits in f */
+
+#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NST_BY_NUMBER) != 0)
+#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NST_IN_REAL_REPEAT) != 0)
+#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NST_CALLED) != 0)
+#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NST_IN_MULTI_ENTRY) != 0)
+#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NST_RECURSION) != 0)
+#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NST_IN_ZERO_REPEAT) != 0)
+#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0)
+#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NST_ADDR_FIXED) != 0)
+#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NST_CLEN_FIXED) != 0)
+#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NST_MIN_FIXED) != 0)
+#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NST_MAX_FIXED) != 0)
+#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NST_MARK1) != 0)
+#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NST_MARK2) != 0)
+#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NST_NEST_LEVEL) != 0)
+#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NST_BY_NAME) != 0)
+#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NST_BACKREF) != 0)
+#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \
+ ((NODE_STATUS(node) & NST_STOP_BT_SIMPLE_REPEAT) != 0)
+
+#define NODE_BODY(node) ((node)->u.base.body)
+#define NODE_QUANT_BODY(node) ((node)->body)
+#define NODE_ENCLOSURE_BODY(node) ((node)->body)
+#define NODE_CALL_BODY(node) ((node)->body)
+#define NODE_ANCHOR_BODY(node) ((node)->body)
-#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f)
-#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f)
-
-#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0)
-#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0)
-#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0)
-#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0)
-#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0)
-#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0)
-#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0)
-#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0)
-#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \
- (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0)
-#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0)
-
-#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION
-#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0)
-#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0)
-#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0)
-#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0)
-#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0)
-#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0)
#define CALLNODE_REFNUM_UNDEF (-1) /* presumably "no group number resolved yet" for a CallNode -- confirm at use sites; parenthesized so it expands as a single operand */
typedef struct {
- NodeBase base;
+ NodeType node_type;
+ int status;
+
UChar* s;
UChar* end;
unsigned int flag;
@@ -167,35 +182,54 @@ typedef struct {
} StrNode;
typedef struct {
- NodeBase base;
- int state;
- struct _Node* target;
+ NodeType node_type; /* node_type + status lead this struct, matching the base view inside Node.u */
+ int status; /* NST_* bits, accessed via NODE_STATUS() */
+
+ unsigned int flags;
+ BitSet bs; /* presumably the membership bitmap for single-byte codes -- confirm against BitSet's definition */
+ BBuf* mbuf; /* multi-byte info or NULL */
+} CClassNode;
+
+typedef struct { /* quantifier node (renamed from QtfrNode by this patch) */
+ NodeType node_type;
+ int status; /* NST_* bits, accessed via NODE_STATUS() */
+ struct _Node* body; /* replaces the old `target` member; reachable via NODE_BODY() / NODE_QUANT_BODY() */
+
int lower;
int upper;
int greedy;
- int target_empty_info;
+ int body_empty_info; /* presumably one of the QUANT_BODY_IS_* codes -- confirm at use sites */
struct _Node* head_exact;
struct _Node* next_head_exact;
int is_refered; /* include called node. don't eliminate even if {0} */
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int comb_exp_check_num; /* 1,2,3...: check, 0: no check */
#endif
-} QtfrNode;
+} QuantNode;
typedef struct {
- NodeBase base;
- int state;
+ NodeType node_type;
+ int status; /* NST_* bits, accessed via NODE_STATUS() */
+ struct _Node* body; /* replaces the old `target` member; reachable via NODE_BODY() / NODE_ENCLOSURE_BODY() */
+
int type; /* presumably one of ENCLOSURE_MEMORY / ENCLOSURE_OPTION / ENCLOSURE_STOP_BACKTRACK -- confirm */
- int regnum;
- OnigOptionType option;
- struct _Node* target;
- AbsAddrType call_addr;
+ union { /* unnamed union: members are reached directly as en->m.* and en->o.* (anonymous unions need C11 or a compiler extension) */
+ struct {
+ int regnum;
+ AbsAddrType called_addr; /* renamed from call_addr */
+ int entry_count;
+ int called_state;
+ } m; /* presumably the payload when type is ENCLOSURE_MEMORY -- confirm */
+ struct {
+ OnigOptionType option;
+ } o; /* presumably the payload when type is ENCLOSURE_OPTION -- confirm */
+ };
/* for multiple call reference */
OnigLen min_len; /* min length (byte) */
OnigLen max_len; /* max length (byte) */
int char_len; /* character length */
int opt_count; /* referenced count in optimize_node_left() */
-} EncloseNode;
+} EnclosureNode;
#ifdef USE_SUBEXP_CALL
@@ -211,20 +245,23 @@ typedef struct {
} UnsetAddrList;
typedef struct {
- NodeBase base;
- int state;
+ NodeType node_type;
+ int status; /* NST_* bits, accessed via NODE_STATUS() */
+ struct _Node* body; /* to EnclosureNode : ENCLOSURE_MEMORY */
+
+ int by_number;
int group_num;
UChar* name; /* presumably name..name_end delimits the referenced group name -- confirm */
UChar* name_end;
- struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */
- UnsetAddrList* unset_addr_list;
+ int entry_count;
} CallNode; /* the Node union only carries this variant when USE_SUBEXP_CALL is defined */
#endif
typedef struct {
- NodeBase base;
- int state;
+ NodeType node_type;
+ int status;
+
int back_num;
int back_static[NODE_BACKREFS_SIZE];
int* back_dynamic;
@@ -232,37 +269,48 @@ typedef struct {
} BRefNode;
typedef struct {
- NodeBase base;
+ NodeType node_type;
+ int status; /* NST_* bits, accessed via NODE_STATUS() */
+ struct _Node* body; /* replaces the old `target` member, moved up so it sits where NODE_BODY() reads it */
+
int type;
- struct _Node* target;
int char_len;
} AnchorNode;
typedef struct {
- NodeBase base;
+ NodeType node_type;
+ int status; /* NST_* bits, accessed via NODE_STATUS() */
+
struct _Node* car; /* read through NODE_CAR() */
struct _Node* cdr; /* read through NODE_CDR() */
} ConsAltNode;
typedef struct {
- NodeBase base;
+ NodeType node_type;
+ int status; /* NST_* bits, accessed via NODE_STATUS() */
+
int ctype; /* CTYPE_ANYCHAR here marks the "any char" node, see NODE_IS_ANYCHAR() */
int not;
} CtypeNode;
typedef struct _Node {
union {
- NodeBase base;
- StrNode str;
- CClassNode cclass;
- QtfrNode qtfr;
- EncloseNode enclose;
- BRefNode bref;
- AnchorNode anchor;
- ConsAltNode cons;
- CtypeNode ctype;
+ struct { /* generic view used by NODE_TYPE(), NODE_STATUS() and NODE_BODY(); replaces the old NodeBase member */
+ NodeType node_type;
+ int status;
+ struct _Node* body; /* valid only for variants that declare a body member right after status; others (e.g. StrNode, CtypeNode) keep different data here */
+ } base;
+
+ StrNode str;
+ CClassNode cclass;
+ QuantNode quant;
+ EnclosureNode enclosure;
+ BRefNode bref;
+ AnchorNode anchor;
+ ConsAltNode cons;
+ CtypeNode ctype;
#ifdef USE_SUBEXP_CALL
- CallNode call;
+ CallNode call;
#endif
} u;
} Node;
@@ -270,20 +318,28 @@ typedef struct _Node {
#define NULL_NODE ((Node* )0)
-#define SCANENV_MEMNODES_SIZE 8
-#define SCANENV_MEM_NODES(senv) \
- (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \
- (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static)
+#define SCANENV_MEMENV_SIZE 8
+#define SCANENV_MEMENV(senv) \
+ (IS_NOT_NULL((senv)->mem_env_dynamic) ? \
+ (senv)->mem_env_dynamic : (senv)->mem_env_static)
+
+typedef struct { /* element type of ScanEnv's mem_env_static / mem_env_dynamic arrays (previously bare Node* slots) */
+ Node* node;
+#if 0 /* the two members below are compiled out */
+ int in;
+ int recursion;
+#endif
+} MemEnv;
typedef struct {
OnigOptionType option;
OnigCaseFoldType case_fold_flag;
OnigEncoding enc;
OnigSyntaxType* syntax;
- BitStatusType capture_history;
- BitStatusType bt_mem_start;
- BitStatusType bt_mem_end;
- BitStatusType backrefed_mem;
+ MemStatusType capture_history;
+ MemStatusType bt_mem_start;
+ MemStatusType bt_mem_end;
+ MemStatusType backrefed_mem;
UChar* pattern;
UChar* pattern_end;
UChar* error;
@@ -292,14 +348,15 @@ typedef struct {
int num_call;
#ifdef USE_SUBEXP_CALL
UnsetAddrList* unset_addr_list;
+ int has_call_zero;
#endif
int num_mem;
#ifdef USE_NAMED_GROUP
int num_named;
#endif
int mem_alloc;
- Node* mem_nodes_static[SCANENV_MEMNODES_SIZE];
- Node** mem_nodes_dynamic;
+ MemEnv mem_env_static[SCANENV_MEMENV_SIZE];
+ MemEnv* mem_env_dynamic;
#ifdef USE_COMBINATION_EXPLOSION_CHECK
int num_comb_exp_check;
int comb_exp_max_regnum;
@@ -331,7 +388,7 @@ extern void onig_node_conv_to_str_node P_((Node* node, int raw));
extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end));
extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end));
extern void onig_node_free P_((Node* node));
-extern Node* onig_node_new_enclose P_((int type));
+extern Node* onig_node_new_enclosure P_((int type));
extern Node* onig_node_new_anchor P_((int type));
extern Node* onig_node_new_str P_((const UChar* s, const UChar* end));
extern Node* onig_node_new_list P_((Node* left, Node* right));
@@ -339,8 +396,9 @@ extern Node* onig_node_list_add P_((Node* list, Node* x));
extern Node* onig_node_new_alt P_((Node* left, Node* right));
extern void onig_node_str_clear P_((Node* node));
extern int onig_names_free P_((regex_t* reg));
-extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
+extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env));
extern int onig_free_shared_cclass_table P_((void));
+extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc));
#ifdef ONIG_DEBUG
#ifdef USE_NAMED_GROUP