diff options
author | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-09-01 19:54:36 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff-webhosting.net> | 2017-09-01 19:54:36 +0200 |
commit | a3ec3a8d99a951fc599818b1ea9a2aa218b10fa5 (patch) | |
tree | 3ee714ca52a54635d91c7e3c10b968e0048022be /src/regparse.c | |
parent | 6b31069db6198cd50cc17f2c63917dd2df5775fb (diff) | |
parent | de5adb21f7224352652be174c66fb88e596bb49c (diff) |
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'src/regparse.c')
-rw-r--r-- | src/regparse.c | 646 |
1 files changed, 370 insertions, 276 deletions
diff --git a/src/regparse.c b/src/regparse.c index 25291c5..1fb2357 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -50,6 +50,7 @@ OnigSyntaxType OnigSyntaxRuby = { ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | + ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER | ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT | ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP | @@ -163,7 +164,7 @@ bbuf_clone(BBuf** rto, BBuf* from) *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); CHECK_NULL_RETURN_MEMERR(to); - r = BBUF_INIT(to, from->alloc); + r = BB_INIT(to, from->alloc); if (r != 0) { xfree(to->p); *rto = 0; @@ -288,7 +289,6 @@ onig_strcpy(UChar* dest, const UChar* src, const UChar* end) } } -#ifdef USE_NAMED_GROUP static UChar* strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) { @@ -307,7 +307,6 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) return r; } -#endif static int save_entry(ScanEnv* env, enum SaveType type, int* id) @@ -483,8 +482,6 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key, #endif /* USE_ST_LIBRARY */ -#ifdef USE_NAMED_GROUP - #define INIT_NAME_BACKREFS_ALLOC_NUM 8 typedef struct { @@ -965,49 +962,17 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name, } } -#else /* USE_NAMED_GROUP */ - -extern int -onig_name_to_group_numbers(regex_t* reg, const UChar* name, - const UChar* name_end, int** nums) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onig_name_to_backref_number(regex_t* reg, const UChar* name, - const UChar* name_end, OnigRegion* region) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*,int,int*,regex_t*,void*), void* arg) -{ - return ONIG_NO_SUPPORT_CONFIG; -} - -extern int -onig_number_of_names(regex_t* reg) -{ - return 0; -} -#endif /* else USE_NAMED_GROUP */ - extern int onig_noname_group_capture_is_active(regex_t* reg) { if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) return 0; -#ifdef USE_NAMED_GROUP if (onig_number_of_names(reg) > 0 && IS_SYNTAX_BV(reg->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { return 0; } -#endif return 1; } @@ -1032,10 +997,8 @@ scan_env_clear(ScanEnv* env) #endif env->num_mem = 0; -#ifdef USE_NAMED_GROUP env->num_named = 0; -#endif - env->mem_alloc = 0; + env->mem_alloc = 0; env->mem_env_dynamic = (MemEnv* )NULL; xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static)); @@ -1204,7 +1167,6 @@ node_new(void) Node* node; node = (Node* )xmalloc(sizeof(Node)); - //xmemset(node, 0, sizeof(node->u.base)); xmemset(node, 0, sizeof(*node)); #ifdef DEBUG_NODE_FREE @@ -1218,7 +1180,6 @@ static void initialize_cclass(CClassNode* cc) { BITSET_CLEAR(cc->bs); - /* cc->base.flags = 0; */ cc->flags = 0; cc->mbuf = NULL; } @@ -1235,21 +1196,23 @@ node_new_cclass(void) } static Node* -node_new_ctype(int type, int not) +node_new_ctype(int type, int not, OnigOptionType options) { Node* node = node_new(); CHECK_NULL_RETURN(node); NODE_SET_TYPE(node, NODE_CTYPE); - CTYPE_(node)->ctype = type; - CTYPE_(node)->not = not; + CTYPE_(node)->ctype = type; + CTYPE_(node)->not = not; + CTYPE_(node)->options = options; + CTYPE_(node)->ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(type, options); return node; } static Node* node_new_anychar(void) { - Node* node = node_new_ctype(CTYPE_ANYCHAR, 0); + Node* node = node_new_ctype(CTYPE_ANYCHAR, 0, ONIG_OPTION_NONE); return node; } @@ -1383,14 +1346,15 @@ make_alt(int n, Node* ns[]) } extern Node* -onig_node_new_anchor(int type) +onig_node_new_anchor(int type, int ascii_mode) { Node* node = node_new(); CHECK_NULL_RETURN(node); NODE_SET_TYPE(node, NODE_ANCHOR); - ANCHOR_(node)->type = type; - ANCHOR_(node)->char_len = -1; + ANCHOR_(node)->type = type; + ANCHOR_(node)->char_len = -1; + ANCHOR_(node)->ascii_mode = ascii_mode; return node; } @@ -1506,7 +1470,7 @@ node_new_quantifier(int lower, int upper, int by_number) } static Node* -node_new_enclosure(int type) +node_new_enclosure(enum EnclosureType type) { Node* node = node_new(); CHECK_NULL_RETURN(node); @@ -1637,6 +1601,61 @@ node_new_keep(Node** node, ScanEnv* env) } static int +make_extended_grapheme_cluster(Node** node, ScanEnv* env) +{ + int r; + int i; + Node* x; + Node* ns[2]; + + /* \X == (?>\O(?:\Y\O)*) */ + + ns[1] = NULL_NODE; + + r = ONIGERR_MEMORY; + ns[0] = onig_node_new_anchor(ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, 0); + if (IS_NULL(ns[0])) goto err; + + r = node_new_true_anychar(&ns[1], env); + if (r != 0) goto err1; + + x = make_list(2, ns); + if (IS_NULL(x)) goto err; + ns[0] = x; + ns[1] = NULL_NODE; + + x = node_new_quantifier(0, REPEAT_INFINITE, 1); + if (IS_NULL(x)) goto err; + + NODE_BODY(x) = ns[0]; + ns[0] = NULL_NODE; + ns[1] = x; + + r = node_new_true_anychar(&ns[0], env); + if (r != 0) goto err1; + + x = make_list(2, ns); + if (IS_NULL(x)) goto err; + + ns[0] = x; + ns[1] = NULL_NODE; + + x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); + if (IS_NULL(x)) goto err; + + NODE_BODY(x) = ns[0]; + + *node = x; + return ONIG_NORMAL; + + err: + r = ONIGERR_MEMORY; + err1: + for (i = 0; i < 2; i++) onig_node_free(ns[i]); + return r; +} + +static int make_absent_engine(Node** node, int pre_save_right_id, Node* absent, Node* step_one, int lower, int upper, int possessive, int is_range_cutter, ScanEnv* env) @@ -1663,26 +1682,26 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent, if (r != 0) goto err; x = make_list(4, ns); - if (IS_NULL(x)) goto err; + if (IS_NULL(x)) goto err0; ns[0] = x; ns[1] = step_one; ns[2] = ns[3] = NULL_NODE; x = make_alt(2, ns); - if (IS_NULL(x)) goto err; + if (IS_NULL(x)) goto err0; ns[0] = x; x = node_new_quantifier(lower, upper, 0); - if (IS_NULL(x)) goto err; + if (IS_NULL(x)) goto err0; NODE_BODY(x) = ns[0]; ns[0] = x; if (possessive != 0) { x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); - if (IS_NULL(x)) goto err; + if (IS_NULL(x)) goto err0; NODE_BODY(x) = ns[0]; ns[0] = x; @@ -1696,12 +1715,12 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent, if (r != 0) goto err; x = make_list(2, ns + 1); - if (IS_NULL(x)) goto err; + if (IS_NULL(x)) goto err0; ns[1] = x; ns[2] = NULL_NODE; x = make_alt(2, ns); - if (IS_NULL(x)) goto err; + if (IS_NULL(x)) goto err0; if (is_range_cutter != 0) NODE_STATUS_ADD(x, NST_SUPER); @@ -1709,6 +1728,8 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent, *node = x; return ONIG_NORMAL; + err0: + r = ONIGERR_MEMORY; err: for (i = 0; i < 4; i++) onig_node_free(ns[i]); return r; @@ -1739,7 +1760,7 @@ make_absent_tail(Node** node1, Node** node2, int pre_save_right_id, if (r != 0) goto err; x = make_list(2, ns); - if (IS_NULL(x)) goto err; + if (IS_NULL(x)) goto err0; ns[0] = NULL_NODE; ns[1] = x; @@ -1748,12 +1769,68 @@ make_absent_tail(Node** node1, Node** node2, int pre_save_right_id, if (r != 0) goto err; x = make_alt(2, ns); - if (IS_NULL(x)) goto err; + if (IS_NULL(x)) goto err0; *node1 = save; *node2 = x; return ONIG_NORMAL; + err0: + r = ONIGERR_MEMORY; + err: + onig_node_free(save); + onig_node_free(ns[0]); + onig_node_free(ns[1]); + return r; +} + +static int +make_range_clear(Node** node, ScanEnv* env) +{ + int r; + int id; + Node* save; + Node* x; + Node* ns[2]; + + *node = NULL_NODE; + save = ns[0] = ns[1] = NULL_NODE; + + r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env); + if (r != 0) goto err; + + id = GIMMICK_(save)->id; + r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK, + id, env); + if (r != 0) goto err; + + r = node_new_fail(&ns[1], env); + if (r != 0) goto err; + + x = make_list(2, ns); + if (IS_NULL(x)) goto err0; + + ns[0] = NULL_NODE; ns[1] = x; + + r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_INIT, 0, env); + if (r != 0) goto err; + + x = make_alt(2, ns); + if (IS_NULL(x)) goto err0; + + NODE_STATUS_ADD(x, NST_SUPER); + + ns[0] = save; + ns[1] = x; + save = NULL_NODE; + x = make_list(2, ns); + if (IS_NULL(x)) goto err0; + + *node = x; + return ONIG_NORMAL; + + err0: + r = ONIGERR_MEMORY; err: onig_node_free(save); onig_node_free(ns[0]); @@ -1790,6 +1867,9 @@ is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody, return 0; } + if (QUANT_(quant)->greedy == 0) + return 0; + body = NODE_BODY(quant); switch (NODE_TYPE(body)) { case NODE_STRING: @@ -1856,15 +1936,18 @@ make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* qua ns[2] = ns[3] = NULL_NODE; - r = make_absent_tail(&ns[2], &ns[3], id1, env); + r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_STACK, + id1, env); if (r != 0) goto err; - x = make_list(4, ns); - if (IS_NULL(x)) goto err; + x = make_list(3, ns); + if (IS_NULL(x)) goto err0; *node = x; return ONIG_NORMAL; + err0: + r = ONIGERR_MEMORY; err: for (i = 0; i < 4; i++) onig_node_free(ns[i]); return r; @@ -1892,7 +1975,7 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter, if (expr == NULL_NODE) { /* default expr \O* */ quant = node_new_quantifier(0, REPEAT_INFINITE, 0); - if (IS_NULL(quant)) goto err; + if (IS_NULL(quant)) goto err0; r = node_new_true_anychar(&body, env); if (r != 0) { @@ -1945,19 +2028,21 @@ make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter, if (is_range_cutter != 0) { x = make_list(4, ns); - if (IS_NULL(x)) goto err; + if (IS_NULL(x)) goto err0; } else { r = make_absent_tail(&ns[5], &ns[6], id1, env); if (r != 0) goto err; x = make_list(7, ns); - if (IS_NULL(x)) goto err; + if (IS_NULL(x)) goto err0; } *node = x; return ONIG_NORMAL; + err0: + r = ONIGERR_MEMORY; err: for (i = 0; i < 7; i++) onig_node_free(ns[i]); return r; @@ -2143,7 +2228,7 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) num = 0; while (! PEND) { PFETCH(c); - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + if (IS_CODE_DIGIT_ASCII(enc, c)) { val = (unsigned int )DIGITVAL(c); if ((INT_MAX_LIMIT - val) / 10UL < num) return -1; /* overflow */ @@ -2161,7 +2246,7 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) static int scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, - OnigEncoding enc) + OnigEncoding enc) { OnigCodePoint c; unsigned int num, val; @@ -2171,7 +2256,7 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, num = 0; while (! PEND && maxlen-- != 0) { PFETCH(c); - if (ONIGENC_IS_CODE_XDIGIT(enc, c)) { + if (IS_CODE_XDIGIT_ASCII(enc, c)) { val = (unsigned int )XDIGITVAL(enc,c); if ((INT_MAX_LIMIT - val) / 16UL < num) return -1; /* overflow */ @@ -2189,7 +2274,7 @@ scan_unsigned_hexadecimal_number(UChar** src, UChar* end, int maxlen, static int scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, - OnigEncoding enc) + OnigEncoding enc) { OnigCodePoint c; unsigned int num, val; @@ -2199,7 +2284,7 @@ scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, num = 0; while (! PEND && maxlen-- != 0) { PFETCH(c); - if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { + if (IS_CODE_DIGIT_ASCII(enc, c) && c < '8') { val = ODIGITVAL(c); if ((INT_MAX_LIMIT - val) / 8UL < num) return -1; /* overflow */ @@ -2216,8 +2301,8 @@ scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, } -#define BBUF_WRITE_CODE_POINT(bbuf,pos,code) \ - BBUF_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT) +#define BB_WRITE_CODE_POINT(bbuf,pos,code) \ + BB_WRITE(bbuf, pos, &(code), SIZE_CODE_POINT) /* data format: [n][from-1][to-1][from-2][to-2] ... [from-n][to-n] @@ -2233,7 +2318,7 @@ new_code_range(BBuf** pbuf) bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf)); CHECK_NULL_RETURN_MEMERR(bbuf); - r = BBUF_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE); + r = BB_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE); if (r != 0) { xfree(bbuf); *pbuf = 0; @@ -2241,7 +2326,7 @@ new_code_range(BBuf** pbuf) } n = 0; - BBUF_WRITE_CODE_POINT(bbuf, 0, n); + BB_WRITE_CODE_POINT(bbuf, 0, n); return 0; } @@ -2304,19 +2389,19 @@ add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) int size = (n - high) * 2 * SIZE_CODE_POINT; if (inc_n > 0) { - BBUF_MOVE_RIGHT(bbuf, from_pos, to_pos, size); + BB_MOVE_RIGHT(bbuf, from_pos, to_pos, size); } else { - BBUF_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos); + BB_MOVE_LEFT_REDUCE(bbuf, from_pos, to_pos); } } pos = SIZE_CODE_POINT * (1 + low * 2); - BBUF_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2); - BBUF_WRITE_CODE_POINT(bbuf, pos, from); - BBUF_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to); + BB_ENSURE_SIZE(bbuf, pos + SIZE_CODE_POINT * 2); + BB_WRITE_CODE_POINT(bbuf, pos, from); + BB_WRITE_CODE_POINT(bbuf, pos + SIZE_CODE_POINT, to); n += inc_n; - BBUF_WRITE_CODE_POINT(bbuf, 0, n); + BB_WRITE_CODE_POINT(bbuf, 0, n); return 0; } @@ -2369,7 +2454,7 @@ not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf) return r; } -#define SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2) do {\ +#define SWAP_BB_NOT(bbuf1, not1, bbuf2, not2) do {\ BBuf *tbuf; \ int tnot; \ tnot = not1; not1 = not2; not2 = tnot; \ @@ -2393,7 +2478,7 @@ or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, r = 0; if (IS_NULL(bbuf2)) - SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + SWAP_BB_NOT(bbuf1, not1, bbuf2, not2); if (IS_NULL(bbuf1)) { if (not1 != 0) { @@ -2410,7 +2495,7 @@ or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, } if (not1 != 0) - SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + SWAP_BB_NOT(bbuf1, not1, bbuf2, not2); data1 = (OnigCodePoint* )(bbuf1->p); GET_CODE_POINT(n1, data1); @@ -2435,7 +2520,7 @@ or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, static int and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1, - OnigCodePoint* data, int n) + OnigCodePoint* data, int n) { int i, r; OnigCodePoint from2, to2; @@ -2493,7 +2578,7 @@ and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf) } if (not1 != 0) - SWAP_BBUF_NOT(bbuf1, not1, bbuf2, not2); + SWAP_BB_NOT(bbuf1, not1, bbuf2, not2); data1 = (OnigCodePoint* )(bbuf1->p); data2 = (OnigCodePoint* )(bbuf2->p); @@ -2556,7 +2641,6 @@ and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) bitset_and(bsr1, bsr2); if (bsr1 != dest->bs) { bitset_copy(dest->bs, bsr1); - bsr1 = dest->bs; } if (not1 != 0) { bitset_invert(dest->bs); @@ -2614,7 +2698,6 @@ or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) bitset_or(bsr1, bsr2); if (bsr1 != dest->bs) { bitset_copy(dest->bs, bsr1); - bsr1 = dest->bs; } if (not1 != 0) { bitset_invert(dest->bs); @@ -2661,7 +2744,7 @@ conv_backslash_value(OnigCodePoint c, ScanEnv* env) case 'e': return '\033'; case 'v': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ESC_V_VTAB)) - return '\v'; + return '\v'; break; default: @@ -2874,6 +2957,7 @@ enum TokenSyms { TK_GENERAL_NEWLINE, /* \R */ TK_NO_NEWLINE, /* \N */ TK_TRUE_ANYCHAR, /* \O */ + TK_EXTENDED_GRAPHEME_CLUSTER, /* \X */ /* in cc */ TK_CC_CLOSE, @@ -3110,7 +3194,6 @@ enum REF_NUM { IS_REL_NUM = 2 }; -#ifdef USE_NAMED_GROUP #ifdef USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> @@ -3152,7 +3235,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, if (c == end_code) return ONIGERR_EMPTY_GROUP_NAME; - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + if (IS_CODE_DIGIT_ASCII(enc, c)) { *num_type = IS_ABS_NUM; digit_count++; } @@ -3181,7 +3264,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, } if (*num_type != IS_NOT_NUM) { - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + if (IS_CODE_DIGIT_ASCII(enc, c)) { digit_count++; } else { @@ -3204,7 +3287,7 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, goto end; } PFETCH(c); - if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; + if (! IS_CODE_DIGIT_ASCII(enc, c)) goto err; PUNFETCH; level = onig_scan_unsigned_number(&p, end, enc); if (level < 0) return ONIGERR_TOO_BIG_NUMBER; @@ -3284,7 +3367,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, if (c == end_code) return ONIGERR_EMPTY_GROUP_NAME; - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + if (IS_CODE_DIGIT_ASCII(enc, c)) { if (ref == 1) *num_type = IS_ABS_NUM; else { @@ -3328,7 +3411,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, } if (*num_type != IS_NOT_NUM) { - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { + if (IS_CODE_DIGIT_ASCII(enc, c)) { digit_count++; } else { @@ -3384,111 +3467,6 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, return r; } } -#else -static int -fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* rback_num, - enum REF_NUM* num_type, int ref) -{ - int r, sign; - int digit_count; - OnigCodePoint end_code; - OnigCodePoint c = 0; - UChar *name_end; - OnigEncoding enc = env->enc; - UChar *pnum_head; - UChar *p = *src; - PFETCH_READY; - - *rback_num = 0; - - end_code = get_name_end_code_point(start_code); - - digit_count = 0; - *rname_end = name_end = end; - r = 0; - pnum_head = *src; - *num_type = IS_ABS_NUM; - sign = 1; - - if (PEND) { - return ONIGERR_EMPTY_GROUP_NAME; - } - else { - PFETCH(c); - if (c == end_code) - return ONIGERR_EMPTY_GROUP_NAME; - - if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - *num_type = IS_ABS_NUM; - digit_count++; - } - else if (c == '-') { - if (ref == 1) { - *num_type = IS_REL_NUM; - sign = -1; - pnum_head = p; - } - else { - r = ONIGERR_INVALID_GROUP_NAME; - } - } - else if (c == '+') { - if (ref == 1) { - *num_type = IS_REL_NUM; - sign = 1; - pnum_head = p; - } - else { - r = ONIGERR_INVALID_GROUP_NAME; - } - } - else { - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - } - - while (! PEND) { - name_end = p; - - PFETCH(c); - if (c == end_code || c == ')') break; - - if (ONIGENC_IS_CODE_DIGIT(enc, c)) - digit_count++; - else - r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; - } - if (r == 0 && c != end_code) { - r = ONIGERR_INVALID_GROUP_NAME; - name_end = end; - } - if (r == 0 && digit_count == 0) { - r = ONIGERR_INVALID_GROUP_NAME; - } - - if (r == 0) { - *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); - if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; - else if (*rback_num == 0) { - if (*num_type == IS_REL_NUM) { - r = ONIGERR_INVALID_GROUP_NAME; - goto err; - } - } - *rback_num *= sign; - - *rname_end = name_end; - *src = p; - return 0; - } - else { - err: - onig_scan_env_set_error_string(env, r, *src, name_end); - return r; - } -} -#endif /* USE_NAMED_GROUP */ static void CC_ESC_WARN(ScanEnv* env, UChar *c) @@ -3521,7 +3499,7 @@ CLOSE_BRACKET_WITHOUT_ESC_WARN(ScanEnv* env, UChar* c) static UChar* find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, - UChar **next, OnigEncoding enc) + UChar **next, OnigEncoding enc) { int i; OnigCodePoint x; @@ -3550,7 +3528,7 @@ find_str_position(OnigCodePoint s[], int n, UChar* from, UChar* to, static int str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, - OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn) + OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn) { int i, in_esc; OnigCodePoint x; @@ -3699,7 +3677,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; if (!PEND) { c2 = PPEEK; - if (ONIGENC_IS_CODE_DIGIT(enc, c2)) + if (IS_CODE_DIGIT_ASCII(enc, c2)) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } @@ -3726,7 +3704,7 @@ fetch_token_in_cc(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; if (!PEND) { c2 = PPEEK; - if (ONIGENC_IS_CODE_XDIGIT(enc, c2)) + if (IS_CODE_XDIGIT_ASCII(enc, c2)) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } @@ -3956,13 +3934,25 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case 'b': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_WORD_BOUND; + tok->u.anchor = ANCHOR_WORD_BOUNDARY; break; case 'B': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_B_WORD_BOUND)) break; tok->type = TK_ANCHOR; - tok->u.anchor = ANCHOR_NOT_WORD_BOUND; + tok->u.anchor = ANCHOR_NO_WORD_BOUNDARY; + break; + + case 'y': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY; + break; + + case 'Y': + if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break; + tok->type = TK_ANCHOR; + tok->u.anchor = ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY; break; #ifdef USE_WORD_BEGIN_END @@ -4041,6 +4031,11 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->type = TK_TRUE_ANYCHAR; break; + case 'X': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER)) break; + tok->type = TK_EXTENDED_GRAPHEME_CLUSTER; + break; + case 'A': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; begin_buf: @@ -4086,7 +4081,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) num = scan_unsigned_octal_number(&p, end, 11, enc); if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; if (!PEND) { - if (ONIGENC_IS_CODE_DIGIT(enc, PPEEK)) + if (IS_CODE_DIGIT_ASCII(enc, PPEEK)) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } @@ -4111,7 +4106,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) num = scan_unsigned_hexadecimal_number(&p, end, 8, enc); if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; if (!PEND) { - if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) + if (IS_CODE_XDIGIT_ASCII(enc, PPEEK)) return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; } @@ -4205,7 +4200,6 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) } break; -#ifdef USE_NAMED_GROUP case 'k': if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { PFETCH(c); @@ -4277,7 +4271,6 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) PUNFETCH; } break; -#endif #ifdef USE_CALL case 'g': @@ -4531,8 +4524,8 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) static int add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, - OnigEncoding enc ARG_UNUSED, - OnigCodePoint sb_out, const OnigCodePoint mbr[]) + OnigEncoding enc ARG_UNUSED, OnigCodePoint sb_out, + const OnigCodePoint mbr[]) { int i, r; OnigCodePoint j; @@ -4591,33 +4584,140 @@ add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, if (r != 0) return r; } prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; + if (prev == 0) goto end; } - if (prev < 0x7fffffff) { - r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff); + + r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT); + if (r != 0) return r; + } + + end: + return 0; +} + +static int +add_ctype_to_cc_by_range_limit(CClassNode* cc, int ctype ARG_UNUSED, int not, + OnigEncoding enc ARG_UNUSED, + OnigCodePoint sb_out, + const OnigCodePoint mbr[], OnigCodePoint limit) +{ + int i, r; + OnigCodePoint j; + OnigCodePoint from; + OnigCodePoint to; + + int n = ONIGENC_CODE_RANGE_NUM(mbr); + + if (not == 0) { + for (i = 0; i < n; i++) { + for (j = ONIGENC_CODE_RANGE_FROM(mbr, i); + j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) { + if (j > limit) goto end; + if (j >= sb_out) { + if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { + to = ONIGENC_CODE_RANGE_TO(mbr, i); + if (to > limit) to = limit; + r = add_code_range_to_buf(&(cc->mbuf), j, to); + if (r != 0) return r; + i++; + } + + goto sb_end; + } + BITSET_SET_BIT(cc->bs, j); + } + } + + sb_end: + for ( ; i < n; i++) { + from = ONIGENC_CODE_RANGE_FROM(mbr, i); + to = ONIGENC_CODE_RANGE_TO(mbr, i); + if (from > limit) break; + if (to > limit) to = limit; + r = add_code_range_to_buf(&(cc->mbuf), from, to); if (r != 0) return r; } } + else { + OnigCodePoint prev = 0; + + for (i = 0; i < n; i++) { + from = ONIGENC_CODE_RANGE_FROM(mbr, i); + if (from > limit) { + for (j = prev; j < sb_out; j++) { + BITSET_SET_BIT(cc->bs, j); + } + goto sb_end2; + } + for (j = prev; j < from; j++) { + if (j >= sb_out) goto sb_end2; + BITSET_SET_BIT(cc->bs, j); + } + prev = ONIGENC_CODE_RANGE_TO(mbr, i); + if (prev > limit) prev = limit; + prev++; + if (prev == 0) goto end; + } + for (j = prev; j < sb_out; j++) { + BITSET_SET_BIT(cc->bs, j); + } + + sb_end2: + prev = sb_out; + + for (i = 0; i < n; i++) { + from = ONIGENC_CODE_RANGE_FROM(mbr, i); + if (from > limit) goto last; + + if (prev < from) { + r = add_code_range_to_buf(&(cc->mbuf), prev, from - 1); + if (r != 0) return r; + } + prev = ONIGENC_CODE_RANGE_TO(mbr, i); + if (prev > limit) prev = limit; + prev++; + if (prev == 0) goto end; + } + + last: + r = add_code_range_to_buf(&(cc->mbuf), prev, MAX_CODE_POINT); + if (r != 0) return r; + } + end: return 0; } static int add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) { +#define ASCII_LIMIT 127 + int c, r; + int ascii_mode; const OnigCodePoint *ranges; + OnigCodePoint limit; OnigCodePoint sb_out; OnigEncoding enc = env->enc; + ascii_mode = IS_ASCII_MODE_CTYPE_OPTION(ctype, env->options); + r = ONIGENC_GET_CTYPE_CODE_RANGE(enc, ctype, &sb_out, &ranges); if (r == 0) { - return add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges); + if (ascii_mode == 0) + r = add_ctype_to_cc_by_range(cc, ctype, not, env->enc, sb_out, ranges); + else + r = add_ctype_to_cc_by_range_limit(cc, ctype, not, env->enc, sb_out, + ranges, ASCII_LIMIT); + return r; } else if (r != ONIG_NO_SUPPORT_CONFIG) { return r; } r = 0; + limit = ascii_mode ? ASCII_LIMIT : SINGLE_BYTE_SIZE; + switch (ctype) { case ONIGENC_CTYPE_ALPHA: case ONIGENC_CTYPE_BLANK: @@ -4631,14 +4731,18 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) case ONIGENC_CTYPE_ASCII: case ONIGENC_CTYPE_ALNUM: if (not != 0) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + for (c = 0; c < limit; c++) { if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) BITSET_SET_BIT(cc->bs, c); } + for (c = limit; c < SINGLE_BYTE_SIZE; c++) { + BITSET_SET_BIT(cc->bs, c); + } + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { + for (c = 0; c < limit; c++) { if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) BITSET_SET_BIT(cc->bs, c); } @@ -4647,34 +4751,25 @@ add_ctype_to_cc(CClassNode* cc, int ctype, int not, ScanEnv* env) case ONIGENC_CTYPE_GRAPH: case ONIGENC_CTYPE_PRINT: + case ONIGENC_CTYPE_WORD: if (not != 0) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + for (c = 0; c < limit; c++) { + if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0 /* check invalid code point */ + && ! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) BITSET_SET_BIT(cc->bs, c); } - } - else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) + for (c = limit; c < SINGLE_BYTE_SIZE; c++) { + if (ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) BITSET_SET_BIT(cc->bs, c); } - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); - } - break; - - case ONIGENC_CTYPE_WORD: - if (not == 0) { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if (IS_CODE_SB_WORD(enc, c)) BITSET_SET_BIT(cc->bs, c); - } - ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } else { - for (c = 0; c < SINGLE_BYTE_SIZE; c++) { - if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ - && ! ONIGENC_IS_CODE_WORD(enc, c)) + for (c = 0; c < limit; c++) { + if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) BITSET_SET_BIT(cc->bs, c); } + if (ascii_mode == 0) + ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); } break; @@ -4792,8 +4887,7 @@ fetch_char_property_to_ctype(UChar** src, UChar* end, ScanEnv* env) } static int -parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, - ScanEnv* env) +parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) { int r, ctype; CClassNode* cc; @@ -4827,7 +4921,7 @@ enum CCVALTYPE { static int next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, - enum CCSTATE* state, ScanEnv* env) + enum CCSTATE* state, ScanEnv* env) { int r; @@ -4850,9 +4944,9 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, static int next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to, - int* from_israw, int to_israw, - enum CCVALTYPE intype, enum CCVALTYPE* type, - enum CCSTATE* state, ScanEnv* env) + int* from_israw, int to_israw, + enum CCVALTYPE intype, enum CCVALTYPE* type, + enum CCSTATE* state, ScanEnv* env) { int r; @@ -4921,7 +5015,7 @@ next_state_val(CClassNode* cc, OnigCodePoint *from, OnigCodePoint to, static int code_exist_check(OnigCodePoint c, UChar* from, UChar* end, int ignore_escaped, - ScanEnv* env) + ScanEnv* env) { int in_esc; OnigCodePoint code; @@ -5282,19 +5376,17 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, } static int parse_subexp(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env); + UChar** src, UChar* end, ScanEnv* env); static int parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, - ScanEnv* env) + ScanEnv* env) { int r, num; Node *target; OnigOptionType option; OnigCodePoint c; -#ifdef USE_NAMED_GROUP int list_capture; -#endif OnigEncoding enc = env->enc; UChar* p = *src; @@ -5322,16 +5414,15 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, break; case '=': - *np = onig_node_new_anchor(ANCHOR_PREC_READ); + *np = onig_node_new_anchor(ANCHOR_PREC_READ, 0); break; case '!': /* preceding read */ - *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); + *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT, 0); break; case '>': /* (?>...) stop backtrack */ *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); break; -#ifdef USE_NAMED_GROUP case '\'': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { goto named_group1; @@ -5339,16 +5430,14 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, else return ONIGERR_UNDEFINED_GROUP_OPTION; break; -#endif case '<': /* look behind (?<=...), (?<!...) */ if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; PFETCH(c); if (c == '=') - *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND); + *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND, 0); else if (c == '!') - *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT); -#ifdef USE_NAMED_GROUP + *np = onig_node_new_anchor(ANCHOR_LOOK_BEHIND_NOT, 0); else { if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { UChar *name; @@ -5385,11 +5474,6 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, return ONIGERR_UNDEFINED_GROUP_OPTION; } } -#else - else { - return ONIGERR_UNDEFINED_GROUP_OPTION; - } -#endif break; case '~': @@ -5406,10 +5490,9 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; head_bar = 1; - if (PPEEK_IS(')')) { // (?~|) : absent clear + if (PPEEK_IS(')')) { // (?~|) : range clear PINC; - r = node_new_update_var_gimmick(np, UPDATE_VAR_RIGHT_RANGE_INIT, - 0, env); + r = make_range_clear(np, env); if (r != 0) return r; goto end; } @@ -5471,7 +5554,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, PFETCH(c); if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; - if (ONIGENC_IS_CODE_DIGIT(enc, c) + if (IS_CODE_DIGIT_ASCII(enc, c) || c == '-' || c == '+' || c == '<' || c == '\'') { UChar* name_end; int back_num; @@ -5647,7 +5730,6 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, case '@': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) { -#ifdef USE_NAMED_GROUP if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { PFETCH(c); if (c == '<' || c == '\'') { @@ -5656,7 +5738,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } PUNFETCH; } -#endif + *np = node_new_memory(0); CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); @@ -5678,6 +5760,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, case 'p': #endif case '-': case 'i': case 'm': case 's': case 'x': + case 'W': case 'D': case 'S': case 'P': { int neg = 0; @@ -5713,6 +5796,11 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg); break; #endif + case 'W': OPTION_NEGATE(option, ONIG_OPTION_WORD_IS_ASCII, neg); break; + case 'D': OPTION_NEGATE(option, ONIG_OPTION_DIGIT_IS_ASCII, neg); break; + case 'S': OPTION_NEGATE(option, ONIG_OPTION_SPACE_IS_ASCII, neg); break; + case 'P': OPTION_NEGATE(option, ONIG_OPTION_POSIX_IS_ASCII, neg); break; + default: return ONIGERR_UNDEFINED_GROUP_OPTION; } @@ -5918,8 +6006,7 @@ typedef struct { } IApplyCaseFoldArg; static int -i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], - int to_len, void* arg) +i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], int to_len, void* arg) { IApplyCaseFoldArg* iarg; ScanEnv* env; @@ -5998,8 +6085,8 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], } static int -parse_exp(Node** np, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) +parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env) { int r, len, group = 0; Node* qn; @@ -6145,7 +6232,7 @@ parse_exp(Node** np, OnigToken* tok, int term, { switch (tok->u.prop.ctype) { case ONIGENC_CTYPE_WORD: - *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not); + *np = node_new_ctype(tok->u.prop.ctype, tok->u.prop.not, env->options); CHECK_NULL_RETURN_MEMERR(*np); break; @@ -6253,7 +6340,11 @@ parse_exp(Node** np, OnigToken* tok, int term, #endif case TK_ANCHOR: - *np = onig_node_new_anchor(tok->u.anchor); + { + int ascii_mode = + IS_WORD_ASCII(env->options) && IS_WORD_ANCHOR_TYPE(tok->u.anchor) ? 1 : 0; + *np = onig_node_new_anchor(tok->u.anchor, ascii_mode); + } break; case TK_OP_REPEAT: @@ -6289,6 +6380,11 @@ parse_exp(Node** np, OnigToken* tok, int term, if (r < 0) return r; break; + case TK_EXTENDED_GRAPHEME_CLUSTER: + r = make_extended_grapheme_cluster(np, env); + if (r < 0) return r; + break; + default: return ONIGERR_PARSER_BUG; break; @@ -6356,8 +6452,8 @@ parse_exp(Node** np, OnigToken* tok, int term, } static int -parse_branch(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) +parse_branch(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env) { int r; Node *node, **headp; @@ -6399,8 +6495,8 @@ parse_branch(Node** top, OnigToken* tok, int term, /* term_tok: TK_EOT or TK_SUBEXP_CLOSE */ static int -parse_subexp(Node** top, OnigToken* tok, int term, - UChar** src, UChar* end, ScanEnv* env) +parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, + ScanEnv* env) { int r; Node *node, **headp; @@ -6492,9 +6588,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, int r; UChar* p; -#ifdef USE_NAMED_GROUP names_clear(reg); -#endif scan_env_clear(env); env->options = reg->options; @@ -6531,7 +6625,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, extern void onig_scan_env_set_error_string(ScanEnv* env, int ecode ARG_UNUSED, - UChar* arg, UChar* arg_end) + UChar* arg, UChar* arg_end) { env->error = arg; env->error_end = arg_end; |