diff options
-rw-r--r-- | .bzrignore | 3 | ||||
-rw-r--r-- | .gitignore | 31 | ||||
-rw-r--r-- | CMakeLists.txt | 3 | ||||
-rw-r--r-- | HISTORY | 14 | ||||
-rw-r--r-- | README.md | 8 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | debian/changelog | 8 | ||||
-rw-r--r-- | debian/patches/001-changes_build_sys.diff | 44 | ||||
-rw-r--r-- | debian/patches/0500-CVE-2017-922[4-9].patch | 144 | ||||
-rw-r--r-- | debian/patches/series | 2 | ||||
-rw-r--r-- | debian/symbols | 4 | ||||
-rw-r--r-- | dist.info | 2 | ||||
-rw-r--r-- | doc/API.ja | 562 | ||||
-rw-r--r-- | doc/FAQ.ja | 14 | ||||
-rw-r--r-- | doc/RE | 7 | ||||
-rw-r--r-- | doc/RE.ja | 482 | ||||
-rw-r--r-- | index.html | 3 | ||||
-rw-r--r-- | index_ja.html | 3 | ||||
-rw-r--r-- | src/config.h.cmake.in | 3 | ||||
-rw-r--r-- | src/oniguruma.h | 6 | ||||
-rw-r--r-- | src/regcomp.c | 3258 | ||||
-rw-r--r-- | src/regenc.h | 4 | ||||
-rw-r--r-- | src/regerror.c | 84 | ||||
-rw-r--r-- | src/regexec.c | 252 | ||||
-rw-r--r-- | src/regext.c | 8 | ||||
-rw-r--r-- | src/regint.h | 119 | ||||
-rw-r--r-- | src/regparse.c | 791 | ||||
-rw-r--r-- | src/regparse.h | 334 | ||||
-rw-r--r-- | src/st.c | 484 | ||||
-rw-r--r-- | src/unicode.c | 12 | ||||
-rw-r--r-- | test/testc.c | 12 |
31 files changed, 3618 insertions, 3085 deletions
diff --git a/.bzrignore b/.bzrignore new file mode 100644 index 0000000..2386f62 --- /dev/null +++ b/.bzrignore @@ -0,0 +1,3 @@ +.git +**/.git +**/.pc @@ -1,5 +1,28 @@ +Makefile +autom4te.cache/ +ltmain.sh +stamp-h1 +configure +config.status +config.log +config.h +config.h.in +onig-config +libtool +aclocal.m4 +Makefile.in +*.o +*.obj +*.so +*.lo +*.la +*.pc +*~ +.libs/ +.deps/ +testc +testcu +testp +/build +m4/*.m4 .bzr -.bzrignore -.pc -.gitignore -debian/files
\ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b262f1..60ce397 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 2.8) project(oniguruma C) set(PACKAGE onig) -set(PACKAGE_VERSION "6.3.0") +set(PACKAGE_VERSION "6.4.0") set(USE_COMBINATION_EXPLOSION_CHECK 0) set(USE_CRNL_AS_LINE_TERMINATOR 0) @@ -44,6 +44,7 @@ check_include_files(sys/times.h HAVE_SYS_TIMES_H) check_include_files(sys/time.h HAVE_SYS_TIME_H) check_include_files(sys/types.h HAVE_SYS_TYPES_H) check_include_files(unistd.h HAVE_UNISTD_H) +check_include_files(inttypes.h HAVE_INTTYPES_H) check_type_size(int SIZEOF_INT) check_type_size(long SIZEOF_LONG) check_type_size(short SIZEOF_SHORT) @@ -1,5 +1,19 @@ History +2017/07/03: Version 6.4.0 + +2017/06/30: fix memory leaks +2017/06/29: fix memory leaks +2017/06/28: change encoding of doc/XXXX.ja from EUC-JP to UTF-8 +2017/06/28: update doc/RE, and doc/RE.ja +2017/06/26: fix fatal bug of endless repeat check on Windows +2017/06/26: PR #62 : add check for return values +2017/06/23: [new] support call zero (\g{0}) +2017/06/23: [new] support relative call by positive number +2017/06/23: [new] support relative back-reference by positive number +2017/06/15: fix #60 : check value type +2017/06/02: change output format for ONIG_DEBUG_COMPILE and ONIG_DEBUG_MATCH + 2017/05/29: Version 6.3.0 2017/05/24: fix #60 : invalid state(CCS_VALUE) in parse_char_class() @@ -20,6 +20,14 @@ Supported character encodings: * CP1251: contributed by Byte +New feature of version 6.4.0 +-------------------------- + +* Fix fatal problem of endless repeat on Windows +* NEW: call zero (call the total regexp) +* NEW: relative backref and relative call by positive number + + New feature of version 6.3.0 -------------------------- diff --git a/configure.ac b/configure.ac index 1a7ca9b..688d15b 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. -AC_INIT(onig, 6.3.0) +AC_INIT(onig, 6.4.0) AC_CONFIG_MACRO_DIR([m4]) diff --git a/debian/changelog b/debian/changelog index b7b74ad..69a8598 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,11 @@ +libonig (6.4.0-1) unstable; urgency=medium + + * New upstream release. + + Refresh symbols file. + * Remove not longer needed patches. + + -- J旦rg Frings-F端rst <debian@jff-webhosting.net> Sun, 23 Jul 2017 11:51:31 +0200 + libonig (6.3.0-1) unstable; urgency=medium * New upstream release. diff --git a/debian/patches/001-changes_build_sys.diff b/debian/patches/001-changes_build_sys.diff deleted file mode 100644 index 5750433..0000000 --- a/debian/patches/001-changes_build_sys.diff +++ /dev/null @@ -1,44 +0,0 @@ -Description: some buildsystem changes -Author: J旦rg Frings-F端rst <debian@jff-webhosting.net> -Reviewed-by: -Last-Update: 2014-05-07 ---- -This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ -Index: trunk/configure.in -=================================================================== ---- trunk.orig/configure.in -+++ trunk/configure.in -@@ -1,9 +1,11 @@ - dnl Process this file with autoconf to produce a configure script. - AC_INIT(onig, 5.9.6) - -+ -+AUTOMAKE_OPTIONS=subdir-objects - AC_CONFIG_MACRO_DIR([m4]) - --AM_INIT_AUTOMAKE -+AM_INIT_AUTOMAKE([foreign]) - AC_CONFIG_HEADER(config.h) - - -@@ -41,7 +43,7 @@ fi - dnl Checks for programs. - AC_PROG_CC - AM_PROG_LIBTOOL --LTVERSION="2:0:0" -+LTVERSION="2:1:0" - AC_SUBST(LTVERSION) - - AC_PROG_INSTALL -Index: trunk/Makefile.am -=================================================================== ---- trunk.orig/Makefile.am -+++ trunk/Makefile.am -@@ -3,6 +3,7 @@ encdir = $(top_srcdir)/enc - sampledir = $(top_srcdir)/sample - libname = libonig.la - -+AUTOMAKE_OPTIONS=subdir-objects - ACLOCAL_AMFLAGS = -I m4 - #AM_CFLAGS = -DNOT_RUBY - AM_CFLAGS = diff --git a/debian/patches/0500-CVE-2017-922[4-9].patch b/debian/patches/0500-CVE-2017-922[4-9].patch deleted file mode 100644 index d28b6ad..0000000 --- a/debian/patches/0500-CVE-2017-922[4-9].patch +++ /dev/null @@ -1,144 +0,0 @@ -Correct CVE-2017-922[4-9] - Fix mutilple invalid pointer dereference, out-of-bounds write memory - corruption and stack buffer overflow, -Origin: Cheerypicked from upstream -Bug: https://github.com/kkos/oniguruma/issues/[55|56|57|58|59|60] -Bug-Debian: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=86331[2|3|4|5|6|8] -Forwarded: not-needed -Last-Update: 2017-05-25 ---- -This patch header follows DEP-3: http://dep.debian.net/deps/dep3/ -Index: 6.1.3-1+deb9u1/src/regexec.c -=================================================================== ---- 6.1.3-1+deb9u1.orig/src/regexec.c -+++ 6.1.3-1+deb9u1/src/regexec.c -@@ -1463,14 +1463,9 @@ match_at(regex_t* reg, const UChar* str, - break; - - case OP_EXACT1: MOP_IN(OP_EXACT1); --#if 0 - DATA_ENSURE(1); - if (*p != *s) goto fail; - p++; s++; --#endif -- if (*p != *s++) goto fail; -- DATA_ENSURE(0); -- p++; - MOP_OUT; - break; - -@@ -3149,6 +3144,8 @@ forward_search_range(regex_t* reg, const - } - else { - UChar *q = p + reg->dmin; -+ -+ if (q >= end) return 0; /* fail */ - while (p < q) p += enclen(reg->enc, p); - } - } -@@ -3228,18 +3225,25 @@ forward_search_range(regex_t* reg, const - } - else { - if (reg->dmax != ONIG_INFINITE_DISTANCE) { -- *low = p - reg->dmax; -- if (*low > s) { -- *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, -- *low, (const UChar** )low_prev); -- if (low_prev && IS_NULL(*low_prev)) -- *low_prev = onigenc_get_prev_char_head(reg->enc, -- (pprev ? pprev : s), *low); -- } -- else { -+ if (p - str < reg->dmax) { -+ *low = (UChar* )str; - if (low_prev) -- *low_prev = onigenc_get_prev_char_head(reg->enc, -- (pprev ? pprev : str), *low); -+ *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low); -+ } -+ else { -+ *low = p - reg->dmax; -+ if (*low > s) { -+ *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s, -+ *low, (const UChar** )low_prev); -+ if (low_prev && IS_NULL(*low_prev)) -+ *low_prev = onigenc_get_prev_char_head(reg->enc, -+ (pprev ? pprev : s), *low); -+ } -+ else { -+ if (low_prev) -+ *low_prev = onigenc_get_prev_char_head(reg->enc, -+ (pprev ? pprev : str), *low); -+ } - } - } - } -Index: 6.1.3-1+deb9u1/src/regparse.c -=================================================================== ---- 6.1.3-1+deb9u1.orig/src/regparse.c -+++ 6.1.3-1+deb9u1/src/regparse.c -@@ -2986,7 +2986,7 @@ fetch_token_in_cc(OnigToken* tok, UChar* - PUNFETCH; - prev = p; - num = scan_unsigned_octal_number(&p, end, 3, enc); -- if (num < 0) return ONIGERR_TOO_BIG_NUMBER; -+ if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } -@@ -3358,7 +3358,7 @@ fetch_token(OnigToken* tok, UChar** src, - if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_OCTAL3)) { - prev = p; - num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); -- if (num < 0) return ONIGERR_TOO_BIG_NUMBER; -+ if (num < 0 || num >= 256) return ONIGERR_TOO_BIG_NUMBER; - if (p == prev) { /* can't read nothing. */ - num = 0; /* but, it's not error */ - } -@@ -3994,7 +3994,9 @@ next_state_class(CClassNode* cc, OnigCod - } - } - -- *state = CCS_VALUE; -+ if (*state != CCS_START) -+ *state = CCS_VALUE; -+ - *type = CCV_CLASS; - return 0; - } -@@ -4010,6 +4012,9 @@ next_state_val(CClassNode* cc, OnigCodeP - switch (*state) { - case CCS_VALUE: - if (*type == CCV_SB) { -+ if (*vs > 0xff) -+ return ONIGERR_INVALID_CODE_POINT_VALUE; -+ - BITSET_SET_BIT(cc->bs, (int )(*vs)); - } - else if (*type == CCV_CODE_POINT) { -Index: 6.1.3-1+deb9u1/src/gperf_unfold_key_conv.py -=================================================================== ---- 6.1.3-1+deb9u1.orig/src/gperf_unfold_key_conv.py -+++ 6.1.3-1+deb9u1/src/gperf_unfold_key_conv.py -@@ -36,7 +36,7 @@ def parse_line(s): - if r != s: return r - r = re.sub(REG_GET_CODE, 'OnigCodePoint gcode = wordlist[key].code;', s) - if r != s: return r -- r = re.sub(REG_CODE_CHECK, 'if (code == gcode)', s) -+ r = re.sub(REG_CODE_CHECK, 'if (code == gcode && wordlist[key].index >= 0)', s) - if r != s: return r - - return s -Index: 6.1.3-1+deb9u1/src/unicode_unfold_key.c -=================================================================== ---- 6.1.3-1+deb9u1.orig/src/unicode_unfold_key.c -+++ 6.1.3-1+deb9u1/src/unicode_unfold_key.c -@@ -2844,7 +2844,7 @@ unicode_unfold_key(OnigCodePoint code) - { - OnigCodePoint gcode = wordlist[key].code; - -- if (code == gcode) -+ if (code == gcode && wordlist[key].index >= 0) - return &wordlist[key]; - } - } diff --git a/debian/patches/series b/debian/patches/series index 2f55f57..ea79fff 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,3 +1 @@ -#001-changes_build_sys.diff 0100-source_typos.patch -#0500-CVE-2017-922[4-9].patch diff --git a/debian/symbols b/debian/symbols index 086e86c..23c1b49 100644 --- a/debian/symbols +++ b/debian/symbols @@ -95,7 +95,7 @@ libonig.so.4 libonig4 #MINVER# onig_node_list_add@Base 5.9.5 onig_node_new_alt@Base 5.9.5 onig_node_new_anchor@Base 5.9.5 - onig_node_new_enclose@Base 5.9.5 + onig_node_new_enclosure@Base 6.4.0 onig_node_new_list@Base 5.9.5 onig_node_new_str@Base 5.9.5 onig_node_str_cat@Base 5.9.5 @@ -106,7 +106,7 @@ libonig.so.4 libonig4 #MINVER# onig_number_of_capture_histories@Base 5.9.5 onig_number_of_captures@Base 5.9.5 onig_number_of_names@Base 5.9.5 - onig_parse_make_tree@Base 5.9.5 + onig_parse_tree@Base 6.4.0 onig_reduce_nested_quantifier@Base 5.9.5 onig_reg_init@Base 5.9.5 onig_region_clear@Base 5.9.5 @@ -1,7 +1,7 @@ --- This file is part of LuaDist project name = "onig" -version = "6.3.0" +version = "6.4.0" desc = "Oniguruma is a regular expressions library." author = "K.Kosako" @@ -1,89 +1,89 @@ -鬼車インターフェース Version 6.1.0 2016/08/22 +薔取ゃ潟帥若с若 Version 6.1.0 2016/08/22 #include <oniguruma.h> # int onig_initialize(OnigEncoding use_encodings[], int num_encodings) - ライブラリの初期化 - 最初に呼び出す必要がある。 + ゃ + 若喝冴綽荀 - * onig_init() は廃止 + * onig_init() 綮罩 - 引数 - 1 use_encodings: 使用する文字エンコーディングの配列 - 2 num_encodings: 文字エンコーディングの数 + 綣 + 1 use_encodings: 篏睡絖潟潟若c潟違 + 2 num_encodings: 絖潟潟若c潟違 # int onig_error_code_to_str(UChar* err_buf, int err_code, ...) - エラーメッセージを取得する。 + 若<祉若吾緇 - この関数を、onig_new()の結果に対して呼び出す場合には、onig_new()のpattern引数を - メモリ解放するよりも前に呼び出さなければならない。 + ∽違onig_new()腟絲障若喝冴翫onig_new()pattern綣違 + <≪茹f障若喝冴違 - 正常終了戻り値: エラーメッセージ文字列のバイト長 + 罩e幻腟篋祉: 若<祉若御絖ゃ - 引数 - 1 err_buf: エラーメッセージを格納する領域 - (必要なサイズ: ONIG_MAX_ERROR_MESSAGE_LEN) - 2 err_code: エラーコード - 3 err_info (optional): onig_new()のerr_info + 綣 + 1 err_buf: 若<祉若吾主 + (綽荀泣ゃ: ONIG_MAX_ERROR_MESSAGE_LEN) + 2 err_code: 若潟若 + 3 err_info (optional): onig_new()err_info # void onig_set_warn_func(OnigWarnFunc func) - 警告通知関数をセットする。 + 茘ラ∽違祉 - 警告: + 茘: '[', '-', ']' in character class without escape. ']' in pattern without escape. - 引数 - 1 func: 警告関数 void (*func)(char* warning_message) + 綣 + 1 func: 茘∽ void (*func)(char* warning_message) # void onig_set_verb_warn_func(OnigWarnFunc func) - 詳細警告通知関数をセットする。 + 荅括完茘ラ∽違祉 - 詳細警告: + 荅括完茘: redundant nested repeat operator. - 引数 - 1 func: 詳細警告関数 void (*func)(char* warning_message) + 綣 + 1 func: 荅括完茘∽ void (*func)(char* warning_message) # int onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* err_info) - 正規表現オブジェクト(regex)を作成する。 + 罩h頫憗吾с(regex)篏 - 正常終了戻り値: ONIG_NORMAL + 罩e幻腟篋祉: ONIG_NORMAL - 引数 - 1 reg: 作成された正規表現オブジェクトを返すアドレス - 2 pattern: 正規表現パターン文字列 - 3 pattern_end: 正規表現パターン文字列の終端アドレス(pattern + pattern length) - 4 option: 正規表現コンパイル時オプション + 綣 + 1 reg: 篏罩h頫憗吾с菴≪ + 2 pattern: 罩h頫憗帥若恰絖 + 3 pattern_end: 罩h頫憗帥若恰絖腟腴≪(pattern + pattern length) + 4 option: 罩h頫憗潟潟ゃ激с - ONIG_OPTION_NONE オプションなし + ONIG_OPTION_NONE 激с潟 ONIG_OPTION_SINGLELINE '^' -> '\A', '$' -> '\Z' - ONIG_OPTION_MULTILINE '.'が改行にマッチする - ONIG_OPTION_IGNORECASE 曖昧マッチ オン - ONIG_OPTION_EXTEND パターン拡張形式 - ONIG_OPTION_FIND_LONGEST 最長マッチ - ONIG_OPTION_FIND_NOT_EMPTY 空マッチを無視 + ONIG_OPTION_MULTILINE '.'壕 + ONIG_OPTION_IGNORECASE с + ONIG_OPTION_EXTEND 帥若恰≦宍綵√ + ONIG_OPTION_FIND_LONGEST 激 + ONIG_OPTION_FIND_NOT_EMPTY 腥冴∴ ONIG_OPTION_NEGATE_SINGLELINE ONIG_SYNTAX_POSIX_BASIC, ONIG_SYNTAX_POSIX_EXTENDED, - ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVAで - デフォルトで有効なONIG_OPTION_SINGLELINEをクリアする。 + ONIG_SYNTAX_PERL, ONIG_SYNTAX_PERL_NG, ONIG_SYNTAX_JAVA + ф鴻ONIG_OPTION_SINGLELINE≪ - ONIG_OPTION_DONT_CAPTURE_GROUP 名前付き捕獲式集合のみ捕獲 - ONIG_OPTION_CAPTURE_GROUP 名前無し捕獲式集合も捕獲 + ONIG_OPTION_DONT_CAPTURE_GROUP 篁峨炊 + ONIG_OPTION_CAPTURE_GROUP <峨 - 5 enc: 文字エンコーディング + 5 enc: 絖潟潟若c潟 ONIG_ENCODING_ASCII ASCII ONIG_ENCODING_ISO_8859_1 ISO 8859-1 @@ -116,9 +116,9 @@ ONIG_ENCODING_BIG5 Big5 ONIG_ENCODING_GB18030 GB18030 - または、ユーザが定義したOnigEncodingTypeデータのアドレス + 障若吟絎臂OnigEncodingType若帥≪ - 6 syntax: 正規表現パターン文法定義 + 6 syntax: 罩h頫憗帥若恰羈絎臂 ONIG_SYNTAX_ASIS plain text ONIG_SYNTAX_POSIX_BASIC POSIX Basic RE @@ -128,15 +128,15 @@ ONIG_SYNTAX_GNU_REGEX GNU regex ONIG_SYNTAX_JAVA Java (Sun java.util.regex) ONIG_SYNTAX_PERL Perl - ONIG_SYNTAX_PERL_NG Perl + 名前付き捕獲式集合 + ONIG_SYNTAX_PERL_NG Perl + 篁峨 ONIG_SYNTAX_RUBY Ruby ONIG_SYNTAX_DEFAULT default (== Ruby) onig_set_default_syntax() - または、ユーザが定義したOnigSyntaxTypeデータのアドレス + 障若吟絎臂OnigSyntaxType若帥≪ - 7 err_info: エラー情報を返すためのアドレス - onig_error_code_to_str()の三番目の引数として使用する + 7 err_info: 惹宴菴≪ + onig_error_code_to_str()筝綣違篏睡 @@ -145,44 +145,44 @@ OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* err_info) - 正規表現オブジェクト(regex)を作成する。 - regの領域を内部で割り当てない。 + 罩h頫憗吾с(regex)篏 + regу蚊綵 - 正常終了戻り値: ONIG_NORMAL + 罩e幻腟篋祉: ONIG_NORMAL # int onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo) - 正規表現オブジェクト(regex)を作成する。 - この関数は、onig_new()のデラックス版。 + 罩h頫憗吾с(regex)篏 + ∽違onig_new()合 - 正常終了戻り値: ONIG_NORMAL + 罩e幻腟篋祉: ONIG_NORMAL - 引数 - 1 reg: 作成された正規表現オブジェクトを返すアドレス - 2 pattern: 正規表現パターン文字列 - 3 pattern_end: 正規表現パターン文字列の終端アドレス(pattern + pattern length) - 4 ci: コンパイル情報 + 綣 + 1 reg: 篏罩h頫憗吾с菴≪ + 2 pattern: 罩h頫憗帥若恰絖 + 3 pattern_end: 罩h頫憗帥若恰絖腟腴≪(pattern + pattern length) + 4 ci: 潟潟ゃ - ci->num_of_elements: ciの要素数 (現在の版では: 5) - ci->pattern_enc: パターン文字列の文字エンコーディング - ci->target_enc: 対象文字列の文字エンコーディング - ci->syntax: 正規表現パターン文法定義 - ci->option: 正規表現コンパイル時オプション - ci->case_fold_flag: ONIG_OPTION_IGNORECASEモードでの - 文字曖昧マッチ指定ビットフラグ + ci->num_of_elements: ci荀膣 (憜с: 5) + ci->pattern_enc: 帥若恰絖絖潟潟若c潟 + ci->target_enc: 絲乗院絖絖潟潟若c潟 + ci->syntax: 罩h頫憗帥若恰羈絎臂 + ci->option: 罩h頫憗潟潟ゃ激с + ci->case_fold_flag: ONIG_OPTION_IGNORECASE≪若с + 絖с絎 - ONIGENC_CASE_FOLD_MIN: 最小 - ONIGENC_CASE_FOLD_DEFAULT: 最小 + ONIGENC_CASE_FOLD_MIN: 絨 + ONIGENC_CASE_FOLD_DEFAULT: 絨 onig_set_default_case_fold_flag() - 5 err_info: エラー情報を返すためのアドレス - onig_error_code_to_str()の三番目の引数として使用する + 5 err_info: 惹宴菴≪ + onig_error_code_to_str()筝綣違篏睡 - 異なる文字エンコーディングの組み合わせは、以下の場合にのみ許される。 + 違絖潟潟若c潟違腟水篁ヤ翫粋┗ pattern_enc: ASCII, ISO_8859_1 target_enc: UTF16_BE, UTF16_LE, UTF32_BE, UTF32_LE @@ -196,64 +196,64 @@ # void onig_free(regex_t* reg) - 正規表現オブジェクトのメモリを解放する。 + 罩h頫憗吾с<≪茹f障 - 引数 - 1 reg: 正規表現オブジェクト + 綣 + 1 reg: 罩h頫憗吾с # void onig_free_body(regex_t* reg) - 正規表現オブジェクトのメモリを解放する。(reg自身の領域を除いて) + 罩h頫憗吾с<≪茹f障(reg荳ゃ) - 引数 - 1 reg: 正規表現オブジェクト + 綣 + 1 reg: 罩h頫憗吾с # int onig_search(regex_t* reg, const UChar* str, const UChar* end, const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option) - 正規表現で文字列を検索し、検索結果とマッチ領域を返す。 + 罩h頫憗ф絖罎膣≪罎膣∝菴 - 正常終了戻り値: マッチ位置 (p - str >= 0) - 検索失敗: ONIG_MISMATCH (< 0) + 罩e幻腟篋祉: 篏臀 (p - str >= 0) + 罎膣√け: ONIG_MISMATCH (< 0) - 引数 - 1 reg: 正規表現オブジェクト - 2 str: 検索対象文字列 - 3 end: 検索対象文字列の終端アドレス - 4 start: 検索対象文字列の検索先頭位置アドレス - 5 range: 検索対象文字列の検索終了位置アドレス - 前方探索 (start <= 探索される文字列 < range) - 後方探索 (range <= 探索される文字列 <= start) - 6 region: マッチ領域情報(region) (NULLも許される) - 7 option: 検索時オプション + 綣 + 1 reg: 罩h頫憗吾с + 2 str: 罎膣√乗院絖 + 3 end: 罎膣√乗院絖腟腴≪ + 4 start: 罎膣√乗院絖罎膣√篏臀≪ + 5 range: 罎膣√乗院絖罎膣∝篋篏臀≪ + 号「膣 (start <= 「膣≪絖 < range) + 緇号「膣 (range <= 「膣≪絖 <= start) + 6 region: (region) (NULL荐宴) + 7 option: 罎膣∽激с - ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない - ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない - ONIG_OPTION_POSIX_REGION region引数をPOSIX APIのregmatch_t[]にする + ONIG_OPTION_NOTBOL 絖(str)茵 + ONIG_OPTION_NOTEOL 絖腟腴(end)茵 + ONIG_OPTION_POSIX_REGION region綣違POSIX APIregmatch_t[] # int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, OnigOptionType option) - 文字列の指定位置でマッチングを行い、結果とマッチ領域を返す。 + 絖絎篏臀с潟違茵腟菴 - 正常終了戻り値: マッチしたバイト長 (>= 0) + 罩e幻腟篋祉: ゃ (>= 0) not match: ONIG_MISMATCH ( < 0) - 引数 - 1 reg: 正規表現オブジェクト - 2 str: 検索対象文字列 - 3 end: 検索対象文字列の終端アドレス - 4 at: 検索対象文字列の検索アドレス - 5 region: マッチ領域情報(region) (NULLも許される) - 6 option: 検索時オプション + 綣 + 1 reg: 罩h頫憗吾с + 2 str: 罎膣√乗院絖 + 3 end: 罎膣√乗院絖腟腴≪ + 4 at: 罎膣√乗院絖罎膣≪≪ + 5 region: (region) (NULL荐宴) + 6 option: 罎膣∽激с - ONIG_OPTION_NOTBOL 文字列の先頭(str)を行頭と看做さない - ONIG_OPTION_NOTEOL 文字列の終端(end)を行末と看做さない - ONIG_OPTION_POSIX_REGION region引数をPOSIX APIのregmatch_t[]にする + ONIG_OPTION_NOTBOL 絖(str)茵 + ONIG_OPTION_NOTEOL 絖腟腴(end)茵 + ONIG_OPTION_POSIX_REGION region綣違POSIX APIregmatch_t[] # int onig_scan(regex_t* reg, const UChar* str, const UChar* end, @@ -261,127 +261,127 @@ int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg) - 正規表現で文字列をスキャンして、マッチングする毎にコールバック関数を呼び出す。 + 罩h頫憗ф絖鴻c潟潟違罸潟若∽違若喝冴 - 正常終了: マッチ回数 (0回も含める) - エラー: エラーコード (< 0) - 中断: コールバック関数が0以外の戻り値を返したとき、その値を戻り値として中断 + 罩e幻腟篋: (0) + : 若潟若 (< 0) + 筝: 潟若∽違鐚篁ュ祉ゃ菴ゃ祉ゃ筝 - 引数 - 1 reg: 正規表現オブジェクト - 2 str: 検索対象文字列 - 3 end: 検索対象文字列の終端アドレス - 4 region: マッチ領域情報(region) (NULLも許される) - 5 option: 検索時オプション - 6 scan_callback: コールバック関数 - 7 callback_arg: コールバック関数に渡される付加引数値 + 綣 + 1 reg: 罩h頫憗吾с + 2 str: 罎膣√乗院絖 + 3 end: 罎膣√乗院絖腟腴≪ + 4 region: (region) (NULL荐宴) + 5 option: 罎膣∽激с + 6 scan_callback: 潟若∽ + 7 callback_arg: 潟若∽違羝<篁綣医 # OnigRegion* onig_region_new(void) - マッチ領域情報(region)を作成する。 + (region)篏 # void onig_region_free(OnigRegion* region, int free_self) - マッチ領域情報(region)で使用されているメモリを解放する。 + (region)т戎<≪茹f障 - 引数 - 1 region: マッチ領域情報オブジェクト - 2 free_self: [1: region自身を含めて全て解放, 0: region自身は解放しない] + 綣 + 1 region: 宴吾с + 2 free_self: [1: region荳茹f, 0: region荳茹f障] # void onig_region_copy(OnigRegion* to, OnigRegion* from) - マッチ領域情報(region)を複製する。 + (region)茲茖純 - 引数 - 1 to: 対象領域 - 2 from: 元領域 + 綣 + 1 to: 絲乗院 + 2 from: # void onig_region_clear(OnigRegion* region) - マッチ領域情報(region)の中味をクリアする。 + (region)筝潟≪ - 引数 - 1 region: 対象領域 + 綣 + 1 region: 絲乗院 # int onig_region_resize(OnigRegion* region, int n) - マッチ領域情報(region)の捕獲式集合(グループ)数を変更する。 + (region)峨(違若)違紊眼 - 正常終了戻り値: ONIG_NORMAL + 罩e幻腟篋祉: ONIG_NORMAL - 引数 - 1 region: 対象領域 - 2 n: 新しいサイズ + 綣 + 1 region: 絲乗院 + 2 n: 違泣ゃ # int onig_name_to_group_numbers(regex_t* reg, const UChar* name, const UChar* name_end, int** num_list) - 指定した名前に対する名前付き捕獲式集合(グループ)の - グループ番号リストを返す。 - 名前付き捕獲式集合は、(?<name>....)によって定義できる。 + 絎絲障篁峨(違若) + 違若垩鴻菴 + 篁峨(?<name>....)c絎臂с - 正常終了戻り値: 指定された名前に対するグループ数 - (例 /(?<x>..)(?<x>..)/ ==> 2) - 名前に対するグループが存在しない: -1 + 罩e幻腟篋祉: 絎絲障違若 + (箴 /(?<x>..)(?<x>..)/ ==> 2) + 絲障違若絖: -1 - 引数 - 1 reg: 正規表現オブジェクト - 2 name: 捕獲式集合(グループ)名 - 3 name_end: 捕獲式集合(グループ)名の終端アドレス - 4 num_list: 番号リストを返すアドレス + 綣 + 1 reg: 罩h頫憗吾с + 2 name: 峨(違若) + 3 name_end: 峨(違若)腟腴≪ + 4 num_list: 垩鴻菴≪ # int onig_name_to_backref_number(regex_t* reg, const UChar* name, const UChar* name_end, OnigRegion *region) - 指定された名前の後方参照(\k<name>)に対する捕獲式集合(グループ)の番号を返す。 - 名前に対して、複数のマッチ領域が有効であれば、その中の最大の番号を返す。 - 名前に対する捕獲式集合が一個しかないときには、対応するマッチ領域が有効か - どうかに関係なく、その番号を返す。(従って、regionにはNULLを渡してもよい。) + 絎緇劫(\k<name>)絲障峨(違若)垩菴 + 絲障茲違鴻с違筝紊с垩菴 + 絲障峨筝絲上鴻 + ≫垩菴(緇cregionNULL羝<) - 正常終了戻り値: 番号 + 罩e幻腟篋祉: - 引数 - 1 reg: 正規表現オブジェクト - 2 name: 捕獲式集合(グループ)名 - 3 name_end: 捕獲式集合(グループ)名の終端アドレス - 4 region: search/match結果のマッチ領域 + 綣 + 1 reg: 罩h頫憗吾с + 2 name: 峨(違若) + 3 name_end: 峨(違若)腟腴≪ + 4 region: search/match腟 # int onig_foreach_name(regex_t* reg, int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), void* arg) - 全ての名前に対してコールバック関数呼び出しを実行する。 + 絲障潟若∽医若喝冴絎茵 - 正常終了戻り値: 0 - エラー: コールバック関数の戻り値 + 罩e幻腟篋祉: 0 + : 潟若∽違祉 - 引数 - 1 reg: 正規表現オブジェクト - 2 func: コールバック関数 + 綣 + 1 reg: 罩h頫憗吾с + 2 func: 潟若∽ func(name, name_end, <number of groups>, <group number's list>, reg, arg); - funcが0以外の値を返すと、それ以降のコールバックは行なわずに - 終了する。 + func0篁ュゃ菴篁ラ潟若茵 + 腟篋 - 3 arg: funcに対する追加引数 + 3 arg: func絲障菴遵綣 # int onig_number_of_names(regex_t* reg) - パターン中で定義された名前の数を返す。 - 一個の名前の多重定義は一個と看做す。 + 帥若割賢у臂違菴 + 筝紊絎臂筝 - 引数 - 1 reg: 正規表現オブジェクト + 綣 + 1 reg: 罩h頫憗吾с # OnigEncoding onig_get_encoding(regex_t* reg) @@ -389,157 +389,157 @@ # OnigCaseFoldType onig_get_case_fold_flag(regex_t* reg) # OnigSyntaxType* onig_get_syntax(regex_t* reg) - 正規表現オブジェクトに対して、対応する値を返す。 + 罩h頫憗吾с絲障絲上ゃ菴 - 引数 - 1 reg: 正規表現オブジェクト + 綣 + 1 reg: 罩h頫憗吾с # int onig_number_of_captures(regex_t* reg) - パターン中で定義された捕獲グループの数を返す。 + 帥若割賢у臂蚊違若違菴 - 引数 - 1 reg: 正規表現オブジェクト + 綣 + 1 reg: 罩h頫憗吾с # int onig_number_of_capture_histories(regex_t* reg) - パターン中で定義された捕獲履歴(?@...)の数を返す。 + 帥若割賢у臂峨吋罩(?@...)違菴 - 使用する文法で捕獲履歴機能が有効(ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY) - でなければ、捕獲履歴機能は使用できない。 + 篏睡羈ф峨吋罩贋純(ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY) + с違峨吋罩贋純篏睡с - 引数 - 1 reg: 正規表現オブジェクト + 綣 + 1 reg: 罩h頫憗吾с # OnigCaptureTreeNode* onig_get_capture_tree(OnigRegion* region) - 捕獲履歴データのルートノードを返す。 + 峨吋罩眼若帥若若菴 - マッチが失敗している場合には、この値は不定である。 + 紊掩翫ゃ筝絎с - 引数 - 1 region: マッチ領域 + 綣 + 1 region: # int onig_capture_tree_traverse(OnigRegion* region, int at, int(*func)(int,int,int,int,int,void*), void* arg) - 捕獲履歴データ木を巡回してコールバックする。 + 峨吋罩眼若炊綏≦潟若 - 正常終了戻り値: 0 - エラー: コールバック関数の戻り値 + 罩e幻腟篋祉: 0 + : 潟若∽違祉 - 引数 - 1 region: マッチ領域 - 2 at: コールバックを行なうタイミング + 綣 + 1 region: + 2 at: 潟若茵帥ゃ潟 ONIG_TRAVERSE_CALLBACK_AT_FIRST: - 最初にコールバックして、子ノードを巡回 + 潟若絖若綏≦ ONIG_TRAVERSE_CALLBACK_AT_LAST: - 子ノードを巡回して、コールバック + 絖若綏≦潟若 ONIG_TRAVERSE_CALLBACK_AT_BOTH: - 最初にコールバックして、子ノードを巡回、最後にもう一度コールバック + 潟若絖若綏≦緇筝綺潟若 - 3 func: コールバック関数 - funcが0以外の値を返すと、それ以降の巡回は行なわずに - 終了する。 + 3 func: 潟若∽ + func0篁ュゃ菴篁ラ綏≦茵 + 腟篋 int func(int group, int beg, int end, int level, int at, void* arg) - group: グループ番号 - beg: マッチ開始位置 - end マッチ終了位置 - level: ネストレベル (0から) - at: コールバックが呼び出されたタイミング + group: 違若 + beg: 紮篏臀 + end 腟篋篏臀 + level: 鴻 (0) + at: 潟若若喝冴帥ゃ潟 ONIG_TRAVERSE_CALLBACK_AT_FIRST ONIG_TRAVERSE_CALLBACK_AT_LAST - arg: 追加引数 + arg: 菴遵綣 - 4 arg; funcに対する追加引数 + 4 arg; func絲障菴遵綣 # int onig_noname_group_capture_is_active(regex_t* reg) - 名前なし式集合の捕獲機能が有効かどうかを返す。 + 綣我純鴻菴 - 有効: 1 - 無効: 0 + : 1 + ≦: 0 - 引数 - 1 reg: 正規表現オブジェクト + 綣 + 1 reg: 罩h頫憗吾с - オプションのONIG_OPTION_DONT_CAPTURE_GROUPがON --> 無効 + 激с潟ONIG_OPTION_DONT_CAPTURE_GROUPON --> ≦ - パターンが名前つき式集合を使用している - AND 使用文法で、ONIG_SYN_CAPTURE_ONLY_NAMED_GROUPがON - AND オプションのONIG_OPTION_CAPTURE_GROUPがOFF - --> 無効 + 帥若潟ゃ綣篏睡 + AND 篏睡羈сONIG_SYN_CAPTURE_ONLY_NAMED_GROUPON + AND 激с潟ONIG_OPTION_CAPTURE_GROUPOFF + --> ≦ - 上記以外の場合 --> 有効 + 筝荐篁ュ翫 --> # UChar* onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s) - 文字一個分前の文字列位置を返す。 + 絖筝絖篏臀菴 - 引数 - 1 enc: 文字エンコーディング - 2 start: 文字列の先頭アドレス - 3 s: 文字列中の位置 + 綣 + 1 enc: 絖潟潟若c潟 + 2 start: 絖≪ + 3 s: 絖筝篏臀 # UChar* onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) - 文字の先頭バイト位置になるように左側に調整したアドレスを返す。 + 絖ゃ篏臀綏眼茯炊眼≪鴻菴 - 引数 - 1 enc: 文字エンコーディング - 2 start: 文字列の先頭アドレス - 3 s: 文字列中の位置 + 綣 + 1 enc: 絖潟潟若c潟 + 2 start: 絖≪ + 3 s: 絖筝篏臀 # UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) - 文字の先頭バイト位置になるように右側に調整したアドレスを返す。 + 絖ゃ篏臀勀眼茯炊眼≪鴻菴 - 引数 - 1 enc: 文字エンコーディング - 2 start: 文字列の先頭アドレス - 3 s: 文字列中の位置 + 綣 + 1 enc: 絖潟潟若c潟 + 2 start: 絖≪ + 3 s: 絖筝篏臀 # int onigenc_strlen(OnigEncoding enc, const UChar* s, const UChar* end) # int onigenc_strlen_null(OnigEncoding enc, const UChar* s) - 文字列の文字数を返す。 + 絖絖違菴 # int onigenc_str_bytelen_null(OnigEncoding enc, const UChar* s) - 文字列のバイト数を返す。 + 絖ゃ違菴 # int onig_set_default_syntax(OnigSyntaxType* syntax) - デフォルトの正規表現パターン文法をセットする。 + 罩h頫憗帥若恰羈祉 - 引数 - 1 syntax: 正規表現パターン文法 + 綣 + 1 syntax: 罩h頫憗帥若恰羈 # void onig_copy_syntax(OnigSyntaxType* to, OnigSyntaxType* from) - 正規表現パターン文法をコピーする。 + 罩h頫憗帥若恰羈潟若 - 引数 - 1 to: 対象 - 2 from: 元 + 綣 + 1 to: 絲乗院 + 2 from: # unsigned int onig_get_syntax_op(OnigSyntaxType* syntax) @@ -552,35 +552,35 @@ # void onig_set_syntax_behavior(OnigSyntaxType* syntax, unsigned int behavior) # void onig_set_syntax_options(OnigSyntaxType* syntax, OnigOptionType options) - 正規表現パターン文法の要素を参照/取得する。 + 罩h頫憗帥若恰羈荀膣/緇 - 引数 - 1 syntax: 正規表現パターン文法 - 2 op, op2, behavior, options: 要素の値 + 綣 + 1 syntax: 罩h頫憗帥若恰羈 + 2 op, op2, behavior, options: 荀膣 # void onig_copy_encoding(OnigEncoding to, OnigEncoding from) - 文字エンコーディングをコピーする。 + 絖潟潟若c潟違潟若 - 引数 - 1 to: 対象 - 2 from: 元 + 綣 + 1 to: 絲乗院 + 2 from: # int onig_set_meta_char(OnigSyntaxType* syntax, unsigned int what, OnigCodePoint code) - メタ文字を指定したコードポイント値にセットする。 - ONIG_SYN_OP_VARIABLE_META_CHARACTERSが正規表現パターン文法で有効に - なっていない場合には、エスケープ文字を除いて、ここで指定したメタ文字は - 機能しない。(組込みの文法では有効にしていない。) + <炊絖絎潟若ゃ潟ゃ祉 + ONIG_SYN_OP_VARIABLE_META_CHARACTERS罩h頫憗帥若恰羈ф鴻 + c翫鴻宴若絖ゃф絎<炊絖 + 罘純(腟莨若帥羈с鴻) - 正常終了戻り値: ONIG_NORMAL + 罩e幻腟篋祉: ONIG_NORMAL - 引数 - 1 syntax: 対象文法 - 2 what: メタ文字機能の指定 + 綣 + 1 syntax: 絲乗院羈 + 2 what: <炊絖罘純絎 ONIG_META_CHAR_ESCAPE ONIG_META_CHAR_ANYCHAR @@ -589,79 +589,79 @@ ONIG_META_CHAR_ONE_OR_MORE_TIME ONIG_META_CHAR_ANYCHAR_ANYTIME - 3 code: メタ文字のコードポイント または ONIG_INEFFECTIVE_META_CHAR. + 3 code: <炊絖潟若ゃ潟 障 ONIG_INEFFECTIVE_META_CHAR. # OnigCaseFoldType onig_get_default_case_fold_flag() - デフォルトのcase foldフラグを取得する。 + case fold違緇 # int onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) - デフォルトのcase foldフラグをセットする。 + case fold違祉 - 引数 - 1 case_fold_flag: case foldフラグ + 綣 + 1 case_fold_flag: case fold # unsigned int onig_get_match_stack_limit_size(void) - マッチスタックサイズの最大値を返す。 - (デフォルト: 0 == 無制限) + 鴻帥泣ゃ冴紊уゃ菴 + (: 0 == ≦狗) # int onig_set_match_stack_limit_size(unsigned int size) - マッチスタックサイズの最大値を指定する。 - (size = 0: 無制限) + 鴻帥泣ゃ冴紊уゃ絎 + (size = 0: ≦狗) - 正常終了戻り値: ONIG_NORMAL + 罩e幻腟篋祉: ONIG_NORMAL # int onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges)) - 新しいUnicodeプロパティを定義する。 - (この関数はスレッドセーフではない) + 違Unicodec絎臂 + (∽違鴻祉若с) - 引数 - 1 name: プロパティ名 (ASCIIコードのみ。 文字 ' ', '-', '_' は無視される。) - 2 ranges: プロパティコードポイント範囲 - (最初の要素は範囲の数) + 綣 + 1 name: e (ASCII潟若帥 絖 ' ', '-', '_' ∴) + 2 ranges: c潟若ゃ潟膀 + (荀膣膀蚊) [num-of-ranges, 1st-range-start, 1st-range-end, 2nd-range-start... ] - * この関数を呼んだ後で、rangesを変更/破壊しないこと + * ∽違若緇сranges紊/翫 - 正常終了戻り値: ONIG_NORMAL + 罩e幻腟篋祉: ONIG_NORMAL # unsigned int onig_get_parse_depth_limit(void) - 再帰パース処理の最大深さを返す。 - (デフォルト: regint.h で定義されている DEFAULT_PARSE_DEPTH_LIMIT。現在は 4096) + 絽違若劫紊ф訓菴 + (: regint.h у臂 DEFAULT_PARSE_DEPTH_LIMIT憜 4096) # int onig_set_parse_depth_limit(unsigned int depth) - 再帰パース処理の最大深さを指定する。 - (depth = 0: regint.h で定義されたデフォルト値に設定する。) + 絽違若劫紊ф訓絎 + (depth = 0: regint.h у臂ゃ荐絎) - 正常終了戻り値: ONIG_NORMAL + 罩e幻腟篋祉: ONIG_NORMAL # int onig_end(void) - ライブラリの使用を終了する。 + ゃ篏睡腟篋 - 正常終了戻り値: ONIG_NORMAL + 罩e幻腟篋祉: ONIG_NORMAL - onig_init()を再度呼び出しても、以前に作成した正規表現オブジェクト - を使用することはできない。 + onig_init()綺若喝冴篁ュ篏罩h頫憗吾с + 篏睡с # const char* onig_version(void) - バージョン文字列を返す。(例 "5.0.3") + 若吾с恰絖菴(箴 "5.0.3") // END @@ -1,22 +1,22 @@ FAQ 2016/04/06 -1. 最長マッチ +1. 激 - onig_new()の中で、ONIG_OPTION_FIND_LONGESTオプション - を使用すれば最長マッチになる。 + onig_new()筝сONIG_OPTION_FIND_LONGEST激с + 篏睡井激 2. CR + LF - DOSの改行(CR(0x0c) + LF(0x0a)の連続) + DOS壕(CR(0x0c) + LF(0x0a)g) - regenc.hの中の、以下の部分を有効にする。 + regenc.h筝篁ヤ鴻 /* #define USE_CRNL_AS_LINE_TERMINATOR */ -3. メーリングリスト +3. <若潟違鴻 - 鬼車に関するメーリングリストは存在しない。 + 薔取≪<若潟違鴻絖 //END @@ -1,4 +1,4 @@ -Oniguruma Regular Expressions Version 6.3.0 2017/05/19 +Oniguruma Regular Expressions Version 6.4.0 2017/06/28 syntax: ONIG_SYNTAX_RUBY (default) @@ -237,6 +237,8 @@ syntax: ONIG_SYNTAX_RUBY (default) \n \k<n> \k'n' (n >= 1) backreference the nth group in the regexp \k<-n> \k'-n' (n >= 1) backreference the nth group counting backwards from the referring position + \k<+n> \k'+n' (n >= 1) backreference the nth group counting + forwards from the referring position \k<name> \k'name' backreference a group with the specified name When backreferencing with a name that is assigned to more than one groups, @@ -286,8 +288,11 @@ syntax: ONIG_SYNTAX_RUBY (default) that group." \g<n> \g'n' (n >= 1) call the nth group + \g<0> \g'0' call zero (call the total regexp) \g<-n> \g'-n' (n >= 1) call the nth group counting backwards from the calling position + \g<+n> \g'+n' (n >= 1) call the nth group counting forwards from + the calling position \g<name> \g'name' call the group with the specified name * Left-most recursive calls are not allowed. @@ -1,74 +1,74 @@ -鬼車 正規表現 Version 6.3.0 2017/05/19 +薔取 罩h頫 Version 6.4.0 2017/06/28 -使用文法: ONIG_SYNTAX_RUBY (既定値) +篏睡羈: ONIG_SYNTAX_RUBY (√) -1. 基本要素 +1. 堺荀膣 - \ 退避修飾 (エスケープ) 正規表現記号の有効/無効の制御 - | 選択子 - (...) 式集合 (グループ) - [...] 文字集合 (文字クラス) + \ 推信蕋 (鴻宴若) 罩h頫憠垩/≦鴻九勝 + | 御絖 + (...) 綣 (違若) + [...] 絖 (絖) -2. 文字 +2. 絖 - \t 水平タブ (0x09) - \v 垂直タブ (0x0B) - \n 改行 (0x0A) - \r 復帰 (0x0D) - \b 後退空白 (0x08) - \f 改頁 (0x0C) - \a 鐘 (0x07) - \e 退避修飾 (0x1B) - \nnn 八進数表現 符号化バイト値(の一部) - \o{17777777777} 拡張八進数表現 コードポイント値 - \xHH 十六進数表現 符号化バイト値(の一部) - \x{7HHHHHHH} 拡張十六進数表現 コードポイント値 - \cx 制御文字表現 コードポイント値 - \C-x 制御文字表現 コードポイント値 - \M-x 超 (x|0x80) コードポイント値 - \M-\C-x 超 + 制御文字表現 コードポイント値 + \t 羂翫抗帥 (0x09) + \v 眼帥 (0x0B) + \n 壕 (0x0A) + \r 緇絽 (0x0D) + \b 緇腥榊 (0x08) + \f 拷 (0x0C) + \a (0x07) + \e 推信蕋 (0x1B) + \nnn 我域; 膃垸ゃ(筝) + \o{17777777777} ≦宍我域; 潟若ゃ潟 + \xHH 我域; 膃垸ゃ(筝) + \x{7HHHHHHH} ≦宍我域; 潟若ゃ潟 + \cx 九勝絖茵 潟若ゃ潟 + \C-x 九勝絖茵 潟若ゃ潟 + \M-x 莇 (x|0x80) 潟若ゃ潟 + \M-\C-x 莇 + 九勝絖茵 潟若ゃ潟 - ※ \bは、文字集合内でのみ有効 + \b絖с炊 -3. 文字種 +3. 絖腮 - . 任意文字 (改行を除く) + . 篁紙絖 (壕ゃ) - \w 単語構成文字 + \w 茯罕絖 - Unicode以外の場合: - 英数字, "_" および 多バイト文字。 + Unicode篁ュ翫: + 掩医, "_" 紊ゃ絖 - Unicodeの場合: + Unicode翫: General_Category -- (Letter|Mark|Number|Connector_Punctuation) - \W 非単語構成文字 + \W 茯罕絖 - \s 空白文字 + \s 腥榊醇絖 - Unicode以外の場合: + Unicode篁ュ翫: \t, \n, \v, \f, \r, \x20 - Unicodeの場合: + Unicode翫: 0009, 000A, 000B, 000C, 000D, 0085(NEL), General_Category -- Line_Separator -- Paragraph_Separator -- Space_Separator - \S 非空白文字 + \S 腥榊醇絖 - \d 10進数字 + \d 10我医 - Unicodeの場合: General_Category -- Decimal_Number + Unicode翫: General_Category -- Decimal_Number - \D 非10進数字 + \D 10我医 - \h 16進数字 [0-9a-fA-F] + \h 16我医 [0-9a-fA-F] - \H 非16進数字 + \H 16我医 Character Property @@ -79,95 +79,95 @@ property-name: - + 全てのエンコーディングで有効 + + 潟潟若c潟違ф Alnum, Alpha, Blank, Cntrl, Digit, Graph, Lower, Print, Punct, Space, Upper, XDigit, Word, ASCII, - + EUC-JP, Shift_JISで有効 + + EUC-JP, Shift_JISф Hiragana, Katakana - + UTF8, UTF16, UTF32で有効 - doc/UNICODE_PROPERTIES参照 + + UTF8, UTF16, UTF32ф + doc/UNICODE_PROPERTIES -4. 量指定子 +4. 絎絖 - 欲張り + 罨峨宍 - ? 一回または零回 - * 零回以上 - + 一回以上 - {n,m} n回以上m回以下 - {n,} n回以上 - {,n} 零回以上n回以下 ({0,n}) - {n} n回 + ? 筝障九 + * 九篁ヤ + + 筝篁ヤ + {n,m} n篁ヤm篁ヤ + {n,} n篁ヤ + {,n} 九篁ヤn篁ヤ ({0,n}) + {n} n - 無欲 + ≧ - ?? 一回または零回 - *? 零回以上 - +? 一回以上 - {n,m}? n回以上m回以下 - {n,}? n回以上 - {,n}? 零回以上n回以下 (== {0,n}?) + ?? 筝障九 + *? 九篁ヤ + +? 筝篁ヤ + {n,m}? n篁ヤm篁ヤ + {n,}? n篁ヤ + {,n}? 九篁ヤn篁ヤ (== {0,n}?) - 強欲 (欲張りで、繰り返しに成功した後は回数を減らすような後退再試行をしない) + 綣傑 (罨峨宍с膵違菴緇違羝緇荅茵) - ?+ 一回または零回 - *+ 零回以上 - ++ 一回以上 + ?+ 筝障九 + *+ 九篁ヤ + ++ 筝篁ヤ - ({n,m}+, {n,}+, {n}+ は、ONIG_SYNTAX_JAVAでのみ強欲な指定子) + ({n,m}+, {n,}+, {n}+ ONIG_SYNTAX_JAVAс水七罨蚊絎絖) - 例. /a*+/ === /(?>a*)/ + 箴. /a*+/ === /(?>a*)/ -5. 錨 +5. - ^ 行頭 - $ 行末 - \b 単語境界 - \B 非単語境界 - \A 文字列先頭 - \Z 文字列末尾、または文字列末尾の改行の直前 - \z 文字列末尾 - \G 照合開始位置 + ^ 茵 + $ 茵 + \b 茯紜 + \B 茯紜 + \A 絖 + \Z 絖絨障障絖絨障壕翫 + \z 絖絨 + \G у紮篏臀 -6. 文字集合 +6. 絖 - ^... 否定 (最低優先度演算子) - x-y 範囲 (xからyまで) - [...] 集合 (文字集合内文字集合) - ..&&.. 積演算 (^の次に優先度が低い演算子) + ^... 絎 (篏綺羲膊絖) + x-y 膀 (xy障) + [...] (絖絖) + ..&&.. 腥羲膊 (^罨<綺篏羲膊絖) - 例. [a-w&&[^c-g]z] ==> ([a-w] and ([^c-g] or z)) ==> [abh-w] + 箴. [a-w&&[^c-g]z] ==> ([a-w] and ([^c-g] or z)) ==> [abh-w] - ※ '[', '-', ']'を、文字集合内で通常文字の意味で使用したい場合には、 - これらの文字を'\'で退避修飾しなければならない。 + '[', '-', ']'絖ч絽御絖潟т戎翫 + 絖'\'ч推信蕋障違 - POSIXブラケット ([:xxxxx:], 否定 [:^xxxxx:]) + POSIX宴 ([:xxxxx:], 絎 [:^xxxxx:]) - Unicode以外の場合: + Unicode篁ュ翫: - alnum 英数字 - alpha 英字 + alnum 掩医 + alpha 怨 ascii 0 - 127 blank \t, \x20 cntrl digit 0-9 - graph 多バイト文字全部を含む + graph 紊ゃ絖 lower - print 多バイト文字全部を含む + print 紊ゃ絖 punct space \t, \n, \v, \f, \r, \x20 upper xdigit 0-9, a-f, A-F - word 英数字, "_" および 多バイト文字 + word 掩医, "_" 紊ゃ絖 - Unicodeの場合: + Unicode翫: alnum Letter | Mark | Decimal_Number alpha Letter | Mark @@ -190,67 +190,69 @@ -7. 拡張式集合 +7. ≦宍綣 - (?#...) 注釈 - (?imx-imx) 孤立オプション - i: 大文字小文字照合 - m: 複数行 - x: 拡張形式 - (?imx-imx:式) 式オプション + (?#...) 羈 + (?imx-imx) 絖ょ激с + i: 紊ф絖絨絖у + m: 茲域 + x: ≦宍綵√ + (?imx-imx:綣) 綣激с - (式) 捕獲式集合 - (?:式) 非捕獲式集合 + (綣) 峨 + (?:綣) 峨 - (?=式) 先読み - (?!式) 否定先読み - (?<=式) 戻り読み - (?<!式) 否定戻り読み + (?=綣) 茯 + (?!綣) 絎茯 + (?<=綣) 祉茯 + (?<!綣) 絎祉茯 - 戻り読みの式は固定文字長でなければならない。 - しかし、最上位の選択子だけは異なった文字長が許される。 - 例. (?<=a|bc) は許可. (?<=aaa(?:b|cd)) は不許可 + 祉茯帥綣阪絖激с違 + 筝篏御絖違c絖激荐宴 + 箴. (?<=a|bc) 荐怨. (?<=aaa(?:b|cd)) 筝荐怨 - 否定戻り読みでは、捕獲式集合は許されないが、 - 非捕獲式集合は許される。 + 絎祉茯帥с峨荐宴 + 峨荐宴 - (?>式) 原子的式集合 - 式全体を通過したとき、式の中での後退再試行を行なわない + (?>綣) 絖綣 + 綣篏綣筝с緇荅茵茵 - (?<name>式), (?'name'式) - 名前付き捕獲式集合 - 式集合に名前を割り当てる(定義する)。 - (名前は単語構成文字でなければならない。) + (?<name>綣), (?'name'綣) + 篁峨 + 綣蚊綵(絎臂) + (茯罕絖с違) - 名前だけでなく、捕獲式集合と同様に番号も割り当てられる。 - 番号指定が禁止されていない状態 (10. 捕獲式集合 を参照) - のときは、名前を使わないで番号でも参照できる。 + с峨罕垩蚊綵 + 垽絎胼罩≪倶 (10. 峨 ) + 篏帥х垩ссс - 複数の式集合に同じ名前を与えることは許されている。 - この場合には、この名前を使用した後方参照は可能であるが、 - 部分式呼出しはできない。 + 茲違綣筝荐宴 + 翫篏睡緇劫с純с + 綣弱冴с -8. 後方参照 +8. 緇劫 - \n 番号指定参照 (n >= 1) - \k<n> 番号指定参照 (n >= 1) - \k'n' 番号指定参照 (n >= 1) - \k<-n> 相対番号指定参照 (n >= 1) - \k'-n' 相対番号指定参照 (n >= 1) - \k<name> 名前指定参照 - \k'name' 名前指定参照 + \n 垽絎 (n >= 1) + \k<n> 垽絎 (n >= 1) + \k'n' 垽絎 (n >= 1) + \k<-n> 後丞垽絎 (n >= 1) + \k'-n' 後丞垽絎 (n >= 1) + \k<+n> 後丞垽絎 (n >= 1) + \k'+n' 後丞垽絎 (n >= 1) + \k<name> 絎 + \k'name' 絎 - 名前指定参照で、その名前が複数の式集合で多重定義されている場合には、 - 番号の大きい式集合から優先的に参照される。 - (マッチしないときには番号の小さい式集合が参照される) + 絎сс茲違綣у絎臂翫 + 垩紊с綣с + (垩絨綣с) - ※ 番号指定参照は、名前付き捕獲式集合が定義され、 - かつ ONIG_OPTION_CAPTURE_GROUPが指定されていない場合には、 - 禁止される。(10. 捕獲式集合 を参照) + 垽絎с篁峨絎臂 + ONIG_OPTION_CAPTURE_GROUP絎翫 + 胼罩≪(10. 峨 ) - ネストレベル付き後方参照 + 鴻篁緇劫 level: 0, 1, 2, ... @@ -264,14 +266,14 @@ \k'name+level' \k'name-level' - 後方参照の位置から相対的な部分式呼出しネストレベルを指定して、そのレベルでの - 捕獲値を参照する。 + 緇劫с篏臀後丞綣弱冴鴻絎с + 峨ゃс - 例-1. + 箴-1. /\A(?<a>|.|(?:(?<b>.)\g<a>\k<b+0>))\z/.match("reer") - 例-2. + 箴-2. r = Regexp.compile(<<'__REGEXP__'.strip, Regexp::EXTENDED) (?<element> \g<stag> \g<content>* \g<etag> ){0} @@ -286,82 +288,86 @@ -9. 部分式呼出し ("田中哲スペシャル") +9. 綣弱冴 ("遺賢蚊鴻激c") - \g<name> 名前指定呼出し - \g'name' 名前指定呼出し - \g<n> 番号指定呼出し (n >= 1) - \g'n' 番号指定呼出し (n >= 1) - \g<-n> 相対番号指定呼出し (n >= 1) - \g'-n' 相対番号指定呼出し (n >= 1) + \g<name> 絎弱冴 + \g'name' 絎弱冴 + \g<n> 垽絎弱冴 (n >= 1) + \g'n' 垽絎弱冴 (n >= 1) + \g<0> 垽絎弱冴(篏若喝冴) + \g'0' 垽絎弱冴(篏若喝冴) + \g<-n> 後丞垽絎弱冴 (n >= 1) + \g'-n' 後丞垽絎弱冴 (n >= 1) + \g<+n> 後丞垽絎弱冴 (n >= 1) + \g'+n' 後丞垽絎弱冴 (n >= 1) - ※ 最左位置での再帰呼出しは禁止される。 - 例. (?<name>a|\g<name>b) => error + 綏篏臀с絽医弱冴胼罩≪ + 箴. (?<name>a|\g<name>b) => error (?<name>a|b\g<name>c) => OK - ※ 番号指定呼出しは、名前付き捕獲式集合が定義され、 - かつ ONIG_OPTION_CAPTURE_GROUPが指定されていない場合には、 - 禁止される。 (10. 捕獲式集合 を参照) + 垽絎弱冴篁峨絎臂 + ONIG_OPTION_CAPTURE_GROUP絎翫 + 胼罩≪ (10. 峨 ) - ※ 呼び出された式集合のオプション状態が呼出し側のオプション状態と異なっている - とき、呼び出された側のオプション状態が有効である。 + 若喝冴綣激с括倶弱冴眼激с括倶違c + 若喝冴眼激с括倶鴻с - 例. (?-i:\g<name>)(?i:(?<name>a)){0} は "A" に照合成功する。 + 箴. (?-i:\g<name>)(?i:(?<name>a)){0} "A" у -10. 捕獲式集合 +10. 峨 - 捕獲式集合(...)は、以下の条件に応じて振舞が変化する。 - (名前付き捕獲式集合は変化しない) + 峨(...)篁ヤ>散綽紊 + (篁峨紊) - case 1. /.../ (名前付き捕獲式集合は不使用、オプションなし) + case 1. /.../ (篁峨筝篏睡激с潟) - (...) は、捕獲式集合として扱われる。 + (...) 峨宴 - case 2. /.../g (名前付き捕獲式集合は不使用、オプション 'g'を指定) + case 2. /.../g (篁峨筝篏睡激с 'g'絎) - (...) は、非捕獲式集合として扱われる。 + (...) 峨宴 - case 3. /..(?<name>..)../ (名前付き捕獲式集合は使用、オプションなし) + case 3. /..(?<name>..)../ (篁峨篏睡激с潟) - (...) は、非捕獲式集合として扱われる。 - 番号指定参照/呼び出しは不許可。 + (...) 峨宴 + 垽絎/若喝冴筝荐怨 - case 4. /..(?<name>..)../G (名前付き捕獲式集合は使用、オプション 'G'を指定) + case 4. /..(?<name>..)../G (篁峨篏睡激с 'G'絎) - (...) は、捕獲式集合として扱われる。 - 番号指定参照/呼び出しは許可。 + (...) 峨宴 + 垽絎/若喝冴荐怨 - 但し + 篏 g: ONIG_OPTION_DONT_CAPTURE_GROUP G: ONIG_OPTION_CAPTURE_GROUP - ('g'と'G'オプションは、ruby-dev MLで議論された。) + ('g''G'激с潟ruby-dev MLц域) - これらの振舞の意味は、 - 名前付き捕獲と名前無し捕獲を同時に使用する必然性のある場面は少ないであろう - という理由から考えられたものである。 + 潟 + 篁蚊<蚊篏睡綽倶с顔≪絨с + 宴с ----------------------------- -補記 1. 文法依存オプション +茖荐 1. 羈箴絖激с + ONIG_SYNTAX_RUBY - (?m): 終止符記号(.)は改行と照合成功 + (?m): 腟罩∝荐(.)壕у - + ONIG_SYNTAX_PERL と ONIG_SYNTAX_JAVA - (?s): 終止符記号(.)は改行と照合成功 - (?m): ^ は改行の直後に照合する、$ は改行の直前に照合する + + ONIG_SYNTAX_PERL ONIG_SYNTAX_JAVA + (?s): 腟罩∝荐(.)壕у + (?m): ^ 壕翫у$ 壕翫у -補記 2. 独自拡張機能 +茖荐 2. ≦宍罘 - + 16進数数字、非16進数字 \h, \H - + 名前付き捕獲式集合 (?<name>...), (?'name'...) - + 名前指定後方参照 \k<name> - + 部分式呼出し \g<name>, \g<group-num> + + 16我井医16我医 \h, \H + + 篁峨 (?<name>...), (?'name'...) + + 絎緇劫 \k<name> + + 綣弱冴 \g<name>, \g<group-num> -補記 3. Perl 5.8.0と比較して存在しない機能 +茖荐 3. Perl 5.8.0罸莠絖罘 + \N{name} + \l,\u,\L,\U, \X, \C @@ -370,80 +376,80 @@ + (?(condition)yes-pat|no-pat) * \Q...\E - 但しONIG_SYNTAX_PERLとONIG_SYNTAX_JAVAでは有効 - - -補記 4. Ruby 1.8 の日本語化 GNU regex(version 0.12)との違い - - + 文字Property機能追加 (\p{property}, \P{Property}) - + 16進数字タイプ追加 (\h, \H) - + 戻り読み機能を追加 - + 強欲な繰り返し指定子を追加 (?+, *+, ++) - + 文字集合の中の演算子を追加 ([...], &&) - ('[' は、文字集合の中で通常の文字として使用するときには - 退避修飾しなければならない) - + 名前付き捕獲式集合と、部分式呼出し機能追加 - + 多バイト文字コードが指定されているとき、 - 文字集合の中で八進数または十六進数表現の連続は、多バイト符号で表現された - 一個の文字と解釈される - (例. [\xa1\xa2], [\xa1\xa7-\xa4\xa1]) - + 文字集合の中で、一バイト文字と多バイト文字の範囲指定は許される。 - ex. /[a-あ]/ - + 孤立オプションの有効範囲は、その孤立オプションを含んでいる式集合の - 終わりまでである - 例. (?:(?i)a|b) は (?:(?i:a|b)) と解釈される、(?:(?i:a)|b)ではない - + 孤立オプションはその前の式に対して透過的ではない - 例. /a(?i)*/ は文法エラーとなる - + 不完全な繰り返し範囲指定子は通常の文字列として許可される - 例. /{/, /({)/, /a{2,3/ - + 否定的POSIXブラケット [:^xxxx:] を追加 - + POSIXブラケット [:ascii:] を追加 - + 先読みの繰り返しは不許可 - 例. /(?=a)*/, /(?!b){5}/ - + 数値で指定された文字に対しても、大文字小文字照合オプションは有効 - 例. /\x61/i =~ "A" - + 繰り返し回数指定で、最低回数の省略(0回)ができる + 篏ONIG_SYNTAX_PERLONIG_SYNTAX_JAVAс + + +茖荐 4. Ruby 1.8 ユ茯 GNU regex(version 0.12) + + + 絖Property罘処申 (\p{property}, \P{Property}) + + 16我医帥ゃ菴遵 (\h, \H) + + 祉茯炊純菴遵 + + 綣傑蚊膵違菴絎絖菴遵 (?+, *+, ++) + + 絖筝羲膊絖菴遵 ([...], &&) + ('[' 絖筝ч絽吾絖篏睡 + 推信蕋障違) + + 篁峨綣弱冴罘処申 + + 紊ゃ絖潟若絎 + 絖筝у我違障我域;憗g紊ゃ膃垩ц;憗 + 筝絖茹i + (箴. [\xa1\xa2], [\xa1\xa7-\xa4\xa1]) + + 絖筝с筝ゃ絖紊ゃ絖膀我絎荐宴 + ex. /[a-]/ + + 絖ょ激с潟合蚊絖ょ激с潟с綣 + 腟障сс + 箴. (?:(?i)a|b) (?:(?i:a|b)) 茹i(?:(?i:a)|b)с + + 絖ょ激с潟綣絲障с + 箴. /a(?i)*/ 羈若 + + 筝絎膵違菴膀我絎絖絽吾絖荐怨 + 箴. /{/, /({)/, /a{2,3/ + + 絎POSIX宴 [:^xxxx:] 菴遵 + + POSIX宴 [:ascii:] 菴遵 + + 茯帥膵違菴筝荐怨 + 箴. /(?=a)*/, /(?!b){5}/ + + 医ゃф絎絖絲障紊ф絖絨絖у激с潟 + 箴. /\x61/i =~ "A" + + 膵違菴井絎с篏違(0)с /a{,n}/ == /a{0,n}/ - 最低回数と最大回数の同時省略は許されない。(/a{,}/) - + /a{n}?/は無欲な演算子ではない。 + 篏違紊у違ャ荐宴(/a{,}/) + + /a{n}?/≧蚊羲膊絖с /a{n}?/ == /(?:a{n})?/ - + 無効な後方参照をチェックしてエラーにする。 + + ≦鴻緇劫сс若 /\1/, /(a)\2/ - + 無限繰り返しの中で、長さ零での照合成功は繰り返しを中断させるが、 - このとき、中断すべきかどうかの判定として、捕獲式集合の捕獲状態の - 変化まで考慮している + + ♂膵違菴筝с激吟су膵違菴筝 + 筝鴻ゅ峨牙倶 + 紊障ц /(?:()|())*\1\2/ =~ "" /(?:\1a|())*/ =~ "a" -補記 5. 実装されているが、既定値では有効にしていない機能 +茖荐 5. 絎茖√ゃс鴻罘 - + 捕獲履歴参照 + + 峨吋罩翫 - (?@...) と (?@<name>...) + (?@...) (?@<name>...) - 例. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>] + 箴. /(?@a)*/.match("aaa") ==> [<0-1>, <1-2>, <2-3>] - 使用方法は、sample/listcap.cを参照 + 篏睡号sample/listcap.c - 有効にしていない理由は、どの程度役に立つかはっきりしないため。 + 鴻宴腮綺綵鴻腴ゃc -補記 6. 問題点 +茖荐 6. 馹 - + エンコーディングバイト値が適正な価かどうかのチェックは行なっていない。 + + 潟潟若c潟違ゃゃ罩c箴<с茵c - 例: UTF-8 + 箴: UTF-8 - * 先頭バイトとして不正なバイトを一文字とみなす + * ゃ筝罩cゃ筝絖帥 /./u =~ "\xa3" - * 不完全なバイトシーケンスのチェックをしない + * 筝絎ゃ激若宴潟鴻с /\w+/u =~ "a\xf3\x8ec" - これを調べることは可能ではあるが、遅くなるので行なわない。 + 茯帥鴻純сц - 文字列として、そのようなバイト列を指定した場合の動作は保証しない。 + 絖ゃ絎翫篏篆荐若 -終り +腟 @@ -8,7 +8,7 @@ <h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>) <p> -(c) K.Kosako, updated at: 2017/05/26 +(c) K.Kosako, updated at: 2017/06/30 </p> <dl> @@ -16,6 +16,7 @@ <dt><b>What's new</b> </font> <ul> +<li>2017/07/03: Version 6.4.0 released.</li> <li>2017/05/29: Version 6.3.0 released.</li> <li>2017/04/08: Version 6.2.0 released.</li> <li>2016/12/11: Version 6.1.3 released.</li> diff --git a/index_ja.html b/index_ja.html index 7070dfe..502f460 100644 --- a/index_ja.html +++ b/index_ja.html @@ -8,7 +8,7 @@ <h1>薔取</h1> <p> -(c) K.Kosako, 腟贋: 2017/05/26 +(c) K.Kosako, 腟贋: 2017/06/30 </p> <dl> @@ -16,6 +16,7 @@ <dt><b>贋井</b> </font> <ul> +<li>2017/07/03: Version 6.4.0 若</li> <li>2017/05/29: Version 6.3.0 若</li> <li>2017/04/08: Version 6.2.0 若</li> <li>2016/12/11: Version 6.1.3 若</li> diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in index 93e46c0..e13fad1 100644 --- a/src/config.h.cmake.in +++ b/src/config.h.cmake.in @@ -43,6 +43,9 @@ /* Define to 1 if you have the <unistd.h> header file. */ #cmakedefine HAVE_UNISTD_H ${HAVE_UNISTD_H} +/* Define to 1 if you have the <inttypes.h> header file. */ +#cmakedefine HAVE_INTTYPES_H ${HAVE_INTTYPES_H} + /* Name of package */ #cmakedefine PACKAGE ${PACKAGE} diff --git a/src/oniguruma.h b/src/oniguruma.h index 02d4254..a8ae09a 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -35,7 +35,7 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 -#define ONIGURUMA_VERSION_MINOR 3 +#define ONIGURUMA_VERSION_MINOR 4 #define ONIGURUMA_VERSION_TEENY 0 #ifdef __cplusplus @@ -609,7 +609,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; /* #define ONIGERR_OVER_THREAD_PASS_LIMIT_COUNT -1001 */ -/* must be smaller than BIT_STATUS_BITS_NUM (unsigned int * 8) */ +/* must be smaller than MEM_STATUS_BITS_NUM (unsigned int * 8) */ #define ONIG_MAX_CAPTURE_HISTORY_GROUP 31 #define ONIG_IS_CAPTURE_HISTORY_GROUP(r, i) \ ((i) <= ONIG_MAX_CAPTURE_HISTORY_GROUP && (r)->list && (r)->list[i]) @@ -669,7 +669,7 @@ typedef struct re_pattern_buffer { int num_mem; /* used memory(...) num counted from 1 */ int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ - int num_null_check; /* OP_NULL_CHECK_START/END id counter */ + int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */ int num_comb_exp_check; /* combination explosion check */ int num_call; /* number of subexp call */ unsigned int capture_history; /* (?@...) flag (1-31) */ diff --git a/src/regcomp.c b/src/regcomp.c index 0e9a9ab..db83739 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -29,6 +29,83 @@ #include "regparse.h" +#if 0 +typedef struct { + int n; + int alloc; + int* v; +} int_stack; + +static int +make_int_stack(int_stack** rs, int init_size) +{ + int_stack* s; + int* v; + + *rs = 0; + + s = xmalloc(sizeof(*s)); + if (IS_NULL(s)) return ONIGERR_MEMORY; + + v = (int* )xmalloc(sizeof(int) * init_size); + if (IS_NULL(v)) { + xfree(s); + return ONIGERR_MEMORY; + } + + s->n = 0; + s->alloc = init_size; + s->v = v; + + *rs = s; + return ONIG_NORMAL; +} + +static void +free_int_stack(int_stack* s) +{ + if (IS_NOT_NULL(s)) { + if (IS_NOT_NULL(s->v)) + xfree(s->v); + xfree(s); + } +} + +static int +int_stack_push(int_stack* s, int v) +{ + if (s->n >= s->alloc) { + int new_size = s->alloc * 2; + int* nv = (int* )xrealloc(s->v, new_size); + if (IS_NULL(nv)) return ONIGERR_MEMORY; + + s->alloc = new_size; + s->v = nv; + } + + s->v[s->n] = v; + s->n++; + return ONIG_NORMAL; +} + +static int +int_stack_pop(int_stack* s) +{ + int v; + +#ifdef ONIG_DEBUG + if (s->n <= 0) { + fprintf(stderr, "int_stack_pop: fail empty. %p\n", s); + return 0; + } +#endif + + v = s->v[s->n]; + s->n--; + return v; +} +#endif + OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN; extern OnigCaseFoldType @@ -70,8 +147,8 @@ swap_node(Node* a, Node* b) Node c; c = *a; *a = *b; *b = c; - if (NTYPE(a) == NT_STR) { - StrNode* sn = NSTR(a); + if (NODE_TYPE(a) == NODE_STR) { + StrNode* sn = STR_(a); if (sn->capa == 0) { int len = sn->end - sn->s; sn->s = sn->buf; @@ -79,8 +156,8 @@ swap_node(Node* a, Node* b) } } - if (NTYPE(b) == NT_STR) { - StrNode* sn = NSTR(b); + if (NODE_TYPE(b) == NODE_STR) { + StrNode* sn = STR_(b); if (sn->capa == 0) { int len = sn->end - sn->s; sn->s = sn->buf; @@ -156,42 +233,42 @@ onig_bbuf_init(BBuf* buf, int size) #ifdef USE_SUBEXP_CALL static int -unset_addr_list_init(UnsetAddrList* uslist, int size) +unset_addr_list_init(UnsetAddrList* list, int size) { UnsetAddr* p; p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size); CHECK_NULL_RETURN_MEMERR(p); - uslist->num = 0; - uslist->alloc = size; - uslist->us = p; + list->num = 0; + list->alloc = size; + list->us = p; return 0; } static void -unset_addr_list_end(UnsetAddrList* uslist) +unset_addr_list_end(UnsetAddrList* list) { - if (IS_NOT_NULL(uslist->us)) - xfree(uslist->us); + if (IS_NOT_NULL(list->us)) + xfree(list->us); } static int -unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node) +unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node) { UnsetAddr* p; int size; - if (uslist->num >= uslist->alloc) { - size = uslist->alloc * 2; - p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size); + if (list->num >= list->alloc) { + size = list->alloc * 2; + p = (UnsetAddr* )xrealloc(list->us, sizeof(UnsetAddr) * size); CHECK_NULL_RETURN_MEMERR(p); - uslist->alloc = size; - uslist->us = p; + list->alloc = size; + list->us = p; } - uslist->us[uslist->num].offset = offset; - uslist->us[uslist->num].target = node; - uslist->num++; + list->us[list->num].offset = offset; + list->us[list->num].target = node; + list->num++; return 0; } #endif /* USE_SUBEXP_CALL */ @@ -251,6 +328,7 @@ add_mem_num(regex_t* reg, int num) return 0; } +#if 0 static int add_pointer(regex_t* reg, void* addr) { @@ -259,6 +337,7 @@ add_pointer(regex_t* reg, void* addr) BBUF_ADD(reg, &ptr, SIZE_POINTER); return 0; } +#endif static int add_option(regex_t* reg, OnigOptionType option) @@ -273,7 +352,7 @@ add_opcode_rel_addr(regex_t* reg, int opcode, int addr) int r; r = add_opcode(reg, opcode); - if (r) return r; + if (r != 0) return r; r = add_rel_addr(reg, addr); return r; } @@ -298,13 +377,13 @@ add_opcode_option(regex_t* reg, int opcode, OnigOptionType option) int r; r = add_opcode(reg, opcode); - if (r) return r; + if (r != 0) return r; r = add_option(reg, option); return r; } static int compile_length_tree(Node* node, regex_t* reg); -static int compile_tree(Node* node, regex_t* reg); +static int compile_tree(Node* node, regex_t* reg, ScanEnv* env); #define IS_NEED_STR_LEN_OP_EXACT(op) \ @@ -357,31 +436,31 @@ select_str_opcode(int mb_len, int str_len, int ignore_case) } static int -compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) +compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env) { int r; int saved_num_null_check = reg->num_null_check; - if (empty_info != 0) { - r = add_opcode(reg, OP_NULL_CHECK_START); - if (r) return r; + if (empty_info != QUANT_BODY_IS_NOT_EMPTY) { + r = add_opcode(reg, OP_EMPTY_CHECK_START); + if (r != 0) return r; r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */ - if (r) return r; + if (r != 0) return r; reg->num_null_check++; } - r = compile_tree(node, reg); - if (r) return r; + r = compile_tree(node, reg, env); + if (r != 0) return r; - if (empty_info != 0) { - if (empty_info == NQ_TARGET_IS_EMPTY) - r = add_opcode(reg, OP_NULL_CHECK_END); - else if (empty_info == NQ_TARGET_IS_EMPTY_MEM) - r = add_opcode(reg, OP_NULL_CHECK_END_MEMST); - else if (empty_info == NQ_TARGET_IS_EMPTY_REC) - r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH); + if (empty_info != QUANT_BODY_IS_NOT_EMPTY) { + if (empty_info == QUANT_BODY_IS_EMPTY) + r = add_opcode(reg, OP_EMPTY_CHECK_END); + else if (empty_info == QUANT_BODY_IS_EMPTY_MEM) + r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST); + else if (empty_info == QUANT_BODY_IS_EMPTY_REC) + r = add_opcode(reg, OP_EMPTY_CHECK_END_MEMST_PUSH); - if (r) return r; + if (r != 0) return r; r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */ } return r; @@ -389,28 +468,28 @@ compile_tree_empty_check(Node* node, regex_t* reg, int empty_info) #ifdef USE_SUBEXP_CALL static int -compile_call(CallNode* node, regex_t* reg) +compile_call(CallNode* node, regex_t* reg, ScanEnv* env) { int r; r = add_opcode(reg, OP_CALL); - if (r) return r; - r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg), - node->target); - if (r) return r; + if (r != 0) return r; + r = unset_addr_list_add(env->unset_addr_list, BBUF_GET_OFFSET_POS(reg), + NODE_CALL_BODY(node)); + if (r != 0) return r; r = add_abs_addr(reg, 0 /*dummy addr.*/); return r; } #endif static int -compile_tree_n_times(Node* node, int n, regex_t* reg) +compile_tree_n_times(Node* node, int n, regex_t* reg, ScanEnv* env) { int i, r; for (i = 0; i < n; i++) { - r = compile_tree(node, reg); - if (r) return r; + r = compile_tree(node, reg, env); + if (r != 0) return r; } return 0; } @@ -462,7 +541,7 @@ compile_length_string_node(Node* node, regex_t* reg) UChar *p, *prev; StrNode* sn; - sn = NSTR(node); + sn = STR_(node); if (sn->end <= sn->s) return 0; @@ -510,7 +589,7 @@ compile_string_node(Node* node, regex_t* reg) UChar *p, *prev, *end; StrNode* sn; - sn = NSTR(node); + sn = STR_(node); if (sn->end <= sn->s) return 0; @@ -529,7 +608,7 @@ compile_string_node(Node* node, regex_t* reg) } else { r = add_compile_string(prev, prev_len, slen, reg, ambig); - if (r) return r; + if (r != 0) return r; prev = p; slen = 1; @@ -578,11 +657,6 @@ compile_length_cclass_node(CClassNode* cc, regex_t* reg) { int len; - if (IS_NCCLASS_SHARE(cc)) { - len = SIZE_OPCODE + SIZE_POINTER; - return len; - } - if (IS_NULL(cc->mbuf)) { len = SIZE_OPCODE + SIZE_BITSET; } @@ -608,12 +682,6 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) { int r; - if (IS_NCCLASS_SHARE(cc)) { - add_opcode(reg, OP_CCLASS_NODE); - r = add_pointer(reg, cc); - return r; - } - if (IS_NULL(cc->mbuf)) { if (IS_NCCLASS_NOT(cc)) add_opcode(reg, OP_CCLASS_NOT); @@ -638,7 +706,7 @@ compile_cclass_node(CClassNode* cc, regex_t* reg) add_opcode(reg, OP_CCLASS_MIX); r = add_bitset(reg, cc->bs); - if (r) return r; + if (r != 0) return r; r = add_multi_byte_cclass(cc->mbuf, reg); } } @@ -678,46 +746,46 @@ entry_repeat_range(regex_t* reg, int id, int lower, int upper) } static int -compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info, - regex_t* reg) +compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info, + regex_t* reg, ScanEnv* env) { int r; int num_repeat = reg->num_repeat; r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG); - if (r) return r; + if (r != 0) return r; r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ reg->num_repeat++; - if (r) return r; + if (r != 0) return r; r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC); - if (r) return r; + if (r != 0) return r; r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper); - if (r) return r; + if (r != 0) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; + r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); + if (r != 0) return r; if ( #ifdef USE_SUBEXP_CALL - reg->num_call > 0 || + NODE_IS_IN_MULTI_ENTRY(qn) || #endif - IS_QUANTIFIER_IN_REPEAT(qn)) { + NODE_IS_IN_REAL_REPEAT(qn)) { r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG); } else { r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG); } - if (r) return r; + if (r != 0) return r; r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */ return r; } static int -is_anychar_star_quantifier(QtfrNode* qn) +is_anychar_star_quantifier(QuantNode* qn) { if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && - NTYPE(qn->target) == NT_CANY) + NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) return 1; else return 0; @@ -729,13 +797,13 @@ is_anychar_star_quantifier(QtfrNode* qn) #ifdef USE_COMBINATION_EXPLOSION_CHECK static int -compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) +compile_length_quantifier_node(QuantNode* qn, regex_t* reg) { int len, mod_tlen, cklen; int ckn; int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); + int empty_info = qn->body_empty_info; + int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (tlen < 0) return tlen; @@ -744,7 +812,7 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); /* anychar repeat */ - if (NTYPE(qn->target) == NT_CANY) { + if (NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) { if (qn->greedy && infinite) { if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; @@ -753,10 +821,10 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) } } - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else + if (empty_info == QUANT_BODY_IS_NOT_EMPTY) mod_tlen = tlen; + else + mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); if (infinite && qn->lower <= 1) { if (qn->greedy) { @@ -809,33 +877,33 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) } static int -compile_quantifier_node(QtfrNode* qn, regex_t* reg) +compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) { int r, mod_tlen; int ckn; int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); + int empty_info = qn->body_empty_info; + int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (tlen < 0) return tlen; ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); if (is_anychar_star_quantifier(qn)) { - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; + r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); + if (r != 0) return r; if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { if (IS_MULTILINE(reg->options)) r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); else r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); - if (r) return r; + if (r != 0) return r; if (CKN_ON) { r = add_state_check_num(reg, ckn); - if (r) return r; + if (r != 0) return r; } - return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); + return add_bytes(reg, STR_(qn->next_head_exact)->s, 1); } else { if (IS_MULTILINE(reg->options)) { @@ -848,7 +916,7 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) OP_STATE_CHECK_ANYCHAR_STAR : OP_ANYCHAR_STAR)); } - if (r) return r; + if (r != 0) return r; if (CKN_ON) r = add_state_check_num(reg, ckn); @@ -856,32 +924,32 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) } } - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else + if (empty_info == QUANT_BODY_IS_NOT_EMPTY) mod_tlen = tlen; + else + mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); if (infinite && qn->lower <= 1) { if (qn->greedy) { if (qn->lower == 1) { r = add_opcode_rel_addr(reg, OP_JUMP, (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); - if (r) return r; + if (r != 0) return r; } if (CKN_ON) { r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; + if (r != 0) return r; r = add_state_check_num(reg, ckn); - if (r) return r; + if (r != 0) return r; r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); } else { r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); } - if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; + if (r != 0) return r; + r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); + if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, -(mod_tlen + (int )SIZE_OP_JUMP + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); @@ -889,15 +957,15 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) else { if (qn->lower == 0) { r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); - if (r) return r; + if (r != 0) return r; } - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; + r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); + if (r != 0) return r; if (CKN_ON) { r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); - if (r) return r; + if (r != 0) return r; r = add_state_check_num(reg, ckn); - if (r) return r; + if (r != 0) return r; r = add_rel_addr(reg, -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); } @@ -908,8 +976,8 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) else if (qn->upper == 0) { if (qn->is_refered != 0) { /* /(?<n>..){0}/ */ r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); + if (r != 0) return r; + r = compile_tree(NODE_QUANT_BODY(qn), reg, env); } else r = 0; @@ -918,42 +986,42 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) if (qn->lower == 0) { if (CKN_ON) { r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; + if (r != 0) return r; r = add_state_check_num(reg, ckn); - if (r) return r; + if (r != 0) return r; r = add_rel_addr(reg, tlen); } else { r = add_opcode_rel_addr(reg, OP_PUSH, tlen); } - if (r) return r; + if (r != 0) return r; } - r = compile_tree(qn->target, reg); + r = compile_tree(NODE_QUANT_BODY(qn), reg, env); } else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ if (CKN_ON) { r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r) return r; + if (r != 0) return r; r = add_state_check_num(reg, ckn); - if (r) return r; + if (r != 0) return r; r = add_rel_addr(reg, SIZE_OP_JUMP); } else { r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); } - if (r) return r; + if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); + if (r != 0) return r; + r = compile_tree(NODE_QUANT_BODY(qn), reg, env); } else { - r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); + r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env); if (CKN_ON) { - if (r) return r; + if (r != 0) return r; r = add_opcode(reg, OP_STATE_CHECK); - if (r) return r; + if (r != 0) return r; r = add_state_check_num(reg, ckn); } } @@ -963,17 +1031,17 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) #else /* USE_COMBINATION_EXPLOSION_CHECK */ static int -compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) +compile_length_quantifier_node(QuantNode* qn, regex_t* reg) { int len, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); + int empty_info = qn->body_empty_info; + int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (tlen < 0) return tlen; /* anychar repeat */ - if (NTYPE(qn->target) == NT_CANY) { + if (NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) { if (qn->greedy && infinite) { if (IS_NOT_NULL(qn->next_head_exact)) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; @@ -982,10 +1050,10 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) } } - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else + if (empty_info == QUANT_BODY_IS_NOT_EMPTY) mod_tlen = tlen; + else + mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); if (infinite && (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { @@ -1028,25 +1096,25 @@ compile_length_quantifier_node(QtfrNode* qn, regex_t* reg) } static int -compile_quantifier_node(QtfrNode* qn, regex_t* reg) +compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) { int i, r, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->target_empty_info; - int tlen = compile_length_tree(qn->target, reg); + int empty_info = qn->body_empty_info; + int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (tlen < 0) return tlen; if (is_anychar_star_quantifier(qn)) { - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; + r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); + if (r != 0) return r; if (IS_NOT_NULL(qn->next_head_exact)) { if (IS_MULTILINE(reg->options)) r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); else r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); - if (r) return r; - return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); + if (r != 0) return r; + return add_bytes(reg, STR_(qn->next_head_exact)->s, 1); } else { if (IS_MULTILINE(reg->options)) @@ -1056,10 +1124,10 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) } } - if (empty_info != 0) - mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END); - else + if (empty_info == QUANT_BODY_IS_NOT_EMPTY) mod_tlen = tlen; + else + mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); if (infinite && (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { @@ -1075,94 +1143,94 @@ compile_quantifier_node(QtfrNode* qn, regex_t* reg) else { r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP); } - if (r) return r; + if (r != 0) return r; } else { - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; + r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); + if (r != 0) return r; } if (qn->greedy) { if (IS_NOT_NULL(qn->head_exact)) { r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1, mod_tlen + SIZE_OP_JUMP); - if (r) return r; - add_bytes(reg, NSTR(qn->head_exact)->s, 1); - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; + if (r != 0) return r; + add_bytes(reg, STR_(qn->head_exact)->s, 1); + r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); + if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1)); } else if (IS_NOT_NULL(qn->next_head_exact)) { r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT, mod_tlen + SIZE_OP_JUMP); - if (r) return r; - add_bytes(reg, NSTR(qn->next_head_exact)->s, 1); - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; + if (r != 0) return r; + add_bytes(reg, STR_(qn->next_head_exact)->s, 1); + r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); + if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT)); } else { r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); - if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; + if (r != 0) return r; + r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); + if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH)); } } else { r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); - if (r) return r; - r = compile_tree_empty_check(qn->target, reg, empty_info); - if (r) return r; + if (r != 0) return r; + r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); + if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); } } else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */ r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); + if (r != 0) return r; + r = compile_tree(NODE_QUANT_BODY(qn), reg, env); } else if (!infinite && qn->greedy && (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { int n = qn->upper - qn->lower; - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; + r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); + if (r != 0) return r; for (i = 0; i < n; i++) { r = add_opcode_rel_addr(reg, OP_PUSH, (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH); - if (r) return r; - r = compile_tree(qn->target, reg); - if (r) return r; + if (r != 0) return r; + r = compile_tree(NODE_QUANT_BODY(qn), reg, env); + if (r != 0) return r; } } else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); - if (r) return r; + if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r) return r; - r = compile_tree(qn->target, reg); + if (r != 0) return r; + r = compile_tree(NODE_QUANT_BODY(qn), reg, env); } else { - r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg); + r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env); } return r; } #endif /* USE_COMBINATION_EXPLOSION_CHECK */ static int -compile_length_option_node(EncloseNode* node, regex_t* reg) +compile_length_option_node(EnclosureNode* node, regex_t* reg) { int tlen; OnigOptionType prev = reg->options; - reg->options = node->option; - tlen = compile_length_tree(node->target, reg); + reg->options = node->o.option; + tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); reg->options = prev; if (tlen < 0) return tlen; @@ -1176,82 +1244,88 @@ compile_length_option_node(EncloseNode* node, regex_t* reg) } static int -compile_option_node(EncloseNode* node, regex_t* reg) +compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) { int r; OnigOptionType prev = reg->options; - if (IS_DYNAMIC_OPTION(prev ^ node->option)) { - r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option); - if (r) return r; + if (IS_DYNAMIC_OPTION(prev ^ node->o.option)) { + r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->o.option); + if (r != 0) return r; r = add_opcode_option(reg, OP_SET_OPTION, prev); - if (r) return r; + if (r != 0) return r; r = add_opcode(reg, OP_FAIL); - if (r) return r; + if (r != 0) return r; } - reg->options = node->option; - r = compile_tree(node->target, reg); + reg->options = node->o.option; + r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); reg->options = prev; - if (IS_DYNAMIC_OPTION(prev ^ node->option)) { - if (r) return r; + if (IS_DYNAMIC_OPTION(prev ^ node->o.option)) { + if (r != 0) return r; r = add_opcode_option(reg, OP_SET_OPTION, prev); } return r; } static int -compile_length_enclose_node(EncloseNode* node, regex_t* reg) +compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) { int len; int tlen; - if (node->type == ENCLOSE_OPTION) + if (node->type == ENCLOSURE_OPTION) return compile_length_option_node(node, reg); - if (node->target) { - tlen = compile_length_tree(node->target, reg); + if (NODE_ENCLOSURE_BODY(node)) { + tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); if (tlen < 0) return tlen; } else tlen = 0; switch (node->type) { - case ENCLOSE_MEMORY: + case ENCLOSURE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CALLED(node)) { + + if (node->m.regnum == 0 && NODE_IS_CALLED(node)) { + len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; + return len; + } + + if (NODE_IS_CALLED(node)) { len = SIZE_OP_MEMORY_START_PUSH + tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_ENCLOSE_RECURSION(node) + if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) + len += (NODE_IS_RECURSION(node) ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); else - len += (IS_ENCLOSE_RECURSION(node) + len += (NODE_IS_RECURSION(node) ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); } - else if (IS_ENCLOSE_RECURSION(node)) { + else if (NODE_IS_RECURSION(node)) { len = SIZE_OP_MEMORY_START_PUSH; - len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) + len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum) ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC); } else #endif { - if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) + if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum)) len = SIZE_OP_MEMORY_START_PUSH; else len = SIZE_OP_MEMORY_START; - len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum) + len += tlen + (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum) ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END); } break; - case ENCLOSE_STOP_BACKTRACK: - if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { - QtfrNode* qn = NQTFR(node->target); - tlen = compile_length_tree(qn->target, reg); + case ENCLOSURE_STOP_BACKTRACK: + if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) { + QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node)); + tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (tlen < 0) return tlen; len = tlen * qn->lower @@ -1273,102 +1347,124 @@ compile_length_enclose_node(EncloseNode* node, regex_t* reg) static int get_char_length_tree(Node* node, regex_t* reg, int* len); static int -compile_enclose_node(EncloseNode* node, regex_t* reg) +compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) { - int r, len; + int r; + int len; - if (node->type == ENCLOSE_OPTION) - return compile_option_node(node, reg); +#ifdef USE_SUBEXP_CALL + if (node->m.regnum == 0 && NODE_IS_CALLED(node)) { + r = add_opcode(reg, OP_CALL); + if (r != 0) return r; + node->m.called_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; + NODE_STATUS_ADD(node, NST_ADDR_FIXED); + r = add_abs_addr(reg, (int )node->m.called_addr); + if (r != 0) return r; + len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); + len += SIZE_OP_RETURN; + r = add_opcode_rel_addr(reg, OP_JUMP, len); + if (r != 0) return r; + + r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); + if (r != 0) return r; + r = add_opcode(reg, OP_RETURN); + return r; + } +#endif - switch (node->type) { - case ENCLOSE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CALLED(node)) { - r = add_opcode(reg, OP_CALL); - if (r) return r; - node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; - node->state |= NST_ADDR_FIXED; - r = add_abs_addr(reg, (int )node->call_addr); - if (r) return r; - len = compile_length_tree(node->target, reg); - len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); - else - len += (IS_ENCLOSE_RECURSION(node) - ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); + if (NODE_IS_CALLED(node)) { + r = add_opcode(reg, OP_CALL); + if (r != 0) return r; + node->m.called_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP; + NODE_STATUS_ADD(node, NST_ADDR_FIXED); + r = add_abs_addr(reg, (int )node->m.called_addr); + if (r != 0) return r; + len = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); + len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN); + if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) + len += (NODE_IS_RECURSION(node) + ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH); + else + len += (NODE_IS_RECURSION(node) + ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END); - r = add_opcode_rel_addr(reg, OP_JUMP, len); - if (r) return r; - } + r = add_opcode_rel_addr(reg, OP_JUMP, len); + if (r != 0) return r; + } #endif - if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum)) - r = add_opcode(reg, OP_MEMORY_START_PUSH); - else - r = add_opcode(reg, OP_MEMORY_START); - if (r) return r; - r = add_mem_num(reg, node->regnum); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; -#ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CALLED(node)) { - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) - ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); - else - r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node) - ? OP_MEMORY_END_REC : OP_MEMORY_END)); - if (r) return r; - r = add_mem_num(reg, node->regnum); - if (r) return r; - r = add_opcode(reg, OP_RETURN); - } - else if (IS_ENCLOSE_RECURSION(node)) { - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, OP_MEMORY_END_PUSH_REC); - else - r = add_opcode(reg, OP_MEMORY_END_REC); - if (r) return r; - r = add_mem_num(reg, node->regnum); - } - else + if (MEM_STATUS_AT0(reg->bt_mem_start, node->m.regnum)) + r = add_opcode(reg, OP_MEMORY_START_PUSH); + else + r = add_opcode(reg, OP_MEMORY_START); + if (r != 0) return r; + r = add_mem_num(reg, node->m.regnum); + if (r != 0) return r; + r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); + if (r != 0) return r; + +#ifdef USE_SUBEXP_CALL + if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) + r = add_opcode(reg, (NODE_IS_RECURSION(node) + ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); + else + r = add_opcode(reg, (NODE_IS_RECURSION(node) + ? OP_MEMORY_END_REC : OP_MEMORY_END)); + if (r != 0) return r; + r = add_mem_num(reg, node->m.regnum); + if (NODE_IS_CALLED(node)) { + if (r != 0) return r; + r = add_opcode(reg, OP_RETURN); + } +#else + if (MEM_STATUS_AT0(reg->bt_mem_end, node->m.regnum)) + r = add_opcode(reg, OP_MEMORY_END_PUSH); + else + r = add_opcode(reg, OP_MEMORY_END); + if (r != 0) return r; + r = add_mem_num(reg, node->m.regnum); #endif - { - if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)) - r = add_opcode(reg, OP_MEMORY_END_PUSH); - else - r = add_opcode(reg, OP_MEMORY_END); - if (r) return r; - r = add_mem_num(reg, node->regnum); - } + + return r; +} + +static int +compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) +{ + int r, len; + + if (node->type == ENCLOSURE_OPTION) + return compile_option_node(node, reg, env); + + switch (node->type) { + case ENCLOSURE_MEMORY: + r = compile_enclosure_memory_node(node, reg, env); break; - case ENCLOSE_STOP_BACKTRACK: - if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) { - QtfrNode* qn = NQTFR(node->target); - r = compile_tree_n_times(qn->target, qn->lower, reg); - if (r) return r; + case ENCLOSURE_STOP_BACKTRACK: + if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) { + QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node)); + r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); + if (r != 0) return r; - len = compile_length_tree(qn->target, reg); + len = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (len < 0) return len; r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); - if (r) return r; - r = compile_tree(qn->target, reg); - if (r) return r; + if (r != 0) return r; + r = compile_tree(NODE_QUANT_BODY(qn), reg, env); + if (r != 0) return r; r = add_opcode(reg, OP_POP); - if (r) return r; + if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); } else { r = add_opcode(reg, OP_PUSH_STOP_BT); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; + if (r != 0) return r; + r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); + if (r != 0) return r; r = add_opcode(reg, OP_POP_STOP_BT); } break; @@ -1387,8 +1483,8 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg) int len; int tlen = 0; - if (node->target) { - tlen = compile_length_tree(node->target, reg); + if (IS_NOT_NULL(NODE_ANCHOR_BODY(node))) { + tlen = compile_length_tree(NODE_ANCHOR_BODY(node), reg); if (tlen < 0) return tlen; } @@ -1415,7 +1511,7 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg) } static int -compile_anchor_node(AnchorNode* node, regex_t* reg) +compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) { int r, len; @@ -1436,19 +1532,19 @@ compile_anchor_node(AnchorNode* node, regex_t* reg) case ANCHOR_PREC_READ: r = add_opcode(reg, OP_PUSH_POS); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; + if (r != 0) return r; + r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); + if (r != 0) return r; r = add_opcode(reg, OP_POP_POS); break; case ANCHOR_PREC_READ_NOT: - len = compile_length_tree(node->target, reg); + len = compile_length_tree(NODE_ANCHOR_BODY(node), reg); if (len < 0) return len; r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; + if (r != 0) return r; + r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); + if (r != 0) return r; r = add_opcode(reg, OP_FAIL_POS); break; @@ -1456,37 +1552,37 @@ compile_anchor_node(AnchorNode* node, regex_t* reg) { int n; r = add_opcode(reg, OP_LOOK_BEHIND); - if (r) return r; + if (r != 0) return r; if (node->char_len < 0) { - r = get_char_length_tree(node->target, reg, &n); - if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n); + if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; } else n = node->char_len; r = add_length(reg, n); - if (r) return r; - r = compile_tree(node->target, reg); + if (r != 0) return r; + r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); } break; case ANCHOR_LOOK_BEHIND_NOT: { int n; - len = compile_length_tree(node->target, reg); + len = compile_length_tree(NODE_ANCHOR_BODY(node), reg); r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT, len + SIZE_OP_FAIL_LOOK_BEHIND_NOT); - if (r) return r; + if (r != 0) return r; if (node->char_len < 0) { - r = get_char_length_tree(node->target, reg, &n); - if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = get_char_length_tree(NODE_ANCHOR_BODY(node), reg, &n); + if (r != 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; } else n = node->char_len; r = add_length(reg, n); - if (r) return r; - r = compile_tree(node->target, reg); - if (r) return r; + if (r != 0) return r; + r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); + if (r != 0) return r; r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT); } break; @@ -1502,55 +1598,53 @@ compile_anchor_node(AnchorNode* node, regex_t* reg) static int compile_length_tree(Node* node, regex_t* reg) { - int len, type, r; + int len, r; - type = NTYPE(node); - switch (type) { - case NT_LIST: + switch (NODE_TYPE(node)) { + case NODE_LIST: len = 0; do { - r = compile_length_tree(NCAR(node), reg); + r = compile_length_tree(NODE_CAR(node), reg); if (r < 0) return r; len += r; - } while (IS_NOT_NULL(node = NCDR(node))); + } while (IS_NOT_NULL(node = NODE_CDR(node))); r = len; break; - case NT_ALT: + case NODE_ALT: { int n; n = r = 0; do { - r += compile_length_tree(NCAR(node), reg); + r += compile_length_tree(NODE_CAR(node), reg); n++; - } while (IS_NOT_NULL(node = NCDR(node))); + } while (IS_NOT_NULL(node = NODE_CDR(node))); r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1); } break; - case NT_STR: + case NODE_STR: if (NSTRING_IS_RAW(node)) - r = compile_length_string_raw_node(NSTR(node), reg); + r = compile_length_string_raw_node(STR_(node), reg); else r = compile_length_string_node(node, reg); break; - case NT_CCLASS: - r = compile_length_cclass_node(NCCLASS(node), reg); + case NODE_CCLASS: + r = compile_length_cclass_node(CCLASS_(node), reg); break; - case NT_CTYPE: - case NT_CANY: + case NODE_CTYPE: r = SIZE_OPCODE; break; - case NT_BREF: + case NODE_BREF: { - BRefNode* br = NBREF(node); + BRefNode* br = BREF_(node); #ifdef USE_BACKREF_WITH_LEVEL - if (IS_BACKREF_NEST_LEVEL(br)) { + if (NODE_IS_NEST_LEVEL(node)) { r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); } @@ -1567,21 +1661,21 @@ compile_length_tree(Node* node, regex_t* reg) break; #ifdef USE_SUBEXP_CALL - case NT_CALL: + case NODE_CALL: r = SIZE_OP_CALL; break; #endif - case NT_QTFR: - r = compile_length_quantifier_node(NQTFR(node), reg); + case NODE_QUANT: + r = compile_length_quantifier_node(QUANT_(node), reg); break; - case NT_ENCLOSE: - r = compile_length_enclose_node(NENCLOSE(node), reg); + case NODE_ENCLOSURE: + r = compile_length_enclosure_node(ENCLOSURE_(node), reg); break; - case NT_ANCHOR: - r = compile_length_anchor_node(NANCHOR(node), reg); + case NODE_ANCHOR: + r = compile_length_anchor_node(ANCHOR_(node), reg); break; default: @@ -1593,94 +1687,95 @@ compile_length_tree(Node* node, regex_t* reg) } static int -compile_tree(Node* node, regex_t* reg) +compile_tree(Node* node, regex_t* reg, ScanEnv* env) { - int n, type, len, pos, r = 0; + int n, len, pos, r = 0; - type = NTYPE(node); - switch (type) { - case NT_LIST: + switch (NODE_TYPE(node)) { + case NODE_LIST: do { - r = compile_tree(NCAR(node), reg); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + r = compile_tree(NODE_CAR(node), reg, env); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; - case NT_ALT: + case NODE_ALT: { Node* x = node; len = 0; do { - len += compile_length_tree(NCAR(x), reg); - if (NCDR(x) != NULL) { + len += compile_length_tree(NODE_CAR(x), reg); + if (IS_NOT_NULL(NODE_CDR(x))) { len += SIZE_OP_PUSH + SIZE_OP_JUMP; } - } while (IS_NOT_NULL(x = NCDR(x))); + } while (IS_NOT_NULL(x = NODE_CDR(x))); pos = reg->used + len; /* goal position */ do { - len = compile_length_tree(NCAR(node), reg); - if (IS_NOT_NULL(NCDR(node))) { + len = compile_length_tree(NODE_CAR(node), reg); + if (IS_NOT_NULL(NODE_CDR(node))) { r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); - if (r) break; + if (r != 0) break; } - r = compile_tree(NCAR(node), reg); - if (r) break; - if (IS_NOT_NULL(NCDR(node))) { + r = compile_tree(NODE_CAR(node), reg, env); + if (r != 0) break; + if (IS_NOT_NULL(NODE_CDR(node))) { len = pos - (reg->used + SIZE_OP_JUMP); r = add_opcode_rel_addr(reg, OP_JUMP, len); - if (r) break; + if (r != 0) break; } - } while (IS_NOT_NULL(node = NCDR(node))); + } while (IS_NOT_NULL(node = NODE_CDR(node))); } break; - case NT_STR: + case NODE_STR: if (NSTRING_IS_RAW(node)) - r = compile_string_raw_node(NSTR(node), reg); + r = compile_string_raw_node(STR_(node), reg); else r = compile_string_node(node, reg); break; - case NT_CCLASS: - r = compile_cclass_node(NCCLASS(node), reg); + case NODE_CCLASS: + r = compile_cclass_node(CCLASS_(node), reg); break; - case NT_CTYPE: + case NODE_CTYPE: { int op; - switch (NCTYPE(node)->ctype) { + switch (CTYPE_(node)->ctype) { + case CTYPE_ANYCHAR: + if (IS_MULTILINE(reg->options)) + r = add_opcode(reg, OP_ANYCHAR_ML); + else + r = add_opcode(reg, OP_ANYCHAR); + break; + case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->not != 0) op = OP_NOT_WORD; + if (CTYPE_(node)->not != 0) op = OP_NOT_WORD; else op = OP_WORD; + + r = add_opcode(reg, op); break; + default: return ONIGERR_TYPE_BUG; break; } - r = add_opcode(reg, op); } break; - case NT_CANY: - if (IS_MULTILINE(reg->options)) - r = add_opcode(reg, OP_ANYCHAR_ML); - else - r = add_opcode(reg, OP_ANYCHAR); - break; - - case NT_BREF: + case NODE_BREF: { - BRefNode* br = NBREF(node); + BRefNode* br = BREF_(node); #ifdef USE_BACKREF_WITH_LEVEL - if (IS_BACKREF_NEST_LEVEL(br)) { + if (NODE_IS_NEST_LEVEL(node)) { r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); - if (r) return r; + if (r != 0) return r; r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); - if (r) return r; + if (r != 0) return r; r = add_length(reg, br->nest_level); - if (r) return r; + if (r != 0) return r; goto add_bacref_mems; } @@ -1690,7 +1785,7 @@ compile_tree(Node* node, regex_t* reg) n = br->back_static[0]; if (IS_IGNORECASE(reg->options)) { r = add_opcode(reg, OP_BACKREFN_IC); - if (r) return r; + if (r != 0) return r; r = add_mem_num(reg, n); } else { @@ -1699,7 +1794,7 @@ compile_tree(Node* node, regex_t* reg) case 2: r = add_opcode(reg, OP_BACKREF2); break; default: r = add_opcode(reg, OP_BACKREFN); - if (r) return r; + if (r != 0) return r; r = add_mem_num(reg, n); break; } @@ -1715,43 +1810,43 @@ compile_tree(Node* node, regex_t* reg) else { r = add_opcode(reg, OP_BACKREF_MULTI); } - if (r) return r; + if (r != 0) return r; #ifdef USE_BACKREF_WITH_LEVEL add_bacref_mems: #endif r = add_length(reg, br->back_num); - if (r) return r; + if (r != 0) return r; p = BACKREFS_P(br); for (i = br->back_num - 1; i >= 0; i--) { r = add_mem_num(reg, p[i]); - if (r) return r; + if (r != 0) return r; } } } break; #ifdef USE_SUBEXP_CALL - case NT_CALL: - r = compile_call(NCALL(node), reg); + case NODE_CALL: + r = compile_call(CALL_(node), reg, env); break; #endif - case NT_QTFR: - r = compile_quantifier_node(NQTFR(node), reg); + case NODE_QUANT: + r = compile_quantifier_node(QUANT_(node), reg, env); break; - case NT_ENCLOSE: - r = compile_enclose_node(NENCLOSE(node), reg); + case NODE_ENCLOSURE: + r = compile_enclosure_node(ENCLOSURE_(node), reg, env); break; - case NT_ANCHOR: - r = compile_anchor_node(NANCHOR(node), reg); + case NODE_ANCHOR: + r = compile_anchor_node(ANCHOR_(node), reg, env); break; default: #ifdef ONIG_DEBUG - fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node)); + fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node)); #endif break; } @@ -1767,50 +1862,50 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) int r = 0; Node* node = *plink; - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: do { - r = noname_disable_map(&(NCAR(node)), map, counter); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + r = noname_disable_map(&(NODE_CAR(node)), map, counter); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; - case NT_QTFR: + case NODE_QUANT: { - Node** ptarget = &(NQTFR(node)->target); + Node** ptarget = &(NODE_BODY(node)); Node* old = *ptarget; r = noname_disable_map(ptarget, map, counter); - if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) { + if (*ptarget != old && NODE_TYPE(*ptarget) == NODE_QUANT) { onig_reduce_nested_quantifier(node, *ptarget); } } break; - case NT_ENCLOSE: + case NODE_ENCLOSURE: { - EncloseNode* en = NENCLOSE(node); - if (en->type == ENCLOSE_MEMORY) { - if (IS_ENCLOSE_NAMED_GROUP(en)) { + EnclosureNode* en = ENCLOSURE_(node); + if (en->type == ENCLOSURE_MEMORY) { + if (NODE_IS_NAMED_GROUP(node)) { (*counter)++; - map[en->regnum].new_val = *counter; - en->regnum = *counter; - r = noname_disable_map(&(en->target), map, counter); + map[en->m.regnum].new_val = *counter; + en->m.regnum = *counter; + r = noname_disable_map(&(NODE_BODY(node)), map, counter); } else { - *plink = en->target; - en->target = NULL_NODE; + *plink = NODE_BODY(node); + NODE_BODY(node) = NULL_NODE; onig_node_free(node); r = noname_disable_map(plink, map, counter); } } else - r = noname_disable_map(&(en->target), map, counter); + r = noname_disable_map(&(NODE_BODY(node)), map, counter); } break; - case NT_ANCHOR: - if (NANCHOR(node)->target) - r = noname_disable_map(&(NANCHOR(node)->target), map, counter); + case NODE_ANCHOR: + if (IS_NOT_NULL(NODE_BODY(node))) + r = noname_disable_map(&(NODE_BODY(node)), map, counter); break; default: @@ -1825,9 +1920,9 @@ renumber_node_backref(Node* node, GroupNumRemap* map) { int i, pos, n, old_num; int *backs; - BRefNode* bn = NBREF(node); + BRefNode* bn = BREF_(node); - if (! IS_BACKREF_NAME_REF(bn)) + if (! NODE_IS_BY_NAME(node)) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; old_num = bn->back_num; @@ -1853,27 +1948,26 @@ renumber_by_map(Node* node, GroupNumRemap* map) { int r = 0; - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: do { - r = renumber_by_map(NCAR(node), map); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + r = renumber_by_map(NODE_CAR(node), map); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; - case NT_QTFR: - r = renumber_by_map(NQTFR(node)->target, map); - break; - case NT_ENCLOSE: - r = renumber_by_map(NENCLOSE(node)->target, map); + + case NODE_QUANT: + case NODE_ENCLOSURE: + r = renumber_by_map(NODE_BODY(node), map); break; - case NT_BREF: + case NODE_BREF: r = renumber_node_backref(node, map); break; - case NT_ANCHOR: - if (NANCHOR(node)->target) - r = renumber_by_map(NANCHOR(node)->target, map); + case NODE_ANCHOR: + if (IS_NOT_NULL(NODE_BODY(node))) + r = renumber_by_map(NODE_BODY(node), map); break; default: @@ -1888,28 +1982,26 @@ numbered_ref_check(Node* node) { int r = 0; - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: do { - r = numbered_ref_check(NCAR(node)); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - case NT_QTFR: - r = numbered_ref_check(NQTFR(node)->target); - break; - case NT_ENCLOSE: - r = numbered_ref_check(NENCLOSE(node)->target); + r = numbered_ref_check(NODE_CAR(node)); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; - case NT_BREF: - if (! IS_BACKREF_NAME_REF(NBREF(node))) - return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + case NODE_ANCHOR: + if (IS_NULL(NODE_BODY(node))) + break; + /* fall */ + case NODE_QUANT: + case NODE_ENCLOSURE: + r = numbered_ref_check(NODE_BODY(node)); break; - case NT_ANCHOR: - if (NANCHOR(node)->target) - r = numbered_ref_check(NANCHOR(node)->target); + case NODE_BREF: + if (! NODE_IS_BY_NAME(node)) + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; break; default: @@ -1923,7 +2015,7 @@ static int disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) { int r, i, pos, counter; - BitStatusType loc; + MemStatusType loc; GroupNumRemap* map; map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1)); @@ -1940,16 +2032,16 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) for (i = 1, pos = 1; i <= env->num_mem; i++) { if (map[i].new_val > 0) { - SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i]; + SCANENV_MEMENV(env)[pos] = SCANENV_MEMENV(env)[i]; pos++; } } loc = env->capture_history; - BIT_STATUS_CLEAR(env->capture_history); + MEM_STATUS_CLEAR(env->capture_history); for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { - if (BIT_STATUS_AT(loc, i)) { - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val); + if (MEM_STATUS_AT(loc, i)) { + MEM_STATUS_ON_SIMPLE(env->capture_history, map[i].new_val); } } @@ -1965,13 +2057,13 @@ static int unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) { int i, offset; - EncloseNode* en; + EnclosureNode* en; AbsAddrType addr; for (i = 0; i < uslist->num; i++) { - en = NENCLOSE(uslist->us[i].target); - if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; - addr = en->call_addr; + en = ENCLOSURE_(uslist->us[i].target); + if (! NODE_IS_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; + addr = en->m.called_addr; offset = uslist->us[i].offset; BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); @@ -1980,75 +2072,6 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) } #endif -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT -static int -quantifiers_memory_node_info(Node* node) -{ - int r = 0; - - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: - { - int v; - do { - v = quantifiers_memory_node_info(NCAR(node)); - if (v > r) r = v; - } while (v >= 0 && IS_NOT_NULL(node = NCDR(node))); - } - break; - -#ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) { - return NQ_TARGET_IS_EMPTY_REC; /* tiny version */ - } - else - r = quantifiers_memory_node_info(NCALL(node)->target); - break; -#endif - - case NT_QTFR: - { - QtfrNode* qn = NQTFR(node); - if (qn->upper != 0) { - r = quantifiers_memory_node_info(qn->target); - } - } - break; - - case NT_ENCLOSE: - { - EncloseNode* en = NENCLOSE(node); - switch (en->type) { - case ENCLOSE_MEMORY: - return NQ_TARGET_IS_EMPTY_MEM; - break; - - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = quantifiers_memory_node_info(en->target); - break; - default: - break; - } - } - break; - - case NT_BREF: - case NT_STR: - case NT_CTYPE: - case NT_CCLASS: - case NT_CANY: - case NT_ANCHOR: - default: - break; - } - - return r; -} -#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */ - #define GET_CHAR_LEN_VARLEN -1 #define GET_CHAR_LEN_TOP_ALT_VARLEN -2 @@ -2062,23 +2085,23 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) level++; *len = 0; - switch (NTYPE(node)) { - case NT_LIST: + switch (NODE_TYPE(node)) { + case NODE_LIST: do { - r = get_char_length_tree1(NCAR(node), reg, &tlen, level); + r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level); if (r == 0) *len = distance_add(*len, tlen); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; - case NT_ALT: + case NODE_ALT: { int tlen2; int varlen = 0; - r = get_char_length_tree1(NCAR(node), reg, &tlen, level); - while (r == 0 && IS_NOT_NULL(node = NCDR(node))) { - r = get_char_length_tree1(NCAR(node), reg, &tlen2, level); + r = get_char_length_tree1(NODE_CAR(node), reg, &tlen, level); + while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))) { + r = get_char_length_tree1(NODE_CAR(node), reg, &tlen2, level); if (r == 0) { if (tlen != tlen2) varlen = 1; @@ -2097,9 +2120,9 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case NT_STR: + case NODE_STR: { - StrNode* sn = NSTR(node); + StrNode* sn = STR_(node); UChar *s = sn->s; while (s < sn->end) { s += enclen(reg->enc, s); @@ -2108,11 +2131,11 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case NT_QTFR: + case NODE_QUANT: { - QtfrNode* qn = NQTFR(node); + QuantNode* qn = QUANT_(node); if (qn->lower == qn->upper) { - r = get_char_length_tree1(qn->target, reg, &tlen, level); + r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level); if (r == 0) *len = distance_multiply(tlen, qn->lower); } @@ -2122,43 +2145,42 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) break; #ifdef USE_SUBEXP_CALL - case NT_CALL: - if (! IS_CALL_RECURSION(NCALL(node))) - r = get_char_length_tree1(NCALL(node)->target, reg, len, level); + case NODE_CALL: + if (! NODE_IS_RECURSION(node)) + r = get_char_length_tree1(NODE_BODY(node), reg, len, level); else r = GET_CHAR_LEN_VARLEN; break; #endif - case NT_CTYPE: + case NODE_CTYPE: *len = 1; break; - case NT_CCLASS: - case NT_CANY: + case NODE_CCLASS: *len = 1; break; - case NT_ENCLOSE: + case NODE_ENCLOSURE: { - EncloseNode* en = NENCLOSE(node); + EnclosureNode* en = ENCLOSURE_(node); switch (en->type) { - case ENCLOSE_MEMORY: + case ENCLOSURE_MEMORY: #ifdef USE_SUBEXP_CALL - if (IS_ENCLOSE_CLEN_FIXED(en)) + if (NODE_IS_CLEN_FIXED(node)) *len = en->char_len; else { - r = get_char_length_tree1(en->target, reg, len, level); + r = get_char_length_tree1(NODE_BODY(node), reg, len, level); if (r == 0) { en->char_len = *len; - SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED); + NODE_STATUS_ADD(node, NST_CLEN_FIXED); } } break; #endif - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = get_char_length_tree1(en->target, reg, len, level); + case ENCLOSURE_OPTION: + case ENCLOSURE_STOP_BACKTRACK: + r = get_char_length_tree1(NODE_BODY(node), reg, len, level); break; default: break; @@ -2166,7 +2188,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case NT_ANCHOR: + case NODE_ANCHOR: break; default: @@ -2185,28 +2207,32 @@ get_char_length_tree(Node* node, regex_t* reg, int* len) /* x is not included y ==> 1 : 0 */ static int -is_not_included(Node* x, Node* y, regex_t* reg) +is_exclusive(Node* x, Node* y, regex_t* reg) { int i, len; OnigCodePoint code; UChar *p; - int ytype; + NodeType ytype; retry: - ytype = NTYPE(y); - switch (NTYPE(x)) { - case NT_CTYPE: + ytype = NODE_TYPE(y); + switch (NODE_TYPE(x)) { + case NODE_CTYPE: { + if (CTYPE_(x)->ctype == CTYPE_ANYCHAR || + CTYPE_(y)->ctype == CTYPE_ANYCHAR) + break; + switch (ytype) { - case NT_CTYPE: - if (NCTYPE(y)->ctype == NCTYPE(x)->ctype && - NCTYPE(y)->not != NCTYPE(x)->not) + case NODE_CTYPE: + if (CTYPE_(y)->ctype == CTYPE_(x)->ctype && + CTYPE_(y)->not != CTYPE_(x)->not) return 1; else return 0; break; - case NT_CCLASS: + case NODE_CCLASS: swap: { Node* tmp; @@ -2215,7 +2241,7 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case NT_STR: + case NODE_STR: goto swap; break; @@ -2225,14 +2251,18 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case NT_CCLASS: + case NODE_CCLASS: { - CClassNode* xc = NCCLASS(x); + CClassNode* xc = CCLASS_(x); switch (ytype) { - case NT_CTYPE: - switch (NCTYPE(y)->ctype) { + case NODE_CTYPE: + switch (CTYPE_(y)->ctype) { + case CTYPE_ANYCHAR: + return 0; + break; + case ONIGENC_CTYPE_WORD: - if (NCTYPE(y)->not == 0) { + if (CTYPE_(y)->not == 0) { if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (BITSET_AT(xc->bs, i)) { @@ -2266,10 +2296,10 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case NT_CCLASS: + case NODE_CCLASS: { int v; - CClassNode* yc = NCCLASS(y); + CClassNode* yc = CCLASS_(y); for (i = 0; i < SINGLE_BYTE_SIZE; i++) { v = BITSET_AT(xc->bs, i); @@ -2288,7 +2318,7 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case NT_STR: + case NODE_STR: goto swap; break; @@ -2298,30 +2328,33 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case NT_STR: + case NODE_STR: { - StrNode* xs = NSTR(x); + StrNode* xs = STR_(x); if (NSTRING_LEN(x) == 0) break; //c = *(xs->s); switch (ytype) { - case NT_CTYPE: - switch (NCTYPE(y)->ctype) { + case NODE_CTYPE: + switch (CTYPE_(y)->ctype) { + case CTYPE_ANYCHAR: + break; + case ONIGENC_CTYPE_WORD: if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end)) - return NCTYPE(y)->not; + return CTYPE_(y)->not; else - return !(NCTYPE(y)->not); + return !(CTYPE_(y)->not); break; default: break; } break; - case NT_CCLASS: + case NODE_CCLASS: { - CClassNode* cc = NCCLASS(y); + CClassNode* cc = CCLASS_(y); code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s, xs->s + ONIGENC_MBC_MAXLEN(reg->enc)); @@ -2329,10 +2362,10 @@ is_not_included(Node* x, Node* y, regex_t* reg) } break; - case NT_STR: + case NODE_STR: { UChar *q; - StrNode* ys = NSTR(y); + StrNode* ys = STR_(y); len = NSTRING_LEN(x); if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { @@ -2365,29 +2398,31 @@ get_head_value_node(Node* node, int exact, regex_t* reg) { Node* n = NULL_NODE; - switch (NTYPE(node)) { - case NT_BREF: - case NT_ALT: - case NT_CANY: + switch (NODE_TYPE(node)) { + case NODE_BREF: + case NODE_ALT: #ifdef USE_SUBEXP_CALL - case NT_CALL: + case NODE_CALL: #endif break; - case NT_CTYPE: - case NT_CCLASS: + case NODE_CTYPE: + if (CTYPE_(node)->ctype == CTYPE_ANYCHAR) + break; + /* fall */ + case NODE_CCLASS: if (exact == 0) { n = node; } break; - case NT_LIST: - n = get_head_value_node(NCAR(node), exact, reg); + case NODE_LIST: + n = get_head_value_node(NODE_CAR(node), exact, reg); break; - case NT_STR: + case NODE_STR: { - StrNode* sn = NSTR(node); + StrNode* sn = STR_(node); if (sn->end <= sn->s) break; @@ -2401,43 +2436,43 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } break; - case NT_QTFR: + case NODE_QUANT: { - QtfrNode* qn = NQTFR(node); + QuantNode* qn = QUANT_(node); if (qn->lower > 0) { if (IS_NOT_NULL(qn->head_exact)) n = qn->head_exact; else - n = get_head_value_node(qn->target, exact, reg); + n = get_head_value_node(NODE_BODY(node), exact, reg); } } break; - case NT_ENCLOSE: + case NODE_ENCLOSURE: { - EncloseNode* en = NENCLOSE(node); + EnclosureNode* en = ENCLOSURE_(node); switch (en->type) { - case ENCLOSE_OPTION: + case ENCLOSURE_OPTION: { OnigOptionType options = reg->options; - reg->options = NENCLOSE(node)->option; - n = get_head_value_node(NENCLOSE(node)->target, exact, reg); + reg->options = ENCLOSURE_(node)->o.option; + n = get_head_value_node(NODE_BODY(node), exact, reg); reg->options = options; } break; - case ENCLOSE_MEMORY: - case ENCLOSE_STOP_BACKTRACK: - n = get_head_value_node(en->target, exact, reg); + case ENCLOSURE_MEMORY: + case ENCLOSURE_STOP_BACKTRACK: + n = get_head_value_node(NODE_BODY(node), exact, reg); break; } } break; - case NT_ANCHOR: - if (NANCHOR(node)->type == ANCHOR_PREC_READ) - n = get_head_value_node(NANCHOR(node)->target, exact, reg); + case NODE_ANCHOR: + if (ANCHOR_(node)->type == ANCHOR_PREC_READ) + n = get_head_value_node(NODE_BODY(node), exact, reg); break; default: @@ -2448,46 +2483,45 @@ get_head_value_node(Node* node, int exact, regex_t* reg) } static int -check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) +check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask) { - int type, r = 0; + NodeType type; + int r = 0; - type = NTYPE(node); - if ((NTYPE2BIT(type) & type_mask) == 0) + type = NODE_TYPE(node); + if ((NODE_TYPE2BIT(type) & type_mask) == 0) return 1; switch (type) { - case NT_LIST: - case NT_ALT: + case NODE_LIST: + case NODE_ALT: do { - r = check_type_tree(NCAR(node), type_mask, enclose_mask, + r = check_type_tree(NODE_CAR(node), type_mask, enclosure_mask, anchor_mask); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; - case NT_QTFR: - r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask, - anchor_mask); + case NODE_QUANT: + r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); break; - case NT_ENCLOSE: + case NODE_ENCLOSURE: { - EncloseNode* en = NENCLOSE(node); - if ((en->type & enclose_mask) == 0) + EnclosureNode* en = ENCLOSURE_(node); + if ((en->type & enclosure_mask) == 0) return 1; - r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask); + r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); } break; - case NT_ANCHOR: - type = NANCHOR(node)->type; + case NODE_ANCHOR: + type = ANCHOR_(node)->type; if ((type & anchor_mask) == 0) return 1; - if (NANCHOR(node)->target) - r = check_type_tree(NANCHOR(node)->target, - type_mask, enclose_mask, anchor_mask); + if (IS_NOT_NULL(NODE_BODY(node))) + r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); break; default: @@ -2496,250 +2530,282 @@ check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask) return r; } -static int -get_min_len(Node* node, OnigLen *min, ScanEnv* env) +static OnigLen +get_min_len(Node* node, ScanEnv* env) { + OnigLen len; OnigLen tmin; - int r = 0; - *min = 0; - switch (NTYPE(node)) { - case NT_BREF: + len = 0; + switch (NODE_TYPE(node)) { + case NODE_BREF: { int i; int* backs; - Node** nodes = SCANENV_MEM_NODES(env); - BRefNode* br = NBREF(node); - if (br->state & NST_RECURSION) break; + MemEnv* mem_env = SCANENV_MEMENV(env); + BRefNode* br = BREF_(node); + if (NODE_IS_RECURSION(node)) break; backs = BACKREFS_P(br); - if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = get_min_len(nodes[backs[0]], min, env); - if (r != 0) break; + len = get_min_len(mem_env[backs[0]].node, env); for (i = 1; i < br->back_num; i++) { - if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = get_min_len(nodes[backs[i]], &tmin, env); - if (r != 0) break; - if (*min > tmin) *min = tmin; + tmin = get_min_len(mem_env[backs[i]].node, env); + if (len > tmin) len = tmin; } } break; #ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) { - EncloseNode* en = NENCLOSE(NCALL(node)->target); - if (IS_ENCLOSE_MIN_FIXED(en)) - *min = en->min_len; + case NODE_CALL: + { + Node* t = NODE_BODY(node); + if (NODE_IS_RECURSION(node)) { + if (NODE_IS_MIN_FIXED(t)) + len = ENCLOSURE_(t)->min_len; + } + else + len = get_min_len(t, env); } - else - r = get_min_len(NCALL(node)->target, min, env); break; #endif - case NT_LIST: + case NODE_LIST: do { - r = get_min_len(NCAR(node), &tmin, env); - if (r == 0) *min += tmin; - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + tmin = get_min_len(NODE_CAR(node), env); + len += tmin; + } while (IS_NOT_NULL(node = NODE_CDR(node))); break; - case NT_ALT: + case NODE_ALT: { Node *x, *y; y = node; do { - x = NCAR(y); - r = get_min_len(x, &tmin, env); - if (r != 0) break; - if (y == node) *min = tmin; - else if (*min > tmin) *min = tmin; - } while (r == 0 && IS_NOT_NULL(y = NCDR(y))); + x = NODE_CAR(y); + tmin = get_min_len(x, env); + if (y == node) len = tmin; + else if (len > tmin) len = tmin; + } while (IS_NOT_NULL(y = NODE_CDR(y))); } break; - case NT_STR: + case NODE_STR: { - StrNode* sn = NSTR(node); - *min = sn->end - sn->s; + StrNode* sn = STR_(node); + len = sn->end - sn->s; } break; - case NT_CTYPE: - *min = 1; + case NODE_CTYPE: + case NODE_CCLASS: + len = 1; break; - case NT_CCLASS: - case NT_CANY: - *min = 1; - break; - - case NT_QTFR: + case NODE_QUANT: { - QtfrNode* qn = NQTFR(node); + QuantNode* qn = QUANT_(node); if (qn->lower > 0) { - r = get_min_len(qn->target, min, env); - if (r == 0) - *min = distance_multiply(*min, qn->lower); + len = get_min_len(NODE_BODY(node), env); + len = distance_multiply(len, qn->lower); } } break; - case NT_ENCLOSE: + case NODE_ENCLOSURE: { - EncloseNode* en = NENCLOSE(node); + EnclosureNode* en = ENCLOSURE_(node); switch (en->type) { - case ENCLOSE_MEMORY: - if (IS_ENCLOSE_MIN_FIXED(en)) - *min = en->min_len; + case ENCLOSURE_MEMORY: + if (NODE_IS_MIN_FIXED(node)) + len = en->min_len; else { - if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - *min = 0; // recursive + if (NODE_IS_MARK1(node)) + len = 0; // recursive else { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = get_min_len(en->target, min, env); - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - if (r == 0) { - en->min_len = *min; - SET_ENCLOSE_STATUS(node, NST_MIN_FIXED); - } + NODE_STATUS_ADD(node, NST_MARK1); + len = get_min_len(NODE_BODY(node), env); + NODE_STATUS_REMOVE(node, NST_MARK1); + + en->min_len = len; + NODE_STATUS_ADD(node, NST_MIN_FIXED); } } break; - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = get_min_len(en->target, min, env); + case ENCLOSURE_OPTION: + case ENCLOSURE_STOP_BACKTRACK: + len = get_min_len(NODE_BODY(node), env); break; } } break; - case NT_ANCHOR: + case NODE_ANCHOR: default: break; } - return r; + return len; } -static int -get_max_len(Node* node, OnigLen *max, ScanEnv* env) +static OnigLen +get_max_len(Node* node, ScanEnv* env) { + OnigLen len; OnigLen tmax; - int r = 0; - *max = 0; - switch (NTYPE(node)) { - case NT_LIST: + len = 0; + switch (NODE_TYPE(node)) { + case NODE_LIST: do { - r = get_max_len(NCAR(node), &tmax, env); - if (r == 0) - *max = distance_add(*max, tmax); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + tmax = get_max_len(NODE_CAR(node), env); + len = distance_add(len, tmax); + } while (IS_NOT_NULL(node = NODE_CDR(node))); break; - case NT_ALT: + case NODE_ALT: do { - r = get_max_len(NCAR(node), &tmax, env); - if (r == 0 && *max < tmax) *max = tmax; - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + tmax = get_max_len(NODE_CAR(node), env); + if (len < tmax) len = tmax; + } while (IS_NOT_NULL(node = NODE_CDR(node))); break; - case NT_STR: + case NODE_STR: { - StrNode* sn = NSTR(node); - *max = sn->end - sn->s; + StrNode* sn = STR_(node); + len = sn->end - sn->s; } break; - case NT_CTYPE: - *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); + case NODE_CTYPE: + case NODE_CCLASS: + len = ONIGENC_MBC_MAXLEN_DIST(env->enc); break; - case NT_CCLASS: - case NT_CANY: - *max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - break; - - case NT_BREF: + case NODE_BREF: { int i; int* backs; - Node** nodes = SCANENV_MEM_NODES(env); - BRefNode* br = NBREF(node); - if (br->state & NST_RECURSION) { - *max = ONIG_INFINITE_DISTANCE; + MemEnv* mem_env = SCANENV_MEMENV(env); + BRefNode* br = BREF_(node); + if (NODE_IS_RECURSION(node)) { + len = ONIG_INFINITE_DISTANCE; break; } backs = BACKREFS_P(br); for (i = 0; i < br->back_num; i++) { - if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - r = get_max_len(nodes[backs[i]], &tmax, env); - if (r != 0) break; - if (*max < tmax) *max = tmax; + tmax = get_max_len(mem_env[backs[i]].node, env); + if (len < tmax) len = tmax; } } break; #ifdef USE_SUBEXP_CALL - case NT_CALL: - if (! IS_CALL_RECURSION(NCALL(node))) - r = get_max_len(NCALL(node)->target, max, env); + case NODE_CALL: + if (! NODE_IS_RECURSION(node)) + len = get_max_len(NODE_BODY(node), env); else - *max = ONIG_INFINITE_DISTANCE; + len = ONIG_INFINITE_DISTANCE; break; #endif - case NT_QTFR: + case NODE_QUANT: { - QtfrNode* qn = NQTFR(node); + QuantNode* qn = QUANT_(node); if (qn->upper != 0) { - r = get_max_len(qn->target, max, env); - if (r == 0 && *max != 0) { + len = get_max_len(NODE_BODY(node), env); + if (len != 0) { if (! IS_REPEAT_INFINITE(qn->upper)) - *max = distance_multiply(*max, qn->upper); + len = distance_multiply(len, qn->upper); else - *max = ONIG_INFINITE_DISTANCE; + len = ONIG_INFINITE_DISTANCE; } } } break; - case NT_ENCLOSE: + case NODE_ENCLOSURE: { - EncloseNode* en = NENCLOSE(node); + EnclosureNode* en = ENCLOSURE_(node); switch (en->type) { - case ENCLOSE_MEMORY: - if (IS_ENCLOSE_MAX_FIXED(en)) - *max = en->max_len; + case ENCLOSURE_MEMORY: + if (NODE_IS_MAX_FIXED(node)) + len = en->max_len; else { - if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - *max = ONIG_INFINITE_DISTANCE; + if (NODE_IS_MARK1(node)) + len = ONIG_INFINITE_DISTANCE; else { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = get_max_len(en->target, max, env); - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - if (r == 0) { - en->max_len = *max; - SET_ENCLOSE_STATUS(node, NST_MAX_FIXED); - } + NODE_STATUS_ADD(node, NST_MARK1); + len = get_max_len(NODE_BODY(node), env); + NODE_STATUS_REMOVE(node, NST_MARK1); + + en->max_len = len; + NODE_STATUS_ADD(node, NST_MAX_FIXED); } } break; - case ENCLOSE_OPTION: - case ENCLOSE_STOP_BACKTRACK: - r = get_max_len(en->target, max, env); + case ENCLOSURE_OPTION: + case ENCLOSURE_STOP_BACKTRACK: + len = get_max_len(NODE_BODY(node), env); break; } } break; - case NT_ANCHOR: + case NODE_ANCHOR: + default: + break; + } + + return len; +} + +static int +check_backrefs(Node* node, ScanEnv* env) +{ + int r; + + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: + do { + r = check_backrefs(NODE_CAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); + break; + + case NODE_ANCHOR: + if (! ANCHOR_HAS_BODY(ANCHOR_(node))) { + r = 0; + break; + } + /* fall */ + case NODE_QUANT: + case NODE_ENCLOSURE: + r = check_backrefs(NODE_BODY(node), env); + break; + + case NODE_BREF: + { + int i; + BRefNode* br = BREF_(node); + int* backs = BACKREFS_P(br); + MemEnv* mem_env = SCANENV_MEMENV(env); + + for (i = 0; i < br->back_num; i++) { + if (backs[i] > env->num_mem) + return ONIGERR_INVALID_BACKREF; + + NODE_STATUS_ADD(mem_env[backs[i]].node, NST_BACKREF); + } + r = 0; + } + break; + default: + r = 0; break; } @@ -2749,18 +2815,17 @@ get_max_len(Node* node, OnigLen *max, ScanEnv* env) #ifdef USE_SUBEXP_CALL -#define RECURSION_EXIST 1 -#define RECURSION_INFINITE 2 +#define RECURSION_EXIST (1<<0) +#define RECURSION_MUST (1<<1) +#define RECURSION_INFINITE (1<<2) static int -subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) +infinite_recursive_call_check(Node* node, ScanEnv* env, int head) { - int type; int r = 0; - type = NTYPE(node); - switch (type) { - case NT_LIST: + switch (NODE_TYPE(node)) { + case NODE_LIST: { Node *x; OnigLen min; @@ -2768,64 +2833,70 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) x = node; do { - ret = subexp_inf_recursive_check(NCAR(x), env, head); - if (ret < 0 || ret == RECURSION_INFINITE) return ret; + ret = infinite_recursive_call_check(NODE_CAR(x), env, head); + if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret; r |= ret; if (head) { - ret = get_min_len(NCAR(x), &min, env); - if (ret != 0) return ret; + min = get_min_len(NODE_CAR(x), env); if (min != 0) head = 0; } - } while (IS_NOT_NULL(x = NCDR(x))); + } while (IS_NOT_NULL(x = NODE_CDR(x))); } break; - case NT_ALT: + case NODE_ALT: { int ret; - r = RECURSION_EXIST; + int must; + + must = RECURSION_MUST; do { - ret = subexp_inf_recursive_check(NCAR(node), env, head); - if (ret < 0 || ret == RECURSION_INFINITE) return ret; - r &= ret; - } while (IS_NOT_NULL(node = NCDR(node))); - } - break; + ret = infinite_recursive_call_check(NODE_CAR(node), env, head); + if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret; - case NT_QTFR: - r = subexp_inf_recursive_check(NQTFR(node)->target, env, head); - if (r == RECURSION_EXIST) { - if (NQTFR(node)->lower == 0) r = 0; + r |= (ret & RECURSION_EXIST); + must &= ret; + } while (IS_NOT_NULL(node = NODE_CDR(node))); + r |= must; } break; - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_inf_recursive_check(an->target, env, head); - break; - } + case NODE_QUANT: + r = infinite_recursive_call_check(NODE_BODY(node), env, head); + if (r < 0) return r; + if ((r & RECURSION_MUST) != 0) { + if (QUANT_(node)->lower == 0) + r &= ~RECURSION_MUST; } break; - case NT_CALL: - r = subexp_inf_recursive_check(NCALL(node)->target, env, head); + case NODE_ANCHOR: + if (! ANCHOR_HAS_BODY(ANCHOR_(node))) + break; + /* fall */ + case NODE_CALL: + r = infinite_recursive_call_check(NODE_BODY(node), env, head); break; - case NT_ENCLOSE: - if (IS_ENCLOSE_MARK2(NENCLOSE(node))) - return 0; - else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE); - else { - SET_ENCLOSE_STATUS(node, NST_MARK2); - r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head); - CLEAR_ENCLOSE_STATUS(node, NST_MARK2); + case NODE_ENCLOSURE: + { + EnclosureNode* en = ENCLOSURE_(node); + + if (en->type == ENCLOSURE_MEMORY) { + if (NODE_IS_MARK2(node)) + return 0; + else if (NODE_IS_MARK1(node)) + return (head == 0 ? RECURSION_EXIST | RECURSION_MUST + : RECURSION_EXIST | RECURSION_MUST | RECURSION_INFINITE); + else { + NODE_STATUS_ADD(node, NST_MARK2); + r = infinite_recursive_call_check(NODE_BODY(node), env, head); + NODE_STATUS_REMOVE(node, NST_MARK2); + } + } + else { + r = infinite_recursive_call_check(NODE_BODY(node), env, head); + } } break; @@ -2837,53 +2908,53 @@ subexp_inf_recursive_check(Node* node, ScanEnv* env, int head) } static int -subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) +infinite_recursive_call_check_trav(Node* node, ScanEnv* env) { - int type; - int r = 0; + int r; - type = NTYPE(node); - switch (type) { - case NT_LIST: - case NT_ALT: + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: do { - r = subexp_inf_recursive_check_trav(NCAR(node), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + r = infinite_recursive_call_check_trav(NODE_CAR(node), env); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; - case NT_QTFR: - r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env); - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_inf_recursive_check_trav(an->target, env); - break; - } + case NODE_ANCHOR: + if (! ANCHOR_HAS_BODY(ANCHOR_(node))) { + r = 0; + break; } + /* fall */ + case NODE_QUANT: + r = infinite_recursive_call_check_trav(NODE_BODY(node), env); break; - case NT_ENCLOSE: + case NODE_ENCLOSURE: { - EncloseNode* en = NENCLOSE(node); + EnclosureNode* en = ENCLOSURE_(node); + + if (en->type == ENCLOSURE_MEMORY) { + if (NODE_IS_RECURSION(node) && NODE_IS_CALLED(node)) { + int ret; + + NODE_STATUS_ADD(node, NST_MARK1); - if (IS_ENCLOSE_RECURSION(en)) { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = subexp_inf_recursive_check(en->target, env, 1); - if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION; - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); + ret = infinite_recursive_call_check(NODE_BODY(node), env, 1); + if (ret < 0) return ret; + else if ((ret & (RECURSION_MUST | RECURSION_INFINITE)) != 0) + return ONIGERR_NEVER_ENDING_RECURSION; + + NODE_STATUS_REMOVE(node, NST_MARK1); + } } - r = subexp_inf_recursive_check_trav(en->target, env); } + + r = infinite_recursive_call_check_trav(NODE_BODY(node), env); break; default: + r = 0; break; } @@ -2891,227 +2962,129 @@ subexp_inf_recursive_check_trav(Node* node, ScanEnv* env) } static int -subexp_recursive_check(Node* node) +recursive_call_check(Node* node) { - int r = 0; + int r; - switch (NTYPE(node)) { - case NT_LIST: - case NT_ALT: + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: + r = 0; do { - r |= subexp_recursive_check(NCAR(node)); - } while (IS_NOT_NULL(node = NCDR(node))); + r |= recursive_call_check(NODE_CAR(node)); + } while (IS_NOT_NULL(node = NODE_CDR(node))); break; - case NT_QTFR: - r = subexp_recursive_check(NQTFR(node)->target); - break; - - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_recursive_check(an->target); - break; - } + case NODE_ANCHOR: + if (! ANCHOR_HAS_BODY(ANCHOR_(node))) { + r = 0; + break; } + /* fall */ + case NODE_QUANT: + r = recursive_call_check(NODE_BODY(node)); break; - case NT_CALL: - r = subexp_recursive_check(NCALL(node)->target); - if (r != 0) SET_CALL_RECURSION(node); + case NODE_CALL: + r = recursive_call_check(NODE_BODY(node)); + if (r != 0) NODE_STATUS_ADD(node, NST_RECURSION); break; - case NT_ENCLOSE: - if (IS_ENCLOSE_MARK2(NENCLOSE(node))) - return 0; - else if (IS_ENCLOSE_MARK1(NENCLOSE(node))) - return 1; /* recursion */ - else { - SET_ENCLOSE_STATUS(node, NST_MARK2); - r = subexp_recursive_check(NENCLOSE(node)->target); - CLEAR_ENCLOSE_STATUS(node, NST_MARK2); + case NODE_ENCLOSURE: + { + EnclosureNode* en = ENCLOSURE_(node); + + if (en->type == ENCLOSURE_MEMORY) { + if (NODE_IS_MARK2(node)) + return 0; + else if (NODE_IS_MARK1(node)) + return 1; /* recursion */ + else { + NODE_STATUS_ADD(node, NST_MARK2); + r = recursive_call_check(NODE_BODY(node)); + NODE_STATUS_REMOVE(node, NST_MARK2); + } + } + else { + r = recursive_call_check(NODE_BODY(node)); + } } break; default: + r = 0; break; } return r; } +#define IN_RECURSION (1<<0) +#define FOUND_CALLED_NODE 1 static int -subexp_recursive_check_trav(Node* node, ScanEnv* env) +recursive_call_check_trav(Node* node, ScanEnv* env, int state) { -#define FOUND_CALLED_NODE 1 - - int type; int r = 0; - type = NTYPE(node); - switch (type) { - case NT_LIST: - case NT_ALT: + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: { int ret; do { - ret = subexp_recursive_check_trav(NCAR(node), env); + ret = recursive_call_check_trav(NODE_CAR(node), env, state); if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE; else if (ret < 0) return ret; - } while (IS_NOT_NULL(node = NCDR(node))); + } while (IS_NOT_NULL(node = NODE_CDR(node))); } break; - case NT_QTFR: - r = subexp_recursive_check_trav(NQTFR(node)->target, env); - if (NQTFR(node)->upper == 0) { + case NODE_QUANT: + r = recursive_call_check_trav(NODE_BODY(node), env, state); + if (QUANT_(node)->upper == 0) { if (r == FOUND_CALLED_NODE) - NQTFR(node)->is_refered = 1; + QUANT_(node)->is_refered = 1; } break; - case NT_ANCHOR: + case NODE_ANCHOR: { - AnchorNode* an = NANCHOR(node); - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = subexp_recursive_check_trav(an->target, env); - break; - } + AnchorNode* an = ANCHOR_(node); + if (ANCHOR_HAS_BODY(an)) + r = recursive_call_check_trav(NODE_ANCHOR_BODY(an), env, state); } break; - case NT_ENCLOSE: + case NODE_ENCLOSURE: { - EncloseNode* en = NENCLOSE(node); - - if (! IS_ENCLOSE_RECURSION(en)) { - if (IS_ENCLOSE_CALLED(en)) { - SET_ENCLOSE_STATUS(node, NST_MARK1); - r = subexp_recursive_check(en->target); - if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION); - CLEAR_ENCLOSE_STATUS(node, NST_MARK1); - } - } - r = subexp_recursive_check_trav(en->target, env); - if (IS_ENCLOSE_CALLED(en)) - r |= FOUND_CALLED_NODE; - } - break; - - default: - break; - } - - return r; -} - -static int -setup_subexp_call(Node* node, ScanEnv* env) -{ - int type; - int r = 0; - - type = NTYPE(node); - switch (type) { - case NT_LIST: - do { - r = setup_subexp_call(NCAR(node), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_ALT: - do { - r = setup_subexp_call(NCAR(node), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); - break; - - case NT_QTFR: - r = setup_subexp_call(NQTFR(node)->target, env); - break; - case NT_ENCLOSE: - r = setup_subexp_call(NENCLOSE(node)->target, env); - break; - - case NT_CALL: - { - CallNode* cn = NCALL(node); - Node** nodes = SCANENV_MEM_NODES(env); - - if (cn->group_num != 0) { - int gnum = cn->group_num; - -#ifdef USE_NAMED_GROUP - if (env->num_named > 0 && - IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { - return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; - } -#endif - if (gnum > env->num_mem) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_GROUP_REFERENCE; - } + EnclosureNode* en = ENCLOSURE_(node); + + if (en->type == ENCLOSURE_MEMORY) { + if (NODE_IS_CALLED(node) || (state & IN_RECURSION) != 0) { + if (! NODE_IS_RECURSION(node)) { + NODE_STATUS_ADD(node, NST_MARK1); + r = recursive_call_check(NODE_BODY(node)); + if (r != 0) + NODE_STATUS_ADD(node, NST_RECURSION); + NODE_STATUS_REMOVE(node, NST_MARK1); + } -#ifdef USE_NAMED_GROUP - set_call_attr: -#endif - cn->target = nodes[cn->group_num]; - if (IS_NULL(cn->target)) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - SET_ENCLOSE_STATUS(cn->target, NST_CALLED); - BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num); - cn->unset_addr_list = env->unset_addr_list; - } -#ifdef USE_NAMED_GROUP - else { - int *refs; - - int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, - &refs); - if (n <= 0) { - onig_scan_env_set_error_string(env, - ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end); - return ONIGERR_UNDEFINED_NAME_REFERENCE; - } - else if (n > 1) { - onig_scan_env_set_error_string(env, - ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end); - return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; - } - else { - cn->group_num = refs[0]; - goto set_call_attr; + if (NODE_IS_CALLED(node)) + r = FOUND_CALLED_NODE; } } -#endif - } - break; - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); + { + int ret; + int state1 = state; - switch (an->type) { - case ANCHOR_PREC_READ: - case ANCHOR_PREC_READ_NOT: - case ANCHOR_LOOK_BEHIND: - case ANCHOR_LOOK_BEHIND_NOT: - r = setup_subexp_call(an->target, env); - break; + if (NODE_IS_RECURSION(node)) + state1 |= IN_RECURSION; + + ret = recursive_call_check_trav(NODE_BODY(node), env, state1); + if (ret == FOUND_CALLED_NODE) + r = FOUND_CALLED_NODE; } } break; @@ -3122,6 +3095,7 @@ setup_subexp_call(Node* node, ScanEnv* env) return r; } + #endif /* divide different length alternatives in look-behind. @@ -3132,30 +3106,28 @@ static int divide_look_behind_alternatives(Node* node) { Node *head, *np, *insert_node; - AnchorNode* an = NANCHOR(node); + AnchorNode* an = ANCHOR_(node); int anc_type = an->type; - /* fprintf(stderr, "divide_look_behind: %d\n", (int )node); */ - - head = an->target; - np = NCAR(head); + head = NODE_ANCHOR_BODY(an); + np = NODE_CAR(head); swap_node(node, head); - NCAR(node) = head; - NANCHOR(head)->target = np; + NODE_CAR(node) = head; + NODE_BODY(head) = np; np = node; - while ((np = NCDR(np)) != NULL_NODE) { + while (IS_NOT_NULL(np = NODE_CDR(np))) { insert_node = onig_node_new_anchor(anc_type); CHECK_NULL_RETURN_MEMERR(insert_node); - NANCHOR(insert_node)->target = NCAR(np); - NCAR(np) = insert_node; + NODE_BODY(insert_node) = NODE_CAR(np); + NODE_CAR(np) = insert_node; } if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { np = node; do { - SET_NTYPE(np, NT_LIST); /* alt -> list */ - } while ((np = NCDR(np)) != NULL_NODE); + SET_NODE_TYPE(np, NODE_LIST); /* alt -> list */ + } while (IS_NOT_NULL(np = NODE_CDR(np))); } return 0; } @@ -3164,11 +3136,9 @@ static int setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) { int r, len; - AnchorNode* an = NANCHOR(node); - - /* fprintf(stderr, "setup_look_behind: %x\n", (int )node); */ + AnchorNode* an = ANCHOR_(node); - r = get_char_length_tree(an->target, reg, &len); + r = get_char_length_tree(NODE_ANCHOR_BODY(an), reg, &len); if (r == 0) an->char_len = len; else if (r == GET_CHAR_LEN_VARLEN) @@ -3186,44 +3156,43 @@ setup_look_behind(Node* node, regex_t* reg, ScanEnv* env) static int next_setup(Node* node, Node* next_node, regex_t* reg) { - int type; + NodeType type; retry: - type = NTYPE(node); - if (type == NT_QTFR) { - QtfrNode* qn = NQTFR(node); + type = NODE_TYPE(node); + if (type == NODE_QUANT) { + QuantNode* qn = QUANT_(node); if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) { -#ifdef USE_QTFR_PEEK_NEXT +#ifdef USE_QUANT_PEEK_NEXT Node* n = get_head_value_node(next_node, 1, reg); /* '\0': for UTF-16BE etc... */ - if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') { + if (IS_NOT_NULL(n) && STR_(n)->s[0] != '\0') { qn->next_head_exact = n; } #endif /* automatic posseivation a*b ==> (?>a*)b */ if (qn->lower <= 1) { - int ttype = NTYPE(qn->target); - if (IS_NODE_TYPE_SIMPLE(ttype)) { + if (NODE_IS_SIMPLE_TYPE(NODE_BODY(node))) { Node *x, *y; - x = get_head_value_node(qn->target, 0, reg); + x = get_head_value_node(NODE_BODY(node), 0, reg); if (IS_NOT_NULL(x)) { y = get_head_value_node(next_node, 0, reg); - if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) { - Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK); + if (IS_NOT_NULL(y) && is_exclusive(x, y, reg)) { + Node* en = onig_node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); CHECK_NULL_RETURN_MEMERR(en); - SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT); + NODE_STATUS_ADD(en, NST_STOP_BT_SIMPLE_REPEAT); swap_node(node, en); - NENCLOSE(node)->target = en; + NODE_BODY(node) = en; } } } } } } - else if (type == NT_ENCLOSE) { - EncloseNode* en = NENCLOSE(node); - if (en->type == ENCLOSE_MEMORY) { - node = en->target; + else if (type == NODE_ENCLOSURE) { + EnclosureNode* en = ENCLOSURE_(node); + if (en->type == ENCLOSURE_MEMORY) { + node = NODE_BODY(node); goto retry; } } @@ -3237,7 +3206,7 @@ update_string_node_case_fold(regex_t* reg, Node *node) UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; UChar *sbuf, *ebuf, *sp; int r, i, len, sbuf_size; - StrNode* sn = NSTR(node); + StrNode* sn = STR_(node); end = sn->end; sbuf_size = (end - sn->s) * 2; @@ -3319,11 +3288,11 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], xnode = onig_node_new_list(NULL, NULL); if (IS_NULL(xnode)) goto mem_err; - NCAR(var_anode) = xnode; + NODE_CAR(var_anode) = xnode; anode = onig_node_new_alt(NULL_NODE, NULL_NODE); if (IS_NULL(anode)) goto mem_err; - NCAR(xnode) = anode; + NODE_CAR(xnode) = anode; } else { *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE); @@ -3333,7 +3302,7 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], snode = onig_node_new_str(p, p + slen); if (IS_NULL(snode)) goto mem_err; - NCAR(anode) = snode; + NODE_CAR(anode) = snode; for (i = 0; i < item_num; i++) { snode = onig_node_new_str(NULL, NULL); @@ -3379,18 +3348,18 @@ expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], goto mem_err; } - NCAR(an) = xnode; + NODE_CAR(an) = xnode; } else { - NCAR(an) = snode; + NODE_CAR(an) = snode; } - NCDR(var_anode) = an; + NODE_CDR(var_anode) = an; var_anode = an; } else { - NCAR(an) = snode; - NCDR(anode) = an; + NODE_CAR(an) = snode; + NODE_CDR(anode) = an; anode = an; } } @@ -3415,7 +3384,7 @@ expand_case_fold_string(Node* node, regex_t* reg) UChar *start, *end, *p; Node *top_root, *root, *snode, *prev_node; OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; - StrNode* sn = NSTR(node); + StrNode* sn = STR_(node); if (NSTRING_IS_AMBIG(node)) return 0; @@ -3485,7 +3454,7 @@ expand_case_fold_string(Node* node, regex_t* reg) } } - root = NCAR(prev_node); + root = NODE_CAR(prev_node); } else { /* r == 0 */ if (IS_NOT_NULL(root)) { @@ -3555,37 +3524,35 @@ expand_case_fold_string(Node* node, regex_t* reg) static int setup_comb_exp_check(Node* node, int state, ScanEnv* env) { - int type; int r = state; - type = NTYPE(node); - switch (type) { - case NT_LIST: + switch (NODE_TYPE(node)) { + case NODE_LIST: { Node* prev = NULL_NODE; do { - r = setup_comb_exp_check(NCAR(node), r, env); - prev = NCAR(node); - } while (r >= 0 && IS_NOT_NULL(node = NCDR(node))); + r = setup_comb_exp_check(NODE_CAR(node), r, env); + prev = NODE_CAR(node); + } while (r >= 0 && IS_NOT_NULL(node = NODE_CDR(node))); } break; - case NT_ALT: + case NODE_ALT: { int ret; do { - ret = setup_comb_exp_check(NCAR(node), state, env); + ret = setup_comb_exp_check(NODE_CAR(node), state, env); r |= ret; - } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node))); + } while (ret >= 0 && IS_NOT_NULL(node = NODE_CDR(node))); } break; - case NT_QTFR: + case NODE_QUANT: { int child_state = state; int add_state = 0; - QtfrNode* qn = NQTFR(node); - Node* target = qn->target; + QuantNode* qn = QUANT_(node); + Node* target = NODE_QUANT_BODY(qn); int var_num; if (! IS_REPEAT_INFINITE(qn->upper)) { @@ -3595,11 +3562,11 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ if (env->backrefed_mem == 0) { - if (NTYPE(qn->target) == NT_ENCLOSE) { - EncloseNode* en = NENCLOSE(qn->target); - if (en->type == ENCLOSE_MEMORY) { - if (NTYPE(en->target) == NT_QTFR) { - QtfrNode* q = NQTFR(en->target); + if (NODE_TYPE(NODE_QUANT_BODY(qn)) == NODE_ENCLOSURE) { + EnclosureNode* en = ENCLOSURE_(NODE_QUANT_BODY(qn)); + if (en->type == ENCLOSURE_MEMORY) { + if (NODE_TYPE(NODE_ENCLOSURE_BODY(en)) == NODE_QUANT) { + QuantNode* q = QUANT_(NODE_ENCLOSURE_BODY(en)); if (IS_REPEAT_INFINITE(q->upper) && q->greedy == qn->greedy) { qn->upper = (qn->lower == 0 ? 1 : qn->lower); @@ -3645,33 +3612,33 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) } break; - case NT_ENCLOSE: + case NODE_ENCLOSURE: { - EncloseNode* en = NENCLOSE(node); + EnclosureNode* en = ENCLOSURE_(node); switch (en->type) { - case ENCLOSE_MEMORY: + case ENCLOSURE_MEMORY: { if (env->curr_max_regnum < en->regnum) env->curr_max_regnum = en->regnum; - r = setup_comb_exp_check(en->target, state, env); + r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env); } break; default: - r = setup_comb_exp_check(en->target, state, env); + r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env); break; } } break; #ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) + case NODE_CALL: + if (NODE_IS_RECURSION(node)) env->has_recursion = 1; else - r = setup_comb_exp_check(NCALL(node)->target, state, env); + r = setup_comb_exp_check(NODE_BODY(node), state, env); break; #endif @@ -3683,206 +3650,695 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) } #endif -#define IN_ALT (1<<0) -#define IN_NOT (1<<1) -#define IN_REPEAT (1<<2) -#define IN_VAR_REPEAT (1<<3) -#define IN_CALL (1<<4) -#define IN_RECCALL (1<<5) - -/* setup_tree does the following work. - 1. check empty loop. (set qn->target_empty_info) - 2. expand ignore-case in char class. - 3. set memory status bit flags. (reg->mem_stats) - 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. - 5. find invalid patterns in look-behind. - 6. expand repeated string. - */ +#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT static int -setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) +quantifiers_memory_node_info(Node* node) { - int type; - int r = 0; + int r = QUANT_BODY_IS_EMPTY; - type = NTYPE(node); - switch (type) { - case NT_LIST: + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: { - Node* prev = NULL_NODE; + int v; do { - r = setup_tree(NCAR(node), reg, state, env); - if (IS_NOT_NULL(prev) && r == 0) { - r = next_setup(prev, NCAR(node), reg); + v = quantifiers_memory_node_info(NODE_CAR(node)); + if (v > r) r = v; + } while (IS_NOT_NULL(node = NODE_CDR(node))); + } + break; + +#ifdef USE_SUBEXP_CALL + case NODE_CALL: + if (NODE_IS_RECURSION(node)) { + return QUANT_BODY_IS_EMPTY_REC; /* tiny version */ + } + else + r = quantifiers_memory_node_info(NODE_BODY(node)); + break; +#endif + + case NODE_QUANT: + { + QuantNode* qn = QUANT_(node); + if (qn->upper != 0) { + r = quantifiers_memory_node_info(NODE_BODY(node)); + } + } + break; + + case NODE_ENCLOSURE: + { + EnclosureNode* en = ENCLOSURE_(node); + switch (en->type) { + case ENCLOSURE_MEMORY: + if (NODE_IS_RECURSION(node)) { + return QUANT_BODY_IS_EMPTY_REC; } - prev = NCAR(node); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + return QUANT_BODY_IS_EMPTY_MEM; + break; + + case ENCLOSURE_OPTION: + case ENCLOSURE_STOP_BACKTRACK: + r = quantifiers_memory_node_info(NODE_BODY(node)); + break; + default: + break; + } } break; - case NT_ALT: + case NODE_BREF: + case NODE_STR: + case NODE_CTYPE: + case NODE_CCLASS: + case NODE_ANCHOR: + default: + break; + } + + return r; +} +#endif /* USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT */ + + +#define IN_ALT (1<<0) +#define IN_NOT (1<<1) +#define IN_REAL_REPEAT (1<<2) +#define IN_VAR_REPEAT (1<<3) +#define IN_ZERO_REPEAT (1<<4) +#define IN_MULTI_ENTRY (1<<5) + +#ifdef USE_SUBEXP_CALL + +#ifdef __GNUC__ +__inline +#endif +static int +setup_call_node_call(CallNode* cn, ScanEnv* env, int state) +{ + MemEnv* mem_env = SCANENV_MEMENV(env); + + if (cn->by_number != 0) { + int gnum = cn->group_num; + +#ifdef USE_NAMED_GROUP + if (env->num_named > 0 && + IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && + !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { + return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; + } +#endif + if (gnum > env->num_mem) { + onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_GROUP_REFERENCE, + cn->name, cn->name_end); + return ONIGERR_UNDEFINED_GROUP_REFERENCE; + } + +#ifdef USE_NAMED_GROUP + set_call_attr: +#endif + NODE_CALL_BODY(cn) = mem_env[cn->group_num].node; + if (IS_NULL(NODE_CALL_BODY(cn))) { + onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE, + cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + } +#ifdef USE_NAMED_GROUP + else { + int *refs; + + int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end, &refs); + if (n <= 0) { + onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE, + cn->name, cn->name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + else if (n > 1) { + onig_scan_env_set_error_string(env, ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, + cn->name, cn->name_end); + return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL; + } + else { + cn->group_num = refs[0]; + goto set_call_attr; + } + } +#endif + + return 0; +} + +static void +setup_call2_call(Node* node) +{ + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: do { - r = setup_tree(NCAR(node), reg, (state | IN_ALT), env); - } while (r == 0 && IS_NOT_NULL(node = NCDR(node))); + setup_call2_call(NODE_CAR(node)); + } while (IS_NOT_NULL(node = NODE_CDR(node))); break; - case NT_CCLASS: + case NODE_QUANT: + setup_call2_call(NODE_BODY(node)); break; - case NT_STR: - if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { - r = expand_case_fold_string(node, reg); + case NODE_ANCHOR: + if (ANCHOR_HAS_BODY(ANCHOR_(node))) + setup_call2_call(NODE_BODY(node)); + break; + + case NODE_ENCLOSURE: + if (! NODE_IS_MARK1(node)) { + NODE_STATUS_ADD(node, NST_MARK1); + setup_call2_call(NODE_BODY(node)); + NODE_STATUS_REMOVE(node, NST_MARK1); } break; - case NT_CTYPE: - case NT_CANY: + case NODE_CALL: + if (! NODE_IS_MARK1(node)) { + NODE_STATUS_ADD(node, NST_MARK1); + { + CallNode* cn = CALL_(node); + Node* called = NODE_CALL_BODY(cn); + + cn->entry_count++; + + NODE_STATUS_ADD(called, NST_CALLED); + ENCLOSURE_(called)->m.entry_count++; + setup_call2_call(called); + } + NODE_STATUS_REMOVE(node, NST_MARK1); + } break; -#ifdef USE_SUBEXP_CALL - case NT_CALL: + default: break; -#endif + } +} + +static int +setup_call(Node* node, ScanEnv* env, int state) +{ + int r; + + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: + do { + r = setup_call(NODE_CAR(node), env, state); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); + break; + + case NODE_QUANT: + if (QUANT_(node)->upper == 0) + state |= IN_ZERO_REPEAT; + + r = setup_call(NODE_BODY(node), env, state); + break; + + case NODE_ANCHOR: + if (ANCHOR_HAS_BODY(ANCHOR_(node))) + r = setup_call(NODE_BODY(node), env, state); + else + r = 0; + break; + + case NODE_ENCLOSURE: + if ((state & IN_ZERO_REPEAT) != 0) { + NODE_STATUS_ADD(node, NST_IN_ZERO_REPEAT); + ENCLOSURE_(node)->m.entry_count--; + } + r = setup_call(NODE_BODY(node), env, state); + break; + + case NODE_CALL: + if ((state & IN_ZERO_REPEAT) != 0) { + NODE_STATUS_ADD(node, NST_IN_ZERO_REPEAT); + CALL_(node)->entry_count--; + } + + r = setup_call_node_call(CALL_(node), env, state); + break; + + default: + r = 0; + break; + } + + return r; +} + +static int +setup_call2(Node* node) +{ + int r = 0; + + switch (NODE_TYPE(node)) { + case NODE_LIST: + case NODE_ALT: + do { + r = setup_call2(NODE_CAR(node)); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); + break; + + case NODE_QUANT: + if (QUANT_(node)->upper != 0) + r = setup_call2(NODE_BODY(node)); + break; + + case NODE_ANCHOR: + if (ANCHOR_HAS_BODY(ANCHOR_(node))) + r = setup_call2(NODE_BODY(node)); + break; + + case NODE_ENCLOSURE: + if (! NODE_IS_IN_ZERO_REPEAT(node)) + r = setup_call2(NODE_BODY(node)); + break; + + case NODE_CALL: + if (! NODE_IS_IN_ZERO_REPEAT(node)) { + setup_call2_call(node); + } + break; + + default: + break; + } - case NT_BREF: + return r; +} + + +static void +setup_called_state_call(Node* node, int state) +{ + switch (NODE_TYPE(node)) { + case NODE_ALT: + state |= IN_ALT; + /* fall */ + case NODE_LIST: + do { + setup_called_state_call(NODE_CAR(node), state); + } while (IS_NOT_NULL(node = NODE_CDR(node))); + break; + + case NODE_QUANT: { - int i; - int* p; - Node** nodes = SCANENV_MEM_NODES(env); - BRefNode* br = NBREF(node); - p = BACKREFS_P(br); - for (i = 0; i < br->back_num; i++) { - if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; - BIT_STATUS_ON_AT(env->backrefed_mem, p[i]); - BIT_STATUS_ON_AT(env->bt_mem_start, p[i]); -#ifdef USE_BACKREF_WITH_LEVEL - if (IS_BACKREF_NEST_LEVEL(br)) { - BIT_STATUS_ON_AT(env->bt_mem_end, p[i]); + QuantNode* qn = QUANT_(node); + + if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2) + state |= IN_REAL_REPEAT; + if (qn->lower != qn->upper) + state |= IN_VAR_REPEAT; + + setup_called_state_call(NODE_QUANT_BODY(qn), state); + } + break; + + case NODE_ANCHOR: + { + AnchorNode* an = ANCHOR_(node); + + switch (an->type) { + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND_NOT: + state |= IN_NOT; + /* fall */ + case ANCHOR_PREC_READ: + case ANCHOR_LOOK_BEHIND: + setup_called_state_call(NODE_ANCHOR_BODY(an), state); + break; + default: + break; + } + } + break; + + case NODE_ENCLOSURE: + { + EnclosureNode* en = ENCLOSURE_(node); + + if (en->type == ENCLOSURE_MEMORY) { + if (NODE_IS_MARK1(node)) { + if ((~en->m.called_state & state) != 0) { + en->m.called_state |= state; + setup_called_state_call(NODE_BODY(node), state); + } + } + else { + NODE_STATUS_ADD(node, NST_MARK1); + en->m.called_state |= state; + setup_called_state_call(NODE_BODY(node), state); + NODE_STATUS_REMOVE(node, NST_MARK1); } + } + else { + setup_called_state_call(NODE_BODY(node), state); + } + } + break; + + case NODE_CALL: + setup_called_state_call(NODE_BODY(node), state); + break; + + default: + break; + } +} + +static void +setup_called_state(Node* node, int state) +{ + switch (NODE_TYPE(node)) { + case NODE_ALT: + state |= IN_ALT; + /* fall */ + case NODE_LIST: + do { + setup_called_state(NODE_CAR(node), state); + } while (IS_NOT_NULL(node = NODE_CDR(node))); + break; + +#ifdef USE_SUBEXP_CALL + case NODE_CALL: + setup_called_state_call(node, state); + break; #endif - SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED); + + case NODE_ENCLOSURE: + { + EnclosureNode* en = ENCLOSURE_(node); + + switch (en->type) { + case ENCLOSURE_MEMORY: + if (en->m.entry_count > 1) + state |= IN_MULTI_ENTRY; + + en->m.called_state |= state; + /* fall */ + case ENCLOSURE_OPTION: + case ENCLOSURE_STOP_BACKTRACK: + setup_called_state(NODE_BODY(node), state); + break; } } break; - case NT_QTFR: + case NODE_QUANT: { - OnigLen d; - QtfrNode* qn = NQTFR(node); - Node* target = qn->target; + QuantNode* qn = QUANT_(node); - if ((state & IN_REPEAT) != 0) { - qn->state |= NST_IN_REPEAT; + if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2) + state |= IN_REAL_REPEAT; + if (qn->lower != qn->upper) + state |= IN_VAR_REPEAT; + + setup_called_state(NODE_QUANT_BODY(qn), state); + } + break; + + case NODE_ANCHOR: + { + AnchorNode* an = ANCHOR_(node); + + switch (an->type) { + case ANCHOR_PREC_READ_NOT: + case ANCHOR_LOOK_BEHIND_NOT: + state |= IN_NOT; + /* fall */ + case ANCHOR_PREC_READ: + case ANCHOR_LOOK_BEHIND: + setup_called_state(NODE_ANCHOR_BODY(an), state); + break; + default: + break; } + } + break; - if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { - r = get_min_len(target, &d, env); - if (r) break; - if (d == 0) { - qn->target_empty_info = NQ_TARGET_IS_EMPTY; -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - r = quantifiers_memory_node_info(target); - if (r < 0) break; - if (r > 0) { - qn->target_empty_info = r; - } + case NODE_BREF: + case NODE_STR: + case NODE_CTYPE: + case NODE_CCLASS: + default: + break; + } +} + +#endif /* USE_SUBEXP_CALL */ + + +static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env); + +#ifdef __GNUC__ +__inline #endif -#if 0 - r = get_max_len(target, &d, env); - if (r == 0 && d == 0) { - /* ()* ==> ()?, ()+ ==> () */ - qn->upper = 1; - if (qn->lower > 1) qn->lower = 1; - if (NTYPE(target) == NT_STR) { - qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */ - } - } +static int +setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env) +{ +/* allowed node types in look-behind */ +#define ALLOWED_TYPE_IN_LB \ + ( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE \ + | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT | BIT_NODE_CALL ) + +#define ALLOWED_ENCLOSURE_IN_LB ( ENCLOSURE_MEMORY | ENCLOSURE_OPTION ) +#define ALLOWED_ENCLOSURE_IN_LB_NOT ENCLOSURE_OPTION + +#define ALLOWED_ANCHOR_IN_LB \ + ( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF \ + | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND \ + | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) + +#define ALLOWED_ANCHOR_IN_LB_NOT \ + ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE \ + | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND \ + | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) + + int r; + AnchorNode* an = ANCHOR_(node); + + switch (an->type) { + case ANCHOR_PREC_READ: + r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env); + break; + case ANCHOR_PREC_READ_NOT: + r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env); + break; + + case ANCHOR_LOOK_BEHIND: + { + r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB, + ALLOWED_ENCLOSURE_IN_LB, ALLOWED_ANCHOR_IN_LB); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = setup_tree(NODE_ANCHOR_BODY(an), reg, state, env); + if (r != 0) return r; + r = setup_look_behind(node, reg, env); + } + break; + + case ANCHOR_LOOK_BEHIND_NOT: + { + r = check_type_tree(NODE_ANCHOR_BODY(an), ALLOWED_TYPE_IN_LB, + ALLOWED_ENCLOSURE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); + if (r < 0) return r; + if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; + r = setup_tree(NODE_ANCHOR_BODY(an), reg, (state | IN_NOT), env); + if (r != 0) return r; + r = setup_look_behind(node, reg, env); + } + break; + + default: + r = 0; + break; + } + + return r; +} + +#ifdef __GNUC__ +__inline #endif +static int +setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) +{ + int r; + OnigLen d; + QuantNode* qn = QUANT_(node); + Node* body = NODE_BODY(node); + + if ((state & IN_REAL_REPEAT) != 0) { + NODE_STATUS_ADD(node, NST_IN_REAL_REPEAT); + } + if ((state & IN_MULTI_ENTRY) != 0) { + NODE_STATUS_ADD(node, NST_IN_MULTI_ENTRY); + } + + if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { + d = get_min_len(body, env); + if (d == 0) { +#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT + qn->body_empty_info = quantifiers_memory_node_info(body); + if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) { + if (NODE_TYPE(body) == NODE_ENCLOSURE && + ENCLOSURE_(body)->type == ENCLOSURE_MEMORY) { + MEM_STATUS_ON(env->bt_mem_end, ENCLOSURE_(body)->m.regnum); } } +#else + qn->body_empty_info = QUANT_BODY_IS_EMPTY; +#endif + } + } - state |= IN_REPEAT; - if (qn->lower != qn->upper) - state |= IN_VAR_REPEAT; - r = setup_tree(target, reg, state, env); - if (r) break; + if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 2) + state |= IN_REAL_REPEAT; + if (qn->lower != qn->upper) + state |= IN_VAR_REPEAT; + + r = setup_tree(body, reg, state, env); + if (r != 0) return r; - /* expand string */ + /* expand string */ #define EXPAND_STRING_MAX_LENGTH 100 - if (NTYPE(target) == NT_STR) { - if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && - qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { - int len = NSTRING_LEN(target); - StrNode* sn = NSTR(target); - - if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { - int i, n = qn->lower; - onig_node_conv_to_str_node(node, NSTR(target)->flag); - for (i = 0; i < n; i++) { - r = onig_node_str_cat(node, sn->s, sn->end); - if (r) break; - } - onig_node_free(target); - break; /* break case NT_QTFR: */ - } + if (NODE_TYPE(body) == NODE_STR) { + if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && + qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { + int len = NSTRING_LEN(body); + StrNode* sn = STR_(body); + + if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { + int i, n = qn->lower; + onig_node_conv_to_str_node(node, STR_(body)->flag); + for (i = 0; i < n; i++) { + r = onig_node_str_cat(node, sn->s, sn->end); + if (r != 0) return r; } + onig_node_free(body); + return r; } + } + } #ifdef USE_OP_PUSH_OR_JUMP_EXACT - if (qn->greedy && (qn->target_empty_info != 0)) { - if (NTYPE(target) == NT_QTFR) { - QtfrNode* tqn = NQTFR(target); - if (IS_NOT_NULL(tqn->head_exact)) { - qn->head_exact = tqn->head_exact; - tqn->head_exact = NULL; - } + if (qn->greedy && (qn->body_empty_info != 0)) { + if (NODE_TYPE(body) == NODE_QUANT) { + QuantNode* tqn = QUANT_(body); + if (IS_NOT_NULL(tqn->head_exact)) { + qn->head_exact = tqn->head_exact; + tqn->head_exact = NULL; + } + } + else { + qn->head_exact = get_head_value_node(NODE_BODY(node), 1, reg); + } + } +#endif + + return r; +} + +/* setup_tree does the following work. + 1. check empty loop. (set qn->body_empty_info) + 2. expand ignore-case in char class. + 3. set memory status bit flags. (reg->mem_stats) + 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact]. + 5. find invalid patterns in look-behind. + 6. expand repeated string. + */ +static int +setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) +{ + int r = 0; + + switch (NODE_TYPE(node)) { + case NODE_LIST: + { + Node* prev = NULL_NODE; + do { + r = setup_tree(NODE_CAR(node), reg, state, env); + if (IS_NOT_NULL(prev) && r == 0) { + r = next_setup(prev, NODE_CAR(node), reg); } - else { - qn->head_exact = get_head_value_node(qn->target, 1, reg); + prev = NODE_CAR(node); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); + } + break; + + case NODE_ALT: + do { + r = setup_tree(NODE_CAR(node), reg, (state | IN_ALT), env); + } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); + break; + + case NODE_STR: + if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { + r = expand_case_fold_string(node, reg); + } + break; + + case NODE_BREF: + { + int i; + int* p; + BRefNode* br = BREF_(node); + p = BACKREFS_P(br); + for (i = 0; i < br->back_num; i++) { + if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; + MEM_STATUS_ON(env->backrefed_mem, p[i]); + MEM_STATUS_ON(env->bt_mem_start, p[i]); +#ifdef USE_BACKREF_WITH_LEVEL + if (NODE_IS_NEST_LEVEL(node)) { + MEM_STATUS_ON(env->bt_mem_end, p[i]); } - } #endif + } } break; - case NT_ENCLOSE: + case NODE_ENCLOSURE: { - EncloseNode* en = NENCLOSE(node); + EnclosureNode* en = ENCLOSURE_(node); switch (en->type) { - case ENCLOSE_OPTION: + case ENCLOSURE_OPTION: { OnigOptionType options = reg->options; - reg->options = NENCLOSE(node)->option; - r = setup_tree(NENCLOSE(node)->target, reg, state, env); + reg->options = ENCLOSURE_(node)->o.option; + r = setup_tree(NODE_BODY(node), reg, state, env); reg->options = options; } break; - case ENCLOSE_MEMORY: - if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) { - BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum); - /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */ + case ENCLOSURE_MEMORY: +#ifdef USE_SUBEXP_CALL + state |= en->m.called_state; +#endif + + if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_MULTI_ENTRY)) != 0 + || NODE_IS_RECURSION(node)) { + MEM_STATUS_ON(env->bt_mem_start, en->m.regnum); } - if (IS_ENCLOSE_CALLED(en)) - state |= IN_CALL; - if (IS_ENCLOSE_RECURSION(en)) - state |= IN_RECCALL; - else if ((state & IN_RECCALL) != 0) - SET_CALL_RECURSION(node); - r = setup_tree(en->target, reg, state, env); + r = setup_tree(NODE_BODY(node), reg, state, env); break; - case ENCLOSE_STOP_BACKTRACK: + case ENCLOSURE_STOP_BACKTRACK: { - Node* target = en->target; + Node* target = NODE_BODY(node); r = setup_tree(target, reg, state, env); - if (NTYPE(target) == NT_QTFR) { - QtfrNode* tqn = NQTFR(target); + if (NODE_TYPE(target) == NODE_QUANT) { + QuantNode* tqn = QUANT_(target); if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 && tqn->greedy != 0) { /* (?>a*), a*+ etc... */ - int qtype = NTYPE(tqn->target); - if (IS_NODE_TYPE_SIMPLE(qtype)) - SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT); + if (NODE_IS_SIMPLE_TYPE(NODE_BODY(target))) + NODE_STATUS_ADD(node, NST_STOP_BT_SIMPLE_REPEAT); } } } @@ -3891,59 +4347,19 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } break; - case NT_ANCHOR: - { - AnchorNode* an = NANCHOR(node); - - switch (an->type) { - case ANCHOR_PREC_READ: - r = setup_tree(an->target, reg, state, env); - break; - case ANCHOR_PREC_READ_NOT: - r = setup_tree(an->target, reg, (state | IN_NOT), env); - break; - -/* allowed node types in look-behind */ -#define ALLOWED_TYPE_IN_LB \ - ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \ - BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL ) - -#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION ) -#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION - -#define ALLOWED_ANCHOR_IN_LB \ -( ANCHOR_LOOK_BEHIND | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) - -#define ALLOWED_ANCHOR_IN_LB_NOT \ -( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END ) - - case ANCHOR_LOOK_BEHIND: - { - r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB); - if (r < 0) return r; - if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - r = setup_tree(an->target, reg, state, env); - if (r != 0) return r; - r = setup_look_behind(node, reg, env); - } - break; + case NODE_QUANT: + r = setup_quant(node, reg, state, env); + break; - case ANCHOR_LOOK_BEHIND_NOT: - { - r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB, - ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT); - if (r < 0) return r; - if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN; - r = setup_tree(an->target, reg, (state | IN_NOT), env); - if (r != 0) return r; - r = setup_look_behind(node, reg, env); - } - break; - } - } + case NODE_ANCHOR: + r = setup_anchor(node, reg, state, env); break; +#ifdef USE_SUBEXP_CALL + case NODE_CALL: +#endif + case NODE_CTYPE: + case NODE_CCLASS: default: break; } @@ -4594,15 +5010,13 @@ alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env) static int optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) { - int type; int r = 0; clear_node_opt_info(opt); set_bound_node_opt_info(opt, &env->mmd); - type = NTYPE(node); - switch (type) { - case NT_LIST: + switch (NODE_TYPE(node)) { + case NODE_LIST: { OptEnv nenv; NodeOptInfo nopt; @@ -4610,33 +5024,33 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) copy_opt_env(&nenv, env); do { - r = optimize_node_left(NCAR(nd), &nopt, &nenv); + r = optimize_node_left(NODE_CAR(nd), &nopt, &nenv); if (r == 0) { add_mml(&nenv.mmd, &nopt.len); concat_left_node_opt_info(env->enc, opt, &nopt); } - } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd))); + } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd))); } break; - case NT_ALT: + case NODE_ALT: { NodeOptInfo nopt; Node* nd = node; do { - r = optimize_node_left(NCAR(nd), &nopt, env); + r = optimize_node_left(NODE_CAR(nd), &nopt, env); if (r == 0) { if (nd == node) copy_node_opt_info(opt, &nopt); else alt_merge_node_opt_info(opt, &nopt, env); } - } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd))); + } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd))); } break; - case NT_STR: + case NODE_STR: { - StrNode* sn = NSTR(node); + StrNode* sn = STR_(node); int slen = sn->end - sn->s; int is_raw = NSTRING_IS_RAW(node); @@ -4677,10 +5091,10 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case NT_CCLASS: + case NODE_CCLASS: { int i, z; - CClassNode* cc = NCCLASS(node); + CClassNode* cc = CCLASS_(node); /* no need to check ignore case. (set in setup_tree()) */ @@ -4702,7 +5116,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case NT_CTYPE: + case NODE_CTYPE: { int i, min, max; @@ -4711,9 +5125,12 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (max == 1) { min = 1; - switch (NCTYPE(node)->ctype) { + switch (CTYPE_(node)->ctype) { + case CTYPE_ANYCHAR: + break; + case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->not != 0) { + if (CTYPE_(node)->not != 0) { for (i = 0; i < SINGLE_BYTE_SIZE; i++) { if (! ONIGENC_IS_CODE_WORD(env->enc, i)) { add_char_opt_map_info(&opt->map, (UChar )i, env->enc); @@ -4737,16 +5154,8 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case NT_CANY: - { - OnigLen min = ONIGENC_MBC_MINLEN(env->enc); - OnigLen max = ONIGENC_MBC_MAXLEN_DIST(env->enc); - set_mml(&opt->len, min, max); - } - break; - - case NT_ANCHOR: - switch (NANCHOR(node)->type) { + case NODE_ANCHOR: + switch (ANCHOR_(node)->type) { case ANCHOR_BEGIN_BUF: case ANCHOR_BEGIN_POSITION: case ANCHOR_BEGIN_LINE: @@ -4755,14 +5164,14 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case ANCHOR_END_LINE: case ANCHOR_PREC_READ_NOT: case ANCHOR_LOOK_BEHIND: - add_opt_anc_info(&opt->anc, NANCHOR(node)->type); + add_opt_anc_info(&opt->anc, ANCHOR_(node)->type); break; case ANCHOR_PREC_READ: { NodeOptInfo nopt; - r = optimize_node_left(NANCHOR(node)->target, &nopt, env); + r = optimize_node_left(NODE_BODY(node), &nopt, env); if (r == 0) { if (nopt.exb.len > 0) copy_opt_exact_info(&opt->expr, &nopt.exb); @@ -4782,61 +5191,57 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case NT_BREF: + case NODE_BREF: { int i; int* backs; OnigLen min, max, tmin, tmax; - Node** nodes = SCANENV_MEM_NODES(env->scan_env); - BRefNode* br = NBREF(node); + MemEnv* mem_env = SCANENV_MEMENV(env->scan_env); + BRefNode* br = BREF_(node); - if (br->state & NST_RECURSION) { + if (NODE_IS_RECURSION(node)) { set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); break; } backs = BACKREFS_P(br); - r = get_min_len(nodes[backs[0]], &min, env->scan_env); - if (r != 0) break; - r = get_max_len(nodes[backs[0]], &max, env->scan_env); - if (r != 0) break; + min = get_min_len(mem_env[backs[0]].node, env->scan_env); + max = get_max_len(mem_env[backs[0]].node, env->scan_env); for (i = 1; i < br->back_num; i++) { - r = get_min_len(nodes[backs[i]], &tmin, env->scan_env); - if (r != 0) break; - r = get_max_len(nodes[backs[i]], &tmax, env->scan_env); - if (r != 0) break; + tmin = get_min_len(mem_env[backs[i]].node, env->scan_env); + tmax = get_max_len(mem_env[backs[i]].node, env->scan_env); if (min > tmin) min = tmin; if (max < tmax) max = tmax; } - if (r == 0) set_mml(&opt->len, min, max); + set_mml(&opt->len, min, max); } break; #ifdef USE_SUBEXP_CALL - case NT_CALL: - if (IS_CALL_RECURSION(NCALL(node))) + case NODE_CALL: + if (NODE_IS_RECURSION(node)) set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); else { OnigOptionType save = env->options; - env->options = NENCLOSE(NCALL(node)->target)->option; - r = optimize_node_left(NCALL(node)->target, opt, env); + env->options = ENCLOSURE_(NODE_BODY(node))->o.option; + r = optimize_node_left(NODE_BODY(node), opt, env); env->options = save; } break; #endif - case NT_QTFR: + case NODE_QUANT: { int i; OnigLen min, max; NodeOptInfo nopt; - QtfrNode* qn = NQTFR(node); + QuantNode* qn = QUANT_(node); - r = optimize_node_left(qn->target, &nopt, env); - if (r) break; + r = optimize_node_left(NODE_BODY(node), &nopt, env); + if (r != 0) break; if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { if (env->mmd.max == 0 && - NTYPE(qn->target) == NT_CANY && qn->greedy) { + NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) { if (IS_MULTILINE(env->options)) add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); else @@ -4877,22 +5282,22 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case NT_ENCLOSE: + case NODE_ENCLOSURE: { - EncloseNode* en = NENCLOSE(node); + EnclosureNode* en = ENCLOSURE_(node); switch (en->type) { - case ENCLOSE_OPTION: + case ENCLOSURE_OPTION: { OnigOptionType save = env->options; - env->options = en->option; - r = optimize_node_left(en->target, opt, env); + env->options = en->o.option; + r = optimize_node_left(NODE_BODY(node), opt, env); env->options = save; } break; - case ENCLOSE_MEMORY: + case ENCLOSURE_MEMORY: #ifdef USE_SUBEXP_CALL en->opt_count++; if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { @@ -4900,24 +5305,24 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) min = 0; max = ONIG_INFINITE_DISTANCE; - if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len; - if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len; + if (NODE_IS_MIN_FIXED(node)) min = en->min_len; + if (NODE_IS_MAX_FIXED(node)) max = en->max_len; set_mml(&opt->len, min, max); } else #endif { - r = optimize_node_left(en->target, opt, env); + r = optimize_node_left(NODE_BODY(node), opt, env); if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { - if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum)) + if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum)) remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); } } break; - case ENCLOSE_STOP_BACKTRACK: - r = optimize_node_left(en->target, opt, env); + case ENCLOSURE_STOP_BACKTRACK: + r = optimize_node_left(NODE_BODY(node), opt, env); break; } } @@ -4925,8 +5330,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) default: #ifdef ONIG_DEBUG - fprintf(stderr, "optimize_node_left: undefined node type %d\n", - NTYPE(node)); + fprintf(stderr, "optimize_node_left: undefined node type %d\n", NODE_TYPE(node)); #endif r = ONIGERR_TYPE_BUG; break; @@ -4962,7 +5366,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) if (e->len >= 3 || (e->len >= 2 && allow_reverse)) { r = set_bm_skip(reg->exact, reg->exact_end, reg->enc, reg->map, &(reg->int_map)); - if (r) return r; + if (r != 0) return r; reg->optimize = (allow_reverse != 0 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); @@ -5006,7 +5410,7 @@ set_sub_anchor(regex_t* reg, OptAncInfo* anc) reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE; } -#ifdef ONIG_DEBUG +#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) static void print_optimize_info(FILE* f, regex_t* reg); #endif @@ -5025,7 +5429,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) clear_mml(&env.mmd); r = optimize_node_left(node, &opt, &env); - if (r) return r; + if (r != 0) return r; reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML | @@ -5120,6 +5524,10 @@ static void print_enc_string(FILE* fp, OnigEncoding enc, fprintf(fp, "/\n"); } +#endif /* ONIG_DEBUG */ + +#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH) + static void print_distance_range(FILE* f, OnigLen a, OnigLen b) { @@ -5236,7 +5644,7 @@ print_optimize_info(FILE* f, regex_t* reg) } } } -#endif /* ONIG_DEBUG */ +#endif extern void @@ -5278,7 +5686,7 @@ onig_transfer(regex_t* to, regex_t* from) } -#ifdef ONIG_DEBUG +#ifdef ONIG_DEBUG_COMPILE static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); #endif #ifdef ONIG_DEBUG_PARSE_TREE @@ -5323,14 +5731,14 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, reg->num_comb_exp_check = 0; #endif - r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env); + r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env); if (r != 0) goto err; #ifdef USE_NAMED_GROUP /* mixed use named group and no-named group */ if (scan_env.num_named > 0 && IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { + ! ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) { if (scan_env.num_named != scan_env.num_mem) r = disable_noname_group_capture(&root, reg, &scan_env); else @@ -5340,22 +5748,27 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } #endif + r = check_backrefs(root, &scan_env); + if (r != 0) goto err; + #ifdef USE_SUBEXP_CALL if (scan_env.num_call > 0) { r = unset_addr_list_init(&uslist, scan_env.num_call); if (r != 0) goto err; scan_env.unset_addr_list = &uslist; - r = setup_subexp_call(root, &scan_env); + r = setup_call(root, &scan_env, 0); + if (r != 0) goto err_unset; + r = setup_call2(root); if (r != 0) goto err_unset; - r = subexp_recursive_check_trav(root, &scan_env); + r = recursive_call_check_trav(root, &scan_env, 0); if (r < 0) goto err_unset; - r = subexp_inf_recursive_check_trav(root, &scan_env); + r = infinite_recursive_call_check_trav(root, &scan_env); if (r != 0) goto err_unset; - reg->num_call = scan_env.num_call; + setup_called_state(root, 0); } - else - reg->num_call = 0; + + reg->num_call = scan_env.num_call; #endif r = setup_tree(root, reg, 0, &scan_env); @@ -5369,11 +5782,12 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, reg->bt_mem_start = scan_env.bt_mem_start; reg->bt_mem_start |= reg->capture_history; if (IS_FIND_CONDITION(reg->options)) - BIT_STATUS_ON_ALL(reg->bt_mem_end); + MEM_STATUS_ON_ALL(reg->bt_mem_end); else { reg->bt_mem_end = scan_env.bt_mem_end; reg->bt_mem_end |= reg->capture_history; } + reg->bt_mem_start |= reg->bt_mem_end; #ifdef USE_COMBINATION_EXPLOSION_CHECK if (scan_env.backrefed_mem == 0 @@ -5391,7 +5805,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, if (scan_env.comb_exp_max_regnum > 0) { int i; for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { - if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) { + if (MEM_STATUS_AT(scan_env.backrefed_mem, i) != 0) { scan_env.num_comb_exp_check = 0; break; } @@ -5408,19 +5822,19 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, if (r != 0) goto err_unset; #endif - if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) { - xfree(scan_env.mem_nodes_dynamic); - scan_env.mem_nodes_dynamic = (Node** )NULL; + if (IS_NOT_NULL(scan_env.mem_env_dynamic)) { + xfree(scan_env.mem_env_dynamic); + scan_env.mem_env_dynamic = (MemEnv* )NULL; } - r = compile_tree(root, reg); + r = compile_tree(root, reg, &scan_env); if (r == 0) { r = add_opcode(reg, OP_END); #ifdef USE_SUBEXP_CALL if (scan_env.num_call > 0) { r = unset_addr_list_fix(&uslist, reg); unset_addr_list_end(&uslist); - if (r) goto err; + if (r != 0) goto err; } #endif @@ -5466,8 +5880,8 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } onig_node_free(root); - if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) - xfree(scan_env.mem_nodes_dynamic); + if (IS_NOT_NULL(scan_env.mem_env_dynamic)) + xfree(scan_env.mem_env_dynamic); return r; } @@ -5543,7 +5957,7 @@ onig_new_without_alloc(regex_t* reg, const UChar* pattern, int r; r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); - if (r) return r; + if (r != 0) return r; r = onig_compile(reg, pattern, pattern_end, einfo); return r; @@ -5560,10 +5974,10 @@ onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end, if (IS_NULL(*reg)) return ONIGERR_MEMORY; r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax); - if (r) goto err; + if (r != 0) goto err; r = onig_compile(*reg, pattern, pattern_end, einfo); - if (r) { + if (r != 0) { err: onig_free(*reg); *reg = NULL; @@ -5657,9 +6071,10 @@ onig_is_in_code_range(const UChar* p, OnigCodePoint code) } extern int -onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc) +onig_is_code_in_cc_len(int elen, OnigCodePoint code, /* CClassNode* */ void* cc_arg) { int found; + CClassNode* cc = (CClassNode* )cc_arg; if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) { if (IS_NULL(cc->mbuf)) { @@ -5775,10 +6190,10 @@ OnigOpInfoType OnigOpInfo[] = { { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, - { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM }, - { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM }, - { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM }, - { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM }, + { OP_EMPTY_CHECK_START, "empty-check-start", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM }, { OP_PUSH_POS, "push-pos", ARG_NON }, { OP_POP_POS, "pop-pos", ARG_NON }, { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, @@ -5824,13 +6239,6 @@ op2arg_type(int opcode) } static void -Indent(FILE* f, int indent) -{ - int i; - for (i = 0; i < indent; i++) putc(' ', f); -} - -static void p_string(FILE* f, int len, UChar* s) { fputs(":", f); @@ -5846,8 +6254,16 @@ p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) while (x-- > 0) { fputc(*s++, f); } } +static void +p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start) +{ + RelAddrType curr = (RelAddrType )(p - start); + + fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr); +} + extern void -onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, +onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, OnigEncoding enc) { int i, n, arg_type; @@ -5858,7 +6274,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, OnigCodePoint code; UChar *q; - fprintf(f, "[%s", op2name(*bp)); + fprintf(f, "%s", op2name(*bp)); arg_type = op2arg_type(*bp); if (arg_type != ARG_SPECIAL) { bp++; @@ -5867,11 +6283,12 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, break; case ARG_RELADDR: GET_RELADDR_INC(addr, bp); - fprintf(f, ":(%d)", addr); + fputc(':', f); + p_rel_addr(f, addr, bp, start); break; case ARG_ABSADDR: GET_ABSADDR_INC(addr, bp); - fprintf(f, ":(%d)", addr); + fprintf(f, ":{/%d}", addr); break; case ARG_LENGTH: GET_LENGTH_INC(len, bp); @@ -6056,7 +6473,8 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, case OP_PUSH_IF_PEEK_NEXT: addr = *((RelAddrType* )bp); bp += SIZE_RELADDR; - fprintf(f, ":(%d)", addr); + fputc(':', f); + p_rel_addr(f, addr, bp, start); p_string(f, 1, bp); bp += 1; break; @@ -6069,7 +6487,8 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, case OP_PUSH_LOOK_BEHIND_NOT: GET_RELADDR_INC(addr, bp); GET_LENGTH_INC(len, bp); - fprintf(f, ":%d:(%d)", len, addr); + fprintf(f, ":%d:", len); + p_rel_addr(f, addr, bp, start); break; case OP_STATE_CHECK_PUSH: @@ -6078,7 +6497,8 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, bp += SIZE_STATE_CHECK_NUM; addr = *((RelAddrType* )bp); bp += SIZE_RELADDR; - fprintf(f, ":%d:(%d)", scn, addr); + fprintf(f, ":%d:", scn); + p_rel_addr(f, addr, bp, start); break; default: @@ -6086,40 +6506,50 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, *--bp); } } - fputs("]", f); if (nextp) *nextp = bp; } +#endif /* ONIG_DEBUG */ +#ifdef ONIG_DEBUG_COMPILE static void print_compiled_byte_code_list(FILE* f, regex_t* reg) { - int ncode; - UChar* bp = reg->p; - UChar* end = reg->p + reg->used; + UChar* bp; + UChar* start = reg->p; + UChar* end = reg->p + reg->used; - fprintf(f, "code length: %d\n", reg->used); + fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n", + reg->bt_mem_start, reg->bt_mem_end); + fprintf(f, "code-length: %d\n", reg->used); - ncode = 0; + bp = start; while (bp < end) { - ncode++; - if (bp > reg->p) { - if (ncode % 5 == 0) - fprintf(f, "\n"); - else - fputs(" ", f); - } - onig_print_compiled_byte_code(f, bp, &bp, reg->enc); - } + int pos = bp - start; + fprintf(f, "%4d: ", pos); + onig_print_compiled_byte_code(f, bp, &bp, start, reg->enc); + fprintf(f, "\n"); + } fprintf(f, "\n"); } +#endif + +#ifdef ONIG_DEBUG_PARSE_TREE + +static void +Indent(FILE* f, int indent) +{ + int i; + for (i = 0; i < indent; i++) putc(' ', f); +} static void print_indent_tree(FILE* f, Node* node, int indent) { - int i, type; - int add = 3; + int i; + NodeType type; UChar* p; + int add = 3; Indent(f, indent); if (IS_NULL(node)) { @@ -6127,29 +6557,29 @@ print_indent_tree(FILE* f, Node* node, int indent) exit (0); } - type = NTYPE(node); + type = NODE_TYPE(node); switch (type) { - case NT_LIST: - case NT_ALT: - if (NTYPE(node) == NT_LIST) + case NODE_LIST: + case NODE_ALT: + if (type == NODE_LIST) fprintf(f, "<list:%p>\n", node); else fprintf(f, "<alt:%p>\n", node); - print_indent_tree(f, NCAR(node), indent + add); - while (IS_NOT_NULL(node = NCDR(node))) { - if (NTYPE(node) != type) { - fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node)); + print_indent_tree(f, NODE_CAR(node), indent + add); + while (IS_NOT_NULL(node = NODE_CDR(node))) { + if (NODE_TYPE(node) != type) { + fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NODE_TYPE(node)); exit(0); } - print_indent_tree(f, NCAR(node), indent + add); + print_indent_tree(f, NODE_CAR(node), indent + add); } break; - case NT_STR: + case NODE_STR: fprintf(f, "<string%s:%p>", (NSTRING_IS_RAW(node) ? "-raw" : ""), node); - for (p = NSTR(node)->s; p < NSTR(node)->end; p++) { + for (p = STR_(node)->s; p < STR_(node)->end; p++) { if (*p >= 0x20 && *p < 0x7f) fputc(*p, f); else { @@ -6158,11 +6588,11 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case NT_CCLASS: + case NODE_CCLASS: fprintf(f, "<cclass:%p>", node); - if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f); - if (NCCLASS(node)->mbuf) { - BBuf* bbuf = NCCLASS(node)->mbuf; + if (IS_NCCLASS_NOT(CCLASS_(node))) fputs(" not", f); + if (CCLASS_(node)->mbuf) { + BBuf* bbuf = CCLASS_(node)->mbuf; for (i = 0; i < bbuf->used; i++) { if (i > 0) fprintf(f, ","); fprintf(f, "%0x", bbuf->p[i]); @@ -6170,11 +6600,15 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case NT_CTYPE: + case NODE_CTYPE: fprintf(f, "<ctype:%p> ", node); - switch (NCTYPE(node)->ctype) { + switch (CTYPE_(node)->ctype) { + case CTYPE_ANYCHAR: + fprintf(f, "<anychar:%p>", node); + break; + case ONIGENC_CTYPE_WORD: - if (NCTYPE(node)->not != 0) + if (CTYPE_(node)->not != 0) fputs("not word", f); else fputs("word", f); @@ -6186,13 +6620,9 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case NT_CANY: - fprintf(f, "<anychar:%p>", node); - break; - - case NT_ANCHOR: + case NODE_ANCHOR: fprintf(f, "<anchor:%p> ", node); - switch (NANCHOR(node)->type) { + switch (ANCHOR_(node)->type) { case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break; case ANCHOR_END_BUF: fputs("end buf", f); break; case ANCHOR_BEGIN_LINE: fputs("begin line", f); break; @@ -6208,19 +6638,19 @@ print_indent_tree(FILE* f, Node* node, int indent) #endif case ANCHOR_PREC_READ: fprintf(f, "prec read\n"); - print_indent_tree(f, NANCHOR(node)->target, indent + add); + print_indent_tree(f, NODE_BODY(node), indent + add); break; case ANCHOR_PREC_READ_NOT: fprintf(f, "prec read not\n"); - print_indent_tree(f, NANCHOR(node)->target, indent + add); + print_indent_tree(f, NODE_BODY(node), indent + add); break; case ANCHOR_LOOK_BEHIND: fprintf(f, "look behind\n"); - print_indent_tree(f, NANCHOR(node)->target, indent + add); + print_indent_tree(f, NODE_BODY(node), indent + add); break; case ANCHOR_LOOK_BEHIND_NOT: fprintf(f, "look behind not\n"); - print_indent_tree(f, NANCHOR(node)->target, indent + add); + print_indent_tree(f, NODE_BODY(node), indent + add); break; default: @@ -6229,10 +6659,10 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case NT_BREF: + case NODE_BREF: { int* p; - BRefNode* br = NBREF(node); + BRefNode* br = BREF_(node); p = BACKREFS_P(br); fprintf(f, "<backref:%p>", node); for (i = 0; i < br->back_num; i++) { @@ -6243,32 +6673,32 @@ print_indent_tree(FILE* f, Node* node, int indent) break; #ifdef USE_SUBEXP_CALL - case NT_CALL: + case NODE_CALL: { - CallNode* cn = NCALL(node); + CallNode* cn = CALL_(node); fprintf(f, "<call:%p>", node); p_string(f, cn->name_end - cn->name, cn->name); } break; #endif - case NT_QTFR: + case NODE_QUANT: fprintf(f, "<quantifier:%p>{%d,%d}%s\n", node, - NQTFR(node)->lower, NQTFR(node)->upper, - (NQTFR(node)->greedy ? "" : "?")); - print_indent_tree(f, NQTFR(node)->target, indent + add); + QUANT_(node)->lower, QUANT_(node)->upper, + (QUANT_(node)->greedy ? "" : "?")); + print_indent_tree(f, NODE_BODY(node), indent + add); break; - case NT_ENCLOSE: - fprintf(f, "<enclose:%p> ", node); - switch (NENCLOSE(node)->type) { - case ENCLOSE_OPTION: - fprintf(f, "option:%d", NENCLOSE(node)->option); + case NODE_ENCLOSURE: + fprintf(f, "<enclosure:%p> ", node); + switch (ENCLOSURE_(node)->type) { + case ENCLOSURE_OPTION: + fprintf(f, "option:%d", ENCLOSURE_(node)->option); break; - case ENCLOSE_MEMORY: - fprintf(f, "memory:%d", NENCLOSE(node)->regnum); + case ENCLOSURE_MEMORY: + fprintf(f, "memory:%d", ENCLOSURE_(node)->regnum); break; - case ENCLOSE_STOP_BACKTRACK: + case ENCLOSURE_STOP_BACKTRACK: fprintf(f, "stop-bt"); break; @@ -6276,22 +6706,20 @@ print_indent_tree(FILE* f, Node* node, int indent) break; } fprintf(f, "\n"); - print_indent_tree(f, NENCLOSE(node)->target, indent + add); + print_indent_tree(f, NODE_BODY(node), indent + add); break; default: - fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node)); + fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node)); break; } - if (type != NT_LIST && type != NT_ALT && type != NT_QTFR && - type != NT_ENCLOSE) + if (type != NODE_LIST && type != NODE_ALT && type != NODE_QUANT && + type != NODE_ENCLOSURE) fprintf(f, "\n"); fflush(f); } -#endif /* ONIG_DEBUG */ -#ifdef ONIG_DEBUG_PARSE_TREE static void print_tree(FILE* f, Node* node) { diff --git a/src/regenc.h b/src/regenc.h index e119dab..897c704 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -4,7 +4,7 @@ regenc.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -154,7 +154,7 @@ ONIG_EXTERN struct PropertyNameCtype* sjis_lookup_property_name P_((register con /* in enc/unicode.c */ ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((int ctype, const OnigCodePoint* ranges[])); +ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[])); ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); diff --git a/src/regerror.c b/src/regerror.c index ee35b36..0285272 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -2,7 +2,7 @@ regerror.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -211,24 +211,24 @@ static int to_ascii(OnigEncoding enc, UChar *s, UChar *end, while (p < end) { code = ONIGENC_MBC_TO_CODE(enc, p, end); if (code >= 0x80) { - if (code > 0xffff && len + 10 <= buf_size) { - sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24)); - sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16)); - sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8)); - sprint_byte((char*)(&(buf[len+8])), (unsigned int)code); - len += 10; - } - else if (len + 6 <= buf_size) { - sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8)); - sprint_byte((char*)(&(buf[len+4])), (unsigned int)code); - len += 6; - } - else { - break; - } + if (code > 0xffff && len + 10 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 24)); + sprint_byte((char*)(&(buf[len+4])), (unsigned int)(code >> 16)); + sprint_byte((char*)(&(buf[len+6])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+8])), (unsigned int)code); + len += 10; + } + else if (len + 6 <= buf_size) { + sprint_byte_with_x((char*)(&(buf[len])), (unsigned int)(code >> 8)); + sprint_byte((char*)(&(buf[len+4])), (unsigned int)code); + len += 6; + } + else { + break; + } } else { - buf[len++] = (UChar )code; + buf[len++] = (UChar )code; } p += enclen(enc, p); @@ -278,27 +278,27 @@ onig_error_code_to_str(s, code, va_alist) case ONIGERR_INVALID_CHAR_PROPERTY_NAME: einfo = va_arg(vargs, OnigErrorInfo*); len = to_ascii(einfo->enc, einfo->par, einfo->par_end, - parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); + parbuf, MAX_ERROR_PAR_LEN - 3, &is_over); q = onig_error_code_to_format(code); p = s; while (*q != '\0') { if (*q == '%') { - q++; - if (*q == 'n') { /* '%n': name */ - xmemcpy(p, parbuf, len); - p += len; - if (is_over != 0) { - xmemcpy(p, "...", 3); - p += 3; - } - q++; - } - else - goto normal_char; + q++; + if (*q == 'n') { /* '%n': name */ + xmemcpy(p, parbuf, len); + p += len; + if (is_over != 0) { + xmemcpy(p, "...", 3); + p += 3; + } + q++; + } + else + goto normal_char; } else { normal_char: - *p++ = *q++; + *p++ = *q++; } } *p = '\0'; @@ -359,7 +359,7 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) int blen; while (len-- > 0) { - sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); + sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); blen = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); bp = bs; while (blen-- > 0) *s++ = *bp++; @@ -367,23 +367,23 @@ onig_snprintf_with_pattern(buf, bufsize, enc, pat, pat_end, fmt, va_alist) } } else if (*p == '\\') { - *s++ = *p++; - len = enclen(enc, p); - while (len-- > 0) *s++ = *p++; + *s++ = *p++; + len = enclen(enc, p); + while (len-- > 0) *s++ = *p++; } else if (*p == '/') { - *s++ = (unsigned char )'\\'; - *s++ = *p++; + *s++ = (unsigned char )'\\'; + *s++ = *p++; } else if (!ONIGENC_IS_CODE_PRINT(enc, *p) && - !ONIGENC_IS_CODE_SPACE(enc, *p)) { - sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); - len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); + !ONIGENC_IS_CODE_SPACE(enc, *p)) { + sprint_byte_with_x((char* )bs, (unsigned int )(*p++)); + len = onigenc_str_bytelen_null(ONIG_ENCODING_ASCII, bs); bp = bs; - while (len-- > 0) *s++ = *bp++; + while (len-- > 0) *s++ = *bp++; } else { - *s++ = *p++; + *s++ = *p++; } } diff --git a/src/regexec.c b/src/regexec.c index c0626ef..f66da1f 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -240,6 +240,7 @@ onig_region_new(void) OnigRegion* r; r = (OnigRegion* )xmalloc(sizeof(OnigRegion)); + CHECK_NULL_RETURN(r); onig_region_init(r); return r; } @@ -247,7 +248,7 @@ onig_region_new(void) extern void onig_region_free(OnigRegion* r, int free_self) { - if (r) { + if (r != 0) { if (r->allocated > 0) { if (r->beg) xfree(r->beg); if (r->end) xfree(r->end); @@ -271,13 +272,17 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) if (to->allocated == 0) { if (from->num_regs > 0) { to->beg = (int* )xmalloc(RREGC_SIZE); + if (IS_NULL(to->beg)) return; to->end = (int* )xmalloc(RREGC_SIZE); + if (IS_NULL(to->end)) return; to->allocated = from->num_regs; } } else if (to->allocated < from->num_regs) { to->beg = (int* )xrealloc(to->beg, RREGC_SIZE); + if (IS_NULL(to->beg)) return; to->end = (int* )xrealloc(to->end, RREGC_SIZE); + if (IS_NULL(to->end)) return; to->allocated = from->num_regs; } @@ -311,8 +316,8 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #define STK_REPEAT_INC 0x0300 #define STK_STATE_CHECK_MARK 0x1000 /* avoided by normal-POP */ -#define STK_NULL_CHECK_START 0x3000 -#define STK_NULL_CHECK_END 0x5000 /* for recursive call */ +#define STK_EMPTY_CHECK_START 0x3000 +#define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */ #define STK_MEM_END_MARK 0x8400 #define STK_POS 0x0500 /* used when POP-POS */ #define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ @@ -333,7 +338,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) (msa).region = (arg_region);\ (msa).start = (arg_start);\ (msa).best_len = ONIG_MISMATCH;\ - (msa).ptr_num = (reg)->num_repeat + (reg)->num_mem * 2;\ + (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \ } while(0) #else #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\ @@ -341,7 +346,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) (msa).options = (arg_option);\ (msa).region = (arg_region);\ (msa).start = (arg_start);\ - (msa).ptr_num = (reg)->num_repeat + (reg)->num_mem * 2;\ + (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \ } while(0) #endif @@ -400,6 +405,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) is_alloca = 0;\ alloc_base = (char* )xmalloc(sizeof(OnigStackIndex) * msa->ptr_num\ + sizeof(OnigStackType) * (stack_num));\ + CHECK_NULL_RETURN_MEMERR(alloc_base);\ stk_base = (OnigStackType* )(alloc_base\ + (sizeof(OnigStackIndex) * msa->ptr_num));\ stk = stk_base;\ @@ -409,6 +415,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) is_alloca = 1;\ alloc_base = (char* )xalloca(sizeof(OnigStackIndex) * msa->ptr_num\ + sizeof(OnigStackType) * (stack_num));\ + CHECK_NULL_RETURN_MEMERR(alloc_base);\ stk_base = (OnigStackType* )(alloc_base\ + (sizeof(OnigStackIndex) * msa->ptr_num));\ stk = stk_base;\ @@ -423,6 +430,7 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) size_t size = sizeof(OnigStackIndex) * msa->ptr_num \ + sizeof(OnigStackType) * msa->stack_n;\ msa->stack_p = xmalloc(size);\ + CHECK_NULL_RETURN_MEMERR(msa->stack_p);\ xmemcpy(msa->stack_p, alloc_base, size);\ }\ else {\ @@ -431,11 +439,9 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) } while(0) #define UPDATE_FOR_STACK_REALLOC do{\ - repeat_stk = (OnigStackIndex* )alloc_base;\ + repeat_stk = (OnigStackIndex* )alloc_base;\ mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);\ - mem_end_stk = mem_start_stk + num_mem;\ - mem_start_stk--; /* for index start from 1 */\ - mem_end_stk--; /* for index start from 1 */\ + mem_end_stk = mem_start_stk + num_mem + 1;\ } while(0) static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; @@ -533,7 +539,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STATE_CHECK_POS(s,snum) \ (((s) - str) * num_comb_exp_check + ((snum) - 1)) #define STATE_CHECK_VAL(v,snum) do {\ - if (state_check_buff != NULL) {\ + if (IS_NOT_NULL(state_check_buff)) {\ int x = STATE_CHECK_POS(s,snum);\ (v) = state_check_buff[x/8] & (1<<(x%8));\ }\ @@ -570,12 +576,12 @@ stack_double(int is_alloca, char** arg_alloc_base, stk->u.state.pcode = (pat);\ stk->u.state.pstr = (s);\ stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\ + stk->u.state.state_check = (IS_NOT_NULL(state_check_buff) ? (snum) : 0);\ STACK_INC;\ } while(0) #define STACK_PUSH_STATE_CHECK(s,snum) do {\ - if (state_check_buff != NULL) {\ + if (IS_NOT_NULL(state_check_buff)) { \ STACK_ENSURE(1);\ stk->type = STK_STATE_CHECK_MARK;\ stk->u.state.pstr = (s);\ @@ -691,18 +697,18 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while(0) -#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\ +#define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\ STACK_ENSURE(1);\ - stk->type = STK_NULL_CHECK_START;\ - stk->u.null_check.num = (cnum);\ - stk->u.null_check.pstr = (s);\ + stk->type = STK_EMPTY_CHECK_START;\ + stk->u.empty_check.num = (cnum);\ + stk->u.empty_check.pstr = (s);\ STACK_INC;\ } while(0) -#define STACK_PUSH_NULL_CHECK_END(cnum) do {\ +#define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\ STACK_ENSURE(1);\ - stk->type = STK_NULL_CHECK_END;\ - stk->u.null_check.num = (cnum);\ + stk->type = STK_EMPTY_CHECK_END;\ + stk->u.empty_check.num = (cnum);\ STACK_INC;\ } while(0) @@ -849,49 +855,29 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while(0) -#define STACK_NULL_CHECK(isnull,id,s) do {\ +#define STACK_EMPTY_CHECK(isnull,id,s) do {\ OnigStackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - (isnull) = (k->u.null_check.pstr == (s));\ + STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \ + if (k->type == STK_EMPTY_CHECK_START) {\ + if (k->u.empty_check.num == (id)) {\ + (isnull) = (k->u.empty_check.pstr == (s));\ break;\ }\ }\ }\ } while(0) -#define STACK_NULL_CHECK_REC(isnull,id,s) do {\ - int level = 0;\ +#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT +#define STACK_EMPTY_CHECK_MEMST(isnull,id,s,reg) do {\ OnigStackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - if (level == 0) {\ - (isnull) = (k->u.null_check.pstr == (s));\ - break;\ - }\ - else level--;\ - }\ - }\ - else if (k->type == STK_NULL_CHECK_END) {\ - level++;\ - }\ - }\ -} while(0) - -#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\ - OnigStackType* k = stk;\ - while (1) {\ - k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ - if (k->u.null_check.pstr != (s)) {\ + STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \ + if (k->type == STK_EMPTY_CHECK_START) {\ + if (k->u.empty_check.num == (id)) {\ + if (k->u.empty_check.pstr != (s)) {\ (isnull) = 0;\ break;\ }\ @@ -903,10 +889,11 @@ stack_double(int is_alloca, char** arg_alloc_base, if (k->u.mem.end == INVALID_STACK_INDEX) {\ (isnull) = 0; break;\ }\ - if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + if (MEM_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ else\ endp = (UChar* )k->u.mem.end;\ + /*fprintf(stderr, "num: %d, pstr: %p, endp: %p\n", k->u.mem.num, STACK_AT(k->u.mem.start)->u.mem.pstr, endp);*/ \ if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ (isnull) = 0; break;\ }\ @@ -916,23 +903,23 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ k++;\ }\ - break;\ + break;\ }\ }\ }\ }\ } while(0) -#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\ +#define STACK_EMPTY_CHECK_MEMST_REC(isnull,id,s,reg) do {\ int level = 0;\ OnigStackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \ - if (k->type == STK_NULL_CHECK_START) {\ - if (k->u.null_check.num == (id)) {\ + STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \ + if (k->type == STK_EMPTY_CHECK_START) {\ + if (k->u.empty_check.num == (id)) {\ if (level == 0) {\ - if (k->u.null_check.pstr != (s)) {\ + if (k->u.empty_check.pstr != (s)) {\ (isnull) = 0;\ break;\ }\ @@ -944,7 +931,7 @@ stack_double(int is_alloca, char** arg_alloc_base, if (k->u.mem.end == INVALID_STACK_INDEX) {\ (isnull) = 0; break;\ }\ - if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ + if (MEM_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ else\ endp = (UChar* )k->u.mem.end;\ @@ -957,7 +944,7 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ k++;\ }\ - break;\ + break;\ }\ }\ else {\ @@ -965,11 +952,33 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ }\ }\ - else if (k->type == STK_NULL_CHECK_END) {\ - if (k->u.null_check.num == (id)) level++;\ + else if (k->type == STK_EMPTY_CHECK_END) {\ + if (k->u.empty_check.num == (id)) level++;\ + }\ + }\ +} while(0) +#else +#define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\ + int level = 0;\ + OnigStackType* k = stk;\ + while (1) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \ + if (k->type == STK_EMPTY_CHECK_START) {\ + if (k->u.empty_check.num == (id)) {\ + if (level == 0) {\ + (isnull) = (k->u.empty_check.pstr == (s));\ + break;\ + }\ + }\ + level--;\ + }\ + else if (k->type == STK_EMPTY_CHECK_END) {\ + level++;\ }\ }\ } while(0) +#endif /* USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT */ #define STACK_GET_REPEAT(id, k) do {\ int level = 0;\ @@ -1091,7 +1100,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, if (k->type == STK_MEM_START) { n = k->u.mem.num; if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && - BIT_STATUS_AT(reg->capture_history, n) != 0) { + MEM_STATUS_AT(reg->capture_history, n) != 0) { child = history_node_new(); CHECK_NULL_RETURN_MEMERR(child); child->group = n; @@ -1156,7 +1165,7 @@ static int backref_match_at_nested_level(regex_t* reg if (k->type == STK_MEM_START) { if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { pstart = k->u.mem.pstr; - if (pend != NULL_UCHARP) { + if (IS_NOT_NULL(pend)) { if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ p = pstart; ss = *s; @@ -1329,8 +1338,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "match_at: str: %d, end: %d, start: %d, sprev: %d\n", - (int )str, (int )end, (int )sstart, (int )sprev); + fprintf(stderr, "match_at: str: %p, end: %p, start: %p, sprev: %p\n", + str, end, sstart, sprev); fprintf(stderr, "size: %d, start offset: %d\n", (int )(end - str), (int )(sstart - str)); #endif @@ -1341,9 +1350,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (1) { #ifdef ONIG_DEBUG_MATCH { + static unsigned int counter = 1; + UChar *q, *bp, buf[50]; int len; - fprintf(stderr, "%4d> \"", (int )(s - str)); + fprintf(stderr, "%7u: %7ld: %4d> \"", + counter, GET_STACK_INDEX(stk), (int )(s - str)); + counter++; + bp = buf; for (i = 0, q = s; i < 7 && q < end; i++) { len = enclen(encode, q); @@ -1353,8 +1367,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else { xmemcpy(bp, "\"", 1); bp += 1; } *bp = 0; fputs((char* )buf, stderr); + for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); - onig_print_compiled_byte_code(stderr, p, NULL, encode); + fprintf(stderr, "%4d: ", (int )(p - reg->p)); + onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode); fprintf(stderr, "\n"); } #endif @@ -1386,12 +1402,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, rmt[0].rm_eo = s - str; for (i = 1; i <= num_mem; i++) { if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) + if (MEM_STATUS_AT(reg->bt_mem_start, i)) rmt[i].rm_so = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; else rmt[i].rm_so = (UChar* )((void* )(mem_start_stk[i])) - str; - rmt[i].rm_eo = (BIT_STATUS_AT(reg->bt_mem_end, i) + rmt[i].rm_eo = (MEM_STATUS_AT(reg->bt_mem_end, i) ? STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )mem_end_stk[i])) - str; } @@ -1406,12 +1422,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, region->end[0] = s - str; for (i = 1; i <= num_mem; i++) { if (mem_end_stk[i] != INVALID_STACK_INDEX) { - if (BIT_STATUS_AT(reg->bt_mem_start, i)) + if (MEM_STATUS_AT(reg->bt_mem_start, i)) region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str; else region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str; - region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i) + region->end[i] = (MEM_STATUS_AT(reg->bt_mem_end, i) ? STACK_AT(mem_end_stk[i])->u.mem.pstr : (UChar* )((void* )mem_end_stk[i])) - str; } @@ -2156,7 +2172,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem_end_stk[mem] = (OnigStackIndex )((void* )s); STACK_GET_MEM_START(mem, stkp); - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + if (MEM_STATUS_AT(reg->bt_mem_start, mem)) mem_start_stk[mem] = GET_STACK_INDEX(stkp); else mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); @@ -2190,12 +2206,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + if (MEM_STATUS_AT(reg->bt_mem_start, mem)) pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; else pstart = (UChar* )((void* )mem_start_stk[mem]); - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) ? STACK_AT(mem_end_stk[mem])->u.mem.pstr : (UChar* )((void* )mem_end_stk[mem])); n = pend - pstart; @@ -2222,12 +2238,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + if (MEM_STATUS_AT(reg->bt_mem_start, mem)) pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; else pstart = (UChar* )((void* )mem_start_stk[mem]); - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) ? STACK_AT(mem_end_stk[mem])->u.mem.pstr : (UChar* )((void* )mem_end_stk[mem])); n = pend - pstart; @@ -2254,12 +2270,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + if (MEM_STATUS_AT(reg->bt_mem_start, mem)) pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; else pstart = (UChar* )((void* )mem_start_stk[mem]); - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) ? STACK_AT(mem_end_stk[mem])->u.mem.pstr : (UChar* )((void* )mem_end_stk[mem])); n = pend - pstart; @@ -2293,12 +2309,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; - if (BIT_STATUS_AT(reg->bt_mem_start, mem)) + if (MEM_STATUS_AT(reg->bt_mem_start, mem)) pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr; else pstart = (UChar* )((void* )mem_start_stk[mem]); - pend = (BIT_STATUS_AT(reg->bt_mem_end, mem) + pend = (MEM_STATUS_AT(reg->bt_mem_end, mem) ? STACK_AT(mem_end_stk[mem])->u.mem.pstr : (UChar* )((void* )mem_end_stk[mem])); n = pend - pstart; @@ -2364,25 +2380,24 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; #endif - case OP_NULL_CHECK_START: MOP_IN(OP_NULL_CHECK_START); + case OP_EMPTY_CHECK_START: MOP_IN(OP_EMPTY_CHECK_START); GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_PUSH_NULL_CHECK_START(mem, s); + STACK_PUSH_EMPTY_CHECK_START(mem, s); MOP_OUT; continue; break; - case OP_NULL_CHECK_END: MOP_IN(OP_NULL_CHECK_END); + case OP_EMPTY_CHECK_END: MOP_IN(OP_EMPTY_CHECK_END); { - int isnull; + int is_empty; GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK(isnull, mem, s); - if (isnull) { + STACK_EMPTY_CHECK(is_empty, mem, s); + if (is_empty) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%d\n", - (int )mem, (int )s); + fprintf(stderr, "EMPTY_CHECK_END: skip id:%d, s:%p\n", (int )mem, s); #endif - null_check_found: + empty_check_found: /* empty loop founded, skip next instruction */ switch (*p++) { case OP_JUMP: @@ -2405,20 +2420,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, continue; break; -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - case OP_NULL_CHECK_END_MEMST: MOP_IN(OP_NULL_CHECK_END_MEMST); +#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT + case OP_EMPTY_CHECK_END_MEMST: MOP_IN(OP_EMPTY_CHECK_END_MEMST); { - int isnull; + int is_empty; GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_NULL_CHECK_MEMST(isnull, mem, s, reg); - if (isnull) { + STACK_EMPTY_CHECK_MEMST(is_empty, mem, s, reg); + if (is_empty) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%d\n", - (int )mem, (int )s); + fprintf(stderr, "EMPTY_CHECK_END_MEMST: skip id:%d, s:%p\n", (int)mem, s); #endif - if (isnull == -1) goto fail; - goto null_check_found; + if (is_empty == -1) goto fail; + goto empty_check_found; } } MOP_OUT; @@ -2427,27 +2441,27 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif #ifdef USE_SUBEXP_CALL - case OP_NULL_CHECK_END_MEMST_PUSH: - MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH); + case OP_EMPTY_CHECK_END_MEMST_PUSH: + MOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH); { - int isnull; + int is_empty; GET_MEMNUM_INC(mem, p); /* mem: null check id */ -#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT - STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg); +#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT + STACK_EMPTY_CHECK_MEMST_REC(is_empty, mem, s, reg); #else - STACK_NULL_CHECK_REC(isnull, mem, s); + STACK_EMPTY_CHECK_REC(is_empty, mem, s); #endif - if (isnull) { + if (is_empty) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%d\n", - (int )mem, (int )s); + fprintf(stderr, "EMPTY_CHECK_END_MEMST_PUSH: skip id:%d, s:%p\n", + (int )mem, s); #endif - if (isnull == -1) goto fail; - goto null_check_found; + if (is_empty == -1) goto fail; + goto empty_check_found; } else { - STACK_PUSH_NULL_CHECK_END(mem); + STACK_PUSH_EMPTY_CHECK_END(mem); } } MOP_OUT; @@ -2923,8 +2937,8 @@ bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end, int skip, tlen1; #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "bm_search_notrev: text: %d, text_end: %d, text_range: %d\n", - (int )text, (int )text_end, (int )text_range); + fprintf(stderr, "bm_search_notrev: text: %p, text_end: %p, text_range: %p\n", + text, text_end, text_range); #endif tail = target_end - 1; @@ -3143,8 +3157,8 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, UChar *p, *pprev = (UChar* )NULL; #ifdef ONIG_DEBUG_SEARCH - fprintf(stderr, "forward_search_range: str: %d, end: %d, s: %d, range: %d\n", - (int )str, (int )end, (int )s, (int )range); + fprintf(stderr, "forward_search_range: str: %p, end: %p, s: %p, range: %p\n", + str, end, s, range); #endif p = s; @@ -3309,7 +3323,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc, &(reg->int_map_backward)); - if (r) return r; + if (r != 0) return r; } p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange, end, p); @@ -3398,8 +3412,8 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, - "onig_search (entry point): str: %d, end: %d, start: %d, range: %d\n", - (int )str, (int )(end - str), (int )(start - str), (int )(range - str)); + "onig_search (entry point): str: %p, end: %d, start: %d, range: %d\n", + str, (int )(end - str), (int )(start - str), (int )(range - str)); #endif if (region @@ -3408,7 +3422,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, #endif ) { r = onig_region_resize_clear(region, reg->num_mem + 1); - if (r) goto finish_no_msa; + if (r != 0) goto finish_no_msa; } if (start > end || start < str) goto mismatch_no_msa; @@ -3853,7 +3867,7 @@ onig_number_of_capture_histories(regex_t* reg) n = 0; for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) { - if (BIT_STATUS_AT(reg->capture_history, i) != 0) + if (MEM_STATUS_AT(reg->capture_history, i) != 0) n++; } return n; diff --git a/src/regext.c b/src/regext.c index 1903174..62a557c 100644 --- a/src/regext.c +++ b/src/regext.c @@ -2,7 +2,7 @@ regext.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -171,7 +171,7 @@ onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, if (ci->pattern_enc != ci->target_enc) { r = conv_encoding(ci->pattern_enc, ci->target_enc, pattern, pattern_end, &cpat, &cpat_end); - if (r) return r; + if (r != 0) return r; } else { cpat = (UChar* )pattern; @@ -186,10 +186,10 @@ onig_new_deluxe(regex_t** reg, const UChar* pattern, const UChar* pattern_end, r = onig_reg_init(*reg, ci->option, ci->case_fold_flag, ci->target_enc, ci->syntax); - if (r) goto err; + if (r != 0) goto err; r = onig_compile(*reg, cpat, cpat_end, einfo); - if (r) { + if (r != 0) { err: onig_free(*reg); *reg = NULL; diff --git a/src/regint.h b/src/regint.h index 9835143..8da27d2 100644 --- a/src/regint.h +++ b/src/regint.h @@ -4,7 +4,7 @@ regint.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -59,14 +59,14 @@ #define USE_NAMED_GROUP #define USE_SUBEXP_CALL #define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ -#define USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ +#define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR /* !!! moved to regenc.h. */ /* #define USE_CRNL_AS_LINE_TERMINATOR */ /* internal config */ #define USE_OP_PUSH_OR_JUMP_EXACT -#define USE_QTFR_PEEK_NEXT +#define USE_QUANT_PEEK_NEXT #define USE_ST_LIBRARY #define INIT_MATCH_STACK_SIZE 160 @@ -161,6 +161,10 @@ #endif #endif +#ifdef HAVE_INTTYPES_H +#include <inttypes.h> +#endif + #ifdef __BORLANDC__ #include <malloc.h> #endif @@ -169,6 +173,12 @@ # include <stdio.h> #endif +#ifdef _WIN32 +#if defined(_MSC_VER) && (_MSC_VER < 1300) +typedef int intptr_t; +#endif +#endif + #include "regenc.h" #ifdef MIN @@ -230,24 +240,28 @@ #define ONIG_OPTIMIZE_MAP 5 /* char map */ /* bit status */ -typedef unsigned int BitStatusType; - -#define BIT_STATUS_BITS_NUM (sizeof(BitStatusType) * 8) -#define BIT_STATUS_CLEAR(stats) (stats) = 0 -#define BIT_STATUS_ON_ALL(stats) (stats) = ~((BitStatusType )0) -#define BIT_STATUS_AT(stats,n) \ - ((n) < (int )BIT_STATUS_BITS_NUM ? ((stats) & (1 << n)) : ((stats) & 1)) - -#define BIT_STATUS_ON_AT(stats,n) do {\ - if ((n) < (int )BIT_STATUS_BITS_NUM) \ - (stats) |= (1 << (n));\ +typedef unsigned int MemStatusType; + +#define MEM_STATUS_BITS_NUM (sizeof(MemStatusType) * 8) +#define MEM_STATUS_CLEAR(stats) (stats) = 0 +#define MEM_STATUS_ON_ALL(stats) (stats) = ~((MemStatusType )0) +#define MEM_STATUS_AT(stats,n) \ + ((n) < (int )MEM_STATUS_BITS_NUM ? ((stats) & ((MemStatusType )1 << n)) : ((stats) & 1)) +#define MEM_STATUS_AT0(stats,n) \ + ((n) > 0 && (n) < (int )MEM_STATUS_BITS_NUM ? ((stats) & ((MemStatusType )1 << n)) : ((stats) & 1)) + +#define MEM_STATUS_ON(stats,n) do {\ + if ((n) < (int )MEM_STATUS_BITS_NUM) {\ + if ((n) != 0)\ + (stats) |= ((MemStatusType )1 << (n));\ + }\ else\ (stats) |= 1;\ } while (0) -#define BIT_STATUS_ON_AT_SIMPLE(stats,n) do {\ - if ((n) < (int )BIT_STATUS_BITS_NUM)\ - (stats) |= (1 << (n));\ +#define MEM_STATUS_ON_SIMPLE(stats,n) do {\ + if ((n) < (int )MEM_STATUS_BITS_NUM)\ + (stats) |= ((MemStatusType )1 << (n));\ } while (0) @@ -394,25 +408,28 @@ typedef struct _BBuf { #define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)] -#define ANCHOR_BEGIN_BUF (1<<0) -#define ANCHOR_BEGIN_LINE (1<<1) -#define ANCHOR_BEGIN_POSITION (1<<2) -#define ANCHOR_END_BUF (1<<3) -#define ANCHOR_SEMI_END_BUF (1<<4) -#define ANCHOR_END_LINE (1<<5) - -#define ANCHOR_WORD_BOUND (1<<6) -#define ANCHOR_NOT_WORD_BOUND (1<<7) -#define ANCHOR_WORD_BEGIN (1<<8) -#define ANCHOR_WORD_END (1<<9) -#define ANCHOR_PREC_READ (1<<10) -#define ANCHOR_PREC_READ_NOT (1<<11) -#define ANCHOR_LOOK_BEHIND (1<<12) -#define ANCHOR_LOOK_BEHIND_NOT (1<<13) - +/* has body */ +#define ANCHOR_PREC_READ (1<<0) +#define ANCHOR_PREC_READ_NOT (1<<1) +#define ANCHOR_LOOK_BEHIND (1<<2) +#define ANCHOR_LOOK_BEHIND_NOT (1<<3) +/* no body */ +#define ANCHOR_BEGIN_BUF (1<<4) +#define ANCHOR_BEGIN_LINE (1<<5) +#define ANCHOR_BEGIN_POSITION (1<<6) +#define ANCHOR_END_BUF (1<<7) +#define ANCHOR_SEMI_END_BUF (1<<8) +#define ANCHOR_END_LINE (1<<9) +#define ANCHOR_WORD_BOUND (1<<10) +#define ANCHOR_NOT_WORD_BOUND (1<<11) +#define ANCHOR_WORD_BEGIN (1<<12) +#define ANCHOR_WORD_END (1<<13) #define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */ #define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */ +#define ANCHOR_HAS_BODY(a) ((a)->type < ANCHOR_BEGIN_BUF) + + /* operation code */ enum OpCode { OP_FINISH = 0, /* matching process terminator (no more alternative) */ @@ -490,10 +507,10 @@ enum OpCode { OP_REPEAT_INC_NG, /* non greedy */ OP_REPEAT_INC_SG, /* search and get in stack */ OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */ - OP_NULL_CHECK_START, /* null loop checker start */ - OP_NULL_CHECK_END, /* null loop checker end */ - OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */ - OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ + OP_EMPTY_CHECK_START, /* null loop checker start */ + OP_EMPTY_CHECK_END, /* null loop checker end */ + OP_EMPTY_CHECK_END_MEMST, /* null loop checker end (with capture status) */ + OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ OP_PUSH_POS, /* (?=...) start */ OP_POP_POS, /* (?=...) end */ @@ -581,8 +598,8 @@ typedef void* PointerType; #define SIZE_OP_MEMORY_END_REC (SIZE_OPCODE + SIZE_MEMNUM) #define SIZE_OP_PUSH_STOP_BT SIZE_OPCODE #define SIZE_OP_POP_STOP_BT SIZE_OPCODE -#define SIZE_OP_NULL_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_NULL_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_EMPTY_CHECK_START (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_EMPTY_CHECK_END (SIZE_OPCODE + SIZE_MEMNUM) #define SIZE_OP_LOOK_BEHIND (SIZE_OPCODE + SIZE_LENGTH) #define SIZE_OP_PUSH_LOOK_BEHIND_NOT (SIZE_OPCODE + SIZE_RELADDR + SIZE_LENGTH) #define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE @@ -644,25 +661,10 @@ typedef void* PointerType; #define FLAG_NCCLASS_SHARE (1<<1) #define NCCLASS_SET_NOT(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_NOT) -#define NCCLASS_SET_SHARE(nd) NCCLASS_FLAG_SET(nd, FLAG_NCCLASS_SHARE) #define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) #define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) -#define IS_NCCLASS_SHARE(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_SHARE) - -typedef struct { - int type; - /* struct _Node* next; */ - /* unsigned int flags; */ -} NodeBase; - -typedef struct { - NodeBase base; - unsigned int flags; - BitSet bs; - BBuf* mbuf; /* multi-byte info or NULL */ -} CClassNode; -typedef long OnigStackIndex; +typedef intptr_t OnigStackIndex; typedef struct _OnigStackType { unsigned int type; @@ -693,7 +695,7 @@ typedef struct _OnigStackType { struct { int num; /* null check id */ UChar *pstr; /* start position */ - } null_check; + } empty_check; #ifdef USE_SUBEXP_CALL struct { UChar *ret_addr; /* byte code position */ @@ -744,7 +746,7 @@ typedef struct { extern OnigOpInfoType OnigOpInfo[]; -extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, OnigEncoding enc)); +extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, UChar* start, OnigEncoding enc)); #ifdef ONIG_DEBUG_STATISTICS extern void onig_statistics_init P_((void)); @@ -758,8 +760,7 @@ extern void onig_snprintf_with_pattern PV_((UChar buf[], int bufsize, OnigEncod extern int onig_bbuf_init P_((BBuf* buf, int size)); extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); extern void onig_transfer P_((regex_t* to, regex_t* from)); -extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); -extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, CClassNode* cc)); +extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc)); /* strend hash */ typedef void hash_table_type; diff --git a/src/regparse.c b/src/regparse.c index 8153513..a5f8e5b 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -159,14 +159,25 @@ bbuf_clone(BBuf** rto, BBuf* from) *rto = to = (BBuf* )xmalloc(sizeof(BBuf)); CHECK_NULL_RETURN_MEMERR(to); r = BBUF_INIT(to, from->alloc); - if (r != 0) return r; + if (r != 0) { + xfree(to->p); + *rto = 0; + return r; + } to->used = from->used; xmemcpy(to->p, from->p, from->used); return 0; } -#define BACKREF_REL_TO_ABS(rel_no, env) \ - ((env)->num_mem + 1 + (rel_no)) +static int backref_rel_to_abs(int rel_no, ScanEnv* env) +{ + if (rel_no > 0) { + return env->num_mem + rel_no; + } + else { + return env->num_mem + 1 + rel_no; + } +} #define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) @@ -179,7 +190,7 @@ bbuf_clone(BBuf** rto, BBuf* from) #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ if (! ONIGENC_IS_SINGLEBYTE(enc)) {\ r = SET_ALL_MULTI_BYTE_RANGE(enc, &(mbuf));\ - if (r) return r;\ + if (r != 0) return r;\ }\ } while (0) @@ -422,6 +433,8 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key, int result; key = (st_str_end_key* )xmalloc(sizeof(st_str_end_key)); + CHECK_NULL_RETURN_MEMERR(key); + key->s = (UChar* )str_key; key->end = (UChar* )end_key; result = onig_st_insert(table, (st_data_t )key, value); @@ -519,7 +532,7 @@ onig_names_free(regex_t* reg) NameTable* t; r = names_clear(reg); - if (r) return r; + if (r != 0) return r; t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) onig_st_free_table(t); @@ -700,7 +713,7 @@ onig_names_free(regex_t* reg) NameTable* t; r = names_clear(reg); - if (r) return r; + if (r != 0) return r; t = (NameTable* )reg->name_table; if (IS_NOT_NULL(t)) xfree(t); @@ -762,6 +775,7 @@ onig_number_of_names(regex_t* reg) static int name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) { + int r; int alloc; NameEntry* e; NameTable* t = (NameTable* )reg->name_table; @@ -783,8 +797,9 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) if (IS_NULL(e->name)) { xfree(e); return ONIGERR_MEMORY; } - onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), - (HashDataType )e); + r = onig_st_insert_strend(t, e->name, (e->name + (name_end - name)), + (HashDataType )e); + if (r < 0) return r; e->name_len = name_end - name; e->back_num = 0; @@ -918,14 +933,14 @@ onig_name_to_backref_number(regex_t* reg, const UChar* name, extern int onig_name_to_group_numbers(regex_t* reg, const UChar* name, - const UChar* name_end, int** nums) + const UChar* name_end, int** nums) { return ONIG_NO_SUPPORT_CONFIG; } extern int onig_name_to_backref_number(regex_t* reg, const UChar* name, - const UChar* name_end, OnigRegion* region) + const UChar* name_end, OnigRegion* region) { return ONIG_NO_SUPPORT_CONFIG; } @@ -962,29 +977,32 @@ onig_noname_group_capture_is_active(regex_t* reg) } -#define INIT_SCANENV_MEMNODES_ALLOC_SIZE 16 +#define INIT_SCANENV_MEMENV_ALLOC_SIZE 16 static void scan_env_clear(ScanEnv* env) { - int i; - - BIT_STATUS_CLEAR(env->capture_history); - BIT_STATUS_CLEAR(env->bt_mem_start); - BIT_STATUS_CLEAR(env->bt_mem_end); - BIT_STATUS_CLEAR(env->backrefed_mem); + MEM_STATUS_CLEAR(env->capture_history); + MEM_STATUS_CLEAR(env->bt_mem_start); + MEM_STATUS_CLEAR(env->bt_mem_end); + MEM_STATUS_CLEAR(env->backrefed_mem); env->error = (UChar* )NULL; env->error_end = (UChar* )NULL; env->num_call = 0; + +#ifdef USE_SUBEXP_CALL + env->unset_addr_list = NULL; + env->has_call_zero = 0; +#endif + env->num_mem = 0; #ifdef USE_NAMED_GROUP env->num_named = 0; #endif - env->mem_alloc = 0; - env->mem_nodes_dynamic = (Node** )NULL; + env->mem_alloc = 0; + env->mem_env_dynamic = (MemEnv* )NULL; - for (i = 0; i < SCANENV_MEMNODES_SIZE; i++) - env->mem_nodes_static[i] = NULL_NODE; + xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static)); #ifdef USE_COMBINATION_EXPLOSION_CHECK env->num_comb_exp_check = 0; @@ -999,30 +1017,35 @@ static int scan_env_add_mem_entry(ScanEnv* env) { int i, need, alloc; - Node** p; + MemEnv* p; need = env->num_mem + 1; if (need > MaxCaptureNum && MaxCaptureNum != 0) return ONIGERR_TOO_MANY_CAPTURES; - if (need >= SCANENV_MEMNODES_SIZE) { + if (need >= SCANENV_MEMENV_SIZE) { if (env->mem_alloc <= need) { - if (IS_NULL(env->mem_nodes_dynamic)) { - alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE; - p = (Node** )xmalloc(sizeof(Node*) * alloc); - xmemcpy(p, env->mem_nodes_static, - sizeof(Node*) * SCANENV_MEMNODES_SIZE); + if (IS_NULL(env->mem_env_dynamic)) { + alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE; + p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc); + CHECK_NULL_RETURN_MEMERR(p); + xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static)); } else { alloc = env->mem_alloc * 2; - p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); + p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc); + CHECK_NULL_RETURN_MEMERR(p); } - CHECK_NULL_RETURN_MEMERR(p); - for (i = env->num_mem + 1; i < alloc; i++) - p[i] = NULL_NODE; + for (i = env->num_mem + 1; i < alloc; i++) { + p[i].node = NULL_NODE; +#if 0 + p[i].in = 0; + p[i].recursion = 0; +#endif + } - env->mem_nodes_dynamic = p; + env->mem_env_dynamic = p; env->mem_alloc = alloc; } } @@ -1035,7 +1058,7 @@ static int scan_env_set_mem_node(ScanEnv* env, int num, Node* node) { if (env->num_mem >= num) - SCANENV_MEM_NODES(env)[num] = node; + SCANENV_MEMENV(env)[num].node = node; else return ONIGERR_PARSER_BUG; return 0; @@ -1051,19 +1074,19 @@ onig_node_free(Node* node) fprintf(stderr, "onig_node_free: %p\n", node); #endif - switch (NTYPE(node)) { - case NT_STR: - if (NSTR(node)->capa != 0 && - IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { - xfree(NSTR(node)->s); + switch (NODE_TYPE(node)) { + case NODE_STR: + if (STR_(node)->capa != 0 && + IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) { + xfree(STR_(node)->s); } break; - case NT_LIST: - case NT_ALT: - onig_node_free(NCAR(node)); + case NODE_LIST: + case NODE_ALT: + onig_node_free(NODE_CAR(node)); { - Node* next_node = NCDR(node); + Node* next_node = NODE_CDR(node); xfree(node); node = next_node; @@ -1071,34 +1094,29 @@ onig_node_free(Node* node) } break; - case NT_CCLASS: + case NODE_CCLASS: { - CClassNode* cc = NCCLASS(node); + CClassNode* cc = CCLASS_(node); - if (IS_NCCLASS_SHARE(cc)) return ; if (cc->mbuf) bbuf_free(cc->mbuf); } break; - case NT_QTFR: - if (NQTFR(node)->target) - onig_node_free(NQTFR(node)->target); - break; - - case NT_ENCLOSE: - if (NENCLOSE(node)->target) - onig_node_free(NENCLOSE(node)->target); + case NODE_BREF: + if (IS_NOT_NULL(BREF_(node)->back_dynamic)) + xfree(BREF_(node)->back_dynamic); break; - case NT_BREF: - if (IS_NOT_NULL(NBREF(node)->back_dynamic)) - xfree(NBREF(node)->back_dynamic); + case NODE_QUANT: + case NODE_ENCLOSURE: + case NODE_ANCHOR: + if (NODE_BODY(node)) + onig_node_free(NODE_BODY(node)); break; - case NT_ANCHOR: - if (NANCHOR(node)->target) - onig_node_free(NANCHOR(node)->target); + case NODE_CTYPE: + case NODE_CALL: break; } @@ -1111,7 +1129,9 @@ node_new(void) Node* node; node = (Node* )xmalloc(sizeof(Node)); - /* xmemset(node, 0, sizeof(Node)); */ + //xmemset(node, 0, sizeof(node->u.base)); + xmemset(node, 0, sizeof(*node)); + #ifdef DEBUG_NODE_FREE fprintf(stderr, "node_new: %p\n", node); #endif @@ -1134,8 +1154,8 @@ node_new_cclass(void) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_CCLASS); - initialize_cclass(NCCLASS(node)); + SET_NODE_TYPE(node, NODE_CCLASS); + initialize_cclass(CCLASS_(node)); return node; } @@ -1145,19 +1165,9 @@ node_new_ctype(int type, int not) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_CTYPE); - NCTYPE(node)->ctype = type; - NCTYPE(node)->not = not; - return node; -} - -static Node* -node_new_anychar(void) -{ - Node* node = node_new(); - CHECK_NULL_RETURN(node); - - SET_NTYPE(node, NT_CANY); + SET_NODE_TYPE(node, NODE_CTYPE); + CTYPE_(node)->ctype = type; + CTYPE_(node)->not = not; return node; } @@ -1167,9 +1177,9 @@ node_new_list(Node* left, Node* right) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_LIST); - NCAR(node) = left; - NCDR(node) = right; + SET_NODE_TYPE(node, NODE_LIST); + NODE_CAR(node) = left; + NODE_CDR(node) = right; return node; } @@ -1188,10 +1198,10 @@ onig_node_list_add(Node* list, Node* x) if (IS_NULL(n)) return NULL_NODE; if (IS_NOT_NULL(list)) { - while (IS_NOT_NULL(NCDR(list))) - list = NCDR(list); + while (IS_NOT_NULL(NODE_CDR(list))) + list = NODE_CDR(list); - NCDR(list) = n; + NODE_CDR(list) = n; } return n; @@ -1203,9 +1213,9 @@ onig_node_new_alt(Node* left, Node* right) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_ALT); - NCAR(node) = left; - NCDR(node) = right; + SET_NODE_TYPE(node, NODE_ALT); + NODE_CAR(node) = left; + NODE_CDR(node) = right; return node; } @@ -1215,10 +1225,9 @@ onig_node_new_anchor(int type) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_ANCHOR); - NANCHOR(node)->type = type; - NANCHOR(node)->target = NULL; - NANCHOR(node)->char_len = -1; + SET_NODE_TYPE(node, NODE_ANCHOR); + ANCHOR_(node)->type = type; + ANCHOR_(node)->char_len = -1; return node; } @@ -1234,31 +1243,30 @@ node_new_backref(int back_num, int* backrefs, int by_name, CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_BREF); - NBREF(node)->state = 0; - NBREF(node)->back_num = back_num; - NBREF(node)->back_dynamic = (int* )NULL; + SET_NODE_TYPE(node, NODE_BREF); + BREF_(node)->back_num = back_num; + BREF_(node)->back_dynamic = (int* )NULL; if (by_name != 0) - NBREF(node)->state |= NST_NAME_REF; + NODE_STATUS_ADD(node, NST_BY_NAME); #ifdef USE_BACKREF_WITH_LEVEL if (exist_level != 0) { - NBREF(node)->state |= NST_NEST_LEVEL; - NBREF(node)->nest_level = nest_level; + NODE_STATUS_ADD(node, NST_NEST_LEVEL); + BREF_(node)->nest_level = nest_level; } #endif for (i = 0; i < back_num; i++) { if (backrefs[i] <= env->num_mem && - IS_NULL(SCANENV_MEM_NODES(env)[backrefs[i]])) { - NBREF(node)->state |= NST_RECURSION; /* /...(\1).../ */ + IS_NULL(SCANENV_MEMENV(env)[backrefs[i]].node)) { + NODE_STATUS_ADD(node, NST_RECURSION); /* /...(\1).../ */ break; } } if (back_num <= NODE_BACKREFS_SIZE) { for (i = 0; i < back_num; i++) - NBREF(node)->back_static[i] = backrefs[i]; + BREF_(node)->back_static[i] = backrefs[i]; } else { int* p = (int* )xmalloc(sizeof(int) * back_num); @@ -1266,7 +1274,7 @@ node_new_backref(int back_num, int* backrefs, int by_name, onig_node_free(node); return NULL; } - NBREF(node)->back_dynamic = p; + BREF_(node)->back_dynamic = p; for (i = 0; i < back_num; i++) p[i] = backrefs[i]; } @@ -1275,17 +1283,17 @@ node_new_backref(int back_num, int* backrefs, int by_name, #ifdef USE_SUBEXP_CALL static Node* -node_new_call(UChar* name, UChar* name_end, int gnum) +node_new_call(UChar* name, UChar* name_end, int gnum, int by_number) { Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_CALL); - NCALL(node)->state = 0; - NCALL(node)->target = NULL_NODE; - NCALL(node)->name = name; - NCALL(node)->name_end = name_end; - NCALL(node)->group_num = gnum; /* call by number if gnum != 0 */ + SET_NODE_TYPE(node, NODE_CALL); + CALL_(node)->by_number = by_number; + CALL_(node)->name = name; + CALL_(node)->name_end = name_end; + CALL_(node)->group_num = gnum; + CALL_(node)->entry_count = 1; return node; } #endif @@ -1296,69 +1304,76 @@ node_new_quantifier(int lower, int upper, int by_number) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_QTFR); - NQTFR(node)->state = 0; - NQTFR(node)->target = NULL; - NQTFR(node)->lower = lower; - NQTFR(node)->upper = upper; - NQTFR(node)->greedy = 1; - NQTFR(node)->target_empty_info = NQ_TARGET_ISNOT_EMPTY; - NQTFR(node)->head_exact = NULL_NODE; - NQTFR(node)->next_head_exact = NULL_NODE; - NQTFR(node)->is_refered = 0; + SET_NODE_TYPE(node, NODE_QUANT); + QUANT_(node)->lower = lower; + QUANT_(node)->upper = upper; + QUANT_(node)->greedy = 1; + QUANT_(node)->body_empty_info = QUANT_BODY_IS_NOT_EMPTY; + QUANT_(node)->head_exact = NULL_NODE; + QUANT_(node)->next_head_exact = NULL_NODE; + QUANT_(node)->is_refered = 0; if (by_number != 0) - NQTFR(node)->state |= NST_BY_NUMBER; + NODE_STATUS_ADD(node, NST_BY_NUMBER); #ifdef USE_COMBINATION_EXPLOSION_CHECK - NQTFR(node)->comb_exp_check_num = 0; + QUANT_(node)->comb_exp_check_num = 0; #endif return node; } static Node* -node_new_enclose(int type) +node_new_enclosure(int type) { Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_ENCLOSE); - NENCLOSE(node)->type = type; - NENCLOSE(node)->state = 0; - NENCLOSE(node)->regnum = 0; - NENCLOSE(node)->option = 0; - NENCLOSE(node)->target = NULL; - NENCLOSE(node)->call_addr = -1; - NENCLOSE(node)->opt_count = 0; + SET_NODE_TYPE(node, NODE_ENCLOSURE); + ENCLOSURE_(node)->type = type; + + switch (type) { + case ENCLOSURE_MEMORY: + ENCLOSURE_(node)->m.regnum = 0; + ENCLOSURE_(node)->m.called_addr = -1; + ENCLOSURE_(node)->m.entry_count = 1; + ENCLOSURE_(node)->m.called_state = 0; + break; + + case ENCLOSURE_OPTION: + ENCLOSURE_(node)->o.option = 0; + break; + + case ENCLOSURE_STOP_BACKTRACK: + break; + } + + ENCLOSURE_(node)->opt_count = 0; return node; } extern Node* -onig_node_new_enclose(int type) +onig_node_new_enclosure(int type) { - return node_new_enclose(type); + return node_new_enclosure(type); } static Node* -node_new_enclose_memory(OnigOptionType option, int is_named) +node_new_enclosure_memory(int is_named) { - Node* node = node_new_enclose(ENCLOSE_MEMORY); + Node* node = node_new_enclosure(ENCLOSURE_MEMORY); CHECK_NULL_RETURN(node); if (is_named != 0) - SET_ENCLOSE_STATUS(node, NST_NAMED_GROUP); + NODE_STATUS_ADD(node, NST_NAMED_GROUP); -#ifdef USE_SUBEXP_CALL - NENCLOSE(node)->option = option; -#endif return node; } static Node* node_new_option(OnigOptionType option) { - Node* node = node_new_enclose(ENCLOSE_OPTION); + Node* node = node_new_enclosure(ENCLOSURE_OPTION); CHECK_NULL_RETURN(node); - NENCLOSE(node)->option = option; + ENCLOSURE_(node)->o.option = option; return node; } @@ -1368,31 +1383,31 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end) int addlen = end - s; if (addlen > 0) { - int len = NSTR(node)->end - NSTR(node)->s; + int len = STR_(node)->end - STR_(node)->s; - if (NSTR(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { + if (STR_(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { UChar* p; int capa = len + addlen + NODE_STR_MARGIN; - if (capa <= NSTR(node)->capa) { - onig_strcpy(NSTR(node)->s + len, s, end); + if (capa <= STR_(node)->capa) { + onig_strcpy(STR_(node)->s + len, s, end); } else { - if (NSTR(node)->s == NSTR(node)->buf) - p = strcat_capa_from_static(NSTR(node)->s, NSTR(node)->end, + if (STR_(node)->s == STR_(node)->buf) + p = strcat_capa_from_static(STR_(node)->s, STR_(node)->end, s, end, capa); else - p = strcat_capa(NSTR(node)->s, NSTR(node)->end, s, end, capa); + p = strcat_capa(STR_(node)->s, STR_(node)->end, s, end, capa); CHECK_NULL_RETURN_MEMERR(p); - NSTR(node)->s = p; - NSTR(node)->capa = capa; + STR_(node)->s = p; + STR_(node)->capa = capa; } } else { - onig_strcpy(NSTR(node)->s + len, s, end); + onig_strcpy(STR_(node)->s + len, s, end); } - NSTR(node)->end = NSTR(node)->s + len + addlen; + STR_(node)->end = STR_(node)->s + len + addlen; } return 0; @@ -1417,25 +1432,25 @@ node_str_cat_char(Node* node, UChar c) extern void onig_node_conv_to_str_node(Node* node, int flag) { - SET_NTYPE(node, NT_STR); - NSTR(node)->flag = flag; - NSTR(node)->capa = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; + SET_NODE_TYPE(node, NODE_STR); + STR_(node)->flag = flag; + STR_(node)->capa = 0; + STR_(node)->s = STR_(node)->buf; + STR_(node)->end = STR_(node)->buf; } extern void onig_node_str_clear(Node* node) { - if (NSTR(node)->capa != 0 && - IS_NOT_NULL(NSTR(node)->s) && NSTR(node)->s != NSTR(node)->buf) { - xfree(NSTR(node)->s); + if (STR_(node)->capa != 0 && + IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) { + xfree(STR_(node)->s); } - NSTR(node)->capa = 0; - NSTR(node)->flag = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; + STR_(node)->capa = 0; + STR_(node)->flag = 0; + STR_(node)->s = STR_(node)->buf; + STR_(node)->end = STR_(node)->buf; } static Node* @@ -1444,11 +1459,11 @@ node_new_str(const UChar* s, const UChar* end) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NTYPE(node, NT_STR); - NSTR(node)->capa = 0; - NSTR(node)->flag = 0; - NSTR(node)->s = NSTR(node)->buf; - NSTR(node)->end = NSTR(node)->buf; + SET_NODE_TYPE(node, NODE_STR); + STR_(node)->capa = 0; + STR_(node)->flag = 0; + STR_(node)->s = STR_(node)->buf; + STR_(node)->end = STR_(node)->buf; if (onig_node_str_cat(node, s, end)) { onig_node_free(node); return NULL; @@ -1495,7 +1510,7 @@ str_node_split_last_char(StrNode* sn, OnigEncoding enc) p = onigenc_get_prev_char_head(enc, sn->s, sn->end); if (p && p > sn->s) { /* can be split. */ n = node_new_str(p, sn->end); - if ((sn->flag & NSTR_RAW) != 0) + if ((sn->flag & STRING_RAW) != 0) NSTRING_SET_RAW(n); sn->end = (UChar* )p; @@ -1540,7 +1555,7 @@ onig_scan_unsigned_number(UChar** src, const UChar* end, OnigEncoding enc) PFETCH_READY; num = 0; - while (!PEND) { + while (! PEND) { PFETCH(c); if (ONIGENC_IS_CODE_DIGIT(enc, c)) { val = (unsigned int )DIGITVAL(c); @@ -1596,7 +1611,7 @@ scan_unsigned_octal_number(UChar** src, UChar* end, int maxlen, PFETCH_READY; num = 0; - while (!PEND && maxlen-- != 0) { + while (! PEND && maxlen-- != 0) { PFETCH(c); if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') { val = ODIGITVAL(c); @@ -1631,9 +1646,13 @@ new_code_range(BBuf** pbuf) BBuf* bbuf; bbuf = *pbuf = (BBuf* )xmalloc(sizeof(BBuf)); - CHECK_NULL_RETURN_MEMERR(*pbuf); - r = BBUF_INIT(*pbuf, INIT_MULTI_BYTE_RANGE_SIZE); - if (r) return r; + CHECK_NULL_RETURN_MEMERR(bbuf); + r = BBUF_INIT(bbuf, INIT_MULTI_BYTE_RANGE_SIZE); + if (r != 0) { + xfree(bbuf); + *pbuf = 0; + return r; + } n = 0; BBUF_WRITE_CODE_POINT(bbuf, 0, n); @@ -1654,7 +1673,7 @@ add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) if (IS_NULL(*pbuf)) { r = new_code_range(pbuf); - if (r) return r; + if (r != 0) return r; bbuf = *pbuf; n = 0; } @@ -2069,27 +2088,27 @@ conv_backslash_value(OnigCodePoint c, ScanEnv* env) static int is_invalid_quantifier_target(Node* node) { - switch (NTYPE(node)) { - case NT_ANCHOR: + switch (NODE_TYPE(node)) { + case NODE_ANCHOR: return 1; break; - case NT_ENCLOSE: + case NODE_ENCLOSURE: /* allow enclosed elements */ - /* return is_invalid_quantifier_target(NENCLOSE(node)->target); */ + /* return is_invalid_quantifier_target(NODE_BODY(node)); */ break; - case NT_LIST: + case NODE_LIST: do { - if (! is_invalid_quantifier_target(NCAR(node))) return 0; - } while (IS_NOT_NULL(node = NCDR(node))); + if (! is_invalid_quantifier_target(NODE_CAR(node))) return 0; + } while (IS_NOT_NULL(node = NODE_CDR(node))); return 0; break; - case NT_ALT: + case NODE_ALT: do { - if (is_invalid_quantifier_target(NCAR(node))) return 1; - } while (IS_NOT_NULL(node = NCDR(node))); + if (is_invalid_quantifier_target(NODE_CAR(node))) return 1; + } while (IS_NOT_NULL(node = NODE_CDR(node))); break; default: @@ -2100,7 +2119,7 @@ is_invalid_quantifier_target(Node* node) /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ static int -popular_quantifier_num(QtfrNode* q) +popular_quantifier_num(QuantNode* q) { if (q->greedy) { if (q->lower == 0) { @@ -2147,10 +2166,10 @@ extern void onig_reduce_nested_quantifier(Node* pnode, Node* cnode) { int pnum, cnum; - QtfrNode *p, *c; + QuantNode *p, *c; - p = NQTFR(pnode); - c = NQTFR(cnode); + p = QUANT_(pnode); + c = QUANT_(cnode); pnum = popular_quantifier_num(p); cnum = popular_quantifier_num(c); if (pnum < 0 || cnum < 0) return ; @@ -2160,36 +2179,36 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode) *pnode = *cnode; break; case RQ_A: - p->target = c->target; + NODE_BODY(pnode) = NODE_BODY(cnode); p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 1; break; case RQ_AQ: - p->target = c->target; + NODE_BODY(pnode) = NODE_BODY(cnode); p->lower = 0; p->upper = REPEAT_INFINITE; p->greedy = 0; break; case RQ_QQ: - p->target = c->target; + NODE_BODY(pnode) = NODE_BODY(cnode); p->lower = 0; p->upper = 1; p->greedy = 0; break; case RQ_P_QQ: - p->target = cnode; + NODE_BODY(pnode) = cnode; p->lower = 0; p->upper = 1; p->greedy = 0; c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 1; return ; break; case RQ_PQ_Q: - p->target = cnode; + NODE_BODY(pnode) = cnode; p->lower = 0; p->upper = 1; p->greedy = 1; c->lower = 1; c->upper = REPEAT_INFINITE; c->greedy = 0; return ; break; case RQ_ASIS: - p->target = cnode; + NODE_BODY(pnode) = cnode; return ; break; } - c->target = NULL_NODE; + NODE_BODY(cnode) = NULL_NODE; onig_node_free(cnode); } @@ -2253,6 +2272,7 @@ typedef struct { UChar* name; UChar* name_end; int gnum; + int by_number; } call; struct { int ctype; @@ -2441,19 +2461,27 @@ get_name_end_code_point(OnigCodePoint start) return (OnigCodePoint )0; } +enum REF_NUM { + IS_NOT_NUM = 0, + IS_ABS_NUM = 1, + IS_REL_NUM = 2 +}; + #ifdef USE_NAMED_GROUP #ifdef USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> \k<num+n>, \k<num-n> \k<-num+n>, \k<-num-n> + \k<+num+n>, \k<+num-n> */ static int fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, - int* rback_num, int* rlevel) + UChar** rname_end, ScanEnv* env, + int* rback_num, int* rlevel, enum REF_NUM* num_type) { - int r, sign, is_num, exist_level; + int r, sign, exist_level; + int digit_count; OnigCodePoint end_code; OnigCodePoint c = 0; OnigEncoding enc = env->enc; @@ -2463,12 +2491,14 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, PFETCH_READY; *rback_num = 0; - is_num = exist_level = 0; + exist_level = 0; + *num_type = IS_NOT_NUM; sign = 1; pnum_head = *src; end_code = get_name_end_code_point(start_code); + digit_count = 0; name_end = end; r = 0; if (PEND) { @@ -2480,13 +2510,19 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, return ONIGERR_EMPTY_GROUP_NAME; if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; + *num_type = IS_ABS_NUM; + digit_count++; } else if (c == '-') { - is_num = 2; + *num_type = IS_REL_NUM; sign = -1; pnum_head = p; } + else if (c == '+') { + *num_type = IS_REL_NUM; + sign = 1; + pnum_head = p; + } else if (!ONIGENC_IS_CODE_WORD(enc, c)) { r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } @@ -2496,17 +2532,18 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, name_end = p; PFETCH(c); if (c == end_code || c == ')' || c == '+' || c == '-') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + if (*num_type != IS_NOT_NUM && digit_count == 0) + r = ONIGERR_INVALID_GROUP_NAME; break; } - if (is_num != 0) { + if (*num_type != IS_NOT_NUM) { if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; + digit_count++; } else { r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; + *num_type = IS_NOT_NUM; } } else if (!ONIGENC_IS_CODE_WORD(enc, c)) { @@ -2539,16 +2576,20 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, } err: - r = ONIGERR_INVALID_GROUP_NAME; name_end = end; + err2: + r = ONIGERR_INVALID_GROUP_NAME; } end: if (r == 0) { - if (is_num != 0) { + if (*num_type != IS_NOT_NUM) { *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; - else if (*rback_num == 0) goto err; + else if (*rback_num == 0) { + if (*num_type == IS_REL_NUM) + goto err2; + } *rback_num *= sign; } @@ -2570,9 +2611,11 @@ fetch_name_with_level(OnigCodePoint start_code, UChar** src, UChar* end, */ static int fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* rback_num, int ref) + UChar** rname_end, ScanEnv* env, int* rback_num, + enum REF_NUM* num_type, int ref) { - int r, is_num, sign; + int r, sign; + int digit_count; OnigCodePoint end_code; OnigCodePoint c = 0; OnigEncoding enc = env->enc; @@ -2584,10 +2627,11 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, end_code = get_name_end_code_point(start_code); + digit_count = 0; name_end = end; pnum_head = *src; r = 0; - is_num = 0; + *num_type = IS_NOT_NUM; sign = 1; if (PEND) { return ONIGERR_EMPTY_GROUP_NAME; @@ -2599,21 +2643,30 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, if (ONIGENC_IS_CODE_DIGIT(enc, c)) { if (ref == 1) - is_num = 1; + *num_type = IS_ABS_NUM; else { r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; } + digit_count++; } else if (c == '-') { if (ref == 1) { - is_num = 2; + *num_type = IS_REL_NUM; sign = -1; pnum_head = p; } else { r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; + } + } + else if (c == '+') { + if (ref == 1) { + *num_type = IS_REL_NUM; + sign = 1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; } } else if (!ONIGENC_IS_CODE_WORD(enc, c)) { @@ -2626,20 +2679,22 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, name_end = p; PFETCH_S(c); if (c == end_code || c == ')') { - if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; + if (*num_type != IS_NOT_NUM && digit_count == 0) + r = ONIGERR_INVALID_GROUP_NAME; break; } - if (is_num != 0) { + if (*num_type != IS_NOT_NUM) { if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; + digit_count++; } else { if (!ONIGENC_IS_CODE_WORD(enc, c)) r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; else r = ONIGERR_INVALID_GROUP_NAME; - is_num = 0; + + *num_type = IS_NOT_NUM; } } else { @@ -2654,12 +2709,14 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, name_end = end; } - if (is_num != 0) { + if (*num_type != IS_NOT_NUM) { *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; else if (*rback_num == 0) { - r = ONIGERR_INVALID_GROUP_NAME; - goto err; + if (*num_type == IS_REL_NUM) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } } *rback_num *= sign; @@ -2687,9 +2744,11 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, #else static int fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, - UChar** rname_end, ScanEnv* env, int* rback_num, int ref) + UChar** rname_end, ScanEnv* env, int* rback_num, + enum REF_NUM* num_type, int ref) { - int r, is_num, sign; + int r, sign; + int digit_count; OnigCodePoint end_code; OnigCodePoint c = 0; UChar *name_end; @@ -2702,10 +2761,11 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, end_code = get_name_end_code_point(start_code); + digit_count = 0; *rname_end = name_end = end; r = 0; pnum_head = *src; - is_num = 0; + *num_type = IS_ABS_NUM; sign = 1; if (PEND) { @@ -2717,37 +2777,61 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, return ONIGERR_EMPTY_GROUP_NAME; if (ONIGENC_IS_CODE_DIGIT(enc, c)) { - is_num = 1; + *num_type = IS_ABS_NUM; + digit_count++; } else if (c == '-') { - is_num = 2; - sign = -1; - pnum_head = p; + if (ref == 1) { + *num_type = IS_REL_NUM; + sign = -1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + } + } + else if (c == '+') { + if (ref == 1) { + *num_type = IS_REL_NUM; + sign = 1; + pnum_head = p; + } + else { + r = ONIGERR_INVALID_GROUP_NAME; + } } else { r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } } - while (!PEND) { + while (! PEND) { name_end = p; PFETCH(c); if (c == end_code || c == ')') break; - if (! ONIGENC_IS_CODE_DIGIT(enc, c)) + + if (ONIGENC_IS_CODE_DIGIT(enc, c)) + digit_count++; + else r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; } if (r == 0 && c != end_code) { r = ONIGERR_INVALID_GROUP_NAME; name_end = end; } + if (r == 0 && digit_count == 0) { + r = ONIGERR_INVALID_GROUP_NAME; + } if (r == 0) { *rback_num = onig_scan_unsigned_number(&pnum_head, name_end, enc); if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; else if (*rback_num == 0) { - r = ONIGERR_INVALID_GROUP_NAME; - goto err; + if (*num_type == IS_REL_NUM) { + r = ONIGERR_INVALID_GROUP_NAME; + goto err; + } } *rback_num *= sign; @@ -3418,7 +3502,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (IS_SYNTAX_OP(syn, ONIG_SYN_OP_DECIMAL_BACKREF) && (num <= env->num_mem || num <= 9)) { /* This spec. from GNU regex */ if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { - if (num > env->num_mem || IS_NULL(SCANENV_MEM_NODES(env)[num])) + if (num > env->num_mem || IS_NULL(SCANENV_MEMENV(env)[num].node)) return ONIGERR_INVALID_BACKREF; } @@ -3466,30 +3550,31 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) UChar* name_end; int* backs; int back_num; + enum REF_NUM num_type; prev = p; #ifdef USE_BACKREF_WITH_LEVEL name_end = NULL_UCHARP; /* no need. escape gcc warning. */ r = fetch_name_with_level((OnigCodePoint )c, &p, end, &name_end, - env, &back_num, &tok->u.backref.level); + env, &back_num, &tok->u.backref.level, &num_type); if (r == 1) tok->u.backref.exist_level = 1; else tok->u.backref.exist_level = 0; #else - r = fetch_name(&p, end, &name_end, env, &back_num, 1); + r = fetch_name(&p, end, &name_end, env, &back_num, &num_type, 1); #endif if (r < 0) return r; - if (back_num != 0) { - if (back_num < 0) { - back_num = BACKREF_REL_TO_ABS(back_num, env); - if (back_num <= 0) - return ONIGERR_INVALID_BACKREF; + if (num_type != IS_NOT_NUM) { + if (num_type == IS_REL_NUM) { + back_num = backref_rel_to_abs(back_num, env); } + if (back_num <= 0) + return ONIGERR_INVALID_BACKREF; if (IS_SYNTAX_BV(syn, ONIG_SYN_STRICT_CHECK_BACKREF)) { if (back_num > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[back_num])) + IS_NULL(SCANENV_MEMENV(env)[back_num].node)) return ONIGERR_INVALID_BACKREF; } tok->type = TK_BACKREF; @@ -3508,7 +3593,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) int i; for (i = 0; i < num; i++) { if (backs[i] > env->num_mem || - IS_NULL(SCANENV_MEM_NODES(env)[backs[i]])) + IS_NULL(SCANENV_MEMENV(env)[backs[i]].node)) return ONIGERR_INVALID_BACKREF; } } @@ -3538,15 +3623,30 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (c == '<' || c == '\'') { int gnum; UChar* name_end; + enum REF_NUM num_type; prev = p; - r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &gnum, 1); + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, + &gnum, &num_type, 1); if (r < 0) return r; + if (num_type != IS_NOT_NUM) { + if (num_type == IS_REL_NUM) { + gnum = backref_rel_to_abs(gnum, env); + if (gnum < 0) + return ONIGERR_UNDEFINED_GROUP_REFERENCE; + } + tok->u.call.by_number = 1; + tok->u.call.gnum = gnum; + } + else { + tok->u.call.by_number = 0; + tok->u.call.gnum = 0; + } + tok->type = TK_CALL; tok->u.call.name = prev; tok->u.call.name_end = name_end; - tok->u.call.gnum = gnum; } else PUNFETCH; @@ -4040,7 +4140,7 @@ parse_char_property(Node** np, OnigToken* tok, UChar** src, UChar* end, *np = node_new_cclass(); CHECK_NULL_RETURN_MEMERR(*np); - cc = NCCLASS(*np); + cc = CCLASS_(*np); r = add_ctype_to_cc(cc, ctype, 0, env); if (r != 0) return r; if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); @@ -4080,9 +4180,7 @@ next_state_class(CClassNode* cc, OnigCodePoint* vs, enum CCVALTYPE* type, } } - if (*state != CCS_START) - *state = CCS_VALUE; - + *state = CCS_VALUE; *type = CCV_CLASS; return 0; } @@ -4222,7 +4320,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, *np = node = node_new_cclass(); CHECK_NULL_RETURN_MEMERR(node); - cc = NCCLASS(node); + cc = CCLASS_(node); and_start = 0; state = CCS_START; @@ -4335,7 +4433,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, case TK_CHAR_TYPE: r = add_ctype_to_cc(cc, tok->u.prop.ctype, tok->u.prop.not, env); - if (r != 0) return r; + if (r != 0) goto err; next_class: r = next_state_class(cc, &vs, &val_type, &state, env); @@ -4344,12 +4442,13 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, case TK_CHAR_PROPERTY: { - int ctype; - - ctype = fetch_char_property_to_ctype(&p, end, env); - if (ctype < 0) return ctype; + int ctype = fetch_char_property_to_ctype(&p, end, env); + if (ctype < 0) { + r = ctype; + goto err; + } r = add_ctype_to_cc(cc, ctype, tok->u.prop.not, env); - if (r != 0) return r; + if (r != 0) goto err; goto next_class; } break; @@ -4369,6 +4468,12 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, CC_ESC_WARN(env, (UChar* )"-"); goto range_end_val; } + + if (val_type == CCV_CLASS) { + r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; + goto err; + } + state = CCS_RANGE; } else if (state == CCS_START) { @@ -4418,10 +4523,10 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, onig_node_free(anode); goto cc_open_err; } - acc = NCCLASS(anode); + acc = CCLASS_(anode); r = or_cclass(cc, acc, env->enc); - onig_node_free(anode); + cc_open_err: if (r != 0) goto err; } @@ -4488,9 +4593,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, NCCLASS_CLEAR_NOT(cc); if (IS_NCCLASS_NOT(cc) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC)) { - int is_empty; - - is_empty = (IS_NULL(cc->mbuf) ? 1 : 0); + int is_empty = (IS_NULL(cc->mbuf) ? 1 : 0); if (is_empty != 0) BITSET_IS_EMPTY(cc->bs, is_empty); @@ -4510,7 +4613,7 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, return 0; err: - if (cc != NCCLASS(*np)) + if (cc != CCLASS_(*np)) bbuf_free(cc->mbuf); return r; } @@ -4519,18 +4622,17 @@ static int parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env); static int -parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, +parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env) { int r, num; Node *target; OnigOptionType option; OnigCodePoint c; - OnigEncoding enc = env->enc; - #ifdef USE_NAMED_GROUP int list_capture; #endif + OnigEncoding enc = env->enc; UChar* p = *src; PFETCH_READY; @@ -4563,7 +4665,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, *np = onig_node_new_anchor(ANCHOR_PREC_READ_NOT); break; case '>': /* (?>...) stop backtrack */ - *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); + *np = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); break; #ifdef USE_NAMED_GROUP @@ -4588,6 +4690,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { UChar *name; UChar *name_end; + enum REF_NUM num_type; PUNFETCH; c = '<'; @@ -4597,21 +4700,22 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, named_group2: name = p; - r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0); + r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, + &num_type, 0); if (r < 0) return r; num = scan_env_add_mem_entry(env); if (num < 0) return num; - if (list_capture != 0 && num >= (int )BIT_STATUS_BITS_NUM) + if (list_capture != 0 && num >= (int )MEM_STATUS_BITS_NUM) return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; r = name_add(env->reg, name, name_end, num, env); if (r != 0) return r; - *np = node_new_enclose_memory(env->option, 1); + *np = node_new_enclosure_memory(1); CHECK_NULL_RETURN_MEMERR(*np); - NENCLOSE(*np)->regnum = num; + ENCLOSURE_(*np)->m.regnum = num; if (list_capture != 0) - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + MEM_STATUS_ON_SIMPLE(env->capture_history, num); env->num_named++; } else { @@ -4637,17 +4741,17 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, PUNFETCH; } #endif - *np = node_new_enclose_memory(env->option, 0); + *np = node_new_enclosure_memory(0); CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) { return num; } - else if (num >= (int )BIT_STATUS_BITS_NUM) { + else if (num >= (int )MEM_STATUS_BITS_NUM) { return ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY; } - NENCLOSE(*np)->regnum = num; - BIT_STATUS_ON_AT_SIMPLE(env->capture_history, num); + ENCLOSURE_(*np)->m.regnum = num; + MEM_STATUS_ON_SIMPLE(env->capture_history, num); } else { return ONIGERR_UNDEFINED_GROUP_OPTION; @@ -4717,7 +4821,7 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } *np = node_new_option(option); CHECK_NULL_RETURN_MEMERR(*np); - NENCLOSE(*np)->target = target; + NODE_BODY(*np) = target; *src = p; return 0; } @@ -4736,11 +4840,11 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) goto group; - *np = node_new_enclose_memory(env->option, 0); + *np = node_new_enclosure_memory(0); CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) return num; - NENCLOSE(*np)->regnum = num; + ENCLOSURE_(*np)->m.regnum = num; } CHECK_NULL_RETURN_MEMERR(*np); @@ -4752,13 +4856,12 @@ parse_enclose(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, return r; } - if (NTYPE(*np) == NT_ANCHOR) - NANCHOR(*np)->target = target; - else { - NENCLOSE(*np)->target = target; - if (NENCLOSE(*np)->type == ENCLOSE_MEMORY) { + NODE_BODY(*np) = target; + + if (NODE_TYPE(*np) == NODE_ENCLOSURE) { + if (ENCLOSURE_(*np)->type == ENCLOSURE_MEMORY) { /* Don't move this to previous of parse_subexp() */ - r = scan_env_set_mem_node(env, NENCLOSE(*np)->regnum, *np); + r = scan_env_set_mem_node(env, ENCLOSURE_(*np)->m.regnum, *np); if (r != 0) return r; } } @@ -4778,36 +4881,35 @@ static const char* ReduceQStr[] = { static int set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) { - QtfrNode* qn; + QuantNode* qn; - qn = NQTFR(qnode); - if (qn->lower == 1 && qn->upper == 1) { + qn = QUANT_(qnode); + if (qn->lower == 1 && qn->upper == 1) return 1; - } - switch (NTYPE(target)) { - case NT_STR: + switch (NODE_TYPE(target)) { + case NODE_STR: if (! group) { - StrNode* sn = NSTR(target); + StrNode* sn = STR_(target); if (str_node_can_be_split(sn, env->enc)) { Node* n = str_node_split_last_char(sn, env->enc); if (IS_NOT_NULL(n)) { - qn->target = n; + NODE_BODY(qnode) = n; return 2; } } } break; - case NT_QTFR: + case NODE_QUANT: { /* check redundant double repeat. */ /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ - QtfrNode* qnt = NQTFR(target); + QuantNode* qnt = QUANT_(target); int nestq_num = popular_quantifier_num(qn); int targetq_num = popular_quantifier_num(qnt); #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR - if (!IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt) && + if (! NODE_IS_BY_NUMBER(qnode) && ! NODE_IS_BY_NUMBER(target) && IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { UChar buf[WARN_BUFSIZE]; @@ -4860,7 +4962,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) break; } - qn->target = target; + NODE_BODY(qnode) = target; q_exit: return 0; } @@ -4971,7 +5073,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], *(iarg->ptail) = onig_node_new_alt(snode, NULL_NODE); CHECK_NULL_RETURN_MEMERR(*(iarg->ptail)); - iarg->ptail = &(NCDR((*(iarg->ptail)))); + iarg->ptail = &(NODE_CDR((*(iarg->ptail)))); } } @@ -4999,14 +5101,14 @@ parse_exp(Node** np, OnigToken* tok, int term, break; case TK_SUBEXP_OPEN: - r = parse_enclose(np, tok, TK_SUBEXP_CLOSE, src, end, env); + r = parse_enclosure(np, tok, TK_SUBEXP_CLOSE, src, end, env); if (r < 0) return r; if (r == 1) group = 1; else if (r == 2) { /* option only */ Node* target; OnigOptionType prev = env->option; - env->option = NENCLOSE(*np)->option; + env->option = ENCLOSURE_(*np)->o.option; r = fetch_token(tok, src, end, env); if (r < 0) return r; r = parse_subexp(&target, tok, term, src, end, env); @@ -5015,7 +5117,7 @@ parse_exp(Node** np, OnigToken* tok, int term, onig_node_free(target); return r; } - NENCLOSE(*np)->target = target; + NODE_BODY(*np) = target; return tok->type; } break; @@ -5057,7 +5159,7 @@ parse_exp(Node** np, OnigToken* tok, int term, len = 1; while (1) { if (len >= ONIGENC_MBC_MINLEN(env->enc)) { - if (len == enclen(env->enc, NSTR(*np)->s)) {//should not enclen_end() + if (len == enclen(env->enc, STR_(*np)->s)) {//should not enclen_end() r = fetch_token(tok, src, end, env); NSTRING_CLEAR_RAW(*np); goto string_end; @@ -5072,8 +5174,8 @@ parse_exp(Node** np, OnigToken* tok, int term, int rem; if (len < ONIGENC_MBC_MINLEN(env->enc)) { rem = ONIGENC_MBC_MINLEN(env->enc) - len; - (void )node_str_head_pad(NSTR(*np), rem, (UChar )0); - if (len + rem == enclen(env->enc, NSTR(*np)->s)) { + (void )node_str_head_pad(STR_(*np), rem, (UChar )0); + if (len + rem == enclen(env->enc, STR_(*np)->s)) { NSTRING_CLEAR_RAW(*np); goto string_end; } @@ -5138,7 +5240,7 @@ parse_exp(Node** np, OnigToken* tok, int term, *np = node_new_cclass(); CHECK_NULL_RETURN_MEMERR(*np); - cc = NCCLASS(*np); + cc = CCLASS_(*np); add_ctype_to_cc(cc, tok->u.prop.ctype, 0, env); if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); } @@ -5163,7 +5265,7 @@ parse_exp(Node** np, OnigToken* tok, int term, r = parse_char_class(np, tok, src, end, env); if (r != 0) return r; - cc = NCCLASS(*np); + cc = CCLASS_(*np); if (IS_IGNORECASE(env->option)) { IApplyCaseFoldArg iarg; @@ -5191,16 +5293,16 @@ parse_exp(Node** np, OnigToken* tok, int term, break; case TK_ANYCHAR: - *np = node_new_anychar(); + *np = node_new_ctype(CTYPE_ANYCHAR, 0); CHECK_NULL_RETURN_MEMERR(*np); break; case TK_ANYCHAR_ANYTIME: - *np = node_new_anychar(); + *np = node_new_ctype(CTYPE_ANYCHAR, 0); CHECK_NULL_RETURN_MEMERR(*np); qn = node_new_quantifier(0, REPEAT_INFINITE, 0); CHECK_NULL_RETURN_MEMERR(qn); - NQTFR(qn)->target = *np; + NODE_BODY(qn) = *np; *np = qn; break; @@ -5222,14 +5324,13 @@ parse_exp(Node** np, OnigToken* tok, int term, { int gnum = tok->u.call.gnum; - if (gnum < 0) { - gnum = BACKREF_REL_TO_ABS(gnum, env); - if (gnum <= 0) - return ONIGERR_INVALID_BACKREF; - } - *np = node_new_call(tok->u.call.name, tok->u.call.name_end, gnum); + *np = node_new_call(tok->u.call.name, tok->u.call.name_end, + gnum, tok->u.call.by_number); CHECK_NULL_RETURN_MEMERR(*np); env->num_call++; + if (tok->u.call.by_number != 0 && gnum == 0) { + env->has_call_zero = 1; + } } break; #endif @@ -5271,7 +5372,7 @@ parse_exp(Node** np, OnigToken* tok, int term, qn = node_new_quantifier(tok->u.repeat.lower, tok->u.repeat.upper, (r == TK_INTERVAL ? 1 : 0)); CHECK_NULL_RETURN_MEMERR(qn); - NQTFR(qn)->greedy = tok->u.repeat.greedy; + QUANT_(qn)->greedy = tok->u.repeat.greedy; r = set_quantifier(qn, *targetp, group, env); if (r < 0) { onig_node_free(qn); @@ -5280,12 +5381,12 @@ parse_exp(Node** np, OnigToken* tok, int term, if (tok->u.repeat.possessive != 0) { Node* en; - en = node_new_enclose(ENCLOSE_STOP_BACKTRACK); + en = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); if (IS_NULL(en)) { onig_node_free(qn); return ONIGERR_MEMORY; } - NENCLOSE(en)->target = qn; + NODE_BODY(en) = qn; qn = en; } @@ -5303,12 +5404,12 @@ parse_exp(Node** np, OnigToken* tok, int term, onig_node_free(qn); return ONIGERR_MEMORY; } - tmp = NCDR(*targetp) = node_new_list(qn, NULL); + tmp = NODE_CDR(*targetp) = node_new_list(qn, NULL); if (IS_NULL(tmp)) { onig_node_free(qn); return ONIGERR_MEMORY; } - targetp = &(NCAR(tmp)); + targetp = &(NODE_CAR(tmp)); } goto re_entry; } @@ -5336,7 +5437,7 @@ parse_branch(Node** top, OnigToken* tok, int term, } else { *top = node_new_list(node, NULL); - headp = &(NCDR(*top)); + headp = &(NODE_CDR(*top)); while (r != TK_EOT && r != term && r != TK_ALT) { r = parse_exp(&node, tok, term, src, end, env); if (r < 0) { @@ -5344,14 +5445,14 @@ parse_branch(Node** top, OnigToken* tok, int term, return r; } - if (NTYPE(node) == NT_LIST) { + if (NODE_TYPE(node) == NODE_LIST) { *headp = node; - while (IS_NOT_NULL(NCDR(node))) node = NCDR(node); - headp = &(NCDR(node)); + while (IS_NOT_NULL(NODE_CDR(node))) node = NODE_CDR(node); + headp = &(NODE_CDR(node)); } else { *headp = node_new_list(node, NULL); - headp = &(NCDR(*headp)); + headp = &(NODE_CDR(*headp)); } } } @@ -5382,7 +5483,7 @@ parse_subexp(Node** top, OnigToken* tok, int term, } else if (r == TK_ALT) { *top = onig_node_new_alt(node, NULL); - headp = &(NCDR(*top)); + headp = &(NODE_CDR(*top)); while (r == TK_ALT) { r = fetch_token(tok, src, end, env); if (r < 0) return r; @@ -5392,7 +5493,7 @@ parse_subexp(Node** top, OnigToken* tok, int term, return r; } *headp = onig_node_new_alt(node, NULL); - headp = &(NCDR(*headp)); + headp = &(NODE_CDR(*headp)); } if (tok->type != (enum TokenSyms )term) @@ -5421,12 +5522,35 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) if (r < 0) return r; r = parse_subexp(top, &tok, TK_EOT, src, end, env); if (r < 0) return r; + return 0; } +#ifdef USE_SUBEXP_CALL +static int +make_call_zero_body(Node* node, ScanEnv* env, Node** rnode) +{ + int r; + + Node* x = node_new_enclosure_memory(0 /* 0: is not named */); + CHECK_NULL_RETURN_MEMERR(x); + + NODE_BODY(x) = node; + ENCLOSURE_(x)->m.regnum = 0; + r = scan_env_set_mem_node(env, 0, x); + if (r != 0) { + onig_node_free(x); + return r; + } + + *rnode = x; + return 0; +} +#endif + extern int -onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, - regex_t* reg, ScanEnv* env) +onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, + regex_t* reg, ScanEnv* env) { int r; UChar* p; @@ -5451,6 +5575,19 @@ onig_parse_make_tree(Node** root, const UChar* pattern, const UChar* end, p = (UChar* )pattern; r = parse_regexp(root, &p, (UChar* )end, env); + +#ifdef USE_SUBEXP_CALL + if (r != 0) return r; + + if (env->has_call_zero != 0) { + Node* zero_node; + r = make_call_zero_body(*root, env, &zero_node); + if (r != 0) return r; + + *root = zero_node; + } +#endif + reg->num_mem = env->num_mem; return r; } diff --git a/src/regparse.h b/src/regparse.h index c9d1fe8..884f4d5 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -4,7 +4,7 @@ regparse.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,88 +32,91 @@ #include "regint.h" /* node type */ -#define NT_STR 0 -#define NT_CCLASS 1 -#define NT_CTYPE 2 -#define NT_CANY 3 -#define NT_BREF 4 -#define NT_QTFR 5 -#define NT_ENCLOSE 6 -#define NT_ANCHOR 7 -#define NT_LIST 8 -#define NT_ALT 9 -#define NT_CALL 10 +typedef enum { + NODE_STR = 0, + NODE_CCLASS = 1, + NODE_CTYPE = 2, + NODE_BREF = 3, + NODE_QUANT = 4, + NODE_ENCLOSURE = 5, + NODE_ANCHOR = 6, + NODE_LIST = 7, + NODE_ALT = 8, + NODE_CALL = 9 +} NodeType; /* node type bit */ -#define NTYPE2BIT(type) (1<<(type)) - -#define BIT_NT_STR NTYPE2BIT(NT_STR) -#define BIT_NT_CCLASS NTYPE2BIT(NT_CCLASS) -#define BIT_NT_CTYPE NTYPE2BIT(NT_CTYPE) -#define BIT_NT_CANY NTYPE2BIT(NT_CANY) -#define BIT_NT_BREF NTYPE2BIT(NT_BREF) -#define BIT_NT_QTFR NTYPE2BIT(NT_QTFR) -#define BIT_NT_ENCLOSE NTYPE2BIT(NT_ENCLOSE) -#define BIT_NT_ANCHOR NTYPE2BIT(NT_ANCHOR) -#define BIT_NT_LIST NTYPE2BIT(NT_LIST) -#define BIT_NT_ALT NTYPE2BIT(NT_ALT) -#define BIT_NT_CALL NTYPE2BIT(NT_CALL) - -#define IS_NODE_TYPE_SIMPLE(type) \ - ((NTYPE2BIT(type) & (BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE |\ - BIT_NT_CANY | BIT_NT_BREF)) != 0) - -#define NTYPE(node) ((node)->u.base.type) -#define SET_NTYPE(node, ntype) (node)->u.base.type = (ntype) - -#define NSTR(node) (&((node)->u.str)) -#define NCCLASS(node) (&((node)->u.cclass)) -#define NCTYPE(node) (&((node)->u.ctype)) -#define NBREF(node) (&((node)->u.bref)) -#define NQTFR(node) (&((node)->u.qtfr)) -#define NENCLOSE(node) (&((node)->u.enclose)) -#define NANCHOR(node) (&((node)->u.anchor)) -#define NCONS(node) (&((node)->u.cons)) -#define NCALL(node) (&((node)->u.call)) - -#define NCAR(node) (NCONS(node)->car) -#define NCDR(node) (NCONS(node)->cdr) - +#define NODE_TYPE2BIT(type) (1<<(type)) + +#define BIT_NODE_STR NODE_TYPE2BIT(NODE_STR) +#define BIT_NODE_CCLASS NODE_TYPE2BIT(NODE_CCLASS) +#define BIT_NODE_CTYPE NODE_TYPE2BIT(NODE_CTYPE) +#define BIT_NODE_BREF NODE_TYPE2BIT(NODE_BREF) +#define BIT_NODE_QUANT NODE_TYPE2BIT(NODE_QUANT) +#define BIT_NODE_ENCLOSURE NODE_TYPE2BIT(NODE_ENCLOSURE) +#define BIT_NODE_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR) +#define BIT_NODE_LIST NODE_TYPE2BIT(NODE_LIST) +#define BIT_NODE_ALT NODE_TYPE2BIT(NODE_ALT) +#define BIT_NODE_CALL NODE_TYPE2BIT(NODE_CALL) + +#define NODE_IS_SIMPLE_TYPE(node) \ + ((NODE_TYPE2BIT(NODE_TYPE(node)) & \ + (BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BREF)) != 0) + +#define NODE_TYPE(node) ((node)->u.base.node_type) +#define SET_NODE_TYPE(node, ntype) (node)->u.base.node_type = (ntype) + +#define STR_(node) (&((node)->u.str)) +#define CCLASS_(node) (&((node)->u.cclass)) +#define CTYPE_(node) (&((node)->u.ctype)) +#define BREF_(node) (&((node)->u.bref)) +#define QUANT_(node) (&((node)->u.quant)) +#define ENCLOSURE_(node) (&((node)->u.enclosure)) +#define ANCHOR_(node) (&((node)->u.anchor)) +#define CONS_(node) (&((node)->u.cons)) +#define CALL_(node) (&((node)->u.call)) + +#define NODE_CAR(node) (CONS_(node)->car) +#define NODE_CDR(node) (CONS_(node)->cdr) + +#define CTYPE_ANYCHAR -1 +#define NODE_IS_ANYCHAR(node) \ + (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR) #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) -#define ENCLOSE_MEMORY (1<<0) -#define ENCLOSE_OPTION (1<<1) -#define ENCLOSE_STOP_BACKTRACK (1<<2) +#define ENCLOSURE_MEMORY (1<<0) +#define ENCLOSURE_OPTION (1<<1) +#define ENCLOSURE_STOP_BACKTRACK (1<<2) #define NODE_STR_MARGIN 16 #define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ #define NODE_BACKREFS_SIZE 6 -#define NSTR_RAW (1<<0) /* by backslashed number */ -#define NSTR_AMBIG (1<<1) -#define NSTR_DONT_GET_OPT_INFO (1<<2) +#define STRING_RAW (1<<0) /* by backslashed number */ +#define STRING_AMBIG (1<<1) +#define STRING_DONT_GET_OPT_INFO (1<<2) #define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) -#define NSTRING_SET_RAW(node) (node)->u.str.flag |= NSTR_RAW -#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~NSTR_RAW -#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= NSTR_AMBIG +#define NSTRING_SET_RAW(node) (node)->u.str.flag |= STRING_RAW +#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~STRING_RAW +#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= STRING_AMBIG #define NSTRING_SET_DONT_GET_OPT_INFO(node) \ - (node)->u.str.flag |= NSTR_DONT_GET_OPT_INFO -#define NSTRING_IS_RAW(node) (((node)->u.str.flag & NSTR_RAW) != 0) -#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & NSTR_AMBIG) != 0) + (node)->u.str.flag |= STRING_DONT_GET_OPT_INFO +#define NSTRING_IS_RAW(node) (((node)->u.str.flag & STRING_RAW) != 0) +#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & STRING_AMBIG) != 0) #define NSTRING_IS_DONT_GET_OPT_INFO(node) \ - (((node)->u.str.flag & NSTR_DONT_GET_OPT_INFO) != 0) + (((node)->u.str.flag & STRING_DONT_GET_OPT_INFO) != 0) #define BACKREFS_P(br) \ (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static); -#define NQ_TARGET_ISNOT_EMPTY 0 -#define NQ_TARGET_IS_EMPTY 1 -#define NQ_TARGET_IS_EMPTY_MEM 2 -#define NQ_TARGET_IS_EMPTY_REC 3 +#define QUANT_BODY_IS_NOT_EMPTY 0 +#define QUANT_BODY_IS_EMPTY 1 +#define QUANT_BODY_IS_EMPTY_MEM 2 +#define QUANT_BODY_IS_EMPTY_REC 3 /* status bits */ #define NST_MIN_FIXED (1<<0) @@ -121,44 +124,56 @@ #define NST_CLEN_FIXED (1<<2) #define NST_MARK1 (1<<3) #define NST_MARK2 (1<<4) -#define NST_MEM_BACKREFED (1<<5) -#define NST_STOP_BT_SIMPLE_REPEAT (1<<6) -#define NST_RECURSION (1<<7) -#define NST_CALLED (1<<8) -#define NST_ADDR_FIXED (1<<9) -#define NST_NAMED_GROUP (1<<10) -#define NST_NAME_REF (1<<11) -#define NST_IN_REPEAT (1<<12) /* STK_REPEAT is nested in stack. */ +#define NST_STOP_BT_SIMPLE_REPEAT (1<<5) +#define NST_RECURSION (1<<6) +#define NST_CALLED (1<<7) +#define NST_ADDR_FIXED (1<<8) +#define NST_NAMED_GROUP (1<<9) +#define NST_IN_REAL_REPEAT (1<<10) /* STK_REPEAT is nested in stack. */ +#define NST_IN_ZERO_REPEAT (1<<11) /* (....){0} */ +#define NST_IN_MULTI_ENTRY (1<<12) #define NST_NEST_LEVEL (1<<13) #define NST_BY_NUMBER (1<<14) /* {n,m} */ +#define NST_BY_NAME (1<<15) /* backref by name */ +#define NST_BACKREF (1<<16) + + +#define NODE_STATUS(node) (((Node* )node)->u.base.status) +#define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (f)) +#define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(f)) + +#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NST_BY_NUMBER) != 0) +#define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NST_IN_REAL_REPEAT) != 0) +#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NST_CALLED) != 0) +#define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NST_IN_MULTI_ENTRY) != 0) +#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NST_RECURSION) != 0) +#define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NST_IN_ZERO_REPEAT) != 0) +#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0) +#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NST_ADDR_FIXED) != 0) +#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NST_CLEN_FIXED) != 0) +#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NST_MIN_FIXED) != 0) +#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NST_MAX_FIXED) != 0) +#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NST_MARK1) != 0) +#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NST_MARK2) != 0) +#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NST_NEST_LEVEL) != 0) +#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NST_BY_NAME) != 0) +#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NST_BACKREF) != 0) +#define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \ + ((NODE_STATUS(node) & NST_STOP_BT_SIMPLE_REPEAT) != 0) + +#define NODE_BODY(node) ((node)->u.base.body) +#define NODE_QUANT_BODY(node) ((node)->body) +#define NODE_ENCLOSURE_BODY(node) ((node)->body) +#define NODE_CALL_BODY(node) ((node)->body) +#define NODE_ANCHOR_BODY(node) ((node)->body) -#define SET_ENCLOSE_STATUS(node,f) (node)->u.enclose.state |= (f) -#define CLEAR_ENCLOSE_STATUS(node,f) (node)->u.enclose.state &= ~(f) - -#define IS_ENCLOSE_CALLED(en) (((en)->state & NST_CALLED) != 0) -#define IS_ENCLOSE_ADDR_FIXED(en) (((en)->state & NST_ADDR_FIXED) != 0) -#define IS_ENCLOSE_RECURSION(en) (((en)->state & NST_RECURSION) != 0) -#define IS_ENCLOSE_MARK1(en) (((en)->state & NST_MARK1) != 0) -#define IS_ENCLOSE_MARK2(en) (((en)->state & NST_MARK2) != 0) -#define IS_ENCLOSE_MIN_FIXED(en) (((en)->state & NST_MIN_FIXED) != 0) -#define IS_ENCLOSE_MAX_FIXED(en) (((en)->state & NST_MAX_FIXED) != 0) -#define IS_ENCLOSE_CLEN_FIXED(en) (((en)->state & NST_CLEN_FIXED) != 0) -#define IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(en) \ - (((en)->state & NST_STOP_BT_SIMPLE_REPEAT) != 0) -#define IS_ENCLOSE_NAMED_GROUP(en) (((en)->state & NST_NAMED_GROUP) != 0) - -#define SET_CALL_RECURSION(node) (node)->u.call.state |= NST_RECURSION -#define IS_CALL_RECURSION(cn) (((cn)->state & NST_RECURSION) != 0) -#define IS_CALL_NAME_REF(cn) (((cn)->state & NST_NAME_REF) != 0) -#define IS_BACKREF_NAME_REF(bn) (((bn)->state & NST_NAME_REF) != 0) -#define IS_BACKREF_NEST_LEVEL(bn) (((bn)->state & NST_NEST_LEVEL) != 0) -#define IS_QUANTIFIER_IN_REPEAT(qn) (((qn)->state & NST_IN_REPEAT) != 0) -#define IS_QUANTIFIER_BY_NUMBER(qn) (((qn)->state & NST_BY_NUMBER) != 0) #define CALLNODE_REFNUM_UNDEF -1 typedef struct { - NodeBase base; + NodeType node_type; + int status; + UChar* s; UChar* end; unsigned int flag; @@ -167,35 +182,54 @@ typedef struct { } StrNode; typedef struct { - NodeBase base; - int state; - struct _Node* target; + NodeType node_type; + int status; + + unsigned int flags; + BitSet bs; + BBuf* mbuf; /* multi-byte info or NULL */ +} CClassNode; + +typedef struct { + NodeType node_type; + int status; + struct _Node* body; + int lower; int upper; int greedy; - int target_empty_info; + int body_empty_info; struct _Node* head_exact; struct _Node* next_head_exact; int is_refered; /* include called node. don't eliminate even if {0} */ #ifdef USE_COMBINATION_EXPLOSION_CHECK int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ #endif -} QtfrNode; +} QuantNode; typedef struct { - NodeBase base; - int state; + NodeType node_type; + int status; + struct _Node* body; + int type; - int regnum; - OnigOptionType option; - struct _Node* target; - AbsAddrType call_addr; + union { + struct { + int regnum; + AbsAddrType called_addr; + int entry_count; + int called_state; + } m; + struct { + OnigOptionType option; + } o; + }; /* for multiple call reference */ OnigLen min_len; /* min length (byte) */ OnigLen max_len; /* max length (byte) */ int char_len; /* character length */ int opt_count; /* referenced count in optimize_node_left() */ -} EncloseNode; +} EnclosureNode; #ifdef USE_SUBEXP_CALL @@ -211,20 +245,23 @@ typedef struct { } UnsetAddrList; typedef struct { - NodeBase base; - int state; + NodeType node_type; + int status; + struct _Node* body; /* to EnclosureNode : ENCLOSURE_MEMORY */ + + int by_number; int group_num; UChar* name; UChar* name_end; - struct _Node* target; /* EncloseNode : ENCLOSE_MEMORY */ - UnsetAddrList* unset_addr_list; + int entry_count; } CallNode; #endif typedef struct { - NodeBase base; - int state; + NodeType node_type; + int status; + int back_num; int back_static[NODE_BACKREFS_SIZE]; int* back_dynamic; @@ -232,37 +269,48 @@ typedef struct { } BRefNode; typedef struct { - NodeBase base; + NodeType node_type; + int status; + struct _Node* body; + int type; - struct _Node* target; int char_len; } AnchorNode; typedef struct { - NodeBase base; + NodeType node_type; + int status; + struct _Node* car; struct _Node* cdr; } ConsAltNode; typedef struct { - NodeBase base; + NodeType node_type; + int status; + int ctype; int not; } CtypeNode; typedef struct _Node { union { - NodeBase base; - StrNode str; - CClassNode cclass; - QtfrNode qtfr; - EncloseNode enclose; - BRefNode bref; - AnchorNode anchor; - ConsAltNode cons; - CtypeNode ctype; + struct { + NodeType node_type; + int status; + struct _Node* body; + } base; + + StrNode str; + CClassNode cclass; + QuantNode quant; + EnclosureNode enclosure; + BRefNode bref; + AnchorNode anchor; + ConsAltNode cons; + CtypeNode ctype; #ifdef USE_SUBEXP_CALL - CallNode call; + CallNode call; #endif } u; } Node; @@ -270,20 +318,28 @@ typedef struct _Node { #define NULL_NODE ((Node* )0) -#define SCANENV_MEMNODES_SIZE 8 -#define SCANENV_MEM_NODES(senv) \ - (IS_NOT_NULL((senv)->mem_nodes_dynamic) ? \ - (senv)->mem_nodes_dynamic : (senv)->mem_nodes_static) +#define SCANENV_MEMENV_SIZE 8 +#define SCANENV_MEMENV(senv) \ + (IS_NOT_NULL((senv)->mem_env_dynamic) ? \ + (senv)->mem_env_dynamic : (senv)->mem_env_static) + +typedef struct { + Node* node; +#if 0 + int in; + int recursion; +#endif +} MemEnv; typedef struct { OnigOptionType option; OnigCaseFoldType case_fold_flag; OnigEncoding enc; OnigSyntaxType* syntax; - BitStatusType capture_history; - BitStatusType bt_mem_start; - BitStatusType bt_mem_end; - BitStatusType backrefed_mem; + MemStatusType capture_history; + MemStatusType bt_mem_start; + MemStatusType bt_mem_end; + MemStatusType backrefed_mem; UChar* pattern; UChar* pattern_end; UChar* error; @@ -292,14 +348,15 @@ typedef struct { int num_call; #ifdef USE_SUBEXP_CALL UnsetAddrList* unset_addr_list; + int has_call_zero; #endif int num_mem; #ifdef USE_NAMED_GROUP int num_named; #endif int mem_alloc; - Node* mem_nodes_static[SCANENV_MEMNODES_SIZE]; - Node** mem_nodes_dynamic; + MemEnv mem_env_static[SCANENV_MEMENV_SIZE]; + MemEnv* mem_env_dynamic; #ifdef USE_COMBINATION_EXPLOSION_CHECK int num_comb_exp_check; int comb_exp_max_regnum; @@ -331,7 +388,7 @@ extern void onig_node_conv_to_str_node P_((Node* node, int raw)); extern int onig_node_str_cat P_((Node* node, const UChar* s, const UChar* end)); extern int onig_node_str_set P_((Node* node, const UChar* s, const UChar* end)); extern void onig_node_free P_((Node* node)); -extern Node* onig_node_new_enclose P_((int type)); +extern Node* onig_node_new_enclosure P_((int type)); extern Node* onig_node_new_anchor P_((int type)); extern Node* onig_node_new_str P_((const UChar* s, const UChar* end)); extern Node* onig_node_new_list P_((Node* left, Node* right)); @@ -339,8 +396,9 @@ extern Node* onig_node_list_add P_((Node* list, Node* x)); extern Node* onig_node_new_alt P_((Node* left, Node* right)); extern void onig_node_str_clear P_((Node* node)); extern int onig_names_free P_((regex_t* reg)); -extern int onig_parse_make_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); +extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); extern int onig_free_shared_cclass_table P_((void)); +extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); #ifdef ONIG_DEBUG #ifdef USE_NAMED_GROUP @@ -108,17 +108,16 @@ new_size(size) #if 0 for (i=3; i<31; i++) { - if ((1<<i) > size) return 1<<i; + if ((1<<i) > size) return 1<<i; } return -1; #else int newsize; for (i = 0, newsize = MINSIZE; - i < (int )(sizeof(primes)/sizeof(primes[0])); - i++, newsize <<= 1) - { - if (newsize > size) return primes[i]; + i < (int )(sizeof(primes)/sizeof(primes[0])); + i++, newsize <<= 1) { + if (newsize > size) return primes[i]; } /* Ran out of polynomials */ return -1; /* should raise exception */ @@ -145,82 +144,82 @@ st_init_table_with_size(type, size) struct st_hash_type *type; int size; { - st_table *tbl; + st_table *tbl; #ifdef HASH_LOG - if (init_st == 0) { - init_st = 1; - atexit(stat_col); - } + if (init_st == 0) { + init_st = 1; + atexit(stat_col); + } #endif - size = new_size(size); /* round up to prime number */ + size = new_size(size); /* round up to prime number */ - tbl = alloc(st_table); - if (tbl == 0) return 0; + tbl = alloc(st_table); + if (tbl == 0) return 0; - tbl->type = type; - tbl->num_entries = 0; - tbl->num_bins = size; - tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*)); - if (tbl->bins == 0) { - free(tbl); - return 0; - } + tbl->type = type; + tbl->num_entries = 0; + tbl->num_bins = size; + tbl->bins = (st_table_entry **)Calloc(size, sizeof(st_table_entry*)); + if (tbl->bins == 0) { + free(tbl); + return 0; + } - return tbl; + return tbl; } st_table* st_init_table(type) struct st_hash_type *type; { - return st_init_table_with_size(type, 0); + return st_init_table_with_size(type, 0); } st_table* st_init_numtable(void) { - return st_init_table(&type_numhash); + return st_init_table(&type_numhash); } st_table* st_init_numtable_with_size(size) int size; { - return st_init_table_with_size(&type_numhash, size); + return st_init_table_with_size(&type_numhash, size); } st_table* st_init_strtable(void) { - return st_init_table(&type_strhash); + return st_init_table(&type_strhash); } st_table* st_init_strtable_with_size(size) int size; { - return st_init_table_with_size(&type_strhash, size); + return st_init_table_with_size(&type_strhash, size); } void st_free_table(table) st_table *table; { - register st_table_entry *ptr, *next; - int i; + register st_table_entry *ptr, *next; + int i; - for(i = 0; i < table->num_bins; i++) { - ptr = table->bins[i]; - while (ptr != 0) { + for(i = 0; i < table->num_bins; i++) { + ptr = table->bins[i]; + while (ptr != 0) { next = ptr->next; free(ptr); ptr = next; - } } - free(table->bins); - free(table); + } + free(table->bins); + free(table); } #define PTR_NOT_EQUAL(table, ptr, hash_val, key) \ @@ -236,187 +235,186 @@ st_free_table(table) bin_pos = hash_val%(table)->num_bins;\ ptr = (table)->bins[bin_pos];\ if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {\ - COLLISION;\ - while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\ - ptr = ptr->next;\ - }\ - ptr = ptr->next;\ + COLLISION;\ + while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {\ + ptr = ptr->next;\ + }\ + ptr = ptr->next;\ }\ } while (0) int st_lookup(table, key, value) - st_table *table; - register st_data_t key; - st_data_t *value; + st_table *table; + register st_data_t key; + st_data_t *value; { - unsigned int hash_val, bin_pos; - register st_table_entry *ptr; + unsigned int hash_val, bin_pos; + register st_table_entry *ptr; - hash_val = do_hash(key, table); - FIND_ENTRY(table, ptr, hash_val, bin_pos); + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); - if (ptr == 0) { - return 0; - } - else { - if (value != 0) *value = ptr->record; - return 1; - } + if (ptr == 0) { + return 0; + } + else { + if (value != 0) *value = ptr->record; + return 1; + } } -#define ADD_DIRECT(table, key, value, hash_val, bin_pos)\ +#define ADD_DIRECT(table, key, value, hash_val, bin_pos, ret) \ do {\ - st_table_entry *entry;\ - if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\ - rehash(table);\ - bin_pos = hash_val % table->num_bins;\ - }\ - \ - entry = alloc(st_table_entry);\ - \ - entry->hash = hash_val;\ - entry->key = key;\ - entry->record = value;\ - entry->next = table->bins[bin_pos];\ - table->bins[bin_pos] = entry;\ - table->num_entries++;\ + st_table_entry *entry;\ + if (table->num_entries/(table->num_bins) > ST_DEFAULT_MAX_DENSITY) {\ + rehash(table);\ + bin_pos = hash_val % table->num_bins;\ + }\ + entry = alloc(st_table_entry);\ + if (IS_NULL(entry)) return ret;\ + entry->hash = hash_val;\ + entry->key = key;\ + entry->record = value;\ + entry->next = table->bins[bin_pos];\ + table->bins[bin_pos] = entry;\ + table->num_entries++;\ } while (0) int st_insert(table, key, value) - register st_table *table; - register st_data_t key; - st_data_t value; + register st_table *table; + register st_data_t key; + st_data_t value; { - unsigned int hash_val, bin_pos; - register st_table_entry *ptr; + unsigned int hash_val, bin_pos; + register st_table_entry *ptr; - hash_val = do_hash(key, table); - FIND_ENTRY(table, ptr, hash_val, bin_pos); + hash_val = do_hash(key, table); + FIND_ENTRY(table, ptr, hash_val, bin_pos); - if (ptr == 0) { - ADD_DIRECT(table, key, value, hash_val, bin_pos); - return 0; - } - else { - ptr->record = value; - return 1; - } + if (ptr == 0) { + ADD_DIRECT(table, key, value, hash_val, bin_pos, ONIGERR_MEMORY); + return 0; + } + else { + ptr->record = value; + return 1; + } } void st_add_direct(table, key, value) - st_table *table; - st_data_t key; - st_data_t value; + st_table *table; + st_data_t key; + st_data_t value; { - unsigned int hash_val, bin_pos; + unsigned int hash_val, bin_pos; - hash_val = do_hash(key, table); - bin_pos = hash_val % table->num_bins; - ADD_DIRECT(table, key, value, hash_val, bin_pos); + hash_val = do_hash(key, table); + bin_pos = hash_val % table->num_bins; + ADD_DIRECT(table, key, value, hash_val, bin_pos,); } static void rehash(table) - register st_table *table; + register st_table *table; { - register st_table_entry *ptr, *next, **new_bins; - int i, old_num_bins = table->num_bins, new_num_bins; - unsigned int hash_val; - - new_num_bins = new_size(old_num_bins+1); - new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*)); - if (new_bins == 0) { - return ; - } - - for(i = 0; i < old_num_bins; i++) { - ptr = table->bins[i]; - while (ptr != 0) { + register st_table_entry *ptr, *next, **new_bins; + int i, old_num_bins = table->num_bins, new_num_bins; + unsigned int hash_val; + + new_num_bins = new_size(old_num_bins+1); + new_bins = (st_table_entry**)Calloc(new_num_bins, sizeof(st_table_entry*)); + if (new_bins == 0) { + return ; + } + + for(i = 0; i < old_num_bins; i++) { + ptr = table->bins[i]; + while (ptr != 0) { next = ptr->next; hash_val = ptr->hash % new_num_bins; ptr->next = new_bins[hash_val]; new_bins[hash_val] = ptr; ptr = next; - } } - free(table->bins); - table->num_bins = new_num_bins; - table->bins = new_bins; + } + free(table->bins); + table->num_bins = new_num_bins; + table->bins = new_bins; } st_table* st_copy(old_table) - st_table *old_table; + st_table *old_table; { - st_table *new_table; - st_table_entry *ptr, *entry; - int i, num_bins = old_table->num_bins; + st_table *new_table; + st_table_entry *ptr, *entry; + int i, num_bins = old_table->num_bins; - new_table = alloc(st_table); - if (new_table == 0) { - return 0; - } + new_table = alloc(st_table); + if (new_table == 0) { + return 0; + } - *new_table = *old_table; - new_table->bins = (st_table_entry**) - Calloc((unsigned)num_bins, sizeof(st_table_entry*)); + *new_table = *old_table; + new_table->bins = (st_table_entry**) + Calloc((unsigned)num_bins, sizeof(st_table_entry*)); - if (new_table->bins == 0) { - free(new_table); - return 0; - } + if (new_table->bins == 0) { + free(new_table); + return 0; + } - for(i = 0; i < num_bins; i++) { - new_table->bins[i] = 0; - ptr = old_table->bins[i]; - while (ptr != 0) { + for(i = 0; i < num_bins; i++) { + new_table->bins[i] = 0; + ptr = old_table->bins[i]; + while (ptr != 0) { entry = alloc(st_table_entry); if (entry == 0) { - free(new_table->bins); - free(new_table); - return 0; + free(new_table->bins); + free(new_table); + return 0; } *entry = *ptr; entry->next = new_table->bins[i]; new_table->bins[i] = entry; ptr = ptr->next; - } } - return new_table; + } + return new_table; } int st_delete(table, key, value) - register st_table *table; - register st_data_t *key; - st_data_t *value; + register st_table *table; + register st_data_t *key; + st_data_t *value; { - unsigned int hash_val; - st_table_entry *tmp; - register st_table_entry *ptr; + unsigned int hash_val; + st_table_entry *tmp; + register st_table_entry *ptr; - hash_val = do_hash_bin(*key, table); - ptr = table->bins[hash_val]; + hash_val = do_hash_bin(*key, table); + ptr = table->bins[hash_val]; - if (ptr == 0) { - if (value != 0) *value = 0; - return 0; - } - - if (EQUAL(table, *key, ptr->key)) { - table->bins[hash_val] = ptr->next; - table->num_entries--; - if (value != 0) *value = ptr->record; - *key = ptr->key; - free(ptr); - return 1; - } - - for(; ptr->next != 0; ptr = ptr->next) { - if (EQUAL(table, ptr->next->key, *key)) { + if (ptr == 0) { + if (value != 0) *value = 0; + return 0; + } + + if (EQUAL(table, *key, ptr->key)) { + table->bins[hash_val] = ptr->next; + table->num_entries--; + if (value != 0) *value = ptr->record; + *key = ptr->key; + free(ptr); + return 1; + } + + for(; ptr->next != 0; ptr = ptr->next) { + if (EQUAL(table, ptr->next->key, *key)) { tmp = ptr->next; ptr->next = ptr->next->next; table->num_entries--; @@ -424,41 +422,41 @@ st_delete(table, key, value) *key = tmp->key; free(tmp); return 1; - } } + } - return 0; + return 0; } int st_delete_safe(table, key, value, never) - register st_table *table; - register st_data_t *key; - st_data_t *value; - st_data_t never; + register st_table *table; + register st_data_t *key; + st_data_t *value; + st_data_t never; { - unsigned int hash_val; - register st_table_entry *ptr; + unsigned int hash_val; + register st_table_entry *ptr; - hash_val = do_hash_bin(*key, table); - ptr = table->bins[hash_val]; + hash_val = do_hash_bin(*key, table); + ptr = table->bins[hash_val]; - if (ptr == 0) { - if (value != 0) *value = 0; - return 0; - } + if (ptr == 0) { + if (value != 0) *value = 0; + return 0; + } - for(; ptr != 0; ptr = ptr->next) { - if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) { + for(; ptr != 0; ptr = ptr->next) { + if ((ptr->key != never) && EQUAL(table, ptr->key, *key)) { table->num_entries--; *key = ptr->key; if (value != 0) *value = ptr->record; ptr->key = ptr->record = never; return 1; - } } + } - return 0; + return 0; } static int @@ -476,114 +474,114 @@ delete_never(key, value, never) void st_cleanup_safe(table, never) - st_table *table; - st_data_t never; + st_table *table; + st_data_t never; { - int num_entries = table->num_entries; + int num_entries = table->num_entries; - st_foreach(table, delete_never, never); - table->num_entries = num_entries; + st_foreach(table, delete_never, never); + table->num_entries = num_entries; } int st_foreach(table, func, arg) - st_table *table; - int (*func)(); - st_data_t arg; + st_table *table; + int (*func)(); + st_data_t arg; { - st_table_entry *ptr, *last, *tmp; - enum st_retval retval; - int i; + st_table_entry *ptr, *last, *tmp; + enum st_retval retval; + int i; - for(i = 0; i < table->num_bins; i++) { - last = 0; - for(ptr = table->bins[i]; ptr != 0;) { + for(i = 0; i < table->num_bins; i++) { + last = 0; + for(ptr = table->bins[i]; ptr != 0;) { retval = (*func)(ptr->key, ptr->record, arg); switch (retval) { case ST_CHECK: /* check if hash is modified during iteration */ - tmp = 0; - if (i < table->num_bins) { - for (tmp = table->bins[i]; tmp; tmp=tmp->next) { - if (tmp == ptr) break; - } - } - if (!tmp) { - /* call func with error notice */ - return 1; - } - /* fall through */ + tmp = 0; + if (i < table->num_bins) { + for (tmp = table->bins[i]; tmp; tmp=tmp->next) { + if (tmp == ptr) break; + } + } + if (!tmp) { + /* call func with error notice */ + return 1; + } + /* fall through */ case ST_CONTINUE: - last = ptr; - ptr = ptr->next; - break; + last = ptr; + ptr = ptr->next; + break; case ST_STOP: - return 0; + return 0; case ST_DELETE: - tmp = ptr; - if (last == 0) { - table->bins[i] = ptr->next; - } - else { - last->next = ptr->next; - } - ptr = ptr->next; - free(tmp); - table->num_entries--; + tmp = ptr; + if (last == 0) { + table->bins[i] = ptr->next; + } + else { + last->next = ptr->next; + } + ptr = ptr->next; + free(tmp); + table->num_entries--; } - } } - return 0; + } + return 0; } static int strhash(string) - register const char *string; + register const char *string; { - register int c; + register int c; #ifdef HASH_ELFHASH - register unsigned int h = 0, g; + register unsigned int h = 0, g; - while ((c = *string++) != '\0') { - h = ( h << 4 ) + c; - if ( g = h & 0xF0000000 ) + while ((c = *string++) != '\0') { + h = ( h << 4 ) + c; + if ( g = h & 0xF0000000 ) h ^= g >> 24; - h &= ~g; - } - return h; + h &= ~g; + } + return h; #elif HASH_PERL - register int val = 0; + register int val = 0; - while ((c = *string++) != '\0') { - val += c; - val += (val << 10); - val ^= (val >> 6); - } - val += (val << 3); - val ^= (val >> 11); + while ((c = *string++) != '\0') { + val += c; + val += (val << 10); + val ^= (val >> 6); + } + val += (val << 3); + val ^= (val >> 11); - return val + (val << 15); + return val + (val << 15); #else - register int val = 0; + register int val = 0; - while ((c = *string++) != '\0') { - val = val*997 + c; - } + while ((c = *string++) != '\0') { + val = val*997 + c; + } - return val + (val>>5); + return val + (val>>5); #endif } static int numcmp(x, y) - long x, y; + long x, y; { - return x != y; + return x != y; } static int numhash(n) - long n; + long n; { - return n; + return n; } diff --git a/src/unicode.c b/src/unicode.c index 8812ca2..5b6b3e7 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -2,7 +2,7 @@ unicode.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -91,6 +91,7 @@ extern int onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges) { UserDefinedPropertyValue* e; + int r; int i; int n; int len; @@ -130,9 +131,10 @@ onig_unicode_define_user_property(const char* name, OnigCodePoint* ranges) e = UserDefinedPropertyRanges + UserDefinedPropertyNum; e->ctype = CODE_RANGES_NUM + UserDefinedPropertyNum; e->ranges = ranges; - onig_st_insert_strend(UserDefinedPropertyTable, - (const UChar* )s, (const UChar* )s + n, - (hash_data_type )((void* )e)); + r = onig_st_insert_strend(UserDefinedPropertyTable, + (const UChar* )s, (const UChar* )s + n, + (hash_data_type )((void* )e)); + if (r < 0) return r; UserDefinedPropertyNum++; return 0; @@ -162,7 +164,7 @@ onigenc_unicode_is_code_ctype(OnigCodePoint code, unsigned int ctype) extern int -onigenc_unicode_ctype_code_range(int ctype, const OnigCodePoint* ranges[]) +onigenc_unicode_ctype_code_range(OnigCtype ctype, const OnigCodePoint* ranges[]) { if (ctype >= CODE_RANGES_NUM) { int index = ctype - CODE_RANGES_NUM; diff --git a/test/testc.c b/test/testc.c index ddf9fd5..725e375 100644 --- a/test/testc.c +++ b/test/testc.c @@ -586,7 +586,17 @@ extern int main(int argc, char* argv[]) n("[0-9-a]", ":"); // PR#44 x3("(\\(((?:[^(]|\\g<1>)*)\\))", "(abc)(abc)", 1, 4, 2); // PR#43 x2("\\o{101}", "A", 0, 1); + x2("(?:\\k'+1'B|(A)C)*", "ACAB", 0, 4); // relative backref by postitive number + x2("\\g<+2>(abc)(ABC){0}", "ABCabc", 0, 6); // relative call by positive number + x2("A\\g'0'|B()", "AAAAB", 0, 5); + x3("(A\\g'0')|B", "AAAAB", 0, 5, 1); + /* + < ifndef IGNORE_EUC_JP > + for testcases print warnings #63 + warning: illegal character encoding in string literal [-Winvalid-source-encoding] + */ +#ifndef IGNORE_EUC_JP x2("", "あ", 0, 0); x2("あ", "あ", 0, 2); n("い", "あ"); @@ -868,6 +878,8 @@ extern int main(int argc, char* argv[]) n("\\P{Hiragana}", "ぴ"); #endif +#endif /* IGNORE_EUC_JP */ + fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", nsucc, nfail, nerror, onig_version()); |