diff options
-rw-r--r-- | .bzrignore | 3 | ||||
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | CMakeLists.txt | 2 | ||||
-rw-r--r-- | HISTORY | 14 | ||||
-rw-r--r-- | README | 7 | ||||
-rw-r--r-- | README.md | 19 | ||||
-rwxr-xr-x | compile | 2 | ||||
-rwxr-xr-x | config.guess | 134 | ||||
-rwxr-xr-x | config.sub | 36 | ||||
-rw-r--r-- | configure.ac | 4 | ||||
-rw-r--r-- | debian/changelog | 7 | ||||
-rw-r--r-- | debian/symbols | 1 | ||||
-rwxr-xr-x | depcomp | 2 | ||||
-rw-r--r-- | dist.info | 2 | ||||
-rw-r--r-- | doc/RE | 69 | ||||
-rw-r--r-- | doc/RE.ja | 71 | ||||
-rw-r--r-- | index.html | 3 | ||||
-rw-r--r-- | index_ja.html | 3 | ||||
-rwxr-xr-x | install-sh | 373 | ||||
-rwxr-xr-x | missing | 2 | ||||
-rw-r--r-- | src/oniguruma.h | 10 | ||||
-rw-r--r-- | src/regcomp.c | 1102 | ||||
-rw-r--r-- | src/regenc.h | 2 | ||||
-rw-r--r-- | src/regerror.c | 6 | ||||
-rw-r--r-- | src/regexec.c | 487 | ||||
-rw-r--r-- | src/regint.h | 119 | ||||
-rw-r--r-- | src/regparse.c | 1075 | ||||
-rw-r--r-- | src/regparse.h | 159 | ||||
-rw-r--r-- | src/regposix.c | 5 | ||||
-rw-r--r-- | src/regsyntax.c | 16 | ||||
-rw-r--r-- | src/utf8.c | 41 | ||||
-rwxr-xr-x | test-driver | 15 | ||||
-rw-r--r-- | test/testc.c | 82 | ||||
-rw-r--r-- | test/testu.c | 9 |
34 files changed, 2954 insertions, 929 deletions
diff --git a/.bzrignore b/.bzrignore deleted file mode 100644 index 2386f62..0000000 --- a/.bzrignore +++ /dev/null @@ -1,3 +0,0 @@ -.git -**/.git -**/.pc @@ -25,4 +25,3 @@ testcu testp /build m4/*.m4 -.bzr diff --git a/CMakeLists.txt b/CMakeLists.txt index 60ce397..b40fb2c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,7 +4,7 @@ cmake_minimum_required(VERSION 2.8) project(oniguruma C) set(PACKAGE onig) -set(PACKAGE_VERSION "6.4.0") +set(PACKAGE_VERSION "6.5.0") set(USE_COMBINATION_EXPLOSION_CHECK 0) set(USE_CRNL_AS_LINE_TERMINATOR 0) @@ -1,5 +1,19 @@ History +2017/08/03: Version 6.5.0 + +2017/07/30: [new] support Absent clear (Absent functions) +2017/07/25: abolish configure option: --enable-combination-explosion-check +2017/07/23: [new] support Absent functions (?~...) +2017/07/14: fix #65: SIZEOF_SIZE_T doesn't exist on certain architectures +2017/07/11: [new] support \O (true anychar) +2017/07/10: [new] support \K (keep) +2017/07/10: add new node type: NODE_GIMMICK +2017/07/07: [new] support \N (no newline) +2017/07/05: [new] support \R (general newline) +2017/07/05: [new] support if-then-else syntax +2017/07/04: [new] support backref validity checker + 2017/07/03: Version 6.4.0 2017/06/30: fix memory leaks @@ -1,9 +1,14 @@ -README 2016/05/06 +README 2017/07/08 Oniguruma ---- (C) K.Kosako <kkosako0@gmail.com> https://github.com/kkos/oniguruma +FIXED Security Issues: + CVE-2017-9224, CVE-2017-9225, CVE-2017-9226 + CVE-2017-9227, CVE-2017-9228, CVE-2017-9229 + +--- Oniguruma is a regular expressions library. The characteristics of this library is that different character encoding for every regular expression object can be specified. @@ -3,6 +3,12 @@ Oniguruma https://github.com/kkos/oniguruma +FIXED Security Issues: +-------------------------- + CVE-2017-9224, CVE-2017-9225, CVE-2017-9226 + CVE-2017-9227, CVE-2017-9228, CVE-2017-9229 + + Oniguruma is a regular expressions library. 
The characteristics of this library is that different character encoding for every regular expression object can be specified. @@ -20,6 +26,19 @@ Supported character encodings: * CP1251: contributed by Byte +New feature of version 6.5.0 +-------------------------- + +* NEW: \K (keep) +* NEW: \R (general newline) \N (no newline) +* NEW: \O (true anychar) +* NEW: if-then-else syntax (?(...)...\|...) +* NEW: Backreference validity checker (*original) +* NEW: Absent repeater (?~absent) +* NEW: Absent expression (?~|absent|expr) (*original) +* NEW: Absent range cutter (?~|absent) (*original) + + New feature of version 6.4.0 -------------------------- @@ -3,7 +3,7 @@ scriptversion=2012-10-14.11; # UTC -# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# Copyright (C) 1999-2014 Free Software Foundation, Inc. # Written by Tom Tromey <tromey@cygnus.com>. # # This program is free software; you can redistribute it and/or modify diff --git a/config.guess b/config.guess index bf5ad89..1659250 100755 --- a/config.guess +++ b/config.guess @@ -1,8 +1,8 @@ #! /bin/sh # Attempt to guess a canonical system name. -# Copyright 1992-2016 Free Software Foundation, Inc. +# Copyright 1992-2015 Free Software Foundation, Inc. -timestamp='2016-09-11' +timestamp='2015-08-20' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -27,7 +27,7 @@ timestamp='2016-09-11' # Originally written by Per Bothner; maintained since 2000 by Ben Elliston. # # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD # # Please send patches to <config-patches@gnu.org>. @@ -50,7 +50,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright 1992-2016 Free Software Foundation, Inc. 
+Copyright 1992-2015 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." @@ -186,12 +186,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *) machine=${UNAME_MACHINE_ARCH}-unknown ;; esac # The Operating System including object format, if it has switched - # to ELF recently (or will in the future) and ABI. + # to ELF recently, or will in the future. case "${UNAME_MACHINE_ARCH}" in - earm*) - os=netbsdelf - ;; - arm*|i386|m68k|ns32k|sh3*|sparc|vax) + arm*|earm*|i386|m68k|ns32k|sh3*|sparc|vax) eval $set_cc_for_build if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \ | grep -q __ELF__ @@ -240,10 +237,6 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in UNAME_MACHINE_ARCH=`arch | sed 's/OpenBSD.//'` echo ${UNAME_MACHINE_ARCH}-unknown-openbsd${UNAME_RELEASE} exit ;; - *:LibertyBSD:*:*) - UNAME_MACHINE_ARCH=`arch | sed 's/^.*BSD\.//'` - echo ${UNAME_MACHINE_ARCH}-unknown-libertybsd${UNAME_RELEASE} - exit ;; *:ekkoBSD:*:*) echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} exit ;; @@ -275,42 +268,42 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in ALPHA_CPU_TYPE=`/usr/sbin/psrinfo -v | sed -n -e 's/^ The alpha \(.*\) processor.*$/\1/p' | head -n 1` case "$ALPHA_CPU_TYPE" in "EV4 (21064)") - UNAME_MACHINE=alpha ;; + UNAME_MACHINE="alpha" ;; "EV4.5 (21064)") - UNAME_MACHINE=alpha ;; + UNAME_MACHINE="alpha" ;; "LCA4 (21066/21068)") - UNAME_MACHINE=alpha ;; + UNAME_MACHINE="alpha" ;; "EV5 (21164)") - UNAME_MACHINE=alphaev5 ;; + UNAME_MACHINE="alphaev5" ;; "EV5.6 (21164A)") - UNAME_MACHINE=alphaev56 ;; + UNAME_MACHINE="alphaev56" ;; "EV5.6 (21164PC)") - UNAME_MACHINE=alphapca56 ;; + UNAME_MACHINE="alphapca56" ;; "EV5.7 (21164PC)") - UNAME_MACHINE=alphapca57 ;; + UNAME_MACHINE="alphapca57" ;; "EV6 (21264)") - UNAME_MACHINE=alphaev6 ;; + 
UNAME_MACHINE="alphaev6" ;; "EV6.7 (21264A)") - UNAME_MACHINE=alphaev67 ;; + UNAME_MACHINE="alphaev67" ;; "EV6.8CB (21264C)") - UNAME_MACHINE=alphaev68 ;; + UNAME_MACHINE="alphaev68" ;; "EV6.8AL (21264B)") - UNAME_MACHINE=alphaev68 ;; + UNAME_MACHINE="alphaev68" ;; "EV6.8CX (21264D)") - UNAME_MACHINE=alphaev68 ;; + UNAME_MACHINE="alphaev68" ;; "EV6.9A (21264/EV69A)") - UNAME_MACHINE=alphaev69 ;; + UNAME_MACHINE="alphaev69" ;; "EV7 (21364)") - UNAME_MACHINE=alphaev7 ;; + UNAME_MACHINE="alphaev7" ;; "EV7.9 (21364A)") - UNAME_MACHINE=alphaev79 ;; + UNAME_MACHINE="alphaev79" ;; esac # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` # Reset EXIT trap before exiting to avoid spurious non-zero exit code. exitcode=$? trap '' 0 @@ -383,16 +376,16 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*) eval $set_cc_for_build - SUN_ARCH=i386 + SUN_ARCH="i386" # If there is a compiler, see if it is configured for 64-bit objects. # Note that the Sun cc does not turn __LP64__ into 1 like gcc does. # This test works for both compilers. 
- if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then - SUN_ARCH=x86_64 + SUN_ARCH="x86_64" fi fi echo ${SUN_ARCH}-pc-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` @@ -417,7 +410,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in exit ;; sun*:*:4.2BSD:*) UNAME_RELEASE=`(sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null` - test "x${UNAME_RELEASE}" = x && UNAME_RELEASE=3 + test "x${UNAME_RELEASE}" = "x" && UNAME_RELEASE=3 case "`/bin/arch`" in sun3) echo m68k-sun-sunos${UNAME_RELEASE} @@ -642,13 +635,13 @@ EOF sc_cpu_version=`/usr/bin/getconf SC_CPU_VERSION 2>/dev/null` sc_kernel_bits=`/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null` case "${sc_cpu_version}" in - 523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0 - 528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1 + 523) HP_ARCH="hppa1.0" ;; # CPU_PA_RISC1_0 + 528) HP_ARCH="hppa1.1" ;; # CPU_PA_RISC1_1 532) # CPU_PA_RISC2_0 case "${sc_kernel_bits}" in - 32) HP_ARCH=hppa2.0n ;; - 64) HP_ARCH=hppa2.0w ;; - '') HP_ARCH=hppa2.0 ;; # HP-UX 10.20 + 32) HP_ARCH="hppa2.0n" ;; + 64) HP_ARCH="hppa2.0w" ;; + '') HP_ARCH="hppa2.0" ;; # HP-UX 10.20 esac ;; esac fi @@ -687,11 +680,11 @@ EOF exit (0); } EOF - (CCOPTS="" $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` + (CCOPTS= $CC_FOR_BUILD -o $dummy $dummy.c 2>/dev/null) && HP_ARCH=`$dummy` test -z "$HP_ARCH" && HP_ARCH=hppa fi ;; esac - if [ ${HP_ARCH} = hppa2.0w ] + if [ ${HP_ARCH} = "hppa2.0w" ] then eval $set_cc_for_build @@ -704,12 +697,12 @@ EOF # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess # => hppa64-hp-hpux11.23 - if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | + if echo __LP64__ | (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | grep -q __LP64__ then - HP_ARCH=hppa2.0w + 
HP_ARCH="hppa2.0w" else - HP_ARCH=hppa64 + HP_ARCH="hppa64" fi fi echo ${HP_ARCH}-hp-hpux${HPUX_REV} @@ -814,14 +807,14 @@ EOF echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) - FUJITSU_PROC=`uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz` - FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` + FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; 5000:UNIX_System_V:4.*:*) - FUJITSU_SYS=`uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///'` - FUJITSU_REL=`echo ${UNAME_RELEASE} | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/'` + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) @@ -903,7 +896,7 @@ EOF exit ;; *:GNU/*:*:*) # other systems with GNU libc and userland - echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]"``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-${LIBC} exit ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix @@ -926,7 +919,7 @@ EOF EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep -q ld.so.1 - if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + if test "$?" 
= 0 ; then LIBC="gnulibc1" ; fi echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; arc:Linux:*:* | arceb:Linux:*:*) @@ -972,9 +965,6 @@ EOF ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; - k1om:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} - exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit ;; @@ -1032,9 +1022,6 @@ EOF ppcle:Linux:*:*) echo powerpcle-unknown-linux-${LIBC} exit ;; - riscv32:Linux:*:* | riscv64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-${LIBC} - exit ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux-${LIBC} exit ;; @@ -1133,7 +1120,7 @@ EOF # uname -m prints for DJGPP always 'pc', but it prints nothing about # the processor, so we play safe by assuming i586. # Note: whatever this is, it MUST be the same as what config.sub - # prints for the "djgpp" host, or else GDB configure will decide that + # prints for the "djgpp" host, or else GDB configury will decide that # this is a cross-build. echo i586-pc-msdosdjgpp exit ;; @@ -1282,9 +1269,6 @@ EOF SX-8R:SUPER-UX:*:*) echo sx8r-nec-superux${UNAME_RELEASE} exit ;; - SX-ACE:SUPER-UX:*:*) - echo sxace-nec-superux${UNAME_RELEASE} - exit ;; Power*:Rhapsody:*:*) echo powerpc-apple-rhapsody${UNAME_RELEASE} exit ;; @@ -1298,9 +1282,9 @@ EOF UNAME_PROCESSOR=powerpc fi if test `echo "$UNAME_RELEASE" | sed -e 's/\..*//'` -le 10 ; then - if [ "$CC_FOR_BUILD" != no_compiler_found ]; then + if [ "$CC_FOR_BUILD" != 'no_compiler_found' ]; then if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \ - (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \ + (CCOPTS= $CC_FOR_BUILD -E - 2>/dev/null) | \ grep IS_64BIT_ARCH >/dev/null then case $UNAME_PROCESSOR in @@ -1322,7 +1306,7 @@ EOF exit ;; *:procnto*:*:* | *:QNX:[0123456789]*:*) UNAME_PROCESSOR=`uname -p` - if test "$UNAME_PROCESSOR" = x86; then + if test "$UNAME_PROCESSOR" = "x86"; then UNAME_PROCESSOR=i386 UNAME_MACHINE=pc fi @@ -1353,7 +1337,7 @@ EOF # "uname -m" is not 
consistent, so use $cputype instead. 386 # is converted to i386 for consistency with other x86 # operating systems. - if test "$cputype" = 386; then + if test "$cputype" = "386"; then UNAME_MACHINE=i386 else UNAME_MACHINE="$cputype" @@ -1395,7 +1379,7 @@ EOF echo i386-pc-xenix exit ;; i*86:skyos:*:*) - echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE} | sed -e 's/ .*$//'` + echo ${UNAME_MACHINE}-pc-skyos`echo ${UNAME_RELEASE}` | sed -e 's/ .*$//' exit ;; i*86:rdos:*:*) echo ${UNAME_MACHINE}-pc-rdos @@ -1406,25 +1390,23 @@ EOF x86_64:VMkernel:*:*) echo ${UNAME_MACHINE}-unknown-esx exit ;; - amd64:Isilon\ OneFS:*:*) - echo x86_64-unknown-onefs - exit ;; esac cat >&2 <<EOF $0: unable to guess system type -This script (version $timestamp), has failed to recognize the -operating system you are using. If your script is old, overwrite -config.guess and config.sub with the latest versions from: +This script, last modified $timestamp, has failed to recognize +the operating system you are using. It is advised that you +download the most up to date version of the config scripts from - http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess + http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.guess;hb=HEAD and - http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub + http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD -If $0 has already been updated, send the following data and any -information you think might be pertinent to config-patches@gnu.org to -provide the necessary information to handle your system. +If the version you run ($0) is already up to date, please +send the following data and any information you think might be +pertinent to <config-patches@gnu.org> in order to provide the needed +information to handle your system. config.guess timestamp = $timestamp @@ -1,8 +1,8 @@ #! /bin/sh # Configuration validation subroutine script. 
-# Copyright 1992-2016 Free Software Foundation, Inc. +# Copyright 1992-2015 Free Software Foundation, Inc. -timestamp='2016-09-05' +timestamp='2015-08-20' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -33,7 +33,7 @@ timestamp='2016-09-05' # Otherwise, we print the canonical config type on stdout and succeed. # You can get the latest version of this script from: -# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub +# http://git.savannah.gnu.org/gitweb/?p=config.git;a=blob_plain;f=config.sub;hb=HEAD # This file is supposed to be the same for all GNU packages # and recognize all the CPU types, system types and aliases @@ -53,7 +53,8 @@ timestamp='2016-09-05' me=`echo "$0" | sed -e 's,.*/,,'` usage="\ -Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS +Usage: $0 [OPTION] CPU-MFR-OPSYS + $0 [OPTION] ALIAS Canonicalize a configuration name. @@ -67,7 +68,7 @@ Report bugs and patches to <config-patches@gnu.org>." version="\ GNU config.sub ($timestamp) -Copyright 1992-2016 Free Software Foundation, Inc. +Copyright 1992-2015 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE." 
@@ -117,7 +118,7 @@ case $maybe_os in nto-qnx* | linux-gnu* | linux-android* | linux-dietlibc | linux-newlib* | \ linux-musl* | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | kfreebsd*-gnu* | \ knetbsd*-gnu* | netbsd*-gnu* | netbsd*-eabi* | \ - kopensolaris*-gnu* | cloudabi*-eabi* | \ + kopensolaris*-gnu* | \ storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` @@ -520,7 +521,7 @@ case $basic_machine in basic_machine=i386-pc os=-aros ;; - asmjs) + asmjs) basic_machine=asmjs-unknown ;; aux) @@ -643,14 +644,6 @@ case $basic_machine in basic_machine=m68k-bull os=-sysv3 ;; - e500v[12]) - basic_machine=powerpc-unknown - os=$os"spe" - ;; - e500v[12]-*) - basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` - os=$os"spe" - ;; ebmon29k) basic_machine=a29k-amd os=-ebmon @@ -1030,7 +1023,7 @@ case $basic_machine in ppc-* | ppcbe-*) basic_machine=powerpc-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppcle | powerpclittle) + ppcle | powerpclittle | ppc-le | powerpc-little) basic_machine=powerpcle-unknown ;; ppcle-* | powerpclittle-*) @@ -1040,7 +1033,7 @@ case $basic_machine in ;; ppc64-*) basic_machine=powerpc64-`echo $basic_machine | sed 's/^[^-]*-//'` ;; - ppc64le | powerpc64little) + ppc64le | powerpc64little | ppc64-le | powerpc64-little) basic_machine=powerpc64le-unknown ;; ppc64le-* | powerpc64little-*) @@ -1390,14 +1383,14 @@ case $os in | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* \ - | -bitrig* | -openbsd* | -solidbsd* | -libertybsd* \ + | -bitrig* | -openbsd* | -solidbsd* \ | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ | -chorusos* | -chorusrdb* | -cegcc* \ | -cygwin* | -msys* | 
-pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -midipix* | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ + | -mingw32* | -mingw64* | -linux-gnu* | -linux-android* \ | -linux-newlib* | -linux-musl* | -linux-uclibc* \ | -uxpv* | -beos* | -mpeix* | -udk* | -moxiebox* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ @@ -1406,8 +1399,7 @@ case $os in | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* \ - | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* \ - | -onefs* | -tirtos* | -phoenix*) + | -skyos* | -haiku* | -rdos* | -toppers* | -drops* | -es* | -tirtos*) # Remember, each alternative MUST END IN *, to match a version number. ;; -qnx*) @@ -1539,8 +1531,6 @@ case $os in ;; -nacl*) ;; - -ios) - ;; -none) ;; *) diff --git a/configure.ac b/configure.ac index 688d15b..efaf5e1 100644 --- a/configure.ac +++ b/configure.ac @@ -1,5 +1,5 @@ dnl Process this file with autoconf to produce a configure script. -AC_INIT(onig, 6.4.0) +AC_INIT(onig, 6.5.0) AC_CONFIG_MACRO_DIR([m4]) @@ -16,7 +16,7 @@ AC_SUBST(STATISTICS) dnl check for COMBINATION_EXPLOSION AC_ARG_ENABLE(combination-explosion-check, - [ --enable-combination-explosion-check enable combination explosion check], + [ --enable-combination-explosion-check deprecated], [comb_expl_check=$enableval]) if test "${comb_expl_check}" = yes; then AC_DEFINE(USE_COMBINATION_EXPLOSION_CHECK,1,[Define if combination explosion check]) diff --git a/debian/changelog b/debian/changelog index 69a8598..217f16f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,10 @@ +libonig (6.5.0-1) UNRELEASED; urgency=medium + + * New upstream release. + + Refresh symbols file. + + -- Jörg Frings-Fürst <debian@jff-webhosting.net> Sun, 06 Aug 2017 19:02:29 +0200 + libonig (6.4.0-1) unstable; urgency=medium * New upstream release. 
diff --git a/debian/symbols b/debian/symbols index 23c1b49..d3c085c 100644 --- a/debian/symbols +++ b/debian/symbols @@ -50,6 +50,7 @@ libonig.so.4 libonig4 #MINVER# OnigUnicodeFolds2@Base 6.0.0 OnigUnicodeFolds3@Base 6.0.0 euc_jp_lookup_property_name@Base 6.0.0 + list_node_free_not_car@Base 6.5.0 onig_add_end_call@Base 5.9.6 onig_bbuf_init@Base 5.9.5 onig_capture_tree_traverse@Base 5.9.5 @@ -3,7 +3,7 @@ scriptversion=2013-05-30.07; # UTC -# Copyright (C) 1999-2013 Free Software Foundation, Inc. +# Copyright (C) 1999-2014 Free Software Foundation, Inc. # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -1,7 +1,7 @@ --- This file is part of LuaDist project name = "onig" -version = "6.4.0" +version = "6.5.0" desc = "Oniguruma is a regular expressions library." author = "K.Kosako" @@ -1,4 +1,4 @@ -Oniguruma Regular Expressions Version 6.4.0 2017/06/28 +Oniguruma Regular Expressions Version 6.5.0 2017/07/30 syntax: ONIG_SYNTAX_RUBY (default) @@ -52,8 +52,8 @@ syntax: ONIG_SYNTAX_RUBY (default) Not Unicode: \t, \n, \v, \f, \r, \x20 - Unicode: - 0009, 000A, 000B, 000C, 000D, 0085(NEL), + Unicode case: + U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL), General_Category -- Line_Separator -- Paragraph_Separator -- Space_Separator @@ -70,6 +70,16 @@ syntax: ONIG_SYNTAX_RUBY (default) \H non-hexdigit char + \R general newline (* can't be used in character-class) + "\r\n" or \n,\v,\f,\r (* but doesn't backtrack from \r\n to \r) + + Unicode case: + "\r\n" or \n,\v,\f,\r or U+0085, U+2028, U+2029 + + \N negative newline (?-m:.) + + \O true anychar (?m:.) (* original function) + Character Property @@ -133,6 +143,8 @@ syntax: ONIG_SYNTAX_RUBY (default) \Z end of string, or before newline at the end \z end of string \G where the current search attempt begins + \K keep (keep start position of the result string) + 6. 
Character class @@ -183,9 +195,9 @@ syntax: ONIG_SYNTAX_RUBY (default) Final_Punctuation | Initial_Punctuation | Other_Punctuation | Open_Punctuation space Space_Separator | Line_Separator | Paragraph_Separator | - 0009 | 000A | 000B | 000C | 000D | 0085 + U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085 upper Uppercase_Letter - xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066 + xdigit U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066 (0-9, a-f, A-F) word Letter | Mark | Decimal_Number | Connector_Punctuation @@ -228,6 +240,50 @@ syntax: ONIG_SYNTAX_RUBY (default) Assigning the same name to two or more subexps is allowed. + <Absent functions> + + (?~absent) Absent repeater (* proposed by Tanaka Akira) + This works like .* (more precisely \O*), but it is + limited by the range that does not include the string + match with absent. + This is a written abbreviation of (?~|absent|\O*). + \O* is used as a repeater. + + (?~|absent|exp) Absent expression (* original) + This works like "exp", but it is limited by the range + that does not include the string match with absent. + + ex. (?~|345|\d*) "12345678" ==> "12", "1", "" + + (?~|absent) Absent cutter (* original) + After passed this operator, string right range is limited + at the point that does not include the string match with + absent. + + (?~|) Absent clear + Clear the effects caused by Absent cutters. + (* This operation is not cancelled by backtrack.) + + * Nested Absent functions are not supported and the behavior + is undefined. + + + (?(condition_exp)then_exp|else_exp) if-then-else + (?(condition_exp)then_exp) if-then + + condition_exp can be a backreference number/name or a normal + regular expression. + When condition_exp is a backreference, both then_exp and + else_exp can be omitted. + Then it works as a backreference validity checker. + + [ backreference validity checker ] (* original) + + (?(n)), (?(-n)), (?(+n)), (?(n+level)) ... + (?(<n>)), (?('-n')), (?(<+n>)) ... 
+ (?(<name>)), (?('name')), (?(<name+level>)) ... + + 8. Backreferences @@ -282,7 +338,7 @@ syntax: ONIG_SYNTAX_RUBY (default) p r.match("<foo>f<bar>bbb</bar>f</foo>").captures -9. Subexp calls ("Tanaka Akira special") +9. Subexp calls ("Tanaka Akira special") (* original function) When we say "call a group," it actually means, "re-execute the subexp in that group." @@ -367,7 +423,6 @@ A-3. Missing features compared with perl 5.8.0 + \l,\u,\L,\U, \X, \C + (?{code}) + (??{code}) - + (?(condition)yes-pat|no-pat) * \Q...\E This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA. @@ -1,4 +1,4 @@ -鬼車 正規表現 Version 6.4.0 2017/06/28 +鬼車 正規表現 Version 6.5.0 2017/07/30 使用文法: ONIG_SYNTAX_RUBY (既定値) @@ -35,7 +35,7 @@ 3. 文字種 - . 任意文字 (改行を除く) + . 任意文字 (改行を除く: オプションに依存) \w 単語構成文字 @@ -53,7 +53,7 @@ \t, \n, \v, \f, \r, \x20 Unicodeの場合: - 0009, 000A, 000B, 000C, 000D, 0085(NEL), + U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL), General_Category -- Line_Separator -- Paragraph_Separator -- Space_Separator @@ -70,6 +70,16 @@ \H 非16進数字 + \R 汎改行 (* 文字集合の中では使用できない) + "\r\n" or \n,\v,\f,\r (* 但し \r\nから\rにはバックトラックしない) + + Unicodeの場合: + "\r\n" or \n,\v,\f,\r or U+0085, U+2028, U+2029 + + \N 非改行文字 (?-m:.) + + \O 真任意文字 (?m:.) (* 原作) + Character Property @@ -133,6 +143,8 @@ \Z 文字列末尾、または文字列末尾の改行の直前 \z 文字列末尾 \G 照合開始位置 + \K 保持 (結果の開始位置をこの位置に保つ) + 6. 
文字集合 @@ -182,9 +194,9 @@ Final_Punctuation | Initial_Punctuation | Other_Punctuation | Open_Punctuation space Space_Separator | Line_Separator | Paragraph_Separator | - 0009 | 000A | 000B | 000C | 000D | 0085 + U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085 upper Uppercase_Letter - xdigit 0030 - 0039 | 0041 - 0046 | 0061 - 0066 + xdigit U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066 (0-9, a-f, A-F) word Letter | Mark | Decimal_Number | Connector_Punctuation @@ -230,6 +242,52 @@ この場合には、この名前を使用した後方参照は可能であるが、 部分式呼出しはできない。 + <不在機能群> + + (?~不在式) 不在繰り返し (*原案 田中哲) + これは.*のように(より正確には\O*)動作するが、不在式に + 適合する文字列を含まない範囲に制限される。 + これは(?~|不在式|\O*)の省略表記である。 + \O*の部分はマルチラインオプション(?m)の影響を受けない。 + + (?~|不在式|式) 不在式 (* 原作) + これは"式"のように動作するが、不在式に適合する文字列を + 含まない範囲に制限される。 + + 例 (?~|345|\d*) "12345678" ==> "12", "1", "" + + (?~|不在式) 不在切断 (* 原作) + この演算子を通過した後は、対象文字列の適合範囲の最後が + 不在式に適合する文字列を含まない範囲に制限される。 + + (?~|) 不在消去 + 不在切断の効果を消して、初期状態にする。 + (* この演算子の効果は後退再試行で無効化されない) + + * 不在機能の入れ子はサポートしておらず、挙動は不定とする。 + + + (?(条件式)成功式|失敗式) 条件式が成功すれば成功式、失敗すれば失敗式を実行する + この機能の存在理由は、成功式が失敗しても失敗式には + 行かないこと。これは他の正規表現で書くことができない。 + もうひとつは、条件式が後方参照のとき、後方参照値の有効性 + を調べる(文字列とマッチングはしない)意味になる。 + + (?(条件式)成功式) 条件式が成功すれば成功式を実行する + (条件式が通常の式のときには、この構文は不必要だが + 今のところエラーにはしない。) + + + 条件式は後方参照または通常の式を使用できる。 + 条件式が後方参照の場合、成功式と失敗式の両方を省略可能であり、 + この場合、後方参照値有効性を調べる(成功/失敗)機能のみになる。 + + [後方参照値有効性確認器] (* 原作) + (?(n)), (?(-n)), (?(+n)), (?(n+level)) ... + (?(<n>)), (?('-n')), (?(<+n>)) ... + (?(<name>)), (?('name')), (?(<name+level>)) ... + + 8. 後方参照 @@ -288,7 +346,7 @@ -9. 部分式呼出し ("田中哲スペシャル") +9. 
部分式呼出し ("田中哲スペシャル") (* 原作) \g<name> 名前指定呼出し \g'name' 名前指定呼出し @@ -373,7 +431,6 @@ + \l,\u,\L,\U, \X, \C + (?{code}) + (??{code}) - + (?(condition)yes-pat|no-pat) * \Q...\E 但しONIG_SYNTAX_PERLとONIG_SYNTAX_JAVAでは有効 @@ -8,7 +8,7 @@ <h1>Oniguruma</h1> (<a href="index_ja.html">Japanese</a>) <p> -(c) K.Kosako, updated at: 2017/06/30 +(c) K.Kosako, updated at: 2017/08/03 </p> <dl> @@ -16,6 +16,7 @@ <dt><b>What's new</b> </font> <ul> +<li>2017/08/03: Version 6.5.0 released.</li> <li>2017/07/03: Version 6.4.0 released.</li> <li>2017/05/29: Version 6.3.0 released.</li> <li>2017/04/08: Version 6.2.0 released.</li> diff --git a/index_ja.html b/index_ja.html index 502f460..52f0412 100644 --- a/index_ja.html +++ b/index_ja.html @@ -8,7 +8,7 @@ <h1>鬼車</h1> <p> -(c) K.Kosako, 最終更新: 2017/06/30 +(c) K.Kosako, 最終更新: 2017/08/03 </p> <dl> @@ -16,6 +16,7 @@ <dt><b>更新情報</b> </font> <ul> +<li>2017/08/03: Version 6.5.0 リリース</li> <li>2017/07/03: Version 6.4.0 リリース</li> <li>2017/05/29: Version 6.3.0 リリース</li> <li>2017/04/08: Version 6.2.0 リリース</li> @@ -1,7 +1,7 @@ #!/bin/sh # install - install a program, script, or datafile -scriptversion=2011-11-20.07; # UTC +scriptversion=2014-09-12.12; # UTC # This originates from X11R5 (mit/util/scripts/install.sh), which was # later released in X11R6 (xc/config/util/install.sh) with the @@ -41,19 +41,15 @@ scriptversion=2011-11-20.07; # UTC # This script is compatible with the BSD install script, but was written # from scratch. +tab=' ' nl=' ' -IFS=" "" $nl" +IFS=" $tab$nl" -# set DOITPROG to echo to test this script +# Set DOITPROG to "echo" to test this script. -# Don't use :- since 4.3BSD and earlier shells don't like it. doit=${DOITPROG-} -if test -z "$doit"; then - doit_exec=exec -else - doit_exec=$doit -fi +doit_exec=${doit:-exec} # Put in absolute file names if you don't have them in your path; # or use environment vars. @@ -68,17 +64,6 @@ mvprog=${MVPROG-mv} rmprog=${RMPROG-rm} stripprog=${STRIPPROG-strip} -posix_glob='?' 
-initialize_posix_glob=' - test "$posix_glob" != "?" || { - if (set -f) 2>/dev/null; then - posix_glob= - else - posix_glob=: - fi - } -' - posix_mkdir= # Desired mode of installed file. @@ -97,7 +82,7 @@ dir_arg= dst_arg= copy_on_change=false -no_target_directory= +is_target_a_directory=possibly usage="\ Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE @@ -137,46 +122,57 @@ while test $# -ne 0; do -d) dir_arg=true;; -g) chgrpcmd="$chgrpprog $2" - shift;; + shift;; --help) echo "$usage"; exit $?;; -m) mode=$2 - case $mode in - *' '* | *' '* | *' -'* | *'*'* | *'?'* | *'['*) - echo "$0: invalid mode: $mode" >&2 - exit 1;; - esac - shift;; + case $mode in + *' '* | *"$tab"* | *"$nl"* | *'*'* | *'?'* | *'['*) + echo "$0: invalid mode: $mode" >&2 + exit 1;; + esac + shift;; -o) chowncmd="$chownprog $2" - shift;; + shift;; -s) stripcmd=$stripprog;; - -t) dst_arg=$2 - # Protect names problematic for 'test' and other utilities. - case $dst_arg in - -* | [=\(\)!]) dst_arg=./$dst_arg;; - esac - shift;; + -t) + is_target_a_directory=always + dst_arg=$2 + # Protect names problematic for 'test' and other utilities. + case $dst_arg in + -* | [=\(\)!]) dst_arg=./$dst_arg;; + esac + shift;; - -T) no_target_directory=true;; + -T) is_target_a_directory=never;; --version) echo "$0 $scriptversion"; exit $?;; - --) shift - break;; + --) shift + break;; - -*) echo "$0: invalid option: $1" >&2 - exit 1;; + -*) echo "$0: invalid option: $1" >&2 + exit 1;; *) break;; esac shift done +# We allow the use of options -d and -T together, by making -d +# take the precedence; this is for compatibility with GNU install. + +if test -n "$dir_arg"; then + if test -n "$dst_arg"; then + echo "$0: target directory not allowed when installing a directory." >&2 + exit 1 + fi +fi + if test $# -ne 0 && test -z "$dir_arg$dst_arg"; then # When -d is used, all remaining arguments are directories to create. # When -t is used, the destination is already specified. 
@@ -208,6 +204,15 @@ if test $# -eq 0; then fi if test -z "$dir_arg"; then + if test $# -gt 1 || test "$is_target_a_directory" = always; then + if test ! -d "$dst_arg"; then + echo "$0: $dst_arg: Is not a directory." >&2 + exit 1 + fi + fi +fi + +if test -z "$dir_arg"; then do_exit='(exit $ret); exit $ret' trap "ret=129; $do_exit" 1 trap "ret=130; $do_exit" 2 @@ -223,16 +228,16 @@ if test -z "$dir_arg"; then *[0-7]) if test -z "$stripcmd"; then - u_plus_rw= + u_plus_rw= else - u_plus_rw='% 200' + u_plus_rw='% 200' fi cp_umask=`expr '(' 777 - $mode % 1000 ')' $u_plus_rw`;; *) if test -z "$stripcmd"; then - u_plus_rw= + u_plus_rw= else - u_plus_rw=,u+rw + u_plus_rw=,u+rw fi cp_umask=$mode$u_plus_rw;; esac @@ -269,41 +274,15 @@ do # If destination is a directory, append the input filename; won't work # if double slashes aren't ignored. if test -d "$dst"; then - if test -n "$no_target_directory"; then - echo "$0: $dst_arg: Is a directory" >&2 - exit 1 + if test "$is_target_a_directory" = never; then + echo "$0: $dst_arg: Is a directory" >&2 + exit 1 fi dstdir=$dst dst=$dstdir/`basename "$src"` dstdir_status=0 else - # Prefer dirname, but fall back on a substitute if dirname fails. - dstdir=` - (dirname "$dst") 2>/dev/null || - expr X"$dst" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$dst" : 'X\(//\)[^/]' \| \ - X"$dst" : 'X\(//\)$' \| \ - X"$dst" : 'X\(/\)' \| . 2>/dev/null || - echo X"$dst" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ - s//\1/ - q - } - /^X\(\/\/\)[^/].*/{ - s//\1/ - q - } - /^X\(\/\/\)$/{ - s//\1/ - q - } - /^X\(\/\).*/{ - s//\1/ - q - } - s/.*/./; q' - ` - + dstdir=`dirname "$dst"` test -d "$dstdir" dstdir_status=$? fi @@ -314,74 +293,81 @@ do if test $dstdir_status != 0; then case $posix_mkdir in '') - # Create intermediate dirs using mode 755 as modified by the umask. - # This is like FreeBSD 'install' as of 1997-10-28. - umask=`umask` - case $stripcmd.$umask in - # Optimize common cases. 
- *[2367][2367]) mkdir_umask=$umask;; - .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; - - *[0-7]) - mkdir_umask=`expr $umask + 22 \ - - $umask % 100 % 40 + $umask % 20 \ - - $umask % 10 % 4 + $umask % 2 - `;; - *) mkdir_umask=$umask,go-w;; - esac - - # With -d, create the new directory with the user-specified mode. - # Otherwise, rely on $mkdir_umask. - if test -n "$dir_arg"; then - mkdir_mode=-m$mode - else - mkdir_mode= - fi - - posix_mkdir=false - case $umask in - *[123567][0-7][0-7]) - # POSIX mkdir -p sets u+wx bits regardless of umask, which - # is incompatible with FreeBSD 'install' when (umask & 300) != 0. - ;; - *) - tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ - trap 'ret=$?; rmdir "$tmpdir/d" "$tmpdir" 2>/dev/null; exit $ret' 0 - - if (umask $mkdir_umask && - exec $mkdirprog $mkdir_mode -p -- "$tmpdir/d") >/dev/null 2>&1 - then - if test -z "$dir_arg" || { - # Check for POSIX incompatibilities with -m. - # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or - # other-writable bit of parent directory when it shouldn't. - # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. - ls_ld_tmpdir=`ls -ld "$tmpdir"` - case $ls_ld_tmpdir in - d????-?r-*) different_mode=700;; - d????-?--*) different_mode=755;; - *) false;; - esac && - $mkdirprog -m$different_mode -p -- "$tmpdir" && { - ls_ld_tmpdir_1=`ls -ld "$tmpdir"` - test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" - } - } - then posix_mkdir=: - fi - rmdir "$tmpdir/d" "$tmpdir" - else - # Remove any dirs left behind by ancient mkdir implementations. - rmdir ./$mkdir_mode ./-p ./-- 2>/dev/null - fi - trap '' 0;; - esac;; + # Create intermediate dirs using mode 755 as modified by the umask. + # This is like FreeBSD 'install' as of 1997-10-28. + umask=`umask` + case $stripcmd.$umask in + # Optimize common cases. 
+ *[2367][2367]) mkdir_umask=$umask;; + .*0[02][02] | .[02][02] | .[02]) mkdir_umask=22;; + + *[0-7]) + mkdir_umask=`expr $umask + 22 \ + - $umask % 100 % 40 + $umask % 20 \ + - $umask % 10 % 4 + $umask % 2 + `;; + *) mkdir_umask=$umask,go-w;; + esac + + # With -d, create the new directory with the user-specified mode. + # Otherwise, rely on $mkdir_umask. + if test -n "$dir_arg"; then + mkdir_mode=-m$mode + else + mkdir_mode= + fi + + posix_mkdir=false + case $umask in + *[123567][0-7][0-7]) + # POSIX mkdir -p sets u+wx bits regardless of umask, which + # is incompatible with FreeBSD 'install' when (umask & 300) != 0. + ;; + *) + # $RANDOM is not portable (e.g. dash); use it when possible to + # lower collision chance + tmpdir=${TMPDIR-/tmp}/ins$RANDOM-$$ + trap 'ret=$?; rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" 2>/dev/null; exit $ret' 0 + + # As "mkdir -p" follows symlinks and we work in /tmp possibly; so + # create the $tmpdir first (and fail if unsuccessful) to make sure + # that nobody tries to guess the $tmpdir name. + if (umask $mkdir_umask && + $mkdirprog $mkdir_mode "$tmpdir" && + exec $mkdirprog $mkdir_mode -p -- "$tmpdir/a/b") >/dev/null 2>&1 + then + if test -z "$dir_arg" || { + # Check for POSIX incompatibilities with -m. + # HP-UX 11.23 and IRIX 6.5 mkdir -m -p sets group- or + # other-writable bit of parent directory when it shouldn't. + # FreeBSD 6.1 mkdir -m -p sets mode of existing directory. + test_tmpdir="$tmpdir/a" + ls_ld_tmpdir=`ls -ld "$test_tmpdir"` + case $ls_ld_tmpdir in + d????-?r-*) different_mode=700;; + d????-?--*) different_mode=755;; + *) false;; + esac && + $mkdirprog -m$different_mode -p -- "$test_tmpdir" && { + ls_ld_tmpdir_1=`ls -ld "$test_tmpdir"` + test "$ls_ld_tmpdir" = "$ls_ld_tmpdir_1" + } + } + then posix_mkdir=: + fi + rmdir "$tmpdir/a/b" "$tmpdir/a" "$tmpdir" + else + # Remove any dirs left behind by ancient mkdir implementations. 
+ rmdir ./$mkdir_mode ./-p ./-- "$tmpdir" 2>/dev/null + fi + trap '' 0;; + esac;; esac if $posix_mkdir && ( - umask $mkdir_umask && - $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" + umask $mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir" ) then : else @@ -391,53 +377,51 @@ do # directory the slow way, step by step, checking for races as we go. case $dstdir in - /*) prefix='/';; - [-=\(\)!]*) prefix='./';; - *) prefix='';; + /*) prefix='/';; + [-=\(\)!]*) prefix='./';; + *) prefix='';; esac - eval "$initialize_posix_glob" - oIFS=$IFS IFS=/ - $posix_glob set -f + set -f set fnord $dstdir shift - $posix_glob set +f + set +f IFS=$oIFS prefixes= for d do - test X"$d" = X && continue - - prefix=$prefix$d - if test -d "$prefix"; then - prefixes= - else - if $posix_mkdir; then - (umask=$mkdir_umask && - $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break - # Don't fail if two instances are running concurrently. - test -d "$prefix" || exit 1 - else - case $prefix in - *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; - *) qprefix=$prefix;; - esac - prefixes="$prefixes '$qprefix'" - fi - fi - prefix=$prefix/ + test X"$d" = X && continue + + prefix=$prefix$d + if test -d "$prefix"; then + prefixes= + else + if $posix_mkdir; then + (umask=$mkdir_umask && + $doit_exec $mkdirprog $mkdir_mode -p -- "$dstdir") && break + # Don't fail if two instances are running concurrently. + test -d "$prefix" || exit 1 + else + case $prefix in + *\'*) qprefix=`echo "$prefix" | sed "s/'/'\\\\\\\\''/g"`;; + *) qprefix=$prefix;; + esac + prefixes="$prefixes '$qprefix'" + fi + fi + prefix=$prefix/ done if test -n "$prefixes"; then - # Don't fail if two instances are running concurrently. - (umask $mkdir_umask && - eval "\$doit_exec \$mkdirprog $prefixes") || - test -d "$dstdir" || exit 1 - obsolete_mkdir_used=true + # Don't fail if two instances are running concurrently. 
+ (umask $mkdir_umask && + eval "\$doit_exec \$mkdirprog $prefixes") || + test -d "$dstdir" || exit 1 + obsolete_mkdir_used=true fi fi fi @@ -472,15 +456,12 @@ do # If -C, don't bother to copy if it wouldn't change the file. if $copy_on_change && - old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && - new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && - - eval "$initialize_posix_glob" && - $posix_glob set -f && + old=`LC_ALL=C ls -dlL "$dst" 2>/dev/null` && + new=`LC_ALL=C ls -dlL "$dsttmp" 2>/dev/null` && + set -f && set X $old && old=:$2:$4:$5:$6 && set X $new && new=:$2:$4:$5:$6 && - $posix_glob set +f && - + set +f && test "$old" = "$new" && $cmpprog "$dst" "$dsttmp" >/dev/null 2>&1 then @@ -493,24 +474,24 @@ do # to itself, or perhaps because mv is so ancient that it does not # support -f. { - # Now remove or move aside any old file at destination location. - # We try this two ways since rm can't unlink itself on some - # systems and the destination file might be busy for other - # reasons. In this case, the final cleanup might fail but the new - # file should still install successfully. - { - test ! -f "$dst" || - $doit $rmcmd -f "$dst" 2>/dev/null || - { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && - { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } - } || - { echo "$0: cannot unlink or rename $dst" >&2 - (exit 1); exit 1 - } - } && - - # Now rename the file to the real destination. - $doit $mvcmd "$dsttmp" "$dst" + # Now remove or move aside any old file at destination location. + # We try this two ways since rm can't unlink itself on some + # systems and the destination file might be busy for other + # reasons. In this case, the final cleanup might fail but the new + # file should still install successfully. + { + test ! 
-f "$dst" || + $doit $rmcmd -f "$dst" 2>/dev/null || + { $doit $mvcmd -f "$dst" "$rmtmp" 2>/dev/null && + { $doit $rmcmd -f "$rmtmp" 2>/dev/null; :; } + } || + { echo "$0: cannot unlink or rename $dst" >&2 + (exit 1); exit 1 + } + } && + + # Now rename the file to the real destination. + $doit $mvcmd "$dsttmp" "$dst" } fi || exit 1 @@ -3,7 +3,7 @@ scriptversion=2013-10-28.13; # UTC -# Copyright (C) 1996-2013 Free Software Foundation, Inc. +# Copyright (C) 1996-2014 Free Software Foundation, Inc. # Originally written by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996. # This program is free software; you can redistribute it and/or modify diff --git a/src/oniguruma.h b/src/oniguruma.h index a8ae09a..bc8983f 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -35,7 +35,7 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 -#define ONIGURUMA_VERSION_MINOR 4 +#define ONIGURUMA_VERSION_MINOR 5 #define ONIGURUMA_VERSION_TEENY 0 #ifdef __cplusplus @@ -496,6 +496,11 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; /* #define ONIG_SYN_OP2_CHAR_PROPERTY_PREFIX_IS (1U<<18) */ #define ONIG_SYN_OP2_ESC_H_XDIGIT (1U<<19) /* \h, \H */ #define ONIG_SYN_OP2_INEFFECTIVE_ESCAPE (1U<<20) /* \ */ +#define ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE (1U<<21) /* (?(n)) (?(...)...|...) */ +#define ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (1U<<22) /* \K */ +#define ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE (1U<<23) /* \R \r\n else [\x0a-\x0d] */ +#define ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT (1U<<24) /* \N (?-m:.), \O (?m:.) */ +#define ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (1U<<25) /* (?~...) 
*/ /* syntax (behavior) */ #define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */ @@ -596,6 +601,9 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_NEVER_ENDING_RECURSION -221 #define ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY -222 #define ONIGERR_INVALID_CHAR_PROPERTY_NAME -223 +#define ONIGERR_INVALID_IF_ELSE_SYNTAX -224 +#define ONIGERR_INVALID_ABSENT_GROUP_PATTERN -225 +#define ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN -226 #define ONIGERR_INVALID_CODE_POINT_VALUE -400 #define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 diff --git a/src/regcomp.c b/src/regcomp.c index db83739..47023cb 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -147,7 +147,7 @@ swap_node(Node* a, Node* b) Node c; c = *a; *a = *b; *b = c; - if (NODE_TYPE(a) == NODE_STR) { + if (NODE_TYPE(a) == NODE_STRING) { StrNode* sn = STR_(a); if (sn->capa == 0) { int len = sn->end - sn->s; @@ -156,7 +156,7 @@ swap_node(Node* a, Node* b) } } - if (NODE_TYPE(b) == NODE_STR) { + if (NODE_TYPE(b) == NODE_STRING) { StrNode* sn = STR_(b); if (sn->capa == 0) { int len = sn->end - sn->s; @@ -169,11 +169,11 @@ swap_node(Node* a, Node* b) static OnigLen distance_add(OnigLen d1, OnigLen d2) { - if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE) - return ONIG_INFINITE_DISTANCE; + if (d1 == INFINITE_LEN || d2 == INFINITE_LEN) + return INFINITE_LEN; else { - if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2; - else return ONIG_INFINITE_DISTANCE; + if (d1 <= INFINITE_LEN - d2) return d1 + d2; + else return INFINITE_LEN; } } @@ -182,10 +182,10 @@ distance_multiply(OnigLen d, int m) { if (m == 0) return 0; - if (d < ONIG_INFINITE_DISTANCE / m) + if (d < INFINITE_LEN / m) return d * m; else - return ONIG_INFINITE_DISTANCE; + return INFINITE_LEN; } static int @@ -230,7 +230,7 @@ onig_bbuf_init(BBuf* buf, int size) } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL static int unset_addr_list_init(UnsetAddrList* list, int size) @@ 
-271,7 +271,7 @@ unset_addr_list_add(UnsetAddrList* list, int offset, struct _Node* node) list->num++; return 0; } -#endif /* USE_SUBEXP_CALL */ +#endif /* USE_CALL */ static int @@ -347,6 +347,24 @@ add_option(regex_t* reg, OnigOptionType option) } static int +add_save_type(regex_t* reg, enum SaveType type) +{ + SaveType t = (SaveType )type; + + BBUF_ADD(reg, &t, SIZE_SAVE_TYPE); + return 0; +} + +static int +add_update_var_type(regex_t* reg, enum UpdateVarType type) +{ + UpdateVarType t = (UpdateVarType )type; + + BBUF_ADD(reg, &t, SIZE_UPDATE_VAR_TYPE); + return 0; +} + +static int add_opcode_rel_addr(regex_t* reg, int opcode, int addr) { int r; @@ -466,7 +484,7 @@ compile_tree_empty_check(Node* node, regex_t* reg, int empty_info, ScanEnv* env) return r; } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL static int compile_call(CallNode* node, regex_t* reg, ScanEnv* env) { @@ -545,7 +563,7 @@ compile_length_string_node(Node* node, regex_t* reg) if (sn->end <= sn->s) return 0; - ambig = NSTRING_IS_AMBIG(node); + ambig = NODE_STRING_IS_AMBIG(node); p = prev = sn->s; prev_len = enclen(enc, p); @@ -594,7 +612,7 @@ compile_string_node(Node* node, regex_t* reg) return 0; end = sn->end; - ambig = NSTRING_IS_AMBIG(node); + ambig = NODE_STRING_IS_AMBIG(node); p = prev = sn->s; prev_len = enclen(enc, p); @@ -767,7 +785,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info, if (r != 0) return r; if ( -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL NODE_IS_IN_MULTI_ENTRY(qn) || #endif NODE_IS_IN_REAL_REPEAT(qn)) { @@ -893,7 +911,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); if (r != 0) return r; if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { - if (IS_MULTILINE(reg->options)) + if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); else r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); @@ -906,7 +924,7 @@ 
compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) return add_bytes(reg, STR_(qn->next_head_exact)->s, 1); } else { - if (IS_MULTILINE(reg->options)) { + if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) { r = add_opcode(reg, (CKN_ON ? OP_STATE_CHECK_ANYCHAR_ML_STAR : OP_ANYCHAR_ML_STAR)); @@ -1109,7 +1127,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); if (r != 0) return r; if (IS_NOT_NULL(qn->next_head_exact)) { - if (IS_MULTILINE(reg->options)) + if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); else r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); @@ -1117,7 +1135,7 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) return add_bytes(reg, STR_(qn->next_head_exact)->s, 1); } else { - if (IS_MULTILINE(reg->options)) + if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) return add_opcode(reg, OP_ANYCHAR_ML_STAR); else return add_opcode(reg, OP_ANYCHAR_STAR); @@ -1229,7 +1247,7 @@ compile_length_option_node(EnclosureNode* node, regex_t* reg) int tlen; OnigOptionType prev = reg->options; - reg->options = node->o.option; + reg->options = node->o.options; tlen = compile_length_tree(NODE_ENCLOSURE_BODY(node), reg); reg->options = prev; @@ -1249,8 +1267,8 @@ compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) int r; OnigOptionType prev = reg->options; - if (IS_DYNAMIC_OPTION(prev ^ node->o.option)) { - r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->o.option); + if (IS_DYNAMIC_OPTION(prev ^ node->o.options)) { + r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->o.options); if (r != 0) return r; r = add_opcode_option(reg, OP_SET_OPTION, prev); if (r != 0) return r; @@ -1258,11 +1276,11 @@ compile_option_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) if (r != 0) return r; } - reg->options = node->o.option; + reg->options = node->o.options; r = 
compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); reg->options = prev; - if (IS_DYNAMIC_OPTION(prev ^ node->o.option)) { + if (IS_DYNAMIC_OPTION(prev ^ node->o.options)) { if (r != 0) return r; r = add_opcode_option(reg, OP_SET_OPTION, prev); } @@ -1287,7 +1305,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) switch (node->type) { case ENCLOSURE_MEMORY: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (node->m.regnum == 0 && NODE_IS_CALLED(node)) { len = tlen + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN; @@ -1336,6 +1354,32 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) } break; + case ENCLOSURE_IF_ELSE: + { + Node* cond = NODE_ENCLOSURE_BODY(node); + Node* Then = node->te.Then; + Node* Else = node->te.Else; + + len = compile_length_tree(cond, reg); + if (len < 0) return len; + len += SIZE_OP_PUSH; + len += SIZE_OP_PUSH_STOP_BT + SIZE_OP_POP_STOP_BT; + + if (IS_NOT_NULL(Then)) { + tlen = compile_length_tree(Then, reg); + if (tlen < 0) return tlen; + len += tlen; + } + + if (IS_NOT_NULL(Else)) { + len += SIZE_OP_JUMP; + tlen = compile_length_tree(Else, reg); + if (tlen < 0) return tlen; + len += tlen; + } + } + break; + default: return ONIGERR_TYPE_BUG; break; @@ -1352,7 +1396,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) int r; int len; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (node->m.regnum == 0 && NODE_IS_CALLED(node)) { r = add_opcode(reg, OP_CALL); if (r != 0) return r; @@ -1370,9 +1414,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) r = add_opcode(reg, OP_RETURN); return r; } -#endif -#ifdef USE_SUBEXP_CALL if (NODE_IS_CALLED(node)) { r = add_opcode(reg, OP_CALL); if (r != 0) return r; @@ -1404,7 +1446,7 @@ compile_enclosure_memory_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) r = compile_tree(NODE_ENCLOSURE_BODY(node), reg, env); if (r != 0) return r; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (MEM_STATUS_AT0(reg->bt_mem_end, 
node->m.regnum)) r = add_opcode(reg, (NODE_IS_RECURSION(node) ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH)); @@ -1434,14 +1476,15 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) { int r, len; - if (node->type == ENCLOSURE_OPTION) - return compile_option_node(node, reg, env); - switch (node->type) { case ENCLOSURE_MEMORY: r = compile_enclosure_memory_node(node, reg, env); break; + case ENCLOSURE_OPTION: + r = compile_option_node(node, reg, env); + break; + case ENCLOSURE_STOP_BACKTRACK: if (NODE_IS_STOP_BT_SIMPLE_REPEAT(node)) { QuantNode* qn = QUANT_(NODE_ENCLOSURE_BODY(node)); @@ -1469,6 +1512,49 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) } break; + case ENCLOSURE_IF_ELSE: + { + int cond_len, then_len, jump_len; + Node* cond = NODE_ENCLOSURE_BODY(node); + Node* Then = node->te.Then; + Node* Else = node->te.Else; + + r = add_opcode(reg, OP_PUSH_STOP_BT); + if (r != 0) return r; + + cond_len = compile_length_tree(cond, reg); + if (cond_len < 0) return cond_len; + if (IS_NOT_NULL(Then)) { + then_len = compile_length_tree(Then, reg); + if (then_len < 0) return then_len; + } + else + then_len = 0; + + jump_len = cond_len + then_len + SIZE_OP_POP_STOP_BT; + if (IS_NOT_NULL(Else)) jump_len += SIZE_OP_JUMP; + + r = add_opcode_rel_addr(reg, OP_PUSH, jump_len); + if (r != 0) return r; + r = compile_tree(cond, reg, env); + if (r != 0) return r; + r = add_opcode(reg, OP_POP_STOP_BT); + if (r != 0) return r; + + if (IS_NOT_NULL(Then)) { + r = compile_tree(Then, reg, env); + if (r != 0) return r; + } + + if (IS_NOT_NULL(Else)) { + int else_len = compile_length_tree(Else, reg); + r = add_opcode_rel_addr(reg, OP_JUMP, else_len); + if (r != 0) return r; + r = compile_tree(Else, reg, env); + } + } + break; + default: return ONIGERR_TYPE_BUG; break; @@ -1490,10 +1576,10 @@ compile_length_anchor_node(AnchorNode* node, regex_t* reg) switch (node->type) { case ANCHOR_PREC_READ: - len = SIZE_OP_PUSH_POS + tlen + 
SIZE_OP_POP_POS; + len = SIZE_OP_PREC_READ_START + tlen + SIZE_OP_PREC_READ_END; break; case ANCHOR_PREC_READ_NOT: - len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS; + len = SIZE_OP_PUSH_PREC_READ_NOT + tlen + SIZE_OP_FAIL_PREC_READ_NOT; break; case ANCHOR_LOOK_BEHIND: len = SIZE_OP_LOOK_BEHIND + tlen; @@ -1531,21 +1617,21 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) #endif case ANCHOR_PREC_READ: - r = add_opcode(reg, OP_PUSH_POS); + r = add_opcode(reg, OP_PREC_READ_START); if (r != 0) return r; r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_POP_POS); + r = add_opcode(reg, OP_PREC_READ_END); break; case ANCHOR_PREC_READ_NOT: len = compile_length_tree(NODE_ANCHOR_BODY(node), reg); if (len < 0) return len; - r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS); + r = add_opcode_rel_addr(reg, OP_PUSH_PREC_READ_NOT, len + SIZE_OP_FAIL_PREC_READ_NOT); if (r != 0) return r; r = compile_tree(NODE_ANCHOR_BODY(node), reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_FAIL_POS); + r = add_opcode(reg, OP_FAIL_PREC_READ_NOT); break; case ANCHOR_LOOK_BEHIND: @@ -1596,6 +1682,67 @@ compile_anchor_node(AnchorNode* node, regex_t* reg, ScanEnv* env) } static int +compile_gimmick_node(GimmickNode* node, regex_t* reg) +{ + int r; + + switch (node->type) { + case GIMMICK_FAIL: + r = add_opcode(reg, OP_FAIL); + break; + + case GIMMICK_KEEP: + r = add_opcode(reg, OP_PUSH_SAVE_VAL); + if (r != 0) return r; + r = add_save_type(reg, SAVE_KEEP); + if (r != 0) return r; + r = add_mem_num(reg, node->id); + break; + + case GIMMICK_SAVE: + r = add_opcode(reg, OP_PUSH_SAVE_VAL); + if (r != 0) return r; + r = add_save_type(reg, node->detail_type); + if (r != 0) return r; + r = add_mem_num(reg, node->id); + break; + + case GIMMICK_UPDATE_VAR: + r = add_opcode(reg, OP_UPDATE_VAR); + if (r != 0) return r; + r = add_update_var_type(reg, node->detail_type); + if (r != 0) return r; + r = 
add_mem_num(reg, node->id); + break; + } + + return r; +} + +static int +compile_length_gimmick_node(GimmickNode* node, regex_t* reg) +{ + int len; + + switch (node->type) { + case GIMMICK_FAIL: + len = SIZE_OP_FAIL; + break; + + case GIMMICK_KEEP: + case GIMMICK_SAVE: + len = SIZE_OP_PUSH_SAVE_VAL; + break; + + case GIMMICK_UPDATE_VAR: + len = SIZE_OP_UPDATE_VAR; + break; + } + + return len; +} + +static int compile_length_tree(Node* node, regex_t* reg) { int len, r; @@ -1624,8 +1771,8 @@ compile_length_tree(Node* node, regex_t* reg) } break; - case NODE_STR: - if (NSTRING_IS_RAW(node)) + case NODE_STRING: + if (NODE_STRING_IS_RAW(node)) r = compile_length_string_raw_node(STR_(node), reg); else r = compile_length_string_node(node, reg); @@ -1639,28 +1786,39 @@ compile_length_tree(Node* node, regex_t* reg) r = SIZE_OPCODE; break; - case NODE_BREF: + case NODE_BACKREF: { - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); + if (NODE_IS_CHECKER(node)) { #ifdef USE_BACKREF_WITH_LEVEL - if (NODE_IS_NEST_LEVEL(node)) { - r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + - SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); - } - else + if (NODE_IS_NEST_LEVEL(node)) { + r = SIZE_OPCODE + SIZE_LENGTH + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + else #endif - if (br->back_num == 1) { - r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) - ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); + r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); } else { - r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); +#ifdef USE_BACKREF_WITH_LEVEL + if (NODE_IS_NEST_LEVEL(node)) { + r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH + + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } + else +#endif + if (br->back_num == 1) { + r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2) + ? 
SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM)); + } + else { + r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num); + } } } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: r = SIZE_OP_CALL; break; @@ -1678,6 +1836,10 @@ compile_length_tree(Node* node, regex_t* reg) r = compile_length_anchor_node(ANCHOR_(node), reg); break; + case NODE_GIMMICK: + r = compile_length_gimmick_node(GIMMICK_(node), reg); + break; + default: return ONIGERR_TYPE_BUG; break; @@ -1713,7 +1875,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) do { len = compile_length_tree(NODE_CAR(node), reg); if (IS_NOT_NULL(NODE_CDR(node))) { - r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP); + enum OpCode push = NODE_IS_SUPER(node) ? OP_PUSH_SUPER : OP_PUSH; + r = add_opcode_rel_addr(reg, push, len + SIZE_OP_JUMP); if (r != 0) break; } r = compile_tree(NODE_CAR(node), reg, env); @@ -1727,8 +1890,8 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) } break; - case NODE_STR: - if (NSTRING_IS_RAW(node)) + case NODE_STRING: + if (NODE_STRING_IS_RAW(node)) r = compile_string_raw_node(STR_(node), reg); else r = compile_string_node(node, reg); @@ -1744,7 +1907,7 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) switch (CTYPE_(node)->ctype) { case CTYPE_ANYCHAR: - if (IS_MULTILINE(reg->options)) + if (IS_MULTILINE(CTYPE_OPTION(node, reg))) r = add_opcode(reg, OP_ANYCHAR_ML); else r = add_opcode(reg, OP_ANYCHAR); @@ -1764,69 +1927,86 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) } break; - case NODE_BREF: + case NODE_BACKREF: { - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); + if (NODE_IS_CHECKER(node)) { #ifdef USE_BACKREF_WITH_LEVEL - if (NODE_IS_NEST_LEVEL(node)) { - r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); - if (r != 0) return r; - r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); - if (r != 0) return r; - r = add_length(reg, br->nest_level); - if (r != 0) return r; + if (NODE_IS_NEST_LEVEL(node)) { + r = 
add_opcode(reg, OP_BACKREF_CHECK_WITH_LEVEL); + if (r != 0) return r; + r = add_length(reg, br->nest_level); + if (r != 0) return r; + } + else +#endif + { + r = add_opcode(reg, OP_BACKREF_CHECK); + if (r != 0) return r; + } goto add_bacref_mems; } - else -#endif - if (br->back_num == 1) { - n = br->back_static[0]; - if (IS_IGNORECASE(reg->options)) { - r = add_opcode(reg, OP_BACKREFN_IC); + else { +#ifdef USE_BACKREF_WITH_LEVEL + if (NODE_IS_NEST_LEVEL(node)) { + r = add_opcode(reg, OP_BACKREF_WITH_LEVEL); + if (r != 0) return r; + r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE)); + if (r != 0) return r; + r = add_length(reg, br->nest_level); if (r != 0) return r; - r = add_mem_num(reg, n); + + goto add_bacref_mems; } - else { - switch (n) { - case 1: r = add_opcode(reg, OP_BACKREF1); break; - case 2: r = add_opcode(reg, OP_BACKREF2); break; - default: - r = add_opcode(reg, OP_BACKREFN); + else +#endif + if (br->back_num == 1) { + n = br->back_static[0]; + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREF_N_IC); if (r != 0) return r; r = add_mem_num(reg, n); - break; } - } - } - else { - int i; - int* p; - - if (IS_IGNORECASE(reg->options)) { - r = add_opcode(reg, OP_BACKREF_MULTI_IC); + else { + switch (n) { + case 1: r = add_opcode(reg, OP_BACKREF1); break; + case 2: r = add_opcode(reg, OP_BACKREF2); break; + default: + r = add_opcode(reg, OP_BACKREF_N); + if (r != 0) return r; + r = add_mem_num(reg, n); + break; + } + } } else { - r = add_opcode(reg, OP_BACKREF_MULTI); - } - if (r != 0) return r; + int i; + int* p; -#ifdef USE_BACKREF_WITH_LEVEL - add_bacref_mems: -#endif - r = add_length(reg, br->back_num); - if (r != 0) return r; - p = BACKREFS_P(br); - for (i = br->back_num - 1; i >= 0; i--) { - r = add_mem_num(reg, p[i]); + if (IS_IGNORECASE(reg->options)) { + r = add_opcode(reg, OP_BACKREF_MULTI_IC); + } + else { + r = add_opcode(reg, OP_BACKREF_MULTI); + } if (r != 0) return r; + + add_bacref_mems: + r = add_length(reg, 
br->back_num); + if (r != 0) return r; + p = BACKREFS_P(br); + for (i = br->back_num - 1; i >= 0; i--) { + r = add_mem_num(reg, p[i]); + if (r != 0) return r; + } } } } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: r = compile_call(CALL_(node), reg, env); break; @@ -1844,6 +2024,10 @@ compile_tree(Node* node, regex_t* reg, ScanEnv* env) r = compile_anchor_node(ANCHOR_(node), reg, env); break; + case NODE_GIMMICK: + r = compile_gimmick_node(GIMMICK_(node), reg); + break; + default: #ifdef ONIG_DEBUG fprintf(stderr, "compile_tree: undefined node type %d\n", NODE_TYPE(node)); @@ -1898,6 +2082,18 @@ noname_disable_map(Node** plink, GroupNumRemap* map, int* counter) r = noname_disable_map(plink, map, counter); } } + else if (en->type == ENCLOSURE_IF_ELSE) { + r = noname_disable_map(&(NODE_ENCLOSURE_BODY(en)), map, counter); + if (r != 0) return r; + if (IS_NOT_NULL(en->te.Then)) { + r = noname_disable_map(&(en->te.Then), map, counter); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) { + r = noname_disable_map(&(en->te.Else), map, counter); + if (r != 0) return r; + } + } else r = noname_disable_map(&(NODE_BODY(node)), map, counter); } @@ -1920,7 +2116,7 @@ renumber_node_backref(Node* node, GroupNumRemap* map) { int i, pos, n, old_num; int *backs; - BRefNode* bn = BREF_(node); + BackRefNode* bn = BACKREF_(node); if (! 
NODE_IS_BY_NAME(node)) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; @@ -1957,11 +2153,29 @@ renumber_by_map(Node* node, GroupNumRemap* map) break; case NODE_QUANT: - case NODE_ENCLOSURE: r = renumber_by_map(NODE_BODY(node), map); break; - case NODE_BREF: + case NODE_ENCLOSURE: + { + EnclosureNode* en = ENCLOSURE_(node); + r = renumber_by_map(NODE_BODY(node), map); + if (r != 0) return r; + + if (en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = renumber_by_map(en->te.Then, map); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) { + r = renumber_by_map(en->te.Else, map); + if (r != 0) return r; + } + } + } + break; + + case NODE_BACKREF: r = renumber_node_backref(node, map); break; @@ -1995,11 +2209,30 @@ numbered_ref_check(Node* node) break; /* fall */ case NODE_QUANT: - case NODE_ENCLOSURE: r = numbered_ref_check(NODE_BODY(node)); break; - case NODE_BREF: + case NODE_ENCLOSURE: + { + EnclosureNode* en = ENCLOSURE_(node); + r = numbered_ref_check(NODE_BODY(node)); + if (r != 0) return r; + + if (en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = numbered_ref_check(en->te.Then); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) { + r = numbered_ref_check(en->te.Else); + if (r != 0) return r; + } + } + } + + break; + + case NODE_BACKREF: if (! NODE_IS_BY_NAME(node)) return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; break; @@ -2052,7 +2285,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) } #endif /* USE_NAMED_GROUP */ -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL static int unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) { @@ -2061,9 +2294,11 @@ unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) AbsAddrType addr; for (i = 0; i < uslist->num; i++) { + if (! NODE_IS_ADDR_FIXED(uslist->us[i].target)) + return ONIGERR_PARSER_BUG; + en = ENCLOSURE_(uslist->us[i].target); - if (! 
NODE_IS_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG; - addr = en->m.called_addr; + addr = en->m.called_addr; offset = uslist->us[i].offset; BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR); @@ -2120,7 +2355,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) } break; - case NODE_STR: + case NODE_STRING: { StrNode* sn = STR_(node); UChar *s = sn->s; @@ -2135,16 +2370,21 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) { QuantNode* qn = QUANT_(node); if (qn->lower == qn->upper) { - r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level); - if (r == 0) - *len = distance_multiply(tlen, qn->lower); + if (qn->upper == 0) { + *len = 0; + } + else { + r = get_char_length_tree1(NODE_BODY(node), reg, &tlen, level); + if (r == 0) + *len = distance_multiply(tlen, qn->lower); + } } else r = GET_CHAR_LEN_VARLEN; } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: if (! NODE_IS_RECURSION(node)) r = get_char_length_tree1(NODE_BODY(node), reg, len, level); @@ -2166,7 +2406,7 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) EnclosureNode* en = ENCLOSURE_(node); switch (en->type) { case ENCLOSURE_MEMORY: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (NODE_IS_CLEN_FIXED(node)) *len = en->char_len; else { @@ -2182,6 +2422,31 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) case ENCLOSURE_STOP_BACKTRACK: r = get_char_length_tree1(NODE_BODY(node), reg, len, level); break; + case ENCLOSURE_IF_ELSE: + { + int clen, elen; + r = get_char_length_tree1(NODE_BODY(node), reg, &clen, level); + if (r == 0) { + if (IS_NOT_NULL(en->te.Then)) { + r = get_char_length_tree1(en->te.Then, reg, &tlen, level); + if (r != 0) break; + } + else tlen = 0; + if (IS_NOT_NULL(en->te.Else)) { + r = get_char_length_tree1(en->te.Else, reg, &elen, level); + if (r != 0) break; + } + else elen = 0; + + if (clen + tlen != elen) { + r = GET_CHAR_LEN_VARLEN; + } + else { + *len = elen; + } + } + } + break; default: 
break; } @@ -2189,8 +2454,13 @@ get_char_length_tree1(Node* node, regex_t* reg, int* len, int level) break; case NODE_ANCHOR: + case NODE_GIMMICK: break; + case NODE_BACKREF: + if (NODE_IS_CHECKER(node)) + break; + /* fall */ default: r = GET_CHAR_LEN_VARLEN; break; @@ -2241,7 +2511,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) } break; - case NODE_STR: + case NODE_STRING: goto swap; break; @@ -2318,7 +2588,7 @@ is_exclusive(Node* x, Node* y, regex_t* reg) } break; - case NODE_STR: + case NODE_STRING: goto swap; break; @@ -2328,10 +2598,10 @@ is_exclusive(Node* x, Node* y, regex_t* reg) } break; - case NODE_STR: + case NODE_STRING: { StrNode* xs = STR_(x); - if (NSTRING_LEN(x) == 0) + if (NODE_STRING_LEN(x) == 0) break; //c = *(xs->s); @@ -2362,13 +2632,13 @@ is_exclusive(Node* x, Node* y, regex_t* reg) } break; - case NODE_STR: + case NODE_STRING: { UChar *q; StrNode* ys = STR_(y); - len = NSTRING_LEN(x); - if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y); - if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) { + len = NODE_STRING_LEN(x); + if (len > NODE_STRING_LEN(y)) len = NODE_STRING_LEN(y); + if (NODE_STRING_IS_AMBIG(x) || NODE_STRING_IS_AMBIG(y)) { /* tiny version */ return 0; } @@ -2399,9 +2669,9 @@ get_head_value_node(Node* node, int exact, regex_t* reg) Node* n = NULL_NODE; switch (NODE_TYPE(node)) { - case NODE_BREF: + case NODE_BACKREF: case NODE_ALT: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: #endif break; @@ -2420,7 +2690,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) n = get_head_value_node(NODE_CAR(node), exact, reg); break; - case NODE_STR: + case NODE_STRING: { StrNode* sn = STR_(node); @@ -2428,7 +2698,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) break; if (exact != 0 && - !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { + !NODE_STRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) { } else { n = node; @@ -2456,7 +2726,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) { OnigOptionType 
options = reg->options; - reg->options = ENCLOSURE_(node)->o.option; + reg->options = ENCLOSURE_(node)->o.options; n = get_head_value_node(NODE_BODY(node), exact, reg); reg->options = options; } @@ -2464,6 +2734,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) case ENCLOSURE_MEMORY: case ENCLOSURE_STOP_BACKTRACK: + case ENCLOSURE_IF_ELSE: n = get_head_value_node(NODE_BODY(node), exact, reg); break; } @@ -2475,6 +2746,7 @@ get_head_value_node(Node* node, int exact, regex_t* reg) n = get_head_value_node(NODE_BODY(node), exact, reg); break; + case NODE_GIMMICK: default: break; } @@ -2512,6 +2784,15 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask) return 1; r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); + if (r == 0 && en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = check_type_tree(en->te.Then, type_mask, enclosure_mask, anchor_mask); + if (r != 0) break; + } + if (IS_NOT_NULL(en->te.Else)) { + r = check_type_tree(en->te.Else, type_mask, enclosure_mask, anchor_mask); + } + } } break; @@ -2524,6 +2805,7 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask) r = check_type_tree(NODE_BODY(node), type_mask, enclosure_mask, anchor_mask); break; + case NODE_GIMMICK: default: break; } @@ -2531,31 +2813,31 @@ check_type_tree(Node* node, int type_mask, int enclosure_mask, int anchor_mask) } static OnigLen -get_min_len(Node* node, ScanEnv* env) +tree_min_len(Node* node, ScanEnv* env) { OnigLen len; OnigLen tmin; len = 0; switch (NODE_TYPE(node)) { - case NODE_BREF: - { + case NODE_BACKREF: + if (! 
NODE_IS_CHECKER(node)) { int i; int* backs; MemEnv* mem_env = SCANENV_MEMENV(env); - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); if (NODE_IS_RECURSION(node)) break; backs = BACKREFS_P(br); - len = get_min_len(mem_env[backs[0]].node, env); + len = tree_min_len(mem_env[backs[0]].node, env); for (i = 1; i < br->back_num; i++) { - tmin = get_min_len(mem_env[backs[i]].node, env); + tmin = tree_min_len(mem_env[backs[i]].node, env); if (len > tmin) len = tmin; } } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: { Node* t = NODE_BODY(node); @@ -2564,15 +2846,15 @@ get_min_len(Node* node, ScanEnv* env) len = ENCLOSURE_(t)->min_len; } else - len = get_min_len(t, env); + len = tree_min_len(t, env); } break; #endif case NODE_LIST: do { - tmin = get_min_len(NODE_CAR(node), env); - len += tmin; + tmin = tree_min_len(NODE_CAR(node), env); + len = distance_add(len, tmin); } while (IS_NOT_NULL(node = NODE_CDR(node))); break; @@ -2582,14 +2864,14 @@ get_min_len(Node* node, ScanEnv* env) y = node; do { x = NODE_CAR(y); - tmin = get_min_len(x, env); + tmin = tree_min_len(x, env); if (y == node) len = tmin; else if (len > tmin) len = tmin; } while (IS_NOT_NULL(y = NODE_CDR(y))); } break; - case NODE_STR: + case NODE_STRING: { StrNode* sn = STR_(node); len = sn->end - sn->s; @@ -2598,7 +2880,7 @@ get_min_len(Node* node, ScanEnv* env) case NODE_CTYPE: case NODE_CCLASS: - len = 1; + len = ONIGENC_MBC_MINLEN(env->enc); break; case NODE_QUANT: @@ -2606,7 +2888,7 @@ get_min_len(Node* node, ScanEnv* env) QuantNode* qn = QUANT_(node); if (qn->lower > 0) { - len = get_min_len(NODE_BODY(node), env); + len = tree_min_len(NODE_BODY(node), env); len = distance_multiply(len, qn->lower); } } @@ -2624,7 +2906,7 @@ get_min_len(Node* node, ScanEnv* env) len = 0; // recursive else { NODE_STATUS_ADD(node, NST_MARK1); - len = get_min_len(NODE_BODY(node), env); + len = tree_min_len(NODE_BODY(node), env); NODE_STATUS_REMOVE(node, NST_MARK1); en->min_len = len; @@ -2635,12 
+2917,34 @@ get_min_len(Node* node, ScanEnv* env) case ENCLOSURE_OPTION: case ENCLOSURE_STOP_BACKTRACK: - len = get_min_len(NODE_BODY(node), env); + len = tree_min_len(NODE_BODY(node), env); + break; + case ENCLOSURE_IF_ELSE: + { + int elen; + len = tree_min_len(NODE_BODY(node), env); + if (IS_NOT_NULL(en->te.Then)) + len += tree_min_len(en->te.Then, env); + if (IS_NOT_NULL(en->te.Else)) + elen = tree_min_len(en->te.Else, env); + else elen = 0; + + if (elen < len) len = elen; + } break; } } break; + case NODE_GIMMICK: + { + GimmickNode* g = GIMMICK_(node); + if (g->type == GIMMICK_FAIL) { + len = INFINITE_LEN; + break; + } + } + /* fall */ case NODE_ANCHOR: default: break; @@ -2650,7 +2954,7 @@ get_min_len(Node* node, ScanEnv* env) } static OnigLen -get_max_len(Node* node, ScanEnv* env) +tree_max_len(Node* node, ScanEnv* env) { OnigLen len; OnigLen tmax; @@ -2659,19 +2963,19 @@ get_max_len(Node* node, ScanEnv* env) switch (NODE_TYPE(node)) { case NODE_LIST: do { - tmax = get_max_len(NODE_CAR(node), env); + tmax = tree_max_len(NODE_CAR(node), env); len = distance_add(len, tmax); } while (IS_NOT_NULL(node = NODE_CDR(node))); break; case NODE_ALT: do { - tmax = get_max_len(NODE_CAR(node), env); + tmax = tree_max_len(NODE_CAR(node), env); if (len < tmax) len = tmax; } while (IS_NOT_NULL(node = NODE_CDR(node))); break; - case NODE_STR: + case NODE_STRING: { StrNode* sn = STR_(node); len = sn->end - sn->s; @@ -2683,30 +2987,30 @@ get_max_len(Node* node, ScanEnv* env) len = ONIGENC_MBC_MAXLEN_DIST(env->enc); break; - case NODE_BREF: - { + case NODE_BACKREF: + if (! 
NODE_IS_CHECKER(node)) { int i; int* backs; MemEnv* mem_env = SCANENV_MEMENV(env); - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); if (NODE_IS_RECURSION(node)) { - len = ONIG_INFINITE_DISTANCE; + len = INFINITE_LEN; break; } backs = BACKREFS_P(br); for (i = 0; i < br->back_num; i++) { - tmax = get_max_len(mem_env[backs[i]].node, env); + tmax = tree_max_len(mem_env[backs[i]].node, env); if (len < tmax) len = tmax; } } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: if (! NODE_IS_RECURSION(node)) - len = get_max_len(NODE_BODY(node), env); + len = tree_max_len(NODE_BODY(node), env); else - len = ONIG_INFINITE_DISTANCE; + len = INFINITE_LEN; break; #endif @@ -2715,12 +3019,12 @@ get_max_len(Node* node, ScanEnv* env) QuantNode* qn = QUANT_(node); if (qn->upper != 0) { - len = get_max_len(NODE_BODY(node), env); + len = tree_max_len(NODE_BODY(node), env); if (len != 0) { if (! IS_REPEAT_INFINITE(qn->upper)) len = distance_multiply(len, qn->upper); else - len = ONIG_INFINITE_DISTANCE; + len = INFINITE_LEN; } } } @@ -2735,10 +3039,10 @@ get_max_len(Node* node, ScanEnv* env) len = en->max_len; else { if (NODE_IS_MARK1(node)) - len = ONIG_INFINITE_DISTANCE; + len = INFINITE_LEN; else { NODE_STATUS_ADD(node, NST_MARK1); - len = get_max_len(NODE_BODY(node), env); + len = tree_max_len(NODE_BODY(node), env); NODE_STATUS_REMOVE(node, NST_MARK1); en->max_len = len; @@ -2749,13 +3053,29 @@ get_max_len(Node* node, ScanEnv* env) case ENCLOSURE_OPTION: case ENCLOSURE_STOP_BACKTRACK: - len = get_max_len(NODE_BODY(node), env); + len = tree_max_len(NODE_BODY(node), env); + break; + case ENCLOSURE_IF_ELSE: + { + int tlen, elen; + len = tree_max_len(NODE_BODY(node), env); + if (IS_NOT_NULL(en->te.Then)) { + tlen = tree_max_len(en->te.Then, env); + len = distance_add(len, tlen); + } + if (IS_NOT_NULL(en->te.Else)) + elen = tree_max_len(en->te.Else, env); + else elen = 0; + + if (elen > len) len = elen; + } break; } } break; case NODE_ANCHOR: + case 
NODE_GIMMICK: default: break; } @@ -2783,14 +3103,31 @@ check_backrefs(Node* node, ScanEnv* env) } /* fall */ case NODE_QUANT: + r = check_backrefs(NODE_BODY(node), env); + break; + case NODE_ENCLOSURE: r = check_backrefs(NODE_BODY(node), env); + { + EnclosureNode* en = ENCLOSURE_(node); + + if (en->type == ENCLOSURE_IF_ELSE) { + if (r != 0) return r; + if (IS_NOT_NULL(en->te.Then)) { + r = check_backrefs(en->te.Then, env); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) { + r = check_backrefs(en->te.Else, env); + } + } + } break; - case NODE_BREF: + case NODE_BACKREF: { int i; - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); int* backs = BACKREFS_P(br); MemEnv* mem_env = SCANENV_MEMENV(env); @@ -2813,7 +3150,7 @@ check_backrefs(Node* node, ScanEnv* env) } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL #define RECURSION_EXIST (1<<0) #define RECURSION_MUST (1<<1) @@ -2822,6 +3159,7 @@ check_backrefs(Node* node, ScanEnv* env) static int infinite_recursive_call_check(Node* node, ScanEnv* env, int head) { + int ret; int r = 0; switch (NODE_TYPE(node)) { @@ -2829,15 +3167,14 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head) { Node *x; OnigLen min; - int ret; x = node; do { ret = infinite_recursive_call_check(NODE_CAR(x), env, head); if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret; r |= ret; - if (head) { - min = get_min_len(NODE_CAR(x), env); + if (head != 0) { + min = tree_min_len(NODE_CAR(x), env); if (min != 0) head = 0; } } while (IS_NOT_NULL(x = NODE_CDR(x))); @@ -2846,7 +3183,6 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head) case NODE_ALT: { - int ret; int must; must = RECURSION_MUST; @@ -2894,6 +3230,31 @@ infinite_recursive_call_check(Node* node, ScanEnv* env, int head) NODE_STATUS_REMOVE(node, NST_MARK2); } } + else if (en->type == ENCLOSURE_IF_ELSE) { + int eret; + + ret = infinite_recursive_call_check(NODE_BODY(node), env, head); + if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return 
ret; + r |= ret; + if (IS_NOT_NULL(en->te.Then)) { + OnigLen min; + if (head != 0) { + min = tree_min_len(NODE_BODY(node), env); + } + else min = 0; + + ret = infinite_recursive_call_check(en->te.Then, env, min != 0 ? 0:head); + if (ret < 0 || (ret & RECURSION_INFINITE) != 0) return ret; + r |= ret; + } + if (IS_NOT_NULL(en->te.Else)) { + eret = infinite_recursive_call_check(en->te.Else, env, head); + if (eret < 0 || (eret & RECURSION_INFINITE) != 0) return eret; + r |= (eret & RECURSION_EXIST); + if ((eret & RECURSION_MUST) == 0) + r &= ~RECURSION_MUST; + } + } else { r = infinite_recursive_call_check(NODE_BODY(node), env, head); } @@ -2948,6 +3309,16 @@ infinite_recursive_call_check_trav(Node* node, ScanEnv* env) NODE_STATUS_REMOVE(node, NST_MARK1); } } + else if (en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = infinite_recursive_call_check_trav(en->te.Then, env); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) { + r = infinite_recursive_call_check_trav(en->te.Else, env); + if (r != 0) return r; + } + } } r = infinite_recursive_call_check_trav(NODE_BODY(node), env); @@ -2987,7 +3358,10 @@ recursive_call_check(Node* node) case NODE_CALL: r = recursive_call_check(NODE_BODY(node)); - if (r != 0) NODE_STATUS_ADD(node, NST_RECURSION); + if (r != 0) { + if (NODE_IS_MARK1(NODE_BODY(node))) + NODE_STATUS_ADD(node, NST_RECURSION); + } break; case NODE_ENCLOSURE: @@ -3005,6 +3379,16 @@ recursive_call_check(Node* node) NODE_STATUS_REMOVE(node, NST_MARK2); } } + else if (en->type == ENCLOSURE_IF_ELSE) { + r = 0; + if (IS_NOT_NULL(en->te.Then)) { + r |= recursive_call_check(en->te.Then); + } + if (IS_NOT_NULL(en->te.Else)) { + r |= recursive_call_check(en->te.Else); + } + r |= recursive_call_check(NODE_BODY(node)); + } else { r = recursive_call_check(NODE_BODY(node)); } @@ -3058,6 +3442,8 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state) case NODE_ENCLOSURE: { + int ret; + int state1; EnclosureNode* en = ENCLOSURE_(node); 
if (en->type == ENCLOSURE_MEMORY) { @@ -3075,16 +3461,25 @@ recursive_call_check_trav(Node* node, ScanEnv* env, int state) } } - { - int ret; - int state1 = state; + state1 = state; + if (NODE_IS_RECURSION(node)) + state1 |= IN_RECURSION; - if (NODE_IS_RECURSION(node)) - state1 |= IN_RECURSION; + ret = recursive_call_check_trav(NODE_BODY(node), env, state1); + if (ret == FOUND_CALLED_NODE) + r = FOUND_CALLED_NODE; - ret = recursive_call_check_trav(NODE_BODY(node), env, state1); - if (ret == FOUND_CALLED_NODE) - r = FOUND_CALLED_NODE; + if (en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + ret = recursive_call_check_trav(en->te.Then, env, state1); + if (ret == FOUND_CALLED_NODE) + r = FOUND_CALLED_NODE; + } + if (IS_NOT_NULL(en->te.Else)) { + ret = recursive_call_check_trav(en->te.Else, env, state1); + if (ret == FOUND_CALLED_NODE) + r = FOUND_CALLED_NODE; + } } } break; @@ -3126,7 +3521,7 @@ divide_look_behind_alternatives(Node* node) if (anc_type == ANCHOR_LOOK_BEHIND_NOT) { np = node; do { - SET_NODE_TYPE(np, NODE_LIST); /* alt -> list */ + NODE_SET_TYPE(np, NODE_LIST); /* alt -> list */ } while (IS_NOT_NULL(np = NODE_CDR(np))); } return 0; @@ -3257,8 +3652,8 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, return r; } - NSTRING_SET_AMBIG(node); - NSTRING_SET_DONT_GET_OPT_INFO(node); + NODE_STRING_SET_AMBIG(node); + NODE_STRING_SET_DONT_GET_OPT_INFO(node); *rnode = node; return 0; } @@ -3386,7 +3781,7 @@ expand_case_fold_string(Node* node, regex_t* reg) OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; StrNode* sn = STR_(node); - if (NSTRING_IS_AMBIG(node)) return 0; + if (NODE_STRING_IS_AMBIG(node)) return 0; start = sn->s; end = sn->end; @@ -3529,10 +3924,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) switch (NODE_TYPE(node)) { case NODE_LIST: { - Node* prev = NULL_NODE; do { r = setup_comb_exp_check(NODE_CAR(node), r, env); - prev = NODE_CAR(node); } while (r >= 0 && IS_NOT_NULL(node = 
NODE_CDR(node))); } break; @@ -3619,8 +4012,8 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) switch (en->type) { case ENCLOSURE_MEMORY: { - if (env->curr_max_regnum < en->regnum) - env->curr_max_regnum = en->regnum; + if (env->curr_max_regnum < en->m.regnum) + env->curr_max_regnum = en->m.regnum; r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env); } @@ -3633,7 +4026,7 @@ setup_comb_exp_check(Node* node, int state, ScanEnv* env) } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: if (NODE_IS_RECURSION(node)) env->has_recursion = 1; @@ -3668,7 +4061,7 @@ quantifiers_memory_node_info(Node* node) } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: if (NODE_IS_RECURSION(node)) { return QUANT_BODY_IS_EMPTY_REC; /* tiny version */ @@ -3702,17 +4095,32 @@ quantifiers_memory_node_info(Node* node) case ENCLOSURE_STOP_BACKTRACK: r = quantifiers_memory_node_info(NODE_BODY(node)); break; + case ENCLOSURE_IF_ELSE: + { + int v; + r = quantifiers_memory_node_info(NODE_BODY(node)); + if (IS_NOT_NULL(en->te.Then)) { + v = quantifiers_memory_node_info(en->te.Then); + if (v > r) r = v; + } + if (IS_NOT_NULL(en->te.Else)) { + v = quantifiers_memory_node_info(en->te.Else); + if (v > r) r = v; + } + } + break; default: break; } } break; - case NODE_BREF: - case NODE_STR: + case NODE_BACKREF: + case NODE_STRING: case NODE_CTYPE: case NODE_CCLASS: case NODE_ANCHOR: + case NODE_GIMMICK: default: break; } @@ -3729,7 +4137,7 @@ quantifiers_memory_node_info(Node* node) #define IN_ZERO_REPEAT (1<<4) #define IN_MULTI_ENTRY (1<<5) -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL #ifdef __GNUC__ __inline @@ -3745,7 +4153,7 @@ setup_call_node_call(CallNode* cn, ScanEnv* env, int state) #ifdef USE_NAMED_GROUP if (env->num_named > 0 && IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) && - !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) { + !ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_CAPTURE_GROUP)) { return 
ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED; } #endif @@ -3811,10 +4219,26 @@ setup_call2_call(Node* node) break; case NODE_ENCLOSURE: - if (! NODE_IS_MARK1(node)) { - NODE_STATUS_ADD(node, NST_MARK1); - setup_call2_call(NODE_BODY(node)); - NODE_STATUS_REMOVE(node, NST_MARK1); + { + EnclosureNode* en = ENCLOSURE_(node); + + if (en->type == ENCLOSURE_MEMORY) { + if (! NODE_IS_MARK1(node)) { + NODE_STATUS_ADD(node, NST_MARK1); + setup_call2_call(NODE_BODY(node)); + NODE_STATUS_REMOVE(node, NST_MARK1); + } + } + else if (en->type == ENCLOSURE_IF_ELSE) { + setup_call2_call(NODE_BODY(node)); + if (IS_NOT_NULL(en->te.Then)) + setup_call2_call(en->te.Then); + if (IS_NOT_NULL(en->te.Else)) + setup_call2_call(en->te.Else); + } + else { + setup_call2_call(NODE_BODY(node)); + } } break; @@ -3868,11 +4292,29 @@ setup_call(Node* node, ScanEnv* env, int state) break; case NODE_ENCLOSURE: - if ((state & IN_ZERO_REPEAT) != 0) { - NODE_STATUS_ADD(node, NST_IN_ZERO_REPEAT); - ENCLOSURE_(node)->m.entry_count--; + { + EnclosureNode* en = ENCLOSURE_(node); + + if (en->type == ENCLOSURE_MEMORY) { + if ((state & IN_ZERO_REPEAT) != 0) { + NODE_STATUS_ADD(node, NST_IN_ZERO_REPEAT); + ENCLOSURE_(node)->m.entry_count--; + } + r = setup_call(NODE_BODY(node), env, state); + } + else if (en->type == ENCLOSURE_IF_ELSE) { + r = setup_call(NODE_BODY(node), env, state); + if (r != 0) return r; + if (IS_NOT_NULL(en->te.Then)) { + r = setup_call(en->te.Then, env, state); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) + r = setup_call(en->te.Else, env, state); + } + else + r = setup_call(NODE_BODY(node), env, state); } - r = setup_call(NODE_BODY(node), env, state); break; case NODE_CALL: @@ -3918,6 +4360,20 @@ setup_call2(Node* node) case NODE_ENCLOSURE: if (! 
NODE_IS_IN_ZERO_REPEAT(node)) r = setup_call2(NODE_BODY(node)); + + { + EnclosureNode* en = ENCLOSURE_(node); + + if (r != 0) return r; + if (en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + r = setup_call2(en->te.Then); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) + r = setup_call2(en->te.Else); + } + } break; case NODE_CALL: @@ -3997,6 +4453,13 @@ setup_called_state_call(Node* node, int state) NODE_STATUS_REMOVE(node, NST_MARK1); } } + else if (en->type == ENCLOSURE_IF_ELSE) { + if (IS_NOT_NULL(en->te.Then)) { + setup_called_state_call(en->te.Then, state); + } + if (IS_NOT_NULL(en->te.Else)) + setup_called_state_call(en->te.Else, state); + } else { setup_called_state_call(NODE_BODY(node), state); } @@ -4025,7 +4488,7 @@ setup_called_state(Node* node, int state) } while (IS_NOT_NULL(node = NODE_CDR(node))); break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: setup_called_state_call(node, state); break; @@ -4046,6 +4509,13 @@ setup_called_state(Node* node, int state) case ENCLOSURE_STOP_BACKTRACK: setup_called_state(NODE_BODY(node), state); break; + case ENCLOSURE_IF_ELSE: + setup_called_state(NODE_BODY(node), state); + if (IS_NOT_NULL(en->te.Then)) + setup_called_state(en->te.Then, state); + if (IS_NOT_NULL(en->te.Else)) + setup_called_state(en->te.Else, state); + break; } } break; @@ -4082,16 +4552,17 @@ setup_called_state(Node* node, int state) } break; - case NODE_BREF: - case NODE_STR: + case NODE_BACKREF: + case NODE_STRING: case NODE_CTYPE: case NODE_CCLASS: + case NODE_GIMMICK: default: break; } } -#endif /* USE_SUBEXP_CALL */ +#endif /* USE_CALL */ static int setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env); @@ -4104,8 +4575,9 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env) { /* allowed node types in look-behind */ #define ALLOWED_TYPE_IN_LB \ - ( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE \ - | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | 
BIT_NODE_QUANT | BIT_NODE_CALL ) + ( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STRING | BIT_NODE_CCLASS \ + | BIT_NODE_CTYPE | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT \ + | BIT_NODE_CALL ) #define ALLOWED_ENCLOSURE_IN_LB ( ENCLOSURE_MEMORY | ENCLOSURE_OPTION ) #define ALLOWED_ENCLOSURE_IN_LB_NOT ENCLOSURE_OPTION @@ -4182,7 +4654,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) } if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { - d = get_min_len(body, env); + d = tree_min_len(body, env); if (d == 0) { #ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT qn->body_empty_info = quantifiers_memory_node_info(body); @@ -4208,10 +4680,10 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) /* expand string */ #define EXPAND_STRING_MAX_LENGTH 100 - if (NODE_TYPE(body) == NODE_STR) { + if (NODE_TYPE(body) == NODE_STRING) { if (!IS_REPEAT_INFINITE(qn->lower) && qn->lower == qn->upper && qn->lower > 1 && qn->lower <= EXPAND_STRING_MAX_LENGTH) { - int len = NSTRING_LEN(body); + int len = NODE_STRING_LEN(body); StrNode* sn = STR_(body); if (len * qn->lower <= EXPAND_STRING_MAX_LENGTH) { @@ -4278,17 +4750,17 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } while (r == 0 && IS_NOT_NULL(node = NODE_CDR(node))); break; - case NODE_STR: - if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) { + case NODE_STRING: + if (IS_IGNORECASE(reg->options) && !NODE_STRING_IS_RAW(node)) { r = expand_case_fold_string(node, reg); } break; - case NODE_BREF: + case NODE_BACKREF: { int i; int* p; - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); p = BACKREFS_P(br); for (i = 0; i < br->back_num; i++) { if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF; @@ -4311,14 +4783,14 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) case ENCLOSURE_OPTION: { OnigOptionType options = reg->options; - reg->options = ENCLOSURE_(node)->o.option; + reg->options = ENCLOSURE_(node)->o.options; r = 
setup_tree(NODE_BODY(node), reg, state, env); reg->options = options; } break; case ENCLOSURE_MEMORY: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL state |= en->m.called_state; #endif @@ -4343,6 +4815,17 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) } } break; + + case ENCLOSURE_IF_ELSE: + r = setup_tree(NODE_BODY(node), reg, (state | IN_ALT), env); + if (r != 0) return r; + if (IS_NOT_NULL(en->te.Then)) { + r = setup_tree(en->te.Then, reg, (state | IN_ALT), env); + if (r != 0) return r; + } + if (IS_NOT_NULL(en->te.Else)) + r = setup_tree(en->te.Else, reg, (state | IN_ALT), env); + break; } } break; @@ -4355,11 +4838,12 @@ setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env) r = setup_anchor(node, reg, state, env); break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: #endif case NODE_CTYPE: case NODE_CCLASS: + case NODE_GIMMICK: default: break; } @@ -4487,7 +4971,7 @@ distance_value(MinMaxLen* mm) OnigLen d; - if (mm->max == ONIG_INFINITE_DISTANCE) return 0; + if (mm->max == INFINITE_LEN) return 0; d = mm->max - mm->min; if (d < (OnigLen )(sizeof(dist_vals)/sizeof(dist_vals[0]))) @@ -5048,15 +5532,15 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case NODE_STR: + case NODE_STRING: { StrNode* sn = STR_(node); int slen = sn->end - sn->s; - int is_raw = NSTRING_IS_RAW(node); + int is_raw = NODE_STRING_IS_RAW(node); - if (! NSTRING_IS_AMBIG(node)) { + if (! 
NODE_STRING_IS_AMBIG(node)) { concat_opt_exact_info_str(&opt->exb, sn->s, sn->end, - NSTRING_IS_RAW(node), env->enc); + NODE_STRING_IS_RAW(node), env->enc); if (slen > 0) { add_char_opt_map_info(&opt->map, *(sn->s), env->enc); } @@ -5065,7 +5549,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) else { int max; - if (NSTRING_IS_DONT_GET_OPT_INFO(node)) { + if (NODE_STRING_IS_DONT_GET_OPT_INFO(node)) { int n = onigenc_strlen(env->enc, sn->s, sn->end); max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n; } @@ -5191,24 +5675,24 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; - case NODE_BREF: - { + case NODE_BACKREF: + if (! NODE_IS_CHECKER(node)) { int i; int* backs; OnigLen min, max, tmin, tmax; MemEnv* mem_env = SCANENV_MEMENV(env->scan_env); - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); if (NODE_IS_RECURSION(node)) { - set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + set_mml(&opt->len, 0, INFINITE_LEN); break; } backs = BACKREFS_P(br); - min = get_min_len(mem_env[backs[0]].node, env->scan_env); - max = get_max_len(mem_env[backs[0]].node, env->scan_env); + min = tree_min_len(mem_env[backs[0]].node, env->scan_env); + max = tree_max_len(mem_env[backs[0]].node, env->scan_env); for (i = 1; i < br->back_num; i++) { - tmin = get_min_len(mem_env[backs[i]].node, env->scan_env); - tmax = get_max_len(mem_env[backs[i]].node, env->scan_env); + tmin = tree_min_len(mem_env[backs[i]].node, env->scan_env); + tmax = tree_max_len(mem_env[backs[i]].node, env->scan_env); if (min > tmin) min = tmin; if (max < tmax) max = tmax; } @@ -5216,13 +5700,13 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: if (NODE_IS_RECURSION(node)) - set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE); + set_mml(&opt->len, 0, INFINITE_LEN); else { OnigOptionType save = env->options; - env->options = ENCLOSURE_(NODE_BODY(node))->o.option; + env->options = 
ENCLOSURE_(NODE_BODY(node))->o.options; r = optimize_node_left(NODE_BODY(node), opt, env); env->options = save; } @@ -5242,7 +5726,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { if (env->mmd.max == 0 && NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) { - if (IS_MULTILINE(env->options)) + if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env))) add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); else add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); @@ -5274,7 +5758,7 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) min = distance_multiply(nopt.len.min, qn->lower); if (IS_REPEAT_INFINITE(qn->upper)) - max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0); + max = (nopt.len.max > 0 ? INFINITE_LEN : 0); else max = distance_multiply(nopt.len.max, qn->upper); @@ -5291,20 +5775,20 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) { OnigOptionType save = env->options; - env->options = en->o.option; + env->options = en->o.options; r = optimize_node_left(NODE_BODY(node), opt, env); env->options = save; } break; case ENCLOSURE_MEMORY: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL en->opt_count++; if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) { OnigLen min, max; min = 0; - max = ONIG_INFINITE_DISTANCE; + max = INFINITE_LEN; if (NODE_IS_MIN_FIXED(node)) min = en->min_len; if (NODE_IS_MAX_FIXED(node)) max = en->max_len; set_mml(&opt->len, min, max); @@ -5324,10 +5808,39 @@ optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env) case ENCLOSURE_STOP_BACKTRACK: r = optimize_node_left(NODE_BODY(node), opt, env); break; + + case ENCLOSURE_IF_ELSE: + { + OptEnv nenv; + NodeOptInfo nopt; + + copy_opt_env(&nenv, env); + r = optimize_node_left(NODE_ENCLOSURE_BODY(en), &nopt, &nenv); + if (r == 0) { + add_mml(&nenv.mmd, &nopt.len); + concat_left_node_opt_info(env->enc, opt, &nopt); + if (IS_NOT_NULL(en->te.Then)) { + r = optimize_node_left(en->te.Then, &nopt, &nenv); + if 
(r == 0) { + concat_left_node_opt_info(env->enc, opt, &nopt); + } + } + + if (IS_NOT_NULL(en->te.Else)) { + r = optimize_node_left(en->te.Else, &nopt, env); + if (r == 0) + alt_merge_node_opt_info(opt, &nopt, env); + } + } + } + break; } } break; + case NODE_GIMMICK: + break; + default: #ifdef ONIG_DEBUG fprintf(stderr, "optimize_node_left: undefined node type %d\n", NODE_TYPE(node)); @@ -5379,7 +5892,7 @@ set_optimize_exact_info(regex_t* reg, OptExactInfo* e) reg->dmin = e->mmd.min; reg->dmax = e->mmd.max; - if (reg->dmin != ONIG_INFINITE_DISTANCE) { + if (reg->dmin != INFINITE_LEN) { reg->threshold_len = reg->dmin + (reg->exact_end - reg->exact); } @@ -5398,7 +5911,7 @@ set_optimize_map_info(regex_t* reg, OptMapInfo* m) reg->dmin = m->mmd.min; reg->dmax = m->mmd.max; - if (reg->dmin != ONIG_INFINITE_DISTANCE) { + if (reg->dmin != INFINITE_LEN) { reg->threshold_len = reg->dmin + 1; } } @@ -5531,14 +6044,14 @@ static void print_enc_string(FILE* fp, OnigEncoding enc, static void print_distance_range(FILE* f, OnigLen a, OnigLen b) { - if (a == ONIG_INFINITE_DISTANCE) + if (a == INFINITE_LEN) fputs("inf", f); else fprintf(f, "(%u)", a); fputs("-", f); - if (b == ONIG_INFINITE_DISTANCE) + if (b == INFINITE_LEN) fputs("inf", f); else fprintf(f, "(%u)", b); @@ -5656,7 +6169,7 @@ onig_free_body(regex_t* reg) if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); - if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain); + if (IS_NOT_NULL(REG_EXTP(reg))) xfree(REG_EXTP(reg)); #ifdef USE_NAMED_GROUP onig_names_free(reg); @@ -5702,7 +6215,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, int r, init_size; Node* root; ScanEnv scan_env; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL UnsetAddrList uslist; #endif @@ -5751,7 +6264,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, r = check_backrefs(root, 
&scan_env); if (r != 0) goto err; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (scan_env.num_call > 0) { r = unset_addr_list_init(&uslist, scan_env.num_call); if (r != 0) goto err; @@ -5791,12 +6304,12 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, #ifdef USE_COMBINATION_EXPLOSION_CHECK if (scan_env.backrefed_mem == 0 -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL || scan_env.num_call == 0 #endif ) { setup_comb_exp_check(root, 0, &scan_env); -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (scan_env.has_recursion != 0) { scan_env.num_comb_exp_check = 0; } @@ -5829,8 +6342,17 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, r = compile_tree(root, reg, &scan_env); if (r == 0) { + if (scan_env.keep_num > 0) { + r = add_opcode(reg, OP_UPDATE_VAR); + if (r != 0) goto err; + r = add_update_var_type(reg, UPDATE_VAR_KEEP_FROM_STACK_LAST); + if (r != 0) goto err; + r = add_mem_num(reg, 0 /* not used */); + if (r != 0) goto err; + } + r = add_opcode(reg, OP_END); -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (scan_env.num_call > 0) { r = unset_addr_list_fix(&uslist, reg); unset_addr_list_end(&uslist); @@ -5847,7 +6369,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, reg->stack_pop_level = STACK_POP_LEVEL_FREE; } } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL else if (scan_env.num_call > 0) { unset_addr_list_end(&uslist); } @@ -5865,7 +6387,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, return r; err_unset: -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (scan_env.num_call > 0) { unset_addr_list_end(&uslist); } @@ -5890,8 +6412,8 @@ static int onig_inited = 0; extern int onig_reg_init(regex_t* reg, OnigOptionType option, - OnigCaseFoldType case_fold_flag, - OnigEncoding enc, OnigSyntaxType* syntax) + OnigCaseFoldType case_fold_flag, + OnigEncoding enc, OnigSyntaxType* syntax) { int r; @@ -5938,7 +6460,7 @@ onig_reg_init(regex_t* reg, OnigOptionType option, (reg)->exact = 
(UChar* )NULL; (reg)->int_map = (int* )NULL; (reg)->int_map_backward = (int* )NULL; - (reg)->chain = (regex_t* )NULL; + REG_EXTPL(reg) = NULL; (reg)->p = (UChar* )NULL; (reg)->alloc = 0; @@ -6165,11 +6687,13 @@ OnigOpInfoType OnigOpInfo[] = { { OP_BEGIN_POSITION, "begin-position", ARG_NON }, { OP_BACKREF1, "backref1", ARG_NON }, { OP_BACKREF2, "backref2", ARG_NON }, - { OP_BACKREFN, "backrefn", ARG_MEMNUM }, - { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL }, + { OP_BACKREF_N, "backref-n", ARG_MEMNUM }, + { OP_BACKREF_N_IC, "backref-n-ic", ARG_SPECIAL }, { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, - { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL }, + { OP_BACKREF_WITH_LEVEL, "backref_with_level", ARG_SPECIAL }, + { OP_BACKREF_CHECK, "backref_check", ARG_SPECIAL }, + { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL }, { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, @@ -6181,6 +6705,7 @@ OnigOpInfoType OnigOpInfo[] = { { OP_FAIL, "fail", ARG_NON }, { OP_JUMP, "jump", ARG_RELADDR }, { OP_PUSH, "push", ARG_RELADDR }, + { OP_PUSH_SUPER, "push_SUPER", ARG_RELADDR }, { OP_POP, "pop", ARG_NON }, { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, @@ -6194,10 +6719,10 @@ OnigOpInfoType OnigOpInfo[] = { { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM }, { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM }, { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM }, - { OP_PUSH_POS, "push-pos", ARG_NON }, - { OP_POP_POS, "pop-pos", ARG_NON }, - { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR }, - { OP_FAIL_POS, "fail-pos", ARG_NON }, + { OP_PREC_READ_START, "push-pos", ARG_NON }, + { OP_PREC_READ_END, "pop-pos", ARG_NON }, + { OP_PUSH_PREC_READ_NOT, "push-prec-read-not", 
ARG_RELADDR }, + { OP_FAIL_PREC_READ_NOT, "fail-prec-read-not", ARG_NON }, { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON }, { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON }, { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, @@ -6205,6 +6730,8 @@ OnigOpInfoType OnigOpInfo[] = { { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON }, { OP_CALL, "call", ARG_ABSADDR }, { OP_RETURN, "return", ARG_NON }, + { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL }, + { OP_UPDATE_VAR, "update-var", ARG_SPECIAL }, { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, @@ -6272,6 +6799,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, MemNumType mem; StateCheckNumType scn; OnigCodePoint code; + OnigOptionType option; UChar *q; fprintf(f, "%s", op2name(*bp)); @@ -6421,7 +6949,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, } break; - case OP_BACKREFN_IC: + case OP_BACKREF_N_IC: mem = *((MemNumType* )bp); bp += SIZE_MEMNUM; fprintf(f, ":%d", mem); @@ -6429,6 +6957,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, case OP_BACKREF_MULTI_IC: case OP_BACKREF_MULTI: + case OP_BACKREF_CHECK: fputs(" ", f); GET_LENGTH_INC(len, bp); for (i = 0; i < len; i++) { @@ -6439,12 +6968,13 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, break; case OP_BACKREF_WITH_LEVEL: + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + /* fall */ + case OP_BACKREF_CHECK_WITH_LEVEL: { - OnigOptionType option; LengthType level; - GET_OPTION_INC(option, bp); - fprintf(f, ":%d", option); GET_LENGTH_INC(level, bp); fprintf(f, ":%d", level); @@ -6501,6 +7031,24 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, p_rel_addr(f, addr, bp, start); break; + case OP_PUSH_SAVE_VAL: + { + SaveType type; + GET_SAVE_TYPE_INC(type, 
bp); + GET_MEMNUM_INC(mem, bp); + fprintf(f, ":%d:%d", type, mem); + } + break; + + case OP_UPDATE_VAR: + { + UpdateVarType type; + GET_UPDATE_VAR_TYPE_INC(type, bp); + GET_MEMNUM_INC(mem, bp); + fprintf(f, ":%d:%d", type, mem); + } + break; + default: fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); @@ -6576,9 +7124,9 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case NODE_STR: + case NODE_STRING: fprintf(f, "<string%s:%p>", - (NSTRING_IS_RAW(node) ? "-raw" : ""), node); + (NODE_STRING_IS_RAW(node) ? "-raw" : ""), node); for (p = STR_(node)->s; p < STR_(node)->end; p++) { if (*p >= 0x20 && *p < 0x7f) fputc(*p, f); @@ -6659,12 +7207,12 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; - case NODE_BREF: + case NODE_BACKREF: { int* p; - BRefNode* br = BREF_(node); + BackRefNode* br = BACKREF_(node); p = BACKREFS_P(br); - fprintf(f, "<backref:%p>", node); + fprintf(f, "<backref%s:%p>", NODE_IS_CHECKER(node) ? "-checker" : "", node); for (i = 0; i < br->back_num; i++) { if (i > 0) fputs(", ", f); fprintf(f, "%d", p[i]); @@ -6672,7 +7220,7 @@ print_indent_tree(FILE* f, Node* node, int indent) } break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case NODE_CALL: { CallNode* cn = CALL_(node); @@ -6693,10 +7241,10 @@ print_indent_tree(FILE* f, Node* node, int indent) fprintf(f, "<enclosure:%p> ", node); switch (ENCLOSURE_(node)->type) { case ENCLOSURE_OPTION: - fprintf(f, "option:%d", ENCLOSURE_(node)->option); + fprintf(f, "option:%d", ENCLOSURE_(node)->o.options); break; case ENCLOSURE_MEMORY: - fprintf(f, "memory:%d", ENCLOSURE_(node)->regnum); + fprintf(f, "memory:%d", ENCLOSURE_(node)->m.regnum); break; case ENCLOSURE_STOP_BACKTRACK: fprintf(f, "stop-bt"); @@ -6709,6 +7257,24 @@ print_indent_tree(FILE* f, Node* node, int indent) print_indent_tree(f, NODE_BODY(node), indent + add); break; + case NODE_GIMMICK: + fprintf(f, "<gimmick:%p> ", node); + switch (GIMMICK_(node)->type) { + case GIMMICK_FAIL: + fprintf(f, 
"fail"); + break; + case GIMMICK_KEEP: + fprintf(f, "keep:%d", GIMMICK_(node)->id); + break; + case GIMMICK_SAVE: + fprintf(f, "save:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id); + break; + case GIMMICK_UPDATE_VAR: + fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id); + break; + } + break; + default: fprintf(f, "print_indent_tree: undefined node type %d\n", NODE_TYPE(node)); break; diff --git a/src/regenc.h b/src/regenc.h index 897c704..abc26be 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -239,5 +239,7 @@ ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; (ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_UPPER) ||\ ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER)) +#define ONIGENC_IS_UNICODE_ENCODING(enc) \ + ((enc)->is_code_ctype == onigenc_unicode_is_code_ctype) #endif /* REGENC_H */ diff --git a/src/regerror.c b/src/regerror.c index 0285272..a430e60 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -174,6 +174,12 @@ onig_error_code_to_format(int code) p = "group number is too big for capture history"; break; case ONIGERR_INVALID_CHAR_PROPERTY_NAME: p = "invalid character property name {%n}"; break; + case ONIGERR_INVALID_IF_ELSE_SYNTAX: + p = "invalid if-else syntax"; break; + case ONIGERR_INVALID_ABSENT_GROUP_PATTERN: + p = "invalid absent group pattern"; break; + case ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN: + p = "invalid absent group generator pattern"; break; case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION: p = "not supported encoding combination"; break; case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: diff --git a/src/regexec.c b/src/regexec.c index f66da1f..e7dfb96 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -305,32 +305,85 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) /** stack **/ #define INVALID_STACK_INDEX -1 +#define STK_ALT_FLAG 0x0001 + /* stack type */ /* used by normal-POP */ -#define STK_ALT 0x0001 -#define STK_LOOK_BEHIND_NOT 0x0002 -#define STK_POS_NOT 0x0003 +#define 
STK_SUPER_ALT STK_ALT_FLAG +#define STK_ALT (0x0002 | STK_ALT_FLAG) +#define STK_ALT_PREC_READ_NOT (0x0004 | STK_ALT_FLAG) +#define STK_ALT_LOOK_BEHIND_NOT (0x0006 | STK_ALT_FLAG) /* handled by normal-POP */ #define STK_MEM_START 0x0100 #define STK_MEM_END 0x8200 #define STK_REPEAT_INC 0x0300 #define STK_STATE_CHECK_MARK 0x1000 /* avoided by normal-POP */ +#define STK_VOID 0x0000 /* for fill a blank */ #define STK_EMPTY_CHECK_START 0x3000 #define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */ #define STK_MEM_END_MARK 0x8400 #define STK_POS 0x0500 /* used when POP-POS */ -#define STK_STOP_BT 0x0600 /* mark for "(?>...)" */ +#define STK_STOP_BACKTRACK 0x0600 /* mark for "(?>...)" */ #define STK_REPEAT 0x0700 #define STK_CALL_FRAME 0x0800 #define STK_RETURN 0x0900 -#define STK_VOID 0x0a00 /* for fill a blank */ +#define STK_SAVE_VAL 0x0a00 /* stack type check mask */ -#define STK_MASK_POP_USED 0x00ff -#define STK_MASK_TO_VOID_TARGET 0x10ff +#define STK_MASK_POP_USED STK_ALT_FLAG +#define STK_MASK_TO_VOID_TARGET 0x10fe #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ +typedef intptr_t StackIndex; + +typedef struct _StackType { + unsigned int type; + union { + struct { + UChar *pcode; /* byte code position */ + UChar *pstr; /* string position */ + UChar *pstr_prev; /* previous char position of pstr */ +#ifdef USE_COMBINATION_EXPLOSION_CHECK + unsigned int state_check; +#endif + } state; + struct { + int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ + UChar *pcode; /* byte code position (head of repeated target) */ + int num; /* repeat id */ + } repeat; + struct { + StackIndex si; /* index of stack */ + } repeat_inc; + struct { + int num; /* memory num */ + UChar *pstr; /* start/end position */ + /* Following information is set, if this stack type is MEM-START */ + StackIndex start; /* prev. info (for backtrack "(...)*" ) */ + StackIndex end; /* prev. 
info (for backtrack "(...)*" ) */ + } mem; + struct { + int num; /* null check id */ + UChar *pstr; /* start position */ + } empty_check; +#ifdef USE_CALL + struct { + UChar *ret_addr; /* byte code position */ + int num; /* null check id */ + UChar *pstr; /* string position */ + } call_frame; +#endif + struct { + int id; + enum SaveType type; + UChar* v; + UChar* v2; + } val; + } u; +} StackType; + + #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE #define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\ (msa).stack_p = (void* )0;\ @@ -396,28 +449,28 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) if (msa->stack_p) {\ is_alloca = 0;\ alloc_base = msa->stack_p;\ - stk_base = (OnigStackType* )(alloc_base\ - + (sizeof(OnigStackIndex) * msa->ptr_num));\ + stk_base = (StackType* )(alloc_base\ + + (sizeof(StackIndex) * msa->ptr_num));\ stk = stk_base;\ stk_end = stk_base + msa->stack_n;\ }\ else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\ is_alloca = 0;\ - alloc_base = (char* )xmalloc(sizeof(OnigStackIndex) * msa->ptr_num\ - + sizeof(OnigStackType) * (stack_num));\ + alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\ + + sizeof(StackType) * (stack_num));\ CHECK_NULL_RETURN_MEMERR(alloc_base);\ - stk_base = (OnigStackType* )(alloc_base\ - + (sizeof(OnigStackIndex) * msa->ptr_num));\ + stk_base = (StackType* )(alloc_base\ + + (sizeof(StackIndex) * msa->ptr_num));\ stk = stk_base;\ stk_end = stk_base + (stack_num);\ }\ else {\ is_alloca = 1;\ - alloc_base = (char* )xalloca(sizeof(OnigStackIndex) * msa->ptr_num\ - + sizeof(OnigStackType) * (stack_num));\ + alloc_base = (char* )xalloca(sizeof(StackIndex) * msa->ptr_num\ + + sizeof(StackType) * (stack_num));\ CHECK_NULL_RETURN_MEMERR(alloc_base);\ - stk_base = (OnigStackType* )(alloc_base\ - + (sizeof(OnigStackIndex) * msa->ptr_num));\ + stk_base = (StackType* )(alloc_base\ + + (sizeof(StackIndex) * msa->ptr_num));\ stk = stk_base;\ stk_end = stk_base + (stack_num);\ }\ @@ -427,8 +480,8 @@ 
onig_region_copy(OnigRegion* to, OnigRegion* from) #define STACK_SAVE do{\ msa->stack_n = stk_end - stk_base;\ if (is_alloca != 0) {\ - size_t size = sizeof(OnigStackIndex) * msa->ptr_num \ - + sizeof(OnigStackType) * msa->stack_n;\ + size_t size = sizeof(StackIndex) * msa->ptr_num \ + + sizeof(StackType) * msa->stack_n;\ msa->stack_p = xmalloc(size);\ CHECK_NULL_RETURN_MEMERR(msa->stack_p);\ xmemcpy(msa->stack_p, alloc_base, size);\ @@ -439,8 +492,8 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) } while(0) #define UPDATE_FOR_STACK_REALLOC do{\ - repeat_stk = (OnigStackIndex* )alloc_base;\ - mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);\ + repeat_stk = (StackIndex* )alloc_base;\ + mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\ mem_end_stk = mem_start_stk + num_mem + 1;\ } while(0) @@ -461,8 +514,8 @@ onig_set_match_stack_limit_size(unsigned int size) static int stack_double(int is_alloca, char** arg_alloc_base, - OnigStackType** arg_stk_base, - OnigStackType** arg_stk_end, OnigStackType** arg_stk, + StackType** arg_stk_base, + StackType** arg_stk_end, StackType** arg_stk, OnigMatchArg* msa) { unsigned int n; @@ -471,7 +524,7 @@ stack_double(int is_alloca, char** arg_alloc_base, size_t new_size; char* alloc_base; char* new_alloc_base; - OnigStackType *stk_base, *stk_end, *stk; + StackType *stk_base, *stk_end, *stk; alloc_base = *arg_alloc_base; stk_base = *arg_stk_base; @@ -479,9 +532,9 @@ stack_double(int is_alloca, char** arg_alloc_base, stk = *arg_stk; n = stk_end - stk_base; - size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n; + size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; n *= 2; - new_size = sizeof(OnigStackIndex) * msa->ptr_num + sizeof(OnigStackType) * n; + new_size = sizeof(StackIndex) * msa->ptr_num + sizeof(StackType) * n; if (is_alloca != 0) { new_alloc_base = (char* )xmalloc(new_size); if (IS_NULL(new_alloc_base)) { @@ -507,8 +560,8 @@ stack_double(int is_alloca, 
char** arg_alloc_base, alloc_base = new_alloc_base; used = stk - stk_base; *arg_alloc_base = alloc_base; - *arg_stk_base = (OnigStackType* )(alloc_base - + (sizeof(OnigStackIndex) * msa->ptr_num)); + *arg_stk_base = (StackType* )(alloc_base + + (sizeof(StackIndex) * msa->ptr_num)); *arg_stk = *arg_stk_base + used; *arg_stk_end = *arg_stk_base + n; return 0; @@ -516,8 +569,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_ENSURE(n) do {\ if (stk_end - stk < (n)) {\ - int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk,\ - msa);\ + int r = stack_double(is_alloca, &alloc_base, &stk_base, &stk_end, &stk, msa);\ if (r != 0) { STACK_SAVE; return r; } \ is_alloca = 0;\ UPDATE_FOR_STACK_REALLOC;\ @@ -610,12 +662,14 @@ stack_double(int is_alloca, char** arg_alloc_base, } while(0) #endif /* USE_COMBINATION_EXPLOSION_CHECK */ -#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) +#define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) +#define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev) #define STACK_PUSH_POS(s,sprev) STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev) -#define STACK_PUSH_POS_NOT(pat,s,sprev) STACK_PUSH(STK_POS_NOT,pat,s,sprev) -#define STACK_PUSH_STOP_BT STACK_PUSH_TYPE(STK_STOP_BT) -#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev) \ - STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev) +#define STACK_PUSH_ALT_PREC_READ_NOT(pat,s,sprev) \ + STACK_PUSH(STK_ALT_PREC_READ_NOT,pat,s,sprev) +#define STACK_PUSH_STOP_BACKTRACK STACK_PUSH_TYPE(STK_STOP_BACKTRACK) +#define STACK_PUSH_ALT_LOOK_BEHIND_NOT(pat,s,sprev) \ + STACK_PUSH(STK_ALT_LOOK_BEHIND_NOT,pat,s,sprev) #define STACK_PUSH_REPEAT(id, pat) do {\ STACK_ENSURE(1);\ @@ -725,6 +779,97 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_INC;\ } while(0) +#define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\ + STACK_ENSURE(1);\ + stk->type = STK_SAVE_VAL;\ + stk->u.val.id = (sid);\ + stk->u.val.type = (stype);\ + stk->u.val.v = 
(UChar* )(sval);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\ + STACK_ENSURE(1);\ + stk->type = STK_SAVE_VAL;\ + stk->u.val.id = (sid);\ + stk->u.val.type = (stype);\ + stk->u.val.v = (UChar* )(sval);\ + stk->u.val.v2 = sprev;\ + STACK_INC;\ +} while(0) + +#define STACK_GET_SAVE_VAL_TYPE_LAST(stype, sval) do {\ + StackType *k = stk;\ + while (k > stk_base) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST"); \ + if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)) {\ + (sval) = k->u.val.v;\ + break;\ + }\ + }\ +} while (0) + +#define STACK_GET_SAVE_VAL_TYPE_LAST_ID(stype, sid, sval) do { \ + int level = 0;\ + StackType *k = stk;\ + while (k > stk_base) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ + if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ + && k->u.val.id == (sid)) {\ + if (level == 0) {\ + (sval) = k->u.val.v;\ + break;\ + }\ + }\ + else if (k->type == STK_CALL_FRAME)\ + level--;\ + else if (k->type == STK_RETURN)\ + level++;\ + }\ +} while (0) + +#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(stype, sid, sval) do { \ + int level = 0;\ + StackType *k = stk;\ + while (k > stk_base) {\ + k--;\ + STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ + if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ + && k->u.val.id == (sid)) {\ + if (level == 0) {\ + (sval) = k->u.val.v;\ + sprev = k->u.val.v2;\ + break;\ + }\ + }\ + else if (k->type == STK_CALL_FRAME)\ + level--;\ + else if (k->type == STK_RETURN)\ + level++;\ + }\ +} while (0) + +#define STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM(stype, sid, sval, stk_from) do { \ + int level = 0;\ + StackType *k = (stk_from);\ + while (k > stk_base) {\ + STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID_FROM"); \ + if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ + && k->u.val.id == (sid)) {\ + if (level == 0) {\ + (sval) = k->u.val.v;\ + break;\ + }\ + }\ + else if (k->type == STK_CALL_FRAME)\ + 
level--;\ + else if (k->type == STK_RETURN)\ + level++;\ + k--;\ + }\ +} while (0) #ifdef ONIG_DEBUG #define STACK_BASE_CHECK(p, at) \ @@ -785,11 +930,11 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while(0) -#define STACK_POP_TIL_POS_NOT do {\ +#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_POS_NOT"); \ - if (stk->type == STK_POS_NOT) break;\ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_PREC_READ_NOT"); \ + if (stk->type == STK_ALT_PREC_READ_NOT) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ @@ -805,11 +950,11 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while(0) -#define STACK_POP_TIL_LOOK_BEHIND_NOT do {\ +#define STACK_POP_TIL_ALT_LOOK_BEHIND_NOT do {\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_LOOK_BEHIND_NOT"); \ - if (stk->type == STK_LOOK_BEHIND_NOT) break;\ + STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_LOOK_BEHIND_NOT"); \ + if (stk->type == STK_ALT_LOOK_BEHIND_NOT) break;\ else if (stk->type == STK_MEM_START) {\ mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\ mem_end_stk[stk->u.mem.num] = stk->u.mem.end;\ @@ -840,15 +985,15 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while(0) -#define STACK_STOP_BT_END do {\ - OnigStackType *k = stk;\ +#define STACK_STOP_BACKTRACK_END do {\ + StackType *k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \ + STACK_BASE_CHECK(k, "STACK_STOP_BACKTRACK_END"); \ if (IS_TO_VOID_TARGET(k)) {\ k->type = STK_VOID;\ }\ - else if (k->type == STK_STOP_BT) {\ + else if (k->type == STK_STOP_BACKTRACK) {\ k->type = STK_VOID;\ break;\ }\ @@ -856,7 +1001,7 @@ stack_double(int is_alloca, char** arg_alloc_base, } while(0) #define STACK_EMPTY_CHECK(isnull,id,s) do {\ - OnigStackType* k = stk;\ + StackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \ @@ -871,7 +1016,7 @@ 
stack_double(int is_alloca, char** arg_alloc_base, #ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT #define STACK_EMPTY_CHECK_MEMST(isnull,id,s,reg) do {\ - OnigStackType* k = stk;\ + StackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \ @@ -912,7 +1057,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_EMPTY_CHECK_MEMST_REC(isnull,id,s,reg) do {\ int level = 0;\ - OnigStackType* k = stk;\ + StackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \ @@ -960,7 +1105,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #else #define STACK_EMPTY_CHECK_REC(isnull,id,s) do {\ int level = 0;\ - OnigStackType* k = stk;\ + StackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_REC"); \ @@ -1000,7 +1145,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_RETURN(addr) do {\ int level = 0;\ - OnigStackType* k = stk;\ + StackType* k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_RETURN"); \ @@ -1074,27 +1219,26 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, #define IS_EMPTY_STR (str == end) -#define ON_STR_BEGIN(s) ((s) == str) -#define ON_STR_END(s) ((s) == end) -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#define ON_STR_BEGIN(s) ((s) == str) +#define ON_STR_END(s) ((s) == end) #define DATA_ENSURE_CHECK1 (s < right_range) #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail -#else -#define DATA_ENSURE_CHECK1 (s < end) -#define DATA_ENSURE_CHECK(n) (s + (n) <= end) -#define DATA_ENSURE(n) if (s + (n) > end) goto fail -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ +#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE +#define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range +#else +#define INIT_RIGHT_RANGE right_range = (UChar* )end +#endif #ifdef USE_CAPTURE_HISTORY static int 
-make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp, - OnigStackType* stk_top, UChar* str, regex_t* reg) +make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, + StackType* stk_top, UChar* str, regex_t* reg) { int n, r; OnigCaptureTreeNode* child; - OnigStackType* k = *kp; + StackType* k = *kp; while (k < stk_top) { if (k->type == STK_MEM_START) { @@ -1143,13 +1287,13 @@ static int mem_is_in_memp(int mem, int num, UChar* memp) } static int backref_match_at_nested_level(regex_t* reg - , OnigStackType* top, OnigStackType* stk_base + , StackType* top, StackType* stk_base , int ignore_case, int case_fold_flag , int nest, int mem_num, UChar* memp, UChar** s, const UChar* send) { UChar *ss, *p, *pstart, *pend = NULL_UCHARP; int level; - OnigStackType* k; + StackType* k; level = 0; k = top; @@ -1197,6 +1341,37 @@ static int backref_match_at_nested_level(regex_t* reg return 0; } + +static int +backref_check_at_nested_level(regex_t* reg, + StackType* top, StackType* stk_base, + int nest, int mem_num, UChar* memp) +{ + int level; + StackType* k; + + level = 0; + k = top; + k--; + while (k >= stk_base) { + if (k->type == STK_CALL_FRAME) { + level--; + } + else if (k->type == STK_RETURN) { + level++; + } + else if (level == nest) { + if (k->type == STK_MEM_END) { + if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) { + return 1; + } + } + } + k--; + } + + return 0; +} #endif /* USE_BACKREF_WITH_LEVEL */ @@ -1300,7 +1475,7 @@ typedef struct { static int match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - const UChar* right_range, + const UChar* in_right_range, #endif const UChar* sstart, UChar* sprev, OnigMatchArg* msa) { @@ -1311,13 +1486,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, MemNumType mem; RelAddrType addr; UChar *s, *q, *sbegin; + UChar *right_range; int is_alloca; char *alloc_base; - OnigStackType *stk_base, *stk, *stk_end; - OnigStackType *stkp; 
/* used as any purpose. */ - OnigStackIndex si; - OnigStackIndex *repeat_stk; - OnigStackIndex *mem_start_stk, *mem_end_stk; + StackType *stk_base, *stk, *stk_end; + StackType *stkp; /* used as any purpose. */ + StackIndex si; + StackIndex *repeat_stk; + StackIndex *mem_start_stk, *mem_end_stk; + UChar* keep; #ifdef USE_COMBINATION_EXPLOSION_CHECK int scv; unsigned char* state_check_buff = msa->state_check_buff; @@ -1346,7 +1523,9 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ best_len = ONIG_MISMATCH; - s = (UChar* )sstart; + keep = s = (UChar* )sstart; + INIT_RIGHT_RANGE; + while (1) { #ifdef ONIG_DEBUG_MATCH { @@ -1394,12 +1573,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, best_len = n; region = msa->region; if (region) { + if (keep > s) keep = s; + #ifdef USE_POSIX_API_REGION_OPTION if (IS_POSIX_REGION(msa->options)) { posix_regmatch_t* rmt = (posix_regmatch_t* )region; - rmt[0].rm_so = sstart - str; - rmt[0].rm_eo = s - str; + rmt[0].rm_so = keep - str; + rmt[0].rm_eo = s - str; for (i = 1; i <= num_mem; i++) { if (mem_end_stk[i] != INVALID_STACK_INDEX) { if (MEM_STATUS_AT(reg->bt_mem_start, i)) @@ -1418,8 +1599,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else { #endif /* USE_POSIX_API_REGION_OPTION */ - region->beg[0] = sstart - str; - region->end[0] = s - str; + region->beg[0] = keep - str; + region->end[0] = s - str; for (i = 1; i <= num_mem; i++) { if (mem_end_stk[i] != INVALID_STACK_INDEX) { if (MEM_STATUS_AT(reg->bt_mem_start, i)) @@ -1451,8 +1632,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } node->group = 0; - node->beg = sstart - str; - node->end = s - str; + node->beg = keep - str; + node->end = s - str; stkp = stk_base; r = make_capture_history_tree(region->history_root, &stkp, @@ -2138,7 +2319,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); 
GET_MEMNUM_INC(mem, p); - mem_start_stk[mem] = (OnigStackIndex )((void* )s); + mem_start_stk[mem] = (StackIndex )((void* )s); MOP_OUT; continue; break; @@ -2152,12 +2333,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (OnigStackIndex )((void* )s); + mem_end_stk[mem] = (StackIndex )((void* )s); MOP_OUT; continue; break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); GET_MEMNUM_INC(mem, p); STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ @@ -2169,13 +2350,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); GET_MEMNUM_INC(mem, p); - mem_end_stk[mem] = (OnigStackIndex )((void* )s); + mem_end_stk[mem] = (StackIndex )((void* )s); STACK_GET_MEM_START(mem, stkp); if (MEM_STATUS_AT(reg->bt_mem_start, mem)) mem_start_stk[mem] = GET_STACK_INDEX(stkp); else - mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr); + mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); STACK_PUSH_MEM_END_MARK(mem); MOP_OUT; @@ -2193,16 +2374,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto backref; break; - case OP_BACKREFN: MOP_IN(OP_BACKREFN); + case OP_BACKREF_N: MOP_IN(OP_BACKREF_N); GET_MEMNUM_INC(mem, p); backref: { int len; UChar *pstart, *pend; - /* if you want to remove following line, - you should check in parse and compile time. */ - if (mem > num_mem) goto fail; if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; @@ -2226,15 +2404,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } break; - case OP_BACKREFN_IC: MOP_IN(OP_BACKREFN_IC); + case OP_BACKREF_N_IC: MOP_IN(OP_BACKREF_N_IC); GET_MEMNUM_INC(mem, p); { int len; UChar *pstart, *pend; - /* if you want to remove following line, - you should check in parse and compile time. 
*/ - if (mem > num_mem) goto fail; if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail; if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail; @@ -2364,6 +2539,45 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; #endif + case OP_BACKREF_CHECK: MOP_IN(OP_BACKREF_CHECK); + { + GET_LENGTH_INC(tlen, p); + for (i = 0; i < tlen; i++) { + GET_MEMNUM_INC(mem, p); + + if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue; + if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue; + + p += (SIZE_MEMNUM * (tlen - i - 1)); + break; /* success */ + } + if (i == tlen) goto fail; + MOP_OUT; + continue; + } + break; + +#ifdef USE_BACKREF_WITH_LEVEL + case OP_BACKREF_CHECK_WITH_LEVEL: + { + LengthType level; + + GET_LENGTH_INC(level, p); + GET_LENGTH_INC(tlen, p); + + if (backref_check_at_nested_level(reg, stk, stk_base, + (int )level, (int )tlen, p) != 0) { + p += (SIZE_MEMNUM * tlen); + } + else + goto fail; + + MOP_OUT; + continue; + } + break; +#endif + #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH); GET_OPTION_INC(option, p); @@ -2440,7 +2654,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; #endif -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case OP_EMPTY_CHECK_END_MEMST_PUSH: MOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH); { @@ -2484,6 +2698,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, continue; break; + case OP_PUSH_SUPER: MOP_IN(OP_PUSH_SUPER); + GET_RELADDR_INC(addr, p); + STACK_PUSH_SUPER_ALT(p + addr, s, sprev); + MOP_OUT; + continue; + break; + #ifdef USE_COMBINATION_EXPLOSION_CHECK case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); GET_STATE_CHECK_NUM_INC(mem, p); @@ -2652,13 +2873,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto repeat_inc_ng; break; - case OP_PUSH_POS: MOP_IN(OP_PUSH_POS); + case OP_PREC_READ_START: MOP_IN(OP_PREC_READ_START); STACK_PUSH_POS(s, sprev); MOP_OUT; continue; break; - case OP_POP_POS: MOP_IN(OP_POP_POS); + 
case OP_PREC_READ_END: MOP_IN(OP_PREC_READ_END); { STACK_POS_END(stkp); s = stkp->u.state.pstr; @@ -2668,26 +2889,26 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, continue; break; - case OP_PUSH_POS_NOT: MOP_IN(OP_PUSH_POS_NOT); + case OP_PUSH_PREC_READ_NOT: MOP_IN(OP_PUSH_PREC_READ_NOT); GET_RELADDR_INC(addr, p); - STACK_PUSH_POS_NOT(p + addr, s, sprev); + STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev); MOP_OUT; continue; break; - case OP_FAIL_POS: MOP_IN(OP_FAIL_POS); - STACK_POP_TIL_POS_NOT; + case OP_FAIL_PREC_READ_NOT: MOP_IN(OP_FAIL_PREC_READ_NOT); + STACK_POP_TIL_ALT_PREC_READ_NOT; goto fail; break; case OP_PUSH_STOP_BT: MOP_IN(OP_PUSH_STOP_BT); - STACK_PUSH_STOP_BT; + STACK_PUSH_STOP_BACKTRACK; MOP_OUT; continue; break; case OP_POP_STOP_BT: MOP_IN(OP_POP_STOP_BT); - STACK_STOP_BT_END; + STACK_STOP_BACKTRACK_END; MOP_OUT; continue; break; @@ -2712,7 +2933,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, /* goto fail; */ } else { - STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev); + STACK_PUSH_ALT_LOOK_BEHIND_NOT(p + addr, s, sprev); s = q; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } @@ -2721,11 +2942,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; case OP_FAIL_LOOK_BEHIND_NOT: MOP_IN(OP_FAIL_LOOK_BEHIND_NOT); - STACK_POP_TIL_LOOK_BEHIND_NOT; + STACK_POP_TIL_ALT_LOOK_BEHIND_NOT; goto fail; break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case OP_CALL: MOP_IN(OP_CALL); GET_ABSADDR_INC(addr, p); STACK_PUSH_CALL_FRAME(p); @@ -2742,6 +2963,56 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; #endif + case OP_PUSH_SAVE_VAL: MOP_IN(OP_PUSH_SAVE_VAL); + { + SaveType type; + GET_SAVE_TYPE_INC(type, p); + GET_MEMNUM_INC(mem, p); /* mem: save id */ + switch ((enum SaveType )type) { + case SAVE_KEEP: + STACK_PUSH_SAVE_VAL(mem, type, s); + break; + + case SAVE_S: + STACK_PUSH_SAVE_VAL_WITH_SPREV(mem, type, s); + break; + + case SAVE_RIGHT_RANGE: + STACK_PUSH_SAVE_VAL(mem, 
SAVE_RIGHT_RANGE, right_range); + break; + } + } + MOP_OUT; + continue; + break; + + case OP_UPDATE_VAR: MOP_IN(OP_UPDATE_VAR); + { + UpdateVarType type; + GET_UPDATE_VAR_TYPE_INC(type, p); + GET_MEMNUM_INC(mem, p); /* mem: save id */ + switch ((enum UpdateVarType )type) { + case UPDATE_VAR_KEEP_FROM_STACK_LAST: + STACK_GET_SAVE_VAL_TYPE_LAST(SAVE_KEEP, keep); + break; + case UPDATE_VAR_S_FROM_STACK: + STACK_GET_SAVE_VAL_TYPE_LAST_ID_WITH_SPREV(SAVE_S, mem, s); + break; + case UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK: + STACK_GET_SAVE_VAL_TYPE_LAST_ID(SAVE_S, mem, right_range); + break; + case UPDATE_VAR_RIGHT_RANGE_FROM_STACK: + STACK_GET_SAVE_VAL_TYPE_LAST_ID(SAVE_RIGHT_RANGE, mem, right_range); + break; + case UPDATE_VAR_RIGHT_RANGE_INIT: + INIT_RIGHT_RANGE; + break; + } + } + MOP_OUT; + continue; + break; + case OP_FINISH: goto finish; break; @@ -3248,7 +3519,7 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, } } else { - if (reg->dmax != ONIG_INFINITE_DISTANCE) { + if (reg->dmax != INFINITE_LEN) { if (p - str < reg->dmax) { *low = (UChar* )str; if (low_prev) @@ -3377,7 +3648,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, } /* no needs to adjust *high, *high is used as range check only */ - if (reg->dmax != ONIG_INFINITE_DISTANCE) { + if (reg->dmax != INFINITE_LEN) { *low = p - reg->dmax; *high = p - reg->dmin; *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high); @@ -3514,7 +3785,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, end_buf: if ((OnigLen )(max_semi_end - str) < reg->anchor_dmin) - goto mismatch_no_msa; + goto mismatch_no_msa; if (range > start) { if ((OnigLen )(min_semi_end - start) > reg->anchor_dmax) { @@ -3616,7 +3887,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, sch_range = (UChar* )range; if (reg->dmax != 0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) + if (reg->dmax == INFINITE_LEN) sch_range = (UChar* )end; else { sch_range += 
reg->dmax; @@ -3627,7 +3898,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if ((end - start) < reg->threshold_len) goto mismatch; - if (reg->dmax != ONIG_INFINITE_DISTANCE) { + if (reg->dmax != INFINITE_LEN) { do { if (! forward_search_range(reg, str, end, s, sch_range, &low, &high, &low_prev)) goto mismatch; @@ -3689,7 +3960,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, else adjrange = (UChar* )end; - if (reg->dmax != ONIG_INFINITE_DISTANCE && + if (reg->dmax != INFINITE_LEN && (end - range) >= reg->threshold_len) { do { sch_start = s + reg->dmax; @@ -3714,7 +3985,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, sch_start = s; if (reg->dmax != 0) { - if (reg->dmax == ONIG_INFINITE_DISTANCE) + if (reg->dmax == INFINITE_LEN) sch_start = (UChar* )end; else { sch_start += reg->dmax; diff --git a/src/regint.h b/src/regint.h index 8da27d2..185f4b6 100644 --- a/src/regint.h +++ b/src/regint.h @@ -57,7 +57,7 @@ /* config */ /* spec. config */ #define USE_NAMED_GROUP -#define USE_SUBEXP_CALL +#define USE_CALL #define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ #define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ @@ -196,6 +196,8 @@ typedef int intptr_t; #define CHECK_NULL_RETURN_MEMERR(p) if (IS_NULL(p)) return ONIGERR_MEMORY #define NULL_UCHARP ((UChar* )0) +#define INFINITE_LEN ONIG_INFINITE_DISTANCE + #ifdef PLATFORM_UNALIGNED_WORD_ACCESS #define PLATFORM_GET_INC(val,p,type) do{\ @@ -211,7 +213,11 @@ typedef int intptr_t; } while(0) /* sizeof(OnigCodePoint) */ -#define WORD_ALIGNMENT_SIZE SIZEOF_LONG +#ifdef SIZEOF_SIZE_T +# define WORD_ALIGNMENT_SIZE SIZEOF_SIZE_T +#else +# define WORD_ALIGNMENT_SIZE SIZEOF_LONG +#endif #define GET_ALIGNMENT_PAD_SIZE(addr,pad_size) do {\ (pad_size) = WORD_ALIGNMENT_SIZE \ @@ -226,10 +232,20 @@ typedef int intptr_t; #endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ +typedef 
struct { + int num_keeper; + int* keepers; +} RegExt; + +#define REG_EXTP(reg) (RegExt* )((reg)->chain) +#define REG_EXTPL(reg) ((reg)->chain) + /* stack pop level */ -#define STACK_POP_LEVEL_FREE 0 -#define STACK_POP_LEVEL_MEM_START 1 -#define STACK_POP_LEVEL_ALL 2 +enum StackPopLevel { + STACK_POP_LEVEL_FREE = 0, + STACK_POP_LEVEL_MEM_START = 1, + STACK_POP_LEVEL_ALL =2 +}; /* optimize flags */ #define ONIG_OPTIMIZE_NONE 0 @@ -482,11 +498,13 @@ enum OpCode { OP_BACKREF1, OP_BACKREF2, - OP_BACKREFN, - OP_BACKREFN_IC, + OP_BACKREF_N, + OP_BACKREF_N_IC, OP_BACKREF_MULTI, OP_BACKREF_MULTI_IC, - OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */ + OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */ + OP_BACKREF_CHECK, /* (?(n)), (?('name')) */ + OP_BACKREF_CHECK_WITH_LEVEL, /* (?(n)), (?('name')) */ OP_MEMORY_START, OP_MEMORY_START_PUSH, /* push back-tracker to stack */ @@ -498,6 +516,7 @@ enum OpCode { OP_FAIL, /* pop stack and move */ OP_JUMP, OP_PUSH, + OP_PUSH_SUPER, OP_POP, OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ @@ -512,10 +531,10 @@ enum OpCode { OP_EMPTY_CHECK_END_MEMST, /* null loop checker end (with capture status) */ OP_EMPTY_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */ - OP_PUSH_POS, /* (?=...) start */ - OP_POP_POS, /* (?=...) end */ - OP_PUSH_POS_NOT, /* (?!...) start */ - OP_FAIL_POS, /* (?!...) end */ + OP_PREC_READ_START, /* (?=...) start */ + OP_PREC_READ_END, /* (?=...) end */ + OP_PUSH_PREC_READ_NOT, /* (?!...) start */ + OP_FAIL_PREC_READ_NOT, /* (?!...) end */ OP_PUSH_STOP_BT, /* (?>...) start */ OP_POP_STOP_BT, /* (?>...) end */ OP_LOOK_BEHIND, /* (?<=...) 
start (no needs end opcode) */ @@ -524,6 +543,8 @@ enum OpCode { OP_CALL, /* \g<name> */ OP_RETURN, + OP_PUSH_SAVE_VAL, + OP_UPDATE_VAR, OP_STATE_CHECK_PUSH, /* combination explosion check and push */ OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ @@ -536,6 +557,20 @@ enum OpCode { OP_SET_OPTION /* set option */ }; +enum SaveType { + SAVE_KEEP = 0, /* SAVE S */ + SAVE_S = 1, + SAVE_RIGHT_RANGE = 2, +}; + +enum UpdateVarType { + UPDATE_VAR_KEEP_FROM_STACK_LAST = 0, + UPDATE_VAR_S_FROM_STACK = 1, + UPDATE_VAR_RIGHT_RANGE_FROM_STACK = 2, + UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK = 3, + UPDATE_VAR_RIGHT_RANGE_INIT = 4, +}; + typedef int RelAddrType; typedef int AbsAddrType; typedef int LengthType; @@ -543,6 +578,8 @@ typedef int RepeatNumType; typedef int MemNumType; typedef short int StateCheckNumType; typedef void* PointerType; +typedef int SaveType; +typedef int UpdateVarType; #define SIZE_OPCODE 1 #define SIZE_RELADDR sizeof(RelAddrType) @@ -554,7 +591,8 @@ typedef void* PointerType; #define SIZE_OPTION sizeof(OnigOptionType) #define SIZE_CODE_POINT sizeof(OnigCodePoint) #define SIZE_POINTER sizeof(PointerType) - +#define SIZE_SAVE_TYPE sizeof(SaveType) +#define SIZE_UPDATE_VAR_TYPE sizeof(UpdateVarType) #define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType) #define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType) @@ -564,6 +602,8 @@ typedef void* PointerType; #define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) #define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) #define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) +#define GET_SAVE_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, SaveType) +#define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType) /* code point's address must be aligned address. 
*/ #define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) @@ -578,15 +618,16 @@ typedef void* PointerType; #define SIZE_OP_ANYCHAR_STAR_PEEK_NEXT (SIZE_OPCODE + 1) #define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR) #define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_PUSH_SUPER (SIZE_OPCODE + SIZE_RELADDR) #define SIZE_OP_POP SIZE_OPCODE #define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1) #define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1) #define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM) #define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM) -#define SIZE_OP_PUSH_POS SIZE_OPCODE -#define SIZE_OP_PUSH_POS_NOT (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_POP_POS SIZE_OPCODE -#define SIZE_OP_FAIL_POS SIZE_OPCODE +#define SIZE_OP_PREC_READ_START SIZE_OPCODE +#define SIZE_OP_PUSH_PREC_READ_NOT (SIZE_OPCODE + SIZE_RELADDR) +#define SIZE_OP_PREC_READ_END SIZE_OPCODE +#define SIZE_OP_FAIL_PREC_READ_NOT SIZE_OPCODE #define SIZE_OP_SET_OPTION (SIZE_OPCODE + SIZE_OPTION) #define SIZE_OP_SET_OPTION_PUSH (SIZE_OPCODE + SIZE_OPTION) #define SIZE_OP_FAIL SIZE_OPCODE @@ -605,6 +646,8 @@ typedef void* PointerType; #define SIZE_OP_FAIL_LOOK_BEHIND_NOT SIZE_OPCODE #define SIZE_OP_CALL (SIZE_OPCODE + SIZE_ABSADDR) #define SIZE_OP_RETURN SIZE_OPCODE +#define SIZE_OP_PUSH_SAVE_VAL (SIZE_OPCODE + SIZE_SAVE_TYPE + SIZE_MEMNUM) +#define SIZE_OP_UPDATE_VAR (SIZE_OPCODE + SIZE_UPDATE_VAR_TYPE + SIZE_MEMNUM) #ifdef USE_COMBINATION_EXPLOSION_CHECK #define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) @@ -664,48 +707,6 @@ typedef void* PointerType; #define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) #define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) -typedef intptr_t OnigStackIndex; - -typedef struct _OnigStackType { - unsigned int type; - union { - struct { - UChar *pcode; /* byte code position */ - UChar *pstr; /* string position */ - UChar *pstr_prev; /* previous char position of pstr 
*/ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - unsigned int state_check; -#endif - } state; - struct { - int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ - UChar *pcode; /* byte code position (head of repeated target) */ - int num; /* repeat id */ - } repeat; - struct { - OnigStackIndex si; /* index of stack */ - } repeat_inc; - struct { - int num; /* memory num */ - UChar *pstr; /* start/end position */ - /* Following information is set, if this stack type is MEM-START */ - OnigStackIndex start; /* prev. info (for backtrack "(...)*" ) */ - OnigStackIndex end; /* prev. info (for backtrack "(...)*" ) */ - } mem; - struct { - int num; /* null check id */ - UChar *pstr; /* start position */ - } empty_check; -#ifdef USE_SUBEXP_CALL - struct { - UChar *ret_addr; /* byte code position */ - int num; /* null check id */ - UChar *pstr; /* string position */ - } call_frame; -#endif - } u; -} OnigStackType; - typedef struct { void* stack_p; int stack_n; diff --git a/src/regparse.c b/src/regparse.c index a5f8e5b..25291c5 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -48,6 +48,11 @@ OnigSyntaxType OnigSyntaxRuby = { , ( ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_RUBY | ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | + ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | + ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | + ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | + ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT | + ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP | ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | @@ -179,7 +184,10 @@ static int backref_rel_to_abs(int rel_no, ScanEnv* env) } } -#define ONOFF(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) +#define OPTION_ON(v,f) ((v) |= (f)) +#define OPTION_OFF(v,f) ((v) &= ~(f)) + +#define OPTION_NEGATE(v,f,negative) (negative) ? ((v) &= ~(f)) : ((v) |= (f)) #define MBCODE_START_POS(enc) \ (OnigCodePoint )(ONIGENC_MBC_MINLEN(enc) > 1 ? 
0 : 0x80) @@ -301,6 +309,34 @@ strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) } #endif +static int +save_entry(ScanEnv* env, enum SaveType type, int* id) +{ + int nid = env->save_num; + +#if 0 + if (IS_NULL(env->saves)) { + int n = 10; + env->saves = (SaveItem* )xmalloc(sizeof(SaveItem) * n); + CHECK_NULL_RETURN_MEMERR(env->saves); + env->save_alloc_num = n; + } + else if (env->save_alloc_num <= nid) { + int n = env->save_alloc_num * 2; + SaveItem* p = (SaveItem* )xrealloc(env->saves, sizeof(SaveItem) * n); + CHECK_NULL_RETURN_MEMERR(p); + env->saves = p; + env->save_alloc_num = n; + } + + env->saves[nid].type = type; +#endif + + env->save_num++; + *id = nid; + return 0; +} + /* scan pattern methods */ #define PEND_VALUE 0 @@ -990,7 +1026,7 @@ scan_env_clear(ScanEnv* env) env->error_end = (UChar* )NULL; env->num_call = 0; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL env->unset_addr_list = NULL; env->has_call_zero = 0; #endif @@ -1011,6 +1047,10 @@ scan_env_clear(ScanEnv* env) env->has_recursion = 0; #endif env->parse_depth = 0; + env->keep_num = 0; + env->save_num = 0; + env->save_alloc_num = 0; + env->saves = 0; } static int @@ -1075,7 +1115,7 @@ onig_node_free(Node* node) #endif switch (NODE_TYPE(node)) { - case NODE_STR: + case NODE_STRING: if (STR_(node)->capa != 0 && IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) { xfree(STR_(node)->s); @@ -1103,13 +1143,25 @@ onig_node_free(Node* node) } break; - case NODE_BREF: - if (IS_NOT_NULL(BREF_(node)->back_dynamic)) - xfree(BREF_(node)->back_dynamic); + case NODE_BACKREF: + if (IS_NOT_NULL(BACKREF_(node)->back_dynamic)) + xfree(BACKREF_(node)->back_dynamic); break; - case NODE_QUANT: case NODE_ENCLOSURE: + if (NODE_BODY(node)) + onig_node_free(NODE_BODY(node)); + + { + EnclosureNode* en = ENCLOSURE_(node); + if (en->type == ENCLOSURE_IF_ELSE) { + onig_node_free(en->te.Then); + onig_node_free(en->te.Else); + } + } + break; + + case NODE_QUANT: case NODE_ANCHOR: if (NODE_BODY(node)) 
onig_node_free(NODE_BODY(node)); @@ -1117,12 +1169,35 @@ onig_node_free(Node* node) case NODE_CTYPE: case NODE_CALL: + case NODE_GIMMICK: break; } xfree(node); } +static void +cons_node_free_alone(Node* node) +{ + NODE_CAR(node) = 0; + NODE_CDR(node) = 0; + onig_node_free(node); +} + +extern void +list_node_free_not_car(Node* node) +{ + Node* next_node; + + start: + if (IS_NULL(node)) return; + + next_node = NODE_CDR(node); + xfree(node); + node = next_node; + goto start; +} + static Node* node_new(void) { @@ -1154,7 +1229,7 @@ node_new_cclass(void) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NODE_TYPE(node, NODE_CCLASS); + NODE_SET_TYPE(node, NODE_CCLASS); initialize_cclass(CCLASS_(node)); return node; } @@ -1165,19 +1240,61 @@ node_new_ctype(int type, int not) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NODE_TYPE(node, NODE_CTYPE); + NODE_SET_TYPE(node, NODE_CTYPE); CTYPE_(node)->ctype = type; CTYPE_(node)->not = not; return node; } static Node* +node_new_anychar(void) +{ + Node* node = node_new_ctype(CTYPE_ANYCHAR, 0); + return node; +} + +static Node* +node_new_anychar_with_fixed_option(OnigOptionType option) +{ + CtypeNode* ct; + Node* node; + + node = node_new_anychar(); + ct = CTYPE_(node); + ct->options = option; + NODE_STATUS_ADD(node, NST_FIXED_OPTION); + return node; +} + +static int +node_new_no_newline(Node** node, ScanEnv* env) +{ + Node* n; + + n = node_new_anychar_with_fixed_option(ONIG_OPTION_NONE); + CHECK_NULL_RETURN_MEMERR(n); + *node = n; + return 0; +} + +static int +node_new_true_anychar(Node** node, ScanEnv* env) +{ + Node* n; + + n = node_new_anychar_with_fixed_option(ONIG_OPTION_MULTILINE); + CHECK_NULL_RETURN_MEMERR(n); + *node = n; + return 0; +} + +static Node* node_new_list(Node* left, Node* right) { Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NODE_TYPE(node, NODE_LIST); + NODE_SET_TYPE(node, NODE_LIST); NODE_CAR(node) = left; NODE_CDR(node) = right; return node; @@ -1213,19 +1330,65 @@ 
onig_node_new_alt(Node* left, Node* right) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NODE_TYPE(node, NODE_ALT); + NODE_SET_TYPE(node, NODE_ALT); NODE_CAR(node) = left; NODE_CDR(node) = right; return node; } +static Node* +make_list_or_alt(NodeType type, int n, Node* ns[]) +{ + Node* r; + + if (n <= 0) return NULL_NODE; + + if (n == 1) { + r = node_new(); + CHECK_NULL_RETURN(r); + NODE_SET_TYPE(r, type); + NODE_CAR(r) = ns[0]; + NODE_CDR(r) = NULL_NODE; + } + else { + Node* right; + + r = node_new(); + CHECK_NULL_RETURN(r); + + right = make_list_or_alt(type, n - 1, ns + 1); + if (IS_NULL(right)) { + onig_node_free(r); + return NULL_NODE; + } + + NODE_SET_TYPE(r, type); + NODE_CAR(r) = ns[0]; + NODE_CDR(r) = right; + } + + return r; +} + +static Node* +make_list(int n, Node* ns[]) +{ + return make_list_or_alt(NODE_LIST, n, ns); +} + +static Node* +make_alt(int n, Node* ns[]) +{ + return make_list_or_alt(NODE_ALT, n, ns); +} + extern Node* onig_node_new_anchor(int type) { Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NODE_TYPE(node, NODE_ANCHOR); + NODE_SET_TYPE(node, NODE_ANCHOR); ANCHOR_(node)->type = type; ANCHOR_(node)->char_len = -1; return node; @@ -1243,16 +1406,16 @@ node_new_backref(int back_num, int* backrefs, int by_name, CHECK_NULL_RETURN(node); - SET_NODE_TYPE(node, NODE_BREF); - BREF_(node)->back_num = back_num; - BREF_(node)->back_dynamic = (int* )NULL; + NODE_SET_TYPE(node, NODE_BACKREF); + BACKREF_(node)->back_num = back_num; + BACKREF_(node)->back_dynamic = (int* )NULL; if (by_name != 0) NODE_STATUS_ADD(node, NST_BY_NAME); #ifdef USE_BACKREF_WITH_LEVEL if (exist_level != 0) { NODE_STATUS_ADD(node, NST_NEST_LEVEL); - BREF_(node)->nest_level = nest_level; + BACKREF_(node)->nest_level = nest_level; } #endif @@ -1266,7 +1429,7 @@ node_new_backref(int back_num, int* backrefs, int by_name, if (back_num <= NODE_BACKREFS_SIZE) { for (i = 0; i < back_num; i++) - BREF_(node)->back_static[i] = backrefs[i]; + 
BACKREF_(node)->back_static[i] = backrefs[i]; } else { int* p = (int* )xmalloc(sizeof(int) * back_num); @@ -1274,21 +1437,41 @@ node_new_backref(int back_num, int* backrefs, int by_name, onig_node_free(node); return NULL; } - BREF_(node)->back_dynamic = p; + BACKREF_(node)->back_dynamic = p; for (i = 0; i < back_num; i++) p[i] = backrefs[i]; } return node; } -#ifdef USE_SUBEXP_CALL +static Node* +node_new_backref_checker(int back_num, int* backrefs, int by_name, +#ifdef USE_BACKREF_WITH_LEVEL + int exist_level, int nest_level, +#endif + ScanEnv* env) +{ + Node* node; + + node = node_new_backref(back_num, backrefs, by_name, +#ifdef USE_BACKREF_WITH_LEVEL + exist_level, nest_level, +#endif + env); + CHECK_NULL_RETURN(node); + + NODE_STATUS_ADD(node, NST_CHECKER); + return node; +} + +#ifdef USE_CALL static Node* node_new_call(UChar* name, UChar* name_end, int gnum, int by_number) { Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NODE_TYPE(node, NODE_CALL); + NODE_SET_TYPE(node, NODE_CALL); CALL_(node)->by_number = by_number; CALL_(node)->name = name; CALL_(node)->name_end = name_end; @@ -1304,7 +1487,7 @@ node_new_quantifier(int lower, int upper, int by_number) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NODE_TYPE(node, NODE_QUANT); + NODE_SET_TYPE(node, NODE_QUANT); QUANT_(node)->lower = lower; QUANT_(node)->upper = upper; QUANT_(node)->greedy = 1; @@ -1328,7 +1511,7 @@ node_new_enclosure(int type) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NODE_TYPE(node, NODE_ENCLOSURE); + NODE_SET_TYPE(node, NODE_ENCLOSURE); ENCLOSURE_(node)->type = type; switch (type) { @@ -1340,11 +1523,16 @@ node_new_enclosure(int type) break; case ENCLOSURE_OPTION: - ENCLOSURE_(node)->o.option = 0; + ENCLOSURE_(node)->o.options = 0; break; case ENCLOSURE_STOP_BACKTRACK: break; + + case ENCLOSURE_IF_ELSE: + ENCLOSURE_(node)->te.Then = 0; + ENCLOSURE_(node)->te.Else = 0; + break; } ENCLOSURE_(node)->opt_count = 0; @@ -1358,7 +1546,20 @@ 
onig_node_new_enclosure(int type) } static Node* -node_new_enclosure_memory(int is_named) +node_new_enclosure_if_else(Node* cond, Node* Then, Node* Else) +{ + Node* n; + n = node_new_enclosure(ENCLOSURE_IF_ELSE); + CHECK_NULL_RETURN(n); + + NODE_BODY(n) = cond; + ENCLOSURE_(n)->te.Then = Then; + ENCLOSURE_(n)->te.Else = Else; + return n; +} + +static Node* +node_new_memory(int is_named) { Node* node = node_new_enclosure(ENCLOSURE_MEMORY); CHECK_NULL_RETURN(node); @@ -1373,10 +1574,395 @@ node_new_option(OnigOptionType option) { Node* node = node_new_enclosure(ENCLOSURE_OPTION); CHECK_NULL_RETURN(node); - ENCLOSURE_(node)->o.option = option; + ENCLOSURE_(node)->o.options = option; return node; } +static int +node_new_fail(Node** node, ScanEnv* env) +{ + *node = node_new(); + CHECK_NULL_RETURN_MEMERR(*node); + + NODE_SET_TYPE(*node, NODE_GIMMICK); + GIMMICK_(*node)->type = GIMMICK_FAIL; + return ONIG_NORMAL; +} + +static int +node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env) +{ + int id; + int r; + + r = save_entry(env, save_type, &id); + if (r != ONIG_NORMAL) return r; + + *node = node_new(); + CHECK_NULL_RETURN_MEMERR(*node); + + NODE_SET_TYPE(*node, NODE_GIMMICK); + GIMMICK_(*node)->id = id; + GIMMICK_(*node)->type = GIMMICK_SAVE; + GIMMICK_(*node)->detail_type = (int )save_type; + + return ONIG_NORMAL; +} + +static int +node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type, + int id, ScanEnv* env) +{ + *node = node_new(); + CHECK_NULL_RETURN_MEMERR(*node); + + NODE_SET_TYPE(*node, NODE_GIMMICK); + GIMMICK_(*node)->id = id; + GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR; + GIMMICK_(*node)->detail_type = (int )update_var_type; + + return ONIG_NORMAL; +} + +static int +node_new_keep(Node** node, ScanEnv* env) +{ + int r; + + r = node_new_save_gimmick(node, SAVE_KEEP, env); + if (r != 0) return r; + + env->keep_num++; + return ONIG_NORMAL; +} + +static int +make_absent_engine(Node** node, int pre_save_right_id, Node* 
absent, + Node* step_one, int lower, int upper, int possessive, + int is_range_cutter, ScanEnv* env) +{ + int r; + int i; + int id; + Node* x; + Node* ns[4]; + + for (i = 0; i < 4; i++) ns[i] = NULL_NODE; + + ns[1] = absent; + ns[3] = step_one; // for err + r = node_new_save_gimmick(&ns[0], SAVE_S, env); + if (r != 0) goto err; + + id = GIMMICK_(ns[0])->id; + r = node_new_update_var_gimmick(&ns[2], UPDATE_VAR_RIGHT_RANGE_FROM_S_STACK, + id, env); + if (r != 0) goto err; + + r = node_new_fail(&ns[3], env); + if (r != 0) goto err; + + x = make_list(4, ns); + if (IS_NULL(x)) goto err; + + ns[0] = x; + ns[1] = step_one; + ns[2] = ns[3] = NULL_NODE; + + x = make_alt(2, ns); + if (IS_NULL(x)) goto err; + + ns[0] = x; + + x = node_new_quantifier(lower, upper, 0); + if (IS_NULL(x)) goto err; + + NODE_BODY(x) = ns[0]; + ns[0] = x; + + if (possessive != 0) { + x = node_new_enclosure(ENCLOSURE_STOP_BACKTRACK); + if (IS_NULL(x)) goto err; + + NODE_BODY(x) = ns[0]; + ns[0] = x; + } + + r = node_new_update_var_gimmick(&ns[1], UPDATE_VAR_RIGHT_RANGE_FROM_STACK, + pre_save_right_id, env); + if (r != 0) goto err; + + r = node_new_fail(&ns[2], env); + if (r != 0) goto err; + + x = make_list(2, ns + 1); + if (IS_NULL(x)) goto err; + + ns[1] = x; ns[2] = NULL_NODE; + + x = make_alt(2, ns); + if (IS_NULL(x)) goto err; + + if (is_range_cutter != 0) + NODE_STATUS_ADD(x, NST_SUPER); + + *node = x; + return ONIG_NORMAL; + + err: + for (i = 0; i < 4; i++) onig_node_free(ns[i]); + return r; +} + +static int +make_absent_tail(Node** node1, Node** node2, int pre_save_right_id, + ScanEnv* env) +{ + int r; + int id; + Node* save; + Node* x; + Node* ns[2]; + + *node1 = *node2 = NULL_NODE; + save = ns[0] = ns[1] = NULL_NODE; + + r = node_new_save_gimmick(&save, SAVE_RIGHT_RANGE, env); + if (r != 0) goto err; + + id = GIMMICK_(save)->id; + r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK, + id, env); + if (r != 0) goto err; + + r = node_new_fail(&ns[1], env); + if (r != 0) 
goto err; + + x = make_list(2, ns); + if (IS_NULL(x)) goto err; + + ns[0] = NULL_NODE; ns[1] = x; + + r = node_new_update_var_gimmick(&ns[0], UPDATE_VAR_RIGHT_RANGE_FROM_STACK, + pre_save_right_id, env); + if (r != 0) goto err; + + x = make_alt(2, ns); + if (IS_NULL(x)) goto err; + + *node1 = save; + *node2 = x; + return ONIG_NORMAL; + + err: + onig_node_free(save); + onig_node_free(ns[0]); + onig_node_free(ns[1]); + return r; +} + +static int +is_simple_one_char_repeat(Node* node, Node** rquant, Node** rbody, + int* is_possessive, ScanEnv* env) +{ + Node* quant; + Node* body; + + *rquant = *rbody = 0; + *is_possessive = 0; + + if (NODE_TYPE(node) == NODE_QUANT) { + quant = node; + } + else { + if (NODE_TYPE(node) == NODE_ENCLOSURE) { + EnclosureNode* en = ENCLOSURE_(node); + if (en->type == ENCLOSURE_STOP_BACKTRACK) { + *is_possessive = 1; + quant = NODE_ENCLOSURE_BODY(en); + if (NODE_TYPE(quant) != NODE_QUANT) + return 0; + } + else + return 0; + } + else + return 0; + } + + body = NODE_BODY(quant); + switch (NODE_TYPE(body)) { + case NODE_STRING: + { + int len; + StrNode* sn = STR_(body); + UChar *s = sn->s; + + len = 0; + while (s < sn->end) { + s += enclen(env->enc, s); + len++; + } + if (len != 1) + return 0; + } + + case NODE_CCLASS: + break; + + default: + return 0; + break; + } + + if (node != quant) { + NODE_BODY(node) = 0; + onig_node_free(node); + } + NODE_BODY(quant) = NULL_NODE; + *rquant = quant; + *rbody = body; + return 1; +} + +static int +make_absent_tree_for_simple_one_char_repeat(Node** node, Node* absent, Node* quant, + Node* body, int possessive, ScanEnv* env) +{ + int r; + int i; + int id1; + int lower, upper; + Node* x; + Node* ns[4]; + + *node = NULL_NODE; + r = ONIGERR_MEMORY; + ns[0] = ns[1] = NULL_NODE; + ns[2] = body, ns[3] = absent; + + lower = QUANT_(quant)->lower; + upper = QUANT_(quant)->upper; + onig_node_free(quant); + + r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env); + if (r != 0) goto err; + + id1 = 
GIMMICK_(ns[0])->id; + + r = make_absent_engine(&ns[1], id1, absent, body, lower, upper, possessive, + 0, env); + if (r != 0) goto err; + + ns[2] = ns[3] = NULL_NODE; + + r = make_absent_tail(&ns[2], &ns[3], id1, env); + if (r != 0) goto err; + + x = make_list(4, ns); + if (IS_NULL(x)) goto err; + + *node = x; + return ONIG_NORMAL; + + err: + for (i = 0; i < 4; i++) onig_node_free(ns[i]); + return r; +} + +static int +make_absent_tree(Node** node, Node* absent, Node* expr, int is_range_cutter, + ScanEnv* env) +{ + int r; + int i; + int id1, id2; + int possessive; + Node* x; + Node* ns[7]; + + r = ONIGERR_MEMORY; + for (i = 0; i < 7; i++) ns[i] = NULL_NODE; + ns[4] = expr; ns[5] = absent; + + if (is_range_cutter == 0) { + Node* quant; + Node* body; + + if (expr == NULL_NODE) { + /* default expr \O* */ + quant = node_new_quantifier(0, REPEAT_INFINITE, 0); + if (IS_NULL(quant)) goto err; + + r = node_new_true_anychar(&body, env); + if (r != 0) { + onig_node_free(quant); + goto err; + } + possessive = 0; + goto simple; + } + else { + if (is_simple_one_char_repeat(expr, &quant, &body, &possessive, env)) { + simple: + r = make_absent_tree_for_simple_one_char_repeat(node, absent, quant, + body, possessive, env); + if (r != 0) { + ns[4] = NULL_NODE; + onig_node_free(quant); + onig_node_free(body); + goto err; + } + + return ONIG_NORMAL; + } + } + } + + r = node_new_save_gimmick(&ns[0], SAVE_RIGHT_RANGE, env); + if (r != 0) goto err; + + id1 = GIMMICK_(ns[0])->id; + + r = node_new_save_gimmick(&ns[1], SAVE_S, env); + if (r != 0) goto err; + + id2 = GIMMICK_(ns[1])->id; + + r = node_new_true_anychar(&ns[3], env); + if (r != 0) goto err; + + possessive = 1; + r = make_absent_engine(&ns[2], id1, absent, ns[3], 0, REPEAT_INFINITE, + possessive, is_range_cutter, env); + if (r != 0) goto err; + + ns[3] = NULL_NODE; + ns[5] = NULL_NODE; + + r = node_new_update_var_gimmick(&ns[3], UPDATE_VAR_S_FROM_STACK, id2, env); + if (r != 0) goto err; + + if (is_range_cutter != 0) { + x = 
make_list(4, ns); + if (IS_NULL(x)) goto err; + } + else { + r = make_absent_tail(&ns[5], &ns[6], id1, env); + if (r != 0) goto err; + + x = make_list(7, ns); + if (IS_NULL(x)) goto err; + } + + *node = x; + return ONIG_NORMAL; + + err: + for (i = 0; i < 7; i++) onig_node_free(ns[i]); + return r; +} + extern int onig_node_str_cat(Node* node, const UChar* s, const UChar* end) { @@ -1385,9 +1971,9 @@ onig_node_str_cat(Node* node, const UChar* s, const UChar* end) if (addlen > 0) { int len = STR_(node)->end - STR_(node)->s; - if (STR_(node)->capa > 0 || (len + addlen > NODE_STR_BUF_SIZE - 1)) { + if (STR_(node)->capa > 0 || (len + addlen > NODE_STRING_BUF_SIZE - 1)) { UChar* p; - int capa = len + addlen + NODE_STR_MARGIN; + int capa = len + addlen + NODE_STRING_MARGIN; if (capa <= STR_(node)->capa) { onig_strcpy(STR_(node)->s + len, s, end); @@ -1432,7 +2018,7 @@ node_str_cat_char(Node* node, UChar c) extern void onig_node_conv_to_str_node(Node* node, int flag) { - SET_NODE_TYPE(node, NODE_STR); + NODE_SET_TYPE(node, NODE_STRING); STR_(node)->flag = flag; STR_(node)->capa = 0; STR_(node)->s = STR_(node)->buf; @@ -1459,7 +2045,7 @@ node_new_str(const UChar* s, const UChar* end) Node* node = node_new(); CHECK_NULL_RETURN(node); - SET_NODE_TYPE(node, NODE_STR); + NODE_SET_TYPE(node, NODE_STRING); STR_(node)->capa = 0; STR_(node)->flag = 0; STR_(node)->s = STR_(node)->buf; @@ -1481,7 +2067,7 @@ static Node* node_new_str_raw(UChar* s, UChar* end) { Node* node = node_new_str(s, end); - NSTRING_SET_RAW(node); + NODE_STRING_SET_RAW(node); return node; } @@ -1511,7 +2097,7 @@ str_node_split_last_char(StrNode* sn, OnigEncoding enc) if (p && p > sn->s) { /* can be split. 
*/ n = node_new_str(p, sn->end); if ((sn->flag & STRING_RAW) != 0) - NSTRING_SET_RAW(n); + NODE_STRING_SET_RAW(n); sn->end = (UChar* )p; } @@ -1532,7 +2118,7 @@ str_node_can_be_split(StrNode* sn, OnigEncoding enc) static int node_str_head_pad(StrNode* sn, int num, UChar val) { - UChar buf[NODE_STR_BUF_SIZE]; + UChar buf[NODE_STRING_BUF_SIZE]; int i, len; len = sn->end - sn->s; @@ -2090,6 +2676,7 @@ is_invalid_quantifier_target(Node* node) { switch (NODE_TYPE(node)) { case NODE_ANCHOR: + case NODE_GIMMICK: return 1; break; @@ -2212,6 +2799,56 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode) onig_node_free(cnode); } +static int +node_new_general_newline(Node** node, ScanEnv* env) +{ + int r; + int dlen, alen; + UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2]; + Node* crnl; + Node* ncc; + Node* x; + CClassNode* cc; + + dlen = ONIGENC_CODE_TO_MBC(env->enc, 0x0d, buf); + if (dlen < 0) return dlen; + alen = ONIGENC_CODE_TO_MBC(env->enc, 0x0a, buf + dlen); + if (alen < 0) return alen; + + crnl = node_new_str_raw(buf, buf + dlen + alen); + CHECK_NULL_RETURN_MEMERR(crnl); + + ncc = node_new_cclass(); + if (IS_NULL(ncc)) goto err2; + + cc = CCLASS_(ncc); + if (dlen == 1) { + bitset_set_range(cc->bs, 0x0a, 0x0d); + } + else { + r = add_code_range(&(cc->mbuf), env, 0x0a, 0x0d); + if (r != 0) { + err1: + onig_node_free(ncc); + err2: + onig_node_free(crnl); + return ONIGERR_MEMORY; + } + } + + if (ONIGENC_IS_UNICODE_ENCODING(env->enc)) { + r = add_code_range(&(cc->mbuf), env, 0x85, 0x85); + if (r != 0) goto err1; + r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029); + if (r != 0) goto err1; + } + + x = node_new_enclosure_if_else(crnl, 0, ncc); + if (IS_NULL(x)) goto err1; + + *node = x; + return 0; +} enum TokenSyms { TK_EOT = 0, /* end of token */ @@ -2233,6 +2870,11 @@ enum TokenSyms { TK_CC_OPEN, TK_QUOTE_OPEN, TK_CHAR_PROPERTY, /* \p{...}, \P{...} */ + TK_KEEP, /* \K */ + TK_GENERAL_NEWLINE, /* \R */ + TK_NO_NEWLINE, /* \N */ + TK_TRUE_ANYCHAR, /* \O */ + /* in cc */ 
TK_CC_CLOSE, TK_CC_RANGE, @@ -2452,8 +3094,9 @@ static OnigCodePoint get_name_end_code_point(OnigCodePoint start) { switch (start) { - case '<': return (OnigCodePoint )'>'; break; + case '<': return (OnigCodePoint )'>'; break; case '\'': return (OnigCodePoint )'\''; break; + case '(': return (OnigCodePoint )')'; break; default: break; } @@ -2706,7 +3349,7 @@ fetch_name(OnigCodePoint start_code, UChar** src, UChar* end, if (c != end_code) { r = ONIGERR_INVALID_GROUP_NAME; - name_end = end; + goto err; } if (*num_type != IS_NOT_NUM) { @@ -3378,6 +4021,26 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) tok->u.prop.not = 1; break; + case 'K': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP)) break; + tok->type = TK_KEEP; + break; + + case 'R': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE)) break; + tok->type = TK_GENERAL_NEWLINE; + break; + + case 'N': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break; + tok->type = TK_NO_NEWLINE; + break; + + case 'O': + if (! IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT)) break; + tok->type = TK_TRUE_ANYCHAR; + break; + case 'A': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR)) break; begin_buf: @@ -3561,7 +4224,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (r == 1) tok->u.backref.exist_level = 1; else tok->u.backref.exist_level = 0; #else - r = fetch_name(&p, end, &name_end, env, &back_num, &num_type, 1); + r = fetch_name(c, &p, end, &name_end, env, &back_num, &num_type, 1); #endif if (r < 0) return r; @@ -3616,7 +4279,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) break; #endif -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case 'g': if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { PFETCH(c); @@ -3815,14 +4478,14 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) case '^': if (! 
IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; - tok->u.subtype = (IS_SINGLELINE(env->option) + tok->u.subtype = (IS_SINGLELINE(env->options) ? ANCHOR_BEGIN_BUF : ANCHOR_BEGIN_LINE); break; case '$': if (! IS_SYNTAX_OP(syn, ONIG_SYN_OP_LINE_ANCHOR)) break; tok->type = TK_ANCHOR; - tok->u.subtype = (IS_SINGLELINE(env->option) + tok->u.subtype = (IS_SINGLELINE(env->options) ? ANCHOR_SEMI_END_BUF : ANCHOR_END_LINE); break; @@ -3837,7 +4500,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case '#': - if (IS_EXTEND(env->option)) { + if (IS_EXTEND(env->options)) { while (!PEND) { PFETCH(c); if (ONIGENC_IS_CODE_NEWLINE(enc, c)) @@ -3849,7 +4512,7 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) break; case ' ': case '\t': case '\n': case '\r': case '\f': - if (IS_EXTEND(env->option)) + if (IS_EXTEND(env->options)) goto start; break; @@ -4640,7 +5303,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, *np = NULL; if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; - option = env->option; + option = env->options; if (PPEEK_IS('?') && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { PINC; @@ -4711,7 +5374,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, r = name_add(env->reg, name, name_end, num, env); if (r != 0) return r; - *np = node_new_enclosure_memory(1); + *np = node_new_memory(1); CHECK_NULL_RETURN_MEMERR(*np); ENCLOSURE_(*np)->m.regnum = num; if (list_capture != 0) @@ -4729,6 +5392,259 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, #endif break; + case '~': + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP)) { + Node* absent; + Node* expr; + int head_bar; + int is_range_cutter; + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + + if (PPEEK_IS('|')) { // (?~|generator|absent) + PINC; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + + head_bar 
= 1; + if (PPEEK_IS(')')) { // (?~|) : absent clear + PINC; + r = node_new_update_var_gimmick(np, UPDATE_VAR_RIGHT_RANGE_INIT, + 0, env); + if (r != 0) return r; + goto end; + } + } + else + head_bar = 0; + + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(&absent, tok, term, &p, end, env); + if (r < 0) { + onig_node_free(absent); + return r; + } + + expr = NULL_NODE; + is_range_cutter = 0; + if (head_bar != 0) { + Node* top = absent; + if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) { + expr = NULL_NODE; + is_range_cutter = 1; + //return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; + } + else { + absent = NODE_CAR(top); + expr = NODE_CDR(top); + NODE_CAR(top) = NULL_NODE; + NODE_CDR(top) = NULL_NODE; + onig_node_free(top); + if (IS_NULL(NODE_CDR(expr))) { + top = expr; + expr = NODE_CAR(top); + NODE_CAR(top) = NULL_NODE; + onig_node_free(top); + } + } + } + + r = make_absent_tree(np, absent, expr, is_range_cutter, env); + if (r != 0) { + return r; + } + goto end; + } + else { + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + break; + + case '(': + /* (?()...) */ + if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) { + UChar *prev; + Node* condition; + int condition_is_checker; + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH(c); + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + + if (ONIGENC_IS_CODE_DIGIT(enc, c) + || c == '-' || c == '+' || c == '<' || c == '\'') { + UChar* name_end; + int back_num; + int exist_level; + int level; + enum REF_NUM num_type; + int is_enclosed; + + is_enclosed = (c == '<' || c == '\'') ? 1 : 0; + if (! is_enclosed) + PUNFETCH; + prev = p; + exist_level = 0; +#ifdef USE_BACKREF_WITH_LEVEL + name_end = NULL_UCHARP; /* no need. escape gcc warning. */ + r = fetch_name_with_level( + (OnigCodePoint )(is_enclosed != 0 ? 
c : '('), + &p, end, &name_end, + env, &back_num, &level, &num_type); + if (r == 1) exist_level = 1; +#else + r = fetch_name((OnigCodePoint )(is_enclosed != 0 ? c : '('), + &p, end, &name_end, env, &back_num, &num_type, 1); +#endif + if (r < 0) { + if (is_enclosed == 0) { + goto any_condition; + } + else + return r; + } + + condition_is_checker = 1; + if (num_type != IS_NOT_NUM) { + if (num_type == IS_REL_NUM) { + back_num = backref_rel_to_abs(back_num, env); + } + if (back_num <= 0) + return ONIGERR_INVALID_BACKREF; + + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) { + if (back_num > env->num_mem || + IS_NULL(SCANENV_MEMENV(env)[back_num].node)) + return ONIGERR_INVALID_BACKREF; + } + + condition = node_new_backref_checker(1, &back_num, 0, +#ifdef USE_BACKREF_WITH_LEVEL + exist_level, level, +#endif + env); + } + else { + int num; + int* backs; + + num = onig_name_to_group_numbers(env->reg, prev, name_end, &backs); + if (num <= 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, prev, name_end); + return ONIGERR_UNDEFINED_NAME_REFERENCE; + } + if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) { + int i; + for (i = 0; i < num; i++) { + if (backs[i] > env->num_mem || + IS_NULL(SCANENV_MEMENV(env)[backs[i]].node)) + return ONIGERR_INVALID_BACKREF; + } + } + + condition = node_new_backref_checker(num, backs, 1, +#ifdef USE_BACKREF_WITH_LEVEL + exist_level, level, +#endif + env); + } + + if (is_enclosed != 0) { + if (PEND) goto err_if_else; + PFETCH(c); + if (c != ')') goto err_if_else; + } + } + else { + any_condition: + PUNFETCH; + condition_is_checker = 0; + r = fetch_token(tok, &p, end, env); + if (r < 0) return r; + r = parse_subexp(&condition, tok, term, &p, end, env); + if (r < 0) { + onig_node_free(condition); + return r; + } + } + + CHECK_NULL_RETURN_MEMERR(condition); + + if (PEND) { + err_if_else: + onig_node_free(condition); + return ONIGERR_END_PATTERN_IN_GROUP; + } + + if (PPEEK_IS(')')) { /* case: 
empty body: make backref checker */ + if (condition_is_checker == 0) { + onig_node_free(condition); + return ONIGERR_INVALID_IF_ELSE_SYNTAX; + } + PFETCH(c); + *np = condition; + } + else { /* if-else */ + int then_is_empty; + Node *Then, *Else; + + if (PPEEK_IS('|')) { + PFETCH(c); + Then = 0; + then_is_empty = 1; + } + else + then_is_empty = 0; + + r = fetch_token(tok, &p, end, env); + if (r < 0) { + onig_node_free(condition); + return r; + } + r = parse_subexp(&target, tok, term, &p, end, env); + if (r < 0) { + onig_node_free(condition); + onig_node_free(target); + return r; + } + + if (then_is_empty != 0) { + Else = target; + } + else { + if (NODE_TYPE(target) == NODE_ALT) { + Then = NODE_CAR(target); + if (NODE_CDR(NODE_CDR(target)) == NULL_NODE) { + Else = NODE_CAR(NODE_CDR(target)); + cons_node_free_alone(NODE_CDR(target)); + } + else { + Else = NODE_CDR(target); + } + cons_node_free_alone(target); + } + else { + Then = target; + Else = 0; + } + } + + *np = node_new_enclosure_if_else(condition, Then, Else); + if (IS_NULL(*np)) { + onig_node_free(condition); + onig_node_free(Then); + onig_node_free(Else); + return ONIGERR_MEMORY; + } + } + goto end; + } + else { + return ONIGERR_UNDEFINED_GROUP_OPTION; + } + break; + case '@': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) { #ifdef USE_NAMED_GROUP @@ -4741,7 +5657,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, PUNFETCH; } #endif - *np = node_new_enclosure_memory(0); + *np = node_new_memory(0); CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) { @@ -4772,11 +5688,11 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, break; case '-': neg = 1; break; - case 'x': ONOFF(option, ONIG_OPTION_EXTEND, neg); break; - case 'i': ONOFF(option, ONIG_OPTION_IGNORECASE, neg); break; + case 'x': OPTION_NEGATE(option, ONIG_OPTION_EXTEND, neg); break; + case 'i': OPTION_NEGATE(option, ONIG_OPTION_IGNORECASE, 
neg); break; case 's': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { - ONOFF(option, ONIG_OPTION_MULTILINE, neg); + OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg); } else return ONIGERR_UNDEFINED_GROUP_OPTION; @@ -4784,17 +5700,17 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, case 'm': if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { - ONOFF(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0)); + OPTION_NEGATE(option, ONIG_OPTION_SINGLELINE, (neg == 0 ? 1 : 0)); } else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { - ONOFF(option, ONIG_OPTION_MULTILINE, neg); + OPTION_NEGATE(option, ONIG_OPTION_MULTILINE, neg); } else return ONIGERR_UNDEFINED_GROUP_OPTION; break; #ifdef USE_POSIXLINE_OPTION case 'p': - ONOFF(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg); + OPTION_NEGATE(option, ONIG_OPTION_MULTILINE|ONIG_OPTION_SINGLELINE, neg); break; #endif default: @@ -4808,13 +5724,13 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, return 2; /* option only */ } else if (c == ':') { - OnigOptionType prev = env->option; + OnigOptionType prev = env->options; - env->option = option; + env->options = option; r = fetch_token(tok, &p, end, env); if (r < 0) return r; r = parse_subexp(&target, tok, term, &p, end, env); - env->option = prev; + env->options = prev; if (r < 0) { onig_node_free(target); return r; @@ -4837,10 +5753,10 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } } else { - if (ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_DONT_CAPTURE_GROUP)) + if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) goto group; - *np = node_new_enclosure_memory(0); + *np = node_new_memory(0); CHECK_NULL_RETURN_MEMERR(*np); num = scan_env_add_mem_entry(env); if (num < 0) return num; @@ -4866,6 +5782,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } } + end: *src = p; return 0; } @@ -4888,7 
+5805,7 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) return 1; switch (NODE_TYPE(target)) { - case NODE_STR: + case NODE_STRING: if (! group) { StrNode* sn = STR_(target); if (str_node_can_be_split(sn, env->enc)) { @@ -5060,7 +5977,7 @@ i_apply_case_fold(OnigCodePoint from, OnigCodePoint to[], /* char-class expanded multi-char only compare with string folded at match time. */ - NSTRING_SET_AMBIG(snode); + NODE_STRING_SET_AMBIG(snode); } else { r = onig_node_str_cat(snode, buf, buf + len); @@ -5106,13 +6023,13 @@ parse_exp(Node** np, OnigToken* tok, int term, if (r == 1) group = 1; else if (r == 2) { /* option only */ Node* target; - OnigOptionType prev = env->option; + OnigOptionType prev = env->options; - env->option = ENCLOSURE_(*np)->o.option; + env->options = ENCLOSURE_(*np)->o.options; r = fetch_token(tok, src, end, env); if (r < 0) return r; r = parse_subexp(&target, tok, term, src, end, env); - env->option = prev; + env->options = prev; if (r < 0) { onig_node_free(target); return r; @@ -5161,7 +6078,7 @@ parse_exp(Node** np, OnigToken* tok, int term, if (len >= ONIGENC_MBC_MINLEN(env->enc)) { if (len == enclen(env->enc, STR_(*np)->s)) {//should not enclen_end() r = fetch_token(tok, src, end, env); - NSTRING_CLEAR_RAW(*np); + NODE_STRING_CLEAR_RAW(*np); goto string_end; } } @@ -5176,7 +6093,7 @@ parse_exp(Node** np, OnigToken* tok, int term, rem = ONIGENC_MBC_MINLEN(env->enc) - len; (void )node_str_head_pad(STR_(*np), rem, (UChar )0); if (len + rem == enclen(env->enc, STR_(*np)->s)) { - NSTRING_CLEAR_RAW(*np); + NODE_STRING_CLEAR_RAW(*np); goto string_end; } } @@ -5266,7 +6183,7 @@ parse_exp(Node** np, OnigToken* tok, int term, if (r != 0) return r; cc = CCLASS_(*np); - if (IS_IGNORECASE(env->option)) { + if (IS_IGNORECASE(env->options)) { IApplyCaseFoldArg iarg; iarg.env = env; @@ -5293,12 +6210,12 @@ parse_exp(Node** np, OnigToken* tok, int term, break; case TK_ANYCHAR: - *np = node_new_ctype(CTYPE_ANYCHAR, 0); + *np = 
node_new_anychar(); CHECK_NULL_RETURN_MEMERR(*np); break; case TK_ANYCHAR_ANYTIME: - *np = node_new_ctype(CTYPE_ANYCHAR, 0); + *np = node_new_anychar(); CHECK_NULL_RETURN_MEMERR(*np); qn = node_new_quantifier(0, REPEAT_INFINITE, 0); CHECK_NULL_RETURN_MEMERR(qn); @@ -5319,7 +6236,7 @@ parse_exp(Node** np, OnigToken* tok, int term, CHECK_NULL_RETURN_MEMERR(*np); break; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL case TK_CALL: { int gnum = tok->u.call.gnum; @@ -5352,6 +6269,26 @@ parse_exp(Node** np, OnigToken* tok, int term, } break; + case TK_KEEP: + r = node_new_keep(np, env); + if (r < 0) return r; + break; + + case TK_GENERAL_NEWLINE: + r = node_new_general_newline(np, env); + if (r < 0) return r; + break; + + case TK_NO_NEWLINE: + r = node_new_no_newline(np, env); + if (r < 0) return r; + break; + + case TK_TRUE_ANYCHAR: + r = node_new_true_anychar(np, env); + if (r < 0) return r; + break; + default: return ONIGERR_PARSER_BUG; break; @@ -5526,13 +6463,13 @@ parse_regexp(Node** top, UChar** src, UChar* end, ScanEnv* env) return 0; } -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL static int make_call_zero_body(Node* node, ScanEnv* env, Node** rnode) { int r; - Node* x = node_new_enclosure_memory(0 /* 0: is not named */); + Node* x = node_new_memory(0 /* 0: is not named */); CHECK_NULL_RETURN_MEMERR(x); NODE_BODY(x) = node; @@ -5560,7 +6497,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, #endif scan_env_clear(env); - env->option = reg->options; + env->options = reg->options; env->case_fold_flag = reg->case_fold_flag; env->enc = reg->enc; env->syntax = reg->syntax; @@ -5576,7 +6513,7 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, p = (UChar* )pattern; r = parse_regexp(root, &p, (UChar* )end, env); -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL if (r != 0) return r; if (env->has_call_zero != 0) { diff --git a/src/regparse.h b/src/regparse.h index 884f4d5..b7260ea 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -33,48 +33,58 @@ 
/* node type */ typedef enum { - NODE_STR = 0, - NODE_CCLASS = 1, - NODE_CTYPE = 2, - NODE_BREF = 3, - NODE_QUANT = 4, - NODE_ENCLOSURE = 5, - NODE_ANCHOR = 6, - NODE_LIST = 7, - NODE_ALT = 8, - NODE_CALL = 9 + NODE_STRING = 0, + NODE_CCLASS = 1, + NODE_CTYPE = 2, + NODE_BACKREF = 3, + NODE_QUANT = 4, + NODE_ENCLOSURE = 5, + NODE_ANCHOR = 6, + NODE_LIST = 7, + NODE_ALT = 8, + NODE_CALL = 9, + NODE_GIMMICK = 10 } NodeType; +enum GimmickType { + GIMMICK_FAIL = 0, + GIMMICK_KEEP = 1, + GIMMICK_SAVE = 2, + GIMMICK_UPDATE_VAR = 3, +}; + /* node type bit */ #define NODE_TYPE2BIT(type) (1<<(type)) -#define BIT_NODE_STR NODE_TYPE2BIT(NODE_STR) +#define BIT_NODE_STRING NODE_TYPE2BIT(NODE_STRING) #define BIT_NODE_CCLASS NODE_TYPE2BIT(NODE_CCLASS) #define BIT_NODE_CTYPE NODE_TYPE2BIT(NODE_CTYPE) -#define BIT_NODE_BREF NODE_TYPE2BIT(NODE_BREF) -#define BIT_NODE_QUANT NODE_TYPE2BIT(NODE_QUANT) +#define BIT_NODE_BACKREF NODE_TYPE2BIT(NODE_BACKREF) +#define BIT_NODE_QUANT NODE_TYPE2BIT(NODE_QUANT) #define BIT_NODE_ENCLOSURE NODE_TYPE2BIT(NODE_ENCLOSURE) #define BIT_NODE_ANCHOR NODE_TYPE2BIT(NODE_ANCHOR) #define BIT_NODE_LIST NODE_TYPE2BIT(NODE_LIST) #define BIT_NODE_ALT NODE_TYPE2BIT(NODE_ALT) #define BIT_NODE_CALL NODE_TYPE2BIT(NODE_CALL) +#define BIT_NODE_GIMMICK NODE_TYPE2BIT(NODE_GIMMICK) #define NODE_IS_SIMPLE_TYPE(node) \ ((NODE_TYPE2BIT(NODE_TYPE(node)) & \ - (BIT_NODE_STR | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BREF)) != 0) + (BIT_NODE_STRING | BIT_NODE_CCLASS | BIT_NODE_CTYPE | BIT_NODE_BACKREF)) != 0) #define NODE_TYPE(node) ((node)->u.base.node_type) -#define SET_NODE_TYPE(node, ntype) (node)->u.base.node_type = (ntype) +#define NODE_SET_TYPE(node, ntype) (node)->u.base.node_type = (ntype) #define STR_(node) (&((node)->u.str)) #define CCLASS_(node) (&((node)->u.cclass)) #define CTYPE_(node) (&((node)->u.ctype)) -#define BREF_(node) (&((node)->u.bref)) -#define QUANT_(node) (&((node)->u.quant)) -#define ENCLOSURE_(node) (&((node)->u.enclosure)) +#define 
BACKREF_(node) (&((node)->u.backref)) +#define QUANT_(node) (&((node)->u.quant)) +#define ENCLOSURE_(node) (&((node)->u.enclosure)) #define ANCHOR_(node) (&((node)->u.anchor)) #define CONS_(node) (&((node)->u.cons)) #define CALL_(node) (&((node)->u.call)) +#define GIMMICK_(node) (&((node)->u.gimmick)) #define NODE_CAR(node) (CONS_(node)->car) #define NODE_CDR(node) (CONS_(node)->cdr) @@ -83,6 +93,9 @@ typedef enum { #define NODE_IS_ANYCHAR(node) \ (NODE_TYPE(node) == NODE_CTYPE && CTYPE_(node)->ctype == CTYPE_ANYCHAR) +#define CTYPE_OPTION(node, reg) \ + (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options) + #define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) @@ -90,24 +103,25 @@ typedef enum { #define ENCLOSURE_MEMORY (1<<0) #define ENCLOSURE_OPTION (1<<1) #define ENCLOSURE_STOP_BACKTRACK (1<<2) +#define ENCLOSURE_IF_ELSE (1<<3) -#define NODE_STR_MARGIN 16 -#define NODE_STR_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ +#define NODE_STRING_MARGIN 16 +#define NODE_STRING_BUF_SIZE 24 /* sizeof(CClassNode) - sizeof(int)*4 */ #define NODE_BACKREFS_SIZE 6 #define STRING_RAW (1<<0) /* by backslashed number */ #define STRING_AMBIG (1<<1) #define STRING_DONT_GET_OPT_INFO (1<<2) -#define NSTRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) -#define NSTRING_SET_RAW(node) (node)->u.str.flag |= STRING_RAW -#define NSTRING_CLEAR_RAW(node) (node)->u.str.flag &= ~STRING_RAW -#define NSTRING_SET_AMBIG(node) (node)->u.str.flag |= STRING_AMBIG -#define NSTRING_SET_DONT_GET_OPT_INFO(node) \ +#define NODE_STRING_LEN(node) ((node)->u.str.end - (node)->u.str.s) +#define NODE_STRING_SET_RAW(node) (node)->u.str.flag |= STRING_RAW +#define NODE_STRING_CLEAR_RAW(node) (node)->u.str.flag &= ~STRING_RAW +#define NODE_STRING_SET_AMBIG(node) (node)->u.str.flag |= STRING_AMBIG +#define NODE_STRING_SET_DONT_GET_OPT_INFO(node) \ (node)->u.str.flag |= STRING_DONT_GET_OPT_INFO 
-#define NSTRING_IS_RAW(node) (((node)->u.str.flag & STRING_RAW) != 0) -#define NSTRING_IS_AMBIG(node) (((node)->u.str.flag & STRING_AMBIG) != 0) -#define NSTRING_IS_DONT_GET_OPT_INFO(node) \ +#define NODE_STRING_IS_RAW(node) (((node)->u.str.flag & STRING_RAW) != 0) +#define NODE_STRING_IS_AMBIG(node) (((node)->u.str.flag & STRING_AMBIG) != 0) +#define NODE_STRING_IS_DONT_GET_OPT_INFO(node) \ (((node)->u.str.flag & STRING_DONT_GET_OPT_INFO) != 0) #define BACKREFS_P(br) \ @@ -118,7 +132,7 @@ typedef enum { #define QUANT_BODY_IS_EMPTY_MEM 2 #define QUANT_BODY_IS_EMPTY_REC 3 -/* status bits */ +/* node status bits */ #define NST_MIN_FIXED (1<<0) #define NST_MAX_FIXED (1<<1) #define NST_CLEN_FIXED (1<<2) @@ -136,28 +150,37 @@ typedef enum { #define NST_BY_NUMBER (1<<14) /* {n,m} */ #define NST_BY_NAME (1<<15) /* backref by name */ #define NST_BACKREF (1<<16) +#define NST_CHECKER (1<<17) +#define NST_FIXED_OPTION (1<<18) +#define NST_PROHIBIT_RECURSION (1<<19) +#define NST_SUPER (1<<20) #define NODE_STATUS(node) (((Node* )node)->u.base.status) #define NODE_STATUS_ADD(node,f) (NODE_STATUS(node) |= (f)) #define NODE_STATUS_REMOVE(node,f) (NODE_STATUS(node) &= ~(f)) -#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NST_BY_NUMBER) != 0) +#define NODE_IS_BY_NUMBER(node) ((NODE_STATUS(node) & NST_BY_NUMBER) != 0) #define NODE_IS_IN_REAL_REPEAT(node) ((NODE_STATUS(node) & NST_IN_REAL_REPEAT) != 0) -#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NST_CALLED) != 0) +#define NODE_IS_CALLED(node) ((NODE_STATUS(node) & NST_CALLED) != 0) #define NODE_IS_IN_MULTI_ENTRY(node) ((NODE_STATUS(node) & NST_IN_MULTI_ENTRY) != 0) -#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NST_RECURSION) != 0) +#define NODE_IS_RECURSION(node) ((NODE_STATUS(node) & NST_RECURSION) != 0) #define NODE_IS_IN_ZERO_REPEAT(node) ((NODE_STATUS(node) & NST_IN_ZERO_REPEAT) != 0) -#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0) -#define NODE_IS_ADDR_FIXED(node) 
((NODE_STATUS(node) & NST_ADDR_FIXED) != 0) -#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NST_CLEN_FIXED) != 0) -#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NST_MIN_FIXED) != 0) -#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NST_MAX_FIXED) != 0) -#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NST_MARK1) != 0) -#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NST_MARK2) != 0) -#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NST_NEST_LEVEL) != 0) -#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NST_BY_NAME) != 0) -#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NST_BACKREF) != 0) +#define NODE_IS_NAMED_GROUP(node) ((NODE_STATUS(node) & NST_NAMED_GROUP) != 0) +#define NODE_IS_ADDR_FIXED(node) ((NODE_STATUS(node) & NST_ADDR_FIXED) != 0) +#define NODE_IS_CLEN_FIXED(node) ((NODE_STATUS(node) & NST_CLEN_FIXED) != 0) +#define NODE_IS_MIN_FIXED(node) ((NODE_STATUS(node) & NST_MIN_FIXED) != 0) +#define NODE_IS_MAX_FIXED(node) ((NODE_STATUS(node) & NST_MAX_FIXED) != 0) +#define NODE_IS_MARK1(node) ((NODE_STATUS(node) & NST_MARK1) != 0) +#define NODE_IS_MARK2(node) ((NODE_STATUS(node) & NST_MARK2) != 0) +#define NODE_IS_NEST_LEVEL(node) ((NODE_STATUS(node) & NST_NEST_LEVEL) != 0) +#define NODE_IS_BY_NAME(node) ((NODE_STATUS(node) & NST_BY_NAME) != 0) +#define NODE_IS_BACKREF(node) ((NODE_STATUS(node) & NST_BACKREF) != 0) +#define NODE_IS_CHECKER(node) ((NODE_STATUS(node) & NST_CHECKER) != 0) +#define NODE_IS_FIXED_OPTION(node) ((NODE_STATUS(node) & NST_FIXED_OPTION) != 0) +#define NODE_IS_SUPER(node) ((NODE_STATUS(node) & NST_SUPER) != 0) +#define NODE_IS_PROHIBIT_RECURSION(node) \ + ((NODE_STATUS(node) & NST_PROHIBIT_RECURSION) != 0) #define NODE_IS_STOP_BT_SIMPLE_REPEAT(node) \ ((NODE_STATUS(node) & NST_STOP_BT_SIMPLE_REPEAT) != 0) @@ -168,8 +191,6 @@ typedef enum { #define NODE_ANCHOR_BODY(node) ((node)->body) -#define CALLNODE_REFNUM_UNDEF -1 - typedef struct { NodeType node_type; int status; @@ -178,7 +199,7 @@ typedef struct 
{ UChar* end; unsigned int flag; int capa; /* (allocated size - 1) or 0: use buf[] */ - UChar buf[NODE_STR_BUF_SIZE]; + UChar buf[NODE_STRING_BUF_SIZE]; } StrNode; typedef struct { @@ -221,17 +242,22 @@ typedef struct { int called_state; } m; struct { - OnigOptionType option; + OnigOptionType options; } o; + struct { + /* body is condition */ + struct _Node* Then; + struct _Node* Else; + } te; }; /* for multiple call reference */ - OnigLen min_len; /* min length (byte) */ - OnigLen max_len; /* max length (byte) */ - int char_len; /* character length */ - int opt_count; /* referenced count in optimize_node_left() */ + OnigLen min_len; /* min length (byte) */ + OnigLen max_len; /* max length (byte) */ + int char_len; /* character length */ + int opt_count; /* referenced count in optimize_node_left() */ } EnclosureNode; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL typedef struct { int offset; @@ -266,7 +292,7 @@ typedef struct { int back_static[NODE_BACKREFS_SIZE]; int* back_dynamic; int nest_level; -} BRefNode; +} BackRefNode; typedef struct { NodeType node_type; @@ -291,8 +317,18 @@ typedef struct { int ctype; int not; + OnigOptionType options; } CtypeNode; +typedef struct { + NodeType node_type; + int status; + + enum GimmickType type; + int detail_type; + int id; +} GimmickNode; + typedef struct _Node { union { struct { @@ -305,13 +341,14 @@ typedef struct _Node { CClassNode cclass; QuantNode quant; EnclosureNode enclosure; - BRefNode bref; + BackRefNode backref; AnchorNode anchor; ConsAltNode cons; CtypeNode ctype; -#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL CallNode call; #endif + GimmickNode gimmick; } u; } Node; @@ -332,7 +369,11 @@ typedef struct { } MemEnv; typedef struct { - OnigOptionType option; + enum SaveType type; +} SaveItem; + +typedef struct { + OnigOptionType options; OnigCaseFoldType case_fold_flag; OnigEncoding enc; OnigSyntaxType* syntax; @@ -346,7 +387,7 @@ typedef struct { UChar* error_end; regex_t* reg; /* for reg->names only */ int num_call; 
-#ifdef USE_SUBEXP_CALL +#ifdef USE_CALL UnsetAddrList* unset_addr_list; int has_call_zero; #endif @@ -364,6 +405,11 @@ typedef struct { int has_recursion; #endif unsigned int parse_depth; + + int keep_num; + int save_num; + int save_alloc_num; + SaveItem* saves; } ScanEnv; @@ -399,6 +445,7 @@ extern int onig_names_free P_((regex_t* reg)); extern int onig_parse_tree P_((Node** root, const UChar* pattern, const UChar* end, regex_t* reg, ScanEnv* env)); extern int onig_free_shared_cclass_table P_((void)); extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); +extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node); #ifdef ONIG_DEBUG #ifdef USE_NAMED_GROUP diff --git a/src/regposix.c b/src/regposix.c index bbe52dc..32b11b5 100644 --- a/src/regposix.c +++ b/src/regposix.c @@ -2,7 +2,7 @@ regposix.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -114,6 +114,9 @@ onig2posix_error_code(int code) { ONIGERR_NEVER_ENDING_RECURSION, REG_BADPAT }, { ONIGERR_GROUP_NUMBER_OVER_FOR_CAPTURE_HISTORY, REG_BADPAT }, { ONIGERR_INVALID_CHAR_PROPERTY_NAME, REG_BADPAT }, + { ONIGERR_INVALID_IF_ELSE_SYNTAX, REG_BADPAT }, + { ONIGERR_INVALID_ABSENT_GROUP_PATTERN, REG_BADPAT }, + { ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN, REG_BADPAT }, { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG }, { ONIGERR_LIBRARY_IS_NOT_INITIALIZED, REG_EONIG_INTERNAL } }; diff --git a/src/regsyntax.c b/src/regsyntax.c index e751e24..6833e1d 100644 --- a/src/regsyntax.c +++ b/src/regsyntax.c @@ -2,7 +2,7 @@ regsyntax.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2006 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> + * Copyright (c) 2002-2017 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -174,8 +174,13 @@ OnigSyntaxType OnigSyntaxPerl = { & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | + ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | + ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | - ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT ) + ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | + ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP | + ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | + ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT ) , SYN_GNU_REGEX_BV , ONIG_OPTION_SINGLELINE , @@ -199,11 +204,16 @@ OnigSyntaxType OnigSyntaxPerl_NG = { & ~ONIG_SYN_OP_ESC_LTGT_WORD_BEGIN_END ) , ( ONIG_SYN_OP2_ESC_CAPITAL_Q_QUOTE | ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | + ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | + ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | - ONIG_SYN_OP2_ESC_G_SUBEXP_CALL ) + ONIG_SYN_OP2_ESC_G_SUBEXP_CALL | + ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP | + ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | + ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT ) , ( SYN_GNU_REGEX_BV | ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP | ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME ) @@ -90,6 +90,7 @@ is_valid_mbc_string(const UChar* p, const UChar* end) return TRUE; } +#if 0 static int is_mbc_newline(const UChar* p, const UChar* end) { @@ -114,6 +115,7 @@ is_mbc_newline(const UChar* p, const UChar* end) return 0; } +#endif static OnigCodePoint mbc_to_code(const UChar* p, const UChar* end) @@ -246,43 +248,6 @@ mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, } } -#if 0 -static int -is_mbc_ambiguous(OnigCaseFoldType flag, const UChar** pp, const UChar* end) -{ - const UChar* p = *pp; - - if (ONIGENC_IS_MBC_ASCII(p)) { - (*pp)++; - return ONIGENC_IS_ASCII_CODE_CASE_AMBIG(*p); - } - else 
{ - (*pp) += enclen(ONIG_ENCODING_UTF8, p); - - if (*p == 0xc3) { - int c = *(p + 1); - if (c >= 0x80) { - if (c <= (UChar )0x9e) { /* upper */ - if (c == (UChar )0x97) return FALSE; - return TRUE; - } - else if (c >= (UChar )0xa0 && c <= (UChar )0xbe) { /* lower */ - if (c == (UChar )'\267') return FALSE; - return TRUE; - } - else if (c == (UChar )0x9f && - (flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { - return TRUE; - } - } - } - } - - return FALSE; -} -#endif - - static int get_ctype_code_range(OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[]) @@ -317,7 +282,7 @@ OnigEncodingType OnigEncodingUTF8 = { "UTF-8", /* name */ 6, /* max byte length */ 1, /* min byte length */ - is_mbc_newline, + onigenc_is_mbc_newline_0x0a, mbc_to_code, code_to_mbclen, code_to_mbc, diff --git a/test-driver b/test-driver index d306056..8e575b0 100755 --- a/test-driver +++ b/test-driver @@ -3,7 +3,7 @@ scriptversion=2013-07-13.22; # UTC -# Copyright (C) 2011-2013 Free Software Foundation, Inc. +# Copyright (C) 2011-2014 Free Software Foundation, Inc. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -106,11 +106,14 @@ trap "st=143; $do_exit" 15 # Test script is run here. "$@" >$log_file 2>&1 estatus=$? 
+ if test $enable_hard_errors = no && test $estatus -eq 99; then - estatus=1 + tweaked_estatus=1 +else + tweaked_estatus=$estatus fi -case $estatus:$expect_failure in +case $tweaked_estatus:$expect_failure in 0:yes) col=$red res=XPASS recheck=yes gcopy=yes;; 0:*) col=$grn res=PASS recheck=no gcopy=no;; 77:*) col=$blu res=SKIP recheck=no gcopy=yes;; @@ -119,6 +122,12 @@ case $estatus:$expect_failure in *:*) col=$red res=FAIL recheck=yes gcopy=yes;; esac +# Report the test outcome and exit status in the logs, so that one can +# know whether the test passed or failed simply by looking at the '.log' +# file, without the need of also peaking into the corresponding '.trs' +# file (automake bug#11814). +echo "$res $test_name (exit status: $estatus)" >>$log_file + # Report outcome to console. echo "${col}${res}${std}: $test_name" diff --git a/test/testc.c b/test/testc.c index 725e375..222c9cd 100644 --- a/test/testc.c +++ b/test/testc.c @@ -590,6 +590,88 @@ extern int main(int argc, char* argv[]) x2("\\g<+2>(abc)(ABC){0}", "ABCabc", 0, 6); // relative call by positive number x2("A\\g'0'|B()", "AAAAB", 0, 5); x3("(A\\g'0')|B", "AAAAB", 0, 5, 1); + x2("(a*)(?(1))aa", "aaaaa", 0, 5); + x2("(a*)(?(-1))aa", "aaaaa", 0, 5); + x2("(?<name>aaa)(?('name'))aa", "aaaaa", 0, 5); + x2("(a)(?(1)aa|bb)a", "aaaaa", 0, 4); + x2("(?:aa|())(?(<1>)aa|bb)a", "aabba", 0, 5); + x2("(?:aa|())(?('1')aa|bb|cc)a", "aacca", 0, 5); + x3("(a*)(?(1)aa|a)b", "aaab", 0, 1, 1); + n("(a)(?(1)a|b)c", "abc"); + x2("(a)(?(1)|)c", "ac", 0, 2); + n("(?()aaa|bbb)", "bbb"); + x2("(a)(?(1+0)b|c)d", "abd", 0, 3); + x2("(?:(?'name'a)|(?'name'b))(?('name')c|d)e", "ace", 0, 3); + x2("(?:(?'name'a)|(?'name'b))(?('name')c|d)e", "bce", 0, 3); + x2("\\R", "\r\n", 0, 2); + x2("\\R", "\r", 0, 1); + x2("\\R", "\n", 0, 1); + x2("\\R", "\x0b", 0, 1); + n("\\R\\n", "\r\n"); + n("\\R", "\xc2\x85"); // because euc-jp is not Unicode + x2("\\N", "a", 0, 1); + n("\\N", "\n"); + n("(?m:\\N)", "\n"); + n("(?-m:\\N)", "\n"); + x2("\\O", 
"a", 0, 1); + x2("\\O", "\n", 0, 1); + x2("(?m:\\O)", "\n", 0, 1); + x2("(?-m:\\O)", "\n", 0, 1); + x2("\\K", "a", 0, 0); + x2("a\\K", "a", 1, 1); + x2("a\\Kb", "ab", 1, 2); + x2("(a\\Kb|ac\\Kd)", "acd", 2, 3); + x2("(a\\Kb|\\Kac\\K)*", "acababacab", 9, 10); + + x2("(?~)", "", 0, 0); + x2("(?~)", "A", 0, 0); + x2("aaaaa(?~)", "aaaaaaaaaa", 0, 5); + x2("(?~(?:|aaa))", "aaa", 0, 0); + x2("(?~aaa|)", "aaa", 0, 0); + x2("a(?~(?~)).", "abcdefghijklmnopqrstuvwxyz", 0, 26); // !!! + x2("/\\*(?~\\*/)\\*/", "/* */ */", 0, 5); + x2("(?~\\w+)zzzzz", "zzzzz", 0, 5); + x2("(?~\\w*)zzzzz", "zzzzz", 0, 5); + x2("(?~A.C|B)", "ABC", 0, 0); + x2("(?~XYZ|ABC)a", "ABCa", 1, 4); + x2("(?~XYZ|ABC)a", "aABCa", 0, 1); + x2("<[^>]*>(?~[<>])</[^>]*>", "<a>vvv</a> <b> </b>", 0, 10); + x2("(?~ab)", "ccc\ndab", 0, 5); + x2("(?m:(?~ab))", "ccc\ndab", 0, 5); + x2("(?-m:(?~ab))", "ccc\ndab", 0, 5); + + // absent with expr + x2("(?~|78|\\d*)", "123456789", 0, 6); + x2("(?~|def|(?:abc|de|f){0,100})", "abcdedeabcfdefabc", 0, 11); + x2("(?~|ab|.*)", "ccc\nddd", 0, 3); + x2("(?~|ab|\\O*)", "ccc\ndab", 0, 5); + x2("(?~|ab|\\O{2,10})", "ccc\ndab", 0, 5); + x2("(?~|ab|\\O{1,10})", "ab", 1, 2); + n("(?~|ab|\\O{2,10})", "ab"); + x2("(?~|abc|\\O{1,10})", "abc", 1, 3); + x2("(?~|ab|\\O{5,10})|abc", "abc", 0, 3); + x2("(?~|ab|\\O{1,10})", "cccccccccccab", 0, 10); + x2("(?~|aaa|)", "aaa", 0, 0); + x2("(?~||a*)", "aaaaaa", 0, 0); + x2("(?~||a*?)", "aaaaaa", 0, 0); + x2("(a)(?~|b|\\1)", "aaaaaa", 0, 2); + x2("(a)(?~|bb|(?:a\\1)*)", "aaaaaa", 0, 5); + x2("(b|c)(?~|abac|(?:a\\1)*)", "abababacabab", 1, 4); + n("(?~|c|a*+)a", "aaaaa"); + x2("(?~|aaaaa|a*+)", "aaaaa", 0, 0); + x2("(?~|aaaaaa|a*+)b", "aaaaaab", 1, 7); + x2("(?~|abcd|(?>))", "zzzabcd", 0, 0); + + // absent range cutter + x2("(?~|abc)a*", "aaaaaabc", 0, 5); + x2("(?~|abc)a*z|aaaaaabc", "aaaaaabc", 0, 8); + x2("(?~|aaaaaa)a*", "aaaaaa", 0, 0); + x2("(?~|abc)aaaa|aaaabc", "aaaabc", 0, 6); + x2("(?>(?~|abc))aaaa|aaaabc", "aaaabc", 0, 6); + x2("(?~|)a", 
"a", 0, 1); + n("(?~|a)a", "a"); + x2("(?~|a)(?~|)a", "a", 0, 1); + x2("(?~|a).*(?~|)a", "bbbbbbbbbbbbbbbbbbbba", 0, 21); /* < ifndef IGNORE_EUC_JP > diff --git a/test/testu.c b/test/testu.c index 017ebef..6ff3a10 100644 --- a/test/testu.c +++ b/test/testu.c @@ -905,6 +905,15 @@ extern int main(int argc, char* argv[]) x2("\000^\000\\\000p\000{\000K\000a\000t\000a\000k\000a\000n\000a\000}\000$\000\000", "\060\277\000\000", 0, 2); x2("\000\\\000o\000{\0001\0000\0001\000}\000\000", "\000A\000\000", 0, 2); x2("\000\\\000o\000{\0001\0001\0000\0007\0002\0001\000}\000\000", "\221\321\000\000", 0, 2); + x2("\000\\\000R\000\000", "\000\015\000\012\000\000", 0, 4); // \R: general newline + x2("\000\\\000R\000\000", "\000\012\000\000", 0, 2); + x2("\000\\\000R\000\000", "\000\015\000\000", 0, 2); + x2("\000\\\000R\000\000", "\000\013\000\000", 0, 2); + n("\000\\\000R\000\012\000\000", "\000\015\000\012\000\000"); + x2("\000\\\000R\000\000", "\x00\x85\000\000", 0, 2); + x2("\000\\\000R\000\000", "\x20\x28\000\000", 0, 2); + x2("\000\\\000R\000\000", "\x20\x29\000\000", 0, 2); + n("\000\\\000R\000\000", "\x20\x2a\000\000"); fprintf(stdout, "\nRESULT SUCC: %d, FAIL: %d, ERROR: %d (by Oniguruma %s)\n", |