From 10abcf77cc24dfae451d96310b4391dad35906ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Tue, 20 Mar 2018 06:14:49 +0100 Subject: New upstream version 6.8.1 --- src/Makefile.am | 15 +- src/Makefile.windows | 197 ++-- src/ascii.c | 67 +- src/big5.c | 5 +- src/config.h.cmake.in | 6 +- src/cp1251.c | 5 +- src/euc_jp.c | 5 +- src/euc_kr.c | 5 +- src/euc_tw.c | 5 +- src/gb18030.c | 5 +- src/iso8859_1.c | 5 +- src/iso8859_10.c | 5 +- src/iso8859_11.c | 5 +- src/iso8859_13.c | 5 +- src/iso8859_14.c | 5 +- src/iso8859_15.c | 5 +- src/iso8859_16.c | 5 +- src/iso8859_2.c | 5 +- src/iso8859_3.c | 5 +- src/iso8859_4.c | 5 +- src/iso8859_5.c | 5 +- src/iso8859_6.c | 5 +- src/iso8859_7.c | 5 +- src/iso8859_8.c | 5 +- src/iso8859_9.c | 5 +- src/koi8.c | 5 +- src/koi8_r.c | 5 +- src/onig_init.c | 4 +- src/oniggnu.h | 10 +- src/onigposix.h | 4 +- src/oniguruma.h | 325 ++++-- src/regcomp.c | 1458 +++++++-------------------- src/regenc.c | 100 +- src/regenc.h | 103 +- src/regerror.c | 20 +- src/regexec.c | 2657 ++++++++++++++++++++++++++++++++++++++----------- src/reggnu.c | 8 +- src/regint.h | 293 ++++-- src/regparse.c | 1898 +++++++++++++++++++++++++++++++---- src/regparse.h | 40 +- src/regposerr.c | 9 +- src/regposix.c | 13 +- src/regsyntax.c | 6 +- src/regversion.c | 5 +- src/sjis.c | 6 +- src/utf16_be.c | 55 +- src/utf16_le.c | 53 +- src/utf32_be.c | 6 +- src/utf32_le.c | 6 +- src/utf8.c | 6 +- 50 files changed, 5120 insertions(+), 2365 deletions(-) (limited to 'src') diff --git a/src/Makefile.am b/src/Makefile.am index be35b24..911aecd 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -4,13 +4,24 @@ libname = libonig.la AM_CFLAGS = -Wall AM_CPPFLAGS = -I$(top_srcdir) -I$(includedir) -include_HEADERS = oniguruma.h oniggnu.h onigposix.h +include_HEADERS = oniguruma.h oniggnu.h + +posix_headers = onigposix.h + +if ENABLE_POSIX_API +posix_sources = regposix.c regposerr.c +include_HEADERS += $(posix_headers) +else +posix_sources = +endif + + lib_LTLIBRARIES = $(libname) libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \ regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \ regenc.c regsyntax.c regtrav.c regversion.c st.c \ - regposix.c regposerr.c \ + $(posix_sources) \ unicode.c \ unicode_unfold_key.c \ unicode_fold1_key.c \ diff --git a/src/Makefile.windows b/src/Makefile.windows index 046345a..1ce8ce2 100644 --- a/src/Makefile.windows +++ b/src/Makefile.windows @@ -15,8 +15,7 @@ LINKFLAGS = -link -incremental:no -pdb:none INSTALL = install -c CP = copy CC = cl -DEFS = -DHAVE_CONFIG_H -DNOT_RUBY -DEXPORT -RUBYDIR = .. +DEFS = -DHAVE_CONFIG_H subdirs = @@ -25,44 +24,43 @@ libname = $(libbase)_s.lib dllname = $(libbase).dll dlllib = $(libbase).lib -onigheaders = oniguruma.h regint.h regparse.h regenc.h st.h -posixheaders = onigposix.h +onigheaders = $(ONIG_DIR)/oniguruma.h $(ONIG_DIR)/regint.h $(ONIG_DIR)/regparse.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/st.h +posixheaders = $(ONIG_DIR)/onigposix.h headers = $(posixheaders) $(onigheaders) -onigobjs = reggnu.obj regerror.obj regparse.obj regext.obj regcomp.obj \ - regexec.obj regenc.obj regsyntax.obj regtrav.obj \ - regversion.obj st.obj onig_init.obj -posixobjs = regposix.obj regposerr.obj +onigobjs = $(BUILD_DIR)/reggnu.obj $(BUILD_DIR)/regerror.obj $(BUILD_DIR)/regparse.obj $(BUILD_DIR)/regext.obj $(BUILD_DIR)/regcomp.obj \ + $(BUILD_DIR)/regexec.obj $(BUILD_DIR)/regenc.obj $(BUILD_DIR)/regsyntax.obj $(BUILD_DIR)/regtrav.obj \ + $(BUILD_DIR)/regversion.obj $(BUILD_DIR)/st.obj $(BUILD_DIR)/onig_init.obj +posixobjs = $(BUILD_DIR)/regposix.obj $(BUILD_DIR)/regposerr.obj libobjs = $(onigobjs) $(posixobjs) -jp_objs = euc_jp.obj sjis.obj -iso8859_objs = iso8859_1.obj iso8859_2.obj \ - iso8859_3.obj iso8859_4.obj \ - iso8859_5.obj iso8859_6.obj \ - iso8859_7.obj iso8859_8.obj \ - iso8859_9.obj iso8859_10.obj \ - iso8859_11.obj iso8859_13.obj \ - iso8859_14.obj iso8859_15.obj \ - iso8859_16.obj - -encobjs = ascii.obj utf8.obj \ - unicode.obj \ - utf16_be.obj utf16_le.obj \ - utf32_be.obj utf32_le.obj \ +jp_objs = $(BUILD_DIR)/euc_jp.obj $(BUILD_DIR)/sjis.obj +iso8859_objs = $(BUILD_DIR)/iso8859_1.obj $(BUILD_DIR)/iso8859_2.obj \ + $(BUILD_DIR)/iso8859_3.obj $(BUILD_DIR)/iso8859_4.obj \ + $(BUILD_DIR)/iso8859_5.obj $(BUILD_DIR)/iso8859_6.obj \ + $(BUILD_DIR)/iso8859_7.obj $(BUILD_DIR)/iso8859_8.obj \ + $(BUILD_DIR)/iso8859_9.obj $(BUILD_DIR)/iso8859_10.obj \ + $(BUILD_DIR)/iso8859_11.obj $(BUILD_DIR)/iso8859_13.obj \ + $(BUILD_DIR)/iso8859_14.obj $(BUILD_DIR)/iso8859_15.obj \ + $(BUILD_DIR)/iso8859_16.obj + +encobjs = $(BUILD_DIR)/ascii.obj $(BUILD_DIR)/utf8.obj \ + $(BUILD_DIR)/unicode.obj \ + $(BUILD_DIR)/utf16_be.obj $(BUILD_DIR)/utf16_le.obj \ + $(BUILD_DIR)/utf32_be.obj $(BUILD_DIR)/utf32_le.obj \ $(jp_objs) $(iso8859_objs) \ - euc_tw.obj euc_kr.obj big5.obj \ - gb18030.obj \ - koi8_r.obj \ - cp1251.obj \ - euc_jp_prop.obj sjis_prop.obj \ - unicode_unfold_key.obj unicode_fold1_key.obj \ - unicode_fold2_key.obj unicode_fold3_key.obj # koi8.obj - -onigsources = regerror.c regparse.c regext.c regcomp.c regexec.c regenc.c \ - regsyntax.c regtrav.c regversion.c reggnu.c st.c -posixsources = regposix.c regposerr.c + $(BUILD_DIR)/euc_tw.obj $(BUILD_DIR)/euc_kr.obj $(BUILD_DIR)/big5.obj \ + $(BUILD_DIR)/gb18030.obj \ + $(BUILD_DIR)/koi8_r.obj \ + $(BUILD_DIR)/cp1251.obj \ + $(BUILD_DIR)/euc_jp_prop.obj $(BUILD_DIR)/sjis_prop.obj \ + $(BUILD_DIR)/unicode_unfold_key.obj $(BUILD_DIR)/unicode_fold1_key.obj \ + $(BUILD_DIR)/unicode_fold2_key.obj $(BUILD_DIR)/unicode_fold3_key.obj # $(BUILD_DIR)/koi8.obj + +onigsources = $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regparse.c $(ONIG_DIR)/regext.c $(ONIG_DIR)/regcomp.c $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regenc.c \ + $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regtrav.c $(ONIG_DIR)/regversion.c $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/st.c +posixsources = $(ONIG_DIR)/regposix.c $(ONIG_DIR)/regposerr.c libsources = $(posixsources) $(onigsources) -rubysources = $(onigsources) patchfiles = re.c.168.patch re.c.181.patch distfiles = README COPYING HISTORY \ @@ -77,7 +75,7 @@ makeargs = $(MFLAGS) CPPFLAGS='$(CPPFLAGS)' CFLAGS='$(CFLAGS)' CC='$(CC)' .SUFFIXES: .SUFFIXES: .obj .c .h .ps .dvi .info .texinfo -.c.obj: +{$(ONIG_DIR)}.c{$(BUILD_DIR)}.obj: $(CC) $(CFLAGS) $(CPPFLAGS) $(DEFS) /I. /I.. /Fo$@ /c $< # targets @@ -96,58 +94,58 @@ $(libname): $(libobjs) $(encobjs) $(dllname): $(libobjs) $(encobjs) $(ARDLL) $(libobjs) $(encobjs) -Fe$@ $(ARDLL_FLAGS) -regparse.obj: regparse.c $(onigheaders) config.h st.h -regext.obj: regext.c $(onigheaders) config.h -regtrav.obj: regtrav.c $(onigheaders) config.h -regcomp.obj: regcomp.c $(onigheaders) config.h -regexec.obj: regexec.c regint.h regenc.h oniguruma.h config.h -reggnu.obj: reggnu.c regint.h regenc.h oniguruma.h config.h oniggnu.h -regerror.obj: regerror.c regint.h regenc.h oniguruma.h config.h -regenc.obj: regenc.c regenc.h oniguruma.h config.h -regsyntax.obj: regsyntax.c regint.h regenc.h oniguruma.h config.h -regversion.obj: regversion.c oniguruma.h config.h -regposix.obj: regposix.c $(posixheaders) oniguruma.h config.h -regposerr.obj: regposerr.c $(posixheaders) config.h -st.obj: st.c regint.h oniguruma.h config.h st.h -onig_init.obj: onig_init.c oniguruma.h - -ascii.obj: ascii.c regenc.h config.h -unicode.obj: unicode.c unicode_fold_data.c unicode_property_data.c regenc.h config.h -utf8.obj: utf8.c regenc.h config.h -utf16_be.obj: utf16_be.c regenc.h config.h -utf16_le.obj: utf16_le.c regenc.h config.h -utf32_be.obj: utf32_be.c regenc.h config.h -utf32_le.obj: utf32_le.c regenc.h config.h -euc_jp.obj: euc_jp.c regenc.h config.h -euc_tw.obj: euc_tw.c regenc.h config.h -euc_kr.obj: euc_kr.c regenc.h config.h -sjis.obj: sjis.c regenc.h config.h -iso8859_1.obj: iso8859_1.c regenc.h config.h -iso8859_2.obj: iso8859_2.c regenc.h config.h -iso8859_3.obj: iso8859_3.c regenc.h config.h -iso8859_4.obj: iso8859_4.c regenc.h config.h -iso8859_5.obj: iso8859_5.c regenc.h config.h -iso8859_6.obj: iso8859_6.c regenc.h config.h -iso8859_7.obj: iso8859_7.c regenc.h config.h -iso8859_8.obj: iso8859_8.c regenc.h config.h -iso8859_9.obj: iso8859_9.c regenc.h config.h -iso8859_10.obj: iso8859_10.c regenc.h config.h -iso8859_11.obj: iso8859_11.c regenc.h config.h -iso8859_13.obj: iso8859_13.c regenc.h config.h -iso8859_14.obj: iso8859_14.c regenc.h config.h -iso8859_15.obj: iso8859_15.c regenc.h config.h -iso8859_16.obj: iso8859_16.c regenc.h config.h -koi8.obj: koi8.c regenc.h config.h -koi8_r.obj: koi8_r.c regenc.h config.h -cp1251.obj: cp1251.c regenc.h config.h -big5.obj: big5.c regenc.h config.h -gb18030.obj: gb18030.c regenc.h config.h -euc_jp_prop.obj: euc_jp_prop.c regenc.h -sjis_prop.obj: sjis_prop.c regenc.h -unicode_unfold_key.obj: unicode_unfold_key.c regenc.h config.h -unicode_fold1_key.obj: unicode_fold1_key.c regenc.h config.h -unicode_fold2_key.obj: unicode_fold2_key.c regenc.h config.h -unicode_fold3_key.obj: unicode_fold3_key.c regenc.h config.h +$(BUILD_DIR)/regparse.obj: $(ONIG_DIR)/regparse.c $(onigheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/regext.obj: $(ONIG_DIR)/regext.c $(onigheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/regtrav.obj: $(ONIG_DIR)/regtrav.c $(onigheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/regcomp.obj: $(ONIG_DIR)/regcomp.c $(onigheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/regexec.obj: $(ONIG_DIR)/regexec.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/reggnu.obj: $(ONIG_DIR)/reggnu.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/oniggnu.h +$(BUILD_DIR)/regerror.obj: $(ONIG_DIR)/regerror.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regenc.obj: $(ONIG_DIR)/regenc.c $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regsyntax.obj: $(ONIG_DIR)/regsyntax.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/regenc.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regversion.obj: $(ONIG_DIR)/regversion.c $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regposix.obj: $(ONIG_DIR)/regposix.c $(posixheaders) $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/regposerr.obj: $(ONIG_DIR)/regposerr.c $(posixheaders) $(BUILD_DIR)/config.h +$(BUILD_DIR)/st.obj: $(ONIG_DIR)/st.c $(ONIG_DIR)/regint.h $(ONIG_DIR)/oniguruma.h $(BUILD_DIR)/config.h $(ONIG_DIR)/st.h +$(BUILD_DIR)/onig_init.obj: $(ONIG_DIR)/onig_init.c $(ONIG_DIR)/oniguruma.h + +$(BUILD_DIR)/ascii.obj: $(ONIG_DIR)/ascii.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/unicode.obj: $(ONIG_DIR)/unicode.c $(ONIG_DIR)/unicode_fold_data.c $(ONIG_DIR)/unicode_property_data.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf8.obj: $(ONIG_DIR)/utf8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf16_be.obj: $(ONIG_DIR)/utf16_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf16_le.obj: $(ONIG_DIR)/utf16_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf32_be.obj: $(ONIG_DIR)/utf32_be.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/utf32_le.obj: $(ONIG_DIR)/utf32_le.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/euc_jp.obj: $(ONIG_DIR)/euc_jp.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/euc_tw.obj: $(ONIG_DIR)/euc_tw.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/euc_kr.obj: $(ONIG_DIR)/euc_kr.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/sjis.obj: $(ONIG_DIR)/sjis.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_1.obj: $(ONIG_DIR)/iso8859_1.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_2.obj: $(ONIG_DIR)/iso8859_2.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_3.obj: $(ONIG_DIR)/iso8859_3.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_4.obj: $(ONIG_DIR)/iso8859_4.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_5.obj: $(ONIG_DIR)/iso8859_5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_6.obj: $(ONIG_DIR)/iso8859_6.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_7.obj: $(ONIG_DIR)/iso8859_7.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_8.obj: $(ONIG_DIR)/iso8859_8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_9.obj: $(ONIG_DIR)/iso8859_9.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_10.obj: $(ONIG_DIR)/iso8859_10.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_11.obj: $(ONIG_DIR)/iso8859_11.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_13.obj: $(ONIG_DIR)/iso8859_13.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_14.obj: $(ONIG_DIR)/iso8859_14.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_15.obj: $(ONIG_DIR)/iso8859_15.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/iso8859_16.obj: $(ONIG_DIR)/iso8859_16.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/koi8.obj: $(ONIG_DIR)/koi8.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/koi8_r.obj: $(ONIG_DIR)/koi8_r.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/cp1251.obj: $(ONIG_DIR)/cp1251.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/big5.obj: $(ONIG_DIR)/big5.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/gb18030.obj: $(ONIG_DIR)/gb18030.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/euc_jp_prop.obj: $(ONIG_DIR)/euc_jp_prop.c $(ONIG_DIR)/regenc.h +$(BUILD_DIR)/sjis_prop.obj: $(ONIG_DIR)/sjis_prop.c $(ONIG_DIR)/regenc.h +$(BUILD_DIR)/unicode_unfold_key.obj: $(ONIG_DIR)/unicode_unfold_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/unicode_fold1_key.obj: $(ONIG_DIR)/unicode_fold1_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/unicode_fold2_key.obj: $(ONIG_DIR)/unicode_fold2_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h +$(BUILD_DIR)/unicode_fold3_key.obj: $(ONIG_DIR)/unicode_fold3_key.c $(ONIG_DIR)/regenc.h $(BUILD_DIR)/config.h # C library test ctest: $(testc) @@ -158,26 +156,23 @@ ptest: $(testp) .\$(testp) $(testc): $(testc).c $(libname) - $(CC) -nologo /Fe:$(testc) -DONIG_EXTERN=extern $(testc).c $(libname) + $(CC) -nologo -o $(testc) -DONIG_EXTERN=extern $(testc).c $(libname) $(testp): $(testc).c $(dlllib) - $(CC) -nologo -DPOSIX_TEST /Fe:$(testp) $(testc).c $(dlllib) - -#$(testc)u.c: test.rb testconvu.rb -# ruby -Ke testconvu.rb test.rb > $@ + $(CC) -nologo -DPOSIX_TEST -o $(testp) $(testc).c $(dlllib) $(testc)u: $(testc)u.c $(libname) - $(CC) -nologo /Fe:$(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname) + $(CC) -nologo -o $(testc)u -DONIG_EXTERN=extern $(testc)u.c $(libname) clean: - del *.obj *.lib *.exp *.dll $(testp).exe $(testc).exe $(testc).obj + del $(BUILD_DIR)\*.obj $(BUILD_DIR)\*.lib $(BUILD_DIR)\*.exp $(BUILD_DIR)\*.dll $(BUILD_DIR)\$(testp).exe $(BUILD_DIR)\$(testc).exe $(BUILD_DIR)\$(testc).obj samples: all - $(CC) $(CFLAGS) -I. /Fe:simple sample\simple.c $(dlllib) - $(CC) $(CFLAGS) -I. /Fe:posix sample\posix.c $(dlllib) - $(CC) $(CFLAGS) -I. /Fe:names sample\names.c $(dlllib) - $(CC) $(CFLAGS) -I. /Fe:listcap sample\listcap.c $(dlllib) - $(CC) $(CFLAGS) -I. /Fe:sql sample\sql.c $(dlllib) - $(CC) $(CFLAGS) -I. /Fe:encode sample\encode.c $(dlllib) - $(CC) $(CFLAGS) -I. /Fe:syntax sample\syntax.c $(dlllib) + $(CC) $(CFLAGS) -I. -o simple $(ONIG_DIR)\sample\simple.c $(dlllib) + $(CC) $(CFLAGS) -I. -o posix $(ONIG_DIR)\sample\posix.c $(dlllib) + $(CC) $(CFLAGS) -I. -o names $(ONIG_DIR)\sample\names.c $(dlllib) + $(CC) $(CFLAGS) -I. -o listcap $(ONIG_DIR)\sample\listcap.c $(dlllib) + $(CC) $(CFLAGS) -I. -o sql $(ONIG_DIR)\sample\sql.c $(dlllib) + $(CC) $(CFLAGS) -I. -o encode $(ONIG_DIR)\sample\encode.c $(dlllib) + $(CC) $(CFLAGS) -I. -o syntax $(ONIG_DIR)\sample\syntax.c $(dlllib) \ No newline at end of file diff --git a/src/ascii.c b/src/ascii.c index b21878d..7efaa26 100644 --- a/src/ascii.c +++ b/src/ascii.c @@ -2,7 +2,7 @@ ascii.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,59 @@ * SUCH DAMAGE. */ -#include "regenc.h" +#include "regint.h" /* for USE_CALLOUT */ + +static int +init(void) +{ +#ifdef USE_CALLOUT + + int id; + OnigEncoding enc; + char* name; + unsigned int t_long; + unsigned int args[4]; + OnigValue opts[4]; + + enc = ONIG_ENCODING_ASCII; + t_long = ONIG_TYPE_LONG; + + name = "FAIL"; BC0_P(name, fail); + name = "MISMATCH"; BC0_P(name, mismatch); + name = "MAX"; BC_B(name, max, 1, &t_long); + + name = "ERROR"; + args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT; + BC_P_O(name, error, 1, args, 1, opts); + + name = "COUNT"; + args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; + BC_B_O(name, count, 1, args, 1, opts); + + name = "TOTAL_COUNT"; + args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; + BC_B_O(name, total_count, 1, args, 1, opts); + + name = "CMP"; + args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + args[1] = ONIG_TYPE_STRING; + args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + BC_P(name, cmp, 3, args); + +#endif /* USE_CALLOUT */ + + return ONIG_NORMAL; +} + +#if 0 +static int +is_initialized(void) +{ + /* Don't use this function */ + /* can't answer, because builtin callout entries removed in onig_end() */ + return 0; +} +#endif static int ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) @@ -41,8 +93,8 @@ ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) OnigEncodingType OnigEncodingASCII = { onigenc_single_byte_mbc_enc_len, "US-ASCII", /* name */ - 1, /* max byte length */ - 1, /* min byte length */ + 1, /* max enc length */ + 1, /* min enc length */ onigenc_is_mbc_newline_0x0a, onigenc_single_byte_mbc_to_code, onigenc_single_byte_code_to_mbclen, @@ -55,7 +107,8 @@ OnigEncodingType OnigEncodingASCII = { onigenc_not_support_get_ctype_code_range, onigenc_single_byte_left_adjust_char_head, onigenc_always_true_is_allowed_reverse_match, - NULL, /* init */ - NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + init, + 0, /* is_initialized */ + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/big5.c b/src/big5.c index bc713ab..ff0c51b 100644 --- a/src/big5.c +++ b/src/big5.c @@ -2,7 +2,7 @@ big5.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -186,5 +186,6 @@ OnigEncodingType OnigEncodingBIG5 = { big5_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/config.h.cmake.in b/src/config.h.cmake.in index e13fad1..b59cc8d 100644 --- a/src/config.h.cmake.in +++ b/src/config.h.cmake.in @@ -31,6 +31,9 @@ /* Define to 1 if you have the header file. */ #cmakedefine HAVE_STRING_H ${HAVE_STRING_H} +/* Define to 1 if you have the header file. */ +#cmakedefine HAVE_LIMITS_H ${HAVE_LIMITS_H} + /* Define to 1 if you have the header file. */ #cmakedefine HAVE_SYS_TIMES_H ${HAVE_SYS_TIMES_H} @@ -64,9 +67,6 @@ /* Define to 1 if you have the ANSI C header files. */ #cmakedefine STDC_HEADERS ${STDC_HEADERS} -/* Define if combination explosion check */ -#cmakedefine USE_COMBINATION_EXPLOSION_CHECK ${USE_COMBINATION_EXPLOSION_CHECK} - /* Define if enable CR+NL as line terminator */ #cmakedefine USE_CRNL_AS_LINE_TERMINATOR ${USE_CRNL_AS_LINE_TERMINATOR} diff --git a/src/cp1251.c b/src/cp1251.c index 4d655bb..f7b43c3 100644 --- a/src/cp1251.c +++ b/src/cp1251.c @@ -2,7 +2,7 @@ cp1251.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2006-2016 Byte + * Copyright (c) 2006-2018 Byte * K.Kosako * All rights reserved. * @@ -199,5 +199,6 @@ OnigEncodingType OnigEncodingCP1251 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/euc_jp.c b/src/euc_jp.c index 42c3bce..8dd6ac1 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -2,7 +2,7 @@ euc_jp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -306,5 +306,6 @@ OnigEncodingType OnigEncodingEUC_JP = { is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/euc_kr.c b/src/euc_kr.c index 450caf1..08bfa1c 100644 --- a/src/euc_kr.c +++ b/src/euc_kr.c @@ -2,7 +2,7 @@ euc_kr.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -184,5 +184,6 @@ OnigEncodingType OnigEncodingEUC_CN = { euckr_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/euc_tw.c b/src/euc_tw.c index b3ee628..dbf0eac 100644 --- a/src/euc_tw.c +++ b/src/euc_tw.c @@ -2,7 +2,7 @@ euc_tw.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -167,5 +167,6 @@ OnigEncodingType OnigEncodingEUC_TW = { euctw_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/gb18030.c b/src/gb18030.c index c8b5865..073c83b 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -2,7 +2,7 @@ gb18030.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2005-2016 KUBO Takehiro + * Copyright (c) 2005-2018 KUBO Takehiro * K.Kosako * All rights reserved. * @@ -534,5 +534,6 @@ OnigEncodingType OnigEncodingGB18030 = { gb18030_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_1.c b/src/iso8859_1.c index 573931f..bcd7e26 100644 --- a/src/iso8859_1.c +++ b/src/iso8859_1.c @@ -2,7 +2,7 @@ iso8859_1.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -271,5 +271,6 @@ OnigEncodingType OnigEncodingISO_8859_1 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_10.c b/src/iso8859_10.c index 91b18d4..a5946cc 100644 --- a/src/iso8859_10.c +++ b/src/iso8859_10.c @@ -2,7 +2,7 @@ iso8859_10.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -238,5 +238,6 @@ OnigEncodingType OnigEncodingISO_8859_10 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_11.c b/src/iso8859_11.c index 518be25..ec94fd1 100644 --- a/src/iso8859_11.c +++ b/src/iso8859_11.c @@ -2,7 +2,7 @@ iso8859_11.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -95,5 +95,6 @@ OnigEncodingType OnigEncodingISO_8859_11 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_13.c b/src/iso8859_13.c index d1f39a2..fba7fd4 100644 --- a/src/iso8859_13.c +++ b/src/iso8859_13.c @@ -2,7 +2,7 @@ iso8859_13.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -227,5 +227,6 @@ OnigEncodingType OnigEncodingISO_8859_13 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_14.c b/src/iso8859_14.c index 3361b0d..e1f71f5 100644 --- a/src/iso8859_14.c +++ b/src/iso8859_14.c @@ -2,7 +2,7 @@ iso8859_14.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -240,5 +240,6 @@ OnigEncodingType OnigEncodingISO_8859_14 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_15.c b/src/iso8859_15.c index b09e876..236e9e7 100644 --- a/src/iso8859_15.c +++ b/src/iso8859_15.c @@ -2,7 +2,7 @@ iso8859_15.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -234,5 +234,6 @@ OnigEncodingType OnigEncodingISO_8859_15 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_16.c b/src/iso8859_16.c index 29a350d..42045bd 100644 --- a/src/iso8859_16.c +++ b/src/iso8859_16.c @@ -2,7 +2,7 @@ iso8859_16.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -236,5 +236,6 @@ OnigEncodingType OnigEncodingISO_8859_16 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_2.c b/src/iso8859_2.c index 9eb3536..db93046 100644 --- a/src/iso8859_2.c +++ b/src/iso8859_2.c @@ -2,7 +2,7 @@ iso8859_2.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -234,5 +234,6 @@ OnigEncodingType OnigEncodingISO_8859_2 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_3.c b/src/iso8859_3.c index 862823a..6fe5e6f 100644 --- a/src/iso8859_3.c +++ b/src/iso8859_3.c @@ -2,7 +2,7 @@ iso8859_3.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -234,5 +234,6 @@ OnigEncodingType OnigEncodingISO_8859_3 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_4.c b/src/iso8859_4.c index db706da..ee1eb93 100644 --- a/src/iso8859_4.c +++ b/src/iso8859_4.c @@ -2,7 +2,7 @@ iso8859_4.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -236,5 +236,6 @@ OnigEncodingType OnigEncodingISO_8859_4 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_5.c b/src/iso8859_5.c index 0e03e9c..7d828e1 100644 --- a/src/iso8859_5.c +++ b/src/iso8859_5.c @@ -2,7 +2,7 @@ iso8859_5.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -225,5 +225,6 @@ OnigEncodingType OnigEncodingISO_8859_5 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_6.c b/src/iso8859_6.c index 6289af5..a959e98 100644 --- a/src/iso8859_6.c +++ b/src/iso8859_6.c @@ -2,7 +2,7 @@ iso8859_6.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -95,5 +95,6 @@ OnigEncodingType OnigEncodingISO_8859_6 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_7.c b/src/iso8859_7.c index 75b520f..e695523 100644 --- a/src/iso8859_7.c +++ b/src/iso8859_7.c @@ -2,7 +2,7 @@ iso8859_7.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -221,5 +221,6 @@ OnigEncodingType OnigEncodingISO_8859_7 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_8.c b/src/iso8859_8.c index 5f18345..66b63b8 100644 --- a/src/iso8859_8.c +++ b/src/iso8859_8.c @@ -2,7 +2,7 @@ iso8859_8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -95,5 +95,6 @@ OnigEncodingType OnigEncodingISO_8859_8 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/iso8859_9.c b/src/iso8859_9.c index d0c06bb..d780293 100644 --- a/src/iso8859_9.c +++ b/src/iso8859_9.c @@ -2,7 +2,7 @@ iso8859_9.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -227,5 +227,6 @@ OnigEncodingType OnigEncodingISO_8859_9 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/koi8.c b/src/koi8.c index 80f89e9..8531825 100644 --- a/src/koi8.c +++ b/src/koi8.c @@ -2,7 +2,7 @@ koi8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -249,5 +249,6 @@ OnigEncodingType OnigEncodingKOI8 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/koi8_r.c b/src/koi8_r.c index f8ef34f..e88cfe3 100644 --- a/src/koi8_r.c +++ b/src/koi8_r.c @@ -2,7 +2,7 @@ koi8_r.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -211,5 +211,6 @@ OnigEncodingType OnigEncodingKOI8_R = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - onigenc_always_true_is_valid_mbc_string + onigenc_always_true_is_valid_mbc_string, + 0, 0, 0 }; diff --git a/src/onig_init.c b/src/onig_init.c index 9f53568..7ad98b7 100644 --- a/src/onig_init.c +++ b/src/onig_init.c @@ -2,7 +2,7 @@ onig_init.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2016 K.Kosako + * Copyright (c) 2016-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,7 @@ * SUCH DAMAGE. */ -#include "oniguruma.h" +#include "regint.h" /* onig_init(): deprecated function */ extern int diff --git a/src/oniggnu.h b/src/oniggnu.h index 3da9f23..d688883 100644 --- a/src/oniggnu.h +++ b/src/oniggnu.h @@ -35,10 +35,12 @@ extern "C" { #endif -#define RE_MBCTYPE_ASCII 0 -#define RE_MBCTYPE_EUC 1 -#define RE_MBCTYPE_SJIS 2 -#define RE_MBCTYPE_UTF8 3 +enum { + RE_MBCTYPE_ASCII = 0, + RE_MBCTYPE_EUC = 1, + RE_MBCTYPE_SJIS = 2, + RE_MBCTYPE_UTF8 = 3 +}; /* GNU regex options */ #ifndef RE_NREGS diff --git a/src/onigposix.h b/src/onigposix.h index 22211e4..da0f919 100644 --- a/src/onigposix.h +++ b/src/onigposix.h @@ -4,7 +4,7 @@ onigposix.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -97,7 +97,7 @@ typedef struct { #ifndef ONIG_EXTERN #if defined(_WIN32) && !defined(__GNUC__) -#if defined(EXPORT) +#if defined(ONIGURUMA_EXPORT) #define ONIG_EXTERN extern __declspec(dllexport) #else #define ONIG_EXTERN extern __declspec(dllimport) diff --git a/src/oniguruma.h b/src/oniguruma.h index 5ad4469..349c00e 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -4,7 +4,7 @@ oniguruma.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -35,30 +35,10 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 -#define ONIGURUMA_VERSION_MINOR 7 -#define ONIGURUMA_VERSION_TEENY 0 +#define ONIGURUMA_VERSION_MINOR 8 +#define ONIGURUMA_VERSION_TEENY 1 -#ifdef __cplusplus -# ifndef HAVE_PROTOTYPES -# define HAVE_PROTOTYPES 1 -# endif -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ -#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif - -#ifdef HAVE_STDARG_H -# ifndef HAVE_STDARG_PROTOTYPES -# define HAVE_STDARG_PROTOTYPES 1 -# endif -#endif +#define ONIGURUMA_VERSION_INT 60801 #ifndef P_ #if defined(__STDC__) || defined(_WIN32) @@ -69,16 +49,12 @@ extern "C" { #endif #ifndef PV_ -#ifdef HAVE_STDARG_PROTOTYPES # define PV_(args) args -#else -# define PV_(args) () -#endif #endif #ifndef ONIG_EXTERN #if defined(_WIN32) && !defined(__GNUC__) -#if defined(EXPORT) || defined(RUBY_EXPORT) +#if defined(ONIGURUMA_EXPORT) #define ONIG_EXTERN extern __declspec(dllexport) #else #define ONIG_EXTERN extern __declspec(dllimport) @@ -96,10 +72,6 @@ extern "C" { #define UChar OnigUChar #endif -#ifdef _WIN32 -#include -#endif - typedef unsigned int OnigCodePoint; typedef unsigned char OnigUChar; typedef unsigned int OnigCtype; @@ -166,6 +138,9 @@ typedef struct OnigEncodingTypeST { int (*init)(void); int (*is_initialized)(void); int (*is_valid_mbc_string)(const OnigUChar* s, const OnigUChar* end); + unsigned int flag; + OnigCodePoint sb_range; + int index; } OnigEncodingType; typedef OnigEncodingType* OnigEncoding; @@ -243,21 +218,24 @@ ONIG_EXTERN OnigEncodingType OnigEncodingGB18030; /* 18: 6(max-byte) * 3(case-fold chars) */ /* character types */ -#define ONIGENC_CTYPE_NEWLINE 0 -#define ONIGENC_CTYPE_ALPHA 1 -#define ONIGENC_CTYPE_BLANK 2 -#define ONIGENC_CTYPE_CNTRL 3 -#define ONIGENC_CTYPE_DIGIT 4 -#define ONIGENC_CTYPE_GRAPH 5 -#define ONIGENC_CTYPE_LOWER 6 -#define ONIGENC_CTYPE_PRINT 7 -#define ONIGENC_CTYPE_PUNCT 8 -#define ONIGENC_CTYPE_SPACE 9 -#define ONIGENC_CTYPE_UPPER 10 -#define ONIGENC_CTYPE_XDIGIT 11 -#define ONIGENC_CTYPE_WORD 12 -#define ONIGENC_CTYPE_ALNUM 13 /* alpha || digit */ -#define ONIGENC_CTYPE_ASCII 14 +typedef enum { + ONIGENC_CTYPE_NEWLINE = 0, + ONIGENC_CTYPE_ALPHA = 1, + ONIGENC_CTYPE_BLANK = 2, + ONIGENC_CTYPE_CNTRL = 3, + ONIGENC_CTYPE_DIGIT = 4, + ONIGENC_CTYPE_GRAPH = 5, + ONIGENC_CTYPE_LOWER = 6, + ONIGENC_CTYPE_PRINT = 7, + ONIGENC_CTYPE_PUNCT = 8, + ONIGENC_CTYPE_SPACE = 9, + ONIGENC_CTYPE_UPPER = 10, + ONIGENC_CTYPE_XDIGIT = 11, + ONIGENC_CTYPE_WORD = 12, + ONIGENC_CTYPE_ALNUM = 13, /* alpha || digit */ + ONIGENC_CTYPE_ASCII = 14 +} OnigEncCtype; + #define ONIGENC_MAX_STD_CTYPE ONIGENC_CTYPE_ASCII @@ -365,7 +343,8 @@ ONIG_EXTERN int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p)); ONIG_EXTERN int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end)); - +ONIG_EXTERN +UChar* onigenc_strdup P_((OnigEncoding enc, const UChar* s, const UChar* end)); /* PART: regular expression */ @@ -513,6 +492,8 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (1U<<25) /* (?~...) */ #define ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER (1U<<26) /* \X \y \Y */ #define ONIG_SYN_OP2_QMARK_PERL_SUBEXP_CALL (1U<<27) /* (?R), (?&name)... */ +#define ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS (1U<<28) /* (?{...}) (?{{...}}) */ +#define ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME (1U<<29) /* (*name) (*name{a,..}) */ /* syntax (behavior) */ #define ONIG_SYN_CONTEXT_INDEP_ANCHORS (1U<<31) /* not implemented */ @@ -552,6 +533,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIG_NORMAL 0 #define ONIG_MISMATCH -1 #define ONIG_NO_SUPPORT_CONFIG -2 +#define ONIG_ABORT -3 /* internal error */ #define ONIGERR_MEMORY -5 @@ -562,6 +544,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_UNEXPECTED_BYTECODE -14 #define ONIGERR_MATCH_STACK_LIMIT_OVER -15 #define ONIGERR_PARSE_DEPTH_LIMIT_OVER -16 +#define ONIGERR_RETRY_LIMIT_IN_MATCH_OVER -17 #define ONIGERR_DEFAULT_ENCODING_IS_NOT_SETTED -21 #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 #define ONIGERR_FAIL_TO_INITIALIZE -23 @@ -616,6 +599,12 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_INVALID_IF_ELSE_SYNTAX -224 #define ONIGERR_INVALID_ABSENT_GROUP_PATTERN -225 #define ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN -226 +#define ONIGERR_INVALID_CALLOUT_PATTERN -227 +#define ONIGERR_INVALID_CALLOUT_NAME -228 +#define ONIGERR_UNDEFINED_CALLOUT_NAME -229 +#define ONIGERR_INVALID_CALLOUT_BODY -230 +#define ONIGERR_INVALID_CALLOUT_TAG_NAME -231 +#define ONIGERR_INVALID_CALLOUT_ARG -232 #define ONIGERR_INVALID_CODE_POINT_VALUE -400 #define ONIGERR_INVALID_WIDE_CHAR_VALUE -400 #define ONIGERR_TOO_BIG_WIDE_CHAR_VALUE -401 @@ -681,49 +670,8 @@ extern void onig_null_warn P_((const char* s)); #define ONIG_CHAR_TABLE_SIZE 256 -typedef struct re_pattern_buffer { - /* common members of BBuf(bytes-buffer) */ - unsigned char* p; /* compiled pattern */ - unsigned int used; /* used space for p */ - unsigned int alloc; /* allocated space for p */ - - int num_mem; /* used memory(...) num counted from 1 */ - int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ - int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */ - int num_comb_exp_check; /* combination explosion check */ - int num_call; /* number of subexp call */ - unsigned int capture_history; /* (?@...) flag (1-31) */ - unsigned int bt_mem_start; /* need backtrack flag */ - unsigned int bt_mem_end; /* need backtrack flag */ - int stack_pop_level; - int repeat_range_alloc; - OnigRepeatRange* repeat_range; - - OnigEncoding enc; - OnigOptionType options; - OnigSyntaxType* syntax; - OnigCaseFoldType case_fold_flag; - void* name_table; - - /* optimization info (string search, char-map and anchors) */ - int optimize; /* optimize flag */ - int threshold_len; /* search str-length for apply optimize */ - int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ - OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */ - OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */ - int sub_anchor; /* start-anchor for exact or map */ - unsigned char *exact; - unsigned char *exact_end; - unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ - int *int_map; /* BM skip for exact_len > 255 */ - int *int_map_backward; /* BM skip for backward search */ - OnigLen dmin; /* min-distance of exact or map */ - OnigLen dmax; /* max-distance of exact or map */ - - /* regex_t link chain */ - struct re_pattern_buffer* chain; /* escape compile-conflict */ -} OnigRegexType; - +struct re_pattern_buffer; +typedef struct re_pattern_buffer OnigRegexType; typedef OnigRegexType* OnigRegex; #ifndef ONIG_ESCAPE_REGEX_T_COLLISION @@ -740,10 +688,74 @@ typedef struct { OnigCaseFoldType case_fold_flag; } OnigCompileInfo; + +/* types for callout */ +typedef enum { + ONIG_CALLOUT_IN_PROGRESS = 1, /* 1<<0 */ + ONIG_CALLOUT_IN_RETRACTION = 2 /* 1<<1 */ +} OnigCalloutIn; + +#define ONIG_CALLOUT_IN_BOTH (ONIG_CALLOUT_IN_PROGRESS | ONIG_CALLOUT_IN_RETRACTION) + +typedef enum { + ONIG_CALLOUT_OF_CONTENTS = 0, + ONIG_CALLOUT_OF_NAME = 1 +} OnigCalloutOf; + +typedef enum { + ONIG_CALLOUT_TYPE_SINGLE = 0, + ONIG_CALLOUT_TYPE_START_CALL = 1, + ONIG_CALLOUT_TYPE_BOTH_CALL = 2, + ONIG_CALLOUT_TYPE_START_MARK_END_CALL = 3, +} OnigCalloutType; + + +#define ONIG_NON_NAME_ID -1 +#define ONIG_NON_CALLOUT_NUM 0 + +#define ONIG_CALLOUT_MAX_ARGS_NUM 4 +#define ONIG_CALLOUT_DATA_SLOT_NUM 5 + +struct OnigCalloutArgsStruct; +typedef struct OnigCalloutArgsStruct OnigCalloutArgs; + +typedef int (*OnigCalloutFunc)(OnigCalloutArgs* args, void* user_data); + +/* callout function return values (less than -1: error code) */ +typedef enum { + ONIG_CALLOUT_FAIL = 1, + ONIG_CALLOUT_SUCCESS = 0 +} OnigCalloutResult; + +typedef enum { + ONIG_TYPE_VOID = 0, + ONIG_TYPE_LONG = 1<<0, + ONIG_TYPE_CHAR = 1<<1, + ONIG_TYPE_STRING = 1<<2, + ONIG_TYPE_POINTER = 1<<3, + ONIG_TYPE_TAG = 1<<4, +} OnigType; + +typedef union { + long l; + OnigCodePoint c; + struct { + OnigUChar* start; + OnigUChar* end; + } s; + void* p; + int tag; /* tag -> callout_num */ +} OnigValue; + + +struct OnigMatchParamStruct; +typedef struct OnigMatchParamStruct OnigMatchParam; + + /* Oniguruma Native API */ ONIG_EXTERN -int onig_initialize P_((OnigEncoding encodings[], int n)); +int onig_initialize P_((OnigEncoding encodings[], int number_of_encodings)); /* onig_init(): deprecated function. Use onig_initialize(). */ ONIG_EXTERN int onig_init P_((void)); @@ -756,7 +768,7 @@ void onig_set_verb_warn_func P_((OnigWarnFunc f)); ONIG_EXTERN int onig_new P_((OnigRegex*, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); ONIG_EXTERN -int onig_reg_init P_((regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax)); +int onig_reg_init P_((OnigRegex reg, OnigOptionType option, OnigCaseFoldType case_fold_flag, OnigEncoding enc, OnigSyntaxType* syntax)); int onig_new_without_alloc P_((OnigRegex, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax, OnigErrorInfo* einfo)); ONIG_EXTERN int onig_new_deluxe P_((OnigRegex* reg, const OnigUChar* pattern, const OnigUChar* pattern_end, OnigCompileInfo* ci, OnigErrorInfo* einfo)); @@ -765,12 +777,16 @@ void onig_free P_((OnigRegex)); ONIG_EXTERN void onig_free_body P_((OnigRegex)); ONIG_EXTERN -int onig_scan(regex_t* reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg); +int onig_scan(OnigRegex reg, const OnigUChar* str, const OnigUChar* end, OnigRegion* region, OnigOptionType option, int (*scan_callback)(int, int, OnigRegion*, void*), void* callback_arg); ONIG_EXTERN int onig_search P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN +int onig_search_with_param P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* start, const OnigUChar* range, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp)); +ONIG_EXTERN int onig_match P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option)); ONIG_EXTERN +int onig_match_with_param P_((OnigRegex, const OnigUChar* str, const OnigUChar* end, const OnigUChar* at, OnigRegion* region, OnigOptionType option, OnigMatchParam* mp)); +ONIG_EXTERN OnigRegion* onig_region_new P_((void)); ONIG_EXTERN void onig_region_init P_((OnigRegion* region)); @@ -843,6 +859,10 @@ unsigned int onig_get_match_stack_limit_size P_((void)); ONIG_EXTERN int onig_set_match_stack_limit_size P_((unsigned int size)); ONIG_EXTERN +unsigned long onig_get_retry_limit_in_match P_((void)); +ONIG_EXTERN +int onig_set_retry_limit_in_match P_((unsigned long n)); +ONIG_EXTERN unsigned int onig_get_parse_depth_limit P_((void)); ONIG_EXTERN int onig_set_capture_num_limit P_((int num)); @@ -857,6 +877,121 @@ const char* onig_version P_((void)); ONIG_EXTERN const char* onig_copyright P_((void)); +/* for OnigMatchParam */ +ONIG_EXTERN +OnigMatchParam* onig_new_match_param P_((void)); +ONIG_EXTERN +void onig_free_match_param P_((OnigMatchParam* p)); +ONIG_EXTERN +void onig_free_match_param_content P_((OnigMatchParam* p)); +ONIG_EXTERN +int onig_initialize_match_param P_((OnigMatchParam* mp)); +ONIG_EXTERN +int onig_set_match_stack_limit_size_of_match_param P_((OnigMatchParam* param, unsigned int limit)); +ONIG_EXTERN +int onig_set_retry_limit_in_match_of_match_param P_((OnigMatchParam* param, unsigned long limit)); +ONIG_EXTERN +int onig_set_progress_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f)); +ONIG_EXTERN +int onig_set_retraction_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f)); + +/* for callout functions */ +ONIG_EXTERN +OnigCalloutFunc onig_get_progress_callout P_((void)); +ONIG_EXTERN +int onig_set_progress_callout P_((OnigCalloutFunc f)); +ONIG_EXTERN +OnigCalloutFunc onig_get_retraction_callout P_((void)); +ONIG_EXTERN +int onig_set_retraction_callout P_((OnigCalloutFunc f)); +ONIG_EXTERN +int onig_set_callout_of_name P_((OnigEncoding enc, OnigCalloutType type, OnigUChar* name, OnigUChar* name_end, int callout_in, OnigCalloutFunc callout, OnigCalloutFunc end_callout, int arg_num, unsigned int arg_types[], int optional_arg_num, OnigValue opt_defaults[])); /* name: single-byte string */ +ONIG_EXTERN +OnigUChar* onig_get_callout_name_by_name_id P_((int id)); +ONIG_EXTERN +int onig_get_callout_num_by_tag P_((OnigRegex reg, const UChar* tag, const UChar* tag_end)); +ONIG_EXTERN +int onig_get_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const UChar* tag, const UChar* tag_end, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_set_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const UChar* tag, const UChar* tag_end, int slot, OnigType type, OnigValue* val)); + +/* used in callout functions */ +ONIG_EXTERN +int onig_get_callout_num_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +OnigCalloutIn onig_get_callout_in_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_get_name_id_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_contents_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_contents_end_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_get_args_num_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_get_passed_args_num_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_get_arg_by_callout_args P_((OnigCalloutArgs* args, int index, OnigType* type, OnigValue* val)); +ONIG_EXTERN +const OnigUChar* onig_get_string_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_string_end_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_start_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_right_range_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +const OnigUChar* onig_get_current_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +OnigRegex onig_get_regex_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +unsigned long onig_get_retry_counter_by_callout_args P_((OnigCalloutArgs* args)); +ONIG_EXTERN +int onig_callout_tag_is_exist_at_callout_num P_((OnigRegex reg, int callout_num)); +ONIG_EXTERN +const OnigUChar* onig_get_callout_tag_start P_((OnigRegex reg, int callout_num)); +ONIG_EXTERN +const OnigUChar* onig_get_callout_tag_end P_((OnigRegex reg, int callout_num)); +ONIG_EXTERN +int onig_get_callout_data_dont_clear_old P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_get_callout_data_by_callout_args_self_dont_clear_old P_((OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_get_callout_data P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_get_callout_data_by_callout_args P_((OnigCalloutArgs* args, int callout_num, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_get_callout_data_by_callout_args_self P_((OnigCalloutArgs* args, int slot, OnigType* type, OnigValue* val)); +ONIG_EXTERN +int onig_set_callout_data P_((OnigRegex reg, OnigMatchParam* mp, int callout_num, int slot, OnigType type, OnigValue* val)); +ONIG_EXTERN +int onig_set_callout_data_by_callout_args P_((OnigCalloutArgs* args, int callout_num, int slot, OnigType type, OnigValue* val)); +ONIG_EXTERN +int onig_set_callout_data_by_callout_args_self P_((OnigCalloutArgs* args, int slot, OnigType type, OnigValue* val)); +ONIG_EXTERN +int onig_get_capture_range_in_callout P_((OnigCalloutArgs* args, int mem_num, int* begin, int* end)); +ONIG_EXTERN +int onig_get_used_stack_size_in_callout P_((OnigCalloutArgs* args, int* used_num, int* used_bytes)); + +/* builtin callout functions */ +ONIG_EXTERN +int onig_builtin_fail P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_mismatch P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_error P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_count P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_total_count P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_max P_((OnigCalloutArgs* args, void* user_data)); +ONIG_EXTERN +int onig_builtin_cmp P_((OnigCalloutArgs* args, void* user_data)); + +ONIG_EXTERN +int onig_setup_builtin_monitors_by_ascii_encoded_name P_((void* fp)); + #ifdef __cplusplus } #endif diff --git a/src/regcomp.c b/src/regcomp.c index 63df18b..a19109f 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -2,7 +2,7 @@ regcomp.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -78,7 +78,7 @@ int_stack_push(int_stack* s, int v) { if (s->n >= s->alloc) { int new_size = s->alloc * 2; - int* nv = (int* )xrealloc(s->v, new_size); + int* nv = (int* )xrealloc(s->v, sizeof(int) * new_size); if (IS_NULL(nv)) return ONIGERR_MEMORY; s->alloc = new_size; @@ -121,26 +121,28 @@ onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag) return 0; } - -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS -static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; -#endif - -static UChar* -str_dup(UChar* s, UChar* end) +static int +int_multiply_cmp(int x, int y, int v) { - int len = (int )(end - s); + if (x == 0 || y == 0) return -1; - if (len > 0) { - UChar* r = (UChar* )xmalloc(len + 1); - CHECK_NULL_RETURN(r); - xmemcpy(r, s, len); - r[len] = (UChar )0; - return r; + if (x < INT_MAX / y) { + int xy = x * y; + if (xy > v) return 1; + else { + if (xy == v) return 0; + else return -1; + } } - else return NULL; + else + return 1; } + +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS +static unsigned char PadBuf[WORD_ALIGNMENT_SIZE]; +#endif + static void swap_node(Node* a, Node* b) { @@ -200,20 +202,6 @@ bitset_is_empty(BitSetRef bs) return 1; } -#ifdef ONIG_DEBUG -static int -bitset_on_num(BitSetRef bs) -{ - int i, n; - - n = 0; - for (i = 0; i < SINGLE_BYTE_SIZE; i++) { - if (BITSET_AT(bs, i)) n++; - } - return n; -} -#endif - extern int onig_bbuf_init(BBuf* buf, int size) { @@ -282,17 +270,6 @@ add_opcode(regex_t* reg, int opcode) return 0; } -#ifdef USE_COMBINATION_EXPLOSION_CHECK -static int -add_state_check_num(regex_t* reg, int num) -{ - StateCheckNumType n = (StateCheckNumType )num; - - BB_ADD(reg, &n, SIZE_STATE_CHECK_NUM); - return 0; -} -#endif - static int add_rel_addr(regex_t* reg, int addr) { @@ -811,7 +788,7 @@ compile_range_repeat_node(QuantNode* qn, int target_len, int empty_info, } static int -is_anychar_star_quantifier(QuantNode* qn) +is_anychar_infinite_greedy(QuantNode* qn) { if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) && NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) @@ -823,254 +800,21 @@ is_anychar_star_quantifier(QuantNode* qn) #define QUANTIFIER_EXPAND_LIMIT_SIZE 50 #define CKN_ON (ckn > 0) -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -static int -compile_length_quantifier_node(QuantNode* qn, regex_t* reg) -{ - int len, mod_tlen, cklen; - int ckn; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->body_empty_info; - int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); - - if (tlen < 0) return tlen; - - ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); - - cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0); - - /* anychar repeat */ - if (NODE_IS_ANYCHAR(NODE_QUANT_BODY(qn))) { - if (qn->greedy && infinite) { - if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) - return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen; - else - return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen; - } - } - - if (empty_info == QUANT_BODY_IS_NOT_EMPTY) - mod_tlen = tlen; - else - mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); - - if (infinite && qn->lower <= 1) { - if (qn->greedy) { - if (qn->lower == 1) - len = SIZE_OP_JUMP; - else - len = 0; - - len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP; - } - else { - if (qn->lower == 0) - len = SIZE_OP_JUMP; - else - len = 0; - - len += mod_tlen + SIZE_OP_PUSH + cklen; - } - } - else if (qn->upper == 0) { - if (qn->is_refered != 0) /* /(?..){0}/ */ - len = SIZE_OP_JUMP + tlen; - else - len = 0; - } - else if (qn->upper == 1 && qn->greedy) { - if (qn->lower == 0) { - if (CKN_ON) { - len = SIZE_OP_STATE_CHECK_PUSH + tlen; - } - else { - len = SIZE_OP_PUSH + tlen; - } - } - else { - len = tlen; - } - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ - len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen; - } - else { - len = SIZE_OP_REPEAT_INC + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM; - if (CKN_ON) - len += SIZE_OP_STATE_CHECK; - } - - return len; -} - -static int -compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) -{ - int r, mod_tlen; - int ckn; - int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->body_empty_info; - int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); - - if (tlen < 0) return tlen; - - ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0); - - if (is_anychar_star_quantifier(qn)) { - r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); - if (r != 0) return r; - if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) { - if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) - r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT); - else - r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT); - if (r != 0) return r; - if (CKN_ON) { - r = add_state_check_num(reg, ckn); - if (r != 0) return r; - } - - return add_bytes(reg, STR_(qn->next_head_exact)->s, 1); - } - else { - if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), reg))) { - r = add_opcode(reg, (CKN_ON ? - OP_STATE_CHECK_ANYCHAR_ML_STAR - : OP_ANYCHAR_ML_STAR)); - } - else { - r = add_opcode(reg, (CKN_ON ? - OP_STATE_CHECK_ANYCHAR_STAR - : OP_ANYCHAR_STAR)); - } - if (r != 0) return r; - if (CKN_ON) - r = add_state_check_num(reg, ckn); - - return r; - } - } - - if (empty_info == QUANT_BODY_IS_NOT_EMPTY) - mod_tlen = tlen; - else - mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); - - if (infinite && qn->lower <= 1) { - if (qn->greedy) { - if (qn->lower == 1) { - r = add_opcode_rel_addr(reg, OP_JUMP, - (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)); - if (r != 0) return r; - } - - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r != 0) return r; - r = add_state_check_num(reg, ckn); - if (r != 0) return r; - r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP); - } - if (r != 0) return r; - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); - if (r != 0) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, - -(mod_tlen + (int )SIZE_OP_JUMP - + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH))); - } - else { - if (qn->lower == 0) { - r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen); - if (r != 0) return r; - } - r = compile_tree_empty_check(NODE_QUANT_BODY(qn), reg, empty_info, env); - if (r != 0) return r; - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP); - if (r != 0) return r; - r = add_state_check_num(reg, ckn); - if (r != 0) return r; - r = add_rel_addr(reg, - -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP)); - } - else - r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH)); - } - } - else if (qn->upper == 0) { - if (qn->is_refered != 0) { /* /(?..){0}/ */ - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r != 0) return r; - r = compile_tree(NODE_QUANT_BODY(qn), reg, env); - } - else - r = 0; - } - else if (qn->upper == 1 && qn->greedy) { - if (qn->lower == 0) { - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r != 0) return r; - r = add_state_check_num(reg, ckn); - if (r != 0) return r; - r = add_rel_addr(reg, tlen); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, tlen); - } - if (r != 0) return r; - } - - r = compile_tree(NODE_QUANT_BODY(qn), reg, env); - } - else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */ - if (CKN_ON) { - r = add_opcode(reg, OP_STATE_CHECK_PUSH); - if (r != 0) return r; - r = add_state_check_num(reg, ckn); - if (r != 0) return r; - r = add_rel_addr(reg, SIZE_OP_JUMP); - } - else { - r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP); - } - - if (r != 0) return r; - r = add_opcode_rel_addr(reg, OP_JUMP, tlen); - if (r != 0) return r; - r = compile_tree(NODE_QUANT_BODY(qn), reg, env); - } - else { - r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg, env); - if (CKN_ON) { - if (r != 0) return r; - r = add_opcode(reg, OP_STATE_CHECK); - if (r != 0) return r; - r = add_state_check_num(reg, ckn); - } - } - return r; -} - -#else /* USE_COMBINATION_EXPLOSION_CHECK */ - static int compile_length_quantifier_node(QuantNode* qn, regex_t* reg) { int len, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->body_empty_info; + enum QuantBodyEmpty empty_info = qn->body_empty_info; int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (tlen < 0) return tlen; + if (tlen == 0) return 0; /* anychar repeat */ - if (is_anychar_star_quantifier(qn)) { - if (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE) { + if (is_anychar_infinite_greedy(qn)) { + if (qn->lower <= 1 || + int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0) { if (IS_NOT_NULL(qn->next_head_exact)) return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower; else @@ -1084,7 +828,8 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg) mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + (qn->lower <= 1 || + int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { len = SIZE_OP_JUMP; } @@ -1107,8 +852,9 @@ compile_length_quantifier_node(QuantNode* qn, regex_t* reg) len = SIZE_OP_JUMP + tlen; } else if (!infinite && qn->greedy && - (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + (qn->upper == 1 || + int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper, + QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { len = tlen * qn->lower; len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower); } @@ -1128,13 +874,15 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) { int i, r, mod_tlen; int infinite = IS_REPEAT_INFINITE(qn->upper); - int empty_info = qn->body_empty_info; + enum QuantBodyEmpty empty_info = qn->body_empty_info; int tlen = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (tlen < 0) return tlen; + if (tlen == 0) return 0; - if (is_anychar_star_quantifier(qn) && - (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + if (is_anychar_infinite_greedy(qn) && + (qn->lower <= 1 || + int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); if (r != 0) return r; if (IS_NOT_NULL(qn->next_head_exact)) { @@ -1159,7 +907,8 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) mod_tlen = tlen + (SIZE_OP_EMPTY_CHECK_START + SIZE_OP_EMPTY_CHECK_END); if (infinite && - (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + (qn->lower <= 1 || + int_multiply_cmp(tlen, qn->lower, QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) { if (qn->greedy) { if (IS_NOT_NULL(qn->head_exact)) @@ -1223,8 +972,9 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) r = compile_tree(NODE_QUANT_BODY(qn), reg, env); } else if (! infinite && qn->greedy && - (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper - <= QUANTIFIER_EXPAND_LIMIT_SIZE)) { + (qn->upper == 1 || + int_multiply_cmp(tlen + SIZE_OP_PUSH, qn->upper, + QUANTIFIER_EXPAND_LIMIT_SIZE) <= 0)) { int n = qn->upper - qn->lower; r = compile_tree_n_times(NODE_QUANT_BODY(qn), qn->lower, reg, env); @@ -1250,7 +1000,6 @@ compile_quantifier_node(QuantNode* qn, regex_t* reg, ScanEnv* env) } return r; } -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ static int compile_length_option_node(EnclosureNode* node, regex_t* reg) @@ -1358,7 +1107,7 @@ compile_length_enclosure_node(EnclosureNode* node, regex_t* reg) if (tlen < 0) return tlen; len = tlen * qn->lower - + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP; + + SIZE_OP_PUSH + tlen + SIZE_OP_POP_OUT + SIZE_OP_JUMP; } else { len = SIZE_OP_ATOMIC_START + tlen + SIZE_OP_ATOMIC_END; @@ -1505,14 +1254,14 @@ compile_enclosure_node(EnclosureNode* node, regex_t* reg, ScanEnv* env) len = compile_length_tree(NODE_QUANT_BODY(qn), reg); if (len < 0) return len; - r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP); + r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP_OUT + SIZE_OP_JUMP); if (r != 0) return r; r = compile_tree(NODE_QUANT_BODY(qn), reg, env); if (r != 0) return r; - r = add_opcode(reg, OP_POP); + r = add_opcode(reg, OP_POP_OUT); if (r != 0) return r; r = add_opcode_rel_addr(reg, OP_JUMP, - -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP)); + -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP_OUT + (int )SIZE_OP_JUMP)); } else { r = add_opcode(reg, OP_ATOMIC_START); @@ -1762,6 +1511,30 @@ compile_gimmick_node(GimmickNode* node, regex_t* reg) if (r != 0) return r; r = add_mem_num(reg, node->id); break; + +#ifdef USE_CALLOUT + case GIMMICK_CALLOUT: + switch (node->detail_type) { + case ONIG_CALLOUT_OF_CONTENTS: + case ONIG_CALLOUT_OF_NAME: + { + r = add_opcode(reg, (node->detail_type == ONIG_CALLOUT_OF_CONTENTS) ? + OP_CALLOUT_CONTENTS : OP_CALLOUT_NAME); + if (r != 0) return r; + if (node->detail_type == ONIG_CALLOUT_OF_NAME) { + r = add_mem_num(reg, node->id); + if (r != 0) return r; + } + r = add_mem_num(reg, node->num); + if (r != 0) return r; + } + break; + + default: + r = ONIGERR_TYPE_BUG; + break; + } +#endif } return r; @@ -1785,6 +1558,23 @@ compile_length_gimmick_node(GimmickNode* node, regex_t* reg) case GIMMICK_UPDATE_VAR: len = SIZE_OP_UPDATE_VAR; break; + +#ifdef USE_CALLOUT + case GIMMICK_CALLOUT: + switch (node->detail_type) { + case ONIG_CALLOUT_OF_CONTENTS: + len = SIZE_OP_CALLOUT_CONTENTS; + break; + case ONIG_CALLOUT_OF_NAME: + len = SIZE_OP_CALLOUT_NAME; + break; + + default: + len = ONIGERR_TYPE_BUG; + break; + } + break; +#endif } return len; @@ -2337,7 +2127,7 @@ disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env) #ifdef USE_CALL static int -unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg) +fix_unset_addr_list(UnsetAddrList* uslist, regex_t* reg) { int i, offset; EnclosureNode* en; @@ -3725,11 +3515,12 @@ expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end, regex_t* re } static int -expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], - UChar *p, int slen, UChar *end, regex_t* reg, - Node **rnode) +expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[], UChar *p, + int slen, UChar *end, regex_t* reg, Node **rnode) { - int r, i, j, len, varlen; + int r, i, j; + int len; + int varlen; Node *anode, *var_anode, *snode, *xnode, *an; UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; @@ -3972,145 +3763,8 @@ expand_case_fold_string(Node* node, regex_t* reg) return r; } - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define CEC_THRES_NUM_BIG_REPEAT 512 -#define CEC_INFINITE_NUM 0x7fffffff - -#define CEC_IN_INFINITE_REPEAT (1<<0) -#define CEC_IN_FINITE_REPEAT (1<<1) -#define CEC_CONT_BIG_REPEAT (1<<2) - -static int -setup_comb_exp_check(Node* node, int state, ScanEnv* env) -{ - int r = state; - - switch (NODE_TYPE(node)) { - case NODE_LIST: - { - do { - r = setup_comb_exp_check(NODE_CAR(node), r, env); - } while (r >= 0 && IS_NOT_NULL(node = NODE_CDR(node))); - } - break; - - case NODE_ALT: - { - int ret; - do { - ret = setup_comb_exp_check(NODE_CAR(node), state, env); - r |= ret; - } while (ret >= 0 && IS_NOT_NULL(node = NODE_CDR(node))); - } - break; - - case NODE_QUANT: - { - int var_num; - int child_state = state; - int add_state = 0; - QuantNode* qn = QUANT_(node); - Node* target = NODE_QUANT_BODY(qn); - - if (! IS_REPEAT_INFINITE(qn->upper)) { - if (qn->upper > 1) { - /* {0,1}, {1,1} are allowed */ - child_state |= CEC_IN_FINITE_REPEAT; - - /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */ - if (env->backrefed_mem == 0) { - if (NODE_TYPE(NODE_QUANT_BODY(qn)) == NODE_ENCLOSURE) { - EnclosureNode* en = ENCLOSURE_(NODE_QUANT_BODY(qn)); - if (en->type == ENCLOSURE_MEMORY) { - if (NODE_TYPE(NODE_ENCLOSURE_BODY(en)) == NODE_QUANT) { - QuantNode* q = QUANT_(NODE_ENCLOSURE_BODY(en)); - if (IS_REPEAT_INFINITE(q->upper) - && q->greedy == qn->greedy) { - qn->upper = (qn->lower == 0 ? 1 : qn->lower); - if (qn->upper == 1) - child_state = state; - } - } - } - } - } - } - } - - if (state & CEC_IN_FINITE_REPEAT) { - qn->comb_exp_check_num = -1; - } - else { - if (IS_REPEAT_INFINITE(qn->upper)) { - var_num = CEC_INFINITE_NUM; - child_state |= CEC_IN_INFINITE_REPEAT; - } - else { - var_num = qn->upper - qn->lower; - } - - if (var_num >= CEC_THRES_NUM_BIG_REPEAT) - add_state |= CEC_CONT_BIG_REPEAT; - - if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) || - ((state & CEC_CONT_BIG_REPEAT) != 0 && - var_num >= CEC_THRES_NUM_BIG_REPEAT)) { - if (qn->comb_exp_check_num == 0) { - env->num_comb_exp_check++; - qn->comb_exp_check_num = env->num_comb_exp_check; - if (env->curr_max_regnum > env->comb_exp_max_regnum) - env->comb_exp_max_regnum = env->curr_max_regnum; - } - } - } - - r = setup_comb_exp_check(target, child_state, env); - r |= add_state; - } - break; - - case NODE_ENCLOSURE: - { - EnclosureNode* en = ENCLOSURE_(node); - - switch (en->type) { - case ENCLOSURE_MEMORY: - { - if (env->curr_max_regnum < en->m.regnum) - env->curr_max_regnum = en->m.regnum; - - r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env); - } - break; - - default: - r = setup_comb_exp_check(NODE_ENCLOSURE_BODY(en), state, env); - break; - } - } - break; - -#ifdef USE_CALL - case NODE_CALL: - if (NODE_IS_RECURSION(node)) - env->has_recursion = 1; - else - r = setup_comb_exp_check(NODE_BODY(node), state, env); - break; -#endif - - default: - break; - } - - return r; -} -#endif - #ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT -static int +static enum QuantBodyEmpty quantifiers_memory_node_info(Node* node) { int r = QUANT_BODY_IS_EMPTY; @@ -4638,7 +4292,7 @@ setup_anchor(Node* node, regex_t* reg, int state, ScanEnv* env) #define ALLOWED_TYPE_IN_LB \ ( BIT_NODE_LIST | BIT_NODE_ALT | BIT_NODE_STRING | BIT_NODE_CCLASS \ | BIT_NODE_CTYPE | BIT_NODE_ANCHOR | BIT_NODE_ENCLOSURE | BIT_NODE_QUANT \ - | BIT_NODE_CALL ) + | BIT_NODE_CALL | BIT_NODE_GIMMICK) #define ALLOWED_ENCLOSURE_IN_LB ( 1<greedy && (qn->body_empty_info != 0)) { + if (qn->greedy && (qn->body_empty_info != QUANT_BODY_IS_NOT_EMPTY)) { if (NODE_TYPE(body) == NODE_QUANT) { QuantNode* tqn = QUANT_(body); if (IS_NOT_NULL(tqn->head_exact)) { @@ -4948,10 +4602,10 @@ set_bm_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED, typedef struct { OnigLen min; /* min byte length */ OnigLen max; /* max byte length */ -} MinMaxLen; +} MinMax; typedef struct { - MinMaxLen mmd; + MinMax mmd; OnigEncoding enc; OnigOptionType options; OnigCaseFoldType case_fold_flag; @@ -4964,35 +4618,35 @@ typedef struct { } OptAnc; typedef struct { - MinMaxLen mmd; /* info position */ + MinMax mmd; /* position */ OptAnc anc; - int reach_end; - int ignore_case; - int len; - UChar s[OPT_EXACT_MAXLEN]; + int reach_end; + int ignore_case; + int len; + UChar s[OPT_EXACT_MAXLEN]; } OptExact; typedef struct { - MinMaxLen mmd; /* info position */ - OptAnc anc; - int value; /* weighted value */ - UChar map[ONIG_CHAR_TABLE_SIZE]; + MinMax mmd; /* position */ + OptAnc anc; + int value; /* weighted value */ + UChar map[ONIG_CHAR_TABLE_SIZE]; } OptMap; typedef struct { - MinMaxLen len; - OptAnc anc; - OptExact exb; /* boundary */ - OptExact exm; /* middle */ - OptExact expr; /* prec read (?=...) */ - OptMap map; /* boundary */ + MinMax len; + OptAnc anc; + OptExact exb; /* boundary */ + OptExact exm; /* middle */ + OptExact expr; /* prec read (?=...) */ + OptMap map; /* boundary */ } NodeOpt; static int map_position_value(OnigEncoding enc, int i) { - static const short int ByteValTable[] = { + static const short int Vals[] = { 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, @@ -5003,18 +4657,18 @@ map_position_value(OnigEncoding enc, int i) 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1 }; - if (i < (int )(sizeof(ByteValTable)/sizeof(ByteValTable[0]))) { + if (i < (int )(sizeof(Vals)/sizeof(Vals[0]))) { if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1) return 20; else - return (int )ByteValTable[i]; + return (int )Vals[i]; } else return 4; /* Take it easy. */ } static int -distance_value(MinMaxLen* mm) +distance_value(MinMax* mm) { /* 1000 / (min-max-dist + 1) */ static const short int dist_vals[] = { @@ -5043,7 +4697,7 @@ distance_value(MinMaxLen* mm) } static int -comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) +comp_distance_value(MinMax* d1, MinMax* d2, int v1, int v2) { if (v2 <= 0) return -1; if (v1 <= 0) return 1; @@ -5060,40 +4714,40 @@ comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2) } static int -is_equal_mml(MinMaxLen* a, MinMaxLen* b) +is_equal_mml(MinMax* a, MinMax* b) { return (a->min == b->min && a->max == b->max) ? 1 : 0; } static void -set_mml(MinMaxLen* mml, OnigLen min, OnigLen max) +set_mml(MinMax* l, OnigLen min, OnigLen max) { - mml->min = min; - mml->max = max; + l->min = min; + l->max = max; } static void -clear_mml(MinMaxLen* mml) +clear_mml(MinMax* l) { - mml->min = mml->max = 0; + l->min = l->max = 0; } static void -copy_mml(MinMaxLen* to, MinMaxLen* from) +copy_mml(MinMax* to, MinMax* from) { to->min = from->min; to->max = from->max; } static void -add_mml(MinMaxLen* to, MinMaxLen* from) +add_mml(MinMax* to, MinMax* from) { to->min = distance_add(to->min, from->min); to->max = distance_add(to->max, from->max); } static void -alt_merge_mml(MinMaxLen* to, MinMaxLen* from) +alt_merge_mml(MinMax* to, MinMax* from) { if (to->min > from->min) to->min = from->min; if (to->max < from->max) to->max = from->max; @@ -5106,10 +4760,10 @@ copy_opt_env(OptEnv* to, OptEnv* from) } static void -clear_opt_anc_info(OptAnc* anc) +clear_opt_anc_info(OptAnc* a) { - anc->left = 0; - anc->right = 0; + a->left = 0; + a->right = 0; } static void @@ -5139,11 +4793,10 @@ concat_opt_anc_info(OptAnc* to, OptAnc* left, OptAnc* right, } static int -is_left(int anc) +is_left(int a) { - if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF || - anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ || - anc == ANCHOR_PREC_READ_NOT) + if (a == ANCHOR_END_BUF || a == ANCHOR_SEMI_END_BUF || + a == ANCHOR_END_LINE || a == ANCHOR_PREC_READ || a == ANCHOR_PREC_READ_NOT) return 0; return 1; @@ -5183,20 +4836,20 @@ alt_merge_opt_anc_info(OptAnc* to, OptAnc* add) } static int -is_full_opt_exact(OptExact* ex) +is_full_opt_exact(OptExact* e) { - return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0); + return (e->len >= OPT_EXACT_MAXLEN ? 1 : 0); } static void -clear_opt_exact(OptExact* ex) +clear_opt_exact(OptExact* e) { - clear_mml(&ex->mmd); - clear_opt_anc_info(&ex->anc); - ex->reach_end = 0; - ex->ignore_case = 0; - ex->len = 0; - ex->s[0] = '\0'; + clear_mml(&e->mmd); + clear_opt_anc_info(&e->anc); + e->reach_end = 0; + e->ignore_case = 0; + e->len = 0; + e->s[0] = '\0'; } static void @@ -5205,24 +4858,28 @@ copy_opt_exact(OptExact* to, OptExact* from) *to = *from; } -static void +static int concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc) { - int i, j, len; + int i, j, len, r; UChar *p, *end; OptAnc tanc; if (! to->ignore_case && add->ignore_case) { - if (to->len >= add->len) return ; /* avoid */ + if (to->len >= add->len) return 0; /* avoid */ to->ignore_case = 1; } + r = 0; p = add->s; end = p + add->len; for (i = to->len; p < end; ) { len = enclen(enc, p); - if (i + len > OPT_EXACT_MAXLEN) break; + if (i + len > OPT_EXACT_MAXLEN) { + r = 1; /* 1:full */ + break; + } for (j = 0; j < len && p < end; j++) to->s[i++] = *p++; } @@ -5233,11 +4890,12 @@ concat_opt_exact(OptExact* to, OptExact* add, OnigEncoding enc) concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1); if (! to->reach_end) tanc.right = 0; copy_opt_anc_info(&to->anc, &tanc); + + return r; } static void -concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, - int raw ARG_UNUSED, OnigEncoding enc) +concat_opt_exact_str(OptExact* to, UChar* s, UChar* end, OnigEncoding enc) { int i, j, len; UChar *p; @@ -5291,31 +4949,31 @@ alt_merge_opt_exact(OptExact* to, OptExact* add, OptEnv* env) static void select_opt_exact(OnigEncoding enc, OptExact* now, OptExact* alt) { - int v1, v2; + int vn, va; - v1 = now->len; - v2 = alt->len; + vn = now->len; + va = alt->len; - if (v2 == 0) { + if (va == 0) { return ; } - else if (v1 == 0) { + else if (vn == 0) { copy_opt_exact(now, alt); return ; } - else if (v1 <= 2 && v2 <= 2) { + else if (vn <= 2 && va <= 2) { /* ByteValTable[x] is big value --> low price */ - v2 = map_position_value(enc, now->s[0]); - v1 = map_position_value(enc, alt->s[0]); + va = map_position_value(enc, now->s[0]); + vn = map_position_value(enc, alt->s[0]); - if (now->len > 1) v1 += 5; - if (alt->len > 1) v2 += 5; + if (now->len > 1) vn += 5; + if (alt->len > 1) va += 5; } - if (now->ignore_case == 0) v1 *= 2; - if (alt->ignore_case == 0) v2 *= 2; + if (now->ignore_case == 0) vn *= 2; + if (alt->ignore_case == 0) va *= 2; - if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0) copy_opt_exact(now, alt); } @@ -5354,17 +5012,17 @@ copy_opt_map(OptMap* to, OptMap* from) } static void -add_char_opt_map(OptMap* map, UChar c, OnigEncoding enc) +add_char_opt_map(OptMap* m, UChar c, OnigEncoding enc) { - if (map->map[c] == 0) { - map->map[c] = 1; - map->value += map_position_value(enc, c); + if (m->map[c] == 0) { + m->map[c] = 1; + m->value += map_position_value(enc, c); } } static int add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end, - OnigEncoding enc, OnigCaseFoldType case_fold_flag) + OnigEncoding enc, OnigCaseFoldType fold_flag) { OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM]; UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN]; @@ -5372,8 +5030,8 @@ add_char_amb_opt_map(OptMap* map, UChar* p, UChar* end, add_char_opt_map(map, p[0], enc); - case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag); - n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items); + fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(fold_flag); + n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, fold_flag, p, end, items); if (n < 0) return n; for (i = 0; i < n; i++) { @@ -5389,7 +5047,7 @@ select_opt_map(OptMap* now, OptMap* alt) { static int z = 1<<15; /* 32768: something big value */ - int v1, v2; + int vn, va; if (alt->value == 0) return ; if (now->value == 0) { @@ -5397,9 +5055,9 @@ select_opt_map(OptMap* now, OptMap* alt) return ; } - v1 = z / now->value; - v2 = z / alt->value; - if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0) + vn = z / now->value; + va = z / alt->value; + if (comp_distance_value(&now->mmd, &alt->mmd, vn, va) > 0) copy_opt_map(now, alt); } @@ -5407,13 +5065,13 @@ static int comp_opt_exact_or_map(OptExact* e, OptMap* m) { #define COMP_EM_BASE 20 - int ve, vm; + int ae, am; if (m->value <= 0) return -1; - ve = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); - vm = COMP_EM_BASE * 5 * 2 / m->value; - return comp_distance_value(&e->mmd, &m->mmd, ve, vm); + ae = COMP_EM_BASE * e->len * (e->ignore_case ? 1 : 2); + am = COMP_EM_BASE * 5 * 2 / m->value; + return comp_distance_value(&e->mmd, &m->mmd, ae, am); } static void @@ -5444,11 +5102,11 @@ alt_merge_opt_map(OnigEncoding enc, OptMap* to, OptMap* add) } static void -set_bound_node_opt_info(NodeOpt* opt, MinMaxLen* mmd) +set_bound_node_opt_info(NodeOpt* opt, MinMax* plen) { - copy_mml(&(opt->exb.mmd), mmd); - copy_mml(&(opt->expr.mmd), mmd); - copy_mml(&(opt->map.mmd), mmd); + copy_mml(&(opt->exb.mmd), plen); + copy_mml(&(opt->expr.mmd), plen); + copy_mml(&(opt->map.mmd), plen); } static void @@ -5543,10 +5201,12 @@ alt_merge_node_opt_info(NodeOpt* to, NodeOpt* add, OptEnv* env) static int optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) { - OnigEncoding enc; int i; - int r = 0; + int r; + NodeOpt xo; + OnigEncoding enc; + r = 0; enc = env->enc; clear_node_opt_info(opt); set_bound_node_opt_info(opt, &env->mmd); @@ -5555,15 +5215,14 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) case NODE_LIST: { OptEnv nenv; - NodeOpt nopt; Node* nd = node; copy_opt_env(&nenv, env); do { - r = optimize_nodes(NODE_CAR(nd), &nopt, &nenv); + r = optimize_nodes(NODE_CAR(nd), &xo, &nenv); if (r == 0) { - add_mml(&nenv.mmd, &nopt.len); - concat_left_node_opt_info(enc, opt, &nopt); + add_mml(&nenv.mmd, &xo.len); + concat_left_node_opt_info(enc, opt, &xo); } } while (r == 0 && IS_NOT_NULL(nd = NODE_CDR(nd))); } @@ -5571,14 +5230,13 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) case NODE_ALT: { - NodeOpt nopt; Node* nd = node; do { - r = optimize_nodes(NODE_CAR(nd), &nopt, env); + r = optimize_nodes(NODE_CAR(nd), &xo, env); if (r == 0) { - if (nd == node) copy_node_opt_info(opt, &nopt); - else alt_merge_node_opt_info(opt, &nopt, env); + if (nd == node) copy_node_opt_info(opt, &xo); + else alt_merge_node_opt_info(opt, &xo, env); } } while ((r == 0) && IS_NOT_NULL(nd = NODE_CDR(nd))); } @@ -5588,11 +5246,10 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) { StrNode* sn = STR_(node); int slen = (int )(sn->end - sn->s); - int is_raw = NODE_STRING_IS_RAW(node); + /* int is_raw = NODE_STRING_IS_RAW(node); */ if (! NODE_STRING_IS_AMBIG(node)) { - concat_opt_exact_str(&opt->exb, sn->s, sn->end, - NODE_STRING_IS_RAW(node), enc); + concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc); if (slen > 0) { add_char_opt_map(&opt->map, *(sn->s), enc); } @@ -5606,7 +5263,7 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) max = ONIGENC_MBC_MAXLEN_DIST(enc) * n; } else { - concat_opt_exact_str(&opt->exb, sn->s, sn->end, is_raw, enc); + concat_opt_exact_str(&opt->exb, sn->s, sn->end, enc); opt->exb.ignore_case = 1; if (slen > 0) { @@ -5709,19 +5366,17 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) case ANCHOR_PREC_READ: { - NodeOpt nopt; - - r = optimize_nodes(NODE_BODY(node), &nopt, env); + r = optimize_nodes(NODE_BODY(node), &xo, env); if (r == 0) { - if (nopt.exb.len > 0) - copy_opt_exact(&opt->expr, &nopt.exb); - else if (nopt.exm.len > 0) - copy_opt_exact(&opt->expr, &nopt.exm); + if (xo.exb.len > 0) + copy_opt_exact(&opt->expr, &xo.exb); + else if (xo.exm.len > 0) + copy_opt_exact(&opt->expr, &xo.exm); opt->expr.reach_end = 0; - if (nopt.map.value > 0) - copy_opt_map(&opt->map, &nopt.map); + if (xo.map.value > 0) + copy_opt_map(&opt->map, &xo.map); } } break; @@ -5771,48 +5426,47 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) case NODE_QUANT: { OnigLen min, max; - NodeOpt nopt; QuantNode* qn = QUANT_(node); - r = optimize_nodes(NODE_BODY(node), &nopt, env); + r = optimize_nodes(NODE_BODY(node), &xo, env); if (r != 0) break; - if (qn->lower == 0 && IS_REPEAT_INFINITE(qn->upper)) { + if (qn->lower > 0) { + copy_node_opt_info(opt, &xo); + if (xo.exb.len > 0) { + if (xo.exb.reach_end) { + for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) { + int rc = concat_opt_exact(&opt->exb, &xo.exb, enc); + if (rc > 0) break; + } + if (i < qn->lower) opt->exb.reach_end = 0; + } + } + + if (qn->lower != qn->upper) { + opt->exb.reach_end = 0; + opt->exm.reach_end = 0; + } + if (qn->lower > 1) + opt->exm.reach_end = 0; + } + + if (IS_REPEAT_INFINITE(qn->upper)) { if (env->mmd.max == 0 && NODE_IS_ANYCHAR(NODE_BODY(node)) && qn->greedy != 0) { if (IS_MULTILINE(CTYPE_OPTION(NODE_QUANT_BODY(qn), env))) - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML); + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_ML); else - add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR); + add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF); } + + max = (xo.len.max > 0 ? INFINITE_LEN : 0); } else { - if (qn->lower > 0) { - copy_node_opt_info(opt, &nopt); - if (nopt.exb.len > 0) { - if (nopt.exb.reach_end) { - for (i = 2; i <= qn->lower && ! is_full_opt_exact(&opt->exb); i++) { - concat_opt_exact(&opt->exb, &nopt.exb, enc); - } - if (i < qn->lower) opt->exb.reach_end = 0; - } - } - - if (qn->lower != qn->upper) { - opt->exb.reach_end = 0; - opt->exm.reach_end = 0; - } - if (qn->lower > 1) - opt->exm.reach_end = 0; - } + max = distance_multiply(xo.len.max, qn->upper); } - min = distance_multiply(nopt.len.min, qn->lower); - if (IS_REPEAT_INFINITE(qn->upper)) - max = (nopt.len.max > 0 ? INFINITE_LEN : 0); - else - max = distance_multiply(nopt.len.max, qn->upper); - + min = distance_multiply(xo.len.min, qn->lower); set_mml(&opt->len, min, max); } break; @@ -5848,9 +5502,9 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) #endif { r = optimize_nodes(NODE_BODY(node), opt, env); - if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) { + if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK)) { if (MEM_STATUS_AT0(env->scan_env->backrefed_mem, en->m.regnum)) - remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK); + remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_INF_MASK); } } break; @@ -5862,24 +5516,23 @@ optimize_nodes(Node* node, NodeOpt* opt, OptEnv* env) case ENCLOSURE_IF_ELSE: { OptEnv nenv; - NodeOpt nopt; copy_opt_env(&nenv, env); - r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &nopt, &nenv); + r = optimize_nodes(NODE_ENCLOSURE_BODY(en), &xo, &nenv); if (r == 0) { - add_mml(&nenv.mmd, &nopt.len); - concat_left_node_opt_info(enc, opt, &nopt); + add_mml(&nenv.mmd, &xo.len); + concat_left_node_opt_info(enc, opt, &xo); if (IS_NOT_NULL(en->te.Then)) { - r = optimize_nodes(en->te.Then, &nopt, &nenv); + r = optimize_nodes(en->te.Then, &xo, &nenv); if (r == 0) { - concat_left_node_opt_info(enc, opt, &nopt); + concat_left_node_opt_info(enc, opt, &xo); } } if (IS_NOT_NULL(en->te.Else)) { - r = optimize_nodes(en->te.Else, &nopt, env); + r = optimize_nodes(en->te.Else, &xo, env); if (r == 0) - alt_merge_node_opt_info(opt, &nopt, env); + alt_merge_node_opt_info(opt, &xo, env); } } } @@ -5914,12 +5567,12 @@ set_optimize_exact(regex_t* reg, OptExact* e) CHECK_NULL_RETURN_MEMERR(reg->exact); xmemcpy(reg->exact, e->s, e->len); reg->exact_end = reg->exact + e->len; - reg->optimize = ONIG_OPTIMIZE_EXACT_IC; + reg->optimize = OPTIMIZE_EXACT_IC; } else { int allow_reverse; - reg->exact = str_dup(e->s, e->s + e->len); + reg->exact = onigenc_strdup(reg->enc, e->s, e->s + e->len); CHECK_NULL_RETURN_MEMERR(reg->exact); reg->exact_end = reg->exact + e->len; @@ -5932,10 +5585,10 @@ set_optimize_exact(regex_t* reg, OptExact* e) if (r != 0) return r; reg->optimize = (allow_reverse != 0 - ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV); + ? OPTIMIZE_EXACT_BM : OPTIMIZE_EXACT_BM_NO_REV); } else { - reg->optimize = ONIG_OPTIMIZE_EXACT; + reg->optimize = OPTIMIZE_EXACT; } } @@ -5957,7 +5610,7 @@ set_optimize_map(regex_t* reg, OptMap* m) for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) reg->map[i] = m->map[i]; - reg->optimize = ONIG_OPTIMIZE_MAP; + reg->optimize = OPTIMIZE_MAP; reg->dmin = m->mmd.min; reg->dmax = m->mmd.max; @@ -5994,11 +5647,11 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) if (r != 0) return r; reg->anchor = opt.anc.left & (ANCHOR_BEGIN_BUF | - ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML | + ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML | ANCHOR_LOOK_BEHIND); if ((opt.anc.left & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0) - reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML; + reg->anchor &= ~ANCHOR_ANYCHAR_INF_ML; reg->anchor |= opt.anc.right & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF | ANCHOR_PREC_READ_NOT); @@ -6038,7 +5691,7 @@ set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env) static void clear_optimize_info(regex_t* reg) { - reg->optimize = ONIG_OPTIMIZE_NONE; + reg->optimize = OPTIMIZE_NONE; reg->anchor = 0; reg->anchor_dmin = 0; reg->anchor_dmax = 0; @@ -6141,14 +5794,14 @@ print_anchor(FILE* f, int anchor) q = 1; fprintf(f, "end-line"); } - if (anchor & ANCHOR_ANYCHAR_STAR) { + if (anchor & ANCHOR_ANYCHAR_INF) { if (q) fprintf(f, ", "); q = 1; - fprintf(f, "anychar-star"); + fprintf(f, "anychar-inf"); } - if (anchor & ANCHOR_ANYCHAR_STAR_ML) { + if (anchor & ANCHOR_ANYCHAR_INF_ML) { if (q) fprintf(f, ", "); - fprintf(f, "anychar-star-ml"); + fprintf(f, "anychar-inf-ml"); } fprintf(f, "]"); @@ -6180,7 +5833,7 @@ print_optimize_info(FILE* f, regex_t* reg) } fprintf(f, "]: length: %ld\n", (reg->exact_end - reg->exact)); } - else if (reg->optimize & ONIG_OPTIMIZE_MAP) { + else if (reg->optimize & OPTIMIZE_MAP) { int c, i, n = 0; for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) @@ -6208,6 +5861,66 @@ print_optimize_info(FILE* f, regex_t* reg) #endif +extern RegexExt* +onig_get_regex_ext(regex_t* reg) +{ + if (IS_NULL(REG_EXTP(reg))) { + RegexExt* ext = (RegexExt* )xmalloc(sizeof(*ext)); + if (IS_NULL(ext)) return 0; + + ext->pattern = 0; + ext->pattern_end = 0; +#ifdef USE_CALLOUT + ext->tag_table = 0; + ext->callout_num = 0; + ext->callout_list_alloc = 0; + ext->callout_list = 0; +#endif + + REG_EXTPL(reg) = (void* )ext; + } + + return REG_EXTP(reg); +} + +static void +free_regex_ext(RegexExt* ext) +{ + if (IS_NOT_NULL(ext)) { + if (IS_NOT_NULL(ext->pattern)) + xfree((void* )ext->pattern); + +#ifdef USE_CALLOUT + if (IS_NOT_NULL(ext->tag_table)) + onig_callout_tag_table_free(ext->tag_table); + + if (IS_NOT_NULL(ext->callout_list)) + onig_free_reg_callout_list(ext->callout_num, ext->callout_list); +#endif + + xfree(ext); + } +} + +extern int +onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end) +{ + RegexExt* ext; + UChar* s; + + ext = onig_get_regex_ext(reg); + CHECK_NULL_RETURN_MEMERR(ext); + + s = onigenc_strdup(reg->enc, pattern, pattern_end); + CHECK_NULL_RETURN_MEMERR(s); + + ext->pattern = s; + ext->pattern_end = s + (pattern_end - pattern); + + return ONIG_NORMAL; +} + + extern void onig_free_body(regex_t* reg) { @@ -6217,7 +5930,10 @@ onig_free_body(regex_t* reg) if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map); if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward); if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range); - if (IS_NOT_NULL(REG_EXTP(reg))) xfree(REG_EXTP(reg)); + if (IS_NOT_NULL(REG_EXTP(reg))) { + free_regex_ext(REG_EXTP(reg)); + REG_EXTPL(reg) = 0; + } onig_names_free(reg); } @@ -6245,9 +5961,6 @@ onig_transfer(regex_t* to, regex_t* from) } -#ifdef ONIG_DEBUG_COMPILE -static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg)); -#endif #ifdef ONIG_DEBUG_PARSE static void print_tree P_((FILE* f, Node* node)); #endif @@ -6286,9 +5999,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, reg->num_null_check = 0; reg->repeat_range_alloc = 0; reg->repeat_range = (OnigRepeatRange* )NULL; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - reg->num_comb_exp_check = 0; -#endif r = onig_parse_tree(&root, pattern, pattern_end, reg, &scan_env); if (r != 0) goto err; @@ -6346,33 +6056,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, } reg->bt_mem_start |= reg->bt_mem_end; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - if (scan_env.backrefed_mem == 0 -#ifdef USE_CALL - || scan_env.num_call == 0 -#endif - ) { - setup_comb_exp_check(root, 0, &scan_env); -#ifdef USE_CALL - if (scan_env.has_recursion != 0) { - scan_env.num_comb_exp_check = 0; - } - else -#endif - if (scan_env.comb_exp_max_regnum > 0) { - int i; - for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) { - if (MEM_STATUS_AT(scan_env.backrefed_mem, i) != 0) { - scan_env.num_comb_exp_check = 0; - break; - } - } - } - } - - reg->num_comb_exp_check = scan_env.num_comb_exp_check; -#endif - clear_optimize_info(reg); #ifndef ONIG_DONT_OPTIMIZE r = set_optimize_info_from_tree(root, reg, &scan_env); @@ -6398,13 +6081,17 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, r = add_opcode(reg, OP_END); #ifdef USE_CALL if (scan_env.num_call > 0) { - r = unset_addr_list_fix(&uslist, reg); + r = fix_unset_addr_list(&uslist, reg); unset_addr_list_end(&uslist); if (r != 0) goto err; } #endif - if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0)) + if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0) +#ifdef USE_CALLOUT + || (IS_NOT_NULL(REG_EXTP(reg)) && REG_EXTP(reg)->callout_num != 0) +#endif + ) reg->stack_pop_level = STACK_POP_LEVEL_ALL; else { if (reg->bt_mem_start != 0) @@ -6422,7 +6109,7 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, #ifdef ONIG_DEBUG_COMPILE onig_print_names(stderr, reg); - print_compiled_byte_code_list(stderr, reg); + onig_print_compiled_byte_code_list(stderr, reg); #endif end: @@ -6464,11 +6151,7 @@ onig_reg_init(regex_t* reg, OnigOptionType option, OnigCaseFoldType case_fold_fl #if 0 return ONIGERR_LIBRARY_IS_NOT_INITIALIZED; #else - r = onig_initialize(NULL, 0); - if (r != 0) - return ONIGERR_FAIL_TO_INITIALIZE; - - r = onig_initialize_encoding(enc); + r = onig_initialize(&enc, 1); if (r != 0) return ONIGERR_FAIL_TO_INITIALIZE; @@ -6569,16 +6252,21 @@ onig_initialize(OnigEncoding encodings[], int n) return r; } - return 0; + return ONIG_NORMAL; } -static OnigEndCallListItemType* EndCallTop; +typedef struct EndCallListItem { + struct EndCallListItem* next; + void (*func)(void); +} EndCallListItemType; + +static EndCallListItemType* EndCallTop; extern void onig_add_end_call(void (*func)(void)) { - OnigEndCallListItemType* item; + EndCallListItemType* item; - item = (OnigEndCallListItemType* )xmalloc(sizeof(*item)); + item = (EndCallListItemType* )xmalloc(sizeof(*item)); if (item == 0) return ; item->next = EndCallTop; @@ -6590,7 +6278,7 @@ extern void onig_add_end_call(void (*func)(void)) static void exec_end_call_list(void) { - OnigEndCallListItemType* prev; + EndCallListItemType* prev; void (*func)(void); while (EndCallTop != 0) { @@ -6608,6 +6296,12 @@ onig_end(void) { exec_end_call_list(); +#ifdef USE_CALLOUT + onig_global_callout_names_free(); +#endif + + onigenc_end(); + onig_inited = 0; return 0; @@ -6673,144 +6367,7 @@ onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc) } -#ifdef ONIG_DEBUG - -/* arguments type */ -#define ARG_SPECIAL -1 -#define ARG_NON 0 -#define ARG_RELADDR 1 -#define ARG_ABSADDR 2 -#define ARG_LENGTH 3 -#define ARG_MEMNUM 4 -#define ARG_OPTION 5 -#define ARG_STATE_CHECK 6 -#define ARG_MODE 7 - -OnigOpInfoType OnigOpInfo[] = { - { OP_FINISH, "finish", ARG_NON }, - { OP_END, "end", ARG_NON }, - { OP_EXACT1, "exact1", ARG_SPECIAL }, - { OP_EXACT2, "exact2", ARG_SPECIAL }, - { OP_EXACT3, "exact3", ARG_SPECIAL }, - { OP_EXACT4, "exact4", ARG_SPECIAL }, - { OP_EXACT5, "exact5", ARG_SPECIAL }, - { OP_EXACTN, "exactn", ARG_SPECIAL }, - { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, - { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, - { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, - { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, - { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, - { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, - { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, - { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, - { OP_CCLASS, "cclass", ARG_SPECIAL }, - { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, - { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, - { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, - { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, - { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, -#ifdef USE_OP_CCLASS_NODE - { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, -#endif - { OP_ANYCHAR, "anychar", ARG_NON }, - { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, - { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, - { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, - { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, - { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, - { OP_WORD, "word", ARG_NON }, - { OP_WORD_ASCII, "word-ascii", ARG_NON }, - { OP_NO_WORD, "not-word", ARG_NON }, - { OP_NO_WORD_ASCII, "not-word-ascii", ARG_NON }, - { OP_WORD_BOUNDARY, "word-boundary", ARG_MODE }, - { OP_NO_WORD_BOUNDARY, "not-word-boundary", ARG_MODE }, - { OP_WORD_BEGIN, "word-begin", ARG_MODE }, - { OP_WORD_END, "word-end", ARG_MODE }, - { OP_BEGIN_BUF, "begin-buf", ARG_NON }, - { OP_END_BUF, "end-buf", ARG_NON }, - { OP_BEGIN_LINE, "begin-line", ARG_NON }, - { OP_END_LINE, "end-line", ARG_NON }, - { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, - { OP_BEGIN_POSITION, "begin-position", ARG_NON }, - { OP_BACKREF1, "backref1", ARG_NON }, - { OP_BACKREF2, "backref2", ARG_NON }, - { OP_BACKREF_N, "backref-n", ARG_MEMNUM }, - { OP_BACKREF_N_IC, "backref-n-ic", ARG_SPECIAL }, - { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, - { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, - { OP_BACKREF_WITH_LEVEL, "backref_with_level", ARG_SPECIAL }, - { OP_BACKREF_CHECK, "backref_check", ARG_SPECIAL }, - { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL }, - { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, - { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, - { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, - { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, - { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, - { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, - { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, - { OP_SET_OPTION, "set-option", ARG_OPTION }, - { OP_FAIL, "fail", ARG_NON }, - { OP_JUMP, "jump", ARG_RELADDR }, - { OP_PUSH, "push", ARG_RELADDR }, - { OP_PUSH_SUPER, "push_SUPER", ARG_RELADDR }, - { OP_POP, "pop", ARG_NON }, - { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, - { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, - { OP_REPEAT, "repeat", ARG_SPECIAL }, - { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, - { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, - { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, - { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, - { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, - { OP_EMPTY_CHECK_START, "empty-check-start", ARG_MEMNUM }, - { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM }, - { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM }, - { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM }, - { OP_PREC_READ_START, "push-pos", ARG_NON }, - { OP_PREC_READ_END, "pop-pos", ARG_NON }, - { OP_PREC_READ_NOT_START, "prec-read-not-start", ARG_RELADDR }, - { OP_PREC_READ_NOT_END, "prec-read-not-end", ARG_NON }, - { OP_ATOMIC_START, "atomic-start", ARG_NON }, - { OP_ATOMIC_END, "atomic-end", ARG_NON }, - { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, - { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL }, - { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end", ARG_NON }, - { OP_CALL, "call", ARG_ABSADDR }, - { OP_RETURN, "return", ARG_NON }, - { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL }, - { OP_UPDATE_VAR, "update-var", ARG_SPECIAL }, - { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL }, - { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL }, - { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK }, - { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK }, - { OP_STATE_CHECK_ANYCHAR_ML_STAR, - "state-check-anychar-ml*", ARG_STATE_CHECK }, - { -1, "", ARG_NON } -}; - -static char* -op2name(int opcode) -{ - int i; - - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - if (opcode == OnigOpInfo[i].opcode) - return OnigOpInfo[i].name; - } - return ""; -} - -static int -op2arg_type(int opcode) -{ - int i; - - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { - if (opcode == OnigOpInfo[i].opcode) - return OnigOpInfo[i].arg_type; - } - return ARG_SPECIAL; -} +#ifdef ONIG_DEBUG_PARSE static void p_string(FILE* f, int len, UChar* s) @@ -6819,326 +6376,6 @@ p_string(FILE* f, int len, UChar* s) while (len-- > 0) { fputc(*s++, f); } } -static void -p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) -{ - int x = len * mb_len; - - fprintf(f, ":%d:", len); - while (x-- > 0) { fputc(*s++, f); } -} - -static void -p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start) -{ - RelAddrType curr = (RelAddrType )(p - start); - - fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr); -} - -extern void -onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, - OnigEncoding enc) -{ - int i, n, arg_type; - RelAddrType addr; - LengthType len; - MemNumType mem; - StateCheckNumType scn; - OnigCodePoint code; - OnigOptionType option; - ModeType mode; - UChar *q; - - fprintf(f, "%s", op2name(*bp)); - arg_type = op2arg_type(*bp); - if (arg_type != ARG_SPECIAL) { - bp++; - switch (arg_type) { - case ARG_NON: - break; - case ARG_RELADDR: - GET_RELADDR_INC(addr, bp); - fputc(':', f); - p_rel_addr(f, addr, bp, start); - break; - case ARG_ABSADDR: - GET_ABSADDR_INC(addr, bp); - fprintf(f, ":{/%d}", addr); - break; - case ARG_LENGTH: - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d", len); - break; - case ARG_MEMNUM: - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - fprintf(f, ":%d", mem); - break; - case ARG_OPTION: - { - OnigOptionType option = *((OnigOptionType* )bp); - bp += SIZE_OPTION; - fprintf(f, ":%d", option); - } - break; - - case ARG_STATE_CHECK: - scn = *((StateCheckNumType* )bp); - bp += SIZE_STATE_CHECK_NUM; - fprintf(f, ":%d", scn); - break; - - case ARG_MODE: - mode = *((ModeType* )bp); - bp += SIZE_MODE; - fprintf(f, ":%d", mode); - break; - } - } - else { - switch (*bp++) { - case OP_EXACT1: - case OP_ANYCHAR_STAR_PEEK_NEXT: - case OP_ANYCHAR_ML_STAR_PEEK_NEXT: - p_string(f, 1, bp++); break; - case OP_EXACT2: - p_string(f, 2, bp); bp += 2; break; - case OP_EXACT3: - p_string(f, 3, bp); bp += 3; break; - case OP_EXACT4: - p_string(f, 4, bp); bp += 4; break; - case OP_EXACT5: - p_string(f, 5, bp); bp += 5; break; - case OP_EXACTN: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 1, bp); - bp += len; - break; - - case OP_EXACTMB2N1: - p_string(f, 2, bp); bp += 2; break; - case OP_EXACTMB2N2: - p_string(f, 4, bp); bp += 4; break; - case OP_EXACTMB2N3: - p_string(f, 6, bp); bp += 6; break; - case OP_EXACTMB2N: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 2, bp); - bp += len * 2; - break; - case OP_EXACTMB3N: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 3, bp); - bp += len * 3; - break; - case OP_EXACTMBN: - { - int mb_len; - - GET_LENGTH_INC(mb_len, bp); - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d:%d:", mb_len, len); - n = len * mb_len; - while (n-- > 0) { fputc(*bp++, f); } - } - break; - - case OP_EXACT1_IC: - len = enclen(enc, bp); - p_string(f, len, bp); - bp += len; - break; - case OP_EXACTN_IC: - GET_LENGTH_INC(len, bp); - p_len_string(f, len, 1, bp); - bp += len; - break; - - case OP_CCLASS: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - fprintf(f, ":%d", n); - break; - - case OP_CCLASS_NOT: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - fprintf(f, ":%d", n); - break; - - case OP_CCLASS_MB: - case OP_CCLASS_MB_NOT: - GET_LENGTH_INC(len, bp); - q = bp; -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS - ALIGNMENT_RIGHT(q); -#endif - GET_CODE_POINT(code, q); - bp += len; - fprintf(f, ":%d:%d", (int )code, len); - break; - - case OP_CCLASS_MIX: - case OP_CCLASS_MIX_NOT: - n = bitset_on_num((BitSetRef )bp); - bp += SIZE_BITSET; - GET_LENGTH_INC(len, bp); - q = bp; -#ifndef PLATFORM_UNALIGNED_WORD_ACCESS - ALIGNMENT_RIGHT(q); -#endif - GET_CODE_POINT(code, q); - bp += len; - fprintf(f, ":%d:%d:%d", n, (int )code, len); - break; - -#ifdef USE_OP_CCLASS_NODE - case OP_CCLASS_NODE: - { - CClassNode *cc; - - GET_POINTER_INC(cc, bp); - n = bitset_on_num(cc->bs); - fprintf(f, ":%p:%d", cc, n); - } - break; -#endif - - case OP_BACKREF_N_IC: - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - fprintf(f, ":%d", mem); - break; - - case OP_BACKREF_MULTI_IC: - case OP_BACKREF_MULTI: - case OP_BACKREF_CHECK: - fputs(" ", f); - GET_LENGTH_INC(len, bp); - for (i = 0; i < len; i++) { - GET_MEMNUM_INC(mem, bp); - if (i > 0) fputs(", ", f); - fprintf(f, "%d", mem); - } - break; - - case OP_BACKREF_WITH_LEVEL: - GET_OPTION_INC(option, bp); - fprintf(f, ":%d", option); - /* fall */ - case OP_BACKREF_CHECK_WITH_LEVEL: - { - LengthType level; - - GET_LENGTH_INC(level, bp); - fprintf(f, ":%d", level); - - fputs(" ", f); - GET_LENGTH_INC(len, bp); - for (i = 0; i < len; i++) { - GET_MEMNUM_INC(mem, bp); - if (i > 0) fputs(", ", f); - fprintf(f, "%d", mem); - } - } - break; - - case OP_REPEAT: - case OP_REPEAT_NG: - { - mem = *((MemNumType* )bp); - bp += SIZE_MEMNUM; - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - fprintf(f, ":%d:%d", mem, addr); - } - break; - - case OP_PUSH_OR_JUMP_EXACT1: - case OP_PUSH_IF_PEEK_NEXT: - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - fputc(':', f); - p_rel_addr(f, addr, bp, start); - p_string(f, 1, bp); - bp += 1; - break; - - case OP_LOOK_BEHIND: - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d", len); - break; - - case OP_LOOK_BEHIND_NOT_START: - GET_RELADDR_INC(addr, bp); - GET_LENGTH_INC(len, bp); - fprintf(f, ":%d:", len); - p_rel_addr(f, addr, bp, start); - break; - - case OP_STATE_CHECK_PUSH: - case OP_STATE_CHECK_PUSH_OR_JUMP: - scn = *((StateCheckNumType* )bp); - bp += SIZE_STATE_CHECK_NUM; - addr = *((RelAddrType* )bp); - bp += SIZE_RELADDR; - fprintf(f, ":%d:", scn); - p_rel_addr(f, addr, bp, start); - break; - - case OP_PUSH_SAVE_VAL: - { - SaveType type; - GET_SAVE_TYPE_INC(type, bp); - GET_MEMNUM_INC(mem, bp); - fprintf(f, ":%d:%d", type, mem); - } - break; - - case OP_UPDATE_VAR: - { - UpdateVarType type; - GET_UPDATE_VAR_TYPE_INC(type, bp); - GET_MEMNUM_INC(mem, bp); - fprintf(f, ":%d:%d", type, mem); - } - break; - - default: - fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); - } - } - if (nextp) *nextp = bp; -} -#endif /* ONIG_DEBUG */ - -#ifdef ONIG_DEBUG_COMPILE -static void -print_compiled_byte_code_list(FILE* f, regex_t* reg) -{ - UChar* bp; - UChar* start = reg->p; - UChar* end = reg->p + reg->used; - - fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n", - reg->bt_mem_start, reg->bt_mem_end); - fprintf(f, "code-length: %d\n", reg->used); - - bp = start; - while (bp < end) { - int pos = bp - start; - - fprintf(f, "%4d: ", pos); - onig_print_compiled_byte_code(f, bp, &bp, start, reg->enc); - fprintf(f, "\n"); - } - fprintf(f, "\n"); -} -#endif - -#ifdef ONIG_DEBUG_PARSE - static void Indent(FILE* f, int indent) { @@ -7334,6 +6571,17 @@ print_indent_tree(FILE* f, Node* node, int indent) case GIMMICK_UPDATE_VAR: fprintf(f, "update_var:%d:%d", GIMMICK_(node)->detail_type, GIMMICK_(node)->id); break; +#ifdef USE_CALLOUT + case GIMMICK_CALLOUT: + switch (GIMMICK_(node)->detail_type) { + case ONIG_CALLOUT_OF_CONTENTS: + fprintf(f, "callout:contents:%d", GIMMICK_(node)->num); + break; + case ONIG_CALLOUT_OF_NAME: + fprintf(f, "callout:name:%d:%d", GIMMICK_(node)->id, GIMMICK_(node)->num); + break; + } +#endif } break; diff --git a/src/regenc.c b/src/regenc.c index 7ded5a8..21f3536 100644 --- a/src/regenc.c +++ b/src/regenc.c @@ -2,7 +2,7 @@ regenc.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -31,6 +31,66 @@ OnigEncoding OnigEncDefaultCharEncoding = ONIG_ENCODING_INIT_DEFAULT; +#define INITED_LIST_SIZE 20 + +static int InitedListNum; + +static struct { + OnigEncoding enc; + int inited; +} InitedList[INITED_LIST_SIZE]; + +static int +enc_inited_entry(OnigEncoding enc) +{ + int i; + + for (i = 0; i < InitedListNum; i++) { + if (InitedList[i].enc == enc) { + InitedList[i].inited = 1; + return i; + } + } + + i = InitedListNum; + if (i < INITED_LIST_SIZE - 1) { + InitedList[i].enc = enc; + InitedList[i].inited = 1; + InitedListNum++; + return i; + } + + return -1; +} + +static int +enc_is_inited(OnigEncoding enc) +{ + int i; + + for (i = 0; i < InitedListNum; i++) { + if (InitedList[i].enc == enc) { + return InitedList[i].inited; + } + } + + return 0; +} + +extern int +onigenc_end(void) +{ + int i; + + for (i = 0; i < InitedListNum; i++) { + InitedList[i].enc = 0; + InitedList[i].inited = 0; + } + + InitedListNum = 0; + return ONIG_NORMAL; +} + extern int onigenc_init(void) { @@ -40,8 +100,23 @@ onigenc_init(void) extern int onig_initialize_encoding(OnigEncoding enc) { - if (enc->init != 0 && (enc->is_initialized() == 0)) { - int r = (enc->init)(); + int r; + + if (enc != ONIG_ENCODING_ASCII && + ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) { + OnigEncoding ascii = ONIG_ENCODING_ASCII; + if (ascii->init != 0 && enc_is_inited(ascii) == 0) { + r = ascii->init(); + if (r != ONIG_NORMAL) return r; + enc_inited_entry(ascii); + } + } + + if (enc->init != 0 && + enc_is_inited(enc) == 0) { + r = (enc->init)(); + if (r == ONIG_NORMAL) + enc_inited_entry(enc); return r; } @@ -61,6 +136,25 @@ onigenc_set_default_encoding(OnigEncoding enc) return 0; } +extern UChar* +onigenc_strdup(OnigEncoding enc, const UChar* s, const UChar* end) +{ + int slen, term_len, i; + UChar *r; + + slen = (int )(end - s); + term_len = ONIGENC_MBC_MINLEN(enc); + + r = (UChar* )xmalloc(slen + term_len); + CHECK_NULL_RETURN(r); + xmemcpy(r, s, slen); + + for (i = 0; i < term_len; i++) + r[slen + i] = (UChar )0; + + return r; +} + extern UChar* onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s) { diff --git a/src/regenc.h b/src/regenc.h index cda3bcd..4dd89ba 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -4,7 +4,7 @@ regenc.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -29,11 +29,12 @@ * SUCH DAMAGE. */ -#ifndef PACKAGE -/* PACKAGE is defined in config.h */ -#include "config.h" +#ifndef ONIGURUMA_EXPORT +#define ONIGURUMA_EXPORT #endif +#include "config.h" + #ifdef ONIG_ESCAPE_UCHAR_COLLISION #undef ONIG_ESCAPE_UCHAR_COLLISION #endif @@ -118,51 +119,53 @@ struct PropertyNameCtype { #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII + /* for encoding system implementation (internal) */ -ONIG_EXTERN int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); -ONIG_EXTERN int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); -ONIG_EXTERN int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); +extern int onigenc_end(void); +extern int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +extern int onigenc_ascii_get_case_fold_codes_by_str P_((OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +extern int onigenc_apply_all_case_fold_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +extern int onigenc_get_case_fold_codes_by_str_with_map P_((int map_size, const OnigPairCaseFoldCodes map[], int ess_tsett_flag, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +extern int onigenc_not_support_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint* sb_out, const OnigCodePoint* ranges[])); +extern int onigenc_is_mbc_newline_0x0a P_((const UChar* p, const UChar* end)); /* methods for single byte encoding */ -ONIG_EXTERN int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); -ONIG_EXTERN int onigenc_single_byte_mbc_enc_len P_((const UChar* p)); -ONIG_EXTERN OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end)); -ONIG_EXTERN int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf)); -ONIG_EXTERN UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s)); -ONIG_EXTERN int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_always_true_is_valid_mbc_string P_((const UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_length_check_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end)); +extern int onigenc_ascii_mbc_case_fold P_((OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); +extern int onigenc_single_byte_mbc_enc_len P_((const UChar* p)); +extern OnigCodePoint onigenc_single_byte_mbc_to_code P_((const UChar* p, const UChar* end)); +extern int onigenc_single_byte_code_to_mbclen P_((OnigCodePoint code)); +extern int onigenc_single_byte_code_to_mbc P_((OnigCodePoint code, UChar *buf)); +extern UChar* onigenc_single_byte_left_adjust_char_head P_((const UChar* start, const UChar* s)); +extern int onigenc_always_true_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); +extern int onigenc_always_false_is_allowed_reverse_match P_((const UChar* s, const UChar* end)); +extern int onigenc_always_true_is_valid_mbc_string P_((const UChar* s, const UChar* end)); +extern int onigenc_length_check_is_valid_mbc_string P_((OnigEncoding enc, const UChar* s, const UChar* end)); /* methods for multi byte encoding */ -ONIG_EXTERN OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end)); -ONIG_EXTERN int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); -ONIG_EXTERN int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); -ONIG_EXTERN int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); -ONIG_EXTERN int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); -ONIG_EXTERN int onigenc_is_mbc_word_ascii P_((OnigEncoding enc, UChar* s, const UChar* end)); -ONIG_EXTERN int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); -ONIG_EXTERN int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); -ONIG_EXTERN int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN struct PropertyNameCtype* euc_jp_lookup_property_name P_((register const char *str, register unsigned int len)); -ONIG_EXTERN struct PropertyNameCtype* sjis_lookup_property_name P_((register const char *str, register unsigned int len)); -//ONIG_EXTERN const struct PropertyNameCtype* unicode_lookup_property_name P_((register const char *str, register unsigned int len)); +extern OnigCodePoint onigenc_mbn_mbc_to_code P_((OnigEncoding enc, const UChar* p, const UChar* end)); +extern int onigenc_mbn_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** p, const UChar* end, UChar* lower)); +extern int onigenc_mb2_code_to_mbclen P_((OnigCodePoint code)); +extern int onigenc_mb2_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +extern int onigenc_minimum_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); +extern int onigenc_unicode_property_name_to_ctype P_((OnigEncoding enc, UChar* p, UChar* end)); +extern int onigenc_is_mbc_word_ascii P_((OnigEncoding enc, UChar* s, const UChar* end)); +extern int onigenc_mb2_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); +extern int onigenc_mb4_code_to_mbclen P_((OnigCodePoint code)); +extern int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UChar *buf)); +extern int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); +extern struct PropertyNameCtype* euc_jp_lookup_property_name P_((register const char *str, register unsigned int len)); +extern struct PropertyNameCtype* sjis_lookup_property_name P_((register const char *str, register unsigned int len)); +//extern const struct PropertyNameCtype* unicode_lookup_property_name P_((register const char *str, register unsigned int len)); /* in enc/unicode.c */ -ONIG_EXTERN int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); -ONIG_EXTERN int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[])); -ONIG_EXTERN int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); -ONIG_EXTERN int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); -ONIG_EXTERN int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); -ONIG_EXTERN int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end)); +extern int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); +extern int onigenc_utf16_32_get_ctype_code_range P_((OnigCtype ctype, OnigCodePoint *sb_out, const OnigCodePoint* ranges[])); +extern int onigenc_unicode_ctype_code_range P_((OnigCtype ctype, const OnigCodePoint* ranges[])); +extern int onigenc_unicode_get_case_fold_codes_by_str P_((OnigEncoding enc, OnigCaseFoldType flag, const OnigUChar* p, const OnigUChar* end, OnigCaseFoldCodeItem items[])); +extern int onigenc_unicode_mbc_case_fold P_((OnigEncoding enc, OnigCaseFoldType flag, const UChar** pp, const UChar* end, UChar* fold)); +extern int onigenc_unicode_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); +extern int onigenc_egcb_is_break_position P_((OnigEncoding enc, UChar* p, UChar* prev, const UChar* start, const UChar* end)); #define UTF16_IS_SURROGATE_FIRST(c) (((c) & 0xfc) == 0xd8) @@ -218,21 +221,21 @@ extern int onig_codes_byte_at(OnigCodePoint code[], int at); #define ONIGENC_ISO_8859_1_TO_UPPER_CASE(c) \ OnigEncISO_8859_1_ToUpperCaseTable[c] -ONIG_EXTERN const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; -ONIG_EXTERN const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; +extern const UChar OnigEncISO_8859_1_ToLowerCaseTable[]; +extern const UChar OnigEncISO_8859_1_ToUpperCaseTable[]; -ONIG_EXTERN int +extern int onigenc_with_ascii_strncmp P_((OnigEncoding enc, const UChar* p, const UChar* end, const UChar* sascii /* ascii */, int n)); -ONIG_EXTERN UChar* +extern UChar* onigenc_step P_((OnigEncoding enc, const UChar* p, const UChar* end, int n)); /* defined in regexec.c, but used in enc/xxx.c */ extern int onig_is_in_code_range P_((const UChar* p, OnigCodePoint code)); -ONIG_EXTERN OnigEncoding OnigEncDefaultCharEncoding; -ONIG_EXTERN const UChar OnigEncAsciiToLowerCaseTable[]; -ONIG_EXTERN const UChar OnigEncAsciiToUpperCaseTable[]; -ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; +extern OnigEncoding OnigEncDefaultCharEncoding; +extern const UChar OnigEncAsciiToLowerCaseTable[]; +extern const UChar OnigEncAsciiToUpperCaseTable[]; +extern const unsigned short OnigEncAsciiCtypeTable[]; #define ONIGENC_IS_ASCII_CODE(code) ((code) < 0x80) @@ -249,4 +252,6 @@ ONIG_EXTERN const unsigned short OnigEncAsciiCtypeTable[]; #define ONIGENC_IS_UNICODE_ENCODING(enc) \ ((enc)->is_code_ctype == onigenc_unicode_is_code_ctype) +#define ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc) ((enc)->min_enc_len == 1) + #endif /* REGENC_H */ diff --git a/src/regerror.c b/src/regerror.c index e7d2570..70efe9a 100644 --- a/src/regerror.c +++ b/src/regerror.c @@ -2,7 +2,7 @@ regerror.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -43,19 +43,21 @@ onig_error_code_to_format(int code) { char *p; - if (code >= 0) return (UChar* )0; - switch (code) { case ONIG_MISMATCH: p = "mismatch"; break; case ONIG_NO_SUPPORT_CONFIG: p = "no support in this configuration"; break; + case ONIG_ABORT: + p = "abort"; break; case ONIGERR_MEMORY: p = "fail to memory allocation"; break; case ONIGERR_MATCH_STACK_LIMIT_OVER: p = "match-stack limit over"; break; case ONIGERR_PARSE_DEPTH_LIMIT_OVER: p = "parse depth limit over"; break; + case ONIGERR_RETRY_LIMIT_IN_MATCH_OVER: + p = "retry-limit-in-match over"; break; case ONIGERR_TYPE_BUG: p = "undefined type (bug)"; break; case ONIGERR_PARSER_BUG: @@ -172,6 +174,18 @@ onig_error_code_to_format(int code) p = "invalid absent group pattern"; break; case ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN: p = "invalid absent group generator pattern"; break; + case ONIGERR_INVALID_CALLOUT_PATTERN: + p = "invalid callout pattern"; break; + case ONIGERR_INVALID_CALLOUT_NAME: + p = "invalid callout name"; break; + case ONIGERR_UNDEFINED_CALLOUT_NAME: + p = "undefined callout name"; break; + case ONIGERR_INVALID_CALLOUT_BODY: + p = "invalid callout body"; break; + case ONIGERR_INVALID_CALLOUT_TAG_NAME: + p = "invalid callout tag name"; break; + case ONIGERR_INVALID_CALLOUT_ARG: + p = "invalid callout arg"; break; case ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION: p = "not supported encoding combination"; break; case ONIGERR_INVALID_COMBINATION_OF_OPTIONS: diff --git a/src/regexec.c b/src/regexec.c index 53f42ee..35e3698 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -2,7 +2,7 @@ regexec.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,11 +26,8 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ - #include "regint.h" -#define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - #define IS_MBC_WORD_ASCII_MODE(enc,s,end,mode) \ ((mode) == 0 ? ONIGENC_IS_MBC_WORD(enc,s,end) : ONIGENC_IS_MBC_WORD_ASCII(enc,s,end)) @@ -40,6 +37,565 @@ ONIGENC_IS_MBC_NEWLINE(enc,(p+enclen(enc,p)),end)) #endif +#define CHECK_INTERRUPT_IN_MATCH + +#ifdef USE_CALLOUT +typedef struct { + int last_match_at_call_counter; + struct { + OnigType type; + OnigValue val; + } slot[ONIG_CALLOUT_DATA_SLOT_NUM]; +} CalloutData; +#endif + +struct OnigMatchParamStruct { + unsigned int match_stack_limit; + unsigned long retry_limit_in_match; + OnigCalloutFunc progress_callout_of_contents; + OnigCalloutFunc retraction_callout_of_contents; +#ifdef USE_CALLOUT + int match_at_call_counter; + void* callout_user_data; + CalloutData* callout_data; + int callout_data_alloc_num; +#endif +}; + +extern int +onig_set_match_stack_limit_size_of_match_param(OnigMatchParam* param, + unsigned int limit) +{ + param->match_stack_limit = limit; + return ONIG_NORMAL; +} + +extern int +onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param, + unsigned long limit) +{ + param->retry_limit_in_match = limit; + return ONIG_NORMAL; +} + +extern int +onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f) +{ + param->progress_callout_of_contents = f; + return ONIG_NORMAL; +} + +extern int +onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f) +{ + param->retraction_callout_of_contents = f; + return ONIG_NORMAL; +} + + + +typedef struct { + void* stack_p; + int stack_n; + OnigOptionType options; + OnigRegion* region; + int ptr_num; + const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ + unsigned int match_stack_limit; + unsigned long retry_limit_in_match; + OnigMatchParam* mp; +#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE + int best_len; /* for ONIG_OPTION_FIND_LONGEST */ + UChar* best_s; +#endif +} MatchArg; + + +#ifdef ONIG_DEBUG + +/* arguments type */ +#define ARG_SPECIAL -1 +#define ARG_NON 0 +#define ARG_RELADDR 1 +#define ARG_ABSADDR 2 +#define ARG_LENGTH 3 +#define ARG_MEMNUM 4 +#define ARG_OPTION 5 +#define ARG_MODE 6 + +typedef struct { + short int opcode; + char* name; + short int arg_type; +} OpInfoType; + +static OpInfoType OpInfo[] = { + { OP_FINISH, "finish", ARG_NON }, + { OP_END, "end", ARG_NON }, + { OP_EXACT1, "exact1", ARG_SPECIAL }, + { OP_EXACT2, "exact2", ARG_SPECIAL }, + { OP_EXACT3, "exact3", ARG_SPECIAL }, + { OP_EXACT4, "exact4", ARG_SPECIAL }, + { OP_EXACT5, "exact5", ARG_SPECIAL }, + { OP_EXACTN, "exactn", ARG_SPECIAL }, + { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL }, + { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL }, + { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL }, + { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL }, + { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL }, + { OP_EXACTMBN, "exactmbn", ARG_SPECIAL }, + { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL }, + { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL }, + { OP_CCLASS, "cclass", ARG_SPECIAL }, + { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL }, + { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL }, + { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL }, + { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL }, + { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL }, +#ifdef USE_OP_CCLASS_NODE + { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL }, +#endif + { OP_ANYCHAR, "anychar", ARG_NON }, + { OP_ANYCHAR_ML, "anychar-ml", ARG_NON }, + { OP_ANYCHAR_STAR, "anychar*", ARG_NON }, + { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON }, + { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL }, + { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL }, + { OP_WORD, "word", ARG_NON }, + { OP_WORD_ASCII, "word-ascii", ARG_NON }, + { OP_NO_WORD, "not-word", ARG_NON }, + { OP_NO_WORD_ASCII, "not-word-ascii", ARG_NON }, + { OP_WORD_BOUNDARY, "word-boundary", ARG_MODE }, + { OP_NO_WORD_BOUNDARY, "not-word-boundary", ARG_MODE }, + { OP_WORD_BEGIN, "word-begin", ARG_MODE }, + { OP_WORD_END, "word-end", ARG_MODE }, + { OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "extended-grapheme-cluster-boundary", ARG_NON }, + { OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, "no-extended-grapheme-cluster-boundary", ARG_NON }, + { OP_BEGIN_BUF, "begin-buf", ARG_NON }, + { OP_END_BUF, "end-buf", ARG_NON }, + { OP_BEGIN_LINE, "begin-line", ARG_NON }, + { OP_END_LINE, "end-line", ARG_NON }, + { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON }, + { OP_BEGIN_POSITION, "begin-position", ARG_NON }, + { OP_BACKREF1, "backref1", ARG_NON }, + { OP_BACKREF2, "backref2", ARG_NON }, + { OP_BACKREF_N, "backref-n", ARG_MEMNUM }, + { OP_BACKREF_N_IC, "backref-n-ic", ARG_SPECIAL }, + { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL }, + { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL }, + { OP_BACKREF_WITH_LEVEL, "backref_with_level", ARG_SPECIAL }, + { OP_BACKREF_CHECK, "backref_check", ARG_SPECIAL }, + { OP_BACKREF_CHECK_WITH_LEVEL, "backref_check_with_level", ARG_SPECIAL }, + { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM }, + { OP_MEMORY_START, "mem-start", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM }, + { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM }, + { OP_MEMORY_END, "mem-end", ARG_MEMNUM }, + { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM }, + { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION }, + { OP_SET_OPTION, "set-option", ARG_OPTION }, + { OP_FAIL, "fail", ARG_NON }, + { OP_JUMP, "jump", ARG_RELADDR }, + { OP_PUSH, "push", ARG_RELADDR }, + { OP_PUSH_SUPER, "push-super", ARG_RELADDR }, + { OP_POP_OUT, "pop-out", ARG_NON }, + { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL }, + { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL }, + { OP_REPEAT, "repeat", ARG_SPECIAL }, + { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL }, + { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM }, + { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM }, + { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM }, + { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM }, + { OP_EMPTY_CHECK_START, "empty-check-start", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END, "empty-check-end", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END_MEMST,"empty-check-end-memst", ARG_MEMNUM }, + { OP_EMPTY_CHECK_END_MEMST_PUSH,"empty-check-end-memst-push", ARG_MEMNUM }, + { OP_PREC_READ_START, "push-pos", ARG_NON }, + { OP_PREC_READ_END, "pop-pos", ARG_NON }, + { OP_PREC_READ_NOT_START, "prec-read-not-start", ARG_RELADDR }, + { OP_PREC_READ_NOT_END, "prec-read-not-end", ARG_NON }, + { OP_ATOMIC_START, "atomic-start", ARG_NON }, + { OP_ATOMIC_END, "atomic-end", ARG_NON }, + { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL }, + { OP_LOOK_BEHIND_NOT_START, "look-behind-not-start", ARG_SPECIAL }, + { OP_LOOK_BEHIND_NOT_END, "look-behind-not-end", ARG_NON }, + { OP_CALL, "call", ARG_ABSADDR }, + { OP_RETURN, "return", ARG_NON }, + { OP_PUSH_SAVE_VAL, "push-save-val", ARG_SPECIAL }, + { OP_UPDATE_VAR, "update-var", ARG_SPECIAL }, +#ifdef USE_CALLOUT + { OP_CALLOUT_CONTENTS, "callout-contents", ARG_SPECIAL }, + { OP_CALLOUT_NAME, "callout-name", ARG_SPECIAL }, +#endif + { -1, "", ARG_NON } +}; + +static char* +op2name(int opcode) +{ + int i; + + for (i = 0; OpInfo[i].opcode >= 0; i++) { + if (opcode == OpInfo[i].opcode) + return OpInfo[i].name; + } + return ""; +} + +static int +op2arg_type(int opcode) +{ + int i; + + for (i = 0; OpInfo[i].opcode >= 0; i++) { + if (opcode == OpInfo[i].opcode) + return OpInfo[i].arg_type; + } + return ARG_SPECIAL; +} + +static void +p_string(FILE* f, int len, UChar* s) +{ + fputs(":", f); + while (len-- > 0) { fputc(*s++, f); } +} + +static void +p_len_string(FILE* f, LengthType len, int mb_len, UChar* s) +{ + int x = len * mb_len; + + fprintf(f, ":%d:", len); + while (x-- > 0) { fputc(*s++, f); } +} + +static void +p_rel_addr(FILE* f, RelAddrType rel_addr, UChar* p, UChar* start) +{ + RelAddrType curr = (RelAddrType )(p - start); + + fprintf(f, "{%d/%d}", rel_addr, curr + rel_addr); +} + +static int +bitset_on_num(BitSetRef bs) +{ + int i, n; + + n = 0; + for (i = 0; i < SINGLE_BYTE_SIZE; i++) { + if (BITSET_AT(bs, i)) n++; + } + return n; +} + +extern void +onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, + OnigEncoding enc) +{ + int i, n, arg_type; + RelAddrType addr; + LengthType len; + MemNumType mem; + OnigCodePoint code; + OnigOptionType option; + ModeType mode; + UChar *q; + + fprintf(f, "%s", op2name(*bp)); + arg_type = op2arg_type(*bp); + if (arg_type != ARG_SPECIAL) { + bp++; + switch (arg_type) { + case ARG_NON: + break; + case ARG_RELADDR: + GET_RELADDR_INC(addr, bp); + fputc(':', f); + p_rel_addr(f, addr, bp, start); + break; + case ARG_ABSADDR: + GET_ABSADDR_INC(addr, bp); + fprintf(f, ":{/%d}", addr); + break; + case ARG_LENGTH: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + case ARG_MEMNUM: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + case ARG_OPTION: + { + OnigOptionType option = *((OnigOptionType* )bp); + bp += SIZE_OPTION; + fprintf(f, ":%d", option); + } + break; + + case ARG_MODE: + mode = *((ModeType* )bp); + bp += SIZE_MODE; + fprintf(f, ":%d", mode); + break; + } + } + else { + switch (*bp++) { + case OP_EXACT1: + case OP_ANYCHAR_STAR_PEEK_NEXT: + case OP_ANYCHAR_ML_STAR_PEEK_NEXT: + p_string(f, 1, bp++); break; + case OP_EXACT2: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACT3: + p_string(f, 3, bp); bp += 3; break; + case OP_EXACT4: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACT5: + p_string(f, 5, bp); bp += 5; break; + case OP_EXACTN: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_EXACTMB2N1: + p_string(f, 2, bp); bp += 2; break; + case OP_EXACTMB2N2: + p_string(f, 4, bp); bp += 4; break; + case OP_EXACTMB2N3: + p_string(f, 6, bp); bp += 6; break; + case OP_EXACTMB2N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 2, bp); + bp += len * 2; + break; + case OP_EXACTMB3N: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 3, bp); + bp += len * 3; + break; + case OP_EXACTMBN: + { + int mb_len; + + GET_LENGTH_INC(mb_len, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:%d:", mb_len, len); + n = len * mb_len; + while (n-- > 0) { fputc(*bp++, f); } + } + break; + + case OP_EXACT1_IC: + len = enclen(enc, bp); + p_string(f, len, bp); + bp += len; + break; + case OP_EXACTN_IC: + GET_LENGTH_INC(len, bp); + p_len_string(f, len, 1, bp); + bp += len; + break; + + case OP_CCLASS: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + fprintf(f, ":%d", n); + break; + + case OP_CCLASS_MB: + case OP_CCLASS_MB_NOT: + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d", (int )code, len); + break; + + case OP_CCLASS_MIX: + case OP_CCLASS_MIX_NOT: + n = bitset_on_num((BitSetRef )bp); + bp += SIZE_BITSET; + GET_LENGTH_INC(len, bp); + q = bp; +#ifndef PLATFORM_UNALIGNED_WORD_ACCESS + ALIGNMENT_RIGHT(q); +#endif + GET_CODE_POINT(code, q); + bp += len; + fprintf(f, ":%d:%d:%d", n, (int )code, len); + break; + +#ifdef USE_OP_CCLASS_NODE + case OP_CCLASS_NODE: + { + CClassNode *cc; + + GET_POINTER_INC(cc, bp); + n = bitset_on_num(cc->bs); + fprintf(f, ":%p:%d", cc, n); + } + break; +#endif + + case OP_BACKREF_N_IC: + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + fprintf(f, ":%d", mem); + break; + + case OP_BACKREF_MULTI_IC: + case OP_BACKREF_MULTI: + case OP_BACKREF_CHECK: + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + break; + + case OP_BACKREF_WITH_LEVEL: + GET_OPTION_INC(option, bp); + fprintf(f, ":%d", option); + /* fall */ + case OP_BACKREF_CHECK_WITH_LEVEL: + { + LengthType level; + + GET_LENGTH_INC(level, bp); + fprintf(f, ":%d", level); + + fputs(" ", f); + GET_LENGTH_INC(len, bp); + for (i = 0; i < len; i++) { + GET_MEMNUM_INC(mem, bp); + if (i > 0) fputs(", ", f); + fprintf(f, "%d", mem); + } + } + break; + + case OP_REPEAT: + case OP_REPEAT_NG: + { + mem = *((MemNumType* )bp); + bp += SIZE_MEMNUM; + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fprintf(f, ":%d:%d", mem, addr); + } + break; + + case OP_PUSH_OR_JUMP_EXACT1: + case OP_PUSH_IF_PEEK_NEXT: + addr = *((RelAddrType* )bp); + bp += SIZE_RELADDR; + fputc(':', f); + p_rel_addr(f, addr, bp, start); + p_string(f, 1, bp); + bp += 1; + break; + + case OP_LOOK_BEHIND: + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d", len); + break; + + case OP_LOOK_BEHIND_NOT_START: + GET_RELADDR_INC(addr, bp); + GET_LENGTH_INC(len, bp); + fprintf(f, ":%d:", len); + p_rel_addr(f, addr, bp, start); + break; + + case OP_PUSH_SAVE_VAL: + { + SaveType type; + GET_SAVE_TYPE_INC(type, bp); + GET_MEMNUM_INC(mem, bp); + fprintf(f, ":%d:%d", type, mem); + } + break; + + case OP_UPDATE_VAR: + { + UpdateVarType type; + GET_UPDATE_VAR_TYPE_INC(type, bp); + GET_MEMNUM_INC(mem, bp); + fprintf(f, ":%d:%d", type, mem); + } + break; + +#ifdef USE_CALLOUT + case OP_CALLOUT_CONTENTS: + { + GET_MEMNUM_INC(mem, bp); // number + fprintf(f, ":%d", mem); + } + break; + + case OP_CALLOUT_NAME: + { + int id; + + GET_MEMNUM_INC(id, bp); // id + GET_MEMNUM_INC(mem, bp); // number + + fprintf(f, ":%d:%d", id, mem); + } + break; +#endif + + default: + fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n", *--bp); + } + } + if (nextp) *nextp = bp; +} +#endif /* ONIG_DEBUG */ + +#ifdef ONIG_DEBUG_COMPILE +extern void +onig_print_compiled_byte_code_list(FILE* f, regex_t* reg) +{ + UChar* bp; + UChar* start = reg->p; + UChar* end = reg->p + reg->used; + + fprintf(f, "bt_mem_start: 0x%x, bt_mem_end: 0x%x\n", + reg->bt_mem_start, reg->bt_mem_end); + fprintf(f, "code-length: %d\n", reg->used); + + bp = start; + while (bp < end) { + int pos = bp - start; + + fprintf(f, "%4d: ", pos); + onig_print_compiled_byte_code(f, bp, &bp, start, reg->enc); + fprintf(f, "\n"); + } + fprintf(f, "\n"); +} +#endif + + #ifdef USE_CAPTURE_HISTORY static void history_tree_free(OnigCaptureTreeNode* node); @@ -304,6 +860,45 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #endif } +#ifdef USE_CALLOUT +#define CALLOUT_BODY(func, ain, aname_id, anum, user, args, result) do { \ + args.in = (ain);\ + args.name_id = (aname_id);\ + args.num = anum;\ + args.regex = reg;\ + args.string = str;\ + args.string_end = end;\ + args.start = sstart;\ + args.right_range = right_range;\ + args.current = s;\ + args.retry_in_match_counter = retry_in_match_counter;\ + args.msa = msa;\ + args.stk_base = stk_base;\ + args.stk = stk;\ + args.mem_start_stk = mem_start_stk;\ + args.mem_end_stk = mem_end_stk;\ + result = (func)(&args, user);\ +} while (0) + +#define RETRACTION_CALLOUT(func, aname_id, anum, user) do {\ + int result;\ + OnigCalloutArgs args;\ + CALLOUT_BODY(func, ONIG_CALLOUT_IN_RETRACTION, aname_id, anum, user, args, result);\ + switch (result) {\ + case ONIG_CALLOUT_FAIL:\ + case ONIG_CALLOUT_SUCCESS:\ + break;\ + default:\ + if (result > 0) {\ + result = ONIGERR_INVALID_ARGUMENT;\ + }\ + best_len = result;\ + goto finish;\ + break;\ + }\ +} while(0) +#endif + /** stack **/ #define INVALID_STACK_INDEX -1 @@ -316,40 +911,43 @@ onig_region_copy(OnigRegion* to, OnigRegion* from) #define STK_ALT (0x0002 | STK_ALT_FLAG) #define STK_ALT_PREC_READ_NOT (0x0004 | STK_ALT_FLAG) #define STK_ALT_LOOK_BEHIND_NOT (0x0006 | STK_ALT_FLAG) + /* handled by normal-POP */ -#define STK_MEM_START 0x0100 -#define STK_MEM_END 0x8200 -#define STK_REPEAT_INC 0x0300 -#define STK_STATE_CHECK_MARK 0x1000 +#define STK_MEM_START 0x0010 +#define STK_MEM_END 0x8030 +#define STK_REPEAT_INC 0x0050 +#ifdef USE_CALLOUT +#define STK_CALLOUT 0x0070 +#endif + /* avoided by normal-POP */ #define STK_VOID 0x0000 /* for fill a blank */ #define STK_EMPTY_CHECK_START 0x3000 #define STK_EMPTY_CHECK_END 0x5000 /* for recursive call */ -#define STK_MEM_END_MARK 0x8400 -#define STK_TO_VOID_START 0x0500 /* mark for "(?>...)" */ -#define STK_REPEAT 0x0600 -#define STK_CALL_FRAME 0x0700 -#define STK_RETURN 0x0800 -#define STK_SAVE_VAL 0x0900 +#define STK_MEM_END_MARK 0x8100 +#define STK_TO_VOID_START 0x1200 /* mark for "(?>...)" */ +#define STK_REPEAT 0x0300 +#define STK_CALL_FRAME 0x0400 +#define STK_RETURN 0x0500 +#define STK_SAVE_VAL 0x0600 /* stack type check mask */ #define STK_MASK_POP_USED STK_ALT_FLAG -#define STK_MASK_TO_VOID_TARGET 0x10fe +#define STK_MASK_POP_HANDLED 0x0010 +#define STK_MASK_POP_HANDLED_TIL (STK_MASK_POP_HANDLED | 0x0004) +#define STK_MASK_TO_VOID_TARGET 0x100e #define STK_MASK_MEM_END_OR_MARK 0x8000 /* MEM_END or MEM_END_MARK */ typedef intptr_t StackIndex; typedef struct _StackType { unsigned int type; - int id; + int zid; union { struct { UChar *pcode; /* byte code position */ UChar *pstr; /* string position */ UChar *pstr_prev; /* previous char position of pstr */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - unsigned int state_check; -#endif } state; struct { int count; /* for OP_REPEAT_INC, OP_REPEAT_INC_NG */ @@ -378,142 +976,419 @@ typedef struct _StackType { UChar* v; UChar* v2; } val; +#ifdef USE_CALLOUT + struct { + int num; + OnigCalloutFunc func; + } callout; +#endif } u; } StackType; +#ifdef USE_CALLOUT + +struct OnigCalloutArgsStruct { + OnigCalloutIn in; + int name_id; /* name id or ONIG_NON_NAME_ID */ + int num; + OnigRegex regex; + const OnigUChar* string; + const OnigUChar* string_end; + const OnigUChar* start; + const OnigUChar* right_range; + const OnigUChar* current; // current matching position + unsigned long retry_in_match_counter; + + /* invisible to users */ + MatchArg* msa; + StackType* stk_base; + StackType* stk; + StackIndex* mem_start_stk; + StackIndex* mem_end_stk; +}; + +#endif + #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ (msa).start = (arg_start);\ + (msa).match_stack_limit = (mp)->match_stack_limit;\ + (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\ + (msa).mp = mp;\ (msa).best_len = ONIG_MISMATCH;\ (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \ } while(0) #else -#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start) do {\ +#define MATCH_ARG_INIT(msa, reg, arg_option, arg_region, arg_start, mp) do { \ (msa).stack_p = (void* )0;\ (msa).options = (arg_option);\ (msa).region = (arg_region);\ (msa).start = (arg_start);\ + (msa).match_stack_limit = (mp)->match_stack_limit;\ + (msa).retry_limit_in_match = (mp)->retry_limit_in_match;\ + (msa).mp = mp;\ (msa).ptr_num = (reg)->num_repeat + ((reg)->num_mem + 1) * 2; \ } while(0) #endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK - -#define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE 16 - -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do {\ - if ((state_num) > 0 && str_len >= STATE_CHECK_STRING_THRESHOLD_LEN) {\ - unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\ - offset = ((offset) * (state_num)) >> 3;\ - if (size > 0 && offset < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\ - if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) \ - (msa).state_check_buff = (void* )xmalloc(size);\ - else \ - (msa).state_check_buff = (void* )xalloca(size);\ - xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \ - (size_t )(size - (offset))); \ - (msa).state_check_buff_size = size;\ - }\ - else {\ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ - }\ +#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) + + +#define ALLOCA_PTR_NUM_LIMIT 50 + +#define STACK_INIT(stack_num) do {\ + if (msa->stack_p) {\ + is_alloca = 0;\ + alloc_base = msa->stack_p;\ + stk_base = (StackType* )(alloc_base\ + + (sizeof(StackIndex) * msa->ptr_num));\ + stk = stk_base;\ + stk_end = stk_base + msa->stack_n;\ + }\ + else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\ + is_alloca = 0;\ + alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\ + + sizeof(StackType) * (stack_num));\ + CHECK_NULL_RETURN_MEMERR(alloc_base);\ + stk_base = (StackType* )(alloc_base\ + + (sizeof(StackIndex) * msa->ptr_num));\ + stk = stk_base;\ + stk_end = stk_base + (stack_num);\ }\ else {\ - (msa).state_check_buff = (void* )0;\ - (msa).state_check_buff_size = 0;\ + is_alloca = 1;\ + alloc_base = (char* )xalloca(sizeof(StackIndex) * msa->ptr_num\ + + sizeof(StackType) * (stack_num));\ + CHECK_NULL_RETURN_MEMERR(alloc_base);\ + stk_base = (StackType* )(alloc_base\ + + (sizeof(StackIndex) * msa->ptr_num));\ + stk = stk_base;\ + stk_end = stk_base + (stack_num);\ }\ +} while(0); + + +#define STACK_SAVE do{\ + msa->stack_n = (int )(stk_end - stk_base);\ + if (is_alloca != 0) {\ + size_t size = sizeof(StackIndex) * msa->ptr_num \ + + sizeof(StackType) * msa->stack_n;\ + msa->stack_p = xmalloc(size);\ + CHECK_NULL_RETURN_MEMERR(msa->stack_p);\ + xmemcpy(msa->stack_p, alloc_base, size);\ + }\ + else {\ + msa->stack_p = alloc_base;\ + };\ } while(0) -#define MATCH_ARG_FREE(msa) do {\ - if ((msa).stack_p) xfree((msa).stack_p);\ - if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \ - if ((msa).state_check_buff) xfree((msa).state_check_buff);\ - }\ -} while(0) -#else -#define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) -#define MATCH_ARG_FREE(msa) if ((msa).stack_p) xfree((msa).stack_p) -#endif +#define UPDATE_FOR_STACK_REALLOC do{\ + repeat_stk = (StackIndex* )alloc_base;\ + mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\ + mem_end_stk = mem_start_stk + num_mem + 1;\ +} while(0) + +static unsigned int MatchStackLimit = DEFAULT_MATCH_STACK_LIMIT_SIZE; + +extern unsigned int +onig_get_match_stack_limit_size(void) +{ + return MatchStackLimit; +} + +extern int +onig_set_match_stack_limit_size(unsigned int size) +{ + MatchStackLimit = size; + return 0; +} + +#ifdef USE_RETRY_LIMIT_IN_MATCH + +static unsigned long RetryLimitInMatch = DEFAULT_RETRY_LIMIT_IN_MATCH; + +#define CHECK_RETRY_LIMIT_IN_MATCH do {\ + if (retry_in_match_counter++ > retry_limit_in_match) goto retry_limit_in_match_over;\ +} while (0) + +#else + +#define CHECK_RETRY_LIMIT_IN_MATCH + +#endif /* USE_RETRY_LIMIT_IN_MATCH */ + +extern unsigned long +onig_get_retry_limit_in_match(void) +{ +#ifdef USE_RETRY_LIMIT_IN_MATCH + return RetryLimitInMatch; +#else + //return ONIG_NO_SUPPORT_CONFIG; + return 0; +#endif +} + +extern int +onig_set_retry_limit_in_match(unsigned long size) +{ +#ifdef USE_RETRY_LIMIT_IN_MATCH + RetryLimitInMatch = size; + return 0; +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif +} + +static OnigCalloutFunc DefaultProgressCallout; +static OnigCalloutFunc DefaultRetractionCallout; + +extern OnigMatchParam* +onig_new_match_param(void) +{ + OnigMatchParam* p; + + p = (OnigMatchParam* )xmalloc(sizeof(*p)); + if (IS_NOT_NULL(p)) { + onig_initialize_match_param(p); + } + + return p; +} + +extern void +onig_free_match_param_content(OnigMatchParam* p) +{ +#ifdef USE_CALLOUT + if (IS_NOT_NULL(p->callout_data)) { + xfree(p->callout_data); + p->callout_data = 0; + } +#endif +} + +extern void +onig_free_match_param(OnigMatchParam* p) +{ + if (IS_NOT_NULL(p)) { + onig_free_match_param_content(p); + xfree(p); + } +} + +extern int +onig_initialize_match_param(OnigMatchParam* mp) +{ + mp->match_stack_limit = MatchStackLimit; +#ifdef USE_RETRY_LIMIT_IN_MATCH + mp->retry_limit_in_match = RetryLimitInMatch; +#endif + mp->progress_callout_of_contents = DefaultProgressCallout; + mp->retraction_callout_of_contents = DefaultRetractionCallout; + +#ifdef USE_CALLOUT + mp->match_at_call_counter = 0; + mp->callout_user_data = 0; + mp->callout_data = 0; + mp->callout_data_alloc_num = 0; +#endif + + return ONIG_NORMAL; +} + +#ifdef USE_CALLOUT + +static int +adjust_match_param(regex_t* reg, OnigMatchParam* mp) +{ + RegexExt* ext = REG_EXTP(reg); + + mp->match_at_call_counter = 0; + + if (IS_NULL(ext) || ext->callout_num == 0) return ONIG_NORMAL; + + if (ext->callout_num > mp->callout_data_alloc_num) { + CalloutData* d; + size_t n = ext->callout_num * sizeof(*d); + if (IS_NOT_NULL(mp->callout_data)) + d = (CalloutData* )xrealloc(mp->callout_data, n); + else + d = (CalloutData* )xmalloc(n); + CHECK_NULL_RETURN_MEMERR(d); + + mp->callout_data = d; + mp->callout_data_alloc_num = ext->callout_num; + } + + xmemset(mp->callout_data, 0, mp->callout_data_alloc_num * sizeof(CalloutData)); + return ONIG_NORMAL; +} + +#define ADJUST_MATCH_PARAM(reg, mp) \ + r = adjust_match_param(reg, mp);\ + if (r != ONIG_NORMAL) return r; + +#define CALLOUT_DATA_AT_NUM(mp, num) ((mp)->callout_data + ((num) - 1)) + +extern int +onig_check_callout_data_and_clear_old_values(OnigCalloutArgs* args) +{ + OnigMatchParam* mp; + int num; + CalloutData* d; + + mp = args->msa->mp; + num = args->num; + + d = CALLOUT_DATA_AT_NUM(mp, num); + if (d->last_match_at_call_counter != mp->match_at_call_counter) { + xmemset(d, 0, sizeof(*d)); + d->last_match_at_call_counter = mp->match_at_call_counter; + return d->last_match_at_call_counter; + } + + return 0; +} + +extern int +onig_get_callout_data_dont_clear_old(regex_t* reg, OnigMatchParam* mp, + int callout_num, int slot, + OnigType* type, OnigValue* val) +{ + OnigType t; + CalloutData* d; + + if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + + d = CALLOUT_DATA_AT_NUM(mp, callout_num); + t = d->slot[slot].type; + if (IS_NOT_NULL(type)) *type = t; + if (IS_NOT_NULL(val)) *val = d->slot[slot].val; + return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL); +} + +extern int +onig_get_callout_data_by_callout_args_self_dont_clear_old(OnigCalloutArgs* args, + int slot, OnigType* type, + OnigValue* val) +{ + return onig_get_callout_data_dont_clear_old(args->regex, args->msa->mp, + args->num, slot, type, val); +} + +extern int +onig_get_callout_data(regex_t* reg, OnigMatchParam* mp, + int callout_num, int slot, + OnigType* type, OnigValue* val) +{ + OnigType t; + CalloutData* d; + + if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; + + d = CALLOUT_DATA_AT_NUM(mp, callout_num); + if (d->last_match_at_call_counter != mp->match_at_call_counter) { + xmemset(d, 0, sizeof(*d)); + d->last_match_at_call_counter = mp->match_at_call_counter; + } + + t = d->slot[slot].type; + if (IS_NOT_NULL(type)) *type = t; + if (IS_NOT_NULL(val)) *val = d->slot[slot].val; + return (t == ONIG_TYPE_VOID ? 1 : ONIG_NORMAL); +} + +extern int +onig_get_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp, + const UChar* tag, const UChar* tag_end, int slot, + OnigType* type, OnigValue* val) +{ + int num; + + num = onig_get_callout_num_by_tag(reg, tag, tag_end); + if (num < 0) return num; + if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + return onig_get_callout_data(reg, mp, num, slot, type, val); +} + +extern int +onig_get_callout_data_by_callout_args(OnigCalloutArgs* args, + int callout_num, int slot, + OnigType* type, OnigValue* val) +{ + return onig_get_callout_data(args->regex, args->msa->mp, callout_num, slot, + type, val); +} + +extern int +onig_get_callout_data_by_callout_args_self(OnigCalloutArgs* args, + int slot, OnigType* type, OnigValue* val) +{ + return onig_get_callout_data(args->regex, args->msa->mp, args->num, slot, + type, val); +} +extern int +onig_set_callout_data(regex_t* reg, OnigMatchParam* mp, + int callout_num, int slot, + OnigType type, OnigValue* val) +{ + CalloutData* d; -#define ALLOCA_PTR_NUM_LIMIT 50 + if (callout_num <= 0) return ONIGERR_INVALID_ARGUMENT; -#define STACK_INIT(stack_num) do {\ - if (msa->stack_p) {\ - is_alloca = 0;\ - alloc_base = msa->stack_p;\ - stk_base = (StackType* )(alloc_base\ - + (sizeof(StackIndex) * msa->ptr_num));\ - stk = stk_base;\ - stk_end = stk_base + msa->stack_n;\ - }\ - else if (msa->ptr_num > ALLOCA_PTR_NUM_LIMIT) {\ - is_alloca = 0;\ - alloc_base = (char* )xmalloc(sizeof(StackIndex) * msa->ptr_num\ - + sizeof(StackType) * (stack_num));\ - CHECK_NULL_RETURN_MEMERR(alloc_base);\ - stk_base = (StackType* )(alloc_base\ - + (sizeof(StackIndex) * msa->ptr_num));\ - stk = stk_base;\ - stk_end = stk_base + (stack_num);\ - }\ - else {\ - is_alloca = 1;\ - alloc_base = (char* )xalloca(sizeof(StackIndex) * msa->ptr_num\ - + sizeof(StackType) * (stack_num));\ - CHECK_NULL_RETURN_MEMERR(alloc_base);\ - stk_base = (StackType* )(alloc_base\ - + (sizeof(StackIndex) * msa->ptr_num));\ - stk = stk_base;\ - stk_end = stk_base + (stack_num);\ - }\ -} while(0); + d = CALLOUT_DATA_AT_NUM(mp, callout_num); + d->slot[slot].type = type; + d->slot[slot].val = *val; + d->last_match_at_call_counter = mp->match_at_call_counter; + return ONIG_NORMAL; +} -#define STACK_SAVE do{\ - msa->stack_n = (int )(stk_end - stk_base);\ - if (is_alloca != 0) {\ - size_t size = sizeof(StackIndex) * msa->ptr_num \ - + sizeof(StackType) * msa->stack_n;\ - msa->stack_p = xmalloc(size);\ - CHECK_NULL_RETURN_MEMERR(msa->stack_p);\ - xmemcpy(msa->stack_p, alloc_base, size);\ - }\ - else {\ - msa->stack_p = alloc_base;\ - };\ -} while(0) +extern int +onig_set_callout_data_by_tag(regex_t* reg, OnigMatchParam* mp, + const UChar* tag, const UChar* tag_end, int slot, + OnigType type, OnigValue* val) +{ + int num; -#define UPDATE_FOR_STACK_REALLOC do{\ - repeat_stk = (StackIndex* )alloc_base;\ - mem_start_stk = (StackIndex* )(repeat_stk + reg->num_repeat);\ - mem_end_stk = mem_start_stk + num_mem + 1;\ -} while(0) + num = onig_get_callout_num_by_tag(reg, tag, tag_end); + if (num < 0) return num; + if (num == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME; -static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE; + return onig_set_callout_data(reg, mp, num, slot, type, val); +} -extern unsigned int -onig_get_match_stack_limit_size(void) +extern int +onig_set_callout_data_by_callout_args(OnigCalloutArgs* args, + int callout_num, int slot, + OnigType type, OnigValue* val) { - return MatchStackLimitSize; + return onig_set_callout_data(args->regex, args->msa->mp, callout_num, slot, + type, val); } extern int -onig_set_match_stack_limit_size(unsigned int size) +onig_set_callout_data_by_callout_args_self(OnigCalloutArgs* args, + int slot, OnigType type, OnigValue* val) { - MatchStackLimitSize = size; - return 0; + return onig_set_callout_data(args->regex, args->msa->mp, args->num, slot, + type, val); } +#else +#define ADJUST_MATCH_PARAM(reg, mp) +#endif /* USE_CALLOUT */ + + static int stack_double(int is_alloca, char** arg_alloc_base, StackType** arg_stk_base, StackType** arg_stk_end, StackType** arg_stk, - OnigMatchArg* msa) + MatchArg* msa) { unsigned int n; int used; @@ -541,11 +1416,11 @@ stack_double(int is_alloca, char** arg_alloc_base, xmemcpy(new_alloc_base, alloc_base, size); } else { - if (MatchStackLimitSize != 0 && n > MatchStackLimitSize) { - if ((unsigned int )(stk_end - stk_base) == MatchStackLimitSize) + if (msa->match_stack_limit != 0 && n > msa->match_stack_limit) { + if ((unsigned int )(stk_end - stk_base) == msa->match_stack_limit) return ONIGERR_MATCH_STACK_LIMIT_OVER; else - n = MatchStackLimitSize; + n = msa->match_stack_limit; } new_alloc_base = (char* )xrealloc(alloc_base, new_size); if (IS_NULL(new_alloc_base)) { @@ -584,80 +1459,36 @@ stack_double(int is_alloca, char** arg_alloc_base, #define IS_TO_VOID_TARGET(stk) (((stk)->type & STK_MASK_TO_VOID_TARGET) != 0) -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define STATE_CHECK_POS(s,snum) \ - (((s) - str) * num_comb_exp_check + ((snum) - 1)) -#define STATE_CHECK_VAL(v,snum) do {\ - if (IS_NOT_NULL(state_check_buff)) {\ - int x = STATE_CHECK_POS(s,snum);\ - (v) = state_check_buff[x/8] & (1<<(x%8));\ - }\ - else (v) = 0;\ -} while(0) - - -#define ELSE_IF_STATE_CHECK_MARK(stk) \ - else if ((stk)->type == STK_STATE_CHECK_MARK) { \ - int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\ - state_check_buff[x/8] |= (1<<(x%8));\ - } - #define STACK_PUSH(stack_type,pat,s,sprev) do {\ STACK_ENSURE(1);\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ stk->u.state.pstr = (s);\ stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = 0;\ STACK_INC;\ } while(0) #define STACK_PUSH_ENSURED(stack_type,pat) do {\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ - stk->u.state.state_check = 0;\ - STACK_INC;\ -} while(0) - -#define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum) do {\ - STACK_ENSURE(1);\ - stk->type = STK_ALT;\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ - stk->u.state.state_check = (IS_NOT_NULL(state_check_buff) ? (snum) : 0);\ STACK_INC;\ } while(0) -#define STACK_PUSH_STATE_CHECK(s,snum) do {\ - if (IS_NOT_NULL(state_check_buff)) { \ - STACK_ENSURE(1);\ - stk->type = STK_STATE_CHECK_MARK;\ - stk->u.state.pstr = (s);\ - stk->u.state.state_check = (snum);\ - STACK_INC;\ - }\ -} while(0) - -#else /* USE_COMBINATION_EXPLOSION_CHECK */ - -#define ELSE_IF_STATE_CHECK_MARK(stk) - -#define STACK_PUSH(stack_type,pat,s,sprev) do {\ - STACK_ENSURE(1);\ +#ifdef ONIG_DEBUG_MATCH +#define STACK_PUSH_BOTTOM(stack_type,pat) do {\ stk->type = (stack_type);\ - stk->u.state.pcode = (pat);\ - stk->u.state.pstr = (s);\ - stk->u.state.pstr_prev = (sprev);\ + stk->u.state.pcode = (pat);\ + stk->u.state.pstr = s;\ + stk->u.state.pstr_prev = sprev;\ STACK_INC;\ -} while(0) - -#define STACK_PUSH_ENSURED(stack_type,pat) do {\ +} while (0) +#else +#define STACK_PUSH_BOTTOM(stack_type,pat) do {\ stk->type = (stack_type);\ stk->u.state.pcode = (pat);\ STACK_INC;\ -} while(0) -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ +} while (0) +#endif #define STACK_PUSH_ALT(pat,s,sprev) STACK_PUSH(STK_ALT,pat,s,sprev) #define STACK_PUSH_SUPER_ALT(pat,s,sprev) STACK_PUSH(STK_SUPER_ALT,pat,s,sprev) @@ -672,7 +1503,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_REPEAT(sid, pat) do {\ STACK_ENSURE(1);\ stk->type = STK_REPEAT;\ - stk->id = (sid);\ + stk->zid = (sid);\ stk->u.repeat.pcode = (pat);\ stk->u.repeat.count = 0;\ STACK_INC;\ @@ -688,7 +1519,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_MEM_START(mnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_START;\ - stk->id = (mnum);\ + stk->zid = (mnum);\ stk->u.mem.pstr = (s);\ stk->u.mem.start = mem_start_stk[mnum];\ stk->u.mem.end = mem_end_stk[mnum];\ @@ -700,7 +1531,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_MEM_END(mnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_END;\ - stk->id = (mnum);\ + stk->zid = (mnum);\ stk->u.mem.pstr = (s);\ stk->u.mem.start = mem_start_stk[mnum];\ stk->u.mem.end = mem_end_stk[mnum];\ @@ -711,7 +1542,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_MEM_END_MARK(mnum) do {\ STACK_ENSURE(1);\ stk->type = STK_MEM_END_MARK;\ - stk->id = (mnum);\ + stk->zid = (mnum);\ STACK_INC;\ } while(0) @@ -721,10 +1552,10 @@ stack_double(int is_alloca, char** arg_alloc_base, while (k > stk_base) {\ k--;\ if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \ - && k->id == (mnum)) {\ + && k->zid == (mnum)) {\ level++;\ }\ - else if (k->type == STK_MEM_START && k->id == (mnum)) {\ + else if (k->type == STK_MEM_START && k->zid == (mnum)) {\ if (level == 0) break;\ level--;\ }\ @@ -752,7 +1583,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_EMPTY_CHECK_START(cnum, s) do {\ STACK_ENSURE(1);\ stk->type = STK_EMPTY_CHECK_START;\ - stk->id = (cnum);\ + stk->zid = (cnum);\ stk->u.empty_check.pstr = (s);\ STACK_INC;\ } while(0) @@ -760,7 +1591,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_EMPTY_CHECK_END(cnum) do {\ STACK_ENSURE(1);\ stk->type = STK_EMPTY_CHECK_END;\ - stk->id = (cnum);\ + stk->zid = (cnum);\ STACK_INC;\ } while(0) @@ -780,7 +1611,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_SAVE_VAL(sid, stype, sval) do {\ STACK_ENSURE(1);\ stk->type = STK_SAVE_VAL;\ - stk->id = (sid);\ + stk->zid = (sid);\ stk->u.val.type = (stype);\ stk->u.val.v = (UChar* )(sval);\ STACK_INC;\ @@ -789,7 +1620,7 @@ stack_double(int is_alloca, char** arg_alloc_base, #define STACK_PUSH_SAVE_VAL_WITH_SPREV(sid, stype, sval) do {\ STACK_ENSURE(1);\ stk->type = STK_SAVE_VAL;\ - stk->id = (sid);\ + stk->zid = (sid);\ stk->u.val.type = (stype);\ stk->u.val.v = (UChar* )(sval);\ stk->u.val.v2 = sprev;\ @@ -815,7 +1646,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ - && k->id == (sid)) {\ + && k->zid == (sid)) {\ if (level == 0) {\ (sval) = k->u.val.v;\ break;\ @@ -835,7 +1666,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_GET_SAVE_VAL_TYPE_LAST_ID"); \ if (k->type == STK_SAVE_VAL && k->u.val.type == (stype)\ - && k->id == (sid)) {\ + && k->zid == (sid)) {\ if (level == 0) {\ (sval) = k->u.val.v;\ sprev = k->u.val.v2;\ @@ -869,6 +1700,24 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while (0) +#define STACK_PUSH_CALLOUT_CONTENTS(anum, func) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALLOUT;\ + stk->zid = ONIG_NON_NAME_ID;\ + stk->u.callout.num = (anum);\ + stk->u.callout.func = (func);\ + STACK_INC;\ +} while(0) + +#define STACK_PUSH_CALLOUT_NAME(aid, anum, func) do {\ + STACK_ENSURE(1);\ + stk->type = STK_CALLOUT;\ + stk->zid = (aid);\ + stk->u.callout.num = (anum);\ + stk->u.callout.func = (func);\ + STACK_INC;\ +} while(0) + #ifdef ONIG_DEBUG #define STACK_BASE_CHECK(p, at) \ if ((p) < stk_base) {\ @@ -884,6 +1733,16 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \ } while(0) + +#ifdef USE_CALLOUT +#define POP_CALLOUT_CASE \ + else if (stk->type == STK_CALLOUT) {\ + RETRACTION_CALLOUT(stk->u.callout.func, stk->zid, stk->u.callout.num, msa->mp->callout_user_data);\ + } +#else +#define POP_CALLOUT_CASE +#endif + #define STACK_POP do {\ switch (pop_level) {\ case STACK_POP_LEVEL_FREE:\ @@ -891,7 +1750,6 @@ stack_double(int is_alloca, char** arg_alloc_base, stk--;\ STACK_BASE_CHECK(stk, "STACK_POP"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ case STACK_POP_LEVEL_MEM_START:\ @@ -900,10 +1758,9 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(stk, "STACK_POP 2"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ default:\ @@ -911,75 +1768,70 @@ stack_double(int is_alloca, char** arg_alloc_base, stk--;\ STACK_BASE_CHECK(stk, "STACK_POP 3"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ + else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\ + if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ + }\ + POP_CALLOUT_CASE\ }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ }\ break;\ }\ } while(0) -#define STACK_POP_TIL_ALT_PREC_READ_NOT do {\ +#define POP_TIL_BODY(aname, til_type) do {\ while (1) {\ stk--;\ - STACK_BASE_CHECK(stk, "STACK_POP_TIL_ALT_PREC_READ_NOT"); \ - if (stk->type == STK_ALT_PREC_READ_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ + STACK_BASE_CHECK(stk, (aname));\ + if ((stk->type & STK_MASK_POP_HANDLED_TIL) != 0) {\ + if (stk->type == (til_type)) break;\ + else {\ + if (stk->type == STK_MEM_START) {\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ + }\ + else if (stk->type == STK_REPEAT_INC) {\ + STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ + }\ + else if (stk->type == STK_MEM_END) {\ + mem_start_stk[stk->zid] = stk->u.mem.start;\ + mem_end_stk[stk->zid] = stk->u.mem.end;\ + }\ + /* Don't call callout here because negation of total success by (?!..) (?type == STK_ALT_LOOK_BEHIND_NOT) break;\ - else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ - }\ - else if (stk->type == STK_REPEAT_INC) {\ - STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ - }\ - else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->id] = stk->u.mem.start;\ - mem_end_stk[stk->id] = stk->u.mem.end;\ - }\ - ELSE_IF_STATE_CHECK_MARK(stk);\ - }\ + POP_TIL_BODY("STACK_POP_TIL_ALT_LOOK_BEHIND_NOT", STK_ALT_LOOK_BEHIND_NOT);\ } while(0) + #define STACK_EXEC_TO_VOID(k) do {\ k = stk;\ while (1) {\ k--;\ STACK_BASE_CHECK(k, "STACK_EXEC_TO_VOID"); \ if (IS_TO_VOID_TARGET(k)) {\ + if (k->type == STK_TO_VOID_START) {\ + k->type = STK_VOID;\ + break;\ + }\ k->type = STK_VOID;\ }\ - else if (k->type == STK_TO_VOID_START) {\ - k->type = STK_VOID;\ - break;\ - }\ }\ } while(0) @@ -989,7 +1841,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK"); \ if (k->type == STK_EMPTY_CHECK_START) {\ - if (k->id == (sid)) {\ + if (k->zid == (sid)) {\ (isnull) = (k->u.empty_check.pstr == (s));\ break;\ }\ @@ -1004,7 +1856,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \ if (k->type == STK_EMPTY_CHECK_START) {\ - if (k->id == (sid)) {\ + if (k->zid == (sid)) {\ if (k->u.empty_check.pstr != (s)) {\ (isnull) = 0;\ break;\ @@ -1017,7 +1869,7 @@ stack_double(int is_alloca, char** arg_alloc_base, if (k->u.mem.end == INVALID_STACK_INDEX) {\ (isnull) = 0; break;\ }\ - if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\ + if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ else\ endp = (UChar* )k->u.mem.end;\ @@ -1045,7 +1897,7 @@ stack_double(int is_alloca, char** arg_alloc_base, k--;\ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \ if (k->type == STK_EMPTY_CHECK_START) {\ - if (k->id == (sid)) {\ + if (k->zid == (sid)) {\ if (level == 0) {\ if (k->u.empty_check.pstr != (s)) {\ (isnull) = 0;\ @@ -1059,7 +1911,7 @@ stack_double(int is_alloca, char** arg_alloc_base, if (k->u.mem.end == INVALID_STACK_INDEX) {\ (isnull) = 0; break;\ }\ - if (MEM_STATUS_AT(reg->bt_mem_end, k->id))\ + if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\ endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ else\ endp = (UChar* )k->u.mem.end;\ @@ -1081,7 +1933,7 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ }\ else if (k->type == STK_EMPTY_CHECK_END) {\ - if (k->id == (sid)) level++;\ + if (k->zid == (sid)) level++;\ }\ }\ } while(0) @@ -1116,7 +1968,7 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \ if (k->type == STK_REPEAT) {\ if (level == 0) {\ - if (k->id == (sid)) {\ + if (k->zid == (sid)) {\ break;\ }\ }\ @@ -1208,11 +2060,7 @@ static int string_cmp_ic(OnigEncoding enc, int case_fold_flag, #define DATA_ENSURE_CHECK(n) (s + (n) <= right_range) #define DATA_ENSURE(n) if (s + (n) > right_range) goto fail -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE #define INIT_RIGHT_RANGE right_range = (UChar* )in_right_range -#else -#define INIT_RIGHT_RANGE right_range = (UChar* )end -#endif #ifdef USE_CAPTURE_HISTORY static int @@ -1225,7 +2073,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, while (k < stk_top) { if (k->type == STK_MEM_START) { - n = k->id; + n = k->zid; if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP && MEM_STATUS_AT(reg->capture_history, n) != 0) { child = history_node_new(); @@ -1243,7 +2091,7 @@ make_capture_history_tree(OnigCaptureTreeNode* node, StackType** kp, } } else if (k->type == STK_MEM_END) { - if (k->id == node->group) { + if (k->zid == node->group) { node->end = (int )(k->u.mem.pstr - str); *kp = k; return 0; @@ -1292,7 +2140,7 @@ backref_match_at_nested_level(regex_t* reg, } else if (level == nest) { if (k->type == STK_MEM_START) { - if (mem_is_in_memp(k->id, mem_num, memp)) { + if (mem_is_in_memp(k->zid, mem_num, memp)) { pstart = k->u.mem.pstr; if (IS_NOT_NULL(pend)) { if (pend - pstart > send - *s) return 0; /* or goto next_mem; */ @@ -1316,7 +2164,7 @@ backref_match_at_nested_level(regex_t* reg, } } else if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->id, mem_num, memp)) { + if (mem_is_in_memp(k->zid, mem_num, memp)) { pend = k->u.mem.pstr; } } @@ -1347,7 +2195,7 @@ backref_check_at_nested_level(regex_t* reg, } else if (level == nest) { if (k->type == STK_MEM_END) { - if (mem_is_in_memp(k->id, mem_num, memp)) { + if (mem_is_in_memp(k->zid, mem_num, memp)) { return 1; } } @@ -1391,14 +2239,14 @@ static int OpCurr = OP_FINISH; static int OpPrevTarget = OP_FAIL; static int MaxStackDepth = 0; -#define MOP_IN(opcode) do {\ +#define SOP_IN(opcode) do {\ if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\ OpCurr = opcode;\ OpCounter[opcode]++;\ GETTIME(ts);\ } while(0) -#define MOP_OUT do {\ +#define SOP_OUT do {\ GETTIME(te);\ OpTime[OpCurr] += TIMEDIFF(te, ts);\ } while(0) @@ -1422,9 +2270,9 @@ onig_print_statistics(FILE* f) r = fprintf(f, " count prev time\n"); if (r < 0) return -1; - for (i = 0; OnigOpInfo[i].opcode >= 0; i++) { + for (i = 0; OpInfo[i].opcode >= 0; i++) { r = fprintf(f, "%8d: %8d: %10ld: %s\n", - OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name); + OpCounter[i], OpPrevCounter[i], OpTime[i], OpInfo[i].name); if (r < 0) return -1; } r = fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth); @@ -1442,8 +2290,8 @@ onig_print_statistics(FILE* f) #else #define STACK_INC stk++ -#define MOP_IN(opcode) -#define MOP_OUT +#define SOP_IN(opcode) +#define SOP_OUT #endif @@ -1459,10 +2307,8 @@ typedef struct { /* if sstart == str then set sprev to NULL. */ static int match_at(regex_t* reg, const UChar* str, const UChar* end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - const UChar* in_right_range, -#endif - const UChar* sstart, UChar* sprev, OnigMatchArg* msa) + const UChar* in_right_range, const UChar* sstart, UChar* sprev, + MatchArg* msa) { static UChar FinishCode[] = { OP_FINISH }; @@ -1480,16 +2326,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, StackIndex *repeat_stk; StackIndex *mem_start_stk, *mem_end_stk; UChar* keep; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int scv; - unsigned char* state_check_buff = msa->state_check_buff; - int num_comb_exp_check = reg->num_comb_exp_check; +#ifdef USE_RETRY_LIMIT_IN_MATCH + unsigned long retry_limit_in_match; + unsigned long retry_in_match_counter; #endif + +#ifdef USE_CALLOUT + int of; +#endif + UChar *p = reg->p; OnigOptionType option = reg->options; OnigEncoding encode = reg->enc; OnigCaseFoldType case_fold_flag = reg->case_fold_flag; +#ifdef USE_CALLOUT + msa->mp->match_at_call_counter++; +#endif + +#ifdef USE_RETRY_LIMIT_IN_MATCH + retry_limit_in_match = msa->retry_limit_in_match; +#endif + //n = reg->num_repeat + reg->num_mem * 2; pop_level = reg->stack_pop_level; num_mem = reg->num_mem; @@ -1506,11 +2364,15 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, (int )(end - str), (int )(sstart - str)); #endif - STACK_PUSH_ENSURED(STK_ALT, FinishCode); /* bottom stack */ best_len = ONIG_MISMATCH; keep = s = (UChar* )sstart; + STACK_PUSH_BOTTOM(STK_ALT, FinishCode); /* bottom stack */ INIT_RIGHT_RANGE; +#ifdef USE_RETRY_LIMIT_IN_MATCH + retry_in_match_counter = 0; +#endif + while (1) { #ifdef ONIG_DEBUG_MATCH { @@ -1533,7 +2395,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, fputs((char* )buf, stderr); for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); - fprintf(stderr, "%4d: ", (int )(p - reg->p)); + if (p == FinishCode) + fprintf(stderr, "----: "); + else + fprintf(stderr, "%4d: ", (int )(p - reg->p)); onig_print_compiled_byte_code(stderr, p, NULL, reg->p, encode); fprintf(stderr, "\n"); } @@ -1541,7 +2406,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sbegin = s; switch (*p++) { - case OP_END: MOP_IN(OP_END); + case OP_END: SOP_IN(OP_END); n = (int )(s - sstart); if (n > best_len) { OnigRegion* region; @@ -1639,7 +2504,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE end_best_len: #endif - MOP_OUT; + SOP_OUT; if (IS_FIND_CONDITION(option)) { if (IS_FIND_NOT_EMPTY(option) && s == sstart) { @@ -1655,14 +2520,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto finish; break; - case OP_EXACT1: MOP_IN(OP_EXACT1); + case OP_EXACT1: SOP_IN(OP_EXACT1); DATA_ENSURE(1); if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; break; - case OP_EXACT1_IC: MOP_IN(OP_EXACT1_IC); + case OP_EXACT1_IC: SOP_IN(OP_EXACT1_IC); { int len; UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -1681,21 +2546,21 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; q++; } } - MOP_OUT; + SOP_OUT; break; - case OP_EXACT2: MOP_IN(OP_EXACT2); + case OP_EXACT2: SOP_IN(OP_EXACT2); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACT3: MOP_IN(OP_EXACT3); + case OP_EXACT3: SOP_IN(OP_EXACT3); DATA_ENSURE(3); if (*p != *s) goto fail; p++; s++; @@ -1704,11 +2569,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACT4: MOP_IN(OP_EXACT4); + case OP_EXACT4: SOP_IN(OP_EXACT4); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1719,11 +2584,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACT5: MOP_IN(OP_EXACT5); + case OP_EXACT5: SOP_IN(OP_EXACT5); DATA_ENSURE(5); if (*p != *s) goto fail; p++; s++; @@ -1736,22 +2601,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, if (*p != *s) goto fail; sprev = s; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTN: MOP_IN(OP_EXACTN); + case OP_EXACTN: SOP_IN(OP_EXACTN); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen); while (tlen-- > 0) { if (*p++ != *s++) goto fail; } sprev = s - 1; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTN_IC: MOP_IN(OP_EXACTN_IC); + case OP_EXACTN_IC: SOP_IN(OP_EXACTN_IC); { int len; UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN]; @@ -1775,20 +2640,20 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB2N1: MOP_IN(OP_EXACTMB2N1); + case OP_EXACTMB2N1: SOP_IN(OP_EXACTMB2N1); DATA_ENSURE(2); if (*p != *s) goto fail; p++; s++; if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; break; - case OP_EXACTMB2N2: MOP_IN(OP_EXACTMB2N2); + case OP_EXACTMB2N2: SOP_IN(OP_EXACTMB2N2); DATA_ENSURE(4); if (*p != *s) goto fail; p++; s++; @@ -1799,11 +2664,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB2N3: MOP_IN(OP_EXACTMB2N3); + case OP_EXACTMB2N3: SOP_IN(OP_EXACTMB2N3); DATA_ENSURE(6); if (*p != *s) goto fail; p++; s++; @@ -1818,11 +2683,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; if (*p != *s) goto fail; p++; s++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB2N: MOP_IN(OP_EXACTMB2N); + case OP_EXACTMB2N: SOP_IN(OP_EXACTMB2N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 2); while (tlen-- > 0) { @@ -1832,11 +2697,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - 2; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMB3N: MOP_IN(OP_EXACTMB3N); + case OP_EXACTMB3N: SOP_IN(OP_EXACTMB3N); GET_LENGTH_INC(tlen, p); DATA_ENSURE(tlen * 3); while (tlen-- > 0) { @@ -1848,11 +2713,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - 3; - MOP_OUT; + SOP_OUT; continue; break; - case OP_EXACTMBN: MOP_IN(OP_EXACTMBN); + case OP_EXACTMBN: SOP_IN(OP_EXACTMBN); GET_LENGTH_INC(tlen, p); /* mb-len */ GET_LENGTH_INC(tlen2, p); /* string len */ tlen2 *= tlen; @@ -1862,19 +2727,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p++; s++; } sprev = s - tlen; - MOP_OUT; + SOP_OUT; continue; break; - case OP_CCLASS: MOP_IN(OP_CCLASS); + case OP_CCLASS: SOP_IN(OP_CCLASS); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail; p += SIZE_BITSET; s += enclen(encode, s); /* OP_CCLASS can match mb-code. \D, \S */ - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MB: MOP_IN(OP_CCLASS_MB); + case OP_CCLASS_MB: SOP_IN(OP_CCLASS_MB); if (! ONIGENC_IS_MBC_HEAD(encode, s)) goto fail; cclass_mb: @@ -1900,10 +2765,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif } p += tlen; - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MIX: MOP_IN(OP_CCLASS_MIX); + case OP_CCLASS_MIX: SOP_IN(OP_CCLASS_MIX); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -1918,18 +2783,18 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; s++; } - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_NOT: MOP_IN(OP_CCLASS_NOT); + case OP_CCLASS_NOT: SOP_IN(OP_CCLASS_NOT); DATA_ENSURE(1); if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail; p += SIZE_BITSET; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MB_NOT: MOP_IN(OP_CCLASS_MB_NOT); + case OP_CCLASS_MB_NOT: SOP_IN(OP_CCLASS_MB_NOT); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_HEAD(encode, s)) { s++; @@ -1967,10 +2832,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; cc_mb_not_success: - MOP_OUT; + SOP_OUT; break; - case OP_CCLASS_MIX_NOT: MOP_IN(OP_CCLASS_MIX_NOT); + case OP_CCLASS_MIX_NOT: SOP_IN(OP_CCLASS_MIX_NOT); DATA_ENSURE(1); if (ONIGENC_IS_MBC_HEAD(encode, s)) { p += SIZE_BITSET; @@ -1985,11 +2850,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += tlen; s++; } - MOP_OUT; + SOP_OUT; break; #ifdef USE_OP_CCLASS_NODE - case OP_CCLASS_NODE: MOP_IN(OP_CCLASS_NODE); + case OP_CCLASS_NODE: SOP_IN(OP_CCLASS_NODE); { OnigCodePoint code; void *node; @@ -2005,28 +2870,28 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, code = ONIGENC_MBC_TO_CODE(encode, ss, s); if (onig_is_code_in_cc_len(mb_len, code, node) == 0) goto fail; } - MOP_OUT; + SOP_OUT; break; #endif - case OP_ANYCHAR: MOP_IN(OP_ANYCHAR); + case OP_ANYCHAR: SOP_IN(OP_ANYCHAR); DATA_ENSURE(1); n = enclen(encode, s); DATA_ENSURE(n); if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; s += n; - MOP_OUT; + SOP_OUT; break; - case OP_ANYCHAR_ML: MOP_IN(OP_ANYCHAR_ML); + case OP_ANYCHAR_ML: SOP_IN(OP_ANYCHAR_ML); DATA_ENSURE(1); n = enclen(encode, s); DATA_ENSURE(n); s += n; - MOP_OUT; + SOP_OUT; break; - case OP_ANYCHAR_STAR: MOP_IN(OP_ANYCHAR_STAR); + case OP_ANYCHAR_STAR: SOP_IN(OP_ANYCHAR_STAR); while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); n = enclen(encode, s); @@ -2035,11 +2900,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sprev = s; s += n; } - MOP_OUT; + SOP_OUT; continue; break; - case OP_ANYCHAR_ML_STAR: MOP_IN(OP_ANYCHAR_ML_STAR); + case OP_ANYCHAR_ML_STAR: SOP_IN(OP_ANYCHAR_ML_STAR); while (DATA_ENSURE_CHECK1) { STACK_PUSH_ALT(p, s, sprev); n = enclen(encode, s); @@ -2053,11 +2918,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s++; } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_ANYCHAR_STAR_PEEK_NEXT: MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); + case OP_ANYCHAR_STAR_PEEK_NEXT: SOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); @@ -2069,10 +2934,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s += n; } p++; - MOP_OUT; + SOP_OUT; break; - case OP_ANYCHAR_ML_STAR_PEEK_NEXT:MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); + case OP_ANYCHAR_ML_STAR_PEEK_NEXT:SOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT); while (DATA_ENSURE_CHECK1) { if (*p == *s) { STACK_PUSH_ALT(p + 1, s, sprev); @@ -2089,87 +2954,46 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } p++; - MOP_OUT; - break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_ANYCHAR_STAR: MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR); - GET_STATE_CHECK_NUM_INC(mem, p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); - DATA_ENSURE(n); - if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) goto fail; - sprev = s; - s += n; - } - MOP_OUT; - break; - - case OP_STATE_CHECK_ANYCHAR_ML_STAR: - MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR); - - GET_STATE_CHECK_NUM_INC(mem, p); - while (DATA_ENSURE_CHECK1) { - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem); - n = enclen(encode, s); - if (n > 1) { - DATA_ENSURE(n); - sprev = s; - s += n; - } - else { - sprev = s; - s++; - } - } - MOP_OUT; + SOP_OUT; break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - case OP_WORD: MOP_IN(OP_WORD); + case OP_WORD: SOP_IN(OP_WORD); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_WORD_ASCII: MOP_IN(OP_WORD_ASCII); + case OP_WORD_ASCII: SOP_IN(OP_WORD_ASCII); DATA_ENSURE(1); if (! ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_NO_WORD: MOP_IN(OP_NO_WORD); + case OP_NO_WORD: SOP_IN(OP_NO_WORD); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD(encode, s, end)) goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_NO_WORD_ASCII: MOP_IN(OP_NO_WORD_ASCII); + case OP_NO_WORD_ASCII: SOP_IN(OP_NO_WORD_ASCII); DATA_ENSURE(1); if (ONIGENC_IS_MBC_WORD_ASCII(encode, s, end)) goto fail; s += enclen(encode, s); - MOP_OUT; + SOP_OUT; break; - case OP_WORD_BOUNDARY: MOP_IN(OP_WORD_BOUNDARY); + case OP_WORD_BOUNDARY: SOP_IN(OP_WORD_BOUNDARY); { ModeType mode; GET_MODE_INC(mode, p); // ascii_mode @@ -2189,11 +3013,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_NO_WORD_BOUNDARY: MOP_IN(OP_NO_WORD_BOUNDARY); + case OP_NO_WORD_BOUNDARY: SOP_IN(OP_NO_WORD_BOUNDARY); { ModeType mode; GET_MODE_INC(mode, p); // ascii_mode @@ -2212,20 +3036,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; } } - MOP_OUT; + SOP_OUT; continue; break; #ifdef USE_WORD_BEGIN_END - case OP_WORD_BEGIN: MOP_IN(OP_WORD_BEGIN); + case OP_WORD_BEGIN: SOP_IN(OP_WORD_BEGIN); { ModeType mode; GET_MODE_INC(mode, p); // ascii_mode if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { - if (ON_STR_BEGIN(s) || - ! IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { - MOP_OUT; + if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { + SOP_OUT; continue; } } @@ -2233,14 +3056,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; break; - case OP_WORD_END: MOP_IN(OP_WORD_END); + case OP_WORD_END: SOP_IN(OP_WORD_END); { ModeType mode; GET_MODE_INC(mode, p); // ascii_mode if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { - MOP_OUT; + SOP_OUT; continue; } } @@ -2250,82 +3073,82 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, #endif case OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: - MOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + SOP_IN(OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) { - MOP_OUT; + SOP_OUT; continue; } goto fail; break; case OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY: - MOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); + SOP_IN(OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY); if (onigenc_egcb_is_break_position(encode, s, sprev, str, end)) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_BEGIN_BUF: MOP_IN(OP_BEGIN_BUF); + case OP_BEGIN_BUF: SOP_IN(OP_BEGIN_BUF); if (! ON_STR_BEGIN(s)) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_END_BUF: MOP_IN(OP_END_BUF); + case OP_END_BUF: SOP_IN(OP_END_BUF); if (! ON_STR_END(s)) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_BEGIN_LINE: MOP_IN(OP_BEGIN_LINE); + case OP_BEGIN_LINE: SOP_IN(OP_BEGIN_LINE); if (ON_STR_BEGIN(s)) { if (IS_NOTBOL(msa->options)) goto fail; - MOP_OUT; + SOP_OUT; continue; } else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end) && !ON_STR_END(s)) { - MOP_OUT; + SOP_OUT; continue; } goto fail; break; - case OP_END_LINE: MOP_IN(OP_END_LINE); + case OP_END_LINE: SOP_IN(OP_END_LINE); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; + SOP_OUT; continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } #endif } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end)) { - MOP_OUT; + SOP_OUT; continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR else if (ONIGENC_IS_MBC_CRNL(encode, s, end)) { - MOP_OUT; + SOP_OUT; continue; } #endif goto fail; break; - case OP_SEMI_END_BUF: MOP_IN(OP_SEMI_END_BUF); + case OP_SEMI_END_BUF: SOP_IN(OP_SEMI_END_BUF); if (ON_STR_END(s)) { #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)) { #endif if (IS_NOTEOL(msa->options)) goto fail; - MOP_OUT; + SOP_OUT; continue; #ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE } @@ -2333,7 +3156,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } else if (ONIGENC_IS_MBC_NEWLINE(encode, s, end) && ON_STR_END(s + enclen(encode, s))) { - MOP_OUT; + SOP_OUT; continue; } #ifdef USE_CRNL_AS_LINE_TERMINATOR @@ -2341,7 +3164,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, UChar* ss = s + enclen(encode, s); ss += enclen(encode, ss); if (ON_STR_END(ss)) { - MOP_OUT; + SOP_OUT; continue; } } @@ -2349,53 +3172,53 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto fail; break; - case OP_BEGIN_POSITION: MOP_IN(OP_BEGIN_POSITION); + case OP_BEGIN_POSITION: SOP_IN(OP_BEGIN_POSITION); if (s != msa->start) goto fail; - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_START_PUSH: MOP_IN(OP_MEMORY_START_PUSH); + case OP_MEMORY_START_PUSH: SOP_IN(OP_MEMORY_START_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_START(mem, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_START: MOP_IN(OP_MEMORY_START); + case OP_MEMORY_START: SOP_IN(OP_MEMORY_START); GET_MEMNUM_INC(mem, p); mem_start_stk[mem] = (StackIndex )((void* )s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_END_PUSH: MOP_IN(OP_MEMORY_END_PUSH); + case OP_MEMORY_END_PUSH: SOP_IN(OP_MEMORY_END_PUSH); GET_MEMNUM_INC(mem, p); STACK_PUSH_MEM_END(mem, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_END: MOP_IN(OP_MEMORY_END); + case OP_MEMORY_END: SOP_IN(OP_MEMORY_END); GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (StackIndex )((void* )s); - MOP_OUT; + SOP_OUT; continue; break; #ifdef USE_CALL - case OP_MEMORY_END_PUSH_REC: MOP_IN(OP_MEMORY_END_PUSH_REC); + case OP_MEMORY_END_PUSH_REC: SOP_IN(OP_MEMORY_END_PUSH_REC); GET_MEMNUM_INC(mem, p); STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */ STACK_PUSH_MEM_END(mem, s); mem_start_stk[mem] = GET_STACK_INDEX(stkp); - MOP_OUT; + SOP_OUT; continue; break; - case OP_MEMORY_END_REC: MOP_IN(OP_MEMORY_END_REC); + case OP_MEMORY_END_REC: SOP_IN(OP_MEMORY_END_REC); GET_MEMNUM_INC(mem, p); mem_end_stk[mem] = (StackIndex )((void* )s); STACK_GET_MEM_START(mem, stkp); @@ -2406,22 +3229,22 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, mem_start_stk[mem] = (StackIndex )((void* )stkp->u.mem.pstr); STACK_PUSH_MEM_END_MARK(mem); - MOP_OUT; + SOP_OUT; continue; break; #endif - case OP_BACKREF1: MOP_IN(OP_BACKREF1); + case OP_BACKREF1: SOP_IN(OP_BACKREF1); mem = 1; goto backref; break; - case OP_BACKREF2: MOP_IN(OP_BACKREF2); + case OP_BACKREF2: SOP_IN(OP_BACKREF2); mem = 2; goto backref; break; - case OP_BACKREF_N: MOP_IN(OP_BACKREF_N); + case OP_BACKREF_N: SOP_IN(OP_BACKREF_N); GET_MEMNUM_INC(mem, p); backref: { @@ -2446,12 +3269,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - MOP_OUT; + SOP_OUT; continue; } break; - case OP_BACKREF_N_IC: MOP_IN(OP_BACKREF_N_IC); + case OP_BACKREF_N_IC: SOP_IN(OP_BACKREF_N_IC); GET_MEMNUM_INC(mem, p); { int len; @@ -2475,12 +3298,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, while (sprev + (len = enclen(encode, sprev)) < s) sprev += len; - MOP_OUT; + SOP_OUT; continue; } break; - case OP_BACKREF_MULTI: MOP_IN(OP_BACKREF_MULTI); + case OP_BACKREF_MULTI: SOP_IN(OP_BACKREF_MULTI); { int len, is_fail; UChar *pstart, *pend, *swork; @@ -2514,12 +3337,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - MOP_OUT; + SOP_OUT; continue; } break; - case OP_BACKREF_MULTI_IC: MOP_IN(OP_BACKREF_MULTI_IC); + case OP_BACKREF_MULTI_IC: SOP_IN(OP_BACKREF_MULTI_IC); { int len, is_fail; UChar *pstart, *pend, *swork; @@ -2553,7 +3376,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - MOP_OUT; + SOP_OUT; continue; } break; @@ -2580,13 +3403,13 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else goto fail; - MOP_OUT; + SOP_OUT; continue; } break; #endif - case OP_BACKREF_CHECK: MOP_IN(OP_BACKREF_CHECK); + case OP_BACKREF_CHECK: SOP_IN(OP_BACKREF_CHECK); { GET_LENGTH_INC(tlen, p); for (i = 0; i < tlen; i++) { @@ -2599,7 +3422,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; /* success */ } if (i == tlen) goto fail; - MOP_OUT; + SOP_OUT; continue; } break; @@ -2619,36 +3442,36 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else goto fail; - MOP_OUT; + SOP_OUT; continue; } break; #endif #if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */ - case OP_SET_OPTION_PUSH: MOP_IN(OP_SET_OPTION_PUSH); + case OP_SET_OPTION_PUSH: SOP_IN(OP_SET_OPTION_PUSH); GET_OPTION_INC(option, p); STACK_PUSH_ALT(p, s, sprev); p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL; - MOP_OUT; + SOP_OUT; continue; break; - case OP_SET_OPTION: MOP_IN(OP_SET_OPTION); + case OP_SET_OPTION: SOP_IN(OP_SET_OPTION); GET_OPTION_INC(option, p); - MOP_OUT; + SOP_OUT; continue; break; #endif - case OP_EMPTY_CHECK_START: MOP_IN(OP_EMPTY_CHECK_START); + case OP_EMPTY_CHECK_START: SOP_IN(OP_EMPTY_CHECK_START); GET_MEMNUM_INC(mem, p); /* mem: null check id */ STACK_PUSH_EMPTY_CHECK_START(mem, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_EMPTY_CHECK_END: MOP_IN(OP_EMPTY_CHECK_END); + case OP_EMPTY_CHECK_END: SOP_IN(OP_EMPTY_CHECK_END); { int is_empty; @@ -2677,12 +3500,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, } } } - MOP_OUT; + SOP_OUT; continue; break; #ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT - case OP_EMPTY_CHECK_END_MEMST: MOP_IN(OP_EMPTY_CHECK_END_MEMST); + case OP_EMPTY_CHECK_END_MEMST: SOP_IN(OP_EMPTY_CHECK_END_MEMST); { int is_empty; @@ -2696,14 +3519,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, goto empty_check_found; } } - MOP_OUT; + SOP_OUT; continue; break; #endif #ifdef USE_CALL case OP_EMPTY_CHECK_END_MEMST_PUSH: - MOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH); + SOP_IN(OP_EMPTY_CHECK_END_MEMST_PUSH); { int is_empty; @@ -2725,103 +3548,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_EMPTY_CHECK_END(mem); } } - MOP_OUT; + SOP_OUT; continue; break; #endif - case OP_JUMP: MOP_IN(OP_JUMP); + case OP_JUMP: SOP_IN(OP_JUMP); GET_RELADDR_INC(addr, p); p += addr; - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; + SOP_OUT; + CHECK_INTERRUPT_IN_MATCH; continue; break; - case OP_PUSH: MOP_IN(OP_PUSH); + case OP_PUSH: SOP_IN(OP_PUSH); GET_RELADDR_INC(addr, p); STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PUSH_SUPER: MOP_IN(OP_PUSH_SUPER); + case OP_PUSH_SUPER: SOP_IN(OP_PUSH_SUPER); GET_RELADDR_INC(addr, p); STACK_PUSH_SUPER_ALT(p + addr, s, sprev); - MOP_OUT; - continue; - break; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - case OP_STATE_CHECK_PUSH: MOP_IN(OP_STATE_CHECK_PUSH); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - GET_RELADDR_INC(addr, p); - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - MOP_OUT; - continue; - break; - - case OP_STATE_CHECK_PUSH_OR_JUMP: MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP); - GET_STATE_CHECK_NUM_INC(mem, p); - GET_RELADDR_INC(addr, p); - STATE_CHECK_VAL(scv, mem); - if (scv) { - p += addr; - } - else { - STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem); - } - MOP_OUT; - continue; - break; - - case OP_STATE_CHECK: MOP_IN(OP_STATE_CHECK); - GET_STATE_CHECK_NUM_INC(mem, p); - STATE_CHECK_VAL(scv, mem); - if (scv) goto fail; - - STACK_PUSH_STATE_CHECK(s, mem); - MOP_OUT; + SOP_OUT; continue; break; -#endif /* USE_COMBINATION_EXPLOSION_CHECK */ - case OP_POP: MOP_IN(OP_POP); + case OP_POP_OUT: SOP_IN(OP_POP_OUT); STACK_POP_ONE; - MOP_OUT; + // for stop backtrack + //CHECK_RETRY_LIMIT_IN_MATCH; + SOP_OUT; continue; break; - case OP_PUSH_OR_JUMP_EXACT1: MOP_IN(OP_PUSH_OR_JUMP_EXACT1); + case OP_PUSH_OR_JUMP_EXACT1: SOP_IN(OP_PUSH_OR_JUMP_EXACT1); GET_RELADDR_INC(addr, p); if (*p == *s && DATA_ENSURE_CHECK1) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; + SOP_OUT; continue; } p += (addr + 1); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PUSH_IF_PEEK_NEXT: MOP_IN(OP_PUSH_IF_PEEK_NEXT); + case OP_PUSH_IF_PEEK_NEXT: SOP_IN(OP_PUSH_IF_PEEK_NEXT); GET_RELADDR_INC(addr, p); if (*p == *s) { p++; STACK_PUSH_ALT(p + addr, s, sprev); - MOP_OUT; + SOP_OUT; continue; } p++; - MOP_OUT; + SOP_OUT; continue; break; - case OP_REPEAT: MOP_IN(OP_REPEAT); + case OP_REPEAT: SOP_IN(OP_REPEAT); { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -2834,11 +3622,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, STACK_PUSH_ALT(p + addr, s, sprev); } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_REPEAT_NG: MOP_IN(OP_REPEAT_NG); + case OP_REPEAT_NG: SOP_IN(OP_REPEAT_NG); { GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ GET_RELADDR_INC(addr, p); @@ -2852,11 +3640,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p += addr; } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_REPEAT_INC: MOP_IN(OP_REPEAT_INC); + case OP_REPEAT_INC: SOP_IN(OP_REPEAT_INC); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2874,19 +3662,19 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, p = stkp->u.repeat.pcode; } STACK_PUSH_REPEAT_INC(si); - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; + SOP_OUT; + CHECK_INTERRUPT_IN_MATCH; continue; break; - case OP_REPEAT_INC_SG: MOP_IN(OP_REPEAT_INC_SG); + case OP_REPEAT_INC_SG: SOP_IN(OP_REPEAT_INC_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc; break; - case OP_REPEAT_INC_NG: MOP_IN(OP_REPEAT_INC_NG); + case OP_REPEAT_INC_NG: SOP_IN(OP_REPEAT_INC_NG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ si = repeat_stk[mem]; stkp = STACK_AT(si); @@ -2908,68 +3696,68 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) { STACK_PUSH_REPEAT_INC(si); } - MOP_OUT; - CHECK_INTERRUPT_IN_MATCH_AT; + SOP_OUT; + CHECK_INTERRUPT_IN_MATCH; continue; break; - case OP_REPEAT_INC_NG_SG: MOP_IN(OP_REPEAT_INC_NG_SG); + case OP_REPEAT_INC_NG_SG: SOP_IN(OP_REPEAT_INC_NG_SG); GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */ STACK_GET_REPEAT(mem, stkp); si = GET_STACK_INDEX(stkp); goto repeat_inc_ng; break; - case OP_PREC_READ_START: MOP_IN(OP_PREC_READ_START); + case OP_PREC_READ_START: SOP_IN(OP_PREC_READ_START); STACK_PUSH_POS(s, sprev); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PREC_READ_END: MOP_IN(OP_PREC_READ_END); + case OP_PREC_READ_END: SOP_IN(OP_PREC_READ_END); { STACK_EXEC_TO_VOID(stkp); s = stkp->u.state.pstr; sprev = stkp->u.state.pstr_prev; } - MOP_OUT; + SOP_OUT; continue; break; - case OP_PREC_READ_NOT_START: MOP_IN(OP_PREC_READ_NOT_START); + case OP_PREC_READ_NOT_START: SOP_IN(OP_PREC_READ_NOT_START); GET_RELADDR_INC(addr, p); STACK_PUSH_ALT_PREC_READ_NOT(p + addr, s, sprev); - MOP_OUT; + SOP_OUT; continue; break; - case OP_PREC_READ_NOT_END: MOP_IN(OP_PREC_READ_NOT_END); + case OP_PREC_READ_NOT_END: SOP_IN(OP_PREC_READ_NOT_END); STACK_POP_TIL_ALT_PREC_READ_NOT; goto fail; break; - case OP_ATOMIC_START: MOP_IN(OP_ATOMIC_START); + case OP_ATOMIC_START: SOP_IN(OP_ATOMIC_START); STACK_PUSH_TO_VOID_START; - MOP_OUT; + SOP_OUT; continue; break; - case OP_ATOMIC_END: MOP_IN(OP_ATOMIC_END); + case OP_ATOMIC_END: SOP_IN(OP_ATOMIC_END); STACK_EXEC_TO_VOID(stkp); - MOP_OUT; + SOP_OUT; continue; break; - case OP_LOOK_BEHIND: MOP_IN(OP_LOOK_BEHIND); + case OP_LOOK_BEHIND: SOP_IN(OP_LOOK_BEHIND); GET_LENGTH_INC(tlen, p); s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); if (IS_NULL(s)) goto fail; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); - MOP_OUT; + SOP_OUT; continue; break; - case OP_LOOK_BEHIND_NOT_START: MOP_IN(OP_LOOK_BEHIND_NOT_START); + case OP_LOOK_BEHIND_NOT_START: SOP_IN(OP_LOOK_BEHIND_NOT_START); GET_RELADDR_INC(addr, p); GET_LENGTH_INC(tlen, p); q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, (int )tlen); @@ -2984,33 +3772,33 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, s = q; sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s); } - MOP_OUT; + SOP_OUT; continue; break; - case OP_LOOK_BEHIND_NOT_END: MOP_IN(OP_LOOK_BEHIND_NOT_END); + case OP_LOOK_BEHIND_NOT_END: SOP_IN(OP_LOOK_BEHIND_NOT_END); STACK_POP_TIL_ALT_LOOK_BEHIND_NOT; goto fail; break; #ifdef USE_CALL - case OP_CALL: MOP_IN(OP_CALL); + case OP_CALL: SOP_IN(OP_CALL); GET_ABSADDR_INC(addr, p); STACK_PUSH_CALL_FRAME(p); p = reg->p + addr; - MOP_OUT; + SOP_OUT; continue; break; - case OP_RETURN: MOP_IN(OP_RETURN); + case OP_RETURN: SOP_IN(OP_RETURN); STACK_RETURN(p); STACK_PUSH_RETURN; - MOP_OUT; + SOP_OUT; continue; break; #endif - case OP_PUSH_SAVE_VAL: MOP_IN(OP_PUSH_SAVE_VAL); + case OP_PUSH_SAVE_VAL: SOP_IN(OP_PUSH_SAVE_VAL); { SaveType type; GET_SAVE_TYPE_INC(type, p); @@ -3029,11 +3817,11 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; } } - MOP_OUT; + SOP_OUT; continue; break; - case OP_UPDATE_VAR: MOP_IN(OP_UPDATE_VAR); + case OP_UPDATE_VAR: SOP_IN(OP_UPDATE_VAR); { UpdateVarType type; enum SaveType save_type; @@ -3061,31 +3849,99 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, break; } } - MOP_OUT; + SOP_OUT; + continue; + break; + +#ifdef USE_CALLOUT + case OP_CALLOUT_CONTENTS: SOP_IN(OP_CALLOUT_CONTENTS); + of = ONIG_CALLOUT_OF_CONTENTS; + goto callout_common_entry; + + SOP_OUT; + continue; + break; + + case OP_CALLOUT_NAME: SOP_IN(OP_CALLOUT_NAME); + { + int call_result; + int name_id; + int num; + int in; + CalloutListEntry* e; + OnigCalloutFunc func; + OnigCalloutArgs args; + + of = ONIG_CALLOUT_OF_NAME; + GET_MEMNUM_INC(name_id, p); + + callout_common_entry: + GET_MEMNUM_INC(num, p); + e = onig_reg_callout_list_at(reg, num); + in = e->in; + if (of == ONIG_CALLOUT_OF_NAME) { + func = onig_get_callout_start_func(reg, num); + } + else { + name_id = ONIG_NON_NAME_ID; + func = msa->mp->progress_callout_of_contents; + } + + if (IS_NOT_NULL(func) && (in & ONIG_CALLOUT_IN_PROGRESS) != 0) { + CALLOUT_BODY(func, ONIG_CALLOUT_IN_PROGRESS, name_id, + num, msa->mp->callout_user_data, args, call_result); + switch (call_result) { + case ONIG_CALLOUT_FAIL: + goto fail; + break; + case ONIG_CALLOUT_SUCCESS: + goto retraction_callout2; + break; + default: /* error code */ + if (call_result > 0) { + call_result = ONIGERR_INVALID_ARGUMENT; + } + best_len = call_result; + goto finish; + break; + } + } + else { + retraction_callout2: + if ((in & ONIG_CALLOUT_IN_RETRACTION) != 0) { + if (of == ONIG_CALLOUT_OF_NAME) { + if (IS_NOT_NULL(func)) { + STACK_PUSH_CALLOUT_NAME(name_id, num, func); + } + } + else { + func = msa->mp->retraction_callout_of_contents; + if (IS_NOT_NULL(func)) { + STACK_PUSH_CALLOUT_CONTENTS(num, func); + } + } + } + } + } + SOP_OUT; continue; break; +#endif case OP_FINISH: goto finish; break; fail: - MOP_OUT; + SOP_OUT; /* fall */ - case OP_FAIL: MOP_IN(OP_FAIL); + case OP_FAIL: SOP_IN(OP_FAIL); STACK_POP; p = stk->u.state.pcode; s = stk->u.state.pstr; sprev = stk->u.state.pstr_prev; - -#ifdef USE_COMBINATION_EXPLOSION_CHECK - if (stk->u.state.state_check != 0) { - stk->type = STK_STATE_CHECK_MARK; - stk++; - } -#endif - - MOP_OUT; + CHECK_RETRY_LIMIT_IN_MATCH; + SOP_OUT; continue; break; @@ -3113,6 +3969,12 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, unexpected_bytecode_error: STACK_SAVE; return ONIGERR_UNEXPECTED_BYTECODE; + +#ifdef USE_RETRY_LIMIT_IN_MATCH + retry_limit_in_match_over: + STACK_SAVE; + return ONIGERR_RETRY_LIMIT_IN_MATCH_OVER; +#endif } @@ -3423,23 +4285,30 @@ map_search_backward(OnigEncoding enc, UChar map[], } return (UChar* )NULL; } - extern int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region, OnigOptionType option) { int r; - UChar *prev; - OnigMatchArg msa; + OnigMatchParam mp; - MATCH_ARG_INIT(msa, reg, option, region, at); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = at - str; - STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); - } -#endif + onig_initialize_match_param(&mp); + r = onig_match_with_param(reg, str, end, at, region, option, &mp); + onig_free_match_param_content(&mp); + return r; +} + +extern int +onig_match_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* at, OnigRegion* region, OnigOptionType option, + OnigMatchParam* mp) +{ + int r; + UChar *prev; + MatchArg msa; + ADJUST_MATCH_PARAM(reg, mp); + MATCH_ARG_INIT(msa, reg, option, region, at, mp); if (region #ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) @@ -3459,11 +4328,7 @@ onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, } prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at); - r = match_at(reg, str, end, -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE - end, -#endif - at, prev, &msa); + r = match_at(reg, str, end, end, at, prev, &msa); } end: @@ -3497,23 +4362,23 @@ forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s, retry: switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: + case OPTIMIZE_EXACT: p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_EXACT_IC: + case OPTIMIZE_EXACT_IC: p = slow_search_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_EXACT_BM: + case OPTIMIZE_EXACT_BM: p = bm_search(reg, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + case OPTIMIZE_EXACT_BM_NO_REV: p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range); break; - case ONIG_OPTIMIZE_MAP: + case OPTIMIZE_MAP: p = map_search(reg->enc, reg->map, p, range); break; } @@ -3621,20 +4486,20 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, retry: switch (reg->optimize) { - case ONIG_OPTIMIZE_EXACT: + case OPTIMIZE_EXACT: exact_method: p = slow_search_backward(reg->enc, reg->exact, reg->exact_end, range, adjrange, end, p); break; - case ONIG_OPTIMIZE_EXACT_IC: + case OPTIMIZE_EXACT_IC: p = slow_search_backward_ic(reg->enc, reg->case_fold_flag, reg->exact, reg->exact_end, range, adjrange, end, p); break; - case ONIG_OPTIMIZE_EXACT_BM: - case ONIG_OPTIMIZE_EXACT_BM_NOT_REV: + case OPTIMIZE_EXACT_BM: + case OPTIMIZE_EXACT_BM_NO_REV: #ifdef USE_INT_MAP_BACKWARD if (IS_NULL(reg->int_map_backward)) { int r; @@ -3653,7 +4518,7 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, #endif break; - case ONIG_OPTIMIZE_MAP: + case OPTIMIZE_MAP: p = map_search_backward(reg->enc, reg->map, range, adjrange, p); break; } @@ -3720,17 +4585,30 @@ backward_search_range(regex_t* reg, const UChar* str, const UChar* end, extern int -onig_search(regex_t* reg, const UChar* str, const UChar* end, - const UChar* start, const UChar* range, OnigRegion* region, - OnigOptionType option) +onig_search(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, + OnigOptionType option) +{ + int r; + OnigMatchParam mp; + + onig_initialize_match_param(&mp); + r = onig_search_with_param(reg, str, end, start, range, region, option, &mp); + onig_free_match_param_content(&mp); + return r; + +} + +extern int +onig_search_with_param(regex_t* reg, const UChar* str, const UChar* end, + const UChar* start, const UChar* range, OnigRegion* region, + OnigOptionType option, OnigMatchParam* mp) { int r; UChar *s, *prev; - OnigMatchArg msa; + MatchArg msa; const UChar *orig_start = start; -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE const UChar *orig_range = range; -#endif #ifdef ONIG_DEBUG_SEARCH fprintf(stderr, @@ -3738,6 +4616,8 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, str, (int )(end - str), (int )(start - str), (int )(range - str)); #endif + ADJUST_MATCH_PARAM(reg, mp); + if (region #ifdef USE_POSIX_API_REGION_OPTION && !IS_POSIX_REGION(option) @@ -3757,7 +4637,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE #ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE #define MATCH_AND_RETURN_CHECK(upper_range) \ r = match_at(reg, str, end, (upper_range), s, prev, &msa); \ @@ -3779,29 +4658,6 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, else goto finish; /* error */ \ } #endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#else -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - if (! IS_FIND_LONGEST(reg->options)) {\ - goto match;\ - }\ - }\ - else goto finish; /* error */ \ - } -#else -#define MATCH_AND_RETURN_CHECK(none) \ - r = match_at(reg, str, end, s, prev, &msa);\ - if (r != ONIG_MISMATCH) {\ - if (r >= 0) {\ - goto match;\ - }\ - else goto finish; /* error */ \ - } -#endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */ -#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */ /* anchor optimize: resume search range */ @@ -3886,7 +4742,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, goto end_buf; } } - else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) { + else if ((reg->anchor & ANCHOR_ANYCHAR_INF_ML)) { goto begin_position; } } @@ -3902,11 +4758,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, s = (UChar* )start; prev = (UChar* )NULL; - MATCH_ARG_INIT(msa, reg, option, region, start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - msa.state_check_buff = (void* )0; - msa.state_check_buff_size = 0; /* NO NEED, for valgrind */ -#endif + MATCH_ARG_INIT(msa, reg, option, region, start, mp); MATCH_AND_RETURN_CHECK(end); goto mismatch; } @@ -3918,13 +4770,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, (int )(end - str), (int )(start - str), (int )(range - str)); #endif - MATCH_ARG_INIT(msa, reg, option, region, orig_start); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - { - int offset = (MIN(start, range) - str); - STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check); - } -#endif + MATCH_ARG_INIT(msa, reg, option, region, orig_start, mp); s = (UChar* )start; if (range > start) { /* forward search */ @@ -3933,7 +4779,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, else prev = (UChar* )NULL; - if (reg->optimize != ONIG_OPTIMIZE_NONE) { + if (reg->optimize != OPTIMIZE_NONE) { UChar *sch_range, *low, *high, *low_prev; sch_range = (UChar* )range; @@ -3969,7 +4815,7 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, if (! forward_search_range(reg, str, end, s, sch_range, &low, &high, (UChar** )NULL)) goto mismatch; - if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) { + if ((reg->anchor & ANCHOR_ANYCHAR_INF) != 0) { do { MATCH_AND_RETURN_CHECK(orig_range); prev = s; @@ -3998,12 +4844,10 @@ onig_search(regex_t* reg, const UChar* str, const UChar* end, } } else { /* backward search */ -#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE if (orig_start < end) orig_start += enclen(reg->enc, orig_start); /* is upper range */ -#endif - if (reg->optimize != ONIG_OPTIMIZE_NONE) { + if (reg->optimize != OPTIMIZE_NONE) { UChar *low, *high, *adjrange, *sch_start; if (range < end) @@ -4204,3 +5048,600 @@ onig_copy_encoding(OnigEncoding to, OnigEncoding from) *to = *from; } + +/* for callout functions */ + +#ifdef USE_CALLOUT + +extern OnigCalloutFunc +onig_get_progress_callout(void) +{ + return DefaultProgressCallout; +} + +extern int +onig_set_progress_callout(OnigCalloutFunc f) +{ + DefaultProgressCallout = f; + return ONIG_NORMAL; +} + +extern OnigCalloutFunc +onig_get_retraction_callout(void) +{ + return DefaultRetractionCallout; +} + +extern int +onig_set_retraction_callout(OnigCalloutFunc f) +{ + DefaultRetractionCallout = f; + return ONIG_NORMAL; +} + +extern int +onig_get_callout_num_by_callout_args(OnigCalloutArgs* args) +{ + return args->num; +} + +extern OnigCalloutIn +onig_get_callout_in_by_callout_args(OnigCalloutArgs* args) +{ + return args->in; +} + +extern int +onig_get_name_id_by_callout_args(OnigCalloutArgs* args) +{ + return args->name_id; +} + +extern const UChar* +onig_get_contents_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_CONTENTS) { + return e->u.content.start; + } + + return 0; +} + +extern const UChar* +onig_get_contents_end_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_CONTENTS) { + return e->u.content.end; + } + + return 0; +} + +extern int +onig_get_args_num_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_NAME) { + return e->u.arg.num; + } + + return ONIGERR_INVALID_ARGUMENT; +} + +extern int +onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_NAME) { + return e->u.arg.passed_num; + } + + return ONIGERR_INVALID_ARGUMENT; +} + +extern int +onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index, + OnigType* type, OnigValue* val) +{ + int num; + CalloutListEntry* e; + + num = args->num; + e = onig_reg_callout_list_at(args->regex, num); + if (IS_NULL(e)) return 0; + if (e->of == ONIG_CALLOUT_OF_NAME) { + if (IS_NOT_NULL(type)) *type = e->u.arg.types[index]; + if (IS_NOT_NULL(val)) *val = e->u.arg.vals[index]; + return ONIG_NORMAL; + } + + return ONIGERR_INVALID_ARGUMENT; +} + +extern const UChar* +onig_get_string_by_callout_args(OnigCalloutArgs* args) +{ + return args->string; +} + +extern const UChar* +onig_get_string_end_by_callout_args(OnigCalloutArgs* args) +{ + return args->string_end; +} + +extern const UChar* +onig_get_start_by_callout_args(OnigCalloutArgs* args) +{ + return args->start; +} + +extern const UChar* +onig_get_right_range_by_callout_args(OnigCalloutArgs* args) +{ + return args->right_range; +} + +extern const UChar* +onig_get_current_by_callout_args(OnigCalloutArgs* args) +{ + return args->current; +} + +extern OnigRegex +onig_get_regex_by_callout_args(OnigCalloutArgs* args) +{ + return args->regex; +} + +extern unsigned long +onig_get_retry_counter_by_callout_args(OnigCalloutArgs* args) +{ + return args->retry_in_match_counter; +} + + +extern int +onig_get_capture_range_in_callout(OnigCalloutArgs* a, int mem_num, int* begin, int* end) +{ + OnigRegex reg; + const UChar* str; + StackType* stk_base; + int i; + + i = mem_num; + reg = a->regex; + str = a->string; + stk_base = a->stk_base; + + if (i > 0) { + if (a->mem_end_stk[i] != INVALID_STACK_INDEX) { + if (MEM_STATUS_AT(reg->bt_mem_start, i)) + *begin = (int )(STACK_AT(a->mem_start_stk[i])->u.mem.pstr - str); + else + *begin = (int )((UChar* )((void* )a->mem_start_stk[i]) - str); + + *end = (int )((MEM_STATUS_AT(reg->bt_mem_end, i) + ? STACK_AT(a->mem_end_stk[i])->u.mem.pstr + : (UChar* )((void* )a->mem_end_stk[i])) - str); + } + else { + *begin = *end = ONIG_REGION_NOTPOS; + } + } + else if (i == 0) { +#if 0 + *begin = a->start - str; + *end = a->current - str; +#else + return ONIGERR_INVALID_ARGUMENT; +#endif + } + else + return ONIGERR_INVALID_ARGUMENT; + + return ONIG_NORMAL; +} + +extern int +onig_get_used_stack_size_in_callout(OnigCalloutArgs* a, int* used_num, int* used_bytes) +{ + int n; + + n = (int )(a->stk - a->stk_base); + + if (used_num != 0) + *used_num = n; + + if (used_bytes != 0) + *used_bytes = n * sizeof(StackType); + + return ONIG_NORMAL; +} + + +/* builtin callout functions */ + +extern int +onig_builtin_fail(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ + return ONIG_CALLOUT_FAIL; +} + +extern int +onig_builtin_mismatch(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ + return ONIG_MISMATCH; +} + +#if 0 +extern int +onig_builtin_success(OnigCalloutArgs* args ARG_UNUSED, void* user_data ARG_UNUSED) +{ + return ONIG_CALLOUT_SUCCESS; +} +#endif + +extern int +onig_builtin_error(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int n; + OnigValue val; + + r = onig_get_arg_by_callout_args(args, 0, 0, &val); + if (r != ONIG_NORMAL) return r; + + n = (int )val.l; + if (n >= 0) { + n = ONIGERR_INVALID_CALLOUT_BODY; + } + + return n; +} + +extern int +onig_builtin_count(OnigCalloutArgs* args, void* user_data) +{ + (void )onig_check_callout_data_and_clear_old_values(args); + + return onig_builtin_total_count(args, user_data); +} + +extern int +onig_builtin_total_count(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int slot; + OnigType type; + OnigValue val; + OnigValue aval; + OnigCodePoint count_type; + + r = onig_get_arg_by_callout_args(args, 0, &type, &aval); + if (r != ONIG_NORMAL) return r; + + count_type = aval.c; + if (count_type != '>' && count_type != 'X' && count_type != '<') + return ONIGERR_INVALID_CALLOUT_ARG; + + r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, 0, + &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + /* type == void: initial state */ + val.l = 0; + } + + if (args->in == ONIG_CALLOUT_IN_RETRACTION) { + slot = 2; + if (count_type == '<') + val.l++; + else if (count_type == 'X') + val.l--; + } + else { + slot = 1; + if (count_type != '<') + val.l++; + } + + r = onig_set_callout_data_by_callout_args_self(args, 0, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + + /* slot 1: in progress counter, slot 2: in retraction counter */ + r = onig_get_callout_data_by_callout_args_self_dont_clear_old(args, slot, + &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + val.l = 0; + } + + val.l++; + r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + + return ONIG_CALLOUT_SUCCESS; +} + +extern int +onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int slot; + OnigType type; + OnigValue val; + OnigValue aval; + + (void )onig_check_callout_data_and_clear_old_values(args); + + slot = 0; + r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + /* type == void: initial state */ + type = ONIG_TYPE_LONG; + val.l = 0; + } + + r = onig_get_arg_by_callout_args(args, 0, &type, &aval); + if (r != ONIG_NORMAL) return r; + + if (args->in == ONIG_CALLOUT_IN_RETRACTION) { + val.l--; + } + else { + if (val.l >= aval.l) return ONIG_CALLOUT_FAIL; + val.l++; + } + + r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + + return ONIG_CALLOUT_SUCCESS; +} + +enum OP_CMP { + OP_EQ, + OP_NE, + OP_LT, + OP_GT, + OP_LE, + OP_GE +}; + +extern int +onig_builtin_cmp(OnigCalloutArgs* args, void* user_data ARG_UNUSED) +{ + int r; + int slot; + long lv; + long rv; + OnigType type; + OnigValue val; + regex_t* reg; + enum OP_CMP op; + + reg = args->regex; + + r = onig_get_arg_by_callout_args(args, 0, &type, &val); + if (r != ONIG_NORMAL) return r; + + if (type == ONIG_TYPE_TAG) { + r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val); + if (r < ONIG_NORMAL) return r; + else if (r > ONIG_NORMAL) + lv = 0L; + else + lv = val.l; + } + else { /* ONIG_TYPE_LONG */ + lv = val.l; + } + + r = onig_get_arg_by_callout_args(args, 2, &type, &val); + if (r != ONIG_NORMAL) return r; + + if (type == ONIG_TYPE_TAG) { + r = onig_get_callout_data_by_callout_args(args, val.tag, 0, &type, &val); + if (r < ONIG_NORMAL) return r; + else if (r > ONIG_NORMAL) + rv = 0L; + else + rv = val.l; + } + else { /* ONIG_TYPE_LONG */ + rv = val.l; + } + + slot = 0; + r = onig_get_callout_data_by_callout_args_self(args, slot, &type, &val); + if (r < ONIG_NORMAL) + return r; + else if (r > ONIG_NORMAL) { + /* type == void: initial state */ + OnigCodePoint c1, c2; + UChar* p; + + r = onig_get_arg_by_callout_args(args, 1, &type, &val); + if (r != ONIG_NORMAL) return r; + + p = val.s.start; + c1 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end); + p += ONIGENC_MBC_ENC_LEN(reg->enc, p); + if (p < val.s.end) { + c2 = ONIGENC_MBC_TO_CODE(reg->enc, p, val.s.end); + p += ONIGENC_MBC_ENC_LEN(reg->enc, p); + if (p != val.s.end) return ONIGERR_INVALID_CALLOUT_ARG; + } + else + c2 = 0; + + switch (c1) { + case '=': + if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG; + op = OP_EQ; + break; + case '!': + if (c2 != '=') return ONIGERR_INVALID_CALLOUT_ARG; + op = OP_NE; + break; + case '<': + if (c2 == '=') op = OP_LE; + else if (c2 == 0) op = OP_LT; + else return ONIGERR_INVALID_CALLOUT_ARG; + break; + case '>': + if (c2 == '=') op = OP_GE; + else if (c2 == 0) op = OP_GT; + else return ONIGERR_INVALID_CALLOUT_ARG; + break; + default: + return ONIGERR_INVALID_CALLOUT_ARG; + break; + } + val.l = (long )op; + r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); + if (r != ONIG_NORMAL) return r; + } + else { + op = (enum OP_CMP )val.l; + } + + switch (op) { + case OP_EQ: r = (lv == rv); break; + case OP_NE: r = (lv != rv); break; + case OP_LT: r = (lv < rv); break; + case OP_GT: r = (lv > rv); break; + case OP_LE: r = (lv <= rv); break; + case OP_GE: r = (lv >= rv); break; + } + + return r == 0 ? ONIG_CALLOUT_FAIL : ONIG_CALLOUT_SUCCESS; +} + + +#include + +static FILE* OutFp; + +/* name start with "onig_" for macros. */ +static int +onig_builtin_monitor(OnigCalloutArgs* args, void* user_data) +{ + int r; + int num; + size_t tag_len; + const UChar* start; + const UChar* right; + const UChar* current; + const UChar* string; + const UChar* strend; + const UChar* tag_start; + const UChar* tag_end; + regex_t* reg; + OnigCalloutIn in; + OnigType type; + OnigValue val; + char buf[20]; + FILE* fp; + + fp = OutFp; + + r = onig_get_arg_by_callout_args(args, 0, &type, &val); + if (r != ONIG_NORMAL) return r; + + in = onig_get_callout_in_by_callout_args(args); + if (in == ONIG_CALLOUT_IN_PROGRESS) { + if (val.c == '<') + return ONIG_CALLOUT_SUCCESS; + } + else { + if (val.c != 'X' && val.c != '<') + return ONIG_CALLOUT_SUCCESS; + } + + num = onig_get_callout_num_by_callout_args(args); + start = onig_get_start_by_callout_args(args); + right = onig_get_right_range_by_callout_args(args); + current = onig_get_current_by_callout_args(args); + string = onig_get_string_by_callout_args(args); + strend = onig_get_string_end_by_callout_args(args); + reg = onig_get_regex_by_callout_args(args); + tag_start = onig_get_callout_tag_start(reg, num); + tag_end = onig_get_callout_tag_end(reg, num); + + if (tag_start == 0) + xsnprintf(buf, sizeof(buf), "#%d", num); + else { + /* CAUTION: tag string is not terminated with NULL. */ + int i; + + tag_len = tag_end - tag_start; + if (tag_len >= sizeof(buf)) tag_len = sizeof(buf) - 1; + for (i = 0; i < tag_len; i++) buf[i] = tag_start[i]; + buf[tag_len] = '\0'; + } + + fprintf(fp, "ONIG-MONITOR: %-4s %s at: %d [%d - %d] len: %d\n", + buf, + in == ONIG_CALLOUT_IN_PROGRESS ? "=>" : "<=", + (int )(current - string), + (int )(start - string), + (int )(right - string), + (int )(strend - string)); + fflush(fp); + + return ONIG_CALLOUT_SUCCESS; +} + +extern int +onig_setup_builtin_monitors_by_ascii_encoded_name(void* fp /* FILE* */) +{ + int id; + char* name; + OnigEncoding enc; + unsigned int ts[4]; + OnigValue opts[4]; + + if (IS_NOT_NULL(fp)) + OutFp = (FILE* )fp; + else + OutFp = stdout; + + enc = ONIG_ENCODING_ASCII; + + name = "MON"; + ts[0] = ONIG_TYPE_CHAR; + opts[0].c = '>'; + BC_B_O(name, monitor, 1, ts, 1, opts); + + return ONIG_NORMAL; +} + +#endif /* USE_CALLOUT */ diff --git a/src/reggnu.c b/src/reggnu.c index 50eb9b4..37c7519 100644 --- a/src/reggnu.c +++ b/src/reggnu.c @@ -2,7 +2,7 @@ reggnu.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -28,10 +28,7 @@ */ #include "regint.h" - -#ifndef ONIGGNU_H #include "oniggnu.h" -#endif extern void re_free_registers(OnigRegion* r) @@ -140,8 +137,7 @@ re_mbcinit(int mb_code) break; } - onig_initialize(0, 0); - onig_initialize_encoding(enc); + onig_initialize(&enc, 1); onigenc_set_default_encoding(enc); } diff --git a/src/regint.h b/src/regint.h index 256b045..ba8407a 100644 --- a/src/regint.h +++ b/src/regint.h @@ -4,7 +4,7 @@ regint.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -57,29 +57,48 @@ /* config */ /* spec. config */ #define USE_CALL +#define USE_CALLOUT #define USE_BACKREF_WITH_LEVEL /* \k, \k */ #define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR +#define USE_RETRY_LIMIT_IN_MATCH + /* internal config */ #define USE_OP_PUSH_OR_JUMP_EXACT #define USE_QUANT_PEEK_NEXT #define USE_ST_LIBRARY +#include "regenc.h" + +#ifdef __cplusplus +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +/* escape Mac OS X/Xcode 2.4/gcc 4.0.1 problem */ +#if defined(__APPLE__) && defined(__GNUC__) && __GNUC__ >= 4 +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + +#ifdef HAVE_STDARG_H +# ifndef HAVE_STDARG_PROTOTYPES +# define HAVE_STDARG_PROTOTYPES 1 +# endif +#endif + + #define INIT_MATCH_STACK_SIZE 160 #define DEFAULT_MATCH_STACK_LIMIT_SIZE 0 /* unlimited */ +#define DEFAULT_RETRY_LIMIT_IN_MATCH 10000000 #define DEFAULT_PARSE_DEPTH_LIMIT 4096 -#if defined(__GNUC__) -# define ARG_UNUSED __attribute__ ((unused)) -#else -# define ARG_UNUSED -#endif - /* */ /* escape other system UChar definition */ -#include "config.h" #ifdef ONIG_ESCAPE_UCHAR_COLLISION #undef ONIG_ESCAPE_UCHAR_COLLISION #endif @@ -89,15 +108,12 @@ #define USE_VARIABLE_META_CHARS #define USE_POSIX_API_REGION_OPTION #define USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE -/* #define USE_COMBINATION_EXPLOSION_CHECK */ /* (X*)* */ #define xmalloc malloc #define xrealloc realloc #define xcalloc calloc #define xfree free -#define CHECK_INTERRUPT_IN_MATCH_AT - #define st_init_table onig_st_init_table #define st_init_table_with_size onig_st_init_table_with_size #define st_init_numtable onig_st_init_numtable @@ -118,9 +134,6 @@ /* */ #define onig_st_is_member st_is_member -#define STATE_CHECK_STRING_THRESHOLD_LEN 7 -#define STATE_CHECK_BUFF_MAX_SIZE 0x4000 - #define xmemset memset #define xmemcpy memcpy #define xmemmove memmove @@ -140,6 +153,10 @@ #include +#ifdef HAVE_LIMITS_H +#include +#endif + #ifdef HAVE_STDLIB_H #include #endif @@ -184,8 +201,6 @@ typedef unsigned int uintptr_t; #endif #endif -#include "regenc.h" - #ifdef MIN #undef MIN #endif @@ -237,14 +252,93 @@ typedef unsigned int uintptr_t; #endif /* PLATFORM_UNALIGNED_WORD_ACCESS */ + +#ifdef USE_CALLOUT + +typedef struct { + int flag; + OnigCalloutOf of; + int in; + int name_id; + const UChar* tag_start; + const UChar* tag_end; + OnigCalloutType type; + OnigCalloutFunc start_func; + OnigCalloutFunc end_func; + union { + struct { + const UChar* start; + const UChar* end; + } content; + struct { + int num; + int passed_num; + OnigType types[ONIG_CALLOUT_MAX_ARGS_NUM]; + OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM]; + } arg; + } u; +} CalloutListEntry; + +#endif + typedef struct { - int num_keeper; - int* keepers; -} RegExt; + const UChar* pattern; + const UChar* pattern_end; +#ifdef USE_CALLOUT + void* tag_table; + int callout_num; + int callout_list_alloc; + CalloutListEntry* callout_list; /* index: callout num */ +#endif +} RegexExt; -#define REG_EXTP(reg) (RegExt* )((reg)->chain) +#define REG_EXTP(reg) ((RegexExt* )((reg)->chain)) #define REG_EXTPL(reg) ((reg)->chain) +struct re_pattern_buffer { + /* common members of BBuf(bytes-buffer) */ + unsigned char* p; /* compiled pattern */ + unsigned int used; /* used space for p */ + unsigned int alloc; /* allocated space for p */ + + int num_mem; /* used memory(...) num counted from 1 */ + int num_repeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ + int num_null_check; /* OP_EMPTY_CHECK_START/END id counter */ + int num_comb_exp_check; /* no longer used (combination explosion check) */ + int num_call; /* number of subexp call */ + unsigned int capture_history; /* (?@...) flag (1-31) */ + unsigned int bt_mem_start; /* need backtrack flag */ + unsigned int bt_mem_end; /* need backtrack flag */ + int stack_pop_level; + int repeat_range_alloc; + OnigRepeatRange* repeat_range; + + OnigEncoding enc; + OnigOptionType options; + OnigSyntaxType* syntax; + OnigCaseFoldType case_fold_flag; + void* name_table; + + /* optimization info (string search, char-map and anchors) */ + int optimize; /* optimize flag */ + int threshold_len; /* search str-length for apply optimize */ + int anchor; /* BEGIN_BUF, BEGIN_POS, (SEMI_)END_BUF */ + OnigLen anchor_dmin; /* (SEMI_)END_BUF anchor distance */ + OnigLen anchor_dmax; /* (SEMI_)END_BUF anchor distance */ + int sub_anchor; /* start-anchor for exact or map */ + unsigned char *exact; + unsigned char *exact_end; + unsigned char map[ONIG_CHAR_TABLE_SIZE]; /* used as BM skip or char-map */ + int *int_map; /* BM skip for exact_len > 255 */ + int *int_map_backward; /* BM skip for backward search */ + OnigLen dmin; /* min-distance of exact or map */ + OnigLen dmax; /* max-distance of exact or map */ + + /* regex_t link chain */ + struct re_pattern_buffer* chain; /* escape compile-conflict */ +}; + + /* stack pop level */ enum StackPopLevel { STACK_POP_LEVEL_FREE = 0, @@ -253,12 +347,14 @@ enum StackPopLevel { }; /* optimize flags */ -#define ONIG_OPTIMIZE_NONE 0 -#define ONIG_OPTIMIZE_EXACT 1 /* Slow Search */ -#define ONIG_OPTIMIZE_EXACT_BM 2 /* Boyer Moore Search */ -#define ONIG_OPTIMIZE_EXACT_BM_NOT_REV 3 /* BM (but not simple match) */ -#define ONIG_OPTIMIZE_EXACT_IC 4 /* Slow Search (ignore case) */ -#define ONIG_OPTIMIZE_MAP 5 /* char map */ +enum OptimizeType { + OPTIMIZE_NONE = 0, + OPTIMIZE_EXACT = 1, /* Slow Search */ + OPTIMIZE_EXACT_BM = 2, /* Boyer Moore Search */ + OPTIMIZE_EXACT_BM_NO_REV = 3, /* BM (but not simple match) */ + OPTIMIZE_EXACT_IC = 4, /* Slow Search (ignore case) */ + OPTIMIZE_MAP = 5 /* char map */ +}; /* bit status */ typedef unsigned int MemStatusType; @@ -467,8 +563,8 @@ typedef struct _BBuf { #define ANCHOR_NO_WORD_BOUNDARY (1<<11) #define ANCHOR_WORD_BEGIN (1<<12) #define ANCHOR_WORD_END (1<<13) -#define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */ -#define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */ +#define ANCHOR_ANYCHAR_INF (1<<14) +#define ANCHOR_ANYCHAR_INF_ML (1<<15) #define ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<16) #define ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17) @@ -557,7 +653,7 @@ enum OpCode { OP_JUMP, OP_PUSH, OP_PUSH_SUPER, - OP_POP, + OP_POP_OUT, OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */ OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */ OP_REPEAT, /* {n,m} */ @@ -581,16 +677,14 @@ enum OpCode { OP_LOOK_BEHIND_NOT_START, /* (? */ + OP_CALL, /* \g */ OP_RETURN, OP_PUSH_SAVE_VAL, OP_UPDATE_VAR, - - OP_STATE_CHECK_PUSH, /* combination explosion check and push */ - OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */ - OP_STATE_CHECK, /* check only */ - OP_STATE_CHECK_ANYCHAR_STAR, - OP_STATE_CHECK_ANYCHAR_ML_STAR, +#ifdef USE_CALLOUT + OP_CALLOUT_CONTENTS, /* (?{...}) (?{{...}}) */ + OP_CALLOUT_NAME, /* (*name) (*name[tag](args...)) */ +#endif /* no need: IS_DYNAMIC_OPTION() == 0 */ OP_SET_OPTION_PUSH, /* set option and push recover option */ @@ -627,7 +721,6 @@ typedef int ModeType; #define SIZE_ABSADDR sizeof(AbsAddrType) #define SIZE_LENGTH sizeof(LengthType) #define SIZE_MEMNUM sizeof(MemNumType) -#define SIZE_STATE_CHECK_NUM sizeof(StateCheckNumType) #define SIZE_REPEATNUM sizeof(RepeatNumType) #define SIZE_OPTION sizeof(OnigOptionType) #define SIZE_CODE_POINT sizeof(OnigCodePoint) @@ -643,7 +736,6 @@ typedef int ModeType; #define GET_REPEATNUM_INC(num,p) PLATFORM_GET_INC(num, p, RepeatNumType) #define GET_OPTION_INC(option,p) PLATFORM_GET_INC(option, p, OnigOptionType) #define GET_POINTER_INC(ptr,p) PLATFORM_GET_INC(ptr, p, PointerType) -#define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) #define GET_SAVE_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, SaveType) #define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType) #define GET_MODE_INC(mode,p) PLATFORM_GET_INC(mode, p, ModeType) @@ -662,7 +754,7 @@ typedef int ModeType; #define SIZE_OP_JUMP (SIZE_OPCODE + SIZE_RELADDR) #define SIZE_OP_PUSH (SIZE_OPCODE + SIZE_RELADDR) #define SIZE_OP_PUSH_SUPER (SIZE_OPCODE + SIZE_RELADDR) -#define SIZE_OP_POP SIZE_OPCODE +#define SIZE_OP_POP_OUT SIZE_OPCODE #define SIZE_OP_PUSH_OR_JUMP_EXACT1 (SIZE_OPCODE + SIZE_RELADDR + 1) #define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1) #define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM) @@ -693,11 +785,9 @@ typedef int ModeType; #define SIZE_OP_PUSH_SAVE_VAL (SIZE_OPCODE + SIZE_SAVE_TYPE + SIZE_MEMNUM) #define SIZE_OP_UPDATE_VAR (SIZE_OPCODE + SIZE_UPDATE_VAR_TYPE + SIZE_MEMNUM) -#ifdef USE_COMBINATION_EXPLOSION_CHECK -#define SIZE_OP_STATE_CHECK (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) -#define SIZE_OP_STATE_CHECK_PUSH (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) -#define SIZE_OP_STATE_CHECK_PUSH_OR_JUMP (SIZE_OPCODE + SIZE_STATE_CHECK_NUM + SIZE_RELADDR) -#define SIZE_OP_STATE_CHECK_ANYCHAR_STAR (SIZE_OPCODE + SIZE_STATE_CHECK_NUM) +#ifdef USE_CALLOUT +#define SIZE_OP_CALLOUT_CONTENTS (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_CALLOUT_NAME (SIZE_OPCODE + SIZE_MEMNUM + SIZE_MEMNUM) #endif #define MC_ESC(syn) (syn)->meta_char_table.esc @@ -751,44 +841,14 @@ typedef int ModeType; #define NCCLASS_CLEAR_NOT(nd) NCCLASS_FLAG_CLEAR(nd, FLAG_NCCLASS_NOT) #define IS_NCCLASS_NOT(nd) IS_NCCLASS_FLAG_ON(nd, FLAG_NCCLASS_NOT) -typedef struct { - void* stack_p; - int stack_n; - OnigOptionType options; - OnigRegion* region; - int ptr_num; - const UChar* start; /* search start position (for \G: BEGIN_POSITION) */ -#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE - int best_len; /* for ONIG_OPTION_FIND_LONGEST */ - UChar* best_s; -#endif -#ifdef USE_COMBINATION_EXPLOSION_CHECK - void* state_check_buff; - int state_check_buff_size; -#endif -} OnigMatchArg; - - -typedef struct OnigEndCallListItem { - struct OnigEndCallListItem* next; - void (*func)(void); -} OnigEndCallListItemType; - extern void onig_add_end_call(void (*func)(void)); #ifdef ONIG_DEBUG -typedef struct { - short int opcode; - char* name; - short int arg_type; -} OnigOpInfoType; - -extern OnigOpInfoType OnigOpInfo[]; - - -extern void onig_print_compiled_byte_code P_((FILE* f, UChar* bp, UChar** nextp, UChar* start, OnigEncoding enc)); +#ifdef ONIG_DEBUG_COMPILE +extern void onig_print_compiled_byte_code_list(FILE* f, regex_t* reg); +#endif #ifdef ONIG_DEBUG_STATISTICS extern void onig_statistics_init P_((void)); @@ -803,6 +863,85 @@ extern int onig_bbuf_init P_((BBuf* buf, int size)); extern int onig_compile P_((regex_t* reg, const UChar* pattern, const UChar* pattern_end, OnigErrorInfo* einfo)); extern void onig_transfer P_((regex_t* to, regex_t* from)); extern int onig_is_code_in_cc_len P_((int enclen, OnigCodePoint code, void* /* CClassNode* */ cc)); +extern RegexExt* onig_get_regex_ext(regex_t* reg); +extern int onig_ext_set_pattern(regex_t* reg, const UChar* pattern, const UChar* pattern_end); + +#ifdef USE_CALLOUT + +extern OnigCalloutType onig_get_callout_type_by_name_id(int name_id); +extern OnigCalloutFunc onig_get_callout_start_func_by_name_id(int id); +extern OnigCalloutFunc onig_get_callout_end_func_by_name_id(int id); +extern int onig_callout_tag_table_free(void* table); +extern void onig_free_reg_callout_list(int n, CalloutListEntry* list); +extern CalloutListEntry* onig_reg_callout_list_at(regex_t* reg, int num); +extern OnigCalloutFunc onig_get_callout_start_func(regex_t* reg, int callout_num); + +/* for definition of builtin callout */ +#define BC0_P(name, func) do {\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_PROGRESS,\ + onig_builtin_ ## func, 0, 0, 0, 0, 0);\ + if (id < 0) return id;\ +} while(0) + +#define BC0_R(name, func) do {\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_RETRACTION,\ + onig_builtin_ ## func, 0, 0, 0, 0, 0);\ + if (id < 0) return id;\ +} while(0) + +#define BC0_B(name, func) do {\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_BOTH,\ + onig_builtin_ ## func, 0, 0, 0, 0, 0);\ + if (id < 0) return id;\ +} while(0) + +#define BC_P(name, func, na, ts) do {\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_PROGRESS,\ + onig_builtin_ ## func, 0, (na), (ts), 0, 0); \ + if (id < 0) return id;\ +} while(0) + +#define BC_P_O(name, func, nts, ts, nopts, opts) do {\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_PROGRESS,\ + onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\ + if (id < 0) return id;\ +} while(0) + +#define BC_B(name, func, na, ts) do {\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_BOTH,\ + onig_builtin_ ## func, 0, (na), (ts), 0, 0);\ + if (id < 0) return id;\ +} while(0) + +#define BC_B_O(name, func, nts, ts, nopts, opts) do {\ + int len = onigenc_str_bytelen_null(enc, (UChar* )name);\ + id = onig_set_callout_of_name(enc, ONIG_CALLOUT_TYPE_SINGLE,\ + (UChar* )(name), (UChar* )((name) + len),\ + ONIG_CALLOUT_IN_BOTH,\ + onig_builtin_ ## func, 0, (nts), (ts), (nopts), (opts));\ + if (id < 0) return id;\ +} while(0) + +#endif /* USE_CALLOUT */ + /* strend hash */ typedef void hash_table_type; diff --git a/src/regparse.c b/src/regparse.c index 1e4dc30..6e95a14 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -2,7 +2,7 @@ regparse.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,6 +26,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ + #include "regparse.h" #include "st.h" @@ -33,10 +34,17 @@ #include #endif +#define INIT_TAG_NAMES_ALLOC_NUM 5 + #define WARN_BUFSIZE 256 #define CASE_FOLD_IS_APPLIED_INSIDE_NEGATIVE_CCLASS +#define IS_ALLOWED_CODE_IN_CALLOUT_NAME(c) \ + ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_' /* || c == '!' */) +#define IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c) \ + ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_') + OnigSyntaxType OnigSyntaxOniguruma = { (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | @@ -50,6 +58,8 @@ OnigSyntaxType OnigSyntaxOniguruma = { ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP | ONIG_SYN_OP2_ESC_K_NAMED_BACKREF | ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | + ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS | + ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME | ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER | ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE | ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT | @@ -188,6 +198,16 @@ onig_set_parse_depth_limit(unsigned int depth) return 0; } +static int +positive_int_multiply(int x, int y) +{ + if (x == 0 || y == 0) return 0; + + if (x < INT_MAX / y) + return x * y; + else + return -1; +} static void bbuf_free(BBuf* bbuf) @@ -331,25 +351,6 @@ onig_strcpy(UChar* dest, const UChar* src, const UChar* end) } } -static UChar* -strdup_with_null(OnigEncoding enc, UChar* s, UChar* end) -{ - int slen, term_len, i; - UChar *r; - - slen = (int )(end - s); - term_len = ONIGENC_MBC_MINLEN(enc); - - r = (UChar* )xmalloc(slen + term_len); - CHECK_NULL_RETURN(r); - xmemcpy(r, s, slen); - - for (i = 0; i < term_len; i++) - r[slen + i] = (UChar )0; - - return r; -} - static int save_entry(ScanEnv* env, enum SaveType type, int* id) { @@ -521,6 +522,106 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key, return result; } + +typedef struct { + OnigEncoding enc; + int type; // callout type: single or not + UChar* s; + UChar* end; +} st_callout_name_key; + +static int +callout_name_table_cmp(st_callout_name_key* x, st_callout_name_key* y) +{ + UChar *p, *q; + int c; + + if (x->enc != y->enc) return 1; + if (x->type != y->type) return 1; + if ((x->end - x->s) != (y->end - y->s)) + return 1; + + p = x->s; + q = y->s; + while (p < x->end) { + c = (int )*p - (int )*q; + if (c != 0) return c; + + p++; q++; + } + + return 0; +} + +static int +callout_name_table_hash(st_callout_name_key* x) +{ + UChar *p; + int val = 0; + + p = x->s; + while (p < x->end) { + val = val * 997 + (int )*p++; + } + + /* use intptr_t for escape warning in Windows */ + return val + (val >> 5) + ((intptr_t )x->enc & 0xffff) + x->type; +} + +extern hash_table_type* +onig_st_init_callout_name_table_with_size(int size) +{ + static struct st_hash_type hashType = { + callout_name_table_cmp, + callout_name_table_hash, + }; + + return (hash_table_type* ) + onig_st_init_table_with_size(&hashType, size); +} + +extern int +onig_st_lookup_callout_name_table(hash_table_type* table, + OnigEncoding enc, + int type, + const UChar* str_key, + const UChar* end_key, + hash_data_type *value) +{ + st_callout_name_key key; + + key.enc = enc; + key.type = type; + key.s = (UChar* )str_key; + key.end = (UChar* )end_key; + + return onig_st_lookup(table, (st_data_t )(&key), value); +} + +static int +st_insert_callout_name_table(hash_table_type* table, + OnigEncoding enc, int type, + UChar* str_key, UChar* end_key, + hash_data_type value) +{ + st_callout_name_key* key; + int result; + + key = (st_callout_name_key* )xmalloc(sizeof(st_callout_name_key)); + CHECK_NULL_RETURN_MEMERR(key); + + /* key->s: don't duplicate, because str_key is duped in callout_name_entry() */ + key->enc = enc; + key->type = type; + key->s = str_key; + key->end = end_key; + result = onig_st_insert(table, (st_data_t )key, value); + if (result) { + xfree(key); + } + return result; +} + #endif /* USE_ST_LIBRARY */ @@ -537,6 +638,8 @@ typedef struct { #ifdef USE_ST_LIBRARY +#define INIT_NAMES_ALLOC_NUM 5 + typedef st_table NameTable; typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ @@ -862,13 +965,13 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) if (IS_NULL(e)) { #ifdef USE_ST_LIBRARY if (IS_NULL(t)) { - t = onig_st_init_strend_table_with_size(5); + t = onig_st_init_strend_table_with_size(INIT_NAMES_ALLOC_NUM); reg->name_table = (void* )t; } e = (NameEntry* )xmalloc(sizeof(NameEntry)); CHECK_NULL_RETURN_MEMERR(e); - e->name = strdup_with_null(reg->enc, name, name_end); + e->name = onigenc_strdup(reg->enc, name, name_end); if (IS_NULL(e->name)) { xfree(e); return ONIGERR_MEMORY; } @@ -919,7 +1022,7 @@ name_add(regex_t* reg, UChar* name, UChar* name_end, int backref, ScanEnv* env) } e = &(t->e[t->num]); t->num++; - e->name = strdup_with_null(reg->enc, name, name_end); + e->name = onigenc_strdup(reg->enc, name, name_end); if (IS_NULL(e->name)) return ONIGERR_MEMORY; e->name_len = name_end - name; #endif @@ -1019,177 +1122,946 @@ onig_noname_group_capture_is_active(regex_t* reg) return 1; } +#ifdef USE_CALLOUT -#define INIT_SCANENV_MEMENV_ALLOC_SIZE 16 +typedef struct { + OnigCalloutType type; + int in; + OnigCalloutFunc start_func; + OnigCalloutFunc end_func; + int arg_num; + int opt_arg_num; + unsigned int arg_types[ONIG_CALLOUT_MAX_ARGS_NUM]; + OnigValue opt_defaults[ONIG_CALLOUT_MAX_ARGS_NUM]; + UChar* name; /* reference to GlobalCalloutNameTable entry: e->name */ +} CalloutNameListEntry; -static void -scan_env_clear(ScanEnv* env) -{ - MEM_STATUS_CLEAR(env->capture_history); - MEM_STATUS_CLEAR(env->bt_mem_start); - MEM_STATUS_CLEAR(env->bt_mem_end); - MEM_STATUS_CLEAR(env->backrefed_mem); - env->error = (UChar* )NULL; - env->error_end = (UChar* )NULL; - env->num_call = 0; +typedef struct { + int n; + int alloc; + CalloutNameListEntry* v; +} CalloutNameListType; -#ifdef USE_CALL - env->unset_addr_list = NULL; - env->has_call_zero = 0; -#endif +static CalloutNameListType* GlobalCalloutNameList; - env->num_mem = 0; - env->num_named = 0; - env->mem_alloc = 0; - env->mem_env_dynamic = (MemEnv* )NULL; +static int +make_callout_func_list(CalloutNameListType** rs, int init_size) +{ + CalloutNameListType* s; + CalloutNameListEntry* v; - xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static)); + *rs = 0; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - env->num_comb_exp_check = 0; - env->comb_exp_max_regnum = 0; - env->curr_max_regnum = 0; - env->has_recursion = 0; -#endif - env->parse_depth = 0; - env->keep_num = 0; - env->save_num = 0; - env->save_alloc_num = 0; - env->saves = 0; -} + s = xmalloc(sizeof(*s)); + if (IS_NULL(s)) return ONIGERR_MEMORY; -static int -scan_env_add_mem_entry(ScanEnv* env) -{ - int i, need, alloc; - MemEnv* p; + v = (CalloutNameListEntry* )xmalloc(sizeof(CalloutNameListEntry) * init_size); + if (IS_NULL(v)) { + xfree(s); + return ONIGERR_MEMORY; + } - need = env->num_mem + 1; - if (need > MaxCaptureNum && MaxCaptureNum != 0) - return ONIGERR_TOO_MANY_CAPTURES; + s->n = 0; + s->alloc = init_size; + s->v = v; - if (need >= SCANENV_MEMENV_SIZE) { - if (env->mem_alloc <= need) { - if (IS_NULL(env->mem_env_dynamic)) { - alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE; - p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc); - CHECK_NULL_RETURN_MEMERR(p); - xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static)); - } - else { - alloc = env->mem_alloc * 2; - p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc); - CHECK_NULL_RETURN_MEMERR(p); - } + *rs = s; + return ONIG_NORMAL; +} - for (i = env->num_mem + 1; i < alloc; i++) { - p[i].node = NULL_NODE; -#if 0 - p[i].in = 0; - p[i].recursion = 0; -#endif +static void +free_callout_func_list(CalloutNameListType* s) +{ + if (IS_NOT_NULL(s)) { + if (IS_NOT_NULL(s->v)) { + int i, j; + + for (i = 0; i < s->n; i++) { + CalloutNameListEntry* e = s->v + i; + for (j = e->arg_num - e->opt_arg_num; j < e->arg_num; j++) { + if (e->arg_types[j] == ONIG_TYPE_STRING) { + UChar* p = e->opt_defaults[j].s.start; + if (IS_NOT_NULL(p)) xfree(p); + } + } } - - env->mem_env_dynamic = p; - env->mem_alloc = alloc; + xfree(s->v); } + xfree(s); } - - env->num_mem++; - return env->num_mem; } static int -scan_env_set_mem_node(ScanEnv* env, int num, Node* node) +callout_func_list_add(CalloutNameListType* s, int* rid) { - if (env->num_mem >= num) - SCANENV_MEMENV(env)[num].node = node; - else - return ONIGERR_PARSER_BUG; - return 0; -} + if (s->n >= s->alloc) { + int new_size = s->alloc * 2; + CalloutNameListEntry* nv = (CalloutNameListEntry* ) + xrealloc(s->v, sizeof(CalloutNameListEntry) * new_size); + if (IS_NULL(nv)) return ONIGERR_MEMORY; -extern void -onig_node_free(Node* node) -{ - start: - if (IS_NULL(node)) return ; + s->alloc = new_size; + s->v = nv; + } -#ifdef DEBUG_NODE_FREE - fprintf(stderr, "onig_node_free: %p\n", node); -#endif + *rid = s->n; - switch (NODE_TYPE(node)) { - case NODE_STRING: - if (STR_(node)->capa != 0 && - IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) { - xfree(STR_(node)->s); - } - break; + xmemset(&(s->v[s->n]), 0, sizeof(*(s->v))); + s->n++; + return ONIG_NORMAL; +} - case NODE_LIST: - case NODE_ALT: - onig_node_free(NODE_CAR(node)); - { - Node* next_node = NODE_CDR(node); - xfree(node); - node = next_node; - goto start; - } - break; +typedef struct { + UChar* name; + int name_len; /* byte length */ + int id; +} CalloutNameEntry; - case NODE_CCLASS: - { - CClassNode* cc = CCLASS_(node); +#ifdef USE_ST_LIBRARY +typedef st_table CalloutNameTable; +#else +typedef struct { + CalloutNameEntry* e; + int num; + int alloc; +} CalloutNameTable; +#endif - if (cc->mbuf) - bbuf_free(cc->mbuf); - } - break; +static CalloutNameTable* GlobalCalloutNameTable; +static int CalloutNameIDCounter; - case NODE_BACKREF: - if (IS_NOT_NULL(BACKREF_(node)->back_dynamic)) - xfree(BACKREF_(node)->back_dynamic); - break; +#ifdef USE_ST_LIBRARY - case NODE_ENCLOSURE: - if (NODE_BODY(node)) - onig_node_free(NODE_BODY(node)); +static int +i_free_callout_name_entry(st_callout_name_key* key, CalloutNameEntry* e, + void* arg ARG_UNUSED) +{ + xfree(e->name); + /*xfree(key->s); */ /* is same as e->name */ + xfree(key); + xfree(e); + return ST_DELETE; +} - { - EnclosureNode* en = ENCLOSURE_(node); - if (en->type == ENCLOSURE_IF_ELSE) { - onig_node_free(en->te.Then); - onig_node_free(en->te.Else); - } - } - break; +static int +callout_name_table_clear(CalloutNameTable* t) +{ + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_free_callout_name_entry, 0); + } + return 0; +} - case NODE_QUANT: - case NODE_ANCHOR: - if (NODE_BODY(node)) - onig_node_free(NODE_BODY(node)); - break; +static int +global_callout_name_table_free(void) +{ + if (IS_NOT_NULL(GlobalCalloutNameTable)) { + int r = callout_name_table_clear(GlobalCalloutNameTable); + if (r != 0) return r; - case NODE_CTYPE: - case NODE_CALL: - case NODE_GIMMICK: - break; + onig_st_free_table(GlobalCalloutNameTable); + GlobalCalloutNameTable = 0; + CalloutNameIDCounter = 0; } - xfree(node); + return 0; } -static void -cons_node_free_alone(Node* node) +static CalloutNameEntry* +callout_name_find(OnigEncoding enc, int is_not_single, + const UChar* name, const UChar* name_end) { - NODE_CAR(node) = 0; - NODE_CDR(node) = 0; - onig_node_free(node); -} + int r; + CalloutNameEntry* e; + CalloutNameTable* t = GlobalCalloutNameTable; -extern void + e = (CalloutNameEntry* )NULL; + if (IS_NOT_NULL(t)) { + r = onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end, + (HashDataType* )((void* )(&e))); + if (r == 0) { /* not found */ + if (enc != ONIG_ENCODING_ASCII && + ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc)) { + enc = ONIG_ENCODING_ASCII; + onig_st_lookup_callout_name_table(t, enc, is_not_single, name, name_end, + (HashDataType* )((void* )(&e))); + } + } + } + return e; +} + +#else + +static int +callout_name_table_clear(CalloutNameTable* t) +{ + int i; + CalloutNameEntry* e; + + if (IS_NOT_NULL(t)) { + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + if (IS_NOT_NULL(e->name)) { + xfree(e->name); + e->name = NULL; + e->name_len = 0; + e->id = 0; + e->func = 0; + } + } + if (IS_NOT_NULL(t->e)) { + xfree(t->e); + t->e = NULL; + } + t->num = 0; + } + return 0; +} + +static int +global_callout_name_table_free(void) +{ + if (IS_NOT_NULL(GlobalCalloutNameTable)) { + int r = callout_name_table_clear(GlobalCalloutNameTable); + if (r != 0) return r; + + xfree(GlobalCalloutNameTable); + GlobalCalloutNameTable = 0; + CalloutNameIDCounter = 0; + } + return 0; +} + +static CalloutNameEntry* +callout_name_find(UChar* name, UChar* name_end) +{ + int i, len; + CalloutNameEntry* e; + CalloutNameTable* t = Calloutnames; + + if (IS_NOT_NULL(t)) { + len = name_end - name; + for (i = 0; i < t->num; i++) { + e = &(t->e[i]); + if (len == e->name_len && onig_strncmp(name, e->name, len) == 0) + return e; + } + } + return (CalloutNameEntry* )NULL; +} + +#endif + +/* name string must be single byte char string. */ +static int +callout_name_entry(CalloutNameEntry** rentry, OnigEncoding enc, + int is_not_single, UChar* name, UChar* name_end) +{ + int r; + CalloutNameEntry* e; + CalloutNameTable* t = GlobalCalloutNameTable; + + *rentry = 0; + if (name_end - name <= 0) + return ONIGERR_INVALID_CALLOUT_NAME; + + e = callout_name_find(enc, is_not_single, name, name_end); + if (IS_NULL(e)) { +#ifdef USE_ST_LIBRARY + if (IS_NULL(t)) { + t = onig_st_init_callout_name_table_with_size(INIT_NAMES_ALLOC_NUM); + GlobalCalloutNameTable = t; + } + e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry)); + CHECK_NULL_RETURN_MEMERR(e); + + e->name = onigenc_strdup(enc, name, name_end); + if (IS_NULL(e->name)) { + xfree(e); return ONIGERR_MEMORY; + } + + r = st_insert_callout_name_table(t, enc, is_not_single, + e->name, (e->name + (name_end - name)), + (HashDataType )e); + if (r < 0) return r; + +#else + + int alloc; + + if (IS_NULL(t)) { + alloc = INIT_NAMES_ALLOC_NUM; + t = (CalloutNameTable* )xmalloc(sizeof(CalloutNameTable)); + CHECK_NULL_RETURN_MEMERR(t); + t->e = NULL; + t->alloc = 0; + t->num = 0; + + t->e = (CalloutNameEntry* )xmalloc(sizeof(CalloutNameEntry) * alloc); + if (IS_NULL(t->e)) { + xfree(t); + return ONIGERR_MEMORY; + } + t->alloc = alloc; + GlobalCalloutNameTable = t; + goto clear; + } + else if (t->num == t->alloc) { + int i; + + alloc = t->alloc * 2; + t->e = (CalloutNameEntry* )xrealloc(t->e, sizeof(CalloutNameEntry) * alloc); + CHECK_NULL_RETURN_MEMERR(t->e); + t->alloc = alloc; + + clear: + for (i = t->num; i < t->alloc; i++) { + t->e[i].name = NULL; + t->e[i].name_len = 0; + t->e[i].id = 0; + } + } + e = &(t->e[t->num]); + t->num++; + e->name = onigenc_strdup(enc, name, name_end); + if (IS_NULL(e->name)) return ONIGERR_MEMORY; +#endif + + CalloutNameIDCounter++; + e->id = CalloutNameIDCounter; + e->name_len = (int )(name_end - name); + } + + *rentry = e; + return e->id; +} + +static int +is_allowed_callout_name(OnigEncoding enc, UChar* name, UChar* name_end) +{ + UChar* p; + OnigCodePoint c; + + if (name >= name_end) return 0; + + p = name; + while (p < name_end) { + c = ONIGENC_MBC_TO_CODE(enc, p, name_end); + if (! IS_ALLOWED_CODE_IN_CALLOUT_NAME(c)) + return 0; + + if (p == name) { + if (c >= '0' && c <= '9') return 0; + } + + p += ONIGENC_MBC_ENC_LEN(enc, p); + } + + return 1; +} + +static int +is_allowed_callout_tag_name(OnigEncoding enc, UChar* name, UChar* name_end) +{ + UChar* p; + OnigCodePoint c; + + if (name >= name_end) return 0; + + p = name; + while (p < name_end) { + c = ONIGENC_MBC_TO_CODE(enc, p, name_end); + if (! IS_ALLOWED_CODE_IN_CALLOUT_TAG_NAME(c)) + return 0; + + if (p == name) { + if (c >= '0' && c <= '9') return 0; + } + + p += ONIGENC_MBC_ENC_LEN(enc, p); + } + + return 1; +} + +extern int +onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type, + UChar* name, UChar* name_end, int in, + OnigCalloutFunc start_func, + OnigCalloutFunc end_func, + int arg_num, unsigned int arg_types[], + int opt_arg_num, OnigValue opt_defaults[]) +{ + int r; + int i; + int j; + int id; + int is_not_single; + CalloutNameEntry* e; + CalloutNameListEntry* fe; + + if (callout_type != ONIG_CALLOUT_TYPE_SINGLE) + return ONIGERR_INVALID_ARGUMENT; + + if (arg_num < 0 || arg_num > ONIG_CALLOUT_MAX_ARGS_NUM) + return ONIGERR_INVALID_CALLOUT_ARG; + + if (opt_arg_num < 0 || opt_arg_num > arg_num) + return ONIGERR_INVALID_CALLOUT_ARG; + + if (start_func == 0 && end_func == 0) + return ONIGERR_INVALID_CALLOUT_ARG; + + if ((in & ONIG_CALLOUT_IN_PROGRESS) == 0 && (in & ONIG_CALLOUT_IN_RETRACTION) == 0) + return ONIGERR_INVALID_CALLOUT_ARG; + + for (i = 0; i < arg_num; i++) { + unsigned int t = arg_types[i]; + if (t == ONIG_TYPE_VOID) + return ONIGERR_INVALID_CALLOUT_ARG; + else { + if (i >= arg_num - opt_arg_num) { + if (t != ONIG_TYPE_LONG && t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && + t != ONIG_TYPE_TAG) + return ONIGERR_INVALID_CALLOUT_ARG; + } + else { + if (t != ONIG_TYPE_LONG) { + t = t & ~ONIG_TYPE_LONG; + if (t != ONIG_TYPE_CHAR && t != ONIG_TYPE_STRING && t != ONIG_TYPE_TAG) + return ONIGERR_INVALID_CALLOUT_ARG; + } + } + } + } + + if (! is_allowed_callout_name(enc, name, name_end)) { + return ONIGERR_INVALID_CALLOUT_NAME; + } + + is_not_single = (callout_type != ONIG_CALLOUT_TYPE_SINGLE); + id = callout_name_entry(&e, enc, is_not_single, name, name_end); + if (id < 0) return id; + + r = ONIG_NORMAL; + if (IS_NULL(GlobalCalloutNameList)) { + r = make_callout_func_list(&GlobalCalloutNameList, 10); + if (r != ONIG_NORMAL) return r; + } + + while (id >= GlobalCalloutNameList->n) { + int rid; + r = callout_func_list_add(GlobalCalloutNameList, &rid); + if (r != ONIG_NORMAL) return r; + } + + fe = GlobalCalloutNameList->v + id; + fe->type = callout_type; + fe->in = in; + fe->start_func = start_func; + fe->end_func = end_func; + fe->arg_num = arg_num; + fe->opt_arg_num = opt_arg_num; + fe->name = e->name; + + for (i = 0; i < arg_num; i++) { + fe->arg_types[i] = arg_types[i]; + } + for (i = arg_num - opt_arg_num, j = 0; i < arg_num; i++, j++) { + if (fe->arg_types[i] == ONIG_TYPE_STRING) { + OnigValue* val = opt_defaults + j; + UChar* ds = onigenc_strdup(enc, val->s.start, val->s.end); + CHECK_NULL_RETURN_MEMERR(ds); + + fe->opt_defaults[i].s.start = ds; + fe->opt_defaults[i].s.end = ds + (val->s.end - val->s.start); + } + else { + fe->opt_defaults[i] = opt_defaults[j]; + } + } + + r = id; // return id + return r; +} + +static int +get_callout_name_id_by_name(OnigEncoding enc, int is_not_single, + UChar* name, UChar* name_end, int* rid) +{ + int r; + CalloutNameEntry* e; + + if (! is_allowed_callout_name(enc, name, name_end)) { + return ONIGERR_INVALID_CALLOUT_NAME; + } + + e = callout_name_find(enc, is_not_single, name, name_end); + if (IS_NULL(e)) { + return ONIGERR_UNDEFINED_CALLOUT_NAME; + } + + r = ONIG_NORMAL; + *rid = e->id; + + return r; +} + +extern OnigCalloutFunc +onig_get_callout_start_func(regex_t* reg, int callout_num) +{ + /* If used for callouts of contents, return 0. */ + CalloutListEntry* e; + + e = onig_reg_callout_list_at(reg, callout_num); + return e->start_func; +} + +extern const UChar* +onig_get_callout_tag_start(regex_t* reg, int callout_num) +{ + CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num); + return e->tag_start; +} + +extern const UChar* +onig_get_callout_tag_end(regex_t* reg, int callout_num) +{ + CalloutListEntry* e = onig_reg_callout_list_at(reg, callout_num); + return e->tag_end; +} + + +extern OnigCalloutType +onig_get_callout_type_by_name_id(int name_id) +{ + return GlobalCalloutNameList->v[name_id].type; +} + +extern OnigCalloutFunc +onig_get_callout_start_func_by_name_id(int name_id) +{ + return GlobalCalloutNameList->v[name_id].start_func; +} + +extern OnigCalloutFunc +onig_get_callout_end_func_by_name_id(int name_id) +{ + return GlobalCalloutNameList->v[name_id].end_func; +} + +extern int +onig_get_callout_in_by_name_id(int name_id) +{ + return GlobalCalloutNameList->v[name_id].in; +} + +static int +get_callout_arg_num_by_name_id(int name_id) +{ + return GlobalCalloutNameList->v[name_id].arg_num; +} + +static int +get_callout_opt_arg_num_by_name_id(int name_id) +{ + return GlobalCalloutNameList->v[name_id].opt_arg_num; +} + +static unsigned int +get_callout_arg_type_by_name_id(int name_id, int index) +{ + return GlobalCalloutNameList->v[name_id].arg_types[index]; +} + +static OnigValue +get_callout_opt_default_by_name_id(int name_id, int index) +{ + return GlobalCalloutNameList->v[name_id].opt_defaults[index]; +} + +extern UChar* +onig_get_callout_name_by_name_id(int name_id) +{ + return GlobalCalloutNameList->v[name_id].name; +} + +extern int +onig_global_callout_names_free(void) +{ + free_callout_func_list(GlobalCalloutNameList); + GlobalCalloutNameList = 0; + + global_callout_name_table_free(); + return ONIG_NORMAL; +} + + +typedef st_table CalloutTagTable; +typedef intptr_t CalloutTagVal; + +#define CALLOUT_TAG_LIST_FLAG_TAG_EXIST (1<<0) + +static int +i_callout_callout_list_set(UChar* key, CalloutTagVal e, void* arg) +{ + int num; + RegexExt* ext = (RegexExt* )arg; + + num = (int )e - 1; + ext->callout_list[num].flag |= CALLOUT_TAG_LIST_FLAG_TAG_EXIST; + return ST_CONTINUE; +} + +static int +setup_ext_callout_list_values(regex_t* reg) +{ + int i, j; + RegexExt* ext; + + ext = REG_EXTP(reg); + if (IS_NOT_NULL(ext->tag_table)) { + onig_st_foreach((CalloutTagTable *)ext->tag_table, i_callout_callout_list_set, + (st_data_t )ext); + } + + for (i = 0; i < ext->callout_num; i++) { + CalloutListEntry* e = ext->callout_list + i; + if (e->of == ONIG_CALLOUT_OF_NAME) { + for (j = 0; j < e->u.arg.num; j++) { + if (e->u.arg.types[j] == ONIG_TYPE_TAG) { + UChar* start; + UChar* end; + int num; + start = e->u.arg.vals[j].s.start; + end = e->u.arg.vals[j].s.end; + num = onig_get_callout_num_by_tag(reg, start, end); + if (num < 0) return num; + e->u.arg.vals[j].tag = num; + } + } + } + } + + return ONIG_NORMAL; +} + +extern int +onig_callout_tag_is_exist_at_callout_num(regex_t* reg, int callout_num) +{ + RegexExt* ext = REG_EXTP(reg); + + if (IS_NULL(ext) || IS_NULL(ext->callout_list)) return 0; + if (callout_num > ext->callout_num) return 0; + + return (ext->callout_list[callout_num].flag & + CALLOUT_TAG_LIST_FLAG_TAG_EXIST) != 0 ? 1 : 0; +} + +static int +i_free_callout_tag_entry(UChar* key, CalloutTagVal e, void* arg ARG_UNUSED) +{ + xfree(key); + return ST_DELETE; +} + +static int +callout_tag_table_clear(CalloutTagTable* t) +{ + if (IS_NOT_NULL(t)) { + onig_st_foreach(t, i_free_callout_tag_entry, 0); + } + return 0; +} + +extern int +onig_callout_tag_table_free(void* table) +{ + CalloutTagTable* t = (CalloutTagTable* )table; + + if (IS_NOT_NULL(t)) { + int r = callout_tag_table_clear(t); + if (r != 0) return r; + + onig_st_free_table(t); + } + + return 0; +} + +extern int +onig_get_callout_num_by_tag(regex_t* reg, + const UChar* tag, const UChar* tag_end) +{ + int r; + RegexExt* ext; + CalloutTagVal e; + + ext = REG_EXTP(reg); + if (IS_NULL(ext) || IS_NULL(ext->tag_table)) + return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + r = onig_st_lookup_strend(ext->tag_table, tag, tag_end, + (HashDataType* )((void* )(&e))); + if (r == 0) return ONIGERR_INVALID_CALLOUT_TAG_NAME; + return (int )e; +} + +static CalloutTagVal +callout_tag_find(CalloutTagTable* t, const UChar* name, const UChar* name_end) +{ + CalloutTagVal e; + + e = -1; + if (IS_NOT_NULL(t)) { + onig_st_lookup_strend(t, name, name_end, (HashDataType* )((void* )(&e))); + } + return e; +} + +static int +callout_tag_table_new(CalloutTagTable** rt) +{ + CalloutTagTable* t; + + *rt = 0; + t = onig_st_init_strend_table_with_size(INIT_TAG_NAMES_ALLOC_NUM); + CHECK_NULL_RETURN_MEMERR(t); + + *rt = t; + return ONIG_NORMAL; +} + +static int +callout_tag_entry_raw(CalloutTagTable* t, UChar* name, UChar* name_end, + CalloutTagVal entry_val) +{ + int r; + CalloutTagVal val; + + if (name_end - name <= 0) + return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + val = callout_tag_find(t, name, name_end); + if (val >= 0) + return ONIGERR_MULTIPLEX_DEFINED_NAME; + + r = onig_st_insert_strend(t, name, name_end, (HashDataType )entry_val); + if (r < 0) return r; + + return ONIG_NORMAL; +} + +static int +ext_ensure_tag_table(regex_t* reg) +{ + int r; + RegexExt* ext; + CalloutTagTable* t; + + ext = onig_get_regex_ext(reg); + CHECK_NULL_RETURN_MEMERR(ext); + + if (IS_NULL(ext->tag_table)) { + r = callout_tag_table_new(&t); + if (r != ONIG_NORMAL) return r; + + ext->tag_table = t; + } + + return ONIG_NORMAL; +} + +static int +callout_tag_entry(regex_t* reg, UChar* name, UChar* name_end, + CalloutTagVal entry_val) +{ + int r; + RegexExt* ext; + CalloutListEntry* e; + + r = ext_ensure_tag_table(reg); + if (r != ONIG_NORMAL) return r; + + ext = onig_get_regex_ext(reg); + r = callout_tag_entry_raw(ext->tag_table, name, name_end, entry_val); + + e = onig_reg_callout_list_at(reg, (int )entry_val); + e->tag_start = name; + e->tag_end = name_end; + + return r; +} + +#endif /* USE_CALLOUT */ + + +#define INIT_SCANENV_MEMENV_ALLOC_SIZE 16 + +static void +scan_env_clear(ScanEnv* env) +{ + MEM_STATUS_CLEAR(env->capture_history); + MEM_STATUS_CLEAR(env->bt_mem_start); + MEM_STATUS_CLEAR(env->bt_mem_end); + MEM_STATUS_CLEAR(env->backrefed_mem); + env->error = (UChar* )NULL; + env->error_end = (UChar* )NULL; + env->num_call = 0; + +#ifdef USE_CALL + env->unset_addr_list = NULL; + env->has_call_zero = 0; +#endif + + env->num_mem = 0; + env->num_named = 0; + env->mem_alloc = 0; + env->mem_env_dynamic = (MemEnv* )NULL; + + xmemset(env->mem_env_static, 0, sizeof(env->mem_env_static)); + + env->parse_depth = 0; + env->keep_num = 0; + env->save_num = 0; + env->save_alloc_num = 0; + env->saves = 0; +} + +static int +scan_env_add_mem_entry(ScanEnv* env) +{ + int i, need, alloc; + MemEnv* p; + + need = env->num_mem + 1; + if (need > MaxCaptureNum && MaxCaptureNum != 0) + return ONIGERR_TOO_MANY_CAPTURES; + + if (need >= SCANENV_MEMENV_SIZE) { + if (env->mem_alloc <= need) { + if (IS_NULL(env->mem_env_dynamic)) { + alloc = INIT_SCANENV_MEMENV_ALLOC_SIZE; + p = (MemEnv* )xmalloc(sizeof(MemEnv) * alloc); + CHECK_NULL_RETURN_MEMERR(p); + xmemcpy(p, env->mem_env_static, sizeof(env->mem_env_static)); + } + else { + alloc = env->mem_alloc * 2; + p = (MemEnv* )xrealloc(env->mem_env_dynamic, sizeof(MemEnv) * alloc); + CHECK_NULL_RETURN_MEMERR(p); + } + + for (i = env->num_mem + 1; i < alloc; i++) { + p[i].node = NULL_NODE; +#if 0 + p[i].in = 0; + p[i].recursion = 0; +#endif + } + + env->mem_env_dynamic = p; + env->mem_alloc = alloc; + } + } + + env->num_mem++; + return env->num_mem; +} + +static int +scan_env_set_mem_node(ScanEnv* env, int num, Node* node) +{ + if (env->num_mem >= num) + SCANENV_MEMENV(env)[num].node = node; + else + return ONIGERR_PARSER_BUG; + return 0; +} + +extern void +onig_node_free(Node* node) +{ + start: + if (IS_NULL(node)) return ; + +#ifdef DEBUG_NODE_FREE + fprintf(stderr, "onig_node_free: %p\n", node); +#endif + + switch (NODE_TYPE(node)) { + case NODE_STRING: + if (STR_(node)->capa != 0 && + IS_NOT_NULL(STR_(node)->s) && STR_(node)->s != STR_(node)->buf) { + xfree(STR_(node)->s); + } + break; + + case NODE_LIST: + case NODE_ALT: + onig_node_free(NODE_CAR(node)); + { + Node* next_node = NODE_CDR(node); + + xfree(node); + node = next_node; + goto start; + } + break; + + case NODE_CCLASS: + { + CClassNode* cc = CCLASS_(node); + + if (cc->mbuf) + bbuf_free(cc->mbuf); + } + break; + + case NODE_BACKREF: + if (IS_NOT_NULL(BACKREF_(node)->back_dynamic)) + xfree(BACKREF_(node)->back_dynamic); + break; + + case NODE_ENCLOSURE: + if (NODE_BODY(node)) + onig_node_free(NODE_BODY(node)); + + { + EnclosureNode* en = ENCLOSURE_(node); + if (en->type == ENCLOSURE_IF_ELSE) { + onig_node_free(en->te.Then); + onig_node_free(en->te.Else); + } + } + break; + + case NODE_QUANT: + case NODE_ANCHOR: + if (NODE_BODY(node)) + onig_node_free(NODE_BODY(node)); + break; + + case NODE_CTYPE: + case NODE_CALL: + case NODE_GIMMICK: + break; + } + + xfree(node); +} + +static void +cons_node_free_alone(Node* node) +{ + NODE_CAR(node) = 0; + NODE_CDR(node) = 0; + onig_node_free(node); +} + +extern void list_node_free_not_car(Node* node) { Node* next_node; @@ -1504,10 +2376,6 @@ node_new_quantifier(int lower, int upper, int by_number) if (by_number != 0) NODE_STATUS_ADD(node, NST_BY_NUMBER); -#ifdef USE_COMBINATION_EXPLOSION_CHECK - QUANT_(node)->comb_exp_check_num = 0; -#endif - return node; } @@ -1604,43 +2472,153 @@ node_new_save_gimmick(Node** node, enum SaveType save_type, ScanEnv* env) r = save_entry(env, save_type, &id); if (r != ONIG_NORMAL) return r; - *node = node_new(); - CHECK_NULL_RETURN_MEMERR(*node); + *node = node_new(); + CHECK_NULL_RETURN_MEMERR(*node); + + NODE_SET_TYPE(*node, NODE_GIMMICK); + GIMMICK_(*node)->id = id; + GIMMICK_(*node)->type = GIMMICK_SAVE; + GIMMICK_(*node)->detail_type = (int )save_type; + + return ONIG_NORMAL; +} + +static int +node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type, + int id, ScanEnv* env) +{ + *node = node_new(); + CHECK_NULL_RETURN_MEMERR(*node); + + NODE_SET_TYPE(*node, NODE_GIMMICK); + GIMMICK_(*node)->id = id; + GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR; + GIMMICK_(*node)->detail_type = (int )update_var_type; + + return ONIG_NORMAL; +} + +static int +node_new_keep(Node** node, ScanEnv* env) +{ + int r; + + r = node_new_save_gimmick(node, SAVE_KEEP, env); + if (r != 0) return r; + + env->keep_num++; + return ONIG_NORMAL; +} + +#ifdef USE_CALLOUT + +extern void +onig_free_reg_callout_list(int n, CalloutListEntry* list) +{ + int i; + int j; + + if (IS_NULL(list)) return ; + + for (i = 0; i < n; i++) { + if (list[i].of == ONIG_CALLOUT_OF_NAME) { + for (j = 0; j < list[i].u.arg.passed_num; j++) { + if (list[i].u.arg.types[j] == ONIG_TYPE_STRING) { + if (IS_NOT_NULL(list[i].u.arg.vals[j].s.start)) + xfree(list[i].u.arg.vals[j].s.start); + } + } + } + else { /* ONIG_CALLOUT_OF_CONTENTS */ + if (IS_NOT_NULL(list[i].u.content.start)) { + xfree((void* )list[i].u.content.start); + } + } + } + + xfree(list); +} + +extern CalloutListEntry* +onig_reg_callout_list_at(regex_t* reg, int num) +{ + RegexExt* ext = REG_EXTP(reg); + CHECK_NULL_RETURN(ext); + + if (num <= 0 || num > ext->callout_num) + return 0; + + num--; + return ext->callout_list + num; +} + +static int +reg_callout_list_entry(ScanEnv* env, int* rnum) +{ +#define INIT_CALLOUT_LIST_NUM 3 + + int num; + CalloutListEntry* list; + CalloutListEntry* e; + RegexExt* ext; + + ext = onig_get_regex_ext(env->reg); + CHECK_NULL_RETURN_MEMERR(ext); + + if (IS_NULL(ext->callout_list)) { + list = (CalloutListEntry* )xmalloc(sizeof(*list) * INIT_CALLOUT_LIST_NUM); + CHECK_NULL_RETURN_MEMERR(list); + + ext->callout_list = list; + ext->callout_list_alloc = INIT_CALLOUT_LIST_NUM; + ext->callout_num = 0; + } + + num = ext->callout_num + 1; + if (num > ext->callout_list_alloc) { + int alloc = ext->callout_list_alloc * 2; + list = (CalloutListEntry* )xrealloc(ext->callout_list, + sizeof(CalloutListEntry) * alloc); + CHECK_NULL_RETURN_MEMERR(list); + + ext->callout_list = list; + ext->callout_list_alloc = alloc; + } + + e = ext->callout_list + (num - 1); - NODE_SET_TYPE(*node, NODE_GIMMICK); - GIMMICK_(*node)->id = id; - GIMMICK_(*node)->type = GIMMICK_SAVE; - GIMMICK_(*node)->detail_type = (int )save_type; + e->flag = 0; + e->of = 0; + e->in = ONIG_CALLOUT_OF_CONTENTS; + e->type = 0; + e->tag_start = 0; + e->tag_end = 0; + e->start_func = 0; + e->end_func = 0; + e->u.arg.num = 0; + e->u.arg.passed_num = 0; + ext->callout_num = num; + *rnum = num; return ONIG_NORMAL; } static int -node_new_update_var_gimmick(Node** node, enum UpdateVarType update_var_type, - int id, ScanEnv* env) +node_new_callout(Node** node, OnigCalloutOf callout_of, int num, int id, + ScanEnv* env) { *node = node_new(); CHECK_NULL_RETURN_MEMERR(*node); NODE_SET_TYPE(*node, NODE_GIMMICK); - GIMMICK_(*node)->id = id; - GIMMICK_(*node)->type = GIMMICK_UPDATE_VAR; - GIMMICK_(*node)->detail_type = (int )update_var_type; - - return ONIG_NORMAL; -} + GIMMICK_(*node)->id = id; + GIMMICK_(*node)->num = num; + GIMMICK_(*node)->type = GIMMICK_CALLOUT; + GIMMICK_(*node)->detail_type = (int )callout_of; -static int -node_new_keep(Node** node, ScanEnv* env) -{ - int r; - - r = node_new_save_gimmick(node, SAVE_KEEP, env); - if (r != 0) return r; - - env->keep_num++; return ONIG_NORMAL; } +#endif static int make_extended_grapheme_cluster(Node** node, ScanEnv* env) @@ -2838,7 +3816,7 @@ is_invalid_quantifier_target(Node* node) /* ?:0, *:1, +:2, ??:3, *?:4, +?:5 */ static int -popular_quantifier_num(QuantNode* q) +quantifier_type_num(QuantNode* q) { if (q->greedy) { if (q->lower == 0) { @@ -2889,9 +3867,22 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode) p = QUANT_(pnode); c = QUANT_(cnode); - pnum = popular_quantifier_num(p); - cnum = popular_quantifier_num(c); - if (pnum < 0 || cnum < 0) return ; + pnum = quantifier_type_num(p); + cnum = quantifier_type_num(c); + if (pnum < 0 || cnum < 0) { + if ((p->lower == p->upper) && ! IS_REPEAT_INFINITE(p->upper)) { + if ((c->lower == c->upper) && ! IS_REPEAT_INFINITE(c->upper)) { + int n = positive_int_multiply(p->lower, c->lower); + if (n >= 0) { + p->lower = p->upper = n; + NODE_BODY(pnode) = NODE_BODY(cnode); + goto remove_cnode; + } + } + } + + return ; + } switch(ReduceTypeTable[cnum][pnum]) { case RQ_DEL: @@ -2927,6 +3918,7 @@ onig_reduce_nested_quantifier(Node* pnode, Node* cnode) break; } + remove_cnode: NODE_BODY(cnode) = NULL_NODE; onig_node_free(cnode); } @@ -5508,6 +6500,452 @@ parse_char_class(Node** np, OnigToken* tok, UChar** src, UChar* end, ScanEnv* en static int parse_subexp(Node** top, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env); +#ifdef USE_CALLOUT + +/* (?{...}[tag][+-]) (?{{...}}[tag][+-]) */ +static int +parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env) +{ + int r; + int i; + int in; + int num; + OnigCodePoint c; + UChar* code_start; + UChar* code_end; + UChar* contents; + UChar* tag_start; + UChar* tag_end; + int brace_nest; + CalloutListEntry* e; + RegexExt* ext; + OnigEncoding enc = env->enc; + UChar* p = *src; + + if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + + brace_nest = 0; + while (PPEEK_IS('{')) { + brace_nest++; + PINC_S; + if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + } + + in = ONIG_CALLOUT_IN_PROGRESS; + code_start = p; + while (1) { + if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + + code_end = p; + PFETCH_S(c); + if (c == '}') { + i = brace_nest; + while (i > 0) { + if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + PFETCH_S(c); + if (c == '}') i--; + else break; + } + if (i == 0) break; + } + } + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + + PFETCH_S(c); + if (c == '[') { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + tag_start = p; + while (! PEND) { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + tag_end = p; + PFETCH_S(c); + if (c == ']') break; + } + if (! is_allowed_callout_tag_name(enc, tag_start, tag_end)) + return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH_S(c); + } + else { + tag_start = tag_end = 0; + } + + if (c == 'X') { + in |= ONIG_CALLOUT_IN_RETRACTION; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH_S(c); + } + else if (c == '<') { + in = ONIG_CALLOUT_IN_RETRACTION; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH_S(c); + } + else if (c == '>') { /* no needs (default) */ + //in = ONIG_CALLOUT_IN_PROGRESS; + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH_S(c); + } + + if (c != cterm) + return ONIGERR_INVALID_CALLOUT_PATTERN; + + r = reg_callout_list_entry(env, &num); + if (r != 0) return r; + + ext = onig_get_regex_ext(env->reg); + if (IS_NULL(ext->pattern)) { + r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end); + if (r != ONIG_NORMAL) return r; + } + + if (tag_start != tag_end) { + r = callout_tag_entry(env->reg, tag_start, tag_end, num); + if (r != ONIG_NORMAL) return r; + } + + contents = onigenc_strdup(enc, code_start, code_end); + CHECK_NULL_RETURN_MEMERR(contents); + + r = node_new_callout(np, ONIG_CALLOUT_OF_CONTENTS, num, ONIG_NON_NAME_ID, env); + if (r != 0) { + xfree(contents); + return r; + } + + e = onig_reg_callout_list_at(env->reg, num); + e->of = ONIG_CALLOUT_OF_CONTENTS; + e->in = in; + e->name_id = ONIG_NON_NAME_ID; + e->u.content.start = contents; + e->u.content.end = contents + (code_end - code_start); + + *src = p; + return 0; +} + +static long +parse_long(OnigEncoding enc, UChar* s, UChar* end, int sign_on, long max, long* rl) +{ + long v; + long d; + int flag; + UChar* p; + OnigCodePoint c; + + if (s >= end) return ONIGERR_INVALID_CALLOUT_ARG; + + flag = 1; + v = 0; + p = s; + while (p < end) { + c = ONIGENC_MBC_TO_CODE(enc, p, end); + p += ONIGENC_MBC_ENC_LEN(enc, p); + if (c >= '0' && c <= '9') { + d = (long )(c - '0'); + if (v > (max - d) / 10) + return ONIGERR_INVALID_CALLOUT_ARG; + + v = v * 10 + d; + } + else if (sign_on != 0 && (c == '-' || c == '+')) { + if (c == '-') flag = -1; + } + else + return ONIGERR_INVALID_CALLOUT_ARG; + + sign_on = 0; + } + + *rl = flag * v; + return ONIG_NORMAL; +} + +static int +parse_callout_args(int skip_mode, int cterm, UChar** src, UChar* end, + unsigned int types[], OnigValue vals[], ScanEnv* env) +{ +#define MAX_CALLOUT_ARG_BYTE_LENGTH 128 + + int r; + int n; + int esc; + int cn; + UChar* s; + UChar* e; + UChar* eesc; + OnigCodePoint c; + UChar* bufend; + UChar buf[MAX_CALLOUT_ARG_BYTE_LENGTH]; + OnigEncoding enc = env->enc; + UChar* p = *src; + + if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + + n = 0; + while (n < ONIG_CALLOUT_MAX_ARGS_NUM) { + c = 0; + cn = 0; + esc = 0; + eesc = 0; + bufend = buf; + s = e = p; + while (1) { + if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + + e = p; + PFETCH_S(c); + if (esc != 0) { + esc = 0; + if (c == '\\' || c == cterm || c == ',') { + /* */ + } + else { + e = eesc; + cn++; + } + goto add_char; + } + else { + if (c == '\\') { + esc = 1; + eesc = e; + } + else if (c == cterm || c == ',') + break; + else { + size_t clen; + + add_char: + if (skip_mode == 0) { + clen = p - e; + if (bufend + clen > buf + MAX_CALLOUT_ARG_BYTE_LENGTH) + return ONIGERR_INVALID_CALLOUT_ARG; /* too long argument */ + + xmemcpy(bufend, e, clen); + bufend += clen; + } + cn++; + } + } + } + + if (cn != 0) { + if (skip_mode == 0) { + if ((types[n] & ONIG_TYPE_LONG) != 0) { + int fixed = 0; + if (cn > 0) { + long rl; + r = parse_long(enc, buf, bufend, 1, LONG_MAX, &rl); + if (r == ONIG_NORMAL) { + vals[n].l = rl; + fixed = 1; + types[n] = ONIG_TYPE_LONG; + } + } + + if (fixed == 0) { + types[n] = (types[n] & ~ONIG_TYPE_LONG); + if (types[n] == ONIG_TYPE_VOID) + return ONIGERR_INVALID_CALLOUT_ARG; + } + } + + switch (types[n]) { + case ONIG_TYPE_LONG: + break; + + case ONIG_TYPE_CHAR: + if (cn != 1) return ONIGERR_INVALID_CALLOUT_ARG; + vals[n].c = ONIGENC_MBC_TO_CODE(enc, buf, bufend); + break; + + case ONIG_TYPE_STRING: + { + UChar* rs = onigenc_strdup(enc, buf, bufend); + CHECK_NULL_RETURN_MEMERR(rs); + vals[n].s.start = rs; + vals[n].s.end = rs + (e - s); + } + break; + + case ONIG_TYPE_TAG: + if (eesc != 0 || ! is_allowed_callout_tag_name(enc, s, e)) + return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + vals[n].s.start = s; + vals[n].s.end = e; + break; + + case ONIG_TYPE_VOID: + case ONIG_TYPE_POINTER: + return ONIGERR_PARSER_BUG; + break; + } + } + + n++; + } + + if (c == cterm) break; + } + + if (c != cterm) return ONIGERR_INVALID_CALLOUT_PATTERN; + + *src = p; + return n; +} + +/* (*name[TAG]) (*name[TAG]{a,b,..}) */ +static int +parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* env) +{ + int r; + int i; + int in; + int num; + int name_id; + int arg_num; + int max_arg_num; + int opt_arg_num; + int is_not_single; + OnigCodePoint c; + UChar* name_start; + UChar* name_end; + UChar* tag_start; + UChar* tag_end; + Node* node; + CalloutListEntry* e; + RegexExt* ext; + unsigned int types[ONIG_CALLOUT_MAX_ARGS_NUM]; + OnigValue vals[ONIG_CALLOUT_MAX_ARGS_NUM]; + OnigEncoding enc = env->enc; + UChar* p = *src; + + //PFETCH_READY; + if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; + + node = 0; + name_start = p; + while (1) { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + name_end = p; + PFETCH_S(c); + if (c == cterm || c == '[' || c == '{') break; + } + + if (! is_allowed_callout_name(enc, name_start, name_end)) + return ONIGERR_INVALID_CALLOUT_NAME; + + if (c == '[') { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + tag_start = p; + while (! PEND) { + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + tag_end = p; + PFETCH_S(c); + if (c == ']') break; + } + if (! is_allowed_callout_tag_name(enc, tag_start, tag_end)) + return ONIGERR_INVALID_CALLOUT_TAG_NAME; + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH_S(c); + } + else { + tag_start = tag_end = 0; + } + + if (c == '{') { + UChar* save; + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + + /* read for single check only */ + save = p; + arg_num = parse_callout_args(1, '}', &p, end, 0, 0, env); + if (arg_num < 0) return arg_num; + + is_not_single = PPEEK_IS(cterm) ? 0 : 1; + p = save; + r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end, + &name_id); + if (r != ONIG_NORMAL) return r; + + max_arg_num = get_callout_arg_num_by_name_id(name_id); + for (i = 0; i < max_arg_num; i++) { + types[i] = get_callout_arg_type_by_name_id(name_id, i); + } + + arg_num = parse_callout_args(0, '}', &p, end, types, vals, env); + if (arg_num < 0) return arg_num; + + if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; + PFETCH_S(c); + } + else { + arg_num = 0; + + is_not_single = 0; + r = get_callout_name_id_by_name(enc, is_not_single, name_start, name_end, + &name_id); + if (r != ONIG_NORMAL) return r; + + max_arg_num = get_callout_arg_num_by_name_id(name_id); + for (i = 0; i < max_arg_num; i++) { + types[i] = get_callout_arg_type_by_name_id(name_id, i); + } + } + + in = onig_get_callout_in_by_name_id(name_id); + opt_arg_num = get_callout_opt_arg_num_by_name_id(name_id); + if (arg_num > max_arg_num || arg_num < (max_arg_num - opt_arg_num)) + return ONIGERR_INVALID_CALLOUT_ARG; + + if (c != cterm) + return ONIGERR_INVALID_CALLOUT_PATTERN; + + r = reg_callout_list_entry(env, &num); + if (r != 0) return r; + + ext = onig_get_regex_ext(env->reg); + if (IS_NULL(ext->pattern)) { + r = onig_ext_set_pattern(env->reg, env->pattern, env->pattern_end); + if (r != ONIG_NORMAL) return r; + } + + if (tag_start != tag_end) { + r = callout_tag_entry(env->reg, tag_start, tag_end, num); + if (r != ONIG_NORMAL) return r; + } + + r = node_new_callout(&node, ONIG_CALLOUT_OF_NAME, num, name_id, env); + if (r != ONIG_NORMAL) return r; + + e = onig_reg_callout_list_at(env->reg, num); + e->of = ONIG_CALLOUT_OF_NAME; + e->in = in; + e->name_id = name_id; + e->type = onig_get_callout_type_by_name_id(name_id); + e->start_func = onig_get_callout_start_func_by_name_id(name_id); + e->end_func = onig_get_callout_end_func_by_name_id(name_id); + e->u.arg.num = max_arg_num; + e->u.arg.passed_num = arg_num; + for (i = 0; i < max_arg_num; i++) { + e->u.arg.types[i] = types[i]; + if (i < arg_num) + e->u.arg.vals[i] = vals[i]; + else + e->u.arg.vals[i] = get_callout_opt_default_by_name_id(name_id, i); + } + + *np = node; + *src = p; + return 0; +} +#endif + static int parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, ScanEnv* env) @@ -5526,8 +6964,8 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; option = env->options; - if (PPEEK_IS('?') && - IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { + c = PPEEK; + if (c == '?' && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { PINC; if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; @@ -5673,6 +7111,18 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } break; +#ifdef USE_CALLOUT + case '{': + if (! IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) + return ONIGERR_UNDEFINED_GROUP_OPTION; + + r = parse_callout_of_contents(np, ')', &p, end, env); + if (r != 0) return r; + + goto end; + break; +#endif + case '(': /* (?()...) */ if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE)) { @@ -5769,6 +7219,29 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (c != ')') goto err_if_else; } } +#ifdef USE_CALLOUT + else if (c == '?') { + if (IS_SYNTAX_OP2(env->syntax, + ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS)) { + if (! PEND && PPEEK_IS('{')) { + /* condition part is callouts of contents: (?(?{...})THEN|ELSE) */ + condition_is_checker = 0; + PFETCH(c); + r = parse_callout_of_contents(&condition, ')', &p, end, env); + if (r != 0) return r; + goto end_condition; + } + } + goto any_condition; + } + else if (c == '*' && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) { + condition_is_checker = 0; + r = parse_callout_of_name(&condition, ')', &p, end, env); + if (r != 0) return r; + goto end_condition; + } +#endif else { any_condition: PUNFETCH; @@ -5782,6 +7255,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, } } + end_condition: CHECK_NULL_RETURN_MEMERR(condition); if (PEND) { @@ -5970,6 +7444,16 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, return ONIGERR_UNDEFINED_GROUP_OPTION; } } +#ifdef USE_CALLOUT + else if (c == '*' && + IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME)) { + PINC; + r = parse_callout_of_name(np, ')', &p, end, env); + if (r != 0) return r; + + goto end; + } +#endif else { if (ONIG_IS_OPTION_ON(env->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) goto group; @@ -6040,11 +7524,11 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) { /* check redundant double repeat. */ /* verbose warn (?:.?)? etc... but not warn (.?)? etc... */ QuantNode* qnt = QUANT_(target); - int nestq_num = popular_quantifier_num(qn); - int targetq_num = popular_quantifier_num(qnt); + int nestq_num = quantifier_type_num(qn); + int targetq_num = quantifier_type_num(qnt); #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR - if (! NODE_IS_BY_NUMBER(qnode) && ! NODE_IS_BY_NUMBER(target) && + if (targetq_num >= 0 && nestq_num >= 0 && IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { UChar buf[WARN_BUFSIZE]; @@ -6078,18 +7562,19 @@ set_quantifier(Node* qnode, Node* target, int group, ScanEnv* env) warn_exit: #endif - if (targetq_num >= 0) { - if (nestq_num >= 0) { - onig_reduce_nested_quantifier(qnode, target); - goto q_exit; - } - else if (targetq_num == 1 || targetq_num == 2) { /* * or + */ + if (targetq_num >= 0 && nestq_num < 0) { + if (targetq_num == 1 || targetq_num == 2) { /* * or + */ /* (?:a*){n,m}, (?:a+){n,m} => (?:a*){n,n}, (?:a+){n,n} */ if (! IS_REPEAT_INFINITE(qn->upper) && qn->upper > 1 && qn->greedy) { qn->upper = (qn->lower == 0 ? 1 : qn->lower); } } } + else { + NODE_BODY(qnode) = target; + onig_reduce_nested_quantifier(qnode, target); + goto q_exit; + } } break; @@ -6717,6 +8202,9 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, { int r; UChar* p; +#ifdef USE_CALLOUT + RegexExt* ext; +#endif names_clear(reg); @@ -6750,6 +8238,14 @@ onig_parse_tree(Node** root, const UChar* pattern, const UChar* end, #endif reg->num_mem = env->num_mem; + +#ifdef USE_CALLOUT + ext = REG_EXTP(reg); + if (IS_NOT_NULL(ext) && ext->callout_num > 0) { + r = setup_ext_callout_list_values(reg); + } +#endif + return r; } diff --git a/src/regparse.h b/src/regparse.h index 99fe7c9..3ffbea4 100644 --- a/src/regparse.h +++ b/src/regparse.h @@ -4,7 +4,7 @@ regparse.h - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -51,8 +51,12 @@ enum GimmickType { GIMMICK_KEEP = 1, GIMMICK_SAVE = 2, GIMMICK_UPDATE_VAR = 3, +#ifdef USE_CALLOUT + GIMMICK_CALLOUT = 4, +#endif }; + /* node type bit */ #define NODE_TYPE2BIT(type) (1<<(type)) @@ -97,7 +101,7 @@ enum GimmickType { (NODE_IS_FIXED_OPTION(node) ? CTYPE_(node)->options : reg->options) -#define ANCHOR_ANYCHAR_STAR_MASK (ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML) +#define ANCHOR_ANYCHAR_INF_MASK (ANCHOR_ANYCHAR_INF | ANCHOR_ANYCHAR_INF_ML) #define ANCHOR_END_BUF_MASK (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF) enum EnclosureType { @@ -129,10 +133,12 @@ enum EnclosureType { #define BACKREFS_P(br) \ (IS_NOT_NULL((br)->back_dynamic) ? (br)->back_dynamic : (br)->back_static) -#define QUANT_BODY_IS_NOT_EMPTY 0 -#define QUANT_BODY_IS_EMPTY 1 -#define QUANT_BODY_IS_EMPTY_MEM 2 -#define QUANT_BODY_IS_EMPTY_REC 3 +enum QuantBodyEmpty { + QUANT_BODY_IS_NOT_EMPTY = 0, + QUANT_BODY_IS_EMPTY = 1, + QUANT_BODY_IS_EMPTY_MEM = 2, + QUANT_BODY_IS_EMPTY_REC = 3 +}; /* node status bits */ #define NST_MIN_FIXED (1<<0) @@ -221,13 +227,10 @@ typedef struct { int lower; int upper; int greedy; - int body_empty_info; + enum QuantBodyEmpty body_empty_info; struct _Node* head_exact; struct _Node* next_head_exact; int is_refered; /* include called node. don't eliminate even if {0} */ -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int comb_exp_check_num; /* 1,2,3...: check, 0: no check */ -#endif } QuantNode; typedef struct { @@ -330,6 +333,7 @@ typedef struct { enum GimmickType type; int detail_type; + int num; int id; } GimmickNode; @@ -398,15 +402,9 @@ typedef struct { int num_mem; int num_named; int mem_alloc; - MemEnv mem_env_static[SCANENV_MEMENV_SIZE]; - MemEnv* mem_env_dynamic; -#ifdef USE_COMBINATION_EXPLOSION_CHECK - int num_comb_exp_check; - int comb_exp_max_regnum; - int curr_max_regnum; - int has_recursion; -#endif - unsigned int parse_depth; + MemEnv mem_env_static[SCANENV_MEMENV_SIZE]; + MemEnv* mem_env_dynamic; + unsigned int parse_depth; int keep_num; int save_num; @@ -447,6 +445,10 @@ extern int onig_free_shared_cclass_table P_((void)); extern int onig_is_code_in_cc P_((OnigEncoding enc, OnigCodePoint code, CClassNode* cc)); extern OnigLen onig_get_tiny_min_len(Node* node, unsigned int inhibit_node_types, int* invalid_node); +#ifdef USE_CALLOUT +extern int onig_global_callout_names_free(void); +#endif + #ifdef ONIG_DEBUG extern int onig_print_names(FILE*, regex_t*); #endif diff --git a/src/regposerr.c b/src/regposerr.c index fc71eee..2e2a8e2 100644 --- a/src/regposerr.c +++ b/src/regposerr.c @@ -2,7 +2,7 @@ regposerr.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2007 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,6 +27,13 @@ * SUCH DAMAGE. */ +/* Can't include regint.h etc.. for conflict of regex_t. + Define ONIGURUMA_EXPORT here for onigposix.h. + */ +#ifndef ONIGURUMA_EXPORT +#define ONIGURUMA_EXPORT +#endif + #include "config.h" #include "onigposix.h" diff --git a/src/regposix.c b/src/regposix.c index 0fdbcbb..895cf29 100644 --- a/src/regposix.c +++ b/src/regposix.c @@ -2,7 +2,7 @@ regposix.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -58,8 +58,10 @@ onig2posix_error_code(int code) static const O2PERR o2p[] = { { ONIG_MISMATCH, REG_NOMATCH }, { ONIG_NO_SUPPORT_CONFIG, REG_EONIG_INTERNAL }, + { ONIG_ABORT, REG_EONIG_INTERNAL }, { ONIGERR_MEMORY, REG_ESPACE }, { ONIGERR_MATCH_STACK_LIMIT_OVER, REG_EONIG_INTERNAL }, + { ONIGERR_RETRY_LIMIT_IN_MATCH_OVER, REG_EONIG_INTERNAL }, { ONIGERR_TYPE_BUG, REG_EONIG_INTERNAL }, { ONIGERR_PARSER_BUG, REG_EONIG_INTERNAL }, { ONIGERR_STACK_BUG, REG_EONIG_INTERNAL }, @@ -117,6 +119,12 @@ onig2posix_error_code(int code) { ONIGERR_INVALID_IF_ELSE_SYNTAX, REG_BADPAT }, { ONIGERR_INVALID_ABSENT_GROUP_PATTERN, REG_BADPAT }, { ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN, REG_BADPAT }, + { ONIGERR_INVALID_CALLOUT_PATTERN, REG_BADPAT }, + { ONIGERR_INVALID_CALLOUT_NAME, REG_BADPAT }, + { ONIGERR_UNDEFINED_CALLOUT_NAME, REG_BADPAT }, + { ONIGERR_INVALID_CALLOUT_BODY, REG_BADPAT }, + { ONIGERR_INVALID_CALLOUT_TAG_NAME, REG_BADPAT }, + { ONIGERR_INVALID_CALLOUT_ARG, REG_BADPAT }, { ONIGERR_NOT_SUPPORTED_ENCODING_COMBINATION, REG_EONIG_BADARG }, { ONIGERR_LIBRARY_IS_NOT_INITIALIZED, REG_EONIG_INTERNAL } }; @@ -260,8 +268,7 @@ reg_set_encoding(int mb_code) break; } - onig_initialize(0, 0); - onig_initialize_encoding(enc); + onig_initialize(&enc, 1); onigenc_set_default_encoding(enc); } diff --git a/src/regsyntax.c b/src/regsyntax.c index 3817d38..aa95479 100644 --- a/src/regsyntax.c +++ b/src/regsyntax.c @@ -2,7 +2,7 @@ regsyntax.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -176,6 +176,8 @@ OnigSyntaxType OnigSyntaxPerl = { ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | + ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS | + ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME | ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | @@ -207,6 +209,8 @@ OnigSyntaxType OnigSyntaxPerl_NG = { ONIG_SYN_OP2_QMARK_GROUP_EFFECT | ONIG_SYN_OP2_OPTION_PERL | ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE | ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP | + ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS | + ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME | ONIG_SYN_OP2_ESC_X_Y_GRAPHEME_CLUSTER | ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY | ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT | diff --git a/src/regversion.c b/src/regversion.c index 245a001..594a52c 100644 --- a/src/regversion.c +++ b/src/regversion.c @@ -2,7 +2,7 @@ regversion.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2008 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,6 @@ * SUCH DAMAGE. */ -#include "config.h" #include "regint.h" #include @@ -49,7 +48,7 @@ onig_copyright(void) static char s[58]; xsnprintf(s, sizeof(s), - "Oniguruma %d.%d.%d : Copyright (C) 2002-2016 K.Kosako", + "Oniguruma %d.%d.%d : Copyright (C) 2002-2018 K.Kosako", ONIGURUMA_VERSION_MAJOR, ONIGURUMA_VERSION_MINOR, ONIGURUMA_VERSION_TEENY); diff --git a/src/sjis.c b/src/sjis.c index 88b8d02..e1bf3e1 100644 --- a/src/sjis.c +++ b/src/sjis.c @@ -2,7 +2,7 @@ sjis.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -321,8 +321,8 @@ get_ctype_code_range(OnigCtype ctype, OnigCodePoint* sb_out, OnigEncodingType OnigEncodingSJIS = { mbc_enc_len, "Shift_JIS", /* name */ - 2, /* max byte length */ - 1, /* min byte length */ + 2, /* max enc length */ + 1, /* min enc length */ onigenc_is_mbc_newline_0x0a, mbc_to_code, code_to_mbclen, diff --git a/src/utf16_be.c b/src/utf16_be.c index f220cca..098ab54 100644 --- a/src/utf16_be.c +++ b/src/utf16_be.c @@ -2,7 +2,7 @@ utf16_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -27,7 +27,50 @@ * SUCH DAMAGE. */ -#include "regenc.h" +#include "regint.h" /* for USE_CALLOUT */ + + +static int +init(void) +{ +#ifdef USE_CALLOUT + + int id; + OnigEncoding enc; + char* name; + unsigned int t_long; + unsigned int args[4]; + OnigValue opts[4]; + + enc = ONIG_ENCODING_UTF16_BE; + t_long = ONIG_TYPE_LONG; + + name = "\000F\000A\000I\000L\000\000"; BC0_P(name, fail); + name = "\000M\000I\000S\000M\000A\000T\000C\000H\000\000"; BC0_P(name, mismatch); + name = "\000M\000A\000X\000\000"; BC_B(name, max, 1, &t_long); + + name = "\000E\000R\000R\000O\000R\000\000"; + args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT; + BC_P_O(name, error, 1, args, 1, opts); + + name = "\000C\000O\000U\000N\000T\000\000"; + args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; + BC_B_O(name, count, 1, args, 1, opts); + + name = "\000T\000O\000T\000A\000L\000_\000C\000O\000U\000N\000T\000\000"; + args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; + BC_B_O(name, total_count, 1, args, 1, opts); + + name = "\000C\000M\000P\000\000"; + args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + args[1] = ONIG_TYPE_STRING; + args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + BC_P(name, cmp, 3, args); + +#endif /* USE_CALLOUT */ + + return ONIG_NORMAL; +} static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -215,8 +258,8 @@ utf16be_get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncodingType OnigEncodingUTF16_BE = { utf16be_mbc_enc_len, "UTF-16BE", /* name */ - 4, /* max byte length */ - 2, /* min byte length */ + 4, /* max enc length */ + 2, /* min enc length */ utf16be_is_mbc_newline, utf16be_mbc_to_code, utf16be_code_to_mbclen, @@ -229,7 +272,7 @@ OnigEncodingType OnigEncodingUTF16_BE = { onigenc_utf16_32_get_ctype_code_range, utf16be_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, - NULL, /* init */ - NULL, /* is_initialized */ + init, + 0, /* is_initialized */ is_valid_mbc_string }; diff --git a/src/utf16_le.c b/src/utf16_le.c index 89bc72f..dc0d3f1 100644 --- a/src/utf16_le.c +++ b/src/utf16_le.c @@ -2,7 +2,7 @@ utf16_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -26,8 +26,49 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ +#include "regint.h" /* for USE_CALLOUT */ -#include "regenc.h" +static int +init(void) +{ +#ifdef USE_CALLOUT + + int id; + OnigEncoding enc; + char* name; + unsigned int t_long; + unsigned int args[4]; + OnigValue opts[4]; + + enc = ONIG_ENCODING_UTF16_LE; + t_long = ONIG_TYPE_LONG; + + name = "F\000A\000I\000L\000\000\000"; BC0_P(name, fail); + name = "M\000I\000S\000M\000A\000T\000C\000H\000\000\000"; BC0_P(name, mismatch); + name = "M\000A\000X\000\000\000"; BC_B(name, max, 1, &t_long); + + name = "E\000R\000R\000O\000R\000\000\000"; + args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT; + BC_P_O(name, error, 1, args, 1, opts); + + name = "C\000O\000U\000N\000T\000\000\000"; + args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; + BC_B_O(name, count, 1, args, 1, opts); + + name = "T\000O\000T\000A\000L\000_\000C\000O\000U\000N\000T\000\000\000"; + args[0] = ONIG_TYPE_CHAR; opts[0].c = '>'; + BC_B_O(name, total_count, 1, args, 1, opts); + + name = "C\000M\000P\000\000\000"; + args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + args[1] = ONIG_TYPE_STRING; + args[2] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + BC_P(name, cmp, 3, args); + +#endif /* USE_CALLOUT */ + + return ONIG_NORMAL; +} static const int EncLen_UTF16[] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -225,8 +266,8 @@ utf16le_get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncodingType OnigEncodingUTF16_LE = { utf16le_mbc_enc_len, "UTF-16LE", /* name */ - 4, /* max byte length */ - 2, /* min byte length */ + 4, /* max enc length */ + 2, /* min enc length */ utf16le_is_mbc_newline, utf16le_mbc_to_code, utf16le_code_to_mbclen, @@ -239,7 +280,7 @@ OnigEncodingType OnigEncodingUTF16_LE = { onigenc_utf16_32_get_ctype_code_range, utf16le_left_adjust_char_head, onigenc_always_false_is_allowed_reverse_match, - NULL, /* init */ - NULL, /* is_initialized */ + init, + 0, /* is_initialized */ is_valid_mbc_string }; diff --git a/src/utf32_be.c b/src/utf32_be.c index d0c7f39..68760bb 100644 --- a/src/utf32_be.c +++ b/src/utf32_be.c @@ -2,7 +2,7 @@ utf32_be.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -174,8 +174,8 @@ utf32be_get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncodingType OnigEncodingUTF32_BE = { utf32be_mbc_enc_len, "UTF-32BE", /* name */ - 4, /* max byte length */ - 4, /* min byte length */ + 4, /* max enc length */ + 4, /* min enc length */ utf32be_is_mbc_newline, utf32be_mbc_to_code, utf32be_code_to_mbclen, diff --git a/src/utf32_le.c b/src/utf32_le.c index 33200d1..8208cd0 100644 --- a/src/utf32_le.c +++ b/src/utf32_le.c @@ -2,7 +2,7 @@ utf32_le.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2016 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -174,8 +174,8 @@ utf32le_get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncodingType OnigEncodingUTF32_LE = { utf32le_mbc_enc_len, "UTF-32LE", /* name */ - 4, /* max byte length */ - 4, /* min byte length */ + 4, /* max enc length */ + 4, /* min enc length */ utf32le_is_mbc_newline, utf32le_mbc_to_code, utf32le_code_to_mbclen, diff --git a/src/utf8.c b/src/utf8.c index e5e59b2..a5c4dbe 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -2,7 +2,7 @@ utf8.c - Oniguruma (regular expression library) **********************************************************************/ /*- - * Copyright (c) 2002-2017 K.Kosako + * Copyright (c) 2002-2018 K.Kosako * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -280,8 +280,8 @@ get_case_fold_codes_by_str(OnigCaseFoldType flag, OnigEncodingType OnigEncodingUTF8 = { mbc_enc_len, "UTF-8", /* name */ - 6, /* max byte length */ - 1, /* min byte length */ + 6, /* max enc length */ + 1, /* min enc length */ onigenc_is_mbc_newline_0x0a, mbc_to_code, code_to_mbclen, -- cgit v1.2.3