diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 5 | ||||
-rw-r--r-- | src/ascii.c | 12 | ||||
-rw-r--r-- | src/big5.c | 3 | ||||
-rw-r--r-- | src/cp1251.c | 3 | ||||
-rw-r--r-- | src/euc_jp.c | 3 | ||||
-rw-r--r-- | src/euc_kr.c | 3 | ||||
-rw-r--r-- | src/euc_tw.c | 3 | ||||
-rw-r--r-- | src/gb18030.c | 199 | ||||
-rw-r--r-- | src/iso8859_1.c | 3 | ||||
-rw-r--r-- | src/iso8859_10.c | 3 | ||||
-rw-r--r-- | src/iso8859_11.c | 3 | ||||
-rw-r--r-- | src/iso8859_13.c | 3 | ||||
-rw-r--r-- | src/iso8859_14.c | 3 | ||||
-rw-r--r-- | src/iso8859_15.c | 3 | ||||
-rw-r--r-- | src/iso8859_16.c | 3 | ||||
-rw-r--r-- | src/iso8859_2.c | 3 | ||||
-rw-r--r-- | src/iso8859_3.c | 3 | ||||
-rw-r--r-- | src/iso8859_4.c | 3 | ||||
-rw-r--r-- | src/iso8859_5.c | 3 | ||||
-rw-r--r-- | src/iso8859_6.c | 3 | ||||
-rw-r--r-- | src/iso8859_7.c | 3 | ||||
-rw-r--r-- | src/iso8859_8.c | 3 | ||||
-rw-r--r-- | src/iso8859_9.c | 3 | ||||
-rw-r--r-- | src/koi8.c | 3 | ||||
-rw-r--r-- | src/koi8_r.c | 3 | ||||
-rw-r--r-- | src/oniguruma.h | 18 | ||||
-rw-r--r-- | src/regcomp.c | 15 | ||||
-rw-r--r-- | src/regenc.h | 11 | ||||
-rw-r--r-- | src/regexec.c | 237 | ||||
-rw-r--r-- | src/regint.h | 3 | ||||
-rw-r--r-- | src/regparse.c | 42 | ||||
-rw-r--r-- | src/sjis.c | 4 | ||||
-rw-r--r-- | src/unicode.c | 2 | ||||
-rw-r--r-- | src/utf16_be.c | 13 | ||||
-rw-r--r-- | src/utf16_le.c | 13 | ||||
-rw-r--r-- | src/utf32_be.c | 4 | ||||
-rw-r--r-- | src/utf32_le.c | 4 | ||||
-rw-r--r-- | src/utf8.c | 6 |
38 files changed, 395 insertions, 259 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 911aecd..c7a4705 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -19,8 +19,9 @@ endif lib_LTLIBRARIES = $(libname) libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \ - regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \ - regenc.c regsyntax.c regtrav.c regversion.c st.c \ + regparse.c regcomp.c regexec.c \ + regenc.c regerror.c regext.c regsyntax.c regtrav.c regversion.c st.c \ + reggnu.c \ $(posix_sources) \ unicode.c \ unicode_unfold_key.c \ diff --git a/src/ascii.c b/src/ascii.c index 7efaa26..eb38944 100644 --- a/src/ascii.c +++ b/src/ascii.c @@ -37,16 +37,19 @@ init(void) int id; OnigEncoding enc; char* name; - unsigned int t_long; unsigned int args[4]; OnigValue opts[4]; enc = ONIG_ENCODING_ASCII; - t_long = ONIG_TYPE_LONG; name = "FAIL"; BC0_P(name, fail); name = "MISMATCH"; BC0_P(name, mismatch); - name = "MAX"; BC_B(name, max, 1, &t_long); + + name = "MAX"; + args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + args[1] = ONIG_TYPE_CHAR; + opts[0].c = 'X'; + BC_B_O(name, max, 2, args, 1, opts); name = "ERROR"; args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT; @@ -110,5 +113,6 @@ OnigEncodingType OnigEncodingASCII = { init, 0, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; @@ -187,5 +187,6 @@ OnigEncodingType OnigEncodingBIG5 = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/cp1251.c b/src/cp1251.c index f7b43c3..e217037 100644 --- a/src/cp1251.c +++ b/src/cp1251.c @@ -200,5 +200,6 @@ OnigEncodingType OnigEncodingCP1251 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/euc_jp.c b/src/euc_jp.c index 8dd6ac1..ae8c2fe 100644 --- a/src/euc_jp.c +++ b/src/euc_jp.c @@ -307,5 +307,6 @@ OnigEncodingType OnigEncodingEUC_JP = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/euc_kr.c b/src/euc_kr.c index 08bfa1c..def311b 100644 --- a/src/euc_kr.c +++ b/src/euc_kr.c @@ -185,5 +185,6 @@ OnigEncodingType OnigEncodingEUC_CN = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/euc_tw.c b/src/euc_tw.c index dbf0eac..8738598 100644 --- a/src/euc_tw.c +++ b/src/euc_tw.c @@ -168,5 +168,6 @@ OnigEncodingType OnigEncodingEUC_TW = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/gb18030.c b/src/gb18030.c index 073c83b..d4a1108 100644 --- a/src/gb18030.c +++ b/src/gb18030.c @@ -89,25 +89,25 @@ is_valid_mbc_string(const UChar* p, const UChar* end) p++; if (p >= end) return FALSE; if (*p < 0x40) { - if (*p < 0x30 || *p > 0x39) - return FALSE; + if (*p < 0x30 || *p > 0x39) + return FALSE; - p++; - if (p >= end) return FALSE; - if (*p < 0x81 || *p == 0xff) return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0x81 || *p == 0xff) return FALSE; - p++; - if (p >= end) return FALSE; - if (*p < 0x30 || *p > 0x39) - return FALSE; + p++; + if (p >= end) return FALSE; + if (*p < 0x30 || *p > 0x39) + return FALSE; - p++; + p++; } else if (*p == 0x7f || *p == 0xff) { - return FALSE; + return FALSE; } else { - p++; + p++; } } } @@ -138,7 +138,7 @@ gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end, #if 0 static int gb18030_is_mbc_ambiguous(OnigCaseFoldType flag, - const UChar** pp, const UChar* end) + const UChar** pp, const UChar* end) { return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end); } @@ -197,16 +197,16 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case S_START: switch (GB18030_MAP[*p]) { case C1: - return (UChar *)s; + return (UChar *)s; case C2: - state = S_one_C2; /* C2 */ - break; + state = S_one_C2; /* C2 */ + break; case C4: - state = S_one_C4; /* C4 */ - break; + state = S_one_C4; /* C4 */ + break; case CM: - state = S_one_CM; /* CM */ - break; + state = S_one_CM; /* CM */ + break; } break; case S_one_C2: /* C2 */ @@ -214,10 +214,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)s; + return (UChar *)s; case CM: - state = S_odd_CM_one_CX; /* CM C2 */ - break; + state = S_odd_CM_one_CX; /* CM C2 */ + break; } break; case S_one_C4: /* C4 */ @@ -225,23 +225,23 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)s; + return (UChar *)s; case CM: - state = S_one_CMC4; - break; + state = S_one_CMC4; + break; } break; case S_one_CM: /* CM */ switch (GB18030_MAP[*p]) { case C1: case C2: - return (UChar *)s; + return (UChar *)s; case C4: - state = S_odd_C4CM; - break; + state = S_odd_C4CM; + break; case CM: - state = S_odd_CM_one_CX; /* CM CM */ - break; + state = S_odd_CM_one_CX; /* CM CM */ + break; } break; @@ -250,10 +250,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)(s - 1); + return (UChar *)(s - 1); case CM: - state = S_even_CM_one_CX; - break; + state = S_even_CM_one_CX; + break; } break; case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */ @@ -261,10 +261,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)s; + return (UChar *)s; case CM: - state = S_odd_CM_one_CX; - break; + state = S_odd_CM_one_CX; + break; } break; @@ -272,26 +272,26 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) switch (GB18030_MAP[*p]) { case C1: case C2: - return (UChar *)(s - 1); + return (UChar *)(s - 1); case C4: - state = S_one_C4_odd_CMC4; /* C4 CM C4 */ - break; + state = S_one_C4_odd_CMC4; /* C4 CM C4 */ + break; case CM: - state = S_even_CM_one_CX; /* CM CM C4 */ - break; + state = S_even_CM_one_CX; /* CM CM C4 */ + break; } break; case S_odd_CMC4: /* CM C4 CM C4 CM C4 */ switch (GB18030_MAP[*p]) { case C1: case C2: - return (UChar *)(s - 1); + return (UChar *)(s - 1); case C4: - state = S_one_C4_odd_CMC4; - break; + state = S_one_C4_odd_CMC4; + break; case CM: - state = S_odd_CM_odd_CMC4; - break; + state = S_odd_CM_odd_CMC4; + break; } break; case S_one_C4_odd_CMC4: /* C4 CM C4 */ @@ -299,23 +299,23 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)(s - 1); + return (UChar *)(s - 1); case CM: - state = S_even_CMC4; /* CM C4 CM C4 */ - break; + state = S_even_CMC4; /* CM C4 CM C4 */ + break; } break; case S_even_CMC4: /* CM C4 CM C4 */ switch (GB18030_MAP[*p]) { case C1: case C2: - return (UChar *)(s - 3); + return (UChar *)(s - 3); case C4: - state = S_one_C4_even_CMC4; - break; + state = S_one_C4_even_CMC4; + break; case CM: - state = S_odd_CM_even_CMC4; - break; + state = S_odd_CM_even_CMC4; + break; } break; case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */ @@ -323,10 +323,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)(s - 3); + return (UChar *)(s - 3); case CM: - state = S_odd_CMC4; - break; + state = S_odd_CMC4; + break; } break; @@ -335,10 +335,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)(s - 3); + return (UChar *)(s - 3); case CM: - state = S_even_CM_odd_CMC4; - break; + state = S_even_CM_odd_CMC4; + break; } break; case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */ @@ -346,10 +346,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)(s - 1); + return (UChar *)(s - 1); case CM: - state = S_odd_CM_odd_CMC4; - break; + state = S_odd_CM_odd_CMC4; + break; } break; @@ -358,10 +358,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)(s - 1); + return (UChar *)(s - 1); case CM: - state = S_even_CM_even_CMC4; - break; + state = S_even_CM_even_CMC4; + break; } break; case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */ @@ -369,10 +369,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)(s - 3); + return (UChar *)(s - 3); case CM: - state = S_odd_CM_even_CMC4; - break; + state = S_odd_CM_even_CMC4; + break; } break; @@ -381,23 +381,23 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)s; + return (UChar *)s; case CM: - state = S_one_CM_odd_C4CM; /* CM C4 CM */ - break; + state = S_one_CM_odd_C4CM; /* CM C4 CM */ + break; } break; case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */ switch (GB18030_MAP[*p]) { case C1: case C2: - return (UChar *)(s - 2); /* |CM C4 CM */ + return (UChar *)(s - 2); /* |CM C4 CM */ case C4: - state = S_even_C4CM; - break; + state = S_even_C4CM; + break; case CM: - state = S_even_CM_odd_C4CM; - break; + state = S_even_CM_odd_C4CM; + break; } break; case S_even_C4CM: /* C4 CM C4 CM */ @@ -405,23 +405,23 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)(s - 2); /* C4|CM C4 CM */ + return (UChar *)(s - 2); /* C4|CM C4 CM */ case CM: - state = S_one_CM_even_C4CM; - break; + state = S_one_CM_even_C4CM; + break; } break; case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */ switch (GB18030_MAP[*p]) { case C1: case C2: - return (UChar *)(s - 0); /*|CM C4 CM C4|CM */ + return (UChar *)(s - 0); /*|CM C4 CM C4|CM */ case C4: - state = S_odd_C4CM; - break; + state = S_odd_C4CM; + break; case CM: - state = S_even_CM_even_C4CM; - break; + state = S_even_CM_even_C4CM; + break; } break; @@ -430,10 +430,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)(s - 0); /* |CM CM|C4|CM */ + return (UChar *)(s - 0); /* |CM CM|C4|CM */ case CM: - state = S_odd_CM_odd_C4CM; - break; + state = S_odd_CM_odd_C4CM; + break; } break; case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */ @@ -441,10 +441,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)(s - 2); /* |CM CM|CM C4 CM */ + return (UChar *)(s - 2); /* |CM CM|CM C4 CM */ case CM: - state = S_even_CM_odd_C4CM; - break; + state = S_even_CM_odd_C4CM; + break; } break; @@ -453,10 +453,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */ + return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */ case CM: - state = S_odd_CM_even_C4CM; - break; + state = S_odd_CM_even_C4CM; + break; } break; case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */ @@ -464,10 +464,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s) case C1: case C2: case C4: - return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */ + return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */ case CM: - state = S_even_CM_even_C4CM; - break; + state = S_even_CM_even_C4CM; + break; } break; } @@ -535,5 +535,6 @@ OnigEncodingType OnigEncodingGB18030 = { NULL, /* init */ NULL, /* is_initialized */ is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_1.c b/src/iso8859_1.c index bcd7e26..ff47b80 100644 --- a/src/iso8859_1.c +++ b/src/iso8859_1.c @@ -272,5 +272,6 @@ OnigEncodingType OnigEncodingISO_8859_1 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_10.c b/src/iso8859_10.c index a5946cc..f9804e2 100644 --- a/src/iso8859_10.c +++ b/src/iso8859_10.c @@ -239,5 +239,6 @@ OnigEncodingType OnigEncodingISO_8859_10 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_11.c b/src/iso8859_11.c index ec94fd1..108ee8a 100644 --- a/src/iso8859_11.c +++ b/src/iso8859_11.c @@ -96,5 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_11 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_13.c b/src/iso8859_13.c index fba7fd4..9585355 100644 --- a/src/iso8859_13.c +++ b/src/iso8859_13.c @@ -228,5 +228,6 @@ OnigEncodingType OnigEncodingISO_8859_13 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_14.c b/src/iso8859_14.c index e1f71f5..83fc551 100644 --- a/src/iso8859_14.c +++ b/src/iso8859_14.c @@ -241,5 +241,6 @@ OnigEncodingType OnigEncodingISO_8859_14 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_15.c b/src/iso8859_15.c index 236e9e7..3a7ad05 100644 --- a/src/iso8859_15.c +++ b/src/iso8859_15.c @@ -235,5 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_15 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_16.c b/src/iso8859_16.c index 42045bd..02022d9 100644 --- a/src/iso8859_16.c +++ b/src/iso8859_16.c @@ -237,5 +237,6 @@ OnigEncodingType OnigEncodingISO_8859_16 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_2.c b/src/iso8859_2.c index db93046..ecdbb99 100644 --- a/src/iso8859_2.c +++ b/src/iso8859_2.c @@ -235,5 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_2 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_3.c b/src/iso8859_3.c index 6fe5e6f..739f1c9 100644 --- a/src/iso8859_3.c +++ b/src/iso8859_3.c @@ -235,5 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_3 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_4.c b/src/iso8859_4.c index ee1eb93..4f2b6a0 100644 --- a/src/iso8859_4.c +++ b/src/iso8859_4.c @@ -237,5 +237,6 @@ OnigEncodingType OnigEncodingISO_8859_4 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_5.c b/src/iso8859_5.c index 7d828e1..cf41061 100644 --- a/src/iso8859_5.c +++ b/src/iso8859_5.c @@ -226,5 +226,6 @@ OnigEncodingType OnigEncodingISO_8859_5 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_6.c b/src/iso8859_6.c index a959e98..1ffe99f 100644 --- a/src/iso8859_6.c +++ b/src/iso8859_6.c @@ -96,5 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_6 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_7.c b/src/iso8859_7.c index e695523..87288c2 100644 --- a/src/iso8859_7.c +++ b/src/iso8859_7.c @@ -222,5 +222,6 @@ OnigEncodingType OnigEncodingISO_8859_7 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_8.c b/src/iso8859_8.c index 66b63b8..8f162a4 100644 --- a/src/iso8859_8.c +++ b/src/iso8859_8.c @@ -96,5 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_8 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/iso8859_9.c b/src/iso8859_9.c index d780293..52589cf 100644 --- a/src/iso8859_9.c +++ b/src/iso8859_9.c @@ -228,5 +228,6 @@ OnigEncodingType OnigEncodingISO_8859_9 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; @@ -250,5 +250,6 @@ OnigEncodingType OnigEncodingKOI8 = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/koi8_r.c b/src/koi8_r.c index e88cfe3..8adc399 100644 --- a/src/koi8_r.c +++ b/src/koi8_r.c @@ -212,5 +212,6 @@ OnigEncodingType OnigEncodingKOI8_R = { NULL, /* init */ NULL, /* is_initialized */ onigenc_always_true_is_valid_mbc_string, - 0, 0, 0 + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/oniguruma.h b/src/oniguruma.h index 349c00e..322959a 100644 --- a/src/oniguruma.h +++ b/src/oniguruma.h @@ -36,7 +36,7 @@ extern "C" { #define ONIGURUMA #define ONIGURUMA_VERSION_MAJOR 6 #define ONIGURUMA_VERSION_MINOR 8 -#define ONIGURUMA_VERSION_TEENY 1 +#define ONIGURUMA_VERSION_TEENY 2 #define ONIGURUMA_VERSION_INT 60801 @@ -115,7 +115,7 @@ typedef struct { OnigCodePoint one_or_more_time; OnigCodePoint anychar_anytime; } OnigMetaCharTableType; - + typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg); typedef struct OnigEncodingTypeST { @@ -344,7 +344,7 @@ int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p)); ONIG_EXTERN int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end)); ONIG_EXTERN -UChar* onigenc_strdup P_((OnigEncoding enc, const UChar* s, const UChar* end)); +OnigUChar* onigenc_strdup P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end)); /* PART: regular expression */ @@ -549,7 +549,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax; #define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22 #define ONIGERR_FAIL_TO_INITIALIZE -23 /* general error */ -#define ONIGERR_INVALID_ARGUMENT -30 +#define ONIGERR_INVALID_ARGUMENT -30 /* syntax error */ #define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100 #define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101 @@ -894,6 +894,8 @@ ONIG_EXTERN int onig_set_progress_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f)); ONIG_EXTERN int onig_set_retraction_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f)); +ONIG_EXTERN +int onig_set_callout_user_data_of_match_param P_((OnigMatchParam* param, void* user_data)); /* for callout functions */ ONIG_EXTERN @@ -905,15 +907,15 @@ OnigCalloutFunc onig_get_retraction_callout P_((void)); ONIG_EXTERN int onig_set_retraction_callout P_((OnigCalloutFunc f)); ONIG_EXTERN -int onig_set_callout_of_name P_((OnigEncoding enc, OnigCalloutType type, OnigUChar* name, OnigUChar* name_end, int callout_in, OnigCalloutFunc callout, OnigCalloutFunc end_callout, int arg_num, unsigned int arg_types[], int optional_arg_num, OnigValue opt_defaults[])); /* name: single-byte string */ +int onig_set_callout_of_name P_((OnigEncoding enc, OnigCalloutType type, OnigUChar* name, OnigUChar* name_end, int callout_in, OnigCalloutFunc callout, OnigCalloutFunc end_callout, int arg_num, unsigned int arg_types[], int optional_arg_num, OnigValue opt_defaults[])); ONIG_EXTERN OnigUChar* onig_get_callout_name_by_name_id P_((int id)); ONIG_EXTERN -int onig_get_callout_num_by_tag P_((OnigRegex reg, const UChar* tag, const UChar* tag_end)); +int onig_get_callout_num_by_tag P_((OnigRegex reg, const OnigUChar* tag, const OnigUChar* tag_end)); ONIG_EXTERN -int onig_get_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const UChar* tag, const UChar* tag_end, int slot, OnigType* type, OnigValue* val)); +int onig_get_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const OnigUChar* tag, const OnigUChar* tag_end, int slot, OnigType* type, OnigValue* val)); ONIG_EXTERN -int onig_set_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const UChar* tag, const UChar* tag_end, int slot, OnigType type, OnigValue* val)); +int onig_set_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const OnigUChar* tag, const OnigUChar* tag_end, int slot, OnigType type, OnigValue* val)); /* used in callout functions */ ONIG_EXTERN diff --git a/src/regcomp.c b/src/regcomp.c index a19109f..f953ed1 100644 --- a/src/regcomp.c +++ b/src/regcomp.c @@ -2450,7 +2450,6 @@ is_exclusive(Node* x, Node* y, regex_t* reg) if (NODE_STRING_LEN(x) == 0) break; - //c = *(xs->s); switch (ytype) { case NODE_CTYPE: switch (CTYPE_(y)->ctype) { @@ -2758,7 +2757,7 @@ tree_min_len(Node* node, ScanEnv* env) len = en->min_len; else { if (NODE_IS_MARK1(node)) - len = 0; // recursive + len = 0; /* recursive */ else { NODE_STATUS_ADD(node, NST_MARK1); len = tree_min_len(NODE_BODY(node), env); @@ -3763,7 +3762,7 @@ expand_case_fold_string(Node* node, regex_t* reg) return r; } -#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT +#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT static enum QuantBodyEmpty quantifiers_memory_node_info(Node* node) { @@ -3847,7 +3846,7 @@ quantifiers_memory_node_info(Node* node) return r; } -#endif /* USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT */ +#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */ #define IN_ALT (1<<0) @@ -4375,7 +4374,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env) if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) { d = tree_min_len(body, env); if (d == 0) { -#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT +#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT qn->body_empty_info = quantifiers_memory_node_info(body); if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) { if (NODE_TYPE(body) == NODE_ENCLOSURE && @@ -5979,7 +5978,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, #endif root = 0; - if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL; + if (IS_NOT_NULL(einfo)) { + einfo->enc = reg->enc; + einfo->par = (UChar* )NULL; + } #ifdef ONIG_DEBUG print_enc_string(stderr, reg->enc, pattern, pattern_end); @@ -6124,7 +6126,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end, err: if (IS_NOT_NULL(scan_env.error)) { if (IS_NOT_NULL(einfo)) { - einfo->enc = scan_env.enc; einfo->par = scan_env.error; einfo->par_end = scan_env.error_end; } diff --git a/src/regenc.h b/src/regenc.h index 4dd89ba..ae7a774 100644 --- a/src/regenc.h +++ b/src/regenc.h @@ -120,6 +120,10 @@ struct PropertyNameCtype { #define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII +#define ENC_FLAG_ASCII_COMPATIBLE (1<<0) +#define ENC_FLAG_UNICODE (1<<1) + + /* for encoding system implementation (internal) */ extern int onigenc_end(void); extern int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg)); @@ -156,7 +160,7 @@ extern int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UCh extern int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype)); extern struct PropertyNameCtype* euc_jp_lookup_property_name P_((register const char *str, register unsigned int len)); extern struct PropertyNameCtype* sjis_lookup_property_name P_((register const char *str, register unsigned int len)); -//extern const struct PropertyNameCtype* unicode_lookup_property_name P_((register const char *str, register unsigned int len)); +/* extern const struct PropertyNameCtype* unicode_lookup_property_name P_((register const char *str, register unsigned int len)); */ /* in enc/unicode.c */ extern int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype)); @@ -250,8 +254,9 @@ extern const unsigned short OnigEncAsciiCtypeTable[]; ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER)) #define ONIGENC_IS_UNICODE_ENCODING(enc) \ - ((enc)->is_code_ctype == onigenc_unicode_is_code_ctype) + (((enc)->flag & ENC_FLAG_UNICODE) != 0) -#define ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc) ((enc)->min_enc_len == 1) +#define ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc) \ + (((enc)->flag & ENC_FLAG_ASCII_COMPATIBLE) != 0) #endif /* REGENC_H */ diff --git a/src/regexec.c b/src/regexec.c index 35e3698..1ec5183 100644 --- a/src/regexec.c +++ b/src/regexec.c @@ -52,9 +52,9 @@ typedef struct { struct OnigMatchParamStruct { unsigned int match_stack_limit; unsigned long retry_limit_in_match; +#ifdef USE_CALLOUT OnigCalloutFunc progress_callout_of_contents; OnigCalloutFunc retraction_callout_of_contents; -#ifdef USE_CALLOUT int match_at_call_counter; void* callout_user_data; CalloutData* callout_data; @@ -81,15 +81,34 @@ onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param, extern int onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f) { +#ifdef USE_CALLOUT param->progress_callout_of_contents = f; return ONIG_NORMAL; +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif } extern int onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f) { +#ifdef USE_CALLOUT param->retraction_callout_of_contents = f; return ONIG_NORMAL; +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif +} + +extern int +onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data) +{ +#ifdef USE_CALLOUT + param->callout_user_data = user_data; + return ONIG_NORMAL; +#else + return ONIG_NO_SUPPORT_CONFIG; +#endif } @@ -114,19 +133,21 @@ typedef struct { #ifdef ONIG_DEBUG /* arguments type */ -#define ARG_SPECIAL -1 -#define ARG_NON 0 -#define ARG_RELADDR 1 -#define ARG_ABSADDR 2 -#define ARG_LENGTH 3 -#define ARG_MEMNUM 4 -#define ARG_OPTION 5 -#define ARG_MODE 6 +typedef enum { + ARG_SPECIAL = -1, + ARG_NON = 0, + ARG_RELADDR = 1, + ARG_ABSADDR = 2, + ARG_LENGTH = 3, + ARG_MEMNUM = 4, + ARG_OPTION = 5, + ARG_MODE = 6 +} OpArgType; typedef struct { short int opcode; char* name; - short int arg_type; + OpArgType arg_type; } OpInfoType; static OpInfoType OpInfo[] = { @@ -295,11 +316,12 @@ extern void onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, OnigEncoding enc) { - int i, n, arg_type; + int i, n; + OpArgType arg_type; RelAddrType addr; - LengthType len; - MemNumType mem; - OnigCodePoint code; + LengthType len; + MemNumType mem; + OnigCodePoint code; OnigOptionType option; ModeType mode; UChar *q; @@ -336,12 +358,13 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, fprintf(f, ":%d", option); } break; - case ARG_MODE: mode = *((ModeType* )bp); bp += SIZE_MODE; fprintf(f, ":%d", mode); break; + default: + break; } } else { @@ -546,7 +569,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, #ifdef USE_CALLOUT case OP_CALLOUT_CONTENTS: { - GET_MEMNUM_INC(mem, bp); // number + GET_MEMNUM_INC(mem, bp); /* number */ fprintf(f, ":%d", mem); } break; @@ -555,8 +578,8 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start, { int id; - GET_MEMNUM_INC(id, bp); // id - GET_MEMNUM_INC(mem, bp); // number + GET_MEMNUM_INC(id, bp); /* id */ + GET_MEMNUM_INC(mem, bp); /* number */ fprintf(f, ":%d:%d", id, mem); } @@ -959,8 +982,8 @@ typedef struct _StackType { struct { UChar *pstr; /* start/end position */ /* Following information is set, if this stack type is MEM-START */ - StackIndex start; /* prev. info (for backtrack "(...)*" ) */ - StackIndex end; /* prev. info (for backtrack "(...)*" ) */ + StackIndex prev_start; /* prev. info (for backtrack "(...)*" ) */ + StackIndex prev_end; /* prev. info (for backtrack "(...)*" ) */ } mem; struct { UChar *pstr; /* start position */ @@ -996,7 +1019,7 @@ struct OnigCalloutArgsStruct { const OnigUChar* string_end; const OnigUChar* start; const OnigUChar* right_range; - const OnigUChar* current; // current matching position + const OnigUChar* current; /* current matching position */ unsigned long retry_in_match_counter; /* invisible to users */ @@ -1127,7 +1150,7 @@ onig_get_retry_limit_in_match(void) #ifdef USE_RETRY_LIMIT_IN_MATCH return RetryLimitInMatch; #else - //return ONIG_NO_SUPPORT_CONFIG; + /* return ONIG_NO_SUPPORT_CONFIG; */ return 0; #endif } @@ -1520,11 +1543,11 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_ENSURE(1);\ stk->type = STK_MEM_START;\ stk->zid = (mnum);\ - stk->u.mem.pstr = (s);\ - stk->u.mem.start = mem_start_stk[mnum];\ - stk->u.mem.end = mem_end_stk[mnum];\ - mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ - mem_end_stk[mnum] = INVALID_STACK_INDEX;\ + stk->u.mem.pstr = (s);\ + stk->u.mem.prev_start = mem_start_stk[mnum];\ + stk->u.mem.prev_end = mem_end_stk[mnum];\ + mem_start_stk[mnum] = GET_STACK_INDEX(stk);\ + mem_end_stk[mnum] = INVALID_STACK_INDEX;\ STACK_INC;\ } while(0) @@ -1532,9 +1555,9 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_ENSURE(1);\ stk->type = STK_MEM_END;\ stk->zid = (mnum);\ - stk->u.mem.pstr = (s);\ - stk->u.mem.start = mem_start_stk[mnum];\ - stk->u.mem.end = mem_end_stk[mnum];\ + stk->u.mem.pstr = (s);\ + stk->u.mem.prev_start = mem_start_stk[mnum];\ + stk->u.mem.prev_end = mem_end_stk[mnum];\ mem_end_stk[mnum] = GET_STACK_INDEX(stk);\ STACK_INC;\ } while(0) @@ -1758,8 +1781,8 @@ stack_double(int is_alloca, char** arg_alloc_base, STACK_BASE_CHECK(stk, "STACK_POP 2"); \ if ((stk->type & STK_MASK_POP_USED) != 0) break;\ else if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->zid] = stk->u.mem.start;\ - mem_end_stk[stk->zid] = stk->u.mem.end;\ + mem_start_stk[stk->zid] = stk->u.mem.prev_start;\ + mem_end_stk[stk->zid] = stk->u.mem.prev_end;\ }\ }\ break;\ @@ -1770,15 +1793,15 @@ stack_double(int is_alloca, char** arg_alloc_base, if ((stk->type & STK_MASK_POP_USED) != 0) break;\ else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\ if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->zid] = stk->u.mem.start;\ - mem_end_stk[stk->zid] = stk->u.mem.end;\ + mem_start_stk[stk->zid] = stk->u.mem.prev_start;\ + mem_end_stk[stk->zid] = stk->u.mem.prev_end;\ }\ else if (stk->type == STK_REPEAT_INC) {\ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ }\ else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->zid] = stk->u.mem.start;\ - mem_end_stk[stk->zid] = stk->u.mem.end;\ + mem_start_stk[stk->zid] = stk->u.mem.prev_start;\ + mem_end_stk[stk->zid] = stk->u.mem.prev_end;\ }\ POP_CALLOUT_CASE\ }\ @@ -1795,15 +1818,15 @@ stack_double(int is_alloca, char** arg_alloc_base, if (stk->type == (til_type)) break;\ else {\ if (stk->type == STK_MEM_START) {\ - mem_start_stk[stk->zid] = stk->u.mem.start;\ - mem_end_stk[stk->zid] = stk->u.mem.end;\ + mem_start_stk[stk->zid] = stk->u.mem.prev_start;\ + mem_end_stk[stk->zid] = stk->u.mem.prev_end;\ }\ else if (stk->type == STK_REPEAT_INC) {\ STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\ }\ else if (stk->type == STK_MEM_END) {\ - mem_start_stk[stk->zid] = stk->u.mem.start;\ - mem_end_stk[stk->zid] = stk->u.mem.end;\ + mem_start_stk[stk->zid] = stk->u.mem.prev_start;\ + mem_end_stk[stk->zid] = stk->u.mem.prev_end;\ }\ /* Don't call callout here because negation of total success by (?!..) (?<!..) */\ }\ @@ -1849,12 +1872,24 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while(0) -#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT -#define STACK_EMPTY_CHECK_MEMST(isnull,sid,s,reg) do {\ +#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\ + if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\ + (addr) = 0;\ + }\ + else {\ + if (MEM_STATUS_AT((reg)->bt_mem_end, k->zid))\ + (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\ + else\ + (addr) = (UChar* )k->u.mem.prev_end;\ + }\ +} while (0) + +#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT +#define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\ StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \ + STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM"); \ if (k->type == STK_EMPTY_CHECK_START) {\ if (k->zid == (sid)) {\ if (k->u.empty_check.pstr != (s)) {\ @@ -1866,15 +1901,11 @@ stack_double(int is_alloca, char** arg_alloc_base, (isnull) = 1;\ while (k < stk) {\ if (k->type == STK_MEM_START) {\ - if (k->u.mem.end == INVALID_STACK_INDEX) {\ + STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\ + if (endp == 0) {\ (isnull) = 0; break;\ }\ - if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\ - endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ - else\ - endp = (UChar* )k->u.mem.end;\ - /*fprintf(stderr, "num: %d, pstr: %p, endp: %p\n", k->u.mem.num, STACK_AT(k->u.mem.start)->u.mem.pstr, endp);*/ \ - if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ + else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\ (isnull) = 0; break;\ }\ else if (endp != s) {\ @@ -1890,12 +1921,12 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ } while(0) -#define STACK_EMPTY_CHECK_MEMST_REC(isnull,sid,s,reg) do {\ +#define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg) do {\ int level = 0;\ StackType* k = stk;\ while (1) {\ k--;\ - STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \ + STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM_REC");\ if (k->type == STK_EMPTY_CHECK_START) {\ if (k->zid == (sid)) {\ if (level == 0) {\ @@ -1908,20 +1939,25 @@ stack_double(int is_alloca, char** arg_alloc_base, (isnull) = 1;\ while (k < stk) {\ if (k->type == STK_MEM_START) {\ - if (k->u.mem.end == INVALID_STACK_INDEX) {\ - (isnull) = 0; break;\ - }\ - if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\ - endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\ - else\ - endp = (UChar* )k->u.mem.end;\ - if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\ - (isnull) = 0; break;\ - }\ - else if (endp != s) {\ - (isnull) = -1; /* empty, but position changed */ \ + if (level == 0) {\ + STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\ + if (endp == 0) {\ + (isnull) = 0; break;\ + }\ + else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \ + (isnull) = 0; break;\ + }\ + else if (endp != s) {\ + (isnull) = -1; /* empty, but position changed */\ + }\ }\ }\ + else if (k->type == STK_EMPTY_CHECK_START) {\ + if (k->zid == (sid)) level++;\ + }\ + else if (k->type == STK_EMPTY_CHECK_END) {\ + if (k->zid == (sid)) level--;\ + }\ k++;\ }\ break;\ @@ -1958,7 +1994,7 @@ stack_double(int is_alloca, char** arg_alloc_base, }\ }\ } while(0) -#endif /* USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT */ +#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */ #define STACK_GET_REPEAT(sid, k) do {\ int level = 0;\ @@ -2348,7 +2384,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, retry_limit_in_match = msa->retry_limit_in_match; #endif - //n = reg->num_repeat + reg->num_mem * 2; pop_level = reg->stack_pop_level; num_mem = reg->num_mem; STACK_INIT(INIT_MATCH_STACK_SIZE); @@ -2996,7 +3031,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_WORD_BOUNDARY: SOP_IN(OP_WORD_BOUNDARY); { ModeType mode; - GET_MODE_INC(mode, p); // ascii_mode + GET_MODE_INC(mode, p); /* ascii_mode */ if (ON_STR_BEGIN(s)) { DATA_ENSURE(1); @@ -3020,7 +3055,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_NO_WORD_BOUNDARY: SOP_IN(OP_NO_WORD_BOUNDARY); { ModeType mode; - GET_MODE_INC(mode, p); // ascii_mode + GET_MODE_INC(mode, p); /* ascii_mode */ if (ON_STR_BEGIN(s)) { if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) @@ -3044,7 +3079,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_WORD_BEGIN: SOP_IN(OP_WORD_BEGIN); { ModeType mode; - GET_MODE_INC(mode, p); // ascii_mode + GET_MODE_INC(mode, p); /* ascii_mode */ if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { @@ -3059,7 +3094,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_WORD_END: SOP_IN(OP_WORD_END); { ModeType mode; - GET_MODE_INC(mode, p); // ascii_mode + GET_MODE_INC(mode, p); /* ascii_mode */ if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) { if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) { @@ -3395,9 +3430,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, sprev = s; if (backref_match_at_nested_level(reg, stk, stk_base, ic , case_fold_flag, (int )level, (int )tlen, p, &s, end)) { - while (sprev + (len = enclen(encode, sprev)) < s) - sprev += len; - + if (sprev < end) { + while (sprev + (len = enclen(encode, sprev)) < s) + sprev += len; + } p += (SIZE_MEMNUM * tlen); } else @@ -3504,16 +3540,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, continue; break; -#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT +#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT case OP_EMPTY_CHECK_END_MEMST: SOP_IN(OP_EMPTY_CHECK_END_MEMST); { int is_empty; GET_MEMNUM_INC(mem, p); /* mem: null check id */ - STACK_EMPTY_CHECK_MEMST(is_empty, mem, s, reg); + STACK_EMPTY_CHECK_MEM(is_empty, mem, s, reg); if (is_empty) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "EMPTY_CHECK_END_MEMST: skip id:%d, s:%p\n", (int)mem, s); + fprintf(stderr, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s); #endif if (is_empty == -1) goto fail; goto empty_check_found; @@ -3531,14 +3567,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, int is_empty; GET_MEMNUM_INC(mem, p); /* mem: null check id */ -#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT - STACK_EMPTY_CHECK_MEMST_REC(is_empty, mem, s, reg); +#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT + STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg); #else STACK_EMPTY_CHECK_REC(is_empty, mem, s); #endif if (is_empty) { #ifdef ONIG_DEBUG_MATCH - fprintf(stderr, "EMPTY_CHECK_END_MEMST_PUSH: skip id:%d, s:%p\n", + fprintf(stderr, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n", (int )mem, s); #endif if (is_empty == -1) goto fail; @@ -3577,8 +3613,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end, case OP_POP_OUT: SOP_IN(OP_POP_OUT); STACK_POP_ONE; - // for stop backtrack - //CHECK_RETRY_LIMIT_IN_MATCH; + /* for stop backtrack */ + /* CHECK_RETRY_LIMIT_IN_MATCH; */ SOP_OUT; continue; break; @@ -5137,7 +5173,7 @@ onig_get_args_num_by_callout_args(OnigCalloutArgs* args) num = args->num; e = onig_reg_callout_list_at(args->regex, num); - if (IS_NULL(e)) return 0; + if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT; if (e->of == ONIG_CALLOUT_OF_NAME) { return e->u.arg.num; } @@ -5153,7 +5189,7 @@ onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args) num = args->num; e = onig_reg_callout_list_at(args->regex, num); - if (IS_NULL(e)) return 0; + if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT; if (e->of == ONIG_CALLOUT_OF_NAME) { return e->u.arg.passed_num; } @@ -5170,7 +5206,7 @@ onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index, num = args->num; e = onig_reg_callout_list_at(args->regex, num); - if (IS_NULL(e)) return 0; + if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT; if (e->of == ONIG_CALLOUT_OF_NAME) { if (IS_NOT_NULL(type)) *type = e->u.arg.types[index]; if (IS_NOT_NULL(val)) *val = e->u.arg.vals[index]; @@ -5393,6 +5429,8 @@ onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED) { int r; int slot; + long max_val; + OnigCodePoint count_type; OnigType type; OnigValue val; OnigValue aval; @@ -5411,13 +5449,38 @@ onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED) r = onig_get_arg_by_callout_args(args, 0, &type, &aval); if (r != ONIG_NORMAL) return r; + if (type == ONIG_TYPE_TAG) { + r = onig_get_callout_data_by_callout_args(args, aval.tag, 0, &type, &aval); + if (r < ONIG_NORMAL) return r; + else if (r > ONIG_NORMAL) + max_val = 0L; + else + max_val = aval.l; + } + else { /* LONG */ + max_val = aval.l; + } + + r = onig_get_arg_by_callout_args(args, 1, &type, &aval); + if (r != ONIG_NORMAL) return r; + + count_type = aval.c; + if (count_type != '>' && count_type != 'X' && count_type != '<') + return ONIGERR_INVALID_CALLOUT_ARG; if (args->in == ONIG_CALLOUT_IN_RETRACTION) { - val.l--; + if (count_type == '<') { + if (val.l >= max_val) return ONIG_CALLOUT_FAIL; + val.l++; + } + else if (count_type == 'X') + val.l--; } else { - if (val.l >= aval.l) return ONIG_CALLOUT_FAIL; - val.l++; + if (count_type != '<') { + if (val.l >= max_val) return ONIG_CALLOUT_FAIL; + val.l++; + } } r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val); diff --git a/src/regint.h b/src/regint.h index ba8407a..357b489 100644 --- a/src/regint.h +++ b/src/regint.h @@ -59,7 +59,7 @@ #define USE_CALL #define USE_CALLOUT #define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */ -#define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ +#define USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */ #define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */ #define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR @@ -710,7 +710,6 @@ typedef int AbsAddrType; typedef int LengthType; typedef int RepeatNumType; typedef int MemNumType; -typedef short int StateCheckNumType; typedef void* PointerType; typedef int SaveType; typedef int UpdateVarType; diff --git a/src/regparse.c b/src/regparse.c index 6e95a14..64923ad 100644 --- a/src/regparse.c +++ b/src/regparse.c @@ -525,7 +525,7 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key, typedef struct { OnigEncoding enc; - int type; // callout type: single or not + int type; /* callout type: single or not */ UChar* s; UChar* end; } st_callout_name_key; @@ -1583,7 +1583,7 @@ onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type, } } - r = id; // return id + r = id; return r; } @@ -1637,24 +1637,36 @@ onig_get_callout_tag_end(regex_t* reg, int callout_num) extern OnigCalloutType onig_get_callout_type_by_name_id(int name_id) { + if (name_id < 0 || name_id >= GlobalCalloutNameList->n) + return 0; + return GlobalCalloutNameList->v[name_id].type; } extern OnigCalloutFunc onig_get_callout_start_func_by_name_id(int name_id) { + if (name_id < 0 || name_id >= GlobalCalloutNameList->n) + return 0; + return GlobalCalloutNameList->v[name_id].start_func; } extern OnigCalloutFunc onig_get_callout_end_func_by_name_id(int name_id) { + if (name_id < 0 || name_id >= GlobalCalloutNameList->n) + return 0; + return GlobalCalloutNameList->v[name_id].end_func; } extern int onig_get_callout_in_by_name_id(int name_id) { + if (name_id < 0 || name_id >= GlobalCalloutNameList->n) + return 0; + return GlobalCalloutNameList->v[name_id].in; } @@ -1685,6 +1697,9 @@ get_callout_opt_default_by_name_id(int name_id, int index) extern UChar* onig_get_callout_name_by_name_id(int name_id) { + if (name_id < 0 || name_id >= GlobalCalloutNameList->n) + return 0; + return GlobalCalloutNameList->v[name_id].name; } @@ -2689,7 +2704,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent, for (i = 0; i < 4; i++) ns[i] = NULL_NODE; ns[1] = absent; - ns[3] = step_one; // for err + ns[3] = step_one; /* for err */ r = node_new_save_gimmick(&ns[0], SAVE_S, env); if (r != 0) goto err; @@ -5341,8 +5356,11 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) if (num_type != IS_NOT_NUM) { if (num_type == IS_REL_NUM) { gnum = backref_rel_to_abs(gnum, env); - if (gnum < 0) + if (gnum < 0) { + onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE, + prev, name_end); return ONIGERR_UNDEFINED_GROUP_REFERENCE; + } } tok->u.call.by_number = 1; tok->u.call.gnum = gnum; @@ -5563,8 +5581,11 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env) else { if (num_type == IS_REL_NUM) { gnum = backref_rel_to_abs(gnum, env); - if (gnum < 0) + if (gnum < 0) { + onig_scan_env_set_error_string(env, + ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end); return ONIGERR_UNDEFINED_GROUP_REFERENCE; + } } tok->u.call.by_number = 1; tok->u.call.gnum = gnum; @@ -6583,7 +6604,6 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv PFETCH_S(c); } else if (c == '>') { /* no needs (default) */ - //in = ONIG_CALLOUT_IN_PROGRESS; if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; PFETCH_S(c); } @@ -6823,7 +6843,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en OnigEncoding enc = env->enc; UChar* p = *src; - //PFETCH_READY; + /* PFETCH_READY; */ if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN; node = 0; @@ -7053,12 +7073,12 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; - if (PPEEK_IS('|')) { // (?~|generator|absent) + if (PPEEK_IS('|')) { /* (?~|generator|absent) */ PINC; if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; head_bar = 1; - if (PPEEK_IS(')')) { // (?~|) : range clear + if (PPEEK_IS(')')) { /* (?~|) : range clear */ PINC; r = make_range_clear(np, env); if (r != 0) return r; @@ -7083,7 +7103,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) { expr = NULL_NODE; is_range_cutter = 1; - //return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; + /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */ } else { absent = NODE_CAR(top); @@ -7778,7 +7798,7 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end, len = 1; while (1) { if (len >= ONIGENC_MBC_MINLEN(env->enc)) { - if (len == enclen(env->enc, STR_(*np)->s)) {//should not enclen_end() + if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */ r = fetch_token(tok, src, end, env); NODE_STRING_CLEAR_RAW(*np); goto string_end; @@ -337,5 +337,7 @@ OnigEncodingType OnigEncodingSJIS = { is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + ENC_FLAG_ASCII_COMPATIBLE, + 0, 0 }; diff --git a/src/unicode.c b/src/unicode.c index e585937..a8bae66 100644 --- a/src/unicode.c +++ b/src/unicode.c @@ -335,7 +335,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc, n++; } } - code = items[0].code[0]; // for multi-code to unfold search. + code = items[0].code[0]; /* for multi-code to unfold search. */ } else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) { OnigCodePoint cs[3][4]; diff --git a/src/utf16_be.c b/src/utf16_be.c index 098ab54..8f5b8bf 100644 --- a/src/utf16_be.c +++ b/src/utf16_be.c @@ -38,16 +38,19 @@ init(void) int id; OnigEncoding enc; char* name; - unsigned int t_long; unsigned int args[4]; OnigValue opts[4]; enc = ONIG_ENCODING_UTF16_BE; - t_long = ONIG_TYPE_LONG; name = "\000F\000A\000I\000L\000\000"; BC0_P(name, fail); name = "\000M\000I\000S\000M\000A\000T\000C\000H\000\000"; BC0_P(name, mismatch); - name = "\000M\000A\000X\000\000"; BC_B(name, max, 1, &t_long); + + name = "\000M\000A\000X\000\000"; + args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + args[1] = ONIG_TYPE_CHAR; + opts[0].c = 'X'; + BC_B_O(name, max, 2, args, 1, opts); name = "\000E\000R\000R\000O\000R\000\000"; args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT; @@ -274,5 +277,7 @@ OnigEncodingType OnigEncodingUTF16_BE = { onigenc_always_false_is_allowed_reverse_match, init, 0, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + ENC_FLAG_UNICODE, + 0, 0 }; diff --git a/src/utf16_le.c b/src/utf16_le.c index dc0d3f1..92bf318 100644 --- a/src/utf16_le.c +++ b/src/utf16_le.c @@ -36,16 +36,19 @@ init(void) int id; OnigEncoding enc; char* name; - unsigned int t_long; unsigned int args[4]; OnigValue opts[4]; enc = ONIG_ENCODING_UTF16_LE; - t_long = ONIG_TYPE_LONG; name = "F\000A\000I\000L\000\000\000"; BC0_P(name, fail); name = "M\000I\000S\000M\000A\000T\000C\000H\000\000\000"; BC0_P(name, mismatch); - name = "M\000A\000X\000\000\000"; BC_B(name, max, 1, &t_long); + + name = "M\000A\000X\000\000\000"; + args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG; + args[1] = ONIG_TYPE_CHAR; + opts[0].c = 'X'; + BC_B_O(name, max, 2, args, 1, opts); name = "E\000R\000R\000O\000R\000\000\000"; args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT; @@ -282,5 +285,7 @@ OnigEncodingType OnigEncodingUTF16_LE = { onigenc_always_false_is_allowed_reverse_match, init, 0, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + ENC_FLAG_UNICODE, + 0, 0 }; diff --git a/src/utf32_be.c b/src/utf32_be.c index 68760bb..92476ec 100644 --- a/src/utf32_be.c +++ b/src/utf32_be.c @@ -190,5 +190,7 @@ OnigEncodingType OnigEncodingUTF32_BE = { onigenc_always_false_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + ENC_FLAG_UNICODE, + 0, 0 }; diff --git a/src/utf32_le.c b/src/utf32_le.c index 8208cd0..dc3fd92 100644 --- a/src/utf32_le.c +++ b/src/utf32_le.c @@ -190,5 +190,7 @@ OnigEncodingType OnigEncodingUTF32_LE = { onigenc_always_false_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + ENC_FLAG_UNICODE, + 0, 0 }; @@ -29,7 +29,7 @@ #include "regenc.h" -//#define USE_INVALID_CODE_SCHEME +/* #define USE_INVALID_CODE_SCHEME */ #ifdef USE_INVALID_CODE_SCHEME /* virtual codepoint values for invalid encoding byte 0xfe and 0xff */ @@ -296,5 +296,7 @@ OnigEncodingType OnigEncodingUTF8 = { onigenc_always_true_is_allowed_reverse_match, NULL, /* init */ NULL, /* is_initialized */ - is_valid_mbc_string + is_valid_mbc_string, + ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_UNICODE, + 0, 0 }; |