summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Jörg Frings-Fürst <debian@jff.email> 2018-05-04 18:26:00 +0200
committer Jörg Frings-Fürst <debian@jff.email> 2018-05-04 18:26:00 +0200
commit a7c6cf32519f775b01975b104a0c8da3c76beab5 (patch)
tree ce8bdf27499179198a1f264a29d34a93c3c92902 /src
parent fd4ff4e58174679784d7698880717eefc9399ba7 (diff)
parent 0ad6ddc1cbc5b0f36547798994b352a09ae5cf1c (diff)
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'src')
-rw-r--r-- src/Makefile.am 5
-rw-r--r-- src/ascii.c 12
-rw-r--r-- src/big5.c 3
-rw-r--r-- src/cp1251.c 3
-rw-r--r-- src/euc_jp.c 3
-rw-r--r-- src/euc_kr.c 3
-rw-r--r-- src/euc_tw.c 3
-rw-r--r-- src/gb18030.c 199
-rw-r--r-- src/iso8859_1.c 3
-rw-r--r-- src/iso8859_10.c 3
-rw-r--r-- src/iso8859_11.c 3
-rw-r--r-- src/iso8859_13.c 3
-rw-r--r-- src/iso8859_14.c 3
-rw-r--r-- src/iso8859_15.c 3
-rw-r--r-- src/iso8859_16.c 3
-rw-r--r-- src/iso8859_2.c 3
-rw-r--r-- src/iso8859_3.c 3
-rw-r--r-- src/iso8859_4.c 3
-rw-r--r-- src/iso8859_5.c 3
-rw-r--r-- src/iso8859_6.c 3
-rw-r--r-- src/iso8859_7.c 3
-rw-r--r-- src/iso8859_8.c 3
-rw-r--r-- src/iso8859_9.c 3
-rw-r--r-- src/koi8.c 3
-rw-r--r-- src/koi8_r.c 3
-rw-r--r-- src/oniguruma.h 18
-rw-r--r-- src/regcomp.c 15
-rw-r--r-- src/regenc.h 11
-rw-r--r-- src/regexec.c 237
-rw-r--r-- src/regint.h 3
-rw-r--r-- src/regparse.c 42
-rw-r--r-- src/sjis.c 4
-rw-r--r-- src/unicode.c 2
-rw-r--r-- src/utf16_be.c 13
-rw-r--r-- src/utf16_le.c 13
-rw-r--r-- src/utf32_be.c 4
-rw-r--r-- src/utf32_le.c 4
-rw-r--r-- src/utf8.c 6
38 files changed, 395 insertions, 259 deletions
diff --git a/src/Makefile.am b/src/Makefile.am
index 911aecd..c7a4705 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -19,8 +19,9 @@ endif
lib_LTLIBRARIES = $(libname)
libonig_la_SOURCES = regint.h regparse.h regenc.h st.h \
- regerror.c regparse.c regext.c regcomp.c regexec.c reggnu.c \
- regenc.c regsyntax.c regtrav.c regversion.c st.c \
+ regparse.c regcomp.c regexec.c \
+ regenc.c regerror.c regext.c regsyntax.c regtrav.c regversion.c st.c \
+ reggnu.c \
$(posix_sources) \
unicode.c \
unicode_unfold_key.c \
diff --git a/src/ascii.c b/src/ascii.c
index 7efaa26..eb38944 100644
--- a/src/ascii.c
+++ b/src/ascii.c
@@ -37,16 +37,19 @@ init(void)
int id;
OnigEncoding enc;
char* name;
- unsigned int t_long;
unsigned int args[4];
OnigValue opts[4];
enc = ONIG_ENCODING_ASCII;
- t_long = ONIG_TYPE_LONG;
name = "FAIL"; BC0_P(name, fail);
name = "MISMATCH"; BC0_P(name, mismatch);
- name = "MAX"; BC_B(name, max, 1, &t_long);
+
+ name = "MAX";
+ args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ args[1] = ONIG_TYPE_CHAR;
+ opts[0].c = 'X';
+ BC_B_O(name, max, 2, args, 1, opts);
name = "ERROR";
args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT;
@@ -110,5 +113,6 @@ OnigEncodingType OnigEncodingASCII = {
init,
0, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/big5.c b/src/big5.c
index ff0c51b..dbc750d 100644
--- a/src/big5.c
+++ b/src/big5.c
@@ -187,5 +187,6 @@ OnigEncodingType OnigEncodingBIG5 = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/cp1251.c b/src/cp1251.c
index f7b43c3..e217037 100644
--- a/src/cp1251.c
+++ b/src/cp1251.c
@@ -200,5 +200,6 @@ OnigEncodingType OnigEncodingCP1251 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/euc_jp.c b/src/euc_jp.c
index 8dd6ac1..ae8c2fe 100644
--- a/src/euc_jp.c
+++ b/src/euc_jp.c
@@ -307,5 +307,6 @@ OnigEncodingType OnigEncodingEUC_JP = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/euc_kr.c b/src/euc_kr.c
index 08bfa1c..def311b 100644
--- a/src/euc_kr.c
+++ b/src/euc_kr.c
@@ -185,5 +185,6 @@ OnigEncodingType OnigEncodingEUC_CN = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/euc_tw.c b/src/euc_tw.c
index dbf0eac..8738598 100644
--- a/src/euc_tw.c
+++ b/src/euc_tw.c
@@ -168,5 +168,6 @@ OnigEncodingType OnigEncodingEUC_TW = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/gb18030.c b/src/gb18030.c
index 073c83b..d4a1108 100644
--- a/src/gb18030.c
+++ b/src/gb18030.c
@@ -89,25 +89,25 @@ is_valid_mbc_string(const UChar* p, const UChar* end)
p++;
if (p >= end) return FALSE;
if (*p < 0x40) {
- if (*p < 0x30 || *p > 0x39)
- return FALSE;
+ if (*p < 0x30 || *p > 0x39)
+ return FALSE;
- p++;
- if (p >= end) return FALSE;
- if (*p < 0x81 || *p == 0xff) return FALSE;
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x81 || *p == 0xff) return FALSE;
- p++;
- if (p >= end) return FALSE;
- if (*p < 0x30 || *p > 0x39)
- return FALSE;
+ p++;
+ if (p >= end) return FALSE;
+ if (*p < 0x30 || *p > 0x39)
+ return FALSE;
- p++;
+ p++;
}
else if (*p == 0x7f || *p == 0xff) {
- return FALSE;
+ return FALSE;
}
else {
- p++;
+ p++;
}
}
}
@@ -138,7 +138,7 @@ gb18030_mbc_case_fold(OnigCaseFoldType flag, const UChar** pp, const UChar* end,
#if 0
static int
gb18030_is_mbc_ambiguous(OnigCaseFoldType flag,
- const UChar** pp, const UChar* end)
+ const UChar** pp, const UChar* end)
{
return onigenc_mbn_is_mbc_ambiguous(ONIG_ENCODING_GB18030, flag, pp, end);
}
@@ -197,16 +197,16 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case S_START:
switch (GB18030_MAP[*p]) {
case C1:
- return (UChar *)s;
+ return (UChar *)s;
case C2:
- state = S_one_C2; /* C2 */
- break;
+ state = S_one_C2; /* C2 */
+ break;
case C4:
- state = S_one_C4; /* C4 */
- break;
+ state = S_one_C4; /* C4 */
+ break;
case CM:
- state = S_one_CM; /* CM */
- break;
+ state = S_one_CM; /* CM */
+ break;
}
break;
case S_one_C2: /* C2 */
@@ -214,10 +214,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)s;
+ return (UChar *)s;
case CM:
- state = S_odd_CM_one_CX; /* CM C2 */
- break;
+ state = S_odd_CM_one_CX; /* CM C2 */
+ break;
}
break;
case S_one_C4: /* C4 */
@@ -225,23 +225,23 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)s;
+ return (UChar *)s;
case CM:
- state = S_one_CMC4;
- break;
+ state = S_one_CMC4;
+ break;
}
break;
case S_one_CM: /* CM */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
- return (UChar *)s;
+ return (UChar *)s;
case C4:
- state = S_odd_C4CM;
- break;
+ state = S_odd_C4CM;
+ break;
case CM:
- state = S_odd_CM_one_CX; /* CM CM */
- break;
+ state = S_odd_CM_one_CX; /* CM CM */
+ break;
}
break;
@@ -250,10 +250,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)(s - 1);
+ return (UChar *)(s - 1);
case CM:
- state = S_even_CM_one_CX;
- break;
+ state = S_even_CM_one_CX;
+ break;
}
break;
case S_even_CM_one_CX: /* CM CM C2 */ /* CM CM CM */ /* CM CM C4 */
@@ -261,10 +261,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)s;
+ return (UChar *)s;
case CM:
- state = S_odd_CM_one_CX;
- break;
+ state = S_odd_CM_one_CX;
+ break;
}
break;
@@ -272,26 +272,26 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
switch (GB18030_MAP[*p]) {
case C1:
case C2:
- return (UChar *)(s - 1);
+ return (UChar *)(s - 1);
case C4:
- state = S_one_C4_odd_CMC4; /* C4 CM C4 */
- break;
+ state = S_one_C4_odd_CMC4; /* C4 CM C4 */
+ break;
case CM:
- state = S_even_CM_one_CX; /* CM CM C4 */
- break;
+ state = S_even_CM_one_CX; /* CM CM C4 */
+ break;
}
break;
case S_odd_CMC4: /* CM C4 CM C4 CM C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
- return (UChar *)(s - 1);
+ return (UChar *)(s - 1);
case C4:
- state = S_one_C4_odd_CMC4;
- break;
+ state = S_one_C4_odd_CMC4;
+ break;
case CM:
- state = S_odd_CM_odd_CMC4;
- break;
+ state = S_odd_CM_odd_CMC4;
+ break;
}
break;
case S_one_C4_odd_CMC4: /* C4 CM C4 */
@@ -299,23 +299,23 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)(s - 1);
+ return (UChar *)(s - 1);
case CM:
- state = S_even_CMC4; /* CM C4 CM C4 */
- break;
+ state = S_even_CMC4; /* CM C4 CM C4 */
+ break;
}
break;
case S_even_CMC4: /* CM C4 CM C4 */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
- return (UChar *)(s - 3);
+ return (UChar *)(s - 3);
case C4:
- state = S_one_C4_even_CMC4;
- break;
+ state = S_one_C4_even_CMC4;
+ break;
case CM:
- state = S_odd_CM_even_CMC4;
- break;
+ state = S_odd_CM_even_CMC4;
+ break;
}
break;
case S_one_C4_even_CMC4: /* C4 CM C4 CM C4 */
@@ -323,10 +323,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)(s - 3);
+ return (UChar *)(s - 3);
case CM:
- state = S_odd_CMC4;
- break;
+ state = S_odd_CMC4;
+ break;
}
break;
@@ -335,10 +335,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)(s - 3);
+ return (UChar *)(s - 3);
case CM:
- state = S_even_CM_odd_CMC4;
- break;
+ state = S_even_CM_odd_CMC4;
+ break;
}
break;
case S_even_CM_odd_CMC4: /* CM CM CM C4 CM C4 CM C4 */
@@ -346,10 +346,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)(s - 1);
+ return (UChar *)(s - 1);
case CM:
- state = S_odd_CM_odd_CMC4;
- break;
+ state = S_odd_CM_odd_CMC4;
+ break;
}
break;
@@ -358,10 +358,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)(s - 1);
+ return (UChar *)(s - 1);
case CM:
- state = S_even_CM_even_CMC4;
- break;
+ state = S_even_CM_even_CMC4;
+ break;
}
break;
case S_even_CM_even_CMC4: /* CM CM CM C4 CM C4 */
@@ -369,10 +369,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)(s - 3);
+ return (UChar *)(s - 3);
case CM:
- state = S_odd_CM_even_CMC4;
- break;
+ state = S_odd_CM_even_CMC4;
+ break;
}
break;
@@ -381,23 +381,23 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)s;
+ return (UChar *)s;
case CM:
- state = S_one_CM_odd_C4CM; /* CM C4 CM */
- break;
+ state = S_one_CM_odd_C4CM; /* CM C4 CM */
+ break;
}
break;
case S_one_CM_odd_C4CM: /* CM C4 CM */ /* CM C4 CM C4 CM C4 CM */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
- return (UChar *)(s - 2); /* |CM C4 CM */
+ return (UChar *)(s - 2); /* |CM C4 CM */
case C4:
- state = S_even_C4CM;
- break;
+ state = S_even_C4CM;
+ break;
case CM:
- state = S_even_CM_odd_C4CM;
- break;
+ state = S_even_CM_odd_C4CM;
+ break;
}
break;
case S_even_C4CM: /* C4 CM C4 CM */
@@ -405,23 +405,23 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)(s - 2); /* C4|CM C4 CM */
+ return (UChar *)(s - 2); /* C4|CM C4 CM */
case CM:
- state = S_one_CM_even_C4CM;
- break;
+ state = S_one_CM_even_C4CM;
+ break;
}
break;
case S_one_CM_even_C4CM: /* CM C4 CM C4 CM */
switch (GB18030_MAP[*p]) {
case C1:
case C2:
- return (UChar *)(s - 0); /*|CM C4 CM C4|CM */
+ return (UChar *)(s - 0); /*|CM C4 CM C4|CM */
case C4:
- state = S_odd_C4CM;
- break;
+ state = S_odd_C4CM;
+ break;
case CM:
- state = S_even_CM_even_C4CM;
- break;
+ state = S_even_CM_even_C4CM;
+ break;
}
break;
@@ -430,10 +430,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)(s - 0); /* |CM CM|C4|CM */
+ return (UChar *)(s - 0); /* |CM CM|C4|CM */
case CM:
- state = S_odd_CM_odd_C4CM;
- break;
+ state = S_odd_CM_odd_C4CM;
+ break;
}
break;
case S_odd_CM_odd_C4CM: /* CM CM CM C4 CM */
@@ -441,10 +441,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)(s - 2); /* |CM CM|CM C4 CM */
+ return (UChar *)(s - 2); /* |CM CM|CM C4 CM */
case CM:
- state = S_even_CM_odd_C4CM;
- break;
+ state = S_even_CM_odd_C4CM;
+ break;
}
break;
@@ -453,10 +453,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */
+ return (UChar *)(s - 2); /* |CM CM|C4|CM C4 CM */
case CM:
- state = S_odd_CM_even_C4CM;
- break;
+ state = S_odd_CM_even_C4CM;
+ break;
}
break;
case S_odd_CM_even_C4CM: /* CM CM CM C4 CM C4 CM */
@@ -464,10 +464,10 @@ gb18030_left_adjust_char_head(const UChar* start, const UChar* s)
case C1:
case C2:
case C4:
- return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */
+ return (UChar *)(s - 0); /* |CM CM|CM C4 CM C4|CM */
case CM:
- state = S_even_CM_even_C4CM;
- break;
+ state = S_even_CM_even_C4CM;
+ break;
}
break;
}
@@ -535,5 +535,6 @@ OnigEncodingType OnigEncodingGB18030 = {
NULL, /* init */
NULL, /* is_initialized */
is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_1.c b/src/iso8859_1.c
index bcd7e26..ff47b80 100644
--- a/src/iso8859_1.c
+++ b/src/iso8859_1.c
@@ -272,5 +272,6 @@ OnigEncodingType OnigEncodingISO_8859_1 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_10.c b/src/iso8859_10.c
index a5946cc..f9804e2 100644
--- a/src/iso8859_10.c
+++ b/src/iso8859_10.c
@@ -239,5 +239,6 @@ OnigEncodingType OnigEncodingISO_8859_10 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_11.c b/src/iso8859_11.c
index ec94fd1..108ee8a 100644
--- a/src/iso8859_11.c
+++ b/src/iso8859_11.c
@@ -96,5 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_11 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_13.c b/src/iso8859_13.c
index fba7fd4..9585355 100644
--- a/src/iso8859_13.c
+++ b/src/iso8859_13.c
@@ -228,5 +228,6 @@ OnigEncodingType OnigEncodingISO_8859_13 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_14.c b/src/iso8859_14.c
index e1f71f5..83fc551 100644
--- a/src/iso8859_14.c
+++ b/src/iso8859_14.c
@@ -241,5 +241,6 @@ OnigEncodingType OnigEncodingISO_8859_14 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_15.c b/src/iso8859_15.c
index 236e9e7..3a7ad05 100644
--- a/src/iso8859_15.c
+++ b/src/iso8859_15.c
@@ -235,5 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_15 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_16.c b/src/iso8859_16.c
index 42045bd..02022d9 100644
--- a/src/iso8859_16.c
+++ b/src/iso8859_16.c
@@ -237,5 +237,6 @@ OnigEncodingType OnigEncodingISO_8859_16 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_2.c b/src/iso8859_2.c
index db93046..ecdbb99 100644
--- a/src/iso8859_2.c
+++ b/src/iso8859_2.c
@@ -235,5 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_2 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_3.c b/src/iso8859_3.c
index 6fe5e6f..739f1c9 100644
--- a/src/iso8859_3.c
+++ b/src/iso8859_3.c
@@ -235,5 +235,6 @@ OnigEncodingType OnigEncodingISO_8859_3 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_4.c b/src/iso8859_4.c
index ee1eb93..4f2b6a0 100644
--- a/src/iso8859_4.c
+++ b/src/iso8859_4.c
@@ -237,5 +237,6 @@ OnigEncodingType OnigEncodingISO_8859_4 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_5.c b/src/iso8859_5.c
index 7d828e1..cf41061 100644
--- a/src/iso8859_5.c
+++ b/src/iso8859_5.c
@@ -226,5 +226,6 @@ OnigEncodingType OnigEncodingISO_8859_5 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_6.c b/src/iso8859_6.c
index a959e98..1ffe99f 100644
--- a/src/iso8859_6.c
+++ b/src/iso8859_6.c
@@ -96,5 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_6 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_7.c b/src/iso8859_7.c
index e695523..87288c2 100644
--- a/src/iso8859_7.c
+++ b/src/iso8859_7.c
@@ -222,5 +222,6 @@ OnigEncodingType OnigEncodingISO_8859_7 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_8.c b/src/iso8859_8.c
index 66b63b8..8f162a4 100644
--- a/src/iso8859_8.c
+++ b/src/iso8859_8.c
@@ -96,5 +96,6 @@ OnigEncodingType OnigEncodingISO_8859_8 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/iso8859_9.c b/src/iso8859_9.c
index d780293..52589cf 100644
--- a/src/iso8859_9.c
+++ b/src/iso8859_9.c
@@ -228,5 +228,6 @@ OnigEncodingType OnigEncodingISO_8859_9 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/koi8.c b/src/koi8.c
index 8531825..9fb2ee5 100644
--- a/src/koi8.c
+++ b/src/koi8.c
@@ -250,5 +250,6 @@ OnigEncodingType OnigEncodingKOI8 = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/koi8_r.c b/src/koi8_r.c
index e88cfe3..8adc399 100644
--- a/src/koi8_r.c
+++ b/src/koi8_r.c
@@ -212,5 +212,6 @@ OnigEncodingType OnigEncodingKOI8_R = {
NULL, /* init */
NULL, /* is_initialized */
onigenc_always_true_is_valid_mbc_string,
- 0, 0, 0
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/oniguruma.h b/src/oniguruma.h
index 349c00e..322959a 100644
--- a/src/oniguruma.h
+++ b/src/oniguruma.h
@@ -36,7 +36,7 @@ extern "C" {
#define ONIGURUMA
#define ONIGURUMA_VERSION_MAJOR 6
#define ONIGURUMA_VERSION_MINOR 8
-#define ONIGURUMA_VERSION_TEENY 1
+#define ONIGURUMA_VERSION_TEENY 2
#define ONIGURUMA_VERSION_INT 60801
@@ -115,7 +115,7 @@ typedef struct {
OnigCodePoint one_or_more_time;
OnigCodePoint anychar_anytime;
} OnigMetaCharTableType;
-
+
typedef int (*OnigApplyAllCaseFoldFunc)(OnigCodePoint from, OnigCodePoint* to, int to_len, void* arg);
typedef struct OnigEncodingTypeST {
@@ -344,7 +344,7 @@ int onigenc_str_bytelen_null P_((OnigEncoding enc, const OnigUChar* p));
ONIG_EXTERN
int onigenc_is_valid_mbc_string P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end));
ONIG_EXTERN
-UChar* onigenc_strdup P_((OnigEncoding enc, const UChar* s, const UChar* end));
+OnigUChar* onigenc_strdup P_((OnigEncoding enc, const OnigUChar* s, const OnigUChar* end));
/* PART: regular expression */
@@ -549,7 +549,7 @@ ONIG_EXTERN OnigSyntaxType* OnigDefaultSyntax;
#define ONIGERR_SPECIFIED_ENCODING_CANT_CONVERT_TO_WIDE_CHAR -22
#define ONIGERR_FAIL_TO_INITIALIZE -23
/* general error */
-#define ONIGERR_INVALID_ARGUMENT -30
+#define ONIGERR_INVALID_ARGUMENT -30
/* syntax error */
#define ONIGERR_END_PATTERN_AT_LEFT_BRACE -100
#define ONIGERR_END_PATTERN_AT_LEFT_BRACKET -101
@@ -894,6 +894,8 @@ ONIG_EXTERN
int onig_set_progress_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f));
ONIG_EXTERN
int onig_set_retraction_callout_of_match_param P_((OnigMatchParam* param, OnigCalloutFunc f));
+ONIG_EXTERN
+int onig_set_callout_user_data_of_match_param P_((OnigMatchParam* param, void* user_data));
/* for callout functions */
ONIG_EXTERN
@@ -905,15 +907,15 @@ OnigCalloutFunc onig_get_retraction_callout P_((void));
ONIG_EXTERN
int onig_set_retraction_callout P_((OnigCalloutFunc f));
ONIG_EXTERN
-int onig_set_callout_of_name P_((OnigEncoding enc, OnigCalloutType type, OnigUChar* name, OnigUChar* name_end, int callout_in, OnigCalloutFunc callout, OnigCalloutFunc end_callout, int arg_num, unsigned int arg_types[], int optional_arg_num, OnigValue opt_defaults[])); /* name: single-byte string */
+int onig_set_callout_of_name P_((OnigEncoding enc, OnigCalloutType type, OnigUChar* name, OnigUChar* name_end, int callout_in, OnigCalloutFunc callout, OnigCalloutFunc end_callout, int arg_num, unsigned int arg_types[], int optional_arg_num, OnigValue opt_defaults[]));
ONIG_EXTERN
OnigUChar* onig_get_callout_name_by_name_id P_((int id));
ONIG_EXTERN
-int onig_get_callout_num_by_tag P_((OnigRegex reg, const UChar* tag, const UChar* tag_end));
+int onig_get_callout_num_by_tag P_((OnigRegex reg, const OnigUChar* tag, const OnigUChar* tag_end));
ONIG_EXTERN
-int onig_get_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const UChar* tag, const UChar* tag_end, int slot, OnigType* type, OnigValue* val));
+int onig_get_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const OnigUChar* tag, const OnigUChar* tag_end, int slot, OnigType* type, OnigValue* val));
ONIG_EXTERN
-int onig_set_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const UChar* tag, const UChar* tag_end, int slot, OnigType type, OnigValue* val));
+int onig_set_callout_data_by_tag P_((OnigRegex reg, OnigMatchParam* mp, const OnigUChar* tag, const OnigUChar* tag_end, int slot, OnigType type, OnigValue* val));
/* used in callout functions */
ONIG_EXTERN
diff --git a/src/regcomp.c b/src/regcomp.c
index a19109f..f953ed1 100644
--- a/src/regcomp.c
+++ b/src/regcomp.c
@@ -2450,7 +2450,6 @@ is_exclusive(Node* x, Node* y, regex_t* reg)
if (NODE_STRING_LEN(x) == 0)
break;
- //c = *(xs->s);
switch (ytype) {
case NODE_CTYPE:
switch (CTYPE_(y)->ctype) {
@@ -2758,7 +2757,7 @@ tree_min_len(Node* node, ScanEnv* env)
len = en->min_len;
else {
if (NODE_IS_MARK1(node))
- len = 0; // recursive
+ len = 0; /* recursive */
else {
NODE_STATUS_ADD(node, NST_MARK1);
len = tree_min_len(NODE_BODY(node), env);
@@ -3763,7 +3762,7 @@ expand_case_fold_string(Node* node, regex_t* reg)
return r;
}
-#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
+#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
static enum QuantBodyEmpty
quantifiers_memory_node_info(Node* node)
{
@@ -3847,7 +3846,7 @@ quantifiers_memory_node_info(Node* node)
return r;
}
-#endif /* USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT */
+#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
#define IN_ALT (1<<0)
@@ -4375,7 +4374,7 @@ setup_quant(Node* node, regex_t* reg, int state, ScanEnv* env)
if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
d = tree_min_len(body, env);
if (d == 0) {
-#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
+#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
qn->body_empty_info = quantifiers_memory_node_info(body);
if (qn->body_empty_info == QUANT_BODY_IS_EMPTY_REC) {
if (NODE_TYPE(body) == NODE_ENCLOSURE &&
@@ -5979,7 +5978,10 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
#endif
root = 0;
- if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
+ if (IS_NOT_NULL(einfo)) {
+ einfo->enc = reg->enc;
+ einfo->par = (UChar* )NULL;
+ }
#ifdef ONIG_DEBUG
print_enc_string(stderr, reg->enc, pattern, pattern_end);
@@ -6124,7 +6126,6 @@ onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
err:
if (IS_NOT_NULL(scan_env.error)) {
if (IS_NOT_NULL(einfo)) {
- einfo->enc = scan_env.enc;
einfo->par = scan_env.error;
einfo->par_end = scan_env.error_end;
}
diff --git a/src/regenc.h b/src/regenc.h
index 4dd89ba..ae7a774 100644
--- a/src/regenc.h
+++ b/src/regenc.h
@@ -120,6 +120,10 @@ struct PropertyNameCtype {
#define ONIG_ENCODING_INIT_DEFAULT ONIG_ENCODING_ASCII
+#define ENC_FLAG_ASCII_COMPATIBLE (1<<0)
+#define ENC_FLAG_UNICODE (1<<1)
+
+
/* for encoding system implementation (internal) */
extern int onigenc_end(void);
extern int onigenc_ascii_apply_all_case_fold P_((OnigCaseFoldType flag, OnigApplyAllCaseFoldFunc f, void* arg));
@@ -156,7 +160,7 @@ extern int onigenc_mb4_code_to_mbc P_((OnigEncoding enc, OnigCodePoint code, UCh
extern int onigenc_mb4_is_code_ctype P_((OnigEncoding enc, OnigCodePoint code, unsigned int ctype));
extern struct PropertyNameCtype* euc_jp_lookup_property_name P_((register const char *str, register unsigned int len));
extern struct PropertyNameCtype* sjis_lookup_property_name P_((register const char *str, register unsigned int len));
-//extern const struct PropertyNameCtype* unicode_lookup_property_name P_((register const char *str, register unsigned int len));
+/* extern const struct PropertyNameCtype* unicode_lookup_property_name P_((register const char *str, register unsigned int len)); */
/* in enc/unicode.c */
extern int onigenc_unicode_is_code_ctype P_((OnigCodePoint code, unsigned int ctype));
@@ -250,8 +254,9 @@ extern const unsigned short OnigEncAsciiCtypeTable[];
ONIGENC_IS_ASCII_CODE_CTYPE(code, ONIGENC_CTYPE_LOWER))
#define ONIGENC_IS_UNICODE_ENCODING(enc) \
- ((enc)->is_code_ctype == onigenc_unicode_is_code_ctype)
+ (((enc)->flag & ENC_FLAG_UNICODE) != 0)
-#define ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc) ((enc)->min_enc_len == 1)
+#define ONIGENC_IS_ASCII_COMPATIBLE_ENCODING(enc) \
+ (((enc)->flag & ENC_FLAG_ASCII_COMPATIBLE) != 0)
#endif /* REGENC_H */
diff --git a/src/regexec.c b/src/regexec.c
index 35e3698..1ec5183 100644
--- a/src/regexec.c
+++ b/src/regexec.c
@@ -52,9 +52,9 @@ typedef struct {
struct OnigMatchParamStruct {
unsigned int match_stack_limit;
unsigned long retry_limit_in_match;
+#ifdef USE_CALLOUT
OnigCalloutFunc progress_callout_of_contents;
OnigCalloutFunc retraction_callout_of_contents;
-#ifdef USE_CALLOUT
int match_at_call_counter;
void* callout_user_data;
CalloutData* callout_data;
@@ -81,15 +81,34 @@ onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* param,
extern int
onig_set_progress_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
{
+#ifdef USE_CALLOUT
param->progress_callout_of_contents = f;
return ONIG_NORMAL;
+#else
+ return ONIG_NO_SUPPORT_CONFIG;
+#endif
}
extern int
onig_set_retraction_callout_of_match_param(OnigMatchParam* param, OnigCalloutFunc f)
{
+#ifdef USE_CALLOUT
param->retraction_callout_of_contents = f;
return ONIG_NORMAL;
+#else
+ return ONIG_NO_SUPPORT_CONFIG;
+#endif
+}
+
+extern int
+onig_set_callout_user_data_of_match_param(OnigMatchParam* param, void* user_data)
+{
+#ifdef USE_CALLOUT
+ param->callout_user_data = user_data;
+ return ONIG_NORMAL;
+#else
+ return ONIG_NO_SUPPORT_CONFIG;
+#endif
}
@@ -114,19 +133,21 @@ typedef struct {
#ifdef ONIG_DEBUG
/* arguments type */
-#define ARG_SPECIAL -1
-#define ARG_NON 0
-#define ARG_RELADDR 1
-#define ARG_ABSADDR 2
-#define ARG_LENGTH 3
-#define ARG_MEMNUM 4
-#define ARG_OPTION 5
-#define ARG_MODE 6
+typedef enum {
+ ARG_SPECIAL = -1,
+ ARG_NON = 0,
+ ARG_RELADDR = 1,
+ ARG_ABSADDR = 2,
+ ARG_LENGTH = 3,
+ ARG_MEMNUM = 4,
+ ARG_OPTION = 5,
+ ARG_MODE = 6
+} OpArgType;
typedef struct {
short int opcode;
char* name;
- short int arg_type;
+ OpArgType arg_type;
} OpInfoType;
static OpInfoType OpInfo[] = {
@@ -295,11 +316,12 @@ extern void
onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
OnigEncoding enc)
{
- int i, n, arg_type;
+ int i, n;
+ OpArgType arg_type;
RelAddrType addr;
- LengthType len;
- MemNumType mem;
- OnigCodePoint code;
+ LengthType len;
+ MemNumType mem;
+ OnigCodePoint code;
OnigOptionType option;
ModeType mode;
UChar *q;
@@ -336,12 +358,13 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
fprintf(f, ":%d", option);
}
break;
-
case ARG_MODE:
mode = *((ModeType* )bp);
bp += SIZE_MODE;
fprintf(f, ":%d", mode);
break;
+ default:
+ break;
}
}
else {
@@ -546,7 +569,7 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
#ifdef USE_CALLOUT
case OP_CALLOUT_CONTENTS:
{
- GET_MEMNUM_INC(mem, bp); // number
+ GET_MEMNUM_INC(mem, bp); /* number */
fprintf(f, ":%d", mem);
}
break;
@@ -555,8 +578,8 @@ onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp, UChar* start,
{
int id;
- GET_MEMNUM_INC(id, bp); // id
- GET_MEMNUM_INC(mem, bp); // number
+ GET_MEMNUM_INC(id, bp); /* id */
+ GET_MEMNUM_INC(mem, bp); /* number */
fprintf(f, ":%d:%d", id, mem);
}
@@ -959,8 +982,8 @@ typedef struct _StackType {
struct {
UChar *pstr; /* start/end position */
/* Following information is set, if this stack type is MEM-START */
- StackIndex start; /* prev. info (for backtrack "(...)*" ) */
- StackIndex end; /* prev. info (for backtrack "(...)*" ) */
+ StackIndex prev_start; /* prev. info (for backtrack "(...)*" ) */
+ StackIndex prev_end; /* prev. info (for backtrack "(...)*" ) */
} mem;
struct {
UChar *pstr; /* start position */
@@ -996,7 +1019,7 @@ struct OnigCalloutArgsStruct {
const OnigUChar* string_end;
const OnigUChar* start;
const OnigUChar* right_range;
- const OnigUChar* current; // current matching position
+ const OnigUChar* current; /* current matching position */
unsigned long retry_in_match_counter;
/* invisible to users */
@@ -1127,7 +1150,7 @@ onig_get_retry_limit_in_match(void)
#ifdef USE_RETRY_LIMIT_IN_MATCH
return RetryLimitInMatch;
#else
- //return ONIG_NO_SUPPORT_CONFIG;
+ /* return ONIG_NO_SUPPORT_CONFIG; */
return 0;
#endif
}
@@ -1520,11 +1543,11 @@ stack_double(int is_alloca, char** arg_alloc_base,
STACK_ENSURE(1);\
stk->type = STK_MEM_START;\
stk->zid = (mnum);\
- stk->u.mem.pstr = (s);\
- stk->u.mem.start = mem_start_stk[mnum];\
- stk->u.mem.end = mem_end_stk[mnum];\
- mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
- mem_end_stk[mnum] = INVALID_STACK_INDEX;\
+ stk->u.mem.pstr = (s);\
+ stk->u.mem.prev_start = mem_start_stk[mnum];\
+ stk->u.mem.prev_end = mem_end_stk[mnum];\
+ mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
+ mem_end_stk[mnum] = INVALID_STACK_INDEX;\
STACK_INC;\
} while(0)
@@ -1532,9 +1555,9 @@ stack_double(int is_alloca, char** arg_alloc_base,
STACK_ENSURE(1);\
stk->type = STK_MEM_END;\
stk->zid = (mnum);\
- stk->u.mem.pstr = (s);\
- stk->u.mem.start = mem_start_stk[mnum];\
- stk->u.mem.end = mem_end_stk[mnum];\
+ stk->u.mem.pstr = (s);\
+ stk->u.mem.prev_start = mem_start_stk[mnum];\
+ stk->u.mem.prev_end = mem_end_stk[mnum];\
mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
STACK_INC;\
} while(0)
@@ -1758,8 +1781,8 @@ stack_double(int is_alloca, char** arg_alloc_base,
STACK_BASE_CHECK(stk, "STACK_POP 2"); \
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
else if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->zid] = stk->u.mem.start;\
- mem_end_stk[stk->zid] = stk->u.mem.end;\
+ mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
+ mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
}\
break;\
@@ -1770,15 +1793,15 @@ stack_double(int is_alloca, char** arg_alloc_base,
if ((stk->type & STK_MASK_POP_USED) != 0) break;\
else if ((stk->type & STK_MASK_POP_HANDLED) != 0) {\
if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->zid] = stk->u.mem.start;\
- mem_end_stk[stk->zid] = stk->u.mem.end;\
+ mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
+ mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
else if (stk->type == STK_REPEAT_INC) {\
STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
}\
else if (stk->type == STK_MEM_END) {\
- mem_start_stk[stk->zid] = stk->u.mem.start;\
- mem_end_stk[stk->zid] = stk->u.mem.end;\
+ mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
+ mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
POP_CALLOUT_CASE\
}\
@@ -1795,15 +1818,15 @@ stack_double(int is_alloca, char** arg_alloc_base,
if (stk->type == (til_type)) break;\
else {\
if (stk->type == STK_MEM_START) {\
- mem_start_stk[stk->zid] = stk->u.mem.start;\
- mem_end_stk[stk->zid] = stk->u.mem.end;\
+ mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
+ mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
else if (stk->type == STK_REPEAT_INC) {\
STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
}\
else if (stk->type == STK_MEM_END) {\
- mem_start_stk[stk->zid] = stk->u.mem.start;\
- mem_end_stk[stk->zid] = stk->u.mem.end;\
+ mem_start_stk[stk->zid] = stk->u.mem.prev_start;\
+ mem_end_stk[stk->zid] = stk->u.mem.prev_end;\
}\
/* Don't call callout here because negation of total success by (?!..) (?<!..) */\
}\
@@ -1849,12 +1872,24 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while(0)
-#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
-#define STACK_EMPTY_CHECK_MEMST(isnull,sid,s,reg) do {\
+#define STACK_MEM_START_GET_PREV_END_ADDR(k /* STK_MEM_START*/, reg, addr) do {\
+ if (k->u.mem.prev_end == INVALID_STACK_INDEX) {\
+ (addr) = 0;\
+ }\
+ else {\
+ if (MEM_STATUS_AT((reg)->bt_mem_end, k->zid))\
+ (addr) = STACK_AT(k->u.mem.prev_end)->u.mem.pstr;\
+ else\
+ (addr) = (UChar* )k->u.mem.prev_end;\
+ }\
+} while (0)
+
+#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
+#define STACK_EMPTY_CHECK_MEM(isnull,sid,s,reg) do {\
StackType* k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST"); \
+ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM"); \
if (k->type == STK_EMPTY_CHECK_START) {\
if (k->zid == (sid)) {\
if (k->u.empty_check.pstr != (s)) {\
@@ -1866,15 +1901,11 @@ stack_double(int is_alloca, char** arg_alloc_base,
(isnull) = 1;\
while (k < stk) {\
if (k->type == STK_MEM_START) {\
- if (k->u.mem.end == INVALID_STACK_INDEX) {\
+ STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
+ if (endp == 0) {\
(isnull) = 0; break;\
}\
- if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\
- endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
- else\
- endp = (UChar* )k->u.mem.end;\
- /*fprintf(stderr, "num: %d, pstr: %p, endp: %p\n", k->u.mem.num, STACK_AT(k->u.mem.start)->u.mem.pstr, endp);*/ \
- if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
+ else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) {\
(isnull) = 0; break;\
}\
else if (endp != s) {\
@@ -1890,12 +1921,12 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
} while(0)
-#define STACK_EMPTY_CHECK_MEMST_REC(isnull,sid,s,reg) do {\
+#define STACK_EMPTY_CHECK_MEM_REC(isnull,sid,s,reg) do {\
int level = 0;\
StackType* k = stk;\
while (1) {\
k--;\
- STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEMST_REC"); \
+ STACK_BASE_CHECK(k, "STACK_EMPTY_CHECK_MEM_REC");\
if (k->type == STK_EMPTY_CHECK_START) {\
if (k->zid == (sid)) {\
if (level == 0) {\
@@ -1908,20 +1939,25 @@ stack_double(int is_alloca, char** arg_alloc_base,
(isnull) = 1;\
while (k < stk) {\
if (k->type == STK_MEM_START) {\
- if (k->u.mem.end == INVALID_STACK_INDEX) {\
- (isnull) = 0; break;\
- }\
- if (MEM_STATUS_AT(reg->bt_mem_end, k->zid))\
- endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
- else\
- endp = (UChar* )k->u.mem.end;\
- if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
- (isnull) = 0; break;\
- }\
- else if (endp != s) {\
- (isnull) = -1; /* empty, but position changed */ \
+ if (level == 0) {\
+ STACK_MEM_START_GET_PREV_END_ADDR(k, reg, endp);\
+ if (endp == 0) {\
+ (isnull) = 0; break;\
+ }\
+ else if (STACK_AT(k->u.mem.prev_start)->u.mem.pstr != endp) { \
+ (isnull) = 0; break;\
+ }\
+ else if (endp != s) {\
+ (isnull) = -1; /* empty, but position changed */\
+ }\
}\
}\
+ else if (k->type == STK_EMPTY_CHECK_START) {\
+ if (k->zid == (sid)) level++;\
+ }\
+ else if (k->type == STK_EMPTY_CHECK_END) {\
+ if (k->zid == (sid)) level--;\
+ }\
k++;\
}\
break;\
@@ -1958,7 +1994,7 @@ stack_double(int is_alloca, char** arg_alloc_base,
}\
}\
} while(0)
-#endif /* USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT */
+#endif /* USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT */
#define STACK_GET_REPEAT(sid, k) do {\
int level = 0;\
@@ -2348,7 +2384,6 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
retry_limit_in_match = msa->retry_limit_in_match;
#endif
- //n = reg->num_repeat + reg->num_mem * 2;
pop_level = reg->stack_pop_level;
num_mem = reg->num_mem;
STACK_INIT(INIT_MATCH_STACK_SIZE);
@@ -2996,7 +3031,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_WORD_BOUNDARY: SOP_IN(OP_WORD_BOUNDARY);
{
ModeType mode;
- GET_MODE_INC(mode, p); // ascii_mode
+ GET_MODE_INC(mode, p); /* ascii_mode */
if (ON_STR_BEGIN(s)) {
DATA_ENSURE(1);
@@ -3020,7 +3055,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_NO_WORD_BOUNDARY: SOP_IN(OP_NO_WORD_BOUNDARY);
{
ModeType mode;
- GET_MODE_INC(mode, p); // ascii_mode
+ GET_MODE_INC(mode, p); /* ascii_mode */
if (ON_STR_BEGIN(s)) {
if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode))
@@ -3044,7 +3079,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_WORD_BEGIN: SOP_IN(OP_WORD_BEGIN);
{
ModeType mode;
- GET_MODE_INC(mode, p); // ascii_mode
+ GET_MODE_INC(mode, p); /* ascii_mode */
if (DATA_ENSURE_CHECK1 && IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
if (ON_STR_BEGIN(s) || !IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
@@ -3059,7 +3094,7 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_WORD_END: SOP_IN(OP_WORD_END);
{
ModeType mode;
- GET_MODE_INC(mode, p); // ascii_mode
+ GET_MODE_INC(mode, p); /* ascii_mode */
if (!ON_STR_BEGIN(s) && IS_MBC_WORD_ASCII_MODE(encode, sprev, end, mode)) {
if (ON_STR_END(s) || ! IS_MBC_WORD_ASCII_MODE(encode, s, end, mode)) {
@@ -3395,9 +3430,10 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
sprev = s;
if (backref_match_at_nested_level(reg, stk, stk_base, ic
, case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
- while (sprev + (len = enclen(encode, sprev)) < s)
- sprev += len;
-
+ if (sprev < end) {
+ while (sprev + (len = enclen(encode, sprev)) < s)
+ sprev += len;
+ }
p += (SIZE_MEMNUM * tlen);
}
else
@@ -3504,16 +3540,16 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
continue;
break;
-#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
+#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
case OP_EMPTY_CHECK_END_MEMST: SOP_IN(OP_EMPTY_CHECK_END_MEMST);
{
int is_empty;
GET_MEMNUM_INC(mem, p); /* mem: null check id */
- STACK_EMPTY_CHECK_MEMST(is_empty, mem, s, reg);
+ STACK_EMPTY_CHECK_MEM(is_empty, mem, s, reg);
if (is_empty) {
#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "EMPTY_CHECK_END_MEMST: skip id:%d, s:%p\n", (int)mem, s);
+ fprintf(stderr, "EMPTY_CHECK_END_MEM: skip id:%d, s:%p\n", (int)mem, s);
#endif
if (is_empty == -1) goto fail;
goto empty_check_found;
@@ -3531,14 +3567,14 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
int is_empty;
GET_MEMNUM_INC(mem, p); /* mem: null check id */
-#ifdef USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT
- STACK_EMPTY_CHECK_MEMST_REC(is_empty, mem, s, reg);
+#ifdef USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT
+ STACK_EMPTY_CHECK_MEM_REC(is_empty, mem, s, reg);
#else
STACK_EMPTY_CHECK_REC(is_empty, mem, s);
#endif
if (is_empty) {
#ifdef ONIG_DEBUG_MATCH
- fprintf(stderr, "EMPTY_CHECK_END_MEMST_PUSH: skip id:%d, s:%p\n",
+ fprintf(stderr, "EMPTY_CHECK_END_MEM_PUSH: skip id:%d, s:%p\n",
(int )mem, s);
#endif
if (is_empty == -1) goto fail;
@@ -3577,8 +3613,8 @@ match_at(regex_t* reg, const UChar* str, const UChar* end,
case OP_POP_OUT: SOP_IN(OP_POP_OUT);
STACK_POP_ONE;
- // for stop backtrack
- //CHECK_RETRY_LIMIT_IN_MATCH;
+ /* for stop backtrack */
+ /* CHECK_RETRY_LIMIT_IN_MATCH; */
SOP_OUT;
continue;
break;
@@ -5137,7 +5173,7 @@ onig_get_args_num_by_callout_args(OnigCalloutArgs* args)
num = args->num;
e = onig_reg_callout_list_at(args->regex, num);
- if (IS_NULL(e)) return 0;
+ if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
if (e->of == ONIG_CALLOUT_OF_NAME) {
return e->u.arg.num;
}
@@ -5153,7 +5189,7 @@ onig_get_passed_args_num_by_callout_args(OnigCalloutArgs* args)
num = args->num;
e = onig_reg_callout_list_at(args->regex, num);
- if (IS_NULL(e)) return 0;
+ if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
if (e->of == ONIG_CALLOUT_OF_NAME) {
return e->u.arg.passed_num;
}
@@ -5170,7 +5206,7 @@ onig_get_arg_by_callout_args(OnigCalloutArgs* args, int index,
num = args->num;
e = onig_reg_callout_list_at(args->regex, num);
- if (IS_NULL(e)) return 0;
+ if (IS_NULL(e)) return ONIGERR_INVALID_ARGUMENT;
if (e->of == ONIG_CALLOUT_OF_NAME) {
if (IS_NOT_NULL(type)) *type = e->u.arg.types[index];
if (IS_NOT_NULL(val)) *val = e->u.arg.vals[index];
@@ -5393,6 +5429,8 @@ onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
{
int r;
int slot;
+ long max_val;
+ OnigCodePoint count_type;
OnigType type;
OnigValue val;
OnigValue aval;
@@ -5411,13 +5449,38 @@ onig_builtin_max(OnigCalloutArgs* args, void* user_data ARG_UNUSED)
r = onig_get_arg_by_callout_args(args, 0, &type, &aval);
if (r != ONIG_NORMAL) return r;
+ if (type == ONIG_TYPE_TAG) {
+ r = onig_get_callout_data_by_callout_args(args, aval.tag, 0, &type, &aval);
+ if (r < ONIG_NORMAL) return r;
+ else if (r > ONIG_NORMAL)
+ max_val = 0L;
+ else
+ max_val = aval.l;
+ }
+ else { /* LONG */
+ max_val = aval.l;
+ }
+
+ r = onig_get_arg_by_callout_args(args, 1, &type, &aval);
+ if (r != ONIG_NORMAL) return r;
+
+ count_type = aval.c;
+ if (count_type != '>' && count_type != 'X' && count_type != '<')
+ return ONIGERR_INVALID_CALLOUT_ARG;
if (args->in == ONIG_CALLOUT_IN_RETRACTION) {
- val.l--;
+ if (count_type == '<') {
+ if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
+ val.l++;
+ }
+ else if (count_type == 'X')
+ val.l--;
}
else {
- if (val.l >= aval.l) return ONIG_CALLOUT_FAIL;
- val.l++;
+ if (count_type != '<') {
+ if (val.l >= max_val) return ONIG_CALLOUT_FAIL;
+ val.l++;
+ }
}
r = onig_set_callout_data_by_callout_args_self(args, slot, ONIG_TYPE_LONG, &val);
diff --git a/src/regint.h b/src/regint.h
index ba8407a..357b489 100644
--- a/src/regint.h
+++ b/src/regint.h
@@ -59,7 +59,7 @@
#define USE_CALL
#define USE_CALLOUT
#define USE_BACKREF_WITH_LEVEL /* \k<name+n>, \k<name-n> */
-#define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */
+#define USE_INSISTENT_CHECK_CAPTURES_IN_EMPTY_REPEAT /* /(?:()|())*\2/ */
#define USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE /* /\n$/ =~ "\n" */
#define USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR
@@ -710,7 +710,6 @@ typedef int AbsAddrType;
typedef int LengthType;
typedef int RepeatNumType;
typedef int MemNumType;
-typedef short int StateCheckNumType;
typedef void* PointerType;
typedef int SaveType;
typedef int UpdateVarType;
diff --git a/src/regparse.c b/src/regparse.c
index 6e95a14..64923ad 100644
--- a/src/regparse.c
+++ b/src/regparse.c
@@ -525,7 +525,7 @@ onig_st_insert_strend(hash_table_type* table, const UChar* str_key,
typedef struct {
OnigEncoding enc;
- int type; // callout type: single or not
+ int type; /* callout type: single or not */
UChar* s;
UChar* end;
} st_callout_name_key;
@@ -1583,7 +1583,7 @@ onig_set_callout_of_name(OnigEncoding enc, OnigCalloutType callout_type,
}
}
- r = id; // return id
+ r = id;
return r;
}
@@ -1637,24 +1637,36 @@ onig_get_callout_tag_end(regex_t* reg, int callout_num)
extern OnigCalloutType
onig_get_callout_type_by_name_id(int name_id)
{
+ if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
+ return 0;
+
return GlobalCalloutNameList->v[name_id].type;
}
extern OnigCalloutFunc
onig_get_callout_start_func_by_name_id(int name_id)
{
+ if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
+ return 0;
+
return GlobalCalloutNameList->v[name_id].start_func;
}
extern OnigCalloutFunc
onig_get_callout_end_func_by_name_id(int name_id)
{
+ if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
+ return 0;
+
return GlobalCalloutNameList->v[name_id].end_func;
}
extern int
onig_get_callout_in_by_name_id(int name_id)
{
+ if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
+ return 0;
+
return GlobalCalloutNameList->v[name_id].in;
}
@@ -1685,6 +1697,9 @@ get_callout_opt_default_by_name_id(int name_id, int index)
extern UChar*
onig_get_callout_name_by_name_id(int name_id)
{
+ if (name_id < 0 || name_id >= GlobalCalloutNameList->n)
+ return 0;
+
return GlobalCalloutNameList->v[name_id].name;
}
@@ -2689,7 +2704,7 @@ make_absent_engine(Node** node, int pre_save_right_id, Node* absent,
for (i = 0; i < 4; i++) ns[i] = NULL_NODE;
ns[1] = absent;
- ns[3] = step_one; // for err
+ ns[3] = step_one; /* for err */
r = node_new_save_gimmick(&ns[0], SAVE_S, env);
if (r != 0) goto err;
@@ -5341,8 +5356,11 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
if (num_type != IS_NOT_NUM) {
if (num_type == IS_REL_NUM) {
gnum = backref_rel_to_abs(gnum, env);
- if (gnum < 0)
+ if (gnum < 0) {
+ onig_scan_env_set_error_string(env, ONIGERR_UNDEFINED_NAME_REFERENCE,
+ prev, name_end);
return ONIGERR_UNDEFINED_GROUP_REFERENCE;
+ }
}
tok->u.call.by_number = 1;
tok->u.call.gnum = gnum;
@@ -5563,8 +5581,11 @@ fetch_token(OnigToken* tok, UChar** src, UChar* end, ScanEnv* env)
else {
if (num_type == IS_REL_NUM) {
gnum = backref_rel_to_abs(gnum, env);
- if (gnum < 0)
+ if (gnum < 0) {
+ onig_scan_env_set_error_string(env,
+ ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end);
return ONIGERR_UNDEFINED_GROUP_REFERENCE;
+ }
}
tok->u.call.by_number = 1;
tok->u.call.gnum = gnum;
@@ -6583,7 +6604,6 @@ parse_callout_of_contents(Node** np, int cterm, UChar** src, UChar* end, ScanEnv
PFETCH_S(c);
}
else if (c == '>') { /* no needs (default) */
- //in = ONIG_CALLOUT_IN_PROGRESS;
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
PFETCH_S(c);
}
@@ -6823,7 +6843,7 @@ parse_callout_of_name(Node** np, int cterm, UChar** src, UChar* end, ScanEnv* en
OnigEncoding enc = env->enc;
UChar* p = *src;
- //PFETCH_READY;
+ /* PFETCH_READY; */
if (PEND) return ONIGERR_INVALID_CALLOUT_PATTERN;
node = 0;
@@ -7053,12 +7073,12 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
- if (PPEEK_IS('|')) { // (?~|generator|absent)
+ if (PPEEK_IS('|')) { /* (?~|generator|absent) */
PINC;
if (PEND) return ONIGERR_END_PATTERN_IN_GROUP;
head_bar = 1;
- if (PPEEK_IS(')')) { // (?~|) : range clear
+ if (PPEEK_IS(')')) { /* (?~|) : range clear */
PINC;
r = make_range_clear(np, env);
if (r != 0) return r;
@@ -7083,7 +7103,7 @@ parse_enclosure(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
if (NODE_TYPE(top) != NODE_ALT || IS_NULL(NODE_CDR(top))) {
expr = NULL_NODE;
is_range_cutter = 1;
- //return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN;
+ /* return ONIGERR_INVALID_ABSENT_GROUP_GENERATOR_PATTERN; */
}
else {
absent = NODE_CAR(top);
@@ -7778,7 +7798,7 @@ parse_exp(Node** np, OnigToken* tok, int term, UChar** src, UChar* end,
len = 1;
while (1) {
if (len >= ONIGENC_MBC_MINLEN(env->enc)) {
- if (len == enclen(env->enc, STR_(*np)->s)) {//should not enclen_end()
+ if (len == enclen(env->enc, STR_(*np)->s)) {/* should not enclen_end() */
r = fetch_token(tok, src, end, env);
NODE_STRING_CLEAR_RAW(*np);
goto string_end;
diff --git a/src/sjis.c b/src/sjis.c
index e1bf3e1..12e93e8 100644
--- a/src/sjis.c
+++ b/src/sjis.c
@@ -337,5 +337,7 @@ OnigEncodingType OnigEncodingSJIS = {
is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- is_valid_mbc_string
+ is_valid_mbc_string,
+ ENC_FLAG_ASCII_COMPATIBLE,
+ 0, 0
};
diff --git a/src/unicode.c b/src/unicode.c
index e585937..a8bae66 100644
--- a/src/unicode.c
+++ b/src/unicode.c
@@ -335,7 +335,7 @@ onigenc_unicode_get_case_fold_codes_by_str(OnigEncoding enc,
n++;
}
}
- code = items[0].code[0]; // for multi-code to unfold search.
+ code = items[0].code[0]; /* for multi-code to unfold search. */
}
else if ((flag & INTERNAL_ONIGENC_CASE_FOLD_MULTI_CHAR) != 0) {
OnigCodePoint cs[3][4];
diff --git a/src/utf16_be.c b/src/utf16_be.c
index 098ab54..8f5b8bf 100644
--- a/src/utf16_be.c
+++ b/src/utf16_be.c
@@ -38,16 +38,19 @@ init(void)
int id;
OnigEncoding enc;
char* name;
- unsigned int t_long;
unsigned int args[4];
OnigValue opts[4];
enc = ONIG_ENCODING_UTF16_BE;
- t_long = ONIG_TYPE_LONG;
name = "\000F\000A\000I\000L\000\000"; BC0_P(name, fail);
name = "\000M\000I\000S\000M\000A\000T\000C\000H\000\000"; BC0_P(name, mismatch);
- name = "\000M\000A\000X\000\000"; BC_B(name, max, 1, &t_long);
+
+ name = "\000M\000A\000X\000\000";
+ args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ args[1] = ONIG_TYPE_CHAR;
+ opts[0].c = 'X';
+ BC_B_O(name, max, 2, args, 1, opts);
name = "\000E\000R\000R\000O\000R\000\000";
args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT;
@@ -274,5 +277,7 @@ OnigEncodingType OnigEncodingUTF16_BE = {
onigenc_always_false_is_allowed_reverse_match,
init,
0, /* is_initialized */
- is_valid_mbc_string
+ is_valid_mbc_string,
+ ENC_FLAG_UNICODE,
+ 0, 0
};
diff --git a/src/utf16_le.c b/src/utf16_le.c
index dc0d3f1..92bf318 100644
--- a/src/utf16_le.c
+++ b/src/utf16_le.c
@@ -36,16 +36,19 @@ init(void)
int id;
OnigEncoding enc;
char* name;
- unsigned int t_long;
unsigned int args[4];
OnigValue opts[4];
enc = ONIG_ENCODING_UTF16_LE;
- t_long = ONIG_TYPE_LONG;
name = "F\000A\000I\000L\000\000\000"; BC0_P(name, fail);
name = "M\000I\000S\000M\000A\000T\000C\000H\000\000\000"; BC0_P(name, mismatch);
- name = "M\000A\000X\000\000\000"; BC_B(name, max, 1, &t_long);
+
+ name = "M\000A\000X\000\000\000";
+ args[0] = ONIG_TYPE_TAG | ONIG_TYPE_LONG;
+ args[1] = ONIG_TYPE_CHAR;
+ opts[0].c = 'X';
+ BC_B_O(name, max, 2, args, 1, opts);
name = "E\000R\000R\000O\000R\000\000\000";
args[0] = ONIG_TYPE_LONG; opts[0].l = ONIG_ABORT;
@@ -282,5 +285,7 @@ OnigEncodingType OnigEncodingUTF16_LE = {
onigenc_always_false_is_allowed_reverse_match,
init,
0, /* is_initialized */
- is_valid_mbc_string
+ is_valid_mbc_string,
+ ENC_FLAG_UNICODE,
+ 0, 0
};
diff --git a/src/utf32_be.c b/src/utf32_be.c
index 68760bb..92476ec 100644
--- a/src/utf32_be.c
+++ b/src/utf32_be.c
@@ -190,5 +190,7 @@ OnigEncodingType OnigEncodingUTF32_BE = {
onigenc_always_false_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- is_valid_mbc_string
+ is_valid_mbc_string,
+ ENC_FLAG_UNICODE,
+ 0, 0
};
diff --git a/src/utf32_le.c b/src/utf32_le.c
index 8208cd0..dc3fd92 100644
--- a/src/utf32_le.c
+++ b/src/utf32_le.c
@@ -190,5 +190,7 @@ OnigEncodingType OnigEncodingUTF32_LE = {
onigenc_always_false_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- is_valid_mbc_string
+ is_valid_mbc_string,
+ ENC_FLAG_UNICODE,
+ 0, 0
};
diff --git a/src/utf8.c b/src/utf8.c
index a5c4dbe..22a8db1 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -29,7 +29,7 @@
#include "regenc.h"
-//#define USE_INVALID_CODE_SCHEME
+/* #define USE_INVALID_CODE_SCHEME */
#ifdef USE_INVALID_CODE_SCHEME
/* virtual codepoint values for invalid encoding byte 0xfe and 0xff */
@@ -296,5 +296,7 @@ OnigEncodingType OnigEncodingUTF8 = {
onigenc_always_true_is_allowed_reverse_match,
NULL, /* init */
NULL, /* is_initialized */
- is_valid_mbc_string
+ is_valid_mbc_string,
+ ENC_FLAG_ASCII_COMPATIBLE|ENC_FLAG_UNICODE,
+ 0, 0
};