From b62fc1758f4ae8459e6d7e8386ca547274b4daa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Frings-F=C3=BCrst?= Date: Fri, 1 Sep 2017 18:53:23 +0200 Subject: New upstream version 6.6.1 --- src/regint.h | 99 ++++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 67 insertions(+), 32 deletions(-) (limited to 'src/regint.h') diff --git a/src/regint.h b/src/regint.h index 185f4b6..9dc1723 100644 --- a/src/regint.h +++ b/src/regint.h @@ -30,7 +30,7 @@ */ /* for debug */ -/* #define ONIG_DEBUG_PARSE_TREE */ +/* #define ONIG_DEBUG_PARSE */ /* #define ONIG_DEBUG_COMPILE */ /* #define ONIG_DEBUG_SEARCH */ /* #define ONIG_DEBUG_MATCH */ @@ -39,7 +39,7 @@ /* for byte-code statistical data. */ /* #define ONIG_DEBUG_STATISTICS */ -#if defined(ONIG_DEBUG_PARSE_TREE) || defined(ONIG_DEBUG_MATCH) || \ +#if defined(ONIG_DEBUG_PARSE) || defined(ONIG_DEBUG_MATCH) || \ defined(ONIG_DEBUG_SEARCH) || defined(ONIG_DEBUG_COMPILE) || \ defined(ONIG_DEBUG_STATISTICS) #ifndef ONIG_DEBUG @@ -56,7 +56,6 @@ /* config */ /* spec. config */ -#define USE_NAMED_GROUP #define USE_CALL #define USE_BACKREF_WITH_LEVEL /* \k, \k */ #define USE_INSISTENT_CHECK_CAPTURES_STATUS_IN_ENDLESS_REPEAT /* /(?:()|())*\2/ */ @@ -283,10 +282,17 @@ typedef unsigned int MemStatusType; #define INT_MAX_LIMIT ((1UL << (SIZEOF_INT * 8 - 1)) - 1) +#define IS_CODE_WORD_ASCII(enc,code) \ + (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) +#define IS_CODE_DIGIT_ASCII(enc, code) \ + (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_DIGIT(enc,code)) +#define IS_CODE_XDIGIT_ASCII(enc, code) \ + (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_XDIGIT(enc,code)) + #define DIGITVAL(code) ((code) - '0') #define ODIGITVAL(code) DIGITVAL(code) #define XDIGITVAL(enc,code) \ - (ONIGENC_IS_CODE_DIGIT(enc,code) ? DIGITVAL(code) \ + (IS_CODE_DIGIT_ASCII(enc,code) ? DIGITVAL(code) \ : (ONIGENC_IS_CODE_UPPER(enc,code) ? (code) - 'A' + 10 : (code) - 'a' + 10)) #define IS_SINGLELINE(option) ((option) & ONIG_OPTION_SINGLELINE) @@ -301,6 +307,21 @@ typedef unsigned int MemStatusType; #define IS_NOTEOL(option) ((option) & ONIG_OPTION_NOTEOL) #define IS_POSIX_REGION(option) ((option) & ONIG_OPTION_POSIX_REGION) +#define IS_WORD_ASCII(option) \ + ((option) & (ONIG_OPTION_WORD_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII)) +#define IS_DIGIT_ASCII(option) \ + ((option) & (ONIG_OPTION_DIGIT_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII)) +#define IS_SPACE_ASCII(option) \ + ((option) & (ONIG_OPTION_SPACE_IS_ASCII | ONIG_OPTION_POSIX_IS_ASCII)) +#define IS_POSIX_ASCII(option) ((option) & ONIG_OPTION_POSIX_IS_ASCII) + +#define IS_ASCII_MODE_CTYPE_OPTION(ctype, options) \ + ((ctype) >= 0 && \ + (((ctype) < ONIGENC_CTYPE_ASCII && IS_POSIX_ASCII(options)) ||\ + ((ctype) == ONIGENC_CTYPE_WORD && IS_WORD_ASCII(options)) ||\ + ((ctype) == ONIGENC_CTYPE_DIGIT && IS_DIGIT_ASCII(options)) ||\ + ((ctype) == ONIGENC_CTYPE_SPACE && IS_SPACE_ASCII(options)))) + /* OP_SET_OPTION is required for these options. #define IS_DYNAMIC_OPTION(option) \ (((option) & (ONIG_OPTION_MULTILINE | ONIG_OPTION_IGNORECASE)) != 0) @@ -350,21 +371,21 @@ typedef struct _BBuf { unsigned int alloc; } BBuf; -#define BBUF_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size)) +#define BB_INIT(buf,size) onig_bbuf_init((BBuf* )(buf), (size)) -#define BBUF_SIZE_INC(buf,inc) do{\ +#define BB_SIZE_INC(buf,inc) do{\ (buf)->alloc += (inc);\ (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ } while (0) -#define BBUF_EXPAND(buf,low) do{\ +#define BB_EXPAND(buf,low) do{\ do { (buf)->alloc *= 2; } while ((buf)->alloc < (unsigned int )low);\ (buf)->p = (UChar* )xrealloc((buf)->p, (buf)->alloc);\ if (IS_NULL((buf)->p)) return(ONIGERR_MEMORY);\ } while (0) -#define BBUF_ENSURE_SIZE(buf,size) do{\ +#define BB_ENSURE_SIZE(buf,size) do{\ unsigned int new_alloc = (buf)->alloc;\ while (new_alloc < (unsigned int )(size)) { new_alloc *= 2; }\ if ((buf)->alloc != new_alloc) {\ @@ -374,54 +395,54 @@ typedef struct _BBuf { }\ } while (0) -#define BBUF_WRITE(buf,pos,bytes,n) do{\ +#define BB_WRITE(buf,pos,bytes,n) do{\ int used = (pos) + (n);\ - if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ + if ((buf)->alloc < (unsigned int )used) BB_EXPAND((buf),used);\ xmemcpy((buf)->p + (pos), (bytes), (n));\ if ((buf)->used < (unsigned int )used) (buf)->used = used;\ } while (0) -#define BBUF_WRITE1(buf,pos,byte) do{\ +#define BB_WRITE1(buf,pos,byte) do{\ int used = (pos) + 1;\ - if ((buf)->alloc < (unsigned int )used) BBUF_EXPAND((buf),used);\ + if ((buf)->alloc < (unsigned int )used) BB_EXPAND((buf),used);\ (buf)->p[(pos)] = (byte);\ if ((buf)->used < (unsigned int )used) (buf)->used = used;\ } while (0) -#define BBUF_ADD(buf,bytes,n) BBUF_WRITE((buf),(buf)->used,(bytes),(n)) -#define BBUF_ADD1(buf,byte) BBUF_WRITE1((buf),(buf)->used,(byte)) -#define BBUF_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used) -#define BBUF_GET_OFFSET_POS(buf) ((buf)->used) +#define BB_ADD(buf,bytes,n) BB_WRITE((buf),(buf)->used,(bytes),(n)) +#define BB_ADD1(buf,byte) BB_WRITE1((buf),(buf)->used,(byte)) +#define BB_GET_ADD_ADDRESS(buf) ((buf)->p + (buf)->used) +#define BB_GET_OFFSET_POS(buf) ((buf)->used) /* from < to */ -#define BBUF_MOVE_RIGHT(buf,from,to,n) do {\ - if ((unsigned int )((to)+(n)) > (buf)->alloc) BBUF_EXPAND((buf),(to) + (n));\ +#define BB_MOVE_RIGHT(buf,from,to,n) do {\ + if ((unsigned int )((to)+(n)) > (buf)->alloc) BB_EXPAND((buf),(to) + (n));\ xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ if ((unsigned int )((to)+(n)) > (buf)->used) (buf)->used = (to) + (n);\ } while (0) /* from > to */ -#define BBUF_MOVE_LEFT(buf,from,to,n) do {\ +#define BB_MOVE_LEFT(buf,from,to,n) do {\ xmemmove((buf)->p + (to), (buf)->p + (from), (n));\ } while (0) /* from > to */ -#define BBUF_MOVE_LEFT_REDUCE(buf,from,to) do {\ +#define BB_MOVE_LEFT_REDUCE(buf,from,to) do {\ xmemmove((buf)->p + (to), (buf)->p + (from), (buf)->used - (from));\ (buf)->used -= (from - to);\ } while (0) -#define BBUF_INSERT(buf,pos,bytes,n) do {\ +#define BB_INSERT(buf,pos,bytes,n) do {\ if (pos >= (buf)->used) {\ - BBUF_WRITE(buf,pos,bytes,n);\ + BB_WRITE(buf,pos,bytes,n);\ }\ else {\ - BBUF_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\ + BB_MOVE_RIGHT((buf),(pos),(pos) + (n),((buf)->used - (pos)));\ xmemcpy((buf)->p + (pos), (bytes), (n));\ }\ } while (0) -#define BBUF_GET_BYTE(buf, pos) (buf)->p[(pos)] +#define BB_GET_BYTE(buf, pos) (buf)->p[(pos)] /* has body */ @@ -436,15 +457,21 @@ typedef struct _BBuf { #define ANCHOR_END_BUF (1<<7) #define ANCHOR_SEMI_END_BUF (1<<8) #define ANCHOR_END_LINE (1<<9) -#define ANCHOR_WORD_BOUND (1<<10) -#define ANCHOR_NOT_WORD_BOUND (1<<11) +#define ANCHOR_WORD_BOUNDARY (1<<10) +#define ANCHOR_NO_WORD_BOUNDARY (1<<11) #define ANCHOR_WORD_BEGIN (1<<12) #define ANCHOR_WORD_END (1<<13) #define ANCHOR_ANYCHAR_STAR (1<<14) /* ".*" optimize info */ #define ANCHOR_ANYCHAR_STAR_ML (1<<15) /* ".*" optimize info (multi-line) */ +#define ANCHOR_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<16) +#define ANCHOR_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY (1<<17) + #define ANCHOR_HAS_BODY(a) ((a)->type < ANCHOR_BEGIN_BUF) +#define IS_WORD_ANCHOR_TYPE(type) \ + ((type) == ANCHOR_WORD_BOUNDARY || (type) == ANCHOR_NO_WORD_BOUNDARY || \ + (type) == ANCHOR_WORD_BEGIN || (type) == ANCHOR_WORD_END) /* operation code */ enum OpCode { @@ -473,7 +500,9 @@ enum OpCode { OP_CCLASS_NOT, OP_CCLASS_MB_NOT, OP_CCLASS_MIX_NOT, +#ifdef USE_OP_CCLASS_NODE OP_CCLASS_NODE, /* pointer to CClassNode node */ +#endif OP_ANYCHAR, /* "." */ OP_ANYCHAR_ML, /* "." multi-line */ @@ -483,12 +512,17 @@ enum OpCode { OP_ANYCHAR_ML_STAR_PEEK_NEXT, OP_WORD, - OP_NOT_WORD, - OP_WORD_BOUND, - OP_NOT_WORD_BOUND, + OP_WORD_ASCII, + OP_NO_WORD, + OP_NO_WORD_ASCII, + OP_WORD_BOUNDARY, + OP_NO_WORD_BOUNDARY, OP_WORD_BEGIN, OP_WORD_END, + OP_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, + OP_NO_EXTENDED_GRAPHEME_CLUSTER_BOUNDARY, + OP_BEGIN_BUF, OP_END_BUF, OP_BEGIN_LINE, @@ -580,6 +614,7 @@ typedef short int StateCheckNumType; typedef void* PointerType; typedef int SaveType; typedef int UpdateVarType; +typedef int ModeType; #define SIZE_OPCODE 1 #define SIZE_RELADDR sizeof(RelAddrType) @@ -593,6 +628,7 @@ typedef int UpdateVarType; #define SIZE_POINTER sizeof(PointerType) #define SIZE_SAVE_TYPE sizeof(SaveType) #define SIZE_UPDATE_VAR_TYPE sizeof(UpdateVarType) +#define SIZE_MODE sizeof(ModeType) #define GET_RELADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, RelAddrType) #define GET_ABSADDR_INC(addr,p) PLATFORM_GET_INC(addr, p, AbsAddrType) @@ -604,6 +640,7 @@ typedef int UpdateVarType; #define GET_STATE_CHECK_NUM_INC(num,p) PLATFORM_GET_INC(num, p, StateCheckNumType) #define GET_SAVE_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, SaveType) #define GET_UPDATE_VAR_TYPE_INC(type,p) PLATFORM_GET_INC(type, p, UpdateVarType) +#define GET_MODE_INC(mode,p) PLATFORM_GET_INC(mode, p, ModeType) /* code point's address must be aligned address. */ #define GET_CODE_POINT(code,p) code = *((OnigCodePoint* )(p)) @@ -624,6 +661,7 @@ typedef int UpdateVarType; #define SIZE_OP_PUSH_IF_PEEK_NEXT (SIZE_OPCODE + SIZE_RELADDR + 1) #define SIZE_OP_REPEAT_INC (SIZE_OPCODE + SIZE_MEMNUM) #define SIZE_OP_REPEAT_INC_NG (SIZE_OPCODE + SIZE_MEMNUM) +#define SIZE_OP_WORD_BOUNDARY (SIZE_OPCODE + SIZE_MODE) #define SIZE_OP_PREC_READ_START SIZE_OPCODE #define SIZE_OP_PUSH_PREC_READ_NOT (SIZE_OPCODE + SIZE_RELADDR) #define SIZE_OP_PREC_READ_END SIZE_OPCODE @@ -725,9 +763,6 @@ typedef struct { } OnigMatchArg; -#define IS_CODE_SB_WORD(enc,code) \ - (ONIGENC_IS_CODE_ASCII(code) && ONIGENC_IS_CODE_WORD(enc,code)) - typedef struct OnigEndCallListItem { struct OnigEndCallListItem* next; void (*func)(void); -- cgit v1.2.3