diff options
author | Jörg Frings-Fürst <debian@jff.email> | 2020-04-20 20:34:10 +0200 |
---|---|---|
committer | Jörg Frings-Fürst <debian@jff.email> | 2020-04-20 20:34:10 +0200 |
commit | f3d6e46ce3762b6f51a166119d3982fd3715507a (patch) | |
tree | 0935fb6da7f1d9728b42ddf08395a0e977e1c228 /doc | |
parent | 043fff5b6f2461aeccb1c62cb771826cfe301832 (diff) | |
parent | 73c6133c32cddae59813cbadf655cb50a3a7356a (diff) |
Merge branch 'feature/upstream' into develop
Diffstat (limited to 'doc')
-rw-r--r-- | doc/API | 60 | ||||
-rw-r--r-- | doc/API.ja | 63 | ||||
-rw-r--r-- | doc/RE | 50 | ||||
-rw-r--r-- | doc/RE.ja | 47 | ||||
-rw-r--r-- | doc/SYNTAX.md | 137 | ||||
-rw-r--r-- | doc/UNICODE_PROPERTIES | 1455 |
6 files changed, 981 insertions, 831 deletions
@@ -1,4 +1,4 @@ -Oniguruma API Version 6.9.4 2019/09/30 +Oniguruma API Version 6.9.5 2020/03/25 #include <oniguruma.h> @@ -273,6 +273,18 @@ Oniguruma API Version 6.9.4 2019/09/30 normal return: ONIG_NORMAL +# int onig_set_retry_limit_in_search_of_match_param(OnigMatchParam* mp, unsigned long limit) + + Set a retry limit count of a search process. + 0 means unlimited. + + arguments + 1 mp: match-param pointer + 2 limit: number of limit + + normal return: ONIG_NORMAL + + # int onig_set_progress_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f) Set a function for callouts of contents in progress. @@ -333,7 +345,7 @@ Oniguruma API Version 6.9.4 2019/09/30 arguments 1-7: same as onig_search() - 8 mp: match parameter values (match_stack_limit, retry_limit_in_match) + 8 mp: match parameter values (match_stack_limit, retry_limit_in_match, retry_limit_in_search) # int onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, @@ -368,7 +380,7 @@ Oniguruma API Version 6.9.4 2019/09/30 arguments 1-6: same as onig_match() - 7 mp: match parameter values (match_stack_limit, retry_limit_in_match) + 7 mp: match parameter values (match_stack_limit, retry_limit_in_match, retry_limit_in_search) # int onig_scan(regex_t* reg, const UChar* str, const UChar* end, @@ -599,8 +611,8 @@ Oniguruma API Version 6.9.4 2019/09/30 # int onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), - void* arg) + int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), + void* arg) Iterate function call for all names. @@ -866,19 +878,51 @@ Oniguruma API Version 6.9.4 2019/09/30 # unsigned long onig_get_retry_limit_in_match(void) - Return the limit of retry counts in matching process. + Return the limit of retry counts in a matching process. 
(default: 10000000) - normal return: limit value + normal return: current limit value + + +# unsigned long onig_get_retry_limit_in_search(void) + + Return the limit of retry counts in a search process. + 0 means unlimited. + (default: 0) + + normal return: current limit value -# int onig_set_retry_limit_in_match(unsigned long n) +# int onig_set_retry_limit_in_match(unsigned long limit) Set the limit of retry counts in matching process. normal return: ONIG_NORMAL +# int onig_set_retry_limit_in_search(unsigned long limit) + + Set a retry limit count of a search process. + 0 means unlimited. + + normal return: ONIG_NORMAL + + +# int onig_get_subexp_call_max_nest_level(void) + + Return the limit of subexp call nest level. + (default: 24) + + normal return: current limit value + + +# int onig_set_subexp_call_max_nest_level(int max_level) + + Set a limit level of subexp call nest level. + + normal return: ONIG_NORMAL + + # OnigCalloutFunc onig_get_progress_callout(void) Get a function for callouts of contents in progress. 
@@ -1,4 +1,4 @@ -鬼車インターフェース Version 6.9.4 2019/09/30 +鬼車インターフェース Version 6.9.5 2020/03/25 #include <oniguruma.h> @@ -263,7 +263,19 @@ # int onig_set_retry_limit_in_match_of_match_param(OnigMatchParam* mp, unsigned long limit) - 一回のマッチでのリトライ数の最大値をセットする。 + 一回のマッチでのリトライ数の制限値をセットする。 + + 引数 + 1 mp: マッチパラメタオブジェクトアドレス + 2 limit: 制限回数 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_set_retry_limit_in_search_of_match_param(OnigMatchParam* mp, unsigned long limit) + + 一回の検索でのリトライ数の制限値をセットする。 + 0は無制限を意味する。 引数 1 mp: マッチパラメタオブジェクトアドレス @@ -331,7 +343,7 @@ 引数 1-7: onig_search()と同じ - 8 mp: マッチパラメタ値 (match_stack_limit, retry_limit_in_match) + 8 mp: マッチパラメタ値 (match_stack_limit, retry_limit_in_match, retry_limit_in_search) # int onig_match(regex_t* reg, const UChar* str, const UChar* end, @@ -365,7 +377,7 @@ 引数 1-6: onig_match()と同じ - 7 mp: マッチパラメタ値 (match_stack_limit, retry_limit_in_match) + 7 mp: マッチパラメタ値 (match_stack_limit, retry_limit_in_match, retry_limit_in_search) # int onig_scan(regex_t* reg, const UChar* str, const UChar* end, @@ -600,8 +612,8 @@ # int onig_foreach_name(regex_t* reg, - int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), - void* arg) + int (*func)(const UChar*, const UChar*, int,int*,regex_t*,void*), + void* arg) 全ての名前に対してコールバック関数呼び出しを実行する。 @@ -872,15 +884,48 @@ # unsigned long onig_get_retry_limit_in_match(void) - マッチング関数内でのリトライ数の制限値を返す。 + 一回のマッチングでのリトライ数の制限値を返す。 (デフォルト: 10000000) 正常終了戻り値: 制限値 -# int onig_set_retry_limit_in_match(unsigned long n) +# unsigned long onig_get_retry_limit_in_search(void) + + 一回の検索でのリトライ数の制限値を返す。 + 0は無制限を意味する。 + (デフォルト: 0) + + 正常終了戻り値: 制限値 + + +# int onig_set_retry_limit_in_match(unsigned long limit) + + 一回のマッチング内でのリトライ数の制限値を指定する。 + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_set_retry_limit_in_search(unsigned long limit) + + 一回の検索でのリトライ数の制限値をセットする。 + 0は無制限を意味する。 + (デフォルト: 0) + + 正常終了戻り値: ONIG_NORMAL + + +# int onig_get_subexp_call_max_nest_level(void) + + 部分式呼出しのネストレベルの最大値を返す。 + (デフォルト: 24) + + 正常終了戻り値: 制限値 + + +# int 
onig_set_subexp_call_max_nest_level(int max_level) - マッチング関数内でのリトライ数の制限値を指定する。 + 部分式呼出しのネストレベルの最大値を指定する。 正常終了戻り値: ONIG_NORMAL @@ -1,6 +1,6 @@ -Oniguruma Regular Expressions Version 6.9.4 2019/10/31 +Oniguruma Regular Expressions Version 6.9.5 2020/04/09 -syntax: ONIG_SYNTAX_ONIGURUMA (default) +syntax: ONIG_SYNTAX_ONIGURUMA (default syntax) 1. Syntax elements @@ -21,19 +21,28 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) \f form feed (0x0C) \a bell (0x07) \e escape (0x1B) - \nnn octal char (encoded byte value) - \o{17777777777} wide octal char (character code point value) - \uHHHH wide hexadecimal char (character code point value) - \xHH hexadecimal char (encoded byte value) - \x{7HHHHHHH} wide hexadecimal char (character code point value) - \cx control char (character code point value) - \C-x control char (character code point value) - \M-x meta (x|0x80) (character code point value) - \M-\C-x meta control char (character code point value) + \nnn octal char (encoded byte value) + \xHH hexadecimal char (encoded byte value) + \x{7HHHHHHH} (1-8 digits) hexadecimal char (code point value) + \o{17777777777} (1-11 digits) octal char (code point value) + \uHHHH hexadecimal char (code point value) + \cx control char (code point value) + \C-x control char (code point value) + \M-x meta (x|0x80) (code point value) + \M-\C-x meta control char (code point value) (* \b as backspace is effective in character class only) +2.1 Code point sequences + + Hexadecimal code point (1-8 digits) + \x{7HHHHHHH 7HHHHHHH ... 7HHHHHHH} + + Octal code point (1-11 digits) + \o{17777777777 17777777777 ... 17777777777} + + 3. Character types . any character (except newline) @@ -132,6 +141,7 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) {,n} at least 0 but no more than n times ({0,n}) {n} n times + reluctant ?? 0 or 1 times @@ -141,6 +151,11 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) {n,}? at least n times {,n}? at least 0 but not more than n times (== {0,n}?) + {n}? 
is reluctant operator in ONIG_SYNTAX_JAVA and ONIG_SYNTAX_PERL only. + (In that case, it doesn't make sense to write so.) + In default syntax, /a{n}?/ === /(?:a{n})?/ + + possessive (greedy and does not backtrack once match) ?+ 1 or 0 times @@ -148,8 +163,8 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) ++ 1 or more times {n,m} (n > m) at least m but not more than n times - {n,m}+, {n,}+, {n}+ are possessive operators in ONIG_SYNTAX_JAVA and - ONIG_SYNTAX_PERL only. + {n,m}+, {n,}+, {n}+ are possessive operators in ONIG_SYNTAX_JAVA and + ONIG_SYNTAX_PERL only. ex. /a*+/ === /(?>a*)/ @@ -279,15 +294,12 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default) (?=subexp) look-ahead (?!subexp) negative look-ahead + (?<=subexp) look-behind (?<!subexp) negative look-behind - Subexp of look-behind must be fixed-width. - But top-level alternatives can be of various lengths. - ex. (?<=a|bc) is OK. (?<=aaa(?:b|cd)) is not allowed. - - In negative look-behind, capturing group isn't allowed, - but non-capturing group (?:) is allowed. + * Cannot use Absent stopper (?~|expr) and Range clear + (?~|) operators in look-behind and negative look-behind. * In look-behind and negative look-behind, support for ignore-case option is limited. 
Only supports conversion @@ -1,4 +1,4 @@ -鬼車 正規表現 Version 6.9.4 2019/10/31 +鬼車 正規表現 Version 6.9.5 2020/04/09 使用文法: ONIG_SYNTAX_ONIGURUMA (既定値) @@ -21,19 +21,28 @@ \f 改頁 (0x0C) \a 鐘 (0x07) \e 退避修飾 (0x1B) - \nnn 八進数表現 符号化バイト値 - \o{17777777777} 拡張八進数表現 コードポイント値 - \uHHHH 拡張十六進数表現 コードポイント値 - \xHH 十六進数表現 符号化バイト値 - \x{7HHHHHHH} 拡張十六進数表現 コードポイント値 - \cx 制御文字表現 コードポイント値 - \C-x 制御文字表現 コードポイント値 - \M-x 超 (x|0x80) コードポイント値 - \M-\C-x 超 + 制御文字表現 コードポイント値 + \nnn 八進数表現 符号化バイト値 + \xHH 十六進数表現 符号化バイト値 + \x{7HHHHHHH} (1-8桁) 拡張十六進数表現 コードポイント値 + \o{17777777777} (1-11桁) 拡張八進数表現 コードポイント値 + \uHHHH 拡張十六進数表現 コードポイント値 + \cx 制御文字表現 コードポイント値 + \C-x 制御文字表現 コードポイント値 + \M-x 超 (x|0x80) コードポイント値 + \M-\C-x 超 + 制御文字表現 コードポイント値 ※ \bは、文字集合内でのみ有効 +2.1 コードポイント連続表記 + + 十六進数表現コードポイント (1-8桁) + \x{7HHHHHHH 7HHHHHHH ... 7HHHHHHH} + + 八進数表現コードポイント (1-11桁) + \o{17777777777 17777777777 ... 17777777777} + + 3. 文字種 . 任意文字 (改行を除く: オプションに依存) @@ -131,6 +140,7 @@ {,n} 零回以上n回以下 ({0,n}) {n} n回 + 無欲 ?? 零回または一回 @@ -140,6 +150,11 @@ {n,}? n回以上 {,n}? 零回以上n回以下 (== {0,n}?) + {n}? はONIG_SYNTAX_JAVAとONIG_SYNTAX_PERLでのみ無欲な指定子 + (その場合には、態々そう書く意味はないが) + デフォルトの文法では、/a{n}?/ === /(?:a{n})?/ + + 強欲 (欲張りで、繰り返しに成功した後は回数を減らすような後退再試行をしない) ?+ 一回または零回 @@ -147,7 +162,8 @@ ++ 一回以上 {n,m} (n > m) m回以上 かつ n回以下 - {n,m}+, {n,}+, {n}+ は、ONIG_SYNTAX_JAVAとONIG_SYNTAX_PERLでのみ強欲な指定子 + {n,m}+, {n,}+, {n}+ は、ONIG_SYNTAX_JAVAとONIG_SYNTAX_PERLでのみ + 強欲な指定子 例. /a*+/ === /(?>a*)/ @@ -274,15 +290,12 @@ (?=式) 先読み (?!式) 否定先読み + (?<=式) 戻り読み (?<!式) 否定戻り読み - 戻り読みの式は固定文字長でなければならない。 - しかし、最上位の選択子だけは異なった文字長が許される。 - 例. (?<=a|bc) は許可. 
(?<=aaa(?:b|cd)) は不許可 - - 否定戻り読みでは、捕獲式集合は許されないが、 - 非捕獲式集合は許される。 + * 戻り読み、否定戻り読みの式の中では、不在停止演算子 + (?~|expr)と範囲消去演算子(?~|)を使用することはできない * 戻り読み、否定戻り読みの中では、ignore-caseオプションの 対応が制限される。一文字と一文字の間の変換しか対応しない。 diff --git a/doc/SYNTAX.md b/doc/SYNTAX.md index 69ecf3a..c38e5c8 100644 --- a/doc/SYNTAX.md +++ b/doc/SYNTAX.md @@ -1,7 +1,7 @@ # Oniguruma syntax (operator) configuration -_Documented for Oniguruma 6.9.3 (2019/08/08)_ +_Documented for Oniguruma 6.9.5 (2020/01/23)_ ---------- @@ -75,7 +75,7 @@ data set by `onig_set_meta_char()` will be ignored. ### 1. ONIG_SYN_OP_DOT_ANYCHAR (enable `.`) -_Set in: PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Java, Perl, Perl_NG, Ruby_ Enables support for the standard `.` metacharacter, meaning "any one character." You usually want this flag on unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -84,7 +84,7 @@ so that you can use a metacharacter other than `.` instead. ### 2. ONIG_SYN_OP_ASTERISK_ZERO_INF (enable `r*`) -_Set in: PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the standard `r*` metacharacter, meaning "zero or more r's." You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -103,7 +103,7 @@ behavior. ### 4. ONIG_SYN_OP_PLUS_ONE_INF (enable `r+`) -_Set in: PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the standard `r+` metacharacter, meaning "one or more r's." You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -122,7 +122,7 @@ behavior. ### 6. 
ONIG_SYN_OP_QMARK_ZERO_ONE (enable `r?`) -_Set in: PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, Emacs, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the standard `r?` metacharacter, meaning "zero or one r" or "an optional r." You usually want this flag set unless you have turned on `ONIG_SYN_OP_VARIABLE_META_CHARACTERS` @@ -141,7 +141,7 @@ you want `?` to simply match a literal `?` character, but you still want some wa ### 8. ONIG_SYN_OP_BRACE_INTERVAL (enable `r{l,u}`) -_Set in: PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the `r{lower,upper}` range form, common to more advanced regex engines, which lets you specify precisely a minimum and maximum range on how many r's @@ -168,7 +168,7 @@ match literal curly brace characters, but you still want some way of activating ### 10. ONIG_SYN_OP_VBAR_ALT (enable `r|s`) -_Set in: PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `r|s` alternation operator. You usually want this flag set. @@ -185,7 +185,7 @@ match a literal `|` character, but you still want some way of activating "altern ### 12. ONIG_SYN_OP_LPAREN_SUBEXP (enable `(r)`) -_Set in: PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `(...)` grouping-and-capturing operators. You usually want this flag set. @@ -203,7 +203,7 @@ activating "grouping" or "capturing" behavior. ### 14. 
ONIG_SYN_OP_ESC_AZ_BUF_ANCHOR (enable `\A` and `\Z` and `\z`) -_Set in: GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the anchors `\A` (start-of-string), `\Z` (end-of-string or newline-at-end-of-string), and `\z` (end-of-string) escapes. @@ -214,7 +214,7 @@ option will recognize that metacharacter instead.) ### 15. ONIG_SYN_OP_ESC_CAPITAL_G_BEGIN_ANCHOR (enable `\G`) -_Set in: GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the special anchor `\G` (start-of-previous-match). @@ -231,7 +231,7 @@ exactly the same as `\A`. ### 16. ONIG_SYN_OP_DECIMAL_BACKREF (enable `\num`) -_Set in: PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for subsequent matches to back references to prior capture groups `(...)` using the common `\num` syntax (like `\3`). @@ -244,7 +244,7 @@ You usually want this enabled, and it is enabled by default in every built-in sy ### 17. ONIG_SYN_OP_BRACKET_CC (enable `[...]`) -_Set in: PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixBasic, PosixExtended, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for recognizing character classes, like `[a-z]`. If this flag is not set, `[` and `]` will be treated as ordinary literal characters instead of as metacharacters. @@ -254,7 +254,7 @@ You usually want this enabled, and it is enabled by default in every built-in sy ### 18. ONIG_SYN_OP_ESC_W_WORD (enable `\w` and `\W`) -_Set in: Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `\w` and `\W` shorthand forms. 
These match "word characters," whose meaning varies depending on the encoding being used. @@ -285,7 +285,7 @@ Most regex syntaxes do _not_ support these metacharacters. ### 20. ONIG_SYN_OP_ESC_B_WORD_BOUND (enable `\b` and `\B`) -_Set in: Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `\b` and `\B` word-boundary metacharacters. The `\b` metacharacter matches a zero-width position at a transition from word-characters to non-word-characters, or vice @@ -297,7 +297,7 @@ are considered "word characters." ### 21. ONIG_SYN_OP_ESC_S_WHITE_SPACE (enable `\s` and `\S`) -_Set in: GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `\s` and `\S` whitespace-matching metacharacters. @@ -319,7 +319,7 @@ Unicode-equivalent code points, and then matching according to Unicode rules. ### 22. ONIG_SYN_OP_ESC_D_DIGIT (enable `\d` and `\D`) -_Set in: GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `\d` and `\D` digit-matching metacharacters. @@ -337,7 +337,7 @@ Unicode-equivalent code points, and then matching according to Unicode rules. ### 23. ONIG_SYN_OP_LINE_ANCHOR (enable `^r` and `r$`) -_Set in: Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Emacs, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the common `^` and `$` line-anchor metacharacters. @@ -352,7 +352,7 @@ and not any other form.) ### 24. ONIG_SYN_OP_POSIX_BRACKET (enable POSIX `[:xxxx:]`) -_Set in: PosixBasic, PosixExtended, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixBasic, PosixExtended, Grep, GnuRegex, Perl, Java, Perl_NG, Ruby_ Enables support for the POSIX `[:xxxx:]` character classes, like `[:alpha:]` and `[:digit:]`. 
The supported POSIX character classes are `alnum`, `alpha`, `blank`, `cntrl`, `digit`, @@ -361,7 +361,7 @@ The supported POSIX character classes are `alnum`, `alpha`, `blank`, `cntrl`, `d ### 25. ONIG_SYN_OP_QMARK_NON_GREEDY (enable `r??`, `r*?`, `r+?`, and `r{n,m}?`) -_Set in: Perl, Java, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Java, Perl_NG, Ruby_ Enables support for lazy (non-greedy) quantifiers: That is, if you append a `?` after another quantifier such as `?`, `*`, `+`, or `{n,m}`, Oniguruma will try to match @@ -370,7 +370,7 @@ as _little_ as possible instead of as _much_ as possible. ### 26. ONIG_SYN_OP_ESC_CONTROL_CHARS (enable `\n`, `\r`, `\t`, etc.) -_Set in: PosixBasic, PosixExtended, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixBasic, PosixExtended, Java, Perl, Perl_NG, Ruby_ Enables support for C-style control-code escapes, like `\n` and `\r`. Specifically, this recognizes `\a` (7), `\b` (8), `\t` (9), `\n` (10), `\f` (12), `\r` (13), and @@ -380,7 +380,7 @@ support for recognizing `\v` as code point 11. ### 27. ONIG_SYN_OP_ESC_C_CONTROL (enable `\cx` control codes) -_Set in: Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ Enables support for named control-code escapes, like `\cm` or `\cM` for code-point 13. In this shorthand form, control codes may be specified by `\c` (for "Control") @@ -390,7 +390,7 @@ followed by an alphabetic letter, a-z or A-Z, indicating which code point to rep ### 28. ONIG_SYN_OP_ESC_OCTAL3 (enable `\OOO` octal codes) -_Set in: Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ Enables support for octal-style escapes of up to three digits, like `\1` for code point 1, and `\177` for code point 127. Octal values greater than 255 will result @@ -399,7 +399,7 @@ in an error message. ### 29. 
ONIG_SYN_OP_ESC_X_HEX2 (enable `\xHH` hex codes) -_Set in: Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ Enables support for hexadecimal-style escapes of up to two digits, like `\x1` for code point 1, and `\x7F` for code point 127. @@ -407,7 +407,7 @@ point 1, and `\x7F` for code point 127. ### 30. ONIG_SYN_OP_ESC_X_BRACE_HEX8 (enable `\x{7HHHHHHH}` hex codes) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ Enables support for brace-wrapped hexadecimal-style escapes of up to eight digits, like `\x{1}` for code point 1, and `\x{FFFE}` for code point 65534. @@ -415,7 +415,7 @@ like `\x{1}` for code point 1, and `\x{FFFE}` for code point 65534. ### 31. ONIG_SYN_OP_ESC_O_BRACE_OCTAL (enable `\o{1OOOOOOOOOO}` octal codes) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ Enables support for brace-wrapped octal-style escapes of up to eleven digits, like `\o{1}` for code point 1, and `\o{177776}` for code point 65534. @@ -444,7 +444,7 @@ longer be treated as metacharacters, and instead will be matched as literal ### 1. ONIG_SYN_OP2_QMARK_GROUP_EFFECT (enable `(?...)`) -_Set in: Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ Enables support for the fairly-common `(?...)` grouping operator, which controls precedence but which does _not_ capture its contents. @@ -465,7 +465,7 @@ The supported toggle-able options for this flag are: ### 3. ONIG_SYN_OP2_OPTION_RUBY (enable options `(?imx)` and `(?-imx)`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ Enables support of regex options. (i,m,x) The supported toggle-able options for this flag are: @@ -477,7 +477,7 @@ The supported toggle-able options for this flag are: ### 4. 
ONIG_SYN_OP2_PLUS_POSSESSIVE_REPEAT (enable `r?+`, `r*+`, and `r++`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ Enables support for the _possessive_ quantifiers `?+`, `*+`, and `++`, which work similarly to `?` and `*` and `+`, respectively, but which do not backtrack @@ -499,7 +499,7 @@ extent if subsequent parts of the pattern fail to match. ### 6. ONIG_SYN_OP2_CCLASS_SET_OP (enable `&&` within `[...]`) -_Set in: Java, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Ruby_ Enables support for character-class _intersection_. For example, with this feature enabled, you can write `[a-z&&[^aeiou]]` to produce a character class @@ -509,7 +509,7 @@ all control codes _except_ newlines. ### 7. ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP (enable named captures `(?<name>...)`) -_Set in: Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl_NG, Ruby_ Enables support for _naming_ capture groups, so that instead of having to refer to captures by position (like `\3` or `$3`), you can refer to them by names @@ -519,7 +519,7 @@ and `(?'name'...)`, but not the Python `(?P<name>...)` syntax. ### 8. ONIG_SYN_OP2_ESC_K_NAMED_BACKREF (enable named backreferences `\k<name>`) -_Set in: Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl_NG, Ruby_ Enables support for substituted backreferences by name, not just by position. This supports using `\k'name'` in addition to supporting `\k<name>`. This also @@ -530,7 +530,7 @@ the match, if the capture matched multiple times, by writing `\k<name+n>` or ### 9. ONIG_SYN_OP2_ESC_G_SUBEXP_CALL (enable backreferences `\g<name>` and `\g<n>`) -_Set in: Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl_NG, Ruby_ Enables support for substituted backreferences by both name and position using the same syntax. This supports using `\g'name'` and `\g'1'` in addition to @@ -554,7 +554,7 @@ enabled by default in any syntax. ### 11. 
ONIG_SYN_OP2_ESC_CAPITAL_C_BAR_CONTROL (enable `\C-x`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ Enables support for Ruby legacy control-code escapes, like `\C-m` or `\C-M` for code-point 13. In this shorthand form, control codes may be specified by `\C-` (for "Control") @@ -567,7 +567,7 @@ See also ONIG_SYN_OP_ESC_C_CONTROL, which enables the more-common `\cx` syntax. ### 12. ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META (enable `\M-x`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ Enables support for Ruby legacy meta-code escapes. When you write `\M-x`, Oniguruma will match an `x` whose 8th bit is set (i.e., the character code of `x` will be or'ed @@ -577,7 +577,7 @@ with `0x80`). So, for example, you can match `\x81` using `\x81`, or you can wr ### 13. ONIG_SYN_OP2_ESC_V_VTAB (enable `\v` as vertical tab) -_Set in: Java, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Ruby_ Enables support for a C-style `\v` escape code, meaning "vertical tab." If enabled, `\v` will be equivalent to ASCII code point 11. @@ -585,7 +585,7 @@ Enables support for a C-style `\v` escape code, meaning "vertical tab." If enab ### 14. ONIG_SYN_OP2_ESC_U_HEX4 (enable `\uHHHH` for Unicode) -_Set in: Java, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Ruby_ Enables support for a Java-style `\uHHHH` escape code for representing Unicode code-points by number, using up to four hexadecimal digits (up to `\uFFFF`). So, @@ -611,7 +611,7 @@ These anchor forms are very obscure, and rarely supported by other regex librari ### 16. ONIG_SYN_OP2_ESC_P_BRACE_CHAR_PROPERTY (enable `\p{...}` and `\P{...}`) -_Set in: Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Perl, Perl_NG, Ruby_ Enables support for an alternate syntax for POSIX character classes; instead of writing `[:alpha:]` when this is enabled, you can instead write `\p{alpha}`. @@ -621,7 +621,7 @@ See also ONIG_SYN_OP_POSIX_BRACKET for the classic POSIX form. ### 17. 
ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT (enable `\p{^...}` and `\P{^...}`) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ Enables support for an alternate syntax for POSIX character classes; instead of writing `[:^alpha:]` when this is enabled, you can instead write `\p{^alpha}`. @@ -636,7 +636,7 @@ _(not presently used)_ ### 19. ONIG_SYN_OP2_ESC_H_XDIGIT (enable `\h` and `\H`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ Enables support for the Ruby-specific shorthand `\h` and `\H` metacharacters. Somewhat like `\d` matches decimal digits, `\h` matches hexadecimal digits — that is, @@ -658,7 +658,7 @@ You usually do not want this flag to be enabled. ### 21. ONIG_SYN_OP2_QMARK_LPAREN_IF_ELSE (enable `(?(...)then|else)`) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ Enables support for conditional inclusion of subsequent regex patterns based on whether a prior named or numbered capture matched, or based on whether a pattern will @@ -676,7 +676,7 @@ match. This supports many different forms, including: ### 22. ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP (enable `\K`) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ Enables support for `\K`, which excludes all content before it from the overall regex match (i.e., capture #0). So, for example, pattern `foo\Kbar` would match @@ -687,7 +687,7 @@ regex match (i.e., capture #0). So, for example, pattern `foo\Kbar` would match ### 23. ONIG_SYN_OP2_ESC_CAPITAL_R_GENERAL_NEWLINE (enable `\R`) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ Enables support for `\R`, the "general newline" shorthand, which matches `(\r\n|[\n\v\f\r\u0085\u2028\u2029])` (obviously, the Unicode values are cannot be @@ -698,7 +698,7 @@ matched in ASCII encodings). ### 24. 
ONIG_SYN_OP2_ESC_CAPITAL_N_O_SUPER_DOT (enable `\N` and `\O`) -_Set in: Perl, Perl_NG, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG_ Enables support for `\N` and `\O`. `\N` is "not a line break," which is much like the standard `.` metacharacter, except that while `.` can be affected by @@ -713,7 +713,7 @@ multi-line mode are enabled or disabled. ### 25. ONIG_SYN_OP2_QMARK_TILDE_ABSENT_GROUP (enable `(?~...)`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ Enables support for the `(?~r)` "absent operator" syntax, which matches as much as possible as long as the result _doesn't_ match pattern `r`. This is @@ -731,7 +731,7 @@ excellent article about it is [available on Medium](https://medium.com/rubyinsid ### 26. ONIG_SYN_OP2_ESC_X_Y_TEXT_SEGMENT (enable `\X` and `\Y` and `\y`) -_Set in: Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG, Ruby_ `\X` is another variation on `.`, designed to support Unicode, in that it matches a full _grapheme cluster_. In Unicode, `à` can be encoded as one code point, @@ -764,7 +764,7 @@ backreferences. ### 28. ONIG_SYN_OP2_QMARK_BRACE_CALLOUT_CONTENTS (enable `(?{...})`) -_Set in: Perl, Perl_NG, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG_ Enables support for Perl-style "callouts" — pattern substitutions that result from invoking a callback method. When `(?{foo})` is reached in a pattern, the callback @@ -779,7 +779,7 @@ Full documentation for this advanced feature can be found in the Oniguruma ### 29. ONIG_SYN_OP2_ASTERISK_CALLOUT_NAME (enable `(*name)`) -_Set in: Perl, Perl_NG, Oniguruma_ +_Set in: Oniguruma, Perl, Perl_NG_ Enables support for Perl-style "callouts" — pattern substitutions that result from invoking a callback method. When `(*foo)` is reached in a pattern, the callback @@ -820,7 +820,7 @@ some syntaxes but not in others. ### 0. 
ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS (independent `?`, `*`, `+`, `{n,m}`) -_Set in: PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ This flag specifies how to handle operators like `?` and `*` when they aren't directly attached to an operand, as in `^*` or `(*)`: Are they an error, are @@ -830,7 +830,7 @@ determines if they are errors or if they are discarded. ### 1. ONIG_SYN_CONTEXT_INVALID_REPEAT_OPS (error or ignore independent operators) -_Set in: PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ If ONIG_SYN_CONTEXT_INDEP_REPEAT_OPS is set, this flag controls what happens when independent operators appear in a pattern: If this flag is set, then independent @@ -847,7 +847,7 @@ character will produce an error message. ### 3. ONIG_SYN_ALLOW_INVALID_INTERVAL (allow `{???`) -_Set in: GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, GnuRegex, Java, Perl, Perl_NG, Ruby_ This flag, if set, causes an invalid range, like `foo{bar}` or `foo{}`, to be silently discarded, as if `foo` had been written instead. If clear, an invalid @@ -855,7 +855,7 @@ range will produce an error message. ### 4. ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV (allow `{,n}` to mean `{0,n}`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ If this flag is set, then `r{,n}` will be treated as equivalent to writing `{0,n}`. If this flag is clear, then `r{,n}` will produce an error message. @@ -876,7 +876,7 @@ No built-in syntax has this flag enabled. ### 6. ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND (allow `(?<=a|bc)`) -_Set in: Java, Ruby, Oniguruma_ +_Set in: Oniguruma, Java, Ruby_ If this flag is set, lookbehind patterns with alternate options may have differing lengths among those options. If this flag is clear, lookbehind patterns with options @@ -888,7 +888,7 @@ depend on this rule. ### 7. 
ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP (prefer `\k<name>` over `\3`) -_Set in: Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl_NG, Ruby_ If this flag is set on the syntax *and* ONIG_OPTION_CAPTURE_GROUP is set when calling Oniguruma, then if a name is used on any capture, all captures must also use names: A @@ -896,20 +896,33 @@ single use of a named capture prohibits the use of numbered captures. ### 8. ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME (allow `(?<x>)...(?<x>)`) -_Set in: Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, Perl_NG, Ruby_ If this flag is set, multiple capture groups may use the same name. If this flag is clear, then reuse of a name will produce an error message. ### 9. ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY (`a{n}?` is equivalent to `(?:a{n})?`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ If this flag is set, then intervals of a fixed size will ignore a lazy (non-greedy) `?` quantifier and treat it as an optional match (an ordinary `r?`), since "match as little as possible" is meaningless for a fixed-size interval. If this flag is clear, then `r{n}?` will mean the same as `r{n}`, and the useless `?` will be discarded. +### 10. ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH (`..(?i)..`) + +_Set in: Perl, Perl_NG, Java_ + +If this flag is set, then an isolated option doesn't break the branch and remains in effect until the end of the group (or end of the pattern). +If this flag is not set, then an isolated option is interpreted as the starting point of a new branch. /a(?i)b|c/ ==> /a(?i:b|c)/ + +### 11. ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND (`(?<=...a+...)`) + +_Set in: Oniguruma, Java_ + +If this flag is set, then variable-length expressions are allowed in look-behind. + ### 20. ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC (add `\n` to `[^...]`) _Set in: Grep_ @@ -921,7 +934,7 @@ only exclude those characters and ranges written in them. ### 21. 
ONIG_SYN_BACKSLASH_ESCAPE_IN_CC (allow `[...\w...]`) -_Set in: GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, GnuRegex, Java, Perl, Perl_NG, Ruby_ If this flag is set, shorthands like `\w` are allowed to describe characters in character classes. If this flag is clear, shorthands like `\w` are treated as a redundantly-escaped @@ -937,7 +950,7 @@ character ranges will produce an error message. ### 23. ONIG_SYN_ALLOW_DOUBLE_RANGE_OP_IN_CC (treat `[0-9-a]` as `[0-9\-a]`) -_Set in: PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ If this flag is set, then a trailing `-` after a character range will be taken as a literal `-`, as if it had been escaped as `\-`. If this flag is clear, then a trailing @@ -945,7 +958,7 @@ literal `-`, as if it had been escaped as `\-`.  If this flag is clear, then a t ### 24. ONIG_SYN_WARN_CC_OP_NOT_ESCAPED (warn on `[[...]` and `[-x]`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ If this flag is set, Oniguruma will be stricter about warning for bad forms in character classes: `[[...]` will produce a warning, but `[\[...]` will not; @@ -955,7 +968,7 @@ will be silently discarded. ### 25. ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT (warn on `(?:a*)+`) -_Set in: Ruby, Oniguruma_ +_Set in: Oniguruma, Ruby_ If this flag is set, Oniguruma will warn about nested repeat operators that have no meaning, like `(?:a*)+`. If this flag is clear, Oniguruma will allow the nested repeat operators without warning about them. @@ -968,7 +981,7 @@ If this flag is set, then invalid code points at the end of range in character c ### 31. ONIG_SYN_CONTEXT_INDEP_ANCHORS -_Set in: PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby, Oniguruma_ +_Set in: Oniguruma, PosixExtended, GnuRegex, Java, Perl, Perl_NG, Ruby_ Not currently used, and does nothing. (But still set in several syntaxes for some reason.) 
@@ -1062,10 +1075,12 @@ These tables show which of the built-in syntaxes use which flags and options, fo | 3 | `ONIG_SYN_ALLOW_INVALID_INTERVAL` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | | 4 | `ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV` | - | - | - | - | - | - | - | - | Yes | Yes | | 5 | `ONIG_SYN_STRICT_CHECK_BACKREF` | - | - | - | - | - | - | - | - | - | - | -| 6 | `ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND` | - | - | - | - | - | - | - | Yes | Yes | Yes | +| 6 | `ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND` | - | - | - | - | - | Yes | - | - | Yes | Yes | | 7 | `ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP` | - | - | - | - | - | - | - | Yes | Yes | Yes | | 8 | `ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME` | - | - | - | - | - | - | - | Yes | Yes | Yes | | 9 | `ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY` | - | - | - | - | - | - | - | - | Yes | Yes | +| 10 | `ONIG_SYN_ISOLATED_OPTION_CONTINUE_BRANCH` | - | - | - | - | - | Yes | Yes | Yes | - | - | +| 11 | `ONIG_SYN_VARIABLE_LEN_LOOK_BEHIND` | - | - | - | - | - | Yes | - | - | - | Yes | | 20 | `ONIG_SYN_NOT_NEWLINE_IN_NEGATIVE_CC` | - | - | - | Yes | - | - | - | - | - | - | | 21 | `ONIG_SYN_BACKSLASH_ESCAPE_IN_CC` | - | - | - | - | Yes | Yes | Yes | Yes | Yes | Yes | | 22 | `ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC` | - | - | Yes | Yes | - | - | - | - | - | - | diff --git a/doc/UNICODE_PROPERTIES b/doc/UNICODE_PROPERTIES index 24c2031..2227ada 100644 --- a/doc/UNICODE_PROPERTIES +++ b/doc/UNICODE_PROPERTIES @@ -1,4 +1,4 @@ -Unicode Properties (Unicode Version: 12.1.0, Emoji: 12.1) +Unicode Properties (Unicode Version: 13.0.0, Emoji: 13.0) 15: ASCII_Hex_Digit 16: Adlam @@ -38,225 +38,229 @@ Unicode Properties (Unicode Version: 12.1.0, Emoji: 12.1) 50: Changes_When_Titlecased 51: Changes_When_Uppercased 52: Cherokee - 53: Cn - 54: Co - 55: Common - 56: Coptic - 57: Cs - 58: Cuneiform - 59: Cypriot - 60: Cyrillic - 61: Dash - 62: Default_Ignorable_Code_Point - 63: Deprecated - 64: Deseret - 65: Devanagari - 66: Diacritic - 67: Dogra - 68: Duployan - 
69: Egyptian_Hieroglyphs - 70: Elbasan - 71: Elymaic - 72: Emoji - 73: Emoji_Component - 74: Emoji_Modifier - 75: Emoji_Modifier_Base - 76: Emoji_Presentation - 77: Ethiopic - 78: Extended_Pictographic - 79: Extender - 80: Georgian - 81: Glagolitic - 82: Gothic - 83: Grantha - 84: Grapheme_Base - 85: Grapheme_Extend - 86: Grapheme_Link - 87: Greek - 88: Gujarati - 89: Gunjala_Gondi - 90: Gurmukhi - 91: Han - 92: Hangul - 93: Hanifi_Rohingya - 94: Hanunoo - 95: Hatran - 96: Hebrew - 97: Hex_Digit - 98: Hiragana - 99: Hyphen -100: IDS_Binary_Operator -101: IDS_Trinary_Operator -102: ID_Continue -103: ID_Start -104: Ideographic -105: Imperial_Aramaic -106: Inherited -107: Inscriptional_Pahlavi -108: Inscriptional_Parthian -109: Javanese -110: Join_Control -111: Kaithi -112: Kannada -113: Katakana -114: Kayah_Li -115: Kharoshthi -116: Khmer -117: Khojki -118: Khudawadi -119: L -120: LC -121: Lao -122: Latin -123: Lepcha -124: Limbu -125: Linear_A -126: Linear_B -127: Lisu -128: Ll -129: Lm -130: Lo -131: Logical_Order_Exception -132: Lowercase -133: Lt -134: Lu -135: Lycian -136: Lydian -137: M -138: Mahajani -139: Makasar -140: Malayalam -141: Mandaic -142: Manichaean -143: Marchen -144: Masaram_Gondi -145: Math -146: Mc -147: Me -148: Medefaidrin -149: Meetei_Mayek -150: Mende_Kikakui -151: Meroitic_Cursive -152: Meroitic_Hieroglyphs -153: Miao -154: Mn -155: Modi -156: Mongolian -157: Mro -158: Multani -159: Myanmar -160: N -161: Nabataean -162: Nandinagari -163: Nd -164: New_Tai_Lue -165: Newa -166: Nko -167: Nl -168: No -169: Noncharacter_Code_Point -170: Nushu -171: Nyiakeng_Puachue_Hmong -172: Ogham -173: Ol_Chiki -174: Old_Hungarian -175: Old_Italic -176: Old_North_Arabian -177: Old_Permic -178: Old_Persian -179: Old_Sogdian -180: Old_South_Arabian -181: Old_Turkic -182: Oriya -183: Osage -184: Osmanya -185: Other_Alphabetic -186: Other_Default_Ignorable_Code_Point -187: Other_Grapheme_Extend -188: Other_ID_Continue -189: Other_ID_Start -190: Other_Lowercase 
-191: Other_Math -192: Other_Uppercase -193: P -194: Pahawh_Hmong -195: Palmyrene -196: Pattern_Syntax -197: Pattern_White_Space -198: Pau_Cin_Hau -199: Pc -200: Pd -201: Pe -202: Pf -203: Phags_Pa -204: Phoenician -205: Pi -206: Po -207: Prepended_Concatenation_Mark -208: Ps -209: Psalter_Pahlavi -210: Quotation_Mark -211: Radical -212: Regional_Indicator -213: Rejang -214: Runic -215: S -216: Samaritan -217: Saurashtra -218: Sc -219: Sentence_Terminal -220: Sharada -221: Shavian -222: Siddham -223: SignWriting -224: Sinhala -225: Sk -226: Sm -227: So -228: Soft_Dotted -229: Sogdian -230: Sora_Sompeng -231: Soyombo -232: Sundanese -233: Syloti_Nagri -234: Syriac -235: Tagalog -236: Tagbanwa -237: Tai_Le -238: Tai_Tham -239: Tai_Viet -240: Takri -241: Tamil -242: Tangut -243: Telugu -244: Terminal_Punctuation -245: Thaana -246: Thai -247: Tibetan -248: Tifinagh -249: Tirhuta -250: Ugaritic -251: Unified_Ideograph -252: Unknown -253: Uppercase -254: Vai -255: Variation_Selector -256: Wancho -257: Warang_Citi -258: White_Space -259: XID_Continue -260: XID_Start -261: Yi -262: Z -263: Zanabazar_Square -264: Zl -265: Zp -266: Zs + 53: Chorasmian + 54: Cn + 55: Co + 56: Common + 57: Coptic + 58: Cs + 59: Cuneiform + 60: Cypriot + 61: Cyrillic + 62: Dash + 63: Default_Ignorable_Code_Point + 64: Deprecated + 65: Deseret + 66: Devanagari + 67: Diacritic + 68: Dives_Akuru + 69: Dogra + 70: Duployan + 71: Egyptian_Hieroglyphs + 72: Elbasan + 73: Elymaic + 74: Emoji + 75: Emoji_Component + 76: Emoji_Modifier + 77: Emoji_Modifier_Base + 78: Emoji_Presentation + 79: Ethiopic + 80: Extended_Pictographic + 81: Extender + 82: Georgian + 83: Glagolitic + 84: Gothic + 85: Grantha + 86: Grapheme_Base + 87: Grapheme_Extend + 88: Grapheme_Link + 89: Greek + 90: Gujarati + 91: Gunjala_Gondi + 92: Gurmukhi + 93: Han + 94: Hangul + 95: Hanifi_Rohingya + 96: Hanunoo + 97: Hatran + 98: Hebrew + 99: Hex_Digit +100: Hiragana +101: Hyphen +102: IDS_Binary_Operator +103: IDS_Trinary_Operator 
+104: ID_Continue +105: ID_Start +106: Ideographic +107: Imperial_Aramaic +108: Inherited +109: Inscriptional_Pahlavi +110: Inscriptional_Parthian +111: Javanese +112: Join_Control +113: Kaithi +114: Kannada +115: Katakana +116: Kayah_Li +117: Kharoshthi +118: Khitan_Small_Script +119: Khmer +120: Khojki +121: Khudawadi +122: L +123: LC +124: Lao +125: Latin +126: Lepcha +127: Limbu +128: Linear_A +129: Linear_B +130: Lisu +131: Ll +132: Lm +133: Lo +134: Logical_Order_Exception +135: Lowercase +136: Lt +137: Lu +138: Lycian +139: Lydian +140: M +141: Mahajani +142: Makasar +143: Malayalam +144: Mandaic +145: Manichaean +146: Marchen +147: Masaram_Gondi +148: Math +149: Mc +150: Me +151: Medefaidrin +152: Meetei_Mayek +153: Mende_Kikakui +154: Meroitic_Cursive +155: Meroitic_Hieroglyphs +156: Miao +157: Mn +158: Modi +159: Mongolian +160: Mro +161: Multani +162: Myanmar +163: N +164: Nabataean +165: Nandinagari +166: Nd +167: New_Tai_Lue +168: Newa +169: Nko +170: Nl +171: No +172: Noncharacter_Code_Point +173: Nushu +174: Nyiakeng_Puachue_Hmong +175: Ogham +176: Ol_Chiki +177: Old_Hungarian +178: Old_Italic +179: Old_North_Arabian +180: Old_Permic +181: Old_Persian +182: Old_Sogdian +183: Old_South_Arabian +184: Old_Turkic +185: Oriya +186: Osage +187: Osmanya +188: Other_Alphabetic +189: Other_Default_Ignorable_Code_Point +190: Other_Grapheme_Extend +191: Other_ID_Continue +192: Other_ID_Start +193: Other_Lowercase +194: Other_Math +195: Other_Uppercase +196: P +197: Pahawh_Hmong +198: Palmyrene +199: Pattern_Syntax +200: Pattern_White_Space +201: Pau_Cin_Hau +202: Pc +203: Pd +204: Pe +205: Pf +206: Phags_Pa +207: Phoenician +208: Pi +209: Po +210: Prepended_Concatenation_Mark +211: Ps +212: Psalter_Pahlavi +213: Quotation_Mark +214: Radical +215: Regional_Indicator +216: Rejang +217: Runic +218: S +219: Samaritan +220: Saurashtra +221: Sc +222: Sentence_Terminal +223: Sharada +224: Shavian +225: Siddham +226: SignWriting +227: Sinhala +228: Sk +229: Sm +230: So 
+231: Soft_Dotted +232: Sogdian +233: Sora_Sompeng +234: Soyombo +235: Sundanese +236: Syloti_Nagri +237: Syriac +238: Tagalog +239: Tagbanwa +240: Tai_Le +241: Tai_Tham +242: Tai_Viet +243: Takri +244: Tamil +245: Tangut +246: Telugu +247: Terminal_Punctuation +248: Thaana +249: Thai +250: Tibetan +251: Tifinagh +252: Tirhuta +253: Ugaritic +254: Unified_Ideograph +255: Unknown +256: Uppercase +257: Vai +258: Variation_Selector +259: Wancho +260: Warang_Citi +261: White_Space +262: XID_Continue +263: XID_Start +264: Yezidi +265: Yi +266: Z +267: Zanabazar_Square +268: Zl +269: Zp +270: Zs 16: Adlm 42: Aghb 15: AHex 21: Arab -105: Armi +107: Armi 22: Armn 24: Avst 25: Bali @@ -274,515 +278,532 @@ Unicode Properties (Unicode Version: 12.1.0, Emoji: 12.1) 45: Cakm 38: Cans 39: Cari -120: Cased_Letter +123: Cased_Letter 52: Cher + 53: Chrs 40: CI -201: Close_Punctuation -137: Combining_Mark -199: Connector_Punctuation +204: Close_Punctuation +140: Combining_Mark +202: Connector_Punctuation 43: Control - 56: Copt - 59: Cprt -218: Currency_Symbol + 57: Copt + 60: Cprt +221: Currency_Symbol 47: CWCF 48: CWCM 49: CWL 50: CWT 51: CWU - 60: Cyrl -200: Dash_Punctuation -163: Decimal_Number - 63: Dep - 65: Deva - 62: DI - 66: Dia - 67: Dogr - 64: Dsrt - 68: Dupl - 69: Egyp - 70: Elba - 71: Elym -147: Enclosing_Mark - 77: Ethi - 79: Ext -202: Final_Punctuation + 61: Cyrl +203: Dash_Punctuation +166: Decimal_Number + 64: Dep + 66: Deva + 63: DI + 67: Dia + 68: Diak + 69: Dogr + 65: Dsrt + 70: Dupl + 77: EBase + 75: EComp + 71: Egyp + 72: Elba + 73: Elym + 76: EMod +150: Enclosing_Mark + 78: EPres + 79: Ethi + 81: Ext + 80: ExtPict +205: Final_Punctuation 44: Format - 80: Geor - 81: Glag - 89: Gong -144: Gonm - 82: Goth - 83: Gran - 84: Gr_Base - 87: Grek - 85: Gr_Ext - 86: Gr_Link - 88: Gujr - 90: Guru - 92: Hang - 91: Hani - 94: Hano - 95: Hatr - 96: Hebr - 97: Hex - 98: Hira + 82: Geor + 83: Glag + 91: Gong +147: Gonm + 84: Goth + 85: Gran + 86: Gr_Base + 89: Grek + 87: 
Gr_Ext + 88: Gr_Link + 90: Gujr + 92: Guru + 94: Hang + 93: Hani + 96: Hano + 97: Hatr + 98: Hebr + 99: Hex +100: Hira 19: Hluw -194: Hmng -171: Hmnp -174: Hung -102: IDC -104: Ideo -103: IDS -100: IDSB -101: IDST -205: Initial_Punctuation -175: Ital -109: Java -110: Join_C -114: Kali -113: Kana -115: Khar -116: Khmr -117: Khoj -112: Knda -111: Kthi -238: Lana -121: Laoo -122: Latn -123: Lepc -119: Letter -167: Letter_Number -124: Limb -125: Lina -126: Linb -264: Line_Separator -131: LOE -128: Lowercase_Letter -135: Lyci -136: Lydi -138: Mahj -139: Maka -141: Mand -142: Mani -143: Marc -137: Mark -226: Math_Symbol -148: Medf -150: Mend -151: Merc -152: Mero -140: Mlym -129: Modifier_Letter -225: Modifier_Symbol -156: Mong -157: Mroo -149: Mtei -158: Mult -159: Mymr -162: Nand -176: Narb -161: Nbat -169: NChar -166: Nkoo -154: Nonspacing_Mark -170: Nshu -160: Number -185: OAlpha -186: ODI -172: Ogam -187: OGr_Ext -188: OIDC -189: OIDS -173: Olck -190: OLower -191: OMath -208: Open_Punctuation -181: Orkh -182: Orya -183: Osge -184: Osma +197: Hmng +174: Hmnp +177: Hung +104: IDC +106: Ideo +105: IDS +102: IDSB +103: IDST +208: Initial_Punctuation +178: Ital +111: Java +112: Join_C +116: Kali +115: Kana +117: Khar +119: Khmr +120: Khoj +118: Kits +114: Knda +113: Kthi +241: Lana +124: Laoo +125: Latn +126: Lepc +122: Letter +170: Letter_Number +127: Limb +128: Lina +129: Linb +268: Line_Separator +134: LOE +131: Lowercase_Letter +138: Lyci +139: Lydi +141: Mahj +142: Maka +144: Mand +145: Mani +146: Marc +140: Mark +229: Math_Symbol +151: Medf +153: Mend +154: Merc +155: Mero +143: Mlym +132: Modifier_Letter +228: Modifier_Symbol +159: Mong +160: Mroo +152: Mtei +161: Mult +162: Mymr +165: Nand +179: Narb +164: Nbat +172: NChar +169: Nkoo +157: Nonspacing_Mark +173: Nshu +163: Number +188: OAlpha +189: ODI +175: Ogam +190: OGr_Ext +191: OIDC +192: OIDS +176: Olck +193: OLower +194: OMath +211: Open_Punctuation +184: Orkh +185: Orya +186: Osge +187: Osma 37: Other 
-130: Other_Letter -168: Other_Number -206: Other_Punctuation -227: Other_Symbol -192: OUpper -195: Palm -265: Paragraph_Separator -196: Pat_Syn -197: Pat_WS -198: Pauc -207: PCM -177: Perm -203: Phag -107: Phli -209: Phlp -204: Phnx -153: Plrd - 54: Private_Use -108: Prti -193: Punctuation - 56: Qaac -106: Qaai -210: QMark -212: RI -213: Rjng - 93: Rohg -214: Runr -216: Samr -180: Sarb -217: Saur -228: SD -262: Separator -223: Sgnw -221: Shaw -220: Shrd -222: Sidd -118: Sind -224: Sinh -229: Sogd -179: Sogo -230: Sora -231: Soyo -266: Space_Separator -146: Spacing_Mark -219: STerm -232: Sund - 57: Surrogate -233: Sylo -215: Symbol -234: Syrc -236: Tagb -240: Takr -237: Tale -164: Talu -241: Taml -242: Tang -239: Tavt -243: Telu -244: Term -248: Tfng -235: Tglg -245: Thaa -247: Tibt -249: Tirh -133: Titlecase_Letter -250: Ugar -251: UIdeo - 53: Unassigned -134: Uppercase_Letter -254: Vaii -255: VS -257: Wara -256: Wcho -258: WSpace -259: XIDC -260: XIDS -178: Xpeo - 58: Xsux -261: Yiii -263: Zanb -106: Zinh - 55: Zyyy -252: Zzzz -267: In_Basic_Latin -268: In_Latin_1_Supplement -269: In_Latin_Extended_A -270: In_Latin_Extended_B -271: In_IPA_Extensions -272: In_Spacing_Modifier_Letters -273: In_Combining_Diacritical_Marks -274: In_Greek_and_Coptic -275: In_Cyrillic -276: In_Cyrillic_Supplement -277: In_Armenian -278: In_Hebrew -279: In_Arabic -280: In_Syriac -281: In_Arabic_Supplement -282: In_Thaana -283: In_NKo -284: In_Samaritan -285: In_Mandaic -286: In_Syriac_Supplement -287: In_Arabic_Extended_A -288: In_Devanagari -289: In_Bengali -290: In_Gurmukhi -291: In_Gujarati -292: In_Oriya -293: In_Tamil -294: In_Telugu -295: In_Kannada -296: In_Malayalam -297: In_Sinhala -298: In_Thai -299: In_Lao -300: In_Tibetan -301: In_Myanmar -302: In_Georgian -303: In_Hangul_Jamo -304: In_Ethiopic -305: In_Ethiopic_Supplement -306: In_Cherokee -307: In_Unified_Canadian_Aboriginal_Syllabics -308: In_Ogham -309: In_Runic -310: In_Tagalog -311: In_Hanunoo -312: In_Buhid -313: 
In_Tagbanwa -314: In_Khmer -315: In_Mongolian -316: In_Unified_Canadian_Aboriginal_Syllabics_Extended -317: In_Limbu -318: In_Tai_Le -319: In_New_Tai_Lue -320: In_Khmer_Symbols -321: In_Buginese -322: In_Tai_Tham -323: In_Combining_Diacritical_Marks_Extended -324: In_Balinese -325: In_Sundanese -326: In_Batak -327: In_Lepcha -328: In_Ol_Chiki -329: In_Cyrillic_Extended_C -330: In_Georgian_Extended -331: In_Sundanese_Supplement -332: In_Vedic_Extensions -333: In_Phonetic_Extensions -334: In_Phonetic_Extensions_Supplement -335: In_Combining_Diacritical_Marks_Supplement -336: In_Latin_Extended_Additional -337: In_Greek_Extended -338: In_General_Punctuation -339: In_Superscripts_and_Subscripts -340: In_Currency_Symbols -341: In_Combining_Diacritical_Marks_for_Symbols -342: In_Letterlike_Symbols -343: In_Number_Forms -344: In_Arrows -345: In_Mathematical_Operators -346: In_Miscellaneous_Technical -347: In_Control_Pictures -348: In_Optical_Character_Recognition -349: In_Enclosed_Alphanumerics -350: In_Box_Drawing -351: In_Block_Elements -352: In_Geometric_Shapes -353: In_Miscellaneous_Symbols -354: In_Dingbats -355: In_Miscellaneous_Mathematical_Symbols_A -356: In_Supplemental_Arrows_A -357: In_Braille_Patterns -358: In_Supplemental_Arrows_B -359: In_Miscellaneous_Mathematical_Symbols_B -360: In_Supplemental_Mathematical_Operators -361: In_Miscellaneous_Symbols_and_Arrows -362: In_Glagolitic -363: In_Latin_Extended_C -364: In_Coptic -365: In_Georgian_Supplement -366: In_Tifinagh -367: In_Ethiopic_Extended -368: In_Cyrillic_Extended_A -369: In_Supplemental_Punctuation -370: In_CJK_Radicals_Supplement -371: In_Kangxi_Radicals -372: In_Ideographic_Description_Characters -373: In_CJK_Symbols_and_Punctuation -374: In_Hiragana -375: In_Katakana -376: In_Bopomofo -377: In_Hangul_Compatibility_Jamo -378: In_Kanbun -379: In_Bopomofo_Extended -380: In_CJK_Strokes -381: In_Katakana_Phonetic_Extensions -382: In_Enclosed_CJK_Letters_and_Months -383: In_CJK_Compatibility -384: 
In_CJK_Unified_Ideographs_Extension_A -385: In_Yijing_Hexagram_Symbols -386: In_CJK_Unified_Ideographs -387: In_Yi_Syllables -388: In_Yi_Radicals -389: In_Lisu -390: In_Vai -391: In_Cyrillic_Extended_B -392: In_Bamum -393: In_Modifier_Tone_Letters -394: In_Latin_Extended_D -395: In_Syloti_Nagri -396: In_Common_Indic_Number_Forms -397: In_Phags_pa -398: In_Saurashtra -399: In_Devanagari_Extended -400: In_Kayah_Li -401: In_Rejang -402: In_Hangul_Jamo_Extended_A -403: In_Javanese -404: In_Myanmar_Extended_B -405: In_Cham -406: In_Myanmar_Extended_A -407: In_Tai_Viet -408: In_Meetei_Mayek_Extensions -409: In_Ethiopic_Extended_A -410: In_Latin_Extended_E -411: In_Cherokee_Supplement -412: In_Meetei_Mayek -413: In_Hangul_Syllables -414: In_Hangul_Jamo_Extended_B -415: In_High_Surrogates -416: In_High_Private_Use_Surrogates -417: In_Low_Surrogates -418: In_Private_Use_Area -419: In_CJK_Compatibility_Ideographs -420: In_Alphabetic_Presentation_Forms -421: In_Arabic_Presentation_Forms_A -422: In_Variation_Selectors -423: In_Vertical_Forms -424: In_Combining_Half_Marks -425: In_CJK_Compatibility_Forms -426: In_Small_Form_Variants -427: In_Arabic_Presentation_Forms_B -428: In_Halfwidth_and_Fullwidth_Forms -429: In_Specials -430: In_Linear_B_Syllabary -431: In_Linear_B_Ideograms -432: In_Aegean_Numbers -433: In_Ancient_Greek_Numbers -434: In_Ancient_Symbols -435: In_Phaistos_Disc -436: In_Lycian -437: In_Carian -438: In_Coptic_Epact_Numbers -439: In_Old_Italic -440: In_Gothic -441: In_Old_Permic -442: In_Ugaritic -443: In_Old_Persian -444: In_Deseret -445: In_Shavian -446: In_Osmanya -447: In_Osage -448: In_Elbasan -449: In_Caucasian_Albanian -450: In_Linear_A -451: In_Cypriot_Syllabary -452: In_Imperial_Aramaic -453: In_Palmyrene -454: In_Nabataean -455: In_Hatran -456: In_Phoenician -457: In_Lydian -458: In_Meroitic_Hieroglyphs -459: In_Meroitic_Cursive -460: In_Kharoshthi -461: In_Old_South_Arabian -462: In_Old_North_Arabian -463: In_Manichaean -464: In_Avestan -465: 
In_Inscriptional_Parthian -466: In_Inscriptional_Pahlavi -467: In_Psalter_Pahlavi -468: In_Old_Turkic -469: In_Old_Hungarian -470: In_Hanifi_Rohingya -471: In_Rumi_Numeral_Symbols -472: In_Old_Sogdian -473: In_Sogdian -474: In_Elymaic -475: In_Brahmi -476: In_Kaithi -477: In_Sora_Sompeng -478: In_Chakma -479: In_Mahajani -480: In_Sharada -481: In_Sinhala_Archaic_Numbers -482: In_Khojki -483: In_Multani -484: In_Khudawadi -485: In_Grantha -486: In_Newa -487: In_Tirhuta -488: In_Siddham -489: In_Modi -490: In_Mongolian_Supplement -491: In_Takri -492: In_Ahom -493: In_Dogra -494: In_Warang_Citi -495: In_Nandinagari -496: In_Zanabazar_Square -497: In_Soyombo -498: In_Pau_Cin_Hau -499: In_Bhaiksuki -500: In_Marchen -501: In_Masaram_Gondi -502: In_Gunjala_Gondi -503: In_Makasar -504: In_Tamil_Supplement -505: In_Cuneiform -506: In_Cuneiform_Numbers_and_Punctuation -507: In_Early_Dynastic_Cuneiform -508: In_Egyptian_Hieroglyphs -509: In_Egyptian_Hieroglyph_Format_Controls -510: In_Anatolian_Hieroglyphs -511: In_Bamum_Supplement -512: In_Mro -513: In_Bassa_Vah -514: In_Pahawh_Hmong -515: In_Medefaidrin -516: In_Miao -517: In_Ideographic_Symbols_and_Punctuation -518: In_Tangut -519: In_Tangut_Components -520: In_Kana_Supplement -521: In_Kana_Extended_A -522: In_Small_Kana_Extension -523: In_Nushu -524: In_Duployan -525: In_Shorthand_Format_Controls -526: In_Byzantine_Musical_Symbols -527: In_Musical_Symbols -528: In_Ancient_Greek_Musical_Notation -529: In_Mayan_Numerals -530: In_Tai_Xuan_Jing_Symbols -531: In_Counting_Rod_Numerals -532: In_Mathematical_Alphanumeric_Symbols -533: In_Sutton_SignWriting -534: In_Glagolitic_Supplement -535: In_Nyiakeng_Puachue_Hmong -536: In_Wancho -537: In_Mende_Kikakui -538: In_Adlam -539: In_Indic_Siyaq_Numbers -540: In_Ottoman_Siyaq_Numbers -541: In_Arabic_Mathematical_Alphabetic_Symbols -542: In_Mahjong_Tiles -543: In_Domino_Tiles -544: In_Playing_Cards -545: In_Enclosed_Alphanumeric_Supplement -546: In_Enclosed_Ideographic_Supplement 
-547: In_Miscellaneous_Symbols_and_Pictographs -548: In_Emoticons -549: In_Ornamental_Dingbats -550: In_Transport_and_Map_Symbols -551: In_Alchemical_Symbols -552: In_Geometric_Shapes_Extended -553: In_Supplemental_Arrows_C -554: In_Supplemental_Symbols_and_Pictographs -555: In_Chess_Symbols -556: In_Symbols_and_Pictographs_Extended_A -557: In_CJK_Unified_Ideographs_Extension_B -558: In_CJK_Unified_Ideographs_Extension_C -559: In_CJK_Unified_Ideographs_Extension_D -560: In_CJK_Unified_Ideographs_Extension_E -561: In_CJK_Unified_Ideographs_Extension_F -562: In_CJK_Compatibility_Ideographs_Supplement -563: In_Tags -564: In_Variation_Selectors_Supplement -565: In_Supplementary_Private_Use_Area_A -566: In_Supplementary_Private_Use_Area_B -567: In_No_Block +133: Other_Letter +171: Other_Number +209: Other_Punctuation +230: Other_Symbol +195: OUpper +198: Palm +269: Paragraph_Separator +199: Pat_Syn +200: Pat_WS +201: Pauc +210: PCM +180: Perm +206: Phag +109: Phli +212: Phlp +207: Phnx +156: Plrd + 55: Private_Use +110: Prti +196: Punctuation + 57: Qaac +108: Qaai +213: QMark +215: RI +216: Rjng + 95: Rohg +217: Runr +219: Samr +183: Sarb +220: Saur +231: SD +266: Separator +226: Sgnw +224: Shaw +223: Shrd +225: Sidd +121: Sind +227: Sinh +232: Sogd +182: Sogo +233: Sora +234: Soyo +270: Space_Separator +149: Spacing_Mark +222: STerm +235: Sund + 58: Surrogate +236: Sylo +218: Symbol +237: Syrc +239: Tagb +243: Takr +240: Tale +167: Talu +244: Taml +245: Tang +242: Tavt +246: Telu +247: Term +251: Tfng +238: Tglg +248: Thaa +250: Tibt +252: Tirh +136: Titlecase_Letter +253: Ugar +254: UIdeo + 54: Unassigned +137: Uppercase_Letter +257: Vaii +258: VS +260: Wara +259: Wcho +261: WSpace +262: XIDC +263: XIDS +181: Xpeo + 59: Xsux +264: Yezi +265: Yiii +267: Zanb +108: Zinh + 56: Zyyy +255: Zzzz +271: In_Basic_Latin +272: In_Latin_1_Supplement +273: In_Latin_Extended_A +274: In_Latin_Extended_B +275: In_IPA_Extensions +276: In_Spacing_Modifier_Letters +277: 
In_Combining_Diacritical_Marks +278: In_Greek_and_Coptic +279: In_Cyrillic +280: In_Cyrillic_Supplement +281: In_Armenian +282: In_Hebrew +283: In_Arabic +284: In_Syriac +285: In_Arabic_Supplement +286: In_Thaana +287: In_NKo +288: In_Samaritan +289: In_Mandaic +290: In_Syriac_Supplement +291: In_Arabic_Extended_A +292: In_Devanagari +293: In_Bengali +294: In_Gurmukhi +295: In_Gujarati +296: In_Oriya +297: In_Tamil +298: In_Telugu +299: In_Kannada +300: In_Malayalam +301: In_Sinhala +302: In_Thai +303: In_Lao +304: In_Tibetan +305: In_Myanmar +306: In_Georgian +307: In_Hangul_Jamo +308: In_Ethiopic +309: In_Ethiopic_Supplement +310: In_Cherokee +311: In_Unified_Canadian_Aboriginal_Syllabics +312: In_Ogham +313: In_Runic +314: In_Tagalog +315: In_Hanunoo +316: In_Buhid +317: In_Tagbanwa +318: In_Khmer +319: In_Mongolian +320: In_Unified_Canadian_Aboriginal_Syllabics_Extended +321: In_Limbu +322: In_Tai_Le +323: In_New_Tai_Lue +324: In_Khmer_Symbols +325: In_Buginese +326: In_Tai_Tham +327: In_Combining_Diacritical_Marks_Extended +328: In_Balinese +329: In_Sundanese +330: In_Batak +331: In_Lepcha +332: In_Ol_Chiki +333: In_Cyrillic_Extended_C +334: In_Georgian_Extended +335: In_Sundanese_Supplement +336: In_Vedic_Extensions +337: In_Phonetic_Extensions +338: In_Phonetic_Extensions_Supplement +339: In_Combining_Diacritical_Marks_Supplement +340: In_Latin_Extended_Additional +341: In_Greek_Extended +342: In_General_Punctuation +343: In_Superscripts_and_Subscripts +344: In_Currency_Symbols +345: In_Combining_Diacritical_Marks_for_Symbols +346: In_Letterlike_Symbols +347: In_Number_Forms +348: In_Arrows +349: In_Mathematical_Operators +350: In_Miscellaneous_Technical +351: In_Control_Pictures +352: In_Optical_Character_Recognition +353: In_Enclosed_Alphanumerics +354: In_Box_Drawing +355: In_Block_Elements +356: In_Geometric_Shapes +357: In_Miscellaneous_Symbols +358: In_Dingbats +359: In_Miscellaneous_Mathematical_Symbols_A +360: In_Supplemental_Arrows_A +361: 
In_Braille_Patterns +362: In_Supplemental_Arrows_B +363: In_Miscellaneous_Mathematical_Symbols_B +364: In_Supplemental_Mathematical_Operators +365: In_Miscellaneous_Symbols_and_Arrows +366: In_Glagolitic +367: In_Latin_Extended_C +368: In_Coptic +369: In_Georgian_Supplement +370: In_Tifinagh +371: In_Ethiopic_Extended +372: In_Cyrillic_Extended_A +373: In_Supplemental_Punctuation +374: In_CJK_Radicals_Supplement +375: In_Kangxi_Radicals +376: In_Ideographic_Description_Characters +377: In_CJK_Symbols_and_Punctuation +378: In_Hiragana +379: In_Katakana +380: In_Bopomofo +381: In_Hangul_Compatibility_Jamo +382: In_Kanbun +383: In_Bopomofo_Extended +384: In_CJK_Strokes +385: In_Katakana_Phonetic_Extensions +386: In_Enclosed_CJK_Letters_and_Months +387: In_CJK_Compatibility +388: In_CJK_Unified_Ideographs_Extension_A +389: In_Yijing_Hexagram_Symbols +390: In_CJK_Unified_Ideographs +391: In_Yi_Syllables +392: In_Yi_Radicals +393: In_Lisu +394: In_Vai +395: In_Cyrillic_Extended_B +396: In_Bamum +397: In_Modifier_Tone_Letters +398: In_Latin_Extended_D +399: In_Syloti_Nagri +400: In_Common_Indic_Number_Forms +401: In_Phags_pa +402: In_Saurashtra +403: In_Devanagari_Extended +404: In_Kayah_Li +405: In_Rejang +406: In_Hangul_Jamo_Extended_A +407: In_Javanese +408: In_Myanmar_Extended_B +409: In_Cham +410: In_Myanmar_Extended_A +411: In_Tai_Viet +412: In_Meetei_Mayek_Extensions +413: In_Ethiopic_Extended_A +414: In_Latin_Extended_E +415: In_Cherokee_Supplement +416: In_Meetei_Mayek +417: In_Hangul_Syllables +418: In_Hangul_Jamo_Extended_B +419: In_High_Surrogates +420: In_High_Private_Use_Surrogates +421: In_Low_Surrogates +422: In_Private_Use_Area +423: In_CJK_Compatibility_Ideographs +424: In_Alphabetic_Presentation_Forms +425: In_Arabic_Presentation_Forms_A +426: In_Variation_Selectors +427: In_Vertical_Forms +428: In_Combining_Half_Marks +429: In_CJK_Compatibility_Forms +430: In_Small_Form_Variants +431: In_Arabic_Presentation_Forms_B +432: 
In_Halfwidth_and_Fullwidth_Forms +433: In_Specials +434: In_Linear_B_Syllabary +435: In_Linear_B_Ideograms +436: In_Aegean_Numbers +437: In_Ancient_Greek_Numbers +438: In_Ancient_Symbols +439: In_Phaistos_Disc +440: In_Lycian +441: In_Carian +442: In_Coptic_Epact_Numbers +443: In_Old_Italic +444: In_Gothic +445: In_Old_Permic +446: In_Ugaritic +447: In_Old_Persian +448: In_Deseret +449: In_Shavian +450: In_Osmanya +451: In_Osage +452: In_Elbasan +453: In_Caucasian_Albanian +454: In_Linear_A +455: In_Cypriot_Syllabary +456: In_Imperial_Aramaic +457: In_Palmyrene +458: In_Nabataean +459: In_Hatran +460: In_Phoenician +461: In_Lydian +462: In_Meroitic_Hieroglyphs +463: In_Meroitic_Cursive +464: In_Kharoshthi +465: In_Old_South_Arabian +466: In_Old_North_Arabian +467: In_Manichaean +468: In_Avestan +469: In_Inscriptional_Parthian +470: In_Inscriptional_Pahlavi +471: In_Psalter_Pahlavi +472: In_Old_Turkic +473: In_Old_Hungarian +474: In_Hanifi_Rohingya +475: In_Rumi_Numeral_Symbols +476: In_Yezidi +477: In_Old_Sogdian +478: In_Sogdian +479: In_Chorasmian +480: In_Elymaic +481: In_Brahmi +482: In_Kaithi +483: In_Sora_Sompeng +484: In_Chakma +485: In_Mahajani +486: In_Sharada +487: In_Sinhala_Archaic_Numbers +488: In_Khojki +489: In_Multani +490: In_Khudawadi +491: In_Grantha +492: In_Newa +493: In_Tirhuta +494: In_Siddham +495: In_Modi +496: In_Mongolian_Supplement +497: In_Takri +498: In_Ahom +499: In_Dogra +500: In_Warang_Citi +501: In_Dives_Akuru +502: In_Nandinagari +503: In_Zanabazar_Square +504: In_Soyombo +505: In_Pau_Cin_Hau +506: In_Bhaiksuki +507: In_Marchen +508: In_Masaram_Gondi +509: In_Gunjala_Gondi +510: In_Makasar +511: In_Lisu_Supplement +512: In_Tamil_Supplement +513: In_Cuneiform +514: In_Cuneiform_Numbers_and_Punctuation +515: In_Early_Dynastic_Cuneiform +516: In_Egyptian_Hieroglyphs +517: In_Egyptian_Hieroglyph_Format_Controls +518: In_Anatolian_Hieroglyphs +519: In_Bamum_Supplement +520: In_Mro +521: In_Bassa_Vah +522: In_Pahawh_Hmong +523: 
In_Medefaidrin +524: In_Miao +525: In_Ideographic_Symbols_and_Punctuation +526: In_Tangut +527: In_Tangut_Components +528: In_Khitan_Small_Script +529: In_Tangut_Supplement +530: In_Kana_Supplement +531: In_Kana_Extended_A +532: In_Small_Kana_Extension +533: In_Nushu +534: In_Duployan +535: In_Shorthand_Format_Controls +536: In_Byzantine_Musical_Symbols +537: In_Musical_Symbols +538: In_Ancient_Greek_Musical_Notation +539: In_Mayan_Numerals +540: In_Tai_Xuan_Jing_Symbols +541: In_Counting_Rod_Numerals +542: In_Mathematical_Alphanumeric_Symbols +543: In_Sutton_SignWriting +544: In_Glagolitic_Supplement +545: In_Nyiakeng_Puachue_Hmong +546: In_Wancho +547: In_Mende_Kikakui +548: In_Adlam +549: In_Indic_Siyaq_Numbers +550: In_Ottoman_Siyaq_Numbers +551: In_Arabic_Mathematical_Alphabetic_Symbols +552: In_Mahjong_Tiles +553: In_Domino_Tiles +554: In_Playing_Cards +555: In_Enclosed_Alphanumeric_Supplement +556: In_Enclosed_Ideographic_Supplement +557: In_Miscellaneous_Symbols_and_Pictographs +558: In_Emoticons +559: In_Ornamental_Dingbats +560: In_Transport_and_Map_Symbols +561: In_Alchemical_Symbols +562: In_Geometric_Shapes_Extended +563: In_Supplemental_Arrows_C +564: In_Supplemental_Symbols_and_Pictographs +565: In_Chess_Symbols +566: In_Symbols_and_Pictographs_Extended_A +567: In_Symbols_for_Legacy_Computing +568: In_CJK_Unified_Ideographs_Extension_B +569: In_CJK_Unified_Ideographs_Extension_C +570: In_CJK_Unified_Ideographs_Extension_D +571: In_CJK_Unified_Ideographs_Extension_E +572: In_CJK_Unified_Ideographs_Extension_F +573: In_CJK_Compatibility_Ideographs_Supplement +574: In_CJK_Unified_Ideographs_Extension_G +575: In_Tags +576: In_Variation_Selectors_Supplement +577: In_Supplementary_Private_Use_Area_A +578: In_Supplementary_Private_Use_Area_B +579: In_No_Block |