summary | refs | log | tree | commit | diff
path: root/doc
diff options
context:
space:
mode:
author Jörg Frings-Fürst <debian@jff.email> 2018-09-07 13:43:11 +0200
committer Jörg Frings-Fürst <debian@jff.email> 2018-09-07 13:43:11 +0200
commit 66dfd6613394a8903701840bbc9d67de537e597e (patch)
tree 3b0943539a09fe5322b0b984222abb7c60ddfacb /doc
parent 6bf91b141b7b3e3524f422d4c18bccf7f87aecd1 (diff)
parent 52796af94a055f1c704a604f4eac567242c845b7 (diff)
Merge branch 'release/debian/6.9.0-1'debian/6.9.0-1
Diffstat (limited to 'doc')
-rw-r--r-- doc/CALLOUTS.API 10
-rw-r--r-- doc/CALLOUTS.API.ja 20
-rw-r--r-- doc/RE 12
-rw-r--r-- doc/RE.ja 13
-rw-r--r-- doc/UNICODE_PROPERTIES 1463
5 files changed, 800 insertions, 718 deletions
diff --git a/doc/CALLOUTS.API b/doc/CALLOUTS.API
index 6003532..057a054 100644
--- a/doc/CALLOUTS.API
+++ b/doc/CALLOUTS.API
@@ -1,4 +1,4 @@
-Callouts API Version 6.8.2 2018/04/14
+Callouts API Version 6.8.2 2018/06/08
#include <oniguruma.h>
@@ -24,9 +24,9 @@ Callouts API Version 6.8.2 2018/04/14
* Callout function return value (int)
- ONIG_CALLOUT_FAIL(1): fail
- ONIG_CALLOUT_SUCCESS(0): success
- less than -1: error code (terminate search/match)
+ ONIG_CALLOUT_FAIL (== 1): fail
+ ONIG_CALLOUT_SUCCESS (== 0): success
+ less than -1: error code (terminate search/match)
ONIG_CALLOUT_FAIL/SUCCESS values are ignored in retractions,
because retraction is a part of the recovery process after failure.
@@ -196,7 +196,7 @@ Callouts API Version 6.8.2 2018/04/14
# const OnigUChar* onig_get_string_by_callout_args(OnigCalloutArgs* args)
- Returns the subject string adress.
+ Returns the subject string address.
This is the second argument(str) of onig_search().
diff --git a/doc/CALLOUTS.API.ja b/doc/CALLOUTS.API.ja
index 49d0689..c56555a 100644
--- a/doc/CALLOUTS.API.ja
+++ b/doc/CALLOUTS.API.ja
@@ -1,4 +1,4 @@
-Callouts API Version 6.8.2 2018/04/13
+Callouts API Version 6.8.2 2018/06/08
#include <oniguruma.h>
@@ -24,9 +24,9 @@ Callouts API Version 6.8.2 2018/04/13
* 呼び出し関数の戻り値 (int)
- ONIG_CALLOUT_FAIL(1): 失敗
- ONIG_CALLOUT_SUCCESS(0): 成功
- -1未満: エラーコード (検索/照合の終了)
+ ONIG_CALLOUT_FAIL (== 1): 失敗
+ ONIG_CALLOUT_SUCCESS (== 0): 成功
+ -1未満: エラーコード (検索/照合の終了)
ONIG_CALLOUT_FAIL/SUCCESSは、後退中の呼び出しでは無視される。
後退は失敗の回復過程なので。
@@ -44,12 +44,12 @@ Callouts API Version 6.8.2 2018/04/13
# OnigCalloutFunc onig_get_progress_callout(void)
- 内容の呼び出し関数(前進)を返す
+ 内容の呼び出し関数(前進中)を返す
# int onig_set_progress_callout(OnigCalloutFunc f)
- 内容の呼び出し関数(前進)をセットする。
+ 内容の呼び出し関数(前進中)をセットする。
この値はonig_initialize_match_param()の中でデフォルトの呼び出し関数として
セットされる。
@@ -58,12 +58,12 @@ Callouts API Version 6.8.2 2018/04/13
# OnigCalloutFunc onig_get_retraction_callout(void)
- 内容の呼び出し関数(後退)を返す
+ 内容の呼び出し関数(後退中)を返す
# int onig_set_retraction_callout(OnigCalloutFunc f)
- 内容の呼び出し関数(後退)をセットする。
+ 内容の呼び出し関数(後退中)をセットする。
この値はonig_initialize_match_param()の中でデフォルトの呼び出し関数として
セットされる。
@@ -72,7 +72,7 @@ Callouts API Version 6.8.2 2018/04/13
# int onig_set_progress_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f)
- 内容の呼び出し関数(前進)をセットする。
+ 内容の呼び出し関数(前進中)をセットする。
引数
1 mp: match-paramアドレス
@@ -83,7 +83,7 @@ Callouts API Version 6.8.2 2018/04/13
# int onig_set_retraction_callout_of_match_param(OnigMatchParam* mp, OnigCalloutFunc f)
- 内容の呼び出し関数(後退)をセットする。
+ 内容の呼び出し関数(後退中)をセットする。
引数
1 mp: match-paramアドレス
diff --git a/doc/RE b/doc/RE
index e75daad..963d009 100644
--- a/doc/RE
+++ b/doc/RE
@@ -1,4 +1,4 @@
-Oniguruma Regular Expressions Version 6.8.0 2018/04/13
+Oniguruma Regular Expressions Version 6.8.0 2018/07/26
syntax: ONIG_SYNTAX_ONIGURUMA (default)
@@ -221,7 +221,8 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
(?#...) comment
- (?imxWDSP-imxWDSP) option on/off
+ (?imxWDSP-imxWDSP:subexp) option on/off for subexp
+
i: ignore case
m: multi-line (dot (.) also matches newline)
x: extended form
@@ -233,7 +234,11 @@ syntax: ONIG_SYNTAX_ONIGURUMA (default)
(alnum, alpha, blank, cntrl, digit, graph,
lower, print, punct, space, upper, xdigit, word)
- (?imxWDSP-imxWDSP:subexp) option on/off for subexp
+ (?imxWDSP-imxWDSP) isolated option
+
+ * It forms a group that extends to the next ')' or to the end of the pattern.
+ /ab(?i)c|def|gh/ == /ab(?i:c|def|gh)/
+
(?:subexp) non-capturing group
(subexp) capturing group
@@ -475,7 +480,6 @@ A-3. Missing features compared with perl 5.8.0
+ \N{name}
+ \l,\u,\L,\U,\C
- + (?{code})
+ (??{code})
* \Q...\E
diff --git a/doc/RE.ja b/doc/RE.ja
index 8cc0990..a83bfb4 100644
--- a/doc/RE.ja
+++ b/doc/RE.ja
@@ -1,4 +1,4 @@
-鬼車 正規表現 Version 6.8.0 2018/04/13
+鬼車 正規表現 Version 6.8.0 2018/07/26
使用文法: ONIG_SYNTAX_ONIGURUMA (既定値)
@@ -219,7 +219,9 @@
7. 拡張式集合
(?#...) 注釈
- (?imxWDSP-imxWDSP) 孤立オプション
+
+ (?imxWDSP-imxWDSP:式) 式オプション
+
i: 大文字小文字照合
m: 複数行
x: 拡張形式
@@ -231,7 +233,11 @@
(alnum, alpha, blank, cntrl, digit, graph,
lower, print, punct, space, upper, xdigit, word)
- (?imxWDSP-imxWDSP:式) 式オプション
+ (?imxWDSP-imxWDSP) 孤立オプション
+
+ * これは次の')'またはパターンの終わりまでのグループを形成する
+ /ab(?i)c|def|gh/ == /ab(?i:c|def|gh)/
+
(式) 捕獲式集合
(?:式) 非捕獲式集合
@@ -485,7 +491,6 @@
+ \N{name}
+ \l,\u,\L,\U,\C
- + (?{code})
+ (??{code})
* \Q...\E
diff --git a/doc/UNICODE_PROPERTIES b/doc/UNICODE_PROPERTIES
index 8521f0c..1f961eb 100644
--- a/doc/UNICODE_PROPERTIES
+++ b/doc/UNICODE_PROPERTIES
@@ -1,698 +1,771 @@
-Unicode Properties (from Unicode Version: 8.0.0)
+Unicode Properties (from Unicode Version: 11.0.0)
15: ASCII_Hex_Digit
- 16: Ahom
- 17: Alphabetic
- 18: Anatolian_Hieroglyphs
- 19: Any
- 20: Arabic
- 21: Armenian
- 22: Assigned
- 23: Avestan
- 24: Balinese
- 25: Bamum
- 26: Bassa_Vah
- 27: Batak
- 28: Bengali
- 29: Bidi_Control
- 30: Bopomofo
- 31: Brahmi
- 32: Braille
- 33: Buginese
- 34: Buhid
- 35: C
- 36: Canadian_Aboriginal
- 37: Carian
- 38: Case_Ignorable
- 39: Cased
- 40: Caucasian_Albanian
- 41: Cc
- 42: Cf
- 43: Chakma
- 44: Cham
- 45: Changes_When_Casefolded
- 46: Changes_When_Casemapped
- 47: Changes_When_Lowercased
- 48: Changes_When_Titlecased
- 49: Changes_When_Uppercased
- 50: Cherokee
- 51: Cn
- 52: Co
- 53: Common
- 54: Coptic
- 55: Cs
- 56: Cuneiform
- 57: Cypriot
- 58: Cyrillic
- 59: Dash
- 60: Default_Ignorable_Code_Point
- 61: Deprecated
- 62: Deseret
- 63: Devanagari
- 64: Diacritic
- 65: Duployan
- 66: Egyptian_Hieroglyphs
- 67: Elbasan
- 68: Ethiopic
- 69: Extender
- 70: Georgian
- 71: Glagolitic
- 72: Gothic
- 73: Grantha
- 74: Grapheme_Base
- 75: Grapheme_Extend
- 76: Grapheme_Link
- 77: Greek
- 78: Gujarati
- 79: Gurmukhi
- 80: Han
- 81: Hangul
- 82: Hanunoo
- 83: Hatran
- 84: Hebrew
- 85: Hex_Digit
- 86: Hiragana
- 87: Hyphen
- 88: IDS_Binary_Operator
- 89: IDS_Trinary_Operator
- 90: ID_Continue
- 91: ID_Start
- 92: Ideographic
- 93: Imperial_Aramaic
- 94: Inherited
- 95: Inscriptional_Pahlavi
- 96: Inscriptional_Parthian
- 97: Javanese
- 98: Join_Control
- 99: Kaithi
-100: Kannada
-101: Katakana
-102: Kayah_Li
-103: Kharoshthi
-104: Khmer
-105: Khojki
-106: Khudawadi
-107: L
-108: LC
-109: Lao
-110: Latin
-111: Lepcha
-112: Limbu
-113: Linear_A
-114: Linear_B
-115: Lisu
-116: Ll
-117: Lm
-118: Lo
-119: Logical_Order_Exception
-120: Lowercase
-121: Lt
-122: Lu
-123: Lycian
-124: Lydian
-125: M
-126: Mahajani
-127: Malayalam
-128: Mandaic
-129: Manichaean
-130: Math
-131: Mc
-132: Me
-133: Meetei_Mayek
-134: Mende_Kikakui
-135: Meroitic_Cursive
-136: Meroitic_Hieroglyphs
-137: Miao
-138: Mn
-139: Modi
-140: Mongolian
-141: Mro
-142: Multani
-143: Myanmar
-144: N
-145: Nabataean
-146: Nd
-147: New_Tai_Lue
-148: Nko
-149: Nl
-150: No
-151: Noncharacter_Code_Point
-152: Ogham
-153: Ol_Chiki
-154: Old_Hungarian
-155: Old_Italic
-156: Old_North_Arabian
-157: Old_Permic
-158: Old_Persian
-159: Old_South_Arabian
-160: Old_Turkic
-161: Oriya
-162: Osmanya
-163: Other_Alphabetic
-164: Other_Default_Ignorable_Code_Point
-165: Other_Grapheme_Extend
-166: Other_ID_Continue
-167: Other_ID_Start
-168: Other_Lowercase
-169: Other_Math
-170: Other_Uppercase
-171: P
-172: Pahawh_Hmong
-173: Palmyrene
-174: Pattern_Syntax
-175: Pattern_White_Space
-176: Pau_Cin_Hau
-177: Pc
-178: Pd
-179: Pe
-180: Pf
-181: Phags_Pa
-182: Phoenician
-183: Pi
-184: Po
-185: Ps
-186: Psalter_Pahlavi
-187: Quotation_Mark
-188: Radical
-189: Rejang
-190: Runic
-191: S
-192: STerm
-193: Samaritan
-194: Saurashtra
-195: Sc
-196: Sharada
-197: Shavian
-198: Siddham
-199: SignWriting
-200: Sinhala
-201: Sk
-202: Sm
-203: So
-204: Soft_Dotted
-205: Sora_Sompeng
-206: Sundanese
-207: Syloti_Nagri
-208: Syriac
-209: Tagalog
-210: Tagbanwa
-211: Tai_Le
-212: Tai_Tham
-213: Tai_Viet
-214: Takri
-215: Tamil
-216: Telugu
-217: Terminal_Punctuation
-218: Thaana
-219: Thai
-220: Tibetan
-221: Tifinagh
-222: Tirhuta
-223: Ugaritic
-224: Unified_Ideograph
-225: Unknown
-226: Uppercase
-227: Vai
-228: Variation_Selector
-229: Warang_Citi
-230: White_Space
-231: XID_Continue
-232: XID_Start
-233: Yi
-234: Z
-235: Zl
-236: Zp
-237: Zs
- 40: Aghb
+ 16: Adlam
+ 17: Ahom
+ 18: Alphabetic
+ 19: Anatolian_Hieroglyphs
+ 20: Any
+ 21: Arabic
+ 22: Armenian
+ 23: Assigned
+ 24: Avestan
+ 25: Balinese
+ 26: Bamum
+ 27: Bassa_Vah
+ 28: Batak
+ 29: Bengali
+ 30: Bhaiksuki
+ 31: Bidi_Control
+ 32: Bopomofo
+ 33: Brahmi
+ 34: Braille
+ 35: Buginese
+ 36: Buhid
+ 37: C
+ 38: Canadian_Aboriginal
+ 39: Carian
+ 40: Case_Ignorable
+ 41: Cased
+ 42: Caucasian_Albanian
+ 43: Cc
+ 44: Cf
+ 45: Chakma
+ 46: Cham
+ 47: Changes_When_Casefolded
+ 48: Changes_When_Casemapped
+ 49: Changes_When_Lowercased
+ 50: Changes_When_Titlecased
+ 51: Changes_When_Uppercased
+ 52: Cherokee
+ 53: Cn
+ 54: Co
+ 55: Common
+ 56: Coptic
+ 57: Cs
+ 58: Cuneiform
+ 59: Cypriot
+ 60: Cyrillic
+ 61: Dash
+ 62: Default_Ignorable_Code_Point
+ 63: Deprecated
+ 64: Deseret
+ 65: Devanagari
+ 66: Diacritic
+ 67: Dogra
+ 68: Duployan
+ 69: Egyptian_Hieroglyphs
+ 70: Elbasan
+ 71: Emoji
+ 72: Emoji_Component
+ 73: Emoji_Modifier
+ 74: Emoji_Modifier_Base
+ 75: Emoji_Presentation
+ 76: Ethiopic
+ 77: Extended_Pictographic
+ 78: Extender
+ 79: Georgian
+ 80: Glagolitic
+ 81: Gothic
+ 82: Grantha
+ 83: Grapheme_Base
+ 84: Grapheme_Extend
+ 85: Grapheme_Link
+ 86: Greek
+ 87: Gujarati
+ 88: Gunjala_Gondi
+ 89: Gurmukhi
+ 90: Han
+ 91: Hangul
+ 92: Hanifi_Rohingya
+ 93: Hanunoo
+ 94: Hatran
+ 95: Hebrew
+ 96: Hex_Digit
+ 97: Hiragana
+ 98: Hyphen
+ 99: IDS_Binary_Operator
+100: IDS_Trinary_Operator
+101: ID_Continue
+102: ID_Start
+103: Ideographic
+104: Imperial_Aramaic
+105: Inherited
+106: Inscriptional_Pahlavi
+107: Inscriptional_Parthian
+108: Javanese
+109: Join_Control
+110: Kaithi
+111: Kannada
+112: Katakana
+113: Kayah_Li
+114: Kharoshthi
+115: Khmer
+116: Khojki
+117: Khudawadi
+118: L
+119: LC
+120: Lao
+121: Latin
+122: Lepcha
+123: Limbu
+124: Linear_A
+125: Linear_B
+126: Lisu
+127: Ll
+128: Lm
+129: Lo
+130: Logical_Order_Exception
+131: Lowercase
+132: Lt
+133: Lu
+134: Lycian
+135: Lydian
+136: M
+137: Mahajani
+138: Makasar
+139: Malayalam
+140: Mandaic
+141: Manichaean
+142: Marchen
+143: Masaram_Gondi
+144: Math
+145: Mc
+146: Me
+147: Medefaidrin
+148: Meetei_Mayek
+149: Mende_Kikakui
+150: Meroitic_Cursive
+151: Meroitic_Hieroglyphs
+152: Miao
+153: Mn
+154: Modi
+155: Mongolian
+156: Mro
+157: Multani
+158: Myanmar
+159: N
+160: Nabataean
+161: Nd
+162: New_Tai_Lue
+163: Newa
+164: Nko
+165: Nl
+166: No
+167: Noncharacter_Code_Point
+168: Nushu
+169: Ogham
+170: Ol_Chiki
+171: Old_Hungarian
+172: Old_Italic
+173: Old_North_Arabian
+174: Old_Permic
+175: Old_Persian
+176: Old_Sogdian
+177: Old_South_Arabian
+178: Old_Turkic
+179: Oriya
+180: Osage
+181: Osmanya
+182: Other_Alphabetic
+183: Other_Default_Ignorable_Code_Point
+184: Other_Grapheme_Extend
+185: Other_ID_Continue
+186: Other_ID_Start
+187: Other_Lowercase
+188: Other_Math
+189: Other_Uppercase
+190: P
+191: Pahawh_Hmong
+192: Palmyrene
+193: Pattern_Syntax
+194: Pattern_White_Space
+195: Pau_Cin_Hau
+196: Pc
+197: Pd
+198: Pe
+199: Pf
+200: Phags_Pa
+201: Phoenician
+202: Pi
+203: Po
+204: Prepended_Concatenation_Mark
+205: Ps
+206: Psalter_Pahlavi
+207: Quotation_Mark
+208: Radical
+209: Regional_Indicator
+210: Rejang
+211: Runic
+212: S
+213: Samaritan
+214: Saurashtra
+215: Sc
+216: Sentence_Terminal
+217: Sharada
+218: Shavian
+219: Siddham
+220: SignWriting
+221: Sinhala
+222: Sk
+223: Sm
+224: So
+225: Soft_Dotted
+226: Sogdian
+227: Sora_Sompeng
+228: Soyombo
+229: Sundanese
+230: Syloti_Nagri
+231: Syriac
+232: Tagalog
+233: Tagbanwa
+234: Tai_Le
+235: Tai_Tham
+236: Tai_Viet
+237: Takri
+238: Tamil
+239: Tangut
+240: Telugu
+241: Terminal_Punctuation
+242: Thaana
+243: Thai
+244: Tibetan
+245: Tifinagh
+246: Tirhuta
+247: Ugaritic
+248: Unified_Ideograph
+249: Unknown
+250: Uppercase
+251: Vai
+252: Variation_Selector
+253: Warang_Citi
+254: White_Space
+255: XID_Continue
+256: XID_Start
+257: Yi
+258: Z
+259: Zanabazar_Square
+260: Zl
+261: Zp
+262: Zs
+ 16: Adlm
+ 42: Aghb
15: AHex
- 20: Arab
- 93: Armi
- 21: Armn
- 23: Avst
- 24: Bali
- 25: Bamu
- 26: Bass
- 27: Batk
- 28: Beng
- 29: Bidi_C
- 30: Bopo
- 31: Brah
- 32: Brai
- 33: Bugi
- 34: Buhd
- 43: Cakm
- 36: Cans
- 37: Cari
-108: Cased_Letter
- 50: Cher
- 38: CI
-179: Close_Punctuation
-125: Combining_Mark
-177: Connector_Punctuation
- 41: Control
- 54: Copt
- 57: Cprt
-195: Currency_Symbol
- 45: CWCF
- 46: CWCM
- 47: CWL
- 48: CWT
- 49: CWU
- 58: Cyrl
-178: Dash_Punctuation
-146: Decimal_Number
- 61: Dep
- 63: Deva
- 60: DI
- 64: Dia
- 62: Dsrt
- 65: Dupl
- 66: Egyp
- 67: Elba
-132: Enclosing_Mark
- 68: Ethi
- 69: Ext
-180: Final_Punctuation
- 42: Format
- 70: Geor
- 71: Glag
- 72: Goth
- 73: Gran
- 74: Gr_Base
- 77: Grek
- 75: Gr_Ext
- 76: Gr_Link
- 78: Gujr
- 79: Guru
- 81: Hang
- 80: Hani
- 82: Hano
- 83: Hatr
- 84: Hebr
- 85: Hex
- 86: Hira
- 18: Hluw
-172: Hmng
-154: Hung
- 90: IDC
- 92: Ideo
- 91: IDS
- 88: IDSB
- 89: IDST
-183: Initial_Punctuation
-155: Ital
- 97: Java
- 98: Join_C
-102: Kali
-101: Kana
-103: Khar
-104: Khmr
-105: Khoj
-100: Knda
- 99: Kthi
-212: Lana
-109: Laoo
-110: Latn
-111: Lepc
-107: Letter
-149: Letter_Number
-112: Limb
-113: Lina
-114: Linb
-235: Line_Separator
-119: LOE
-116: Lowercase_Letter
-123: Lyci
-124: Lydi
-126: Mahj
-128: Mand
-129: Mani
-125: Mark
-202: Math_Symbol
-134: Mend
-135: Merc
-136: Mero
-127: Mlym
-117: Modifier_Letter
-201: Modifier_Symbol
-140: Mong
-141: Mroo
-133: Mtei
-142: Mult
-143: Mymr
-156: Narb
-145: Nbat
-151: NChar
-148: Nkoo
-138: Nonspacing_Mark
-144: Number
-163: OAlpha
-164: ODI
-152: Ogam
-165: OGr_Ext
-166: OIDC
-167: OIDS
-153: Olck
-168: OLower
-169: OMath
-185: Open_Punctuation
-160: Orkh
-161: Orya
-162: Osma
- 35: Other
-118: Other_Letter
-150: Other_Number
-184: Other_Punctuation
-203: Other_Symbol
-170: OUpper
-173: Palm
-236: Paragraph_Separator
-174: Pat_Syn
-175: Pat_WS
-176: Pauc
-157: Perm
-181: Phag
- 95: Phli
-186: Phlp
-182: Phnx
-137: Plrd
- 52: Private_Use
- 96: Prti
-171: Punctuation
- 54: Qaac
- 94: Qaai
-187: QMark
-189: Rjng
-190: Runr
-193: Samr
-159: Sarb
-194: Saur
-204: SD
-234: Separator
-199: Sgnw
-197: Shaw
-196: Shrd
-198: Sidd
-106: Sind
-200: Sinh
-205: Sora
-237: Space_Separator
-131: Spacing_Mark
-206: Sund
- 55: Surrogate
-207: Sylo
-191: Symbol
-208: Syrc
-210: Tagb
-214: Takr
-211: Tale
-147: Talu
-215: Taml
-213: Tavt
-216: Telu
-217: Term
-221: Tfng
-209: Tglg
-218: Thaa
-220: Tibt
-222: Tirh
-121: Titlecase_Letter
-223: Ugar
-224: UIdeo
- 51: Unassigned
-122: Uppercase_Letter
-227: Vaii
-228: VS
-229: Wara
-230: WSpace
-231: XIDC
-232: XIDS
-158: Xpeo
- 56: Xsux
-233: Yiii
- 94: Zinh
- 53: Zyyy
-225: Zzzz
-238: In_Basic_Latin
-239: In_Latin_1_Supplement
-240: In_Latin_Extended_A
-241: In_Latin_Extended_B
-242: In_IPA_Extensions
-243: In_Spacing_Modifier_Letters
-244: In_Combining_Diacritical_Marks
-245: In_Greek_and_Coptic
-246: In_Cyrillic
-247: In_Cyrillic_Supplement
-248: In_Armenian
-249: In_Hebrew
-250: In_Arabic
-251: In_Syriac
-252: In_Arabic_Supplement
-253: In_Thaana
-254: In_NKo
-255: In_Samaritan
-256: In_Mandaic
-257: In_Arabic_Extended_A
-258: In_Devanagari
-259: In_Bengali
-260: In_Gurmukhi
-261: In_Gujarati
-262: In_Oriya
-263: In_Tamil
-264: In_Telugu
-265: In_Kannada
-266: In_Malayalam
-267: In_Sinhala
-268: In_Thai
-269: In_Lao
-270: In_Tibetan
-271: In_Myanmar
-272: In_Georgian
-273: In_Hangul_Jamo
-274: In_Ethiopic
-275: In_Ethiopic_Supplement
-276: In_Cherokee
-277: In_Unified_Canadian_Aboriginal_Syllabics
-278: In_Ogham
-279: In_Runic
-280: In_Tagalog
-281: In_Hanunoo
-282: In_Buhid
-283: In_Tagbanwa
-284: In_Khmer
-285: In_Mongolian
-286: In_Unified_Canadian_Aboriginal_Syllabics_Extended
-287: In_Limbu
-288: In_Tai_Le
-289: In_New_Tai_Lue
-290: In_Khmer_Symbols
-291: In_Buginese
-292: In_Tai_Tham
-293: In_Combining_Diacritical_Marks_Extended
-294: In_Balinese
-295: In_Sundanese
-296: In_Batak
-297: In_Lepcha
-298: In_Ol_Chiki
-299: In_Sundanese_Supplement
-300: In_Vedic_Extensions
-301: In_Phonetic_Extensions
-302: In_Phonetic_Extensions_Supplement
-303: In_Combining_Diacritical_Marks_Supplement
-304: In_Latin_Extended_Additional
-305: In_Greek_Extended
-306: In_General_Punctuation
-307: In_Superscripts_and_Subscripts
-308: In_Currency_Symbols
-309: In_Combining_Diacritical_Marks_for_Symbols
-310: In_Letterlike_Symbols
-311: In_Number_Forms
-312: In_Arrows
-313: In_Mathematical_Operators
-314: In_Miscellaneous_Technical
-315: In_Control_Pictures
-316: In_Optical_Character_Recognition
-317: In_Enclosed_Alphanumerics
-318: In_Box_Drawing
-319: In_Block_Elements
-320: In_Geometric_Shapes
-321: In_Miscellaneous_Symbols
-322: In_Dingbats
-323: In_Miscellaneous_Mathematical_Symbols_A
-324: In_Supplemental_Arrows_A
-325: In_Braille_Patterns
-326: In_Supplemental_Arrows_B
-327: In_Miscellaneous_Mathematical_Symbols_B
-328: In_Supplemental_Mathematical_Operators
-329: In_Miscellaneous_Symbols_and_Arrows
-330: In_Glagolitic
-331: In_Latin_Extended_C
-332: In_Coptic
-333: In_Georgian_Supplement
-334: In_Tifinagh
-335: In_Ethiopic_Extended
-336: In_Cyrillic_Extended_A
-337: In_Supplemental_Punctuation
-338: In_CJK_Radicals_Supplement
-339: In_Kangxi_Radicals
-340: In_Ideographic_Description_Characters
-341: In_CJK_Symbols_and_Punctuation
-342: In_Hiragana
-343: In_Katakana
-344: In_Bopomofo
-345: In_Hangul_Compatibility_Jamo
-346: In_Kanbun
-347: In_Bopomofo_Extended
-348: In_CJK_Strokes
-349: In_Katakana_Phonetic_Extensions
-350: In_Enclosed_CJK_Letters_and_Months
-351: In_CJK_Compatibility
-352: In_CJK_Unified_Ideographs_Extension_A
-353: In_Yijing_Hexagram_Symbols
-354: In_CJK_Unified_Ideographs
-355: In_Yi_Syllables
-356: In_Yi_Radicals
-357: In_Lisu
-358: In_Vai
-359: In_Cyrillic_Extended_B
-360: In_Bamum
-361: In_Modifier_Tone_Letters
-362: In_Latin_Extended_D
-363: In_Syloti_Nagri
-364: In_Common_Indic_Number_Forms
-365: In_Phags_pa
-366: In_Saurashtra
-367: In_Devanagari_Extended
-368: In_Kayah_Li
-369: In_Rejang
-370: In_Hangul_Jamo_Extended_A
-371: In_Javanese
-372: In_Myanmar_Extended_B
-373: In_Cham
-374: In_Myanmar_Extended_A
-375: In_Tai_Viet
-376: In_Meetei_Mayek_Extensions
-377: In_Ethiopic_Extended_A
-378: In_Latin_Extended_E
-379: In_Cherokee_Supplement
-380: In_Meetei_Mayek
-381: In_Hangul_Syllables
-382: In_Hangul_Jamo_Extended_B
-383: In_High_Surrogates
-384: In_High_Private_Use_Surrogates
-385: In_Low_Surrogates
-386: In_Private_Use_Area
-387: In_CJK_Compatibility_Ideographs
-388: In_Alphabetic_Presentation_Forms
-389: In_Arabic_Presentation_Forms_A
-390: In_Variation_Selectors
-391: In_Vertical_Forms
-392: In_Combining_Half_Marks
-393: In_CJK_Compatibility_Forms
-394: In_Small_Form_Variants
-395: In_Arabic_Presentation_Forms_B
-396: In_Halfwidth_and_Fullwidth_Forms
-397: In_Specials
-398: In_Linear_B_Syllabary
-399: In_Linear_B_Ideograms
-400: In_Aegean_Numbers
-401: In_Ancient_Greek_Numbers
-402: In_Ancient_Symbols
-403: In_Phaistos_Disc
-404: In_Lycian
-405: In_Carian
-406: In_Coptic_Epact_Numbers
-407: In_Old_Italic
-408: In_Gothic
-409: In_Old_Permic
-410: In_Ugaritic
-411: In_Old_Persian
-412: In_Deseret
-413: In_Shavian
-414: In_Osmanya
-415: In_Elbasan
-416: In_Caucasian_Albanian
-417: In_Linear_A
-418: In_Cypriot_Syllabary
-419: In_Imperial_Aramaic
-420: In_Palmyrene
-421: In_Nabataean
-422: In_Hatran
-423: In_Phoenician
-424: In_Lydian
-425: In_Meroitic_Hieroglyphs
-426: In_Meroitic_Cursive
-427: In_Kharoshthi
-428: In_Old_South_Arabian
-429: In_Old_North_Arabian
-430: In_Manichaean
-431: In_Avestan
-432: In_Inscriptional_Parthian
-433: In_Inscriptional_Pahlavi
-434: In_Psalter_Pahlavi
-435: In_Old_Turkic
-436: In_Old_Hungarian
-437: In_Rumi_Numeral_Symbols
-438: In_Brahmi
-439: In_Kaithi
-440: In_Sora_Sompeng
-441: In_Chakma
-442: In_Mahajani
-443: In_Sharada
-444: In_Sinhala_Archaic_Numbers
-445: In_Khojki
-446: In_Multani
-447: In_Khudawadi
-448: In_Grantha
-449: In_Tirhuta
-450: In_Siddham
-451: In_Modi
-452: In_Takri
-453: In_Ahom
-454: In_Warang_Citi
-455: In_Pau_Cin_Hau
-456: In_Cuneiform
-457: In_Cuneiform_Numbers_and_Punctuation
-458: In_Early_Dynastic_Cuneiform
-459: In_Egyptian_Hieroglyphs
-460: In_Anatolian_Hieroglyphs
-461: In_Bamum_Supplement
-462: In_Mro
-463: In_Bassa_Vah
-464: In_Pahawh_Hmong
-465: In_Miao
-466: In_Kana_Supplement
-467: In_Duployan
-468: In_Shorthand_Format_Controls
-469: In_Byzantine_Musical_Symbols
-470: In_Musical_Symbols
-471: In_Ancient_Greek_Musical_Notation
-472: In_Tai_Xuan_Jing_Symbols
-473: In_Counting_Rod_Numerals
-474: In_Mathematical_Alphanumeric_Symbols
-475: In_Sutton_SignWriting
-476: In_Mende_Kikakui
-477: In_Arabic_Mathematical_Alphabetic_Symbols
-478: In_Mahjong_Tiles
-479: In_Domino_Tiles
-480: In_Playing_Cards
-481: In_Enclosed_Alphanumeric_Supplement
-482: In_Enclosed_Ideographic_Supplement
-483: In_Miscellaneous_Symbols_and_Pictographs
-484: In_Emoticons
-485: In_Ornamental_Dingbats
-486: In_Transport_and_Map_Symbols
-487: In_Alchemical_Symbols
-488: In_Geometric_Shapes_Extended
-489: In_Supplemental_Arrows_C
-490: In_Supplemental_Symbols_and_Pictographs
-491: In_CJK_Unified_Ideographs_Extension_B
-492: In_CJK_Unified_Ideographs_Extension_C
-493: In_CJK_Unified_Ideographs_Extension_D
-494: In_CJK_Unified_Ideographs_Extension_E
-495: In_CJK_Compatibility_Ideographs_Supplement
-496: In_Tags
-497: In_Variation_Selectors_Supplement
-498: In_Supplementary_Private_Use_Area_A
-499: In_Supplementary_Private_Use_Area_B
-500: In_No_Block
+ 21: Arab
+104: Armi
+ 22: Armn
+ 24: Avst
+ 25: Bali
+ 26: Bamu
+ 27: Bass
+ 28: Batk
+ 29: Beng
+ 30: Bhks
+ 31: Bidi_C
+ 32: Bopo
+ 33: Brah
+ 34: Brai
+ 35: Bugi
+ 36: Buhd
+ 45: Cakm
+ 38: Cans
+ 39: Cari
+119: Cased_Letter
+ 52: Cher
+ 40: CI
+198: Close_Punctuation
+136: Combining_Mark
+196: Connector_Punctuation
+ 43: Control
+ 56: Copt
+ 59: Cprt
+215: Currency_Symbol
+ 47: CWCF
+ 48: CWCM
+ 49: CWL
+ 50: CWT
+ 51: CWU
+ 60: Cyrl
+197: Dash_Punctuation
+161: Decimal_Number
+ 63: Dep
+ 65: Deva
+ 62: DI
+ 66: Dia
+ 67: Dogr
+ 64: Dsrt
+ 68: Dupl
+ 69: Egyp
+ 70: Elba
+146: Enclosing_Mark
+ 76: Ethi
+ 78: Ext
+199: Final_Punctuation
+ 44: Format
+ 79: Geor
+ 80: Glag
+ 88: Gong
+143: Gonm
+ 81: Goth
+ 82: Gran
+ 83: Gr_Base
+ 86: Grek
+ 84: Gr_Ext
+ 85: Gr_Link
+ 87: Gujr
+ 89: Guru
+ 91: Hang
+ 90: Hani
+ 93: Hano
+ 94: Hatr
+ 95: Hebr
+ 96: Hex
+ 97: Hira
+ 19: Hluw
+191: Hmng
+171: Hung
+101: IDC
+103: Ideo
+102: IDS
+ 99: IDSB
+100: IDST
+202: Initial_Punctuation
+172: Ital
+108: Java
+109: Join_C
+113: Kali
+112: Kana
+114: Khar
+115: Khmr
+116: Khoj
+111: Knda
+110: Kthi
+235: Lana
+120: Laoo
+121: Latn
+122: Lepc
+118: Letter
+165: Letter_Number
+123: Limb
+124: Lina
+125: Linb
+260: Line_Separator
+130: LOE
+127: Lowercase_Letter
+134: Lyci
+135: Lydi
+137: Mahj
+138: Maka
+140: Mand
+141: Mani
+142: Marc
+136: Mark
+223: Math_Symbol
+147: Medf
+149: Mend
+150: Merc
+151: Mero
+139: Mlym
+128: Modifier_Letter
+222: Modifier_Symbol
+155: Mong
+156: Mroo
+148: Mtei
+157: Mult
+158: Mymr
+173: Narb
+160: Nbat
+167: NChar
+164: Nkoo
+153: Nonspacing_Mark
+168: Nshu
+159: Number
+182: OAlpha
+183: ODI
+169: Ogam
+184: OGr_Ext
+185: OIDC
+186: OIDS
+170: Olck
+187: OLower
+188: OMath
+205: Open_Punctuation
+178: Orkh
+179: Orya
+180: Osge
+181: Osma
+ 37: Other
+129: Other_Letter
+166: Other_Number
+203: Other_Punctuation
+224: Other_Symbol
+189: OUpper
+192: Palm
+261: Paragraph_Separator
+193: Pat_Syn
+194: Pat_WS
+195: Pauc
+204: PCM
+174: Perm
+200: Phag
+106: Phli
+206: Phlp
+201: Phnx
+152: Plrd
+ 54: Private_Use
+107: Prti
+190: Punctuation
+ 56: Qaac
+105: Qaai
+207: QMark
+209: RI
+210: Rjng
+ 92: Rohg
+211: Runr
+213: Samr
+177: Sarb
+214: Saur
+225: SD
+258: Separator
+220: Sgnw
+218: Shaw
+217: Shrd
+219: Sidd
+117: Sind
+221: Sinh
+226: Sogd
+176: Sogo
+227: Sora
+228: Soyo
+262: Space_Separator
+145: Spacing_Mark
+216: STerm
+229: Sund
+ 57: Surrogate
+230: Sylo
+212: Symbol
+231: Syrc
+233: Tagb
+237: Takr
+234: Tale
+162: Talu
+238: Taml
+239: Tang
+236: Tavt
+240: Telu
+241: Term
+245: Tfng
+232: Tglg
+242: Thaa
+244: Tibt
+246: Tirh
+132: Titlecase_Letter
+247: Ugar
+248: UIdeo
+ 53: Unassigned
+133: Uppercase_Letter
+251: Vaii
+252: VS
+253: Wara
+254: WSpace
+255: XIDC
+256: XIDS
+175: Xpeo
+ 58: Xsux
+257: Yiii
+259: Zanb
+105: Zinh
+ 55: Zyyy
+249: Zzzz
+263: In_Basic_Latin
+264: In_Latin_1_Supplement
+265: In_Latin_Extended_A
+266: In_Latin_Extended_B
+267: In_IPA_Extensions
+268: In_Spacing_Modifier_Letters
+269: In_Combining_Diacritical_Marks
+270: In_Greek_and_Coptic
+271: In_Cyrillic
+272: In_Cyrillic_Supplement
+273: In_Armenian
+274: In_Hebrew
+275: In_Arabic
+276: In_Syriac
+277: In_Arabic_Supplement
+278: In_Thaana
+279: In_NKo
+280: In_Samaritan
+281: In_Mandaic
+282: In_Syriac_Supplement
+283: In_Arabic_Extended_A
+284: In_Devanagari
+285: In_Bengali
+286: In_Gurmukhi
+287: In_Gujarati
+288: In_Oriya
+289: In_Tamil
+290: In_Telugu
+291: In_Kannada
+292: In_Malayalam
+293: In_Sinhala
+294: In_Thai
+295: In_Lao
+296: In_Tibetan
+297: In_Myanmar
+298: In_Georgian
+299: In_Hangul_Jamo
+300: In_Ethiopic
+301: In_Ethiopic_Supplement
+302: In_Cherokee
+303: In_Unified_Canadian_Aboriginal_Syllabics
+304: In_Ogham
+305: In_Runic
+306: In_Tagalog
+307: In_Hanunoo
+308: In_Buhid
+309: In_Tagbanwa
+310: In_Khmer
+311: In_Mongolian
+312: In_Unified_Canadian_Aboriginal_Syllabics_Extended
+313: In_Limbu
+314: In_Tai_Le
+315: In_New_Tai_Lue
+316: In_Khmer_Symbols
+317: In_Buginese
+318: In_Tai_Tham
+319: In_Combining_Diacritical_Marks_Extended
+320: In_Balinese
+321: In_Sundanese
+322: In_Batak
+323: In_Lepcha
+324: In_Ol_Chiki
+325: In_Cyrillic_Extended_C
+326: In_Georgian_Extended
+327: In_Sundanese_Supplement
+328: In_Vedic_Extensions
+329: In_Phonetic_Extensions
+330: In_Phonetic_Extensions_Supplement
+331: In_Combining_Diacritical_Marks_Supplement
+332: In_Latin_Extended_Additional
+333: In_Greek_Extended
+334: In_General_Punctuation
+335: In_Superscripts_and_Subscripts
+336: In_Currency_Symbols
+337: In_Combining_Diacritical_Marks_for_Symbols
+338: In_Letterlike_Symbols
+339: In_Number_Forms
+340: In_Arrows
+341: In_Mathematical_Operators
+342: In_Miscellaneous_Technical
+343: In_Control_Pictures
+344: In_Optical_Character_Recognition
+345: In_Enclosed_Alphanumerics
+346: In_Box_Drawing
+347: In_Block_Elements
+348: In_Geometric_Shapes
+349: In_Miscellaneous_Symbols
+350: In_Dingbats
+351: In_Miscellaneous_Mathematical_Symbols_A
+352: In_Supplemental_Arrows_A
+353: In_Braille_Patterns
+354: In_Supplemental_Arrows_B
+355: In_Miscellaneous_Mathematical_Symbols_B
+356: In_Supplemental_Mathematical_Operators
+357: In_Miscellaneous_Symbols_and_Arrows
+358: In_Glagolitic
+359: In_Latin_Extended_C
+360: In_Coptic
+361: In_Georgian_Supplement
+362: In_Tifinagh
+363: In_Ethiopic_Extended
+364: In_Cyrillic_Extended_A
+365: In_Supplemental_Punctuation
+366: In_CJK_Radicals_Supplement
+367: In_Kangxi_Radicals
+368: In_Ideographic_Description_Characters
+369: In_CJK_Symbols_and_Punctuation
+370: In_Hiragana
+371: In_Katakana
+372: In_Bopomofo
+373: In_Hangul_Compatibility_Jamo
+374: In_Kanbun
+375: In_Bopomofo_Extended
+376: In_CJK_Strokes
+377: In_Katakana_Phonetic_Extensions
+378: In_Enclosed_CJK_Letters_and_Months
+379: In_CJK_Compatibility
+380: In_CJK_Unified_Ideographs_Extension_A
+381: In_Yijing_Hexagram_Symbols
+382: In_CJK_Unified_Ideographs
+383: In_Yi_Syllables
+384: In_Yi_Radicals
+385: In_Lisu
+386: In_Vai
+387: In_Cyrillic_Extended_B
+388: In_Bamum
+389: In_Modifier_Tone_Letters
+390: In_Latin_Extended_D
+391: In_Syloti_Nagri
+392: In_Common_Indic_Number_Forms
+393: In_Phags_pa
+394: In_Saurashtra
+395: In_Devanagari_Extended
+396: In_Kayah_Li
+397: In_Rejang
+398: In_Hangul_Jamo_Extended_A
+399: In_Javanese
+400: In_Myanmar_Extended_B
+401: In_Cham
+402: In_Myanmar_Extended_A
+403: In_Tai_Viet
+404: In_Meetei_Mayek_Extensions
+405: In_Ethiopic_Extended_A
+406: In_Latin_Extended_E
+407: In_Cherokee_Supplement
+408: In_Meetei_Mayek
+409: In_Hangul_Syllables
+410: In_Hangul_Jamo_Extended_B
+411: In_High_Surrogates
+412: In_High_Private_Use_Surrogates
+413: In_Low_Surrogates
+414: In_Private_Use_Area
+415: In_CJK_Compatibility_Ideographs
+416: In_Alphabetic_Presentation_Forms
+417: In_Arabic_Presentation_Forms_A
+418: In_Variation_Selectors
+419: In_Vertical_Forms
+420: In_Combining_Half_Marks
+421: In_CJK_Compatibility_Forms
+422: In_Small_Form_Variants
+423: In_Arabic_Presentation_Forms_B
+424: In_Halfwidth_and_Fullwidth_Forms
+425: In_Specials
+426: In_Linear_B_Syllabary
+427: In_Linear_B_Ideograms
+428: In_Aegean_Numbers
+429: In_Ancient_Greek_Numbers
+430: In_Ancient_Symbols
+431: In_Phaistos_Disc
+432: In_Lycian
+433: In_Carian
+434: In_Coptic_Epact_Numbers
+435: In_Old_Italic
+436: In_Gothic
+437: In_Old_Permic
+438: In_Ugaritic
+439: In_Old_Persian
+440: In_Deseret
+441: In_Shavian
+442: In_Osmanya
+443: In_Osage
+444: In_Elbasan
+445: In_Caucasian_Albanian
+446: In_Linear_A
+447: In_Cypriot_Syllabary
+448: In_Imperial_Aramaic
+449: In_Palmyrene
+450: In_Nabataean
+451: In_Hatran
+452: In_Phoenician
+453: In_Lydian
+454: In_Meroitic_Hieroglyphs
+455: In_Meroitic_Cursive
+456: In_Kharoshthi
+457: In_Old_South_Arabian
+458: In_Old_North_Arabian
+459: In_Manichaean
+460: In_Avestan
+461: In_Inscriptional_Parthian
+462: In_Inscriptional_Pahlavi
+463: In_Psalter_Pahlavi
+464: In_Old_Turkic
+465: In_Old_Hungarian
+466: In_Hanifi_Rohingya
+467: In_Rumi_Numeral_Symbols
+468: In_Old_Sogdian
+469: In_Sogdian
+470: In_Brahmi
+471: In_Kaithi
+472: In_Sora_Sompeng
+473: In_Chakma
+474: In_Mahajani
+475: In_Sharada
+476: In_Sinhala_Archaic_Numbers
+477: In_Khojki
+478: In_Multani
+479: In_Khudawadi
+480: In_Grantha
+481: In_Newa
+482: In_Tirhuta
+483: In_Siddham
+484: In_Modi
+485: In_Mongolian_Supplement
+486: In_Takri
+487: In_Ahom
+488: In_Dogra
+489: In_Warang_Citi
+490: In_Zanabazar_Square
+491: In_Soyombo
+492: In_Pau_Cin_Hau
+493: In_Bhaiksuki
+494: In_Marchen
+495: In_Masaram_Gondi
+496: In_Gunjala_Gondi
+497: In_Makasar
+498: In_Cuneiform
+499: In_Cuneiform_Numbers_and_Punctuation
+500: In_Early_Dynastic_Cuneiform
+501: In_Egyptian_Hieroglyphs
+502: In_Anatolian_Hieroglyphs
+503: In_Bamum_Supplement
+504: In_Mro
+505: In_Bassa_Vah
+506: In_Pahawh_Hmong
+507: In_Medefaidrin
+508: In_Miao
+509: In_Ideographic_Symbols_and_Punctuation
+510: In_Tangut
+511: In_Tangut_Components
+512: In_Kana_Supplement
+513: In_Kana_Extended_A
+514: In_Nushu
+515: In_Duployan
+516: In_Shorthand_Format_Controls
+517: In_Byzantine_Musical_Symbols
+518: In_Musical_Symbols
+519: In_Ancient_Greek_Musical_Notation
+520: In_Mayan_Numerals
+521: In_Tai_Xuan_Jing_Symbols
+522: In_Counting_Rod_Numerals
+523: In_Mathematical_Alphanumeric_Symbols
+524: In_Sutton_SignWriting
+525: In_Glagolitic_Supplement
+526: In_Mende_Kikakui
+527: In_Adlam
+528: In_Indic_Siyaq_Numbers
+529: In_Arabic_Mathematical_Alphabetic_Symbols
+530: In_Mahjong_Tiles
+531: In_Domino_Tiles
+532: In_Playing_Cards
+533: In_Enclosed_Alphanumeric_Supplement
+534: In_Enclosed_Ideographic_Supplement
+535: In_Miscellaneous_Symbols_and_Pictographs
+536: In_Emoticons
+537: In_Ornamental_Dingbats
+538: In_Transport_and_Map_Symbols
+539: In_Alchemical_Symbols
+540: In_Geometric_Shapes_Extended
+541: In_Supplemental_Arrows_C
+542: In_Supplemental_Symbols_and_Pictographs
+543: In_Chess_Symbols
+544: In_CJK_Unified_Ideographs_Extension_B
+545: In_CJK_Unified_Ideographs_Extension_C
+546: In_CJK_Unified_Ideographs_Extension_D
+547: In_CJK_Unified_Ideographs_Extension_E
+548: In_CJK_Unified_Ideographs_Extension_F
+549: In_CJK_Compatibility_Ideographs_Supplement
+550: In_Tags
+551: In_Variation_Selectors_Supplement
+552: In_Supplementary_Private_Use_Area_A
+553: In_Supplementary_Private_Use_Area_B
+554: In_No_Block