Merge branch 'release/6.5.0-1' 6.5.0-1

author: Jörg Frings-Fürst <debian@jff-webhosting.net> 2017-08-13 13:55:30 +0200
committer: Jörg Frings-Fürst <debian@jff-webhosting.net> 2017-08-13 13:55:30 +0200
commit: a89a4ac904bc93b1d93b410394fa05c23260351b (patch)
tree: b10c64aff1d79627925154364772774fc82d07ff /doc
parent: b6c6e4122f35fbead1e9661dfb2d852b39faf8ff (diff)
parent: ae063b1e6ea3d97ea4e3404bfd8289895619d04f (diff)
2 files changed, 126 insertions, 14 deletions
diff --git a/doc/RE b/doc/RE
index 729e71c..16cc888 100644
--- a/doc/RE
+++ b/doc/RE
@@ -1,4 +1,4 @@
-Oniguruma Regular Expressions Version 6.4.0    2017/06/28
+Oniguruma Regular Expressions Version 6.5.0    2017/07/30
 
 syntax: ONIG_SYNTAX_RUBY (default)
 
@@ -52,8 +52,8 @@ syntax: ONIG_SYNTAX_RUBY (default)
            Not Unicode:
              \t, \n, \v, \f, \r, \x20
 
-           Unicode:
-             0009, 000A, 000B, 000C, 000D, 0085(NEL),
+           Unicode case:
+             U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL),
              General_Category -- Line_Separator
                               -- Paragraph_Separator
                               -- Space_Separator
@@ -70,6 +70,16 @@ syntax: ONIG_SYNTAX_RUBY (default)
 
   \H       non-hexdigit char
 
+  \R       general newline  (* can't be used in character-class)
+           "\r\n" or \n,\v,\f,\r  (* but doesn't backtrack from \r\n to \r)
+
+           Unicode case:
+             "\r\n" or \n,\v,\f,\r or U+0085, U+2028, U+2029
+
+  \N       negative newline  (?-m:.)
+
+  \O       true anychar      (?m:.)    (* original function)
+
 
   Character Property
 
@@ -133,6 +143,8 @@ syntax: ONIG_SYNTAX_RUBY (default)
   \Z      end of string, or before newline at the end
   \z      end of string
   \G      where the current search attempt begins
+  \K      keep (keep start position of the result string)
+
 
 
 6. Character class
@@ -183,9 +195,9 @@ syntax: ONIG_SYNTAX_RUBY (default)
                Final_Punctuation | Initial_Punctuation | Other_Punctuation |
                Open_Punctuation
       space    Space_Separator | Line_Separator | Paragraph_Separator |
-               0009 | 000A | 000B | 000C | 000D | 0085
+               U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085
       upper    Uppercase_Letter
-      xdigit   0030 - 0039 | 0041 - 0046 | 0061 - 0066
+      xdigit   U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066
                (0-9, a-f, A-F)
       word     Letter | Mark | Decimal_Number | Connector_Punctuation
 
@@ -228,6 +240,50 @@ syntax: ONIG_SYNTAX_RUBY (default)
 
                      Assigning the same name to two or more subexps is allowed.
 
+  <Absent functions>
+
+  (?~absent)         Absent repeater    (* proposed by Tanaka Akira)
+                     This works like .* (more precisely \O*), but it is
+                     limited to the range that does not include a string
+                     matching absent.
+                     This is an abbreviated notation for (?~|absent|\O*).
+                     \O* is used as a repeater.
+
+  (?~|absent|exp)    Absent expression  (* original)
+                     This works like "exp", but it is limited by the range
+                     that does not include a string matching absent.
+
+                     ex. (?~|345|\d*)  "12345678"  ==> "12", "1", ""
+
+  (?~|absent)        Absent cutter (* original)
+                     After this operator is passed, the right range of the
+                     string is limited at the point that does not include a
+                     string matching absent.
+
+  (?~|)              Absent clear
+                     Clear the effects caused by Absent cutters.
+                     (* This operation is not cancelled by backtrack.)
+
+     * Nested Absent functions are not supported and the behavior
+       is undefined.
+
+
+  (?(condition_exp)then_exp|else_exp)    if-then-else
+  (?(condition_exp)then_exp)             if-then
+
+               condition_exp can be a backreference number/name or a normal
+               regular expression.
+               When condition_exp is a backreference, both then_exp and
+               else_exp can be omitted.
+               Then it works as a backreference validity checker.
+
+  [ backreference validity checker ]   (* original)
+
+    (?(n)), (?(-n)), (?(+n)), (?(n+level)) ...
+    (?(<n>)), (?('-n')), (?(<+n>)) ...
+    (?(<name>)), (?('name')), (?(<name+level>)) ...
+
+
 
 8. Backreferences
 
@@ -282,7 +338,7 @@ syntax: ONIG_SYNTAX_RUBY (default)
       p r.match("<foo>f<bar>bbb</bar>f</foo>").captures
 
 
-9. Subexp calls ("Tanaka Akira special")
+9. Subexp calls ("Tanaka Akira special")   (* original function)
 
   When we say "call a group," it actually means, "re-execute the subexp in
   that group."
@@ -367,7 +423,6 @@ A-3. Missing features compared with perl 5.8.0
    + \l,\u,\L,\U, \X, \C
    + (?{code})
    + (??{code})
-   + (?(condition)yes-pat|no-pat)
 
    * \Q...\E
      This is effective on ONIG_SYNTAX_PERL and ONIG_SYNTAX_JAVA.
diff --git a/doc/RE.ja b/doc/RE.ja
index 08cbefc..c05468d 100644
--- a/doc/RE.ja
+++ b/doc/RE.ja
@@ -1,4 +1,4 @@
-鬼車 正規表現 Version 6.4.0    2017/06/28
+鬼車 正規表現 Version 6.5.0    2017/07/30
 
 使用文法: ONIG_SYNTAX_RUBY (既定値)
 
@@ -35,7 +35,7 @@
 
 3. 文字種
 
-  .        任意文字 (改行を除く)
+  .        任意文字 (改行を除く: オプションに依存)
 
   \w       単語構成文字
 
@@ -53,7 +53,7 @@
              \t, \n, \v, \f, \r, \x20
 
            Unicodeの場合:
-             0009, 000A, 000B, 000C, 000D, 0085(NEL), 
+             U+0009, U+000A, U+000B, U+000C, U+000D, U+0085(NEL), 
              General_Category -- Line_Separator
                               -- Paragraph_Separator
                               -- Space_Separator
@@ -70,6 +70,16 @@
 
   \H       非16進数字
 
+  \R       汎改行  (* 文字集合の中では使用できない)
+           "\r\n" or \n,\v,\f,\r  (* 但し \r\nから\rにはバックトラックしない)
+
+           Unicodeの場合:
+             "\r\n" or \n,\v,\f,\r or U+0085, U+2028, U+2029
+
+  \N       非改行文字  (?-m:.)
+
+  \O       真任意文字  (?m:.)      (* 原作)
+
 
   Character Property
 
@@ -133,6 +143,8 @@
   \Z      文字列末尾、または文字列末尾の改行の直前
   \z      文字列末尾
   \G      照合開始位置
+  \K      保持 (結果の開始位置をこの位置に保つ)
+
 
 
 6. 文字集合
@@ -182,9 +194,9 @@
                Final_Punctuation | Initial_Punctuation | Other_Punctuation |
                Open_Punctuation
       space    Space_Separator | Line_Separator | Paragraph_Separator |
-               0009 | 000A | 000B | 000C | 000D | 0085
+               U+0009 | U+000A | U+000B | U+000C | U+000D | U+0085
       upper    Uppercase_Letter
-      xdigit   0030 - 0039 | 0041 - 0046 | 0061 - 0066
+      xdigit   U+0030 - U+0039 | U+0041 - U+0046 | U+0061 - U+0066
                (0-9, a-f, A-F)
       word     Letter | Mark | Decimal_Number | Connector_Punctuation
 
@@ -230,6 +242,52 @@
                     この場合には、この名前を使用した後方参照は可能であるが、
                     部分式呼出しはできない。
 
+  <不在機能群>
+
+  (?~不在式)        不在繰り返し  (*原案 田中哲)
+                    これは.*のように(より正確には\O*)動作するが、不在式に
+                    適合する文字列を含まない範囲に制限される。
+                    これは(?~|不在式|\O*)の省略表記である。
+                    \O*の部分はマルチラインオプション(?m)の影響を受けない。
+
+  (?~|不在式|式)    不在式  (* 原作)
+                    これは"式"のように動作するが、不在式に適合する文字列を
+                    含まない範囲に制限される。
+
+                    例 (?~|345|\d*)  "12345678"  ==> "12", "1", ""
+
+  (?~|不在式)       不在切断 (* 原作)
+                    この演算子を通過した後は、対象文字列の適合範囲の最後が
+                    不在式に適合する文字列を含まない範囲に制限される。
+
+  (?~|)             不在消去
+                    不在切断の効果を消して、初期状態にする。
+                    (* この演算子の効果は後退再試行で無効化されない)
+
+     * 不在機能の入れ子はサポートしておらず、挙動は不定とする。
+
+
+  (?(条件式)成功式|失敗式)    条件式が成功すれば成功式、失敗すれば失敗式を実行する
+                             この機能の存在理由は、成功式が失敗しても失敗式には
+                             行かないこと。これは他の正規表現で書くことができない。
+                             もうひとつは、条件式が後方参照のとき、後方参照値の有効性
+                             を調べる(文字列とマッチングはしない)意味になる。
+
+  (?(条件式)成功式)           条件式が成功すれば成功式を実行する
+                             (条件式が通常の式のときには、この構文は不必要だが
+                              今のところエラーにはしない。)
+
+
+                    条件式は後方参照または通常の式を使用できる。
+                    条件式が後方参照の場合、成功式と失敗式の両方を省略可能であり、
+                    この場合、後方参照値有効性を調べる(成功/失敗)機能のみになる。
+
+  [後方参照値有効性確認器]  (* 原作)
+    (?(n)), (?(-n)), (?(+n)), (?(n+level)) ...
+    (?(<n>)), (?('-n')), (?(<+n>)) ...
+    (?(<name>)), (?('name')), (?(<name+level>)) ...
+
+
 
 8. 後方参照
 
@@ -288,7 +346,7 @@
 
 
 
-9. 部分式呼出し ("田中哲スペシャル")
+9. 部分式呼出し ("田中哲スペシャル")   (* 原作)
 
   \g<name>    名前指定呼出し
   \g'name'    名前指定呼出し
@@ -373,7 +431,6 @@
    + \l,\u,\L,\U, \X, \C
    + (?{code})
    + (??{code})
-   + (?(condition)yes-pat|no-pat)
 
    * \Q...\E
      但しONIG_SYNTAX_PERLとONIG_SYNTAX_JAVAでは有効
author	Jörg Frings-Fürst <debian@jff-webhosting.net>	2017-08-13 13:55:30 +0200
committer	Jörg Frings-Fürst <debian@jff-webhosting.net>	2017-08-13 13:55:30 +0200
commit	a89a4ac904bc93b1d93b410394fa05c23260351b (patch)
tree	b10c64aff1d79627925154364772774fc82d07ff /doc
parent	b6c6e4122f35fbead1e9661dfb2d852b39faf8ff (diff)
parent	ae063b1e6ea3d97ea4e3404bfd8289895619d04f (diff)