summary | refs | log | tree | commit | diff
path: root/lib/unigbrk/u-grapheme-breaks.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/unigbrk/u-grapheme-breaks.h')
-rw-r--r-- lib/unigbrk/u-grapheme-breaks.h 83
1 files changed, 41 insertions, 42 deletions
diff --git a/lib/unigbrk/u-grapheme-breaks.h b/lib/unigbrk/u-grapheme-breaks.h
index 48afcd6..e1b600f 100644
--- a/lib/unigbrk/u-grapheme-breaks.h
+++ b/lib/unigbrk/u-grapheme-breaks.h
@@ -1,28 +1,30 @@
/* Grapheme cluster break function.
- Copyright (C) 2010-2018 Free Software Foundation, Inc.
+ Copyright (C) 2010-2022 Free Software Foundation, Inc.
Written by Ben Pfaff <blp@cs.stanford.edu>, 2010.
- This program is free software: you can redistribute it and/or
- modify it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
- or
-
- * the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at your
- option) any later version.
-
- or both in parallel, as here.
- This program is distributed in the hope that it will be useful,
+ This file is free software.
+ It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+".
+ You can redistribute it and/or modify it under either
+ - the terms of the GNU Lesser General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version, or
+ - the terms of the GNU General Public License as published by the
+ Free Software Foundation; either version 2, or (at your option)
+ any later version, or
+ - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+".
+
+ This file is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
+ Lesser General Public License and the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License and of the GNU General Public License along with this
+ program. If not, see <https://www.gnu.org/licenses/>. */
- You should have received a copy of the GNU Lesser General Public License
- along with this program. If not, see <https://www.gnu.org/licenses/>. */
+/* This file implements section 3 "Grapheme Cluster Boundaries"
+ of Unicode Standard Annex #29 <https://www.unicode.org/reports/tr29/>. */
void
FUNC (const UNIT *s, size_t n, char *p)
@@ -35,10 +37,15 @@ FUNC (const UNIT *s, size_t n, char *p)
-1 at the very beginning of the string. */
int last_char_prop = -1;
- /* Grapheme Cluster break property of the last complex character.
- -1 at the very beginning of the string. */
- int last_compchar_prop = -1;
+ /* True if the last character ends an emoji modifier sequence
+ \p{Extended_Pictographic} Extend*. */
+ bool emoji_modifier_sequence = false;
+ /* True if the last character was immediately preceded by an
+ emoji modifier sequence \p{Extended_Pictographic} Extend*. */
+ bool emoji_modifier_sequence_before_last_char = false;
+ /* Number of consecutive regional indicator (RI) characters seen
+ immediately before the current point. */
size_t ri_count = 0;
/* Don't break inside multibyte characters. */
@@ -89,35 +96,27 @@ FUNC (const UNIT *s, size_t n, char *p)
/* No break after Prepend characters (GB9b). */
else if (last_char_prop == GBP_PREPEND)
/* *p = 0 */;
- /* No break within emoji modifier sequences (GB10). */
- else if ((last_compchar_prop == GBP_EB
- || last_compchar_prop == GBP_EBG)
- && prop == GBP_EM)
- /* *p = 0 */;
- /* No break within emoji zwj sequences (GB11). */
+ /* No break within emoji modifier sequences or emoji zwj sequences
+ (GB11). */
else if (last_char_prop == GBP_ZWJ
- && (prop == GBP_GAZ
- || prop == GBP_EBG))
+ && emoji_modifier_sequence_before_last_char
+ && uc_is_property_extended_pictographic (uc))
/* *p = 0 */;
/* No break between RI if there is an odd number of RI
characters before (GB12, GB13). */
- else if (prop == GBP_RI)
- {
- if (ri_count % 2 == 0)
- *p = 1;
- /* else *p = 0; */
- }
- /* Break everywhere (GBP999). */
+ else if (prop == GBP_RI && (ri_count % 2) != 0)
+ /* *p = 0 */;
+ /* Break everywhere (GB999). */
else
*p = 1;
}
- last_char_prop = prop;
+ emoji_modifier_sequence_before_last_char = emoji_modifier_sequence;
+ emoji_modifier_sequence =
+ (emoji_modifier_sequence && prop == GBP_EXTEND)
+ || uc_is_property_extended_pictographic (uc);
- if (!(prop == GBP_EXTEND
- && (last_compchar_prop == GBP_EB
- || last_compchar_prop == GBP_EBG)))
- last_compchar_prop = prop;
+ last_char_prop = prop;
if (prop == GBP_RI)
ri_count++;