diff options
Diffstat (limited to 'lib/unigbrk/u-grapheme-breaks.h')
-rw-r--r-- | lib/unigbrk/u-grapheme-breaks.h | 83 |
1 files changed, 41 insertions, 42 deletions
diff --git a/lib/unigbrk/u-grapheme-breaks.h b/lib/unigbrk/u-grapheme-breaks.h index 48afcd6..e1b600f 100644 --- a/lib/unigbrk/u-grapheme-breaks.h +++ b/lib/unigbrk/u-grapheme-breaks.h @@ -1,28 +1,30 @@ /* Grapheme cluster break function. - Copyright (C) 2010-2018 Free Software Foundation, Inc. + Copyright (C) 2010-2022 Free Software Foundation, Inc. Written by Ben Pfaff <blp@cs.stanford.edu>, 2010. - This program is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - This program is distributed in the hope that it will be useful, + This file is free software. + It is dual-licensed under "the GNU LGPLv3+ or the GNU GPLv2+". + You can redistribute it and/or modify it under either + - the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3, or (at your + option) any later version, or + - the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) + any later version, or + - the same dual license "the GNU LGPLv3+ or the GNU GPLv2+". + + This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - Lesser General Public License for more details. + Lesser General Public License and the GNU General Public License + for more details. + + You should have received a copy of the GNU Lesser General Public + License and of the GNU General Public License along with this + program. If not, see <https://www.gnu.org/licenses/>. */ - You should have received a copy of the GNU Lesser General Public License - along with this program. If not, see <https://www.gnu.org/licenses/>. */ +/* This file implements section 3 "Grapheme Cluster Boundaries" + of Unicode Standard Annex #29 <https://www.unicode.org/reports/tr29/>. */ void FUNC (const UNIT *s, size_t n, char *p) @@ -35,10 +37,15 @@ FUNC (const UNIT *s, size_t n, char *p) -1 at the very beginning of the string. */ int last_char_prop = -1; - /* Grapheme Cluster break property of the last complex character. - -1 at the very beginning of the string. */ - int last_compchar_prop = -1; + /* True if the last character ends an emoji modifier sequence + \p{Extended_Pictographic} Extend*. */ + bool emoji_modifier_sequence = false; + /* True if the last character was immediately preceded by an + emoji modifier sequence \p{Extended_Pictographic} Extend*. */ + bool emoji_modifier_sequence_before_last_char = false; + /* Number of consecutive regional indicator (RI) characters seen + immediately before the current point. */ size_t ri_count = 0; /* Don't break inside multibyte characters. */ @@ -89,35 +96,27 @@ FUNC (const UNIT *s, size_t n, char *p) /* No break after Prepend characters (GB9b). */ else if (last_char_prop == GBP_PREPEND) /* *p = 0 */; - /* No break within emoji modifier sequences (GB10). */ - else if ((last_compchar_prop == GBP_EB - || last_compchar_prop == GBP_EBG) - && prop == GBP_EM) - /* *p = 0 */; - /* No break within emoji zwj sequences (GB11). */ + /* No break within emoji modifier sequences or emoji zwj sequences + (GB11). */ else if (last_char_prop == GBP_ZWJ - && (prop == GBP_GAZ - || prop == GBP_EBG)) + && emoji_modifier_sequence_before_last_char + && uc_is_property_extended_pictographic (uc)) /* *p = 0 */; /* No break between RI if there is an odd number of RI characters before (GB12, GB13). */ - else if (prop == GBP_RI) - { - if (ri_count % 2 == 0) - *p = 1; - /* else *p = 0; */ - } - /* Break everywhere (GBP999). */ + else if (prop == GBP_RI && (ri_count % 2) != 0) + /* *p = 0 */; + /* Break everywhere (GB999). */ else *p = 1; } - last_char_prop = prop; + emoji_modifier_sequence_before_last_char = emoji_modifier_sequence; + emoji_modifier_sequence = + (emoji_modifier_sequence && prop == GBP_EXTEND) + || uc_is_property_extended_pictographic (uc); - if (!(prop == GBP_EXTEND - && (last_compchar_prop == GBP_EB - || last_compchar_prop == GBP_EBG))) - last_compchar_prop = prop; + last_char_prop = prop; if (prop == GBP_RI) ri_count++; |