diff options
Diffstat (limited to 'lib/uniwbrk/u-wordbreaks.h')
-rw-r--r-- | lib/uniwbrk/u-wordbreaks.h | 85 |
1 files changed, 28 insertions, 57 deletions
diff --git a/lib/uniwbrk/u-wordbreaks.h b/lib/uniwbrk/u-wordbreaks.h index cdeab0b..b0fd301 100644 --- a/lib/uniwbrk/u-wordbreaks.h +++ b/lib/uniwbrk/u-wordbreaks.h @@ -1,5 +1,5 @@ /* Word breaks in UTF-8/UTF-16/UTF-32 strings. - Copyright (C) 2009-2015 Free Software Foundation, Inc. + Copyright (C) 2009-2010 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2009. This program is free software: you can redistribute it and/or modify it @@ -55,12 +55,16 @@ FUNC (const UNIT *s, size_t n, char *p) if (last_char_prop == WBP_CR && prop == WBP_LF) /* *p = 0 */; /* Break before and after newlines. */ - else if ((last_char_prop == WBP_CR - || last_char_prop == WBP_LF - || last_char_prop == WBP_NEWLINE) - || (prop == WBP_CR - || prop == WBP_LF - || prop == WBP_NEWLINE)) + else if (last_char_prop >= WBP_NEWLINE + /* same as: + last_char_prop == WBP_CR + || last_char_prop == WBP_LF + || last_char_prop == WBP_NEWLINE */ + || prop >= WBP_NEWLINE + /* same as: + prop == WBP_CR + || prop == WBP_LF + || prop == WBP_NEWLINE */) *p = 1; /* Ignore Format and Extend characters. */ else if (!(prop == WBP_EXTEND || prop == WBP_FORMAT)) @@ -69,66 +73,38 @@ FUNC (const UNIT *s, size_t n, char *p) secondlast last current - (ALetter | HL) (MidLetter | MidNumLet | SQ) × (ALetter | HL) (WB7) - (ALetter | HL) × (MidLetter | MidNumLet | SQ) (ALetter | HL) (WB6) - Numeric (MidNum | MidNumLet | SQ) × Numeric (WB11) - Numeric × (MidNum | MidNumLet | SQ) Numeric (WB12) - HL × DQ HL (WB7b) - HL DQ × HL (WB7c) - (ALetter | HL) × (ALetter | HL) (WB5) - (ALetter | HL) × Numeric (WB9) - Numeric × (ALetter | HL) (WB10) + ALetter (MidLetter | MidNumLet) × ALetter (WB7) + ALetter × (MidLetter | MidNumLet) ALetter (WB6) + Numeric (MidNum | MidNumLet) × Numeric (WB11) + Numeric × (MidNum | MidNumLet) Numeric (WB12) + ALetter × ALetter (WB5) + ALetter × Numeric (WB9) + Numeric × ALetter (WB10) Numeric × Numeric (WB8) - HL × SQ (WB7a) Katakana × Katakana (WB13) - (ALetter | HL | Numeric | Katakana) × ExtendNumLet (WB13a) + (ALetter | Numeric | Katakana) × ExtendNumLet (WB13a) ExtendNumLet × ExtendNumLet (WB13a) - ExtendNumLet × (ALetter | HL | Numeric | Katakana) (WB13b) - Regional_Indicator × Regional_Indicator (WB13c) + ExtendNumLet × (ALetter | Numeric | Katakana) (WB13b) */ /* No break across certain punctuation. Also, disable word breaks that were recognized earlier (due to lookahead of only one complex character). */ - if (((prop == WBP_ALETTER - || prop == WBP_HL) + if ((prop == WBP_ALETTER && (last_compchar_prop == WBP_MIDLETTER - || last_compchar_prop == WBP_MIDNUMLET - || last_compchar_prop == WBP_SQ) - && (secondlast_compchar_prop == WBP_ALETTER - || secondlast_compchar_prop == WBP_HL)) + || last_compchar_prop == WBP_MIDNUMLET) + && secondlast_compchar_prop == WBP_ALETTER) || (prop == WBP_NUMERIC && (last_compchar_prop == WBP_MIDNUM - || last_compchar_prop == WBP_MIDNUMLET - || last_compchar_prop == WBP_SQ) - && secondlast_compchar_prop == WBP_NUMERIC) - || (prop == WBP_HL - && last_compchar_prop == WBP_DQ - && secondlast_compchar_prop == WBP_HL)) + || last_compchar_prop == WBP_MIDNUMLET) + && secondlast_compchar_prop == WBP_NUMERIC)) { *last_compchar_ptr = 0; /* *p = 0; */ } - /* Break after Format and Extend characters. */ - else if (last_compchar_prop == WBP_EXTEND - || last_compchar_prop == WBP_FORMAT) - *p = 1; else { - /* Normalize property value to table index, - skipping 5 properties: WBP_EXTEND, - WBP_FORMAT, WBP_NEWLINE, WBP_CR, and - WBP_LF. */ - int last_compchar_prop_index = last_compchar_prop; - int prop_index = prop; - - if (last_compchar_prop_index >= WBP_EXTEND) - last_compchar_prop_index -= 5; - - if (prop_index >= WBP_EXTEND) - prop_index -= 5; - /* Perform a single table lookup. */ - if (uniwbrk_table[last_compchar_prop_index][prop_index]) + if (uniwbrk_table[last_compchar_prop][prop]) *p = 1; /* else *p = 0; */ } @@ -136,13 +112,8 @@ FUNC (const UNIT *s, size_t n, char *p) } last_char_prop = prop; - /* Ignore Format and Extend characters, except at the start - of the line. */ - if (last_compchar_prop < 0 - || last_compchar_prop == WBP_CR - || last_compchar_prop == WBP_LF - || last_compchar_prop == WBP_NEWLINE - || !(prop == WBP_EXTEND || prop == WBP_FORMAT)) + /* Ignore Format and Extend characters, except at the start of the string. */ + if (last_compchar_prop < 0 || !(prop == WBP_EXTEND || prop == WBP_FORMAT)) { secondlast_compchar_prop = last_compchar_prop; last_compchar_prop = prop; |