summary | refs | log | tree | commit | diff
path: root/lib/uniwbrk/u-wordbreaks.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/uniwbrk/u-wordbreaks.h')
-rw-r--r-- lib/uniwbrk/u-wordbreaks.h 85
1 files changed, 28 insertions, 57 deletions
diff --git a/lib/uniwbrk/u-wordbreaks.h b/lib/uniwbrk/u-wordbreaks.h
index cdeab0b..b0fd301 100644
--- a/lib/uniwbrk/u-wordbreaks.h
+++ b/lib/uniwbrk/u-wordbreaks.h
@@ -1,5 +1,5 @@
/* Word breaks in UTF-8/UTF-16/UTF-32 strings.
- Copyright (C) 2009-2015 Free Software Foundation, Inc.
+ Copyright (C) 2009-2010 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2009.
This program is free software: you can redistribute it and/or modify it
@@ -55,12 +55,16 @@ FUNC (const UNIT *s, size_t n, char *p)
if (last_char_prop == WBP_CR && prop == WBP_LF)
/* *p = 0 */;
/* Break before and after newlines. */
- else if ((last_char_prop == WBP_CR
- || last_char_prop == WBP_LF
- || last_char_prop == WBP_NEWLINE)
- || (prop == WBP_CR
- || prop == WBP_LF
- || prop == WBP_NEWLINE))
+ else if (last_char_prop >= WBP_NEWLINE
+ /* same as:
+ last_char_prop == WBP_CR
+ || last_char_prop == WBP_LF
+ || last_char_prop == WBP_NEWLINE */
+ || prop >= WBP_NEWLINE
+ /* same as:
+ prop == WBP_CR
+ || prop == WBP_LF
+ || prop == WBP_NEWLINE */)
*p = 1;
/* Ignore Format and Extend characters. */
else if (!(prop == WBP_EXTEND || prop == WBP_FORMAT))
@@ -69,66 +73,38 @@ FUNC (const UNIT *s, size_t n, char *p)
secondlast last current
- (ALetter | HL) (MidLetter | MidNumLet | SQ) × (ALetter | HL) (WB7)
- (ALetter | HL) × (MidLetter | MidNumLet | SQ) (ALetter | HL) (WB6)
- Numeric (MidNum | MidNumLet | SQ) × Numeric (WB11)
- Numeric × (MidNum | MidNumLet | SQ) Numeric (WB12)
- HL × DQ HL (WB7b)
- HL DQ × HL (WB7c)
- (ALetter | HL) × (ALetter | HL) (WB5)
- (ALetter | HL) × Numeric (WB9)
- Numeric × (ALetter | HL) (WB10)
+ ALetter (MidLetter | MidNumLet) × ALetter (WB7)
+ ALetter × (MidLetter | MidNumLet) ALetter (WB6)
+ Numeric (MidNum | MidNumLet) × Numeric (WB11)
+ Numeric × (MidNum | MidNumLet) Numeric (WB12)
+ ALetter × ALetter (WB5)
+ ALetter × Numeric (WB9)
+ Numeric × ALetter (WB10)
Numeric × Numeric (WB8)
- HL × SQ (WB7a)
Katakana × Katakana (WB13)
- (ALetter | HL | Numeric | Katakana) × ExtendNumLet (WB13a)
+ (ALetter | Numeric | Katakana) × ExtendNumLet (WB13a)
ExtendNumLet × ExtendNumLet (WB13a)
- ExtendNumLet × (ALetter | HL | Numeric | Katakana) (WB13b)
- Regional_Indicator × Regional_Indicator (WB13c)
+ ExtendNumLet × (ALetter | Numeric | Katakana) (WB13b)
*/
/* No break across certain punctuation. Also, disable word
breaks that were recognized earlier (due to lookahead of
only one complex character). */
- if (((prop == WBP_ALETTER
- || prop == WBP_HL)
+ if ((prop == WBP_ALETTER
&& (last_compchar_prop == WBP_MIDLETTER
- || last_compchar_prop == WBP_MIDNUMLET
- || last_compchar_prop == WBP_SQ)
- && (secondlast_compchar_prop == WBP_ALETTER
- || secondlast_compchar_prop == WBP_HL))
+ || last_compchar_prop == WBP_MIDNUMLET)
+ && secondlast_compchar_prop == WBP_ALETTER)
|| (prop == WBP_NUMERIC
&& (last_compchar_prop == WBP_MIDNUM
- || last_compchar_prop == WBP_MIDNUMLET
- || last_compchar_prop == WBP_SQ)
- && secondlast_compchar_prop == WBP_NUMERIC)
- || (prop == WBP_HL
- && last_compchar_prop == WBP_DQ
- && secondlast_compchar_prop == WBP_HL))
+ || last_compchar_prop == WBP_MIDNUMLET)
+ && secondlast_compchar_prop == WBP_NUMERIC))
{
*last_compchar_ptr = 0;
/* *p = 0; */
}
- /* Break after Format and Extend characters. */
- else if (last_compchar_prop == WBP_EXTEND
- || last_compchar_prop == WBP_FORMAT)
- *p = 1;
else
{
- /* Normalize property value to table index,
- skipping 5 properties: WBP_EXTEND,
- WBP_FORMAT, WBP_NEWLINE, WBP_CR, and
- WBP_LF. */
- int last_compchar_prop_index = last_compchar_prop;
- int prop_index = prop;
-
- if (last_compchar_prop_index >= WBP_EXTEND)
- last_compchar_prop_index -= 5;
-
- if (prop_index >= WBP_EXTEND)
- prop_index -= 5;
-
/* Perform a single table lookup. */
- if (uniwbrk_table[last_compchar_prop_index][prop_index])
+ if (uniwbrk_table[last_compchar_prop][prop])
*p = 1;
/* else *p = 0; */
}
@@ -136,13 +112,8 @@ FUNC (const UNIT *s, size_t n, char *p)
}
last_char_prop = prop;
- /* Ignore Format and Extend characters, except at the start
- of the line. */
- if (last_compchar_prop < 0
- || last_compchar_prop == WBP_CR
- || last_compchar_prop == WBP_LF
- || last_compchar_prop == WBP_NEWLINE
- || !(prop == WBP_EXTEND || prop == WBP_FORMAT))
+ /* Ignore Format and Extend characters, except at the start of the string. */
+ if (last_compchar_prop < 0 || !(prop == WBP_EXTEND || prop == WBP_FORMAT))
{
secondlast_compchar_prop = last_compchar_prop;
last_compchar_prop = prop;