summary | refs | log | tree | commit | diff
path: root/lib/uniname/uniname.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/uniname/uniname.c')
-rw-r--r-- lib/uniname/uniname.c 412
1 files changed, 207 insertions, 205 deletions
diff --git a/lib/uniname/uniname.c b/lib/uniname/uniname.c
index 08ead7b..48c8e96 100644
--- a/lib/uniname/uniname.c
+++ b/lib/uniname/uniname.c
@@ -1,5 +1,5 @@
/* Association between Unicode characters and their names.
- Copyright (C) 2000-2002, 2005-2007, 2009-2016 Free Software Foundation, Inc.
+ Copyright (C) 2000-2002, 2005-2007, 2009-2017 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or
modify it under the terms of either:
@@ -21,7 +21,7 @@
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>. */
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
#include <config.h>
@@ -415,218 +415,220 @@ unicode_name_character (const char *name)
}
}
}
- /* Convert the constituents to uint16_t words. */
- uint16_t words[UNICODE_CHARNAME_MAX_WORDS];
- uint16_t *wordptr = words;
{
- const char *p1 = buf;
- for (;;)
- {
+ /* Convert the constituents to uint16_t words. */
+ uint16_t words[UNICODE_CHARNAME_MAX_WORDS];
+ uint16_t *wordptr = words;
+ {
+ const char *p1 = buf;
+ for (;;)
{
- int word;
- const char *p2 = p1;
- while (p2 < ptr && *p2 != ' ')
- p2++;
- word = unicode_name_word_lookup (p1, p2 - p1);
- if (word < 0)
- break;
- if (wordptr == &words[UNICODE_CHARNAME_MAX_WORDS])
- break;
- *wordptr++ = word;
- if (p2 == ptr)
- goto filled_words;
- p1 = p2 + 1;
- }
- /* Special case for Hangul syllables. Keeps the tables small. */
- if (wordptr == &words[2]
- && words[0] == UNICODE_CHARNAME_WORD_HANGUL
- && words[1] == UNICODE_CHARNAME_WORD_SYLLABLE)
{
- /* Split the last word [p1..ptr) into three parts:
- 1) [BCDGHJKMNPRST]
- 2) [AEIOUWY]
- 3) [BCDGHIJKLMNPST]
- */
- const char *p2;
- const char *p3;
- const char *p4;
-
- p2 = p1;
- while (p2 < ptr
- && (*p2 == 'B' || *p2 == 'C' || *p2 == 'D'
- || *p2 == 'G' || *p2 == 'H' || *p2 == 'J'
- || *p2 == 'K' || *p2 == 'M' || *p2 == 'N'
- || *p2 == 'P' || *p2 == 'R' || *p2 == 'S'
- || *p2 == 'T'))
+ int word;
+ const char *p2 = p1;
+ while (p2 < ptr && *p2 != ' ')
p2++;
- p3 = p2;
- while (p3 < ptr
- && (*p3 == 'A' || *p3 == 'E' || *p3 == 'I'
- || *p3 == 'O' || *p3 == 'U' || *p3 == 'W'
- || *p3 == 'Y'))
- p3++;
- p4 = p3;
- while (p4 < ptr
- && (*p4 == 'B' || *p4 == 'C' || *p4 == 'D'
- || *p4 == 'G' || *p4 == 'H' || *p4 == 'I'
- || *p4 == 'J' || *p4 == 'K' || *p4 == 'L'
- || *p4 == 'M' || *p4 == 'N' || *p4 == 'P'
- || *p4 == 'S' || *p4 == 'T'))
- p4++;
- if (p4 == ptr)
- {
- unsigned int n1 = p2 - p1;
- unsigned int n2 = p3 - p2;
- unsigned int n3 = p4 - p3;
-
- if (n1 <= 2 && (n2 >= 1 && n2 <= 3) && n3 <= 2)
- {
- unsigned int index1;
-
- for (index1 = 0; index1 < 19; index1++)
- if (memcmp (jamo_initial_short_name[index1], p1, n1) == 0
- && jamo_initial_short_name[index1][n1] == '\0')
+ word = unicode_name_word_lookup (p1, p2 - p1);
+ if (word < 0)
+ break;
+ if (wordptr == &words[UNICODE_CHARNAME_MAX_WORDS])
+ break;
+ *wordptr++ = word;
+ if (p2 == ptr)
+ goto filled_words;
+ p1 = p2 + 1;
+ }
+ /* Special case for Hangul syllables. Keeps the tables small. */
+ if (wordptr == &words[2]
+ && words[0] == UNICODE_CHARNAME_WORD_HANGUL
+ && words[1] == UNICODE_CHARNAME_WORD_SYLLABLE)
+ {
+ /* Split the last word [p1..ptr) into three parts:
+ 1) [BCDGHJKMNPRST]
+ 2) [AEIOUWY]
+ 3) [BCDGHIJKLMNPST]
+ */
+ const char *p2;
+ const char *p3;
+ const char *p4;
+
+ p2 = p1;
+ while (p2 < ptr
+ && (*p2 == 'B' || *p2 == 'C' || *p2 == 'D'
+ || *p2 == 'G' || *p2 == 'H' || *p2 == 'J'
+ || *p2 == 'K' || *p2 == 'M' || *p2 == 'N'
+ || *p2 == 'P' || *p2 == 'R' || *p2 == 'S'
+ || *p2 == 'T'))
+ p2++;
+ p3 = p2;
+ while (p3 < ptr
+ && (*p3 == 'A' || *p3 == 'E' || *p3 == 'I'
+ || *p3 == 'O' || *p3 == 'U' || *p3 == 'W'
+ || *p3 == 'Y'))
+ p3++;
+ p4 = p3;
+ while (p4 < ptr
+ && (*p4 == 'B' || *p4 == 'C' || *p4 == 'D'
+ || *p4 == 'G' || *p4 == 'H' || *p4 == 'I'
+ || *p4 == 'J' || *p4 == 'K' || *p4 == 'L'
+ || *p4 == 'M' || *p4 == 'N' || *p4 == 'P'
+ || *p4 == 'S' || *p4 == 'T'))
+ p4++;
+ if (p4 == ptr)
+ {
+ unsigned int n1 = p2 - p1;
+ unsigned int n2 = p3 - p2;
+ unsigned int n3 = p4 - p3;
+
+ if (n1 <= 2 && (n2 >= 1 && n2 <= 3) && n3 <= 2)
+ {
+ unsigned int index1;
+
+ for (index1 = 0; index1 < 19; index1++)
+ if (memcmp (jamo_initial_short_name[index1], p1, n1) == 0
+ && jamo_initial_short_name[index1][n1] == '\0')
+ {
+ unsigned int index2;
+
+ for (index2 = 0; index2 < 21; index2++)
+ if (memcmp (jamo_medial_short_name[index2], p2, n2) == 0
+ && jamo_medial_short_name[index2][n2] == '\0')
+ {
+ unsigned int index3;
+
+ for (index3 = 0; index3 < 28; index3++)
+ if (memcmp (jamo_final_short_name[index3], p3, n3) == 0
+ && jamo_final_short_name[index3][n3] == '\0')
+ {
+ return 0xAC00 + (index1 * 21 + index2) * 28 + index3;
+ }
+ break;
+ }
+ break;
+ }
+ }
+ }
+ }
+ /* Special case for CJK compatibility ideographs. Keeps the
+ tables small. */
+ if (wordptr == &words[2]
+ && words[0] == UNICODE_CHARNAME_WORD_CJK
+ && words[1] == UNICODE_CHARNAME_WORD_COMPATIBILITY
+ && p1 + 14 <= ptr
+ && p1 + 15 >= ptr
+ && memcmp (p1, "IDEOGRAPH-", 10) == 0)
+ {
+ const char *p2 = p1 + 10;
+
+ if (*p2 != '0')
+ {
+ unsigned int c = 0;
+
+ for (;;)
+ {
+ if (*p2 >= '0' && *p2 <= '9')
+ c += (*p2 - '0');
+ else if (*p2 >= 'A' && *p2 <= 'F')
+ c += (*p2 - 'A' + 10);
+ else
+ break;
+ p2++;
+ if (p2 == ptr)
{
- unsigned int index2;
-
- for (index2 = 0; index2 < 21; index2++)
- if (memcmp (jamo_medial_short_name[index2], p2, n2) == 0
- && jamo_medial_short_name[index2][n2] == '\0')
- {
- unsigned int index3;
-
- for (index3 = 0; index3 < 28; index3++)
- if (memcmp (jamo_final_short_name[index3], p3, n3) == 0
- && jamo_final_short_name[index3][n3] == '\0')
- {
- return 0xAC00 + (index1 * 21 + index2) * 28 + index3;
- }
- break;
- }
- break;
+ if ((c >= 0xF900 && c <= 0xFA2D)
+ || (c >= 0xFA30 && c <= 0xFA6A)
+ || (c >= 0xFA70 && c <= 0xFAD9)
+ || (c >= 0x2F800 && c <= 0x2FA1D))
+ return c;
+ else
+ break;
}
- }
- }
- }
- /* Special case for CJK compatibility ideographs. Keeps the
- tables small. */
- if (wordptr == &words[2]
- && words[0] == UNICODE_CHARNAME_WORD_CJK
- && words[1] == UNICODE_CHARNAME_WORD_COMPATIBILITY
- && p1 + 14 <= ptr
- && p1 + 15 >= ptr
- && memcmp (p1, "IDEOGRAPH-", 10) == 0)
- {
- const char *p2 = p1 + 10;
-
- if (*p2 != '0')
- {
- unsigned int c = 0;
-
- for (;;)
- {
- if (*p2 >= '0' && *p2 <= '9')
- c += (*p2 - '0');
- else if (*p2 >= 'A' && *p2 <= 'F')
- c += (*p2 - 'A' + 10);
- else
+ c = c << 4;
+ }
+ }
+ }
+ /* Special case for variation selectors. Keeps the
+ tables small. */
+ if (wordptr == &words[1]
+ && words[0] == UNICODE_CHARNAME_WORD_VARIATION
+ && p1 + 10 <= ptr
+ && p1 + 12 >= ptr
+ && memcmp (p1, "SELECTOR-", 9) == 0)
+ {
+ const char *p2 = p1 + 9;
+
+ if (*p2 != '0')
+ {
+ unsigned int c = 0;
+
+ for (;;)
+ {
+ if (*p2 >= '0' && *p2 <= '9')
+ c += (*p2 - '0');
+ p2++;
+ if (p2 == ptr)
+ {
+ if (c >= 1 && c <= 16)
+ return c - 1 + 0xFE00;
+ else if (c >= 17 && c <= 256)
+ return c - 17 + 0xE0100;
+ else
+ break;
+ }
+ c = c * 10;
+ }
+ }
+ }
+ }
+ }
+ if (false)
+ filled_words:
+ {
+ /* Multiply by 2, to simplify later comparisons. */
+ unsigned int words_length = wordptr - words;
+ {
+ int i = words_length - 1;
+ words[i] = 2 * words[i];
+ for (; --i >= 0; )
+ words[i] = 2 * words[i] + 1;
+ }
+ /* Binary search in unicode_name_to_index. */
+ {
+ unsigned int i1 = 0;
+ unsigned int i2 = SIZEOF (unicode_name_to_index);
+ for (;;)
+ {
+ unsigned int i = (i1 + i2) >> 1;
+ const uint16_t *w = words;
+ const uint16_t *p = &unicode_names[unicode_name_to_index[i].name];
+ unsigned int n = words_length;
+ for (;;)
+ {
+ if (*p < *w)
+ {
+ if (i1 == i)
+ goto name_not_found;
+ /* Note here: i1 < i < i2. */
+ i1 = i;
break;
- p2++;
- if (p2 == ptr)
- {
- if ((c >= 0xF900 && c <= 0xFA2D)
- || (c >= 0xFA30 && c <= 0xFA6A)
- || (c >= 0xFA70 && c <= 0xFAD9)
- || (c >= 0x2F800 && c <= 0x2FA1D))
- return c;
- else
- break;
- }
- c = c << 4;
- }
- }
- }
- /* Special case for variation selectors. Keeps the
- tables small. */
- if (wordptr == &words[1]
- && words[0] == UNICODE_CHARNAME_WORD_VARIATION
- && p1 + 10 <= ptr
- && p1 + 12 >= ptr
- && memcmp (p1, "SELECTOR-", 9) == 0)
- {
- const char *p2 = p1 + 9;
-
- if (*p2 != '0')
- {
- unsigned int c = 0;
-
- for (;;)
- {
- if (*p2 >= '0' && *p2 <= '9')
- c += (*p2 - '0');
- p2++;
- if (p2 == ptr)
- {
- if (c >= 1 && c <= 16)
- return c - 1 + 0xFE00;
- else if (c >= 17 && c <= 256)
- return c - 17 + 0xE0100;
- else
- break;
- }
- c = c * 10;
- }
- }
- }
+ }
+ else if (*p > *w)
+ {
+ if (i2 == i)
+ goto name_not_found;
+ /* Note here: i1 <= i < i2. */
+ i2 = i;
+ break;
+ }
+ p++; w++; n--;
+ if (n == 0)
+ return unicode_index_to_code (unicode_name_to_index[i].index);
+ }
+ }
+ }
+ name_not_found: ;
}
}
- if (false)
- filled_words:
- {
- /* Multiply by 2, to simplify later comparisons. */
- unsigned int words_length = wordptr - words;
- {
- int i = words_length - 1;
- words[i] = 2 * words[i];
- for (; --i >= 0; )
- words[i] = 2 * words[i] + 1;
- }
- /* Binary search in unicode_name_to_index. */
- {
- unsigned int i1 = 0;
- unsigned int i2 = SIZEOF (unicode_name_to_index);
- for (;;)
- {
- unsigned int i = (i1 + i2) >> 1;
- const uint16_t *w = words;
- const uint16_t *p = &unicode_names[unicode_name_to_index[i].name];
- unsigned int n = words_length;
- for (;;)
- {
- if (*p < *w)
- {
- if (i1 == i)
- goto name_not_found;
- /* Note here: i1 < i < i2. */
- i1 = i;
- break;
- }
- else if (*p > *w)
- {
- if (i2 == i)
- goto name_not_found;
- /* Note here: i1 <= i < i2. */
- i2 = i;
- break;
- }
- p++; w++; n--;
- if (n == 0)
- return unicode_index_to_code (unicode_name_to_index[i].index);
- }
- }
- }
- name_not_found: ;
- }
}
}
return UNINAME_INVALID;