From 5f2b09982312c98863eb9a8dfe2c608b81f58259 Mon Sep 17 00:00:00 2001 From: "Manuel A. Fernandez Montecelo" Date: Thu, 26 May 2016 16:48:15 +0100 Subject: Imported Upstream version 0.9.6 --- lib/unictype/categ_byname.c | 232 ++++++++++++++++++++++++++++++-------------- 1 file changed, 159 insertions(+), 73 deletions(-) (limited to 'lib/unictype/categ_byname.c') diff --git a/lib/unictype/categ_byname.c b/lib/unictype/categ_byname.c index fb75b67..47c743e 100644 --- a/lib/unictype/categ_byname.c +++ b/lib/unictype/categ_byname.c @@ -1,5 +1,5 @@ /* Categories of Unicode characters. - Copyright (C) 2002, 2006-2007, 2009-2010 Free Software Foundation, Inc. + Copyright (C) 2002, 2006-2007, 2011-2015 Free Software Foundation, Inc. Written by Bruno Haible , 2002. This program is free software: you can redistribute it and/or modify it @@ -20,86 +20,172 @@ /* Specification. */ #include "unictype.h" +#include <stdlib.h> +#include <string.h> + +/* Indices stored in the 'struct named_category' elements of the perfect hash + table. We don't use uc_general_category_t values or their addresses + directly, because this would introduce load-time relocations. 
*/ +enum +{ + UC_CATEGORY_INDEX_L, + UC_CATEGORY_INDEX_LC, + UC_CATEGORY_INDEX_Lu, + UC_CATEGORY_INDEX_Ll, + UC_CATEGORY_INDEX_Lt, + UC_CATEGORY_INDEX_Lm, + UC_CATEGORY_INDEX_Lo, + UC_CATEGORY_INDEX_M, + UC_CATEGORY_INDEX_Mn, + UC_CATEGORY_INDEX_Mc, + UC_CATEGORY_INDEX_Me, + UC_CATEGORY_INDEX_N, + UC_CATEGORY_INDEX_Nd, + UC_CATEGORY_INDEX_Nl, + UC_CATEGORY_INDEX_No, + UC_CATEGORY_INDEX_P, + UC_CATEGORY_INDEX_Pc, + UC_CATEGORY_INDEX_Pd, + UC_CATEGORY_INDEX_Ps, + UC_CATEGORY_INDEX_Pe, + UC_CATEGORY_INDEX_Pi, + UC_CATEGORY_INDEX_Pf, + UC_CATEGORY_INDEX_Po, + UC_CATEGORY_INDEX_S, + UC_CATEGORY_INDEX_Sm, + UC_CATEGORY_INDEX_Sc, + UC_CATEGORY_INDEX_Sk, + UC_CATEGORY_INDEX_So, + UC_CATEGORY_INDEX_Z, + UC_CATEGORY_INDEX_Zs, + UC_CATEGORY_INDEX_Zl, + UC_CATEGORY_INDEX_Zp, + UC_CATEGORY_INDEX_C, + UC_CATEGORY_INDEX_Cc, + UC_CATEGORY_INDEX_Cf, + UC_CATEGORY_INDEX_Cs, + UC_CATEGORY_INDEX_Co, + UC_CATEGORY_INDEX_Cn +}; + +#include "unictype/categ_byname.h" + uc_general_category_t uc_general_category_byname (const char *category_name) { - if (category_name[0] != '\0' - && (category_name[1] == '\0' || category_name[2] == '\0')) - switch (category_name[0]) + size_t len; + + len = strlen (category_name); + if (len <= MAX_WORD_LENGTH) + { + char buf[MAX_WORD_LENGTH + 1]; + const struct named_category *found; + + /* Copy category_name into buf, converting '_' and '-' to ' '. 
*/ { - case 'L': - switch (category_name[1]) - { - case '\0': return UC_CATEGORY_L; - case 'u': return UC_CATEGORY_Lu; - case 'l': return UC_CATEGORY_Ll; - case 't': return UC_CATEGORY_Lt; - case 'm': return UC_CATEGORY_Lm; - case 'o': return UC_CATEGORY_Lo; - } - break; - case 'M': - switch (category_name[1]) - { - case '\0': return UC_CATEGORY_M; - case 'n': return UC_CATEGORY_Mn; - case 'c': return UC_CATEGORY_Mc; - case 'e': return UC_CATEGORY_Me; - } - break; - case 'N': - switch (category_name[1]) - { - case '\0': return UC_CATEGORY_N; - case 'd': return UC_CATEGORY_Nd; - case 'l': return UC_CATEGORY_Nl; - case 'o': return UC_CATEGORY_No; - } - break; - case 'P': - switch (category_name[1]) - { - case '\0': return UC_CATEGORY_P; - case 'c': return UC_CATEGORY_Pc; - case 'd': return UC_CATEGORY_Pd; - case 's': return UC_CATEGORY_Ps; - case 'e': return UC_CATEGORY_Pe; - case 'i': return UC_CATEGORY_Pi; - case 'f': return UC_CATEGORY_Pf; - case 'o': return UC_CATEGORY_Po; - } - break; - case 'S': - switch (category_name[1]) - { - case '\0': return UC_CATEGORY_S; - case 'm': return UC_CATEGORY_Sm; - case 'c': return UC_CATEGORY_Sc; - case 'k': return UC_CATEGORY_Sk; - case 'o': return UC_CATEGORY_So; - } - break; - case 'Z': - switch (category_name[1]) + const char *p = category_name; + char *q = buf; + + for (;; p++, q++) { - case '\0': return UC_CATEGORY_Z; - case 's': return UC_CATEGORY_Zs; - case 'l': return UC_CATEGORY_Zl; - case 'p': return UC_CATEGORY_Zp; + char c = *p; + + if (c == '_' || c == '-') + c = ' '; + *q = c; + if (c == '\0') + break; } - break; - case 'C': - switch (category_name[1]) + } + /* Here q == buf + len. */ + + /* Do a hash table lookup, with case-insensitive comparison. */ + found = uc_general_category_lookup (buf, len); + if (found != NULL) + /* Use a 'switch' statement here, because a table would introduce + load-time relocations. 
*/ + switch (found->category_index) { - case '\0': return UC_CATEGORY_C; - case 'c': return UC_CATEGORY_Cc; - case 'f': return UC_CATEGORY_Cf; - case 's': return UC_CATEGORY_Cs; - case 'o': return UC_CATEGORY_Co; - case 'n': return UC_CATEGORY_Cn; + case UC_CATEGORY_INDEX_L: + return UC_CATEGORY_L; + case UC_CATEGORY_INDEX_LC: + return UC_CATEGORY_LC; + case UC_CATEGORY_INDEX_Lu: + return UC_CATEGORY_Lu; + case UC_CATEGORY_INDEX_Ll: + return UC_CATEGORY_Ll; + case UC_CATEGORY_INDEX_Lt: + return UC_CATEGORY_Lt; + case UC_CATEGORY_INDEX_Lm: + return UC_CATEGORY_Lm; + case UC_CATEGORY_INDEX_Lo: + return UC_CATEGORY_Lo; + case UC_CATEGORY_INDEX_M: + return UC_CATEGORY_M; + case UC_CATEGORY_INDEX_Mn: + return UC_CATEGORY_Mn; + case UC_CATEGORY_INDEX_Mc: + return UC_CATEGORY_Mc; + case UC_CATEGORY_INDEX_Me: + return UC_CATEGORY_Me; + case UC_CATEGORY_INDEX_N: + return UC_CATEGORY_N; + case UC_CATEGORY_INDEX_Nd: + return UC_CATEGORY_Nd; + case UC_CATEGORY_INDEX_Nl: + return UC_CATEGORY_Nl; + case UC_CATEGORY_INDEX_No: + return UC_CATEGORY_No; + case UC_CATEGORY_INDEX_P: + return UC_CATEGORY_P; + case UC_CATEGORY_INDEX_Pc: + return UC_CATEGORY_Pc; + case UC_CATEGORY_INDEX_Pd: + return UC_CATEGORY_Pd; + case UC_CATEGORY_INDEX_Ps: + return UC_CATEGORY_Ps; + case UC_CATEGORY_INDEX_Pe: + return UC_CATEGORY_Pe; + case UC_CATEGORY_INDEX_Pi: + return UC_CATEGORY_Pi; + case UC_CATEGORY_INDEX_Pf: + return UC_CATEGORY_Pf; + case UC_CATEGORY_INDEX_Po: + return UC_CATEGORY_Po; + case UC_CATEGORY_INDEX_S: + return UC_CATEGORY_S; + case UC_CATEGORY_INDEX_Sm: + return UC_CATEGORY_Sm; + case UC_CATEGORY_INDEX_Sc: + return UC_CATEGORY_Sc; + case UC_CATEGORY_INDEX_Sk: + return UC_CATEGORY_Sk; + case UC_CATEGORY_INDEX_So: + return UC_CATEGORY_So; + case UC_CATEGORY_INDEX_Z: + return UC_CATEGORY_Z; + case UC_CATEGORY_INDEX_Zs: + return UC_CATEGORY_Zs; + case UC_CATEGORY_INDEX_Zl: + return UC_CATEGORY_Zl; + case UC_CATEGORY_INDEX_Zp: + return UC_CATEGORY_Zp; + case UC_CATEGORY_INDEX_C: + 
return UC_CATEGORY_C; + case UC_CATEGORY_INDEX_Cc: + return UC_CATEGORY_Cc; + case UC_CATEGORY_INDEX_Cf: + return UC_CATEGORY_Cf; + case UC_CATEGORY_INDEX_Cs: + return UC_CATEGORY_Cs; + case UC_CATEGORY_INDEX_Co: + return UC_CATEGORY_Co; + case UC_CATEGORY_INDEX_Cn: + return UC_CATEGORY_Cn; + default: + abort (); } - break; - } + } /* Invalid category name. */ return _UC_CATEGORY_NONE; } -- cgit v1.2.3