From 3590c846d4c2febbc05b4ad6b14a06edc549e453 Mon Sep 17 00:00:00 2001 From: "Manuel A. Fernandez Montecelo" Date: Fri, 27 May 2016 14:35:16 +0100 Subject: Imported Upstream version 0.9.6+really0.9.6 --- lib/unictype/categ_longname.c | 106 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 lib/unictype/categ_longname.c (limited to 'lib/unictype/categ_longname.c') diff --git a/lib/unictype/categ_longname.c b/lib/unictype/categ_longname.c new file mode 100644 index 0000000..d3a4f8d --- /dev/null +++ b/lib/unictype/categ_longname.c @@ -0,0 +1,106 @@ +/* Categories of Unicode characters. + Copyright (C) 2002, 2006-2007, 2011-2015 Free Software Foundation, Inc. + Written by Bruno Haible , 2011. + + This program is free software: you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . */ + +#include + +/* Specification. */ +#include "unictype.h" + +static const char u_category_long_name[30][22] = +{ + "Uppercase Letter", + "Lowercase Letter", + "Titlecase Letter", + "Modifier Letter", + "Other Letter", + "Nonspacing Mark", + "Spacing Mark", + "Enclosing Mark", + "Decimal Number", + "Letter Number", + "Other Number", + "Connector Punctuation", + "Dash Punctuation", + "Open Punctuation", + "Close Punctuation", + "Initial Punctuation", + "Final Punctuation", + "Other Punctuation", + "Math Symbol", + "Currency Symbol", + "Modifier Symbol", + "Other Symbol", + "Space Separator", + "Line Separator", + "Paragraph Separator", + "Control", + "Format", + "Surrogate", + "Private Use", + "Unassigned" +}; + +const char * +uc_general_category_long_name (uc_general_category_t category) +{ + uint32_t bitmask = category.bitmask; + /* bitmask should consist of a single bit. */ + if (bitmask != 0) + { + if ((bitmask & (bitmask - 1)) == 0) + { + int bit; + /* Take log2 using a variant of Robert Harley's method. + Found by Bruno Haible 1996. */ + uint32_t n = bitmask; + static const char ord2_tab[64] = + { + -1, 0, 1, 12, 2, 6, -1, 13, 3, -1, 7, -1, -1, -1, -1, 14, + 10, 4, -1, -1, 8, -1, -1, 25, -1, -1, -1, -1, -1, 21, 27, 15, + 31, 11, 5, -1, -1, -1, -1, -1, 9, -1, -1, 24, -1, -1, 20, 26, + 30, -1, -1, -1, -1, 23, -1, 19, 29, -1, 22, 18, 28, 17, 16, -1 + }; + n += n << 4; + n += n << 6; + n = (n << 16) - n; + bit = ord2_tab[n >> 26]; + + if (bit < sizeof (u_category_long_name) / sizeof (u_category_long_name[0])) + return u_category_long_name[bit]; + } + else + { + if (bitmask == UC_CATEGORY_MASK_L) + return "Letter"; + if (bitmask == UC_CATEGORY_MASK_LC) + return "Cased Letter"; + if (bitmask == UC_CATEGORY_MASK_M) + return "Mark"; + if (bitmask == UC_CATEGORY_MASK_N) + return "Number"; + if (bitmask == UC_CATEGORY_MASK_P) + return "Punctuation"; + if (bitmask == UC_CATEGORY_MASK_S) + return "Symbol"; + if (bitmask == UC_CATEGORY_MASK_Z) + return "Separator"; + if (bitmask == UC_CATEGORY_MASK_C) + return "Other"; + } + } + return NULL; +} -- cgit v1.2.3