/* Properties of Unicode characters. Copyright (C) 2007, 2011-2015 Free Software Foundation, Inc. Written by Bruno Haible , 2007. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this program. If not, see . */ #include /* Specification. */ #include "unictype.h" #include #include /* Indices stored in the 'struct named_category' elements of the perfect hash table. We don't use uc_general_category_t values or their addresses directly, because this would introduce load-time relocations. */ enum { /* General. */ UC_PROPERTY_INDEX_WHITE_SPACE, UC_PROPERTY_INDEX_ALPHABETIC, UC_PROPERTY_INDEX_OTHER_ALPHABETIC, UC_PROPERTY_INDEX_NOT_A_CHARACTER, UC_PROPERTY_INDEX_DEFAULT_IGNORABLE_CODE_POINT, UC_PROPERTY_INDEX_OTHER_DEFAULT_IGNORABLE_CODE_POINT, UC_PROPERTY_INDEX_DEPRECATED, UC_PROPERTY_INDEX_LOGICAL_ORDER_EXCEPTION, UC_PROPERTY_INDEX_VARIATION_SELECTOR, UC_PROPERTY_INDEX_PRIVATE_USE, UC_PROPERTY_INDEX_UNASSIGNED_CODE_VALUE, /* Case. */ UC_PROPERTY_INDEX_UPPERCASE, UC_PROPERTY_INDEX_OTHER_UPPERCASE, UC_PROPERTY_INDEX_LOWERCASE, UC_PROPERTY_INDEX_OTHER_LOWERCASE, UC_PROPERTY_INDEX_TITLECASE, UC_PROPERTY_INDEX_CASED, UC_PROPERTY_INDEX_CASE_IGNORABLE, UC_PROPERTY_INDEX_CHANGES_WHEN_LOWERCASED, UC_PROPERTY_INDEX_CHANGES_WHEN_UPPERCASED, UC_PROPERTY_INDEX_CHANGES_WHEN_TITLECASED, UC_PROPERTY_INDEX_CHANGES_WHEN_CASEFOLDED, UC_PROPERTY_INDEX_CHANGES_WHEN_CASEMAPPED, UC_PROPERTY_INDEX_SOFT_DOTTED, /* Identifiers. */ UC_PROPERTY_INDEX_ID_START, UC_PROPERTY_INDEX_OTHER_ID_START, UC_PROPERTY_INDEX_ID_CONTINUE, UC_PROPERTY_INDEX_OTHER_ID_CONTINUE, UC_PROPERTY_INDEX_XID_START, UC_PROPERTY_INDEX_XID_CONTINUE, UC_PROPERTY_INDEX_PATTERN_WHITE_SPACE, UC_PROPERTY_INDEX_PATTERN_SYNTAX, /* Shaping and rendering. */ UC_PROPERTY_INDEX_JOIN_CONTROL, UC_PROPERTY_INDEX_GRAPHEME_BASE, UC_PROPERTY_INDEX_GRAPHEME_EXTEND, UC_PROPERTY_INDEX_OTHER_GRAPHEME_EXTEND, UC_PROPERTY_INDEX_GRAPHEME_LINK, /* Bidi. */ UC_PROPERTY_INDEX_BIDI_CONTROL, UC_PROPERTY_INDEX_BIDI_LEFT_TO_RIGHT, UC_PROPERTY_INDEX_BIDI_HEBREW_RIGHT_TO_LEFT, UC_PROPERTY_INDEX_BIDI_ARABIC_RIGHT_TO_LEFT, UC_PROPERTY_INDEX_BIDI_EUROPEAN_DIGIT, UC_PROPERTY_INDEX_BIDI_EUR_NUM_SEPARATOR, UC_PROPERTY_INDEX_BIDI_EUR_NUM_TERMINATOR, UC_PROPERTY_INDEX_BIDI_ARABIC_DIGIT, UC_PROPERTY_INDEX_BIDI_COMMON_SEPARATOR, UC_PROPERTY_INDEX_BIDI_BLOCK_SEPARATOR, UC_PROPERTY_INDEX_BIDI_SEGMENT_SEPARATOR, UC_PROPERTY_INDEX_BIDI_WHITESPACE, UC_PROPERTY_INDEX_BIDI_NON_SPACING_MARK, UC_PROPERTY_INDEX_BIDI_BOUNDARY_NEUTRAL, UC_PROPERTY_INDEX_BIDI_PDF, UC_PROPERTY_INDEX_BIDI_EMBEDDING_OR_OVERRIDE, UC_PROPERTY_INDEX_BIDI_OTHER_NEUTRAL, /* Numeric. */ UC_PROPERTY_INDEX_HEX_DIGIT, UC_PROPERTY_INDEX_ASCII_HEX_DIGIT, /* CJK. */ UC_PROPERTY_INDEX_IDEOGRAPHIC, UC_PROPERTY_INDEX_UNIFIED_IDEOGRAPH, UC_PROPERTY_INDEX_RADICAL, UC_PROPERTY_INDEX_IDS_BINARY_OPERATOR, UC_PROPERTY_INDEX_IDS_TRINARY_OPERATOR, /* Misc. */ UC_PROPERTY_INDEX_ZERO_WIDTH, UC_PROPERTY_INDEX_SPACE, UC_PROPERTY_INDEX_NON_BREAK, UC_PROPERTY_INDEX_ISO_CONTROL, UC_PROPERTY_INDEX_FORMAT_CONTROL, UC_PROPERTY_INDEX_DASH, UC_PROPERTY_INDEX_HYPHEN, UC_PROPERTY_INDEX_PUNCTUATION, UC_PROPERTY_INDEX_LINE_SEPARATOR, UC_PROPERTY_INDEX_PARAGRAPH_SEPARATOR, UC_PROPERTY_INDEX_QUOTATION_MARK, UC_PROPERTY_INDEX_SENTENCE_TERMINAL, UC_PROPERTY_INDEX_TERMINAL_PUNCTUATION, UC_PROPERTY_INDEX_CURRENCY_SYMBOL, UC_PROPERTY_INDEX_MATH, UC_PROPERTY_INDEX_OTHER_MATH, UC_PROPERTY_INDEX_PAIRED_PUNCTUATION, UC_PROPERTY_INDEX_LEFT_OF_PAIR, UC_PROPERTY_INDEX_COMBINING, UC_PROPERTY_INDEX_COMPOSITE, UC_PROPERTY_INDEX_DECIMAL_DIGIT, UC_PROPERTY_INDEX_NUMERIC, UC_PROPERTY_INDEX_DIACRITIC, UC_PROPERTY_INDEX_EXTENDER, UC_PROPERTY_INDEX_IGNORABLE_CONTROL }; /* Get gperf generated lookup function. */ #include "unictype/pr_byname.h" static const uc_property_t UC_PROPERTY_NONE = { NULL }; uc_property_t uc_property_byname (const char *property_name) { char buf[MAX_WORD_LENGTH + 1]; const char *cp; char *bp; unsigned int count; const struct named_property *found; for (cp = property_name, bp = buf, count = MAX_WORD_LENGTH + 1; ; cp++, bp++) { unsigned char c = (unsigned char) *cp; if (c >= 0x80) goto invalid; if (c >= 'A' && c <= 'Z') c += 'a' - 'A'; else if (c == ' ' || c == '-') c = '_'; *bp = c; if (c == '\0') break; if (--count == 0) goto invalid; } found = uc_property_lookup (buf, bp - buf); if (found != NULL) /* Use a 'switch' statement here, because a table would introduce load-time relocations. */ switch (found->property_index) { case UC_PROPERTY_INDEX_WHITE_SPACE: return UC_PROPERTY_WHITE_SPACE; case UC_PROPERTY_INDEX_ALPHABETIC: return UC_PROPERTY_ALPHABETIC; case UC_PROPERTY_INDEX_OTHER_ALPHABETIC: return UC_PROPERTY_OTHER_ALPHABETIC; case UC_PROPERTY_INDEX_NOT_A_CHARACTER: return UC_PROPERTY_NOT_A_CHARACTER; case UC_PROPERTY_INDEX_DEFAULT_IGNORABLE_CODE_POINT: return UC_PROPERTY_DEFAULT_IGNORABLE_CODE_POINT; case UC_PROPERTY_INDEX_OTHER_DEFAULT_IGNORABLE_CODE_POINT: return UC_PROPERTY_OTHER_DEFAULT_IGNORABLE_CODE_POINT; case UC_PROPERTY_INDEX_DEPRECATED: return UC_PROPERTY_DEPRECATED; case UC_PROPERTY_INDEX_LOGICAL_ORDER_EXCEPTION: return UC_PROPERTY_LOGICAL_ORDER_EXCEPTION; case UC_PROPERTY_INDEX_VARIATION_SELECTOR: return UC_PROPERTY_VARIATION_SELECTOR; case UC_PROPERTY_INDEX_PRIVATE_USE: return UC_PROPERTY_PRIVATE_USE; case UC_PROPERTY_INDEX_UNASSIGNED_CODE_VALUE: return UC_PROPERTY_UNASSIGNED_CODE_VALUE; case UC_PROPERTY_INDEX_UPPERCASE: return UC_PROPERTY_UPPERCASE; case UC_PROPERTY_INDEX_OTHER_UPPERCASE: return UC_PROPERTY_OTHER_UPPERCASE; case UC_PROPERTY_INDEX_LOWERCASE: return UC_PROPERTY_LOWERCASE; case UC_PROPERTY_INDEX_OTHER_LOWERCASE: return UC_PROPERTY_OTHER_LOWERCASE; case UC_PROPERTY_INDEX_TITLECASE: return UC_PROPERTY_TITLECASE; case UC_PROPERTY_INDEX_CASED: return UC_PROPERTY_CASED; case UC_PROPERTY_INDEX_CASE_IGNORABLE: return UC_PROPERTY_CASE_IGNORABLE; case UC_PROPERTY_INDEX_CHANGES_WHEN_LOWERCASED: return UC_PROPERTY_CHANGES_WHEN_LOWERCASED; case UC_PROPERTY_INDEX_CHANGES_WHEN_UPPERCASED: return UC_PROPERTY_CHANGES_WHEN_UPPERCASED; case UC_PROPERTY_INDEX_CHANGES_WHEN_TITLECASED: return UC_PROPERTY_CHANGES_WHEN_TITLECASED; case UC_PROPERTY_INDEX_CHANGES_WHEN_CASEFOLDED: return UC_PROPERTY_CHANGES_WHEN_CASEFOLDED; case UC_PROPERTY_INDEX_CHANGES_WHEN_CASEMAPPED: return UC_PROPERTY_CHANGES_WHEN_CASEMAPPED; case UC_PROPERTY_INDEX_SOFT_DOTTED: return UC_PROPERTY_SOFT_DOTTED; case UC_PROPERTY_INDEX_ID_START: return UC_PROPERTY_ID_START; case UC_PROPERTY_INDEX_OTHER_ID_START: return UC_PROPERTY_OTHER_ID_START; case UC_PROPERTY_INDEX_ID_CONTINUE: return UC_PROPERTY_ID_CONTINUE; case UC_PROPERTY_INDEX_OTHER_ID_CONTINUE: return UC_PROPERTY_OTHER_ID_CONTINUE; case UC_PROPERTY_INDEX_XID_START: return UC_PROPERTY_XID_START; case UC_PROPERTY_INDEX_XID_CONTINUE: return UC_PROPERTY_XID_CONTINUE; case UC_PROPERTY_INDEX_PATTERN_WHITE_SPACE: return UC_PROPERTY_PATTERN_WHITE_SPACE; case UC_PROPERTY_INDEX_PATTERN_SYNTAX: return UC_PROPERTY_PATTERN_SYNTAX; case UC_PROPERTY_INDEX_JOIN_CONTROL: return UC_PROPERTY_JOIN_CONTROL; case UC_PROPERTY_INDEX_GRAPHEME_BASE: return UC_PROPERTY_GRAPHEME_BASE; case UC_PROPERTY_INDEX_GRAPHEME_EXTEND: return UC_PROPERTY_GRAPHEME_EXTEND; case UC_PROPERTY_INDEX_OTHER_GRAPHEME_EXTEND: return UC_PROPERTY_OTHER_GRAPHEME_EXTEND; case UC_PROPERTY_INDEX_GRAPHEME_LINK: return UC_PROPERTY_GRAPHEME_LINK; case UC_PROPERTY_INDEX_BIDI_CONTROL: return UC_PROPERTY_BIDI_CONTROL; case UC_PROPERTY_INDEX_BIDI_LEFT_TO_RIGHT: return UC_PROPERTY_BIDI_LEFT_TO_RIGHT; case UC_PROPERTY_INDEX_BIDI_HEBREW_RIGHT_TO_LEFT: return UC_PROPERTY_BIDI_HEBREW_RIGHT_TO_LEFT; case UC_PROPERTY_INDEX_BIDI_ARABIC_RIGHT_TO_LEFT: return UC_PROPERTY_BIDI_ARABIC_RIGHT_TO_LEFT; case UC_PROPERTY_INDEX_BIDI_EUROPEAN_DIGIT: return UC_PROPERTY_BIDI_EUROPEAN_DIGIT; case UC_PROPERTY_INDEX_BIDI_EUR_NUM_SEPARATOR: return UC_PROPERTY_BIDI_EUR_NUM_SEPARATOR; case UC_PROPERTY_INDEX_BIDI_EUR_NUM_TERMINATOR: return UC_PROPERTY_BIDI_EUR_NUM_TERMINATOR; case UC_PROPERTY_INDEX_BIDI_ARABIC_DIGIT: return UC_PROPERTY_BIDI_ARABIC_DIGIT; case UC_PROPERTY_INDEX_BIDI_COMMON_SEPARATOR: return UC_PROPERTY_BIDI_COMMON_SEPARATOR; case UC_PROPERTY_INDEX_BIDI_BLOCK_SEPARATOR: return UC_PROPERTY_BIDI_BLOCK_SEPARATOR; case UC_PROPERTY_INDEX_BIDI_SEGMENT_SEPARATOR: return UC_PROPERTY_BIDI_SEGMENT_SEPARATOR; case UC_PROPERTY_INDEX_BIDI_WHITESPACE: return UC_PROPERTY_BIDI_WHITESPACE; case UC_PROPERTY_INDEX_BIDI_NON_SPACING_MARK: return UC_PROPERTY_BIDI_NON_SPACING_MARK; case UC_PROPERTY_INDEX_BIDI_BOUNDARY_NEUTRAL: return UC_PROPERTY_BIDI_BOUNDARY_NEUTRAL; case UC_PROPERTY_INDEX_BIDI_PDF: return UC_PROPERTY_BIDI_PDF; case UC_PROPERTY_INDEX_BIDI_EMBEDDING_OR_OVERRIDE: return UC_PROPERTY_BIDI_EMBEDDING_OR_OVERRIDE; case UC_PROPERTY_INDEX_BIDI_OTHER_NEUTRAL: return UC_PROPERTY_BIDI_OTHER_NEUTRAL; case UC_PROPERTY_INDEX_HEX_DIGIT: return UC_PROPERTY_HEX_DIGIT; case UC_PROPERTY_INDEX_ASCII_HEX_DIGIT: return UC_PROPERTY_ASCII_HEX_DIGIT; case UC_PROPERTY_INDEX_IDEOGRAPHIC: return UC_PROPERTY_IDEOGRAPHIC; case UC_PROPERTY_INDEX_UNIFIED_IDEOGRAPH: return UC_PROPERTY_UNIFIED_IDEOGRAPH; case UC_PROPERTY_INDEX_RADICAL: return UC_PROPERTY_RADICAL; case UC_PROPERTY_INDEX_IDS_BINARY_OPERATOR: return UC_PROPERTY_IDS_BINARY_OPERATOR; case UC_PROPERTY_INDEX_IDS_TRINARY_OPERATOR: return UC_PROPERTY_IDS_TRINARY_OPERATOR; case UC_PROPERTY_INDEX_ZERO_WIDTH: return UC_PROPERTY_ZERO_WIDTH; case UC_PROPERTY_INDEX_SPACE: return UC_PROPERTY_SPACE; case UC_PROPERTY_INDEX_NON_BREAK: return UC_PROPERTY_NON_BREAK; case UC_PROPERTY_INDEX_ISO_CONTROL: return UC_PROPERTY_ISO_CONTROL; case UC_PROPERTY_INDEX_FORMAT_CONTROL: return UC_PROPERTY_FORMAT_CONTROL; case UC_PROPERTY_INDEX_DASH: return UC_PROPERTY_DASH; case UC_PROPERTY_INDEX_HYPHEN: return UC_PROPERTY_HYPHEN; case UC_PROPERTY_INDEX_PUNCTUATION: return UC_PROPERTY_PUNCTUATION; case UC_PROPERTY_INDEX_LINE_SEPARATOR: return UC_PROPERTY_LINE_SEPARATOR; case UC_PROPERTY_INDEX_PARAGRAPH_SEPARATOR: return UC_PROPERTY_PARAGRAPH_SEPARATOR; case UC_PROPERTY_INDEX_QUOTATION_MARK: return UC_PROPERTY_QUOTATION_MARK; case UC_PROPERTY_INDEX_SENTENCE_TERMINAL: return UC_PROPERTY_SENTENCE_TERMINAL; case UC_PROPERTY_INDEX_TERMINAL_PUNCTUATION: return UC_PROPERTY_TERMINAL_PUNCTUATION; case UC_PROPERTY_INDEX_CURRENCY_SYMBOL: return UC_PROPERTY_CURRENCY_SYMBOL; case UC_PROPERTY_INDEX_MATH: return UC_PROPERTY_MATH; case UC_PROPERTY_INDEX_OTHER_MATH: return UC_PROPERTY_OTHER_MATH; case UC_PROPERTY_INDEX_PAIRED_PUNCTUATION: return UC_PROPERTY_PAIRED_PUNCTUATION; case UC_PROPERTY_INDEX_LEFT_OF_PAIR: return UC_PROPERTY_LEFT_OF_PAIR; case UC_PROPERTY_INDEX_COMBINING: return UC_PROPERTY_COMBINING; case UC_PROPERTY_INDEX_COMPOSITE: return UC_PROPERTY_COMPOSITE; case UC_PROPERTY_INDEX_DECIMAL_DIGIT: return UC_PROPERTY_DECIMAL_DIGIT; case UC_PROPERTY_INDEX_NUMERIC: return UC_PROPERTY_NUMERIC; case UC_PROPERTY_INDEX_DIACRITIC: return UC_PROPERTY_DIACRITIC; case UC_PROPERTY_INDEX_EXTENDER: return UC_PROPERTY_EXTENDER; case UC_PROPERTY_INDEX_IGNORABLE_CONTROL: return UC_PROPERTY_IGNORABLE_CONTROL; default: abort (); } invalid: return UC_PROPERTY_NONE; }