diff options
Diffstat (limited to 'tests/uniname/test-uninames.c')
-rw-r--r-- | tests/uniname/test-uninames.c | 261 |
1 files changed, 261 insertions, 0 deletions
diff --git a/tests/uniname/test-uninames.c b/tests/uniname/test-uninames.c new file mode 100644 index 0000000..1408434 --- /dev/null +++ b/tests/uniname/test-uninames.c @@ -0,0 +1,261 @@ +/* Test the Unicode character name functions. + Copyright (C) 2000-2003, 2005, 2007, 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "xalloc.h" +#include "uniname.h" +#include "progname.h" + +/* The names according to the UnicodeData.txt file, modified to contain the + Hangul syllable names, as described in the Unicode 3.0 book. */ +const char * unicode_names [0x110000]; + +/* Maximum length of a field in the UnicodeData.txt file. */ +#define FIELDLEN 120 + +/* Reads the next field from STREAM. The buffer BUFFER has size FIELDLEN. + Reads up to (but excluding) DELIM. + Returns 1 when a field was successfully read, otherwise 0. */ +static int +getfield (FILE *stream, char *buffer, int delim) +{ + int count = 0; + int c; + + for (; (c = getc (stream)), (c != EOF && c != delim); ) + { + /* Put c into the buffer. */ + if (++count >= FIELDLEN - 1) + { + fprintf (stderr, "field too long\n"); + exit (EXIT_FAILURE); + } + *buffer++ = c; + } + + if (c == EOF) + return 0; + + *buffer = '\0'; + return 1; +} + +/* Stores in unicode_names[] the relevant contents of the UnicodeData.txt + file. */ +static void +fill_names (const char *unicodedata_filename) +{ + unsigned int i; + FILE *stream; + char field0[FIELDLEN]; + char field1[FIELDLEN]; + int lineno = 0; + + for (i = 0; i < 0x110000; i++) + unicode_names[i] = NULL; + + stream = fopen (unicodedata_filename, "r"); + if (stream == NULL) + { + fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename); + exit (EXIT_FAILURE); + } + + for (;;) + { + int n; + int c; + + lineno++; + n = getfield (stream, field0, ';'); + n += getfield (stream, field1, ';'); + if (n == 0) + break; + if (n != 2) + { + fprintf (stderr, "short line in '%s':%d\n", + unicodedata_filename, lineno); + exit (EXIT_FAILURE); + } + for (; (c = getc (stream)), (c != EOF && c != '\n'); ) + ; + i = strtoul (field0, NULL, 16); + if (i >= 0x110000) + { + fprintf (stderr, "index too large\n"); + exit (EXIT_FAILURE); + } + unicode_names[i] = xstrdup (field1); + } + if (ferror (stream) || fclose (stream)) + { + fprintf (stderr, "error reading from '%s'\n", unicodedata_filename); + exit (1); + } +} + +/* Perform an exhaustive test of the unicode_character_name function. */ +static int +test_name_lookup () +{ + int error = 0; + unsigned int i; + char buf[UNINAME_MAX]; + + for (i = 0; i < 0x11000; i++) + { + char *result = unicode_character_name (i, buf); + + if (unicode_names[i] != NULL) + { + if (result == NULL) + { + fprintf (stderr, "\\u%04X name lookup failed!\n", i); + error = 1; + } + else if (strcmp (result, unicode_names[i]) != 0) + { + fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n", + i, result); + error = 1; + } + } + else + { + if (result != NULL) + { + fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n", + i, result); + error = 1; + } + } + } + + for (i = 0x110000; i < 0x1000000; i++) + { + char *result = unicode_character_name (i, buf); + + if (result != NULL) + { + fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n", + i, result); + error = 1; + } + } + + return error; +} + +/* Perform a test of the unicode_name_character function. */ +static int +test_inverse_lookup () +{ + int error = 0; + unsigned int i; + + /* First, verify all valid character names are recognized. */ + for (i = 0; i < 0x110000; i++) + if (unicode_names[i] != NULL) + { + unsigned int result = unicode_name_character (unicode_names[i]); + if (result != i) + { + if (result == UNINAME_INVALID) + fprintf (stderr, "inverse name lookup of \"%s\" failed\n", + unicode_names[i]); + else + fprintf (stderr, + "inverse name lookup of \"%s\" returned 0x%04X\n", + unicode_names[i], result); + error = 1; + } + } + + /* Second, generate random but likely names and verify they are not + recognized unless really valid. */ + for (i = 0; i < 10000; i++) + { + unsigned int i1, i2; + const char *s1; + const char *s2; + unsigned int l1, l2, j1, j2; + char buf[2*UNINAME_MAX]; + unsigned int result; + + do i1 = ((rand () % 0x11) << 16) + + ((rand () & 0xff) << 8) + + (rand () & 0xff); + while (unicode_names[i1] == NULL); + + do i2 = ((rand () % 0x11) << 16) + + ((rand () & 0xff) << 8) + + (rand () & 0xff); + while (unicode_names[i2] == NULL); + + s1 = unicode_names[i1]; + l1 = strlen (s1); + s2 = unicode_names[i2]; + l2 = strlen (s2); + + /* Concatenate a starting piece of s1 with an ending piece of s2. */ + for (j1 = 1; j1 <= l1; j1++) + if (j1 == l1 || s1[j1] == ' ') + for (j2 = 0; j2 < l2; j2++) + if (j2 == 0 || s2[j2-1] == ' ') + { + memcpy (buf, s1, j1); + buf[j1] = ' '; + memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1); + + result = unicode_name_character (buf); + if (result != UNINAME_INVALID + && !(unicode_names[result] != NULL + && strcmp (unicode_names[result], buf) == 0)) + { + fprintf (stderr, + "inverse name lookup of \"%s\" returned 0x%04X\n", + unicode_names[i], result); + error = 1; + } + } + } + + /* Third, some extreme case that used to loop. */ + if (unicode_name_character ("A A") != UNINAME_INVALID) + error = 1; + + return error; +} + +int +main (int argc, char *argv[]) +{ + int error = 0; + + set_program_name (argv[0]); + + fill_names (argv[1]); + + error |= test_name_lookup (); + error |= test_inverse_lookup (); + + return error; +} |