diff options
Diffstat (limited to 'tests/uninorm/test-u32-normalize-big.c')
-rw-r--r-- | tests/uninorm/test-u32-normalize-big.c | 315 |
1 files changed, 315 insertions, 0 deletions
diff --git a/tests/uninorm/test-u32-normalize-big.c b/tests/uninorm/test-u32-normalize-big.c new file mode 100644 index 0000000..96781ab --- /dev/null +++ b/tests/uninorm/test-u32-normalize-big.c @@ -0,0 +1,315 @@ +/* Test of Unicode compliance of normalization of UTF-32 strings. + Copyright (C) 2009 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* Written by Bruno Haible <bruno@clisp.org>, 2009. */ + +#include <config.h> + +/* Specification. */ +#include "test-u32-normalize-big.h" + +#if GNULIB_UNINORM_U32_NORMALIZE + +#include <stdio.h> +#include <stdlib.h> + +#include "xalloc.h" +#include "unistr.h" + +#define ASSERT(expr) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +#define ASSERT_WITH_LINE(expr, file, line) \ + do \ + { \ + if (!(expr)) \ + { \ + fprintf (stderr, "%s:%d: assertion failed for %s:%u\n", \ + __FILE__, __LINE__, file, line); \ + fflush (stderr); \ + abort (); \ + } \ + } \ + while (0) + +static int +cmp_ucs4_t (const void *a, const void *b) +{ + ucs4_t a_value = *(const ucs4_t *)a; + ucs4_t b_value = *(const ucs4_t *)b; + return (a_value < b_value ? -1 : a_value > b_value ? 1 : 0); +} + +void +read_normalization_test_file (const char *filename, + struct normalization_test_file *file) +{ + FILE *stream; + unsigned int lineno; + int part_index; + struct normalization_test_line *lines; + size_t lines_length; + size_t lines_allocated; + + stream = fopen (filename, "r"); + if (stream == NULL) + { + fprintf (stderr, "error during fopen of '%s'\n", filename); + exit (1); + } + + for (part_index = 0; part_index < 4; part_index++) + { + file->parts[part_index].lines = NULL; + file->parts[part_index].lines_length = 0; + } + + lineno = 0; + + part_index = -1; + lines = NULL; + lines_length = 0; + lines_allocated = 0; + + for (;;) + { + char buf[1000+1]; + char *ptr; + int c; + struct normalization_test_line line; + size_t sequence_index; + + lineno++; + + /* Read a line. */ + ptr = buf; + do + { + c = getc (stream); + if (c == EOF || c == '\n') + break; + *ptr++ = c; + } + while (ptr < buf + 1000); + *ptr = '\0'; + if (c == EOF) + break; + + /* Ignore empty lines and comment lines. */ + if (buf[0] == '\0' || buf[0] == '#') + continue; + + /* Handle lines that introduce a new part. */ + if (buf[0] == '@') + { + /* Switch to the next part. */ + if (part_index >= 0) + { + lines = + (struct normalization_test_line *) + xnrealloc (lines, lines_length, sizeof (struct normalization_test_line)); + file->parts[part_index].lines = lines; + file->parts[part_index].lines_length = lines_length; + } + part_index++; + lines = NULL; + lines_length = 0; + lines_allocated = 0; + continue; + } + + /* It's a line containing 5 sequences of Unicode characters. + Parse it and append it to the current part. */ + if (!(part_index >= 0 && part_index < 4)) + { + fprintf (stderr, "unexpected structure of '%s'\n", filename); + exit (1); + } + ptr = buf; + line.lineno = lineno; + for (sequence_index = 0; sequence_index < 5; sequence_index++) + line.sequences[sequence_index] = NULL; + for (sequence_index = 0; sequence_index < 5; sequence_index++) + { + uint32_t *sequence = XNMALLOC (1, uint32_t); + size_t sequence_length = 0; + + for (;;) + { + char *endptr; + unsigned int uc; + + uc = strtoul (ptr, &endptr, 16); + if (endptr == ptr) + break; + ptr = endptr; + + /* Append uc to the sequence. */ + sequence = + (uint32_t *) + xnrealloc (sequence, sequence_length + 2, sizeof (uint32_t)); + sequence[sequence_length] = uc; + sequence_length++; + + if (*ptr == ' ') + ptr++; + } + if (sequence_length == 0) + { + fprintf (stderr, "empty character sequence in '%s'\n", filename); + exit (1); + } + sequence[sequence_length] = 0; /* terminator */ + + line.sequences[sequence_index] = sequence; + + if (*ptr != ';') + { + fprintf (stderr, "error parsing '%s'\n", filename); + exit (1); + } + ptr++; + } + + /* Append the line to the current part. */ + if (lines_length == lines_allocated) + { + lines_allocated = 2 * lines_allocated; + if (lines_allocated < 7) + lines_allocated = 7; + lines = + (struct normalization_test_line *) + xnrealloc (lines, lines_allocated, sizeof (struct normalization_test_line)); + } + lines[lines_length] = line; + lines_length++; + } + + if (part_index >= 0) + { + lines = + (struct normalization_test_line *) + xnrealloc (lines, lines_length, sizeof (struct normalization_test_line)); + file->parts[part_index].lines = lines; + file->parts[part_index].lines_length = lines_length; + } + + { + /* Collect all c1 values from the part 1 in an array. */ + const struct normalization_test_part *p = &file->parts[1]; + ucs4_t *c1_array = XNMALLOC (p->lines_length + 1, ucs4_t); + size_t line_index; + + for (line_index = 0; line_index < p->lines_length; line_index++) + { + const uint32_t *sequence = p->lines[line_index].sequences[0]; + /* In part 1, every sequences[0] consists of a single character. */ + if (!(sequence[0] != 0 && sequence[1] == 0)) + abort (); + c1_array[line_index] = sequence[0]; + } + + /* Sort this array. */ + qsort (c1_array, p->lines_length, sizeof (ucs4_t), cmp_ucs4_t); + + /* Add the sentinel at the end. */ + c1_array[p->lines_length] = 0x110000; + + file->part1_c1_sorted = c1_array; + } + + file->filename = xstrdup (filename); + + if (ferror (stream) || fclose (stream)) + { + fprintf (stderr, "error reading from '%s'\n", filename); + exit (1); + } +} + +void +test_specific (const struct normalization_test_file *file, + int (*check) (const uint32_t *c1, size_t c1_length, + const uint32_t *c2, size_t c2_length, + const uint32_t *c3, size_t c3_length, + const uint32_t *c4, size_t c4_length, + const uint32_t *c5, size_t c5_length)) +{ + size_t part_index; + + for (part_index = 0; part_index < 4; part_index++) + { + const struct normalization_test_part *p = &file->parts[part_index]; + size_t line_index; + + for (line_index = 0; line_index < p->lines_length; line_index++) + { + const struct normalization_test_line *l = &p->lines[line_index]; + + ASSERT_WITH_LINE (check (l->sequences[0], u32_strlen (l->sequences[0]), + l->sequences[1], u32_strlen (l->sequences[1]), + l->sequences[2], u32_strlen (l->sequences[2]), + l->sequences[3], u32_strlen (l->sequences[3]), + l->sequences[4], u32_strlen (l->sequences[4])) + == 0, + file->filename, l->lineno); + } + } +} + +void +test_other (const struct normalization_test_file *file, uninorm_t nf) +{ + /* Check that for every character not listed in part 1 of the + NormalizationTest.txt file, the character maps to itself in each + of the four normalization forms. */ + const ucs4_t *p = file->part1_c1_sorted; + ucs4_t uc; + + for (uc = 0; uc < 0x110000; uc++) + { + if (uc >= 0xD800 && uc < 0xE000) + { + /* A surrogate, not a character. Skip uc. */ + } + else if (uc == *p) + { + /* Skip uc. */ + p++; + } + else + { + uint32_t input[1]; + size_t length; + uint32_t *result; + + input[0] = uc; + result = u32_normalize (nf, input, 1, NULL, &length); + ASSERT (result != NULL && length == 1 && result[0] == uc); + } + } +} + +#endif |