diff options
Diffstat (limited to 'tests/test-striconveh.c')
-rw-r--r-- | tests/test-striconveh.c | 399 |
1 files changed, 395 insertions, 4 deletions
diff --git a/tests/test-striconveh.c b/tests/test-striconveh.c index cb21a4c..781aa52 100644 --- a/tests/test-striconveh.c +++ b/tests/test-striconveh.c @@ -1,5 +1,5 @@ /* Test of character set conversion with error handling. - Copyright (C) 2007-2018 Free Software Foundation, Inc. + Copyright (C) 2007-2022 Free Software Foundation, Inc. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -44,16 +44,21 @@ new_offsets (size_t n) int main () { +#if HAVE_ICONV static enum iconv_ilseq_handler handlers[] = - { iconveh_error, iconveh_question_mark, iconveh_escape_sequence }; + { + iconveh_error, + iconveh_question_mark, + iconveh_replacement_character, + iconveh_escape_sequence + }; size_t indirect; size_t h; size_t o; size_t i; -#if HAVE_ICONV /* Assume that iconv() supports at least the encodings ASCII, ISO-8859-1, - ISO-8859-2, and UTF-8. */ + ISO-8859-2, UTF-8, and with libiconv or glibc also GB18030. */ iconv_t cd_ascii_to_88591 = iconv_open ("ISO-8859-1", "ASCII"); iconv_t cd_88591_to_88592 = iconv_open ("ISO-8859-2", "ISO-8859-1"); iconv_t cd_88592_to_88591 = iconv_open ("ISO-8859-1", "ISO-8859-2"); @@ -63,6 +68,12 @@ main () iconv_t cd_88592_to_utf8 = iconv_open ("UTF-8", "ISO-8859-2"); iconv_t cd_utf8_to_88592 = iconv_open ("ISO-8859-2", "UTF-8"); iconv_t cd_utf7_to_utf8 = iconv_open ("UTF-8", "UTF-7"); +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) + iconv_t cd_ascii_to_gb18030 = iconv_open ("GB18030", "ASCII"); + iconv_t cd_utf8_to_gb18030 = iconv_open ("GB18030", "UTF-8"); + iconv_t cd_88591_to_gb18030 = iconv_open ("GB18030", "ISO-8859-1"); + iconv_t cd_utf7_to_gb18030 = iconv_open ("GB18030", "UTF-7"); +# endif iconveh_t cdeh_ascii_to_88591; iconveh_t cdeh_ascii_to_88591_indirectly; iconveh_t cdeh_88592_to_88591; @@ -71,12 +82,21 @@ main () iconveh_t cdeh_88591_to_utf8; iconveh_t cdeh_utf8_to_88591; iconveh_t cdeh_utf7_to_utf8; +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) + iconveh_t cdeh_ascii_to_gb18030; + iconveh_t cdeh_88591_to_gb18030; + iconveh_t cdeh_utf7_to_gb18030; +# endif ASSERT (cd_ascii_to_utf8 != (iconv_t)(-1)); ASSERT (cd_88591_to_utf8 != (iconv_t)(-1)); ASSERT (cd_utf8_to_88591 != (iconv_t)(-1)); ASSERT (cd_88592_to_utf8 != (iconv_t)(-1)); ASSERT (cd_utf8_to_88592 != (iconv_t)(-1)); +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) + ASSERT (cd_ascii_to_gb18030 != (iconv_t)(-1)); + ASSERT (cd_utf8_to_gb18030 != (iconv_t)(-1)); +# endif cdeh_ascii_to_88591.cd = cd_ascii_to_88591; cdeh_ascii_to_88591.cd1 = cd_ascii_to_utf8; @@ -110,6 +130,20 @@ main () cdeh_utf7_to_utf8.cd1 = cd_utf7_to_utf8; cdeh_utf7_to_utf8.cd2 = (iconv_t)(-1); +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) + cdeh_ascii_to_gb18030.cd = cd_ascii_to_gb18030; + cdeh_ascii_to_gb18030.cd1 = cd_ascii_to_utf8; + cdeh_ascii_to_gb18030.cd2 = cd_utf8_to_gb18030; + + cdeh_88591_to_gb18030.cd = cd_88591_to_gb18030; + cdeh_88591_to_gb18030.cd1 = cd_88591_to_utf8; + cdeh_88591_to_gb18030.cd2 = cd_utf8_to_gb18030; + + cdeh_utf7_to_gb18030.cd = cd_utf7_to_gb18030; + cdeh_utf7_to_gb18030.cd1 = cd_utf7_to_utf8; + cdeh_utf7_to_gb18030.cd2 = cd_utf8_to_gb18030; +# endif + /* ------------------------ Test mem_cd_iconveh() ------------------------ */ /* Test conversion from ISO-8859-2 to ISO-8859-1 with no errors. */ @@ -175,6 +209,7 @@ main () free (offsets); break; case iconveh_question_mark: + case iconveh_replacement_character: case iconveh_escape_sequence: { static const char expected[] = "Rafa? Maszkowski"; @@ -224,6 +259,7 @@ main () free (offsets); break; case iconveh_question_mark: + case iconveh_replacement_character: { static const char expected[] = "Rafa? Maszkowski"; ASSERT (retval == 0); @@ -294,6 +330,41 @@ main () } } +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) + /* Test conversion from ISO-8859-1 to GB18030 with no errors. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; + static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118"; + for (o = 0; o < 2; o++) + { + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, strlen (input), + &cdeh_88591_to_gb18030, + handler, + offsets, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 37; i++) + ASSERT (offsets[i] == (i < 1 ? i : + i < 12 ? i + 3 : + i < 18 ? i + 6 : + i + 7)); + ASSERT (offsets[37] == MAGIC); + free (offsets); + } + free (result); + } + } +# endif + /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */ for (h = 0; h < SIZEOF (handlers); h++) { @@ -371,9 +442,87 @@ main () free (result); } break; + case iconveh_replacement_character: + { + static const char expected[] = "Rafa\357\277\275 Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 16; i++) + ASSERT (offsets[i] == (i < 5 ? i : i + 2)); + ASSERT (offsets[16] == MAGIC); + free (offsets); + } + free (result); + } + break; + } + } + } + +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) + /* Test conversion from ASCII to GB18030 with invalid input (EILSEQ). */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */ + for (o = 0; o < 2; o++) + { + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, strlen (input), + &cdeh_ascii_to_gb18030, + handler, + offsets, + &result, &length); + switch (handler) + { + case iconveh_error: + ASSERT (retval == -1 && errno == EILSEQ); + ASSERT (result == NULL); + if (o) + free (offsets); + break; + case iconveh_question_mark: + case iconveh_escape_sequence: + { + static const char expected[] = "Rafa? Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 16; i++) + ASSERT (offsets[i] == i); + ASSERT (offsets[16] == MAGIC); + free (offsets); + } + free (result); + } + break; + case iconveh_replacement_character: + { + static const char expected[] = "Rafa\2041\2447 Maszkowski"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 16; i++) + ASSERT (offsets[i] == (i < 5 ? i : i + 3)); + ASSERT (offsets[16] == MAGIC); + free (offsets); + } + free (result); + } + break; } } } +# endif /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */ for (h = 0; h < SIZEOF (handlers); h++) @@ -399,6 +548,7 @@ main () free (offsets); break; case iconveh_question_mark: + case iconveh_replacement_character: { static const char expected[] = "Rafa? Maszkowski"; ASSERT (retval == 0); @@ -496,6 +646,34 @@ main () free (result); } +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) + /* Test conversion from UTF-7 to GB18030 with EINVAL. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + /* This is base64 encoded 0x54 0x32 0xD8 0x3F 0xD8 0x40. It would + convert to U+5432 U+D83F U+D840 but these are Unicode surrogates. */ + static const char input[] = "+VDLYP9hA"; + static const char expected1[] = "\337\305"; /* 吲 glibc */ + static const char expected2[] = ""; /* libiconv */ + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, 7, + &cdeh_utf7_to_gb18030, + handler, + NULL, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected1) || length == strlen (expected2)); + ASSERT (result != NULL); + if (length == strlen (expected1)) + ASSERT (memcmp (result, expected1, strlen (expected1)) == 0); + else + ASSERT (memcmp (result, expected2, strlen (expected2)) == 0); + free (result); + } +# endif + /* Disabled on NetBSD, because NetBSD 5.0 iconv() is buggy: it converts the input "+2D/YQNhB" to U+1FED8 U+3FD8 U+40D8. */ # if !(defined __NetBSD__ && !defined _LIBICONV_VERSION) @@ -544,8 +722,98 @@ main () free (result); } break; + case iconveh_replacement_character: + { + /* glibc result */ + static const char expected1[] = "\357\277\275\357\277\275\357\277\275\357\277\275\357\277\275"; + /* libiconv <= 1.12 result */ + static const char expected2[] = "\357\277\2752D/YQNhB"; + /* libiconv >= 1.13 result */ + static const char expected3[] = "\357\277\275\340\277\266\341\200\266"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected1) + || length == strlen (expected2) + || length == strlen (expected3)); + ASSERT (result != NULL); + if (length == strlen (expected1)) + ASSERT (memcmp (result, expected1, strlen (expected1)) == 0); + else if (length == strlen (expected2)) + ASSERT (memcmp (result, expected2, strlen (expected2)) == 0); + else + ASSERT (memcmp (result, expected3, strlen (expected3)) == 0); + free (result); + } } } + +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) + /* Test conversion from UTF-7 to GB18030 with EILSEQ. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + /* This is base64 encoded 0xD8 0x3F 0xD8 0x40 0xD8 0x41. It would + convert to U+D83F U+D840 U+D841 but these are Unicode surrogates. */ + static const char input[] = "+2D/YQNhB"; + char *result = NULL; + size_t length = 0; + int retval = mem_cd_iconveh (input, strlen (input), + &cdeh_utf7_to_gb18030, + handler, + NULL, + &result, &length); + switch (handler) + { + case iconveh_error: + ASSERT (retval == -1 && errno == EILSEQ); + ASSERT (result == NULL); + break; + case iconveh_question_mark: + case iconveh_escape_sequence: + { + /* glibc result */ + static const char expected1[] = "?????"; + /* libiconv <= 1.12 result */ + static const char expected2[] = "?2D/YQNhB"; + /* libiconv behaviour changed in version 1.13: the result is + '?' U+0FF6 U+1036; this is U+D83F U+D840 U+D841 shifted left + by 6 bits. */ + static const char expected3[] = "?\2013\2030\2013\2114"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected1) + || length == strlen (expected2) + || length == strlen (expected3)); + ASSERT (result != NULL); + if (length == strlen (expected1)) + ASSERT (memcmp (result, expected1, strlen (expected1)) == 0); + else if (length == strlen (expected2)) + ASSERT (memcmp (result, expected2, strlen (expected2)) == 0 + || memcmp (result, expected3, strlen (expected3)) == 0); + free (result); + } + break; + case iconveh_replacement_character: + { + /* glibc result */ + static const char expected1[] = "\2041\2447\2041\2447\2041\2447\2041\2447\2041\2447"; + /* libiconv <= 1.12 result */ + static const char expected2[] = "\2041\24472D/YQNhB"; + /* libiconv >= 1.13 result */ + static const char expected3[] = "\2041\2447\2013\2030\2013\2114"; + ASSERT (retval == 0); + ASSERT (length == strlen (expected1) + || length == strlen (expected2) + || length == strlen (expected3)); + ASSERT (result != NULL); + if (length == strlen (expected1)) + ASSERT (memcmp (result, expected1, strlen (expected1)) == 0); + else if (length == strlen (expected2)) + ASSERT (memcmp (result, expected2, strlen (expected2)) == 0 + || memcmp (result, expected3, strlen (expected3)) == 0); + free (result); + } + } + } +# endif # endif # endif } @@ -589,6 +857,7 @@ main () ASSERT (result == NULL && errno == EILSEQ); break; case iconveh_question_mark: + case iconveh_replacement_character: case iconveh_escape_sequence: { static const char expected[] = "Rafa? Maszkowski"; @@ -619,6 +888,7 @@ main () ASSERT (result == NULL && errno == EILSEQ); break; case iconveh_question_mark: + case iconveh_replacement_character: { static const char expected[] = "Rafa? Maszkowski"; ASSERT (result != NULL); @@ -652,6 +922,22 @@ main () free (result); } +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) + /* Test conversion from ISO-8859-1 to GB18030 with no errors. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; + static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118"; + char *result = str_cd_iconveh (input, + &cdeh_88591_to_gb18030, + handler); + ASSERT (result != NULL); + ASSERT (strcmp (result, expected) == 0); + free (result); + } +# endif + /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */ for (h = 0; h < SIZEOF (handlers); h++) { @@ -688,8 +974,51 @@ main () free (result); } break; + case iconveh_replacement_character: + { + static const char expected[] = "Rafa\357\277\275 Maszkowski"; + ASSERT (result != NULL); + ASSERT (strcmp (result, expected) == 0); + free (result); + } + break; + } + } + +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) + /* Test conversion from ASCII to GB18030 with invalid input (EILSEQ). */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "Rafa\263 Maszkowski"; /* Rafa? Maszkowski */ + char *result = str_cd_iconveh (input, + &cdeh_ascii_to_gb18030, + handler); + switch (handler) + { + case iconveh_error: + ASSERT (result == NULL && errno == EILSEQ); + break; + case iconveh_question_mark: + case iconveh_escape_sequence: + { + static const char expected[] = "Rafa? Maszkowski"; + ASSERT (result != NULL); + ASSERT (strcmp (result, expected) == 0); + free (result); + } + break; + case iconveh_replacement_character: + { + static const char expected[] = "Rafa\2041\2447 Maszkowski"; + ASSERT (result != NULL); + ASSERT (strcmp (result, expected) == 0); + free (result); + } + break; } } +# endif /* Test conversion from UTF-8 to ISO-8859-1 with EILSEQ. */ for (h = 0; h < SIZEOF (handlers); h++) @@ -705,6 +1034,7 @@ main () ASSERT (result == NULL && errno == EILSEQ); break; case iconveh_question_mark: + case iconveh_replacement_character: { static const char expected[] = "Costs: 27 ?"; ASSERT (result != NULL); @@ -801,6 +1131,7 @@ main () free (offsets); break; case iconveh_question_mark: + case iconveh_replacement_character: { static const char expected[] = "Rafa? Maszkowski"; ASSERT (retval == 0); @@ -870,6 +1201,41 @@ main () } } +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) + /* Test conversion from ISO-8859-1 to GB18030 with no errors. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; + static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118"; + for (o = 0; o < 2; o++) + { + size_t *offsets = (o ? new_offsets (strlen (input)) : NULL); + char *result = NULL; + size_t length = 0; + int retval = mem_iconveh (input, strlen (input), + "ISO-8859-1", "GB18030", + handler, + offsets, + &result, &length); + ASSERT (retval == 0); + ASSERT (length == strlen (expected)); + ASSERT (result != NULL && memcmp (result, expected, strlen (expected)) == 0); + if (o) + { + for (i = 0; i < 37; i++) + ASSERT (offsets[i] == (i < 1 ? i : + i < 12 ? i + 3 : + i < 18 ? i + 6 : + i + 7)); + ASSERT (offsets[37] == MAGIC); + free (offsets); + } + free (result); + } + } +# endif + /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */ for (h = 0; h < SIZEOF (handlers); h++) { @@ -931,6 +1297,7 @@ main () free (offsets); break; case iconveh_question_mark: + case iconveh_replacement_character: { static const char expected[] = "Rafa? Maszkowski"; ASSERT (retval == 0); @@ -1023,6 +1390,7 @@ main () ASSERT (result == NULL && errno == EILSEQ); break; case iconveh_question_mark: + case iconveh_replacement_character: { static const char expected[] = "Rafa? Maszkowski"; ASSERT (result != NULL); @@ -1053,6 +1421,20 @@ main () free (result); } +# if defined _LIBICONV_VERSION || (defined __GLIBC__ && !defined __UCLIBC__) + /* Test conversion from ISO-8859-1 to GB18030 with no errors. */ + for (h = 0; h < SIZEOF (handlers); h++) + { + enum iconv_ilseq_handler handler = handlers[h]; + static const char input[] = "\304rger mit b\366sen B\374bchen ohne Augenma\337"; + static const char expected[] = "\2010\2072rger mit b\2010\2132sen B\250\271bchen ohne Augenma\2010\2118"; + char *result = str_iconveh (input, "ISO-8859-1", "GB18030", handler); + ASSERT (result != NULL); + ASSERT (strcmp (result, expected) == 0); + free (result); + } +# endif + /* Test conversion from UTF-8 to ISO-8859-1 with no errors. */ for (h = 0; h < SIZEOF (handlers); h++) { @@ -1077,6 +1459,7 @@ main () ASSERT (result == NULL && errno == EILSEQ); break; case iconveh_question_mark: + case iconveh_replacement_character: { static const char expected[] = "Costs: 27 ?"; ASSERT (result != NULL); @@ -1106,6 +1489,14 @@ main () free (result); } + /* -------------------------------- Done. -------------------------------- */ + + if (cd_ascii_to_88591 != (iconv_t)(-1)) + iconv_close (cd_ascii_to_88591); + iconv_close (cd_ascii_to_utf8); + if (cd_utf7_to_utf8 != (iconv_t)(-1)) + iconv_close (cd_utf7_to_utf8); + #endif return 0; |