summaryrefslogtreecommitdiff
path: root/lib/striconveh.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/striconveh.c')
-rw-r--r--lib/striconveh.c1251
1 files changed, 1251 insertions, 0 deletions
diff --git a/lib/striconveh.c b/lib/striconveh.c
new file mode 100644
index 0000000..b39a01f
--- /dev/null
+++ b/lib/striconveh.c
@@ -0,0 +1,1251 @@
+/* Character set conversion with error handling.
+ Copyright (C) 2001-2008 Free Software Foundation, Inc.
+ Written by Bruno Haible and Simon Josefsson.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as published by
+ the Free Software Foundation; either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include "striconveh.h"
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if HAVE_ICONV
+# include <iconv.h>
+# include "unistr.h"
+#endif
+
+#include "c-strcase.h"
+#include "c-strcaseeq.h"
+
+#ifndef SIZE_MAX
+# define SIZE_MAX ((size_t) -1)
+#endif
+
+
+#if HAVE_ICONV
+
+/* The caller must provide CD, CD1, CD2, not just CD, because when a conversion
+ error occurs, we may have to determine the Unicode representation of the
+ inconvertible character. */
+
+/* iconv_carefully is like iconv, except that it stops as soon as it encounters
+ a conversion error, and it returns in *INCREMENTED a boolean telling whether
+ it has incremented the input pointers past the error location. */
+# if !defined _LIBICONV_VERSION && !defined __GLIBC__
+/* Irix iconv() inserts a NUL byte if it cannot convert.
+ NetBSD iconv() inserts a question mark if it cannot convert.
+ Only GNU libiconv and GNU libc are known to prefer to fail rather
+ than doing a lossy conversion. */
+static size_t
+iconv_carefully (iconv_t cd,
+ const char **inbuf, size_t *inbytesleft,
+ char **outbuf, size_t *outbytesleft,
+ bool *incremented)
+{
+ const char *inptr = *inbuf;
+ const char *inptr_end = inptr + *inbytesleft;
+ char *outptr = *outbuf;
+ size_t outsize = *outbytesleft;
+ const char *inptr_before;
+ size_t res;
+
+ do
+ {
+ size_t insize;
+
+ inptr_before = inptr;
+ res = (size_t)(-1);
+
+ for (insize = 1; inptr + insize <= inptr_end; insize++)
+ {
+ res = iconv (cd,
+ (ICONV_CONST char **) &inptr, &insize,
+ &outptr, &outsize);
+ if (!(res == (size_t)(-1) && errno == EINVAL))
+ break;
+ /* iconv can eat up a shift sequence but give EINVAL while attempting
+ to convert the first character. E.g. libiconv does this. */
+ if (inptr > inptr_before)
+ {
+ res = 0;
+ break;
+ }
+ }
+
+ if (res == 0)
+ {
+ *outbuf = outptr;
+ *outbytesleft = outsize;
+ }
+ }
+ while (res == 0 && inptr < inptr_end);
+
+ *inbuf = inptr;
+ *inbytesleft = inptr_end - inptr;
+ if (res != (size_t)(-1) && res > 0)
+ {
+ /* iconv() has already incremented INPTR. We cannot go back to a
+ previous INPTR, otherwise the state inside CD would become invalid,
+ if FROM_CODESET is a stateful encoding. So, tell the caller that
+ *INBUF has already been incremented. */
+ *incremented = (inptr > inptr_before);
+ errno = EILSEQ;
+ return (size_t)(-1);
+ }
+ else
+ {
+ *incremented = false;
+ return res;
+ }
+}
+# else
+# define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \
+ (*(incremented) = false, \
+ iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft))
+# endif
+
+/* iconv_carefully_1 is like iconv_carefully, except that it stops after
+ converting one character or one shift sequence. */
+static size_t
+iconv_carefully_1 (iconv_t cd,
+ const char **inbuf, size_t *inbytesleft,
+ char **outbuf, size_t *outbytesleft,
+ bool *incremented)
+{
+ const char *inptr_before = *inbuf;
+ const char *inptr = inptr_before;
+ const char *inptr_end = inptr_before + *inbytesleft;
+ char *outptr = *outbuf;
+ size_t outsize = *outbytesleft;
+ size_t res = (size_t)(-1);
+ size_t insize;
+
+ for (insize = 1; inptr_before + insize <= inptr_end; insize++)
+ {
+ inptr = inptr_before;
+ res = iconv (cd,
+ (ICONV_CONST char **) &inptr, &insize,
+ &outptr, &outsize);
+ if (!(res == (size_t)(-1) && errno == EINVAL))
+ break;
+ /* iconv can eat up a shift sequence but give EINVAL while attempting
+ to convert the first character. E.g. libiconv does this. */
+ if (inptr > inptr_before)
+ {
+ res = 0;
+ break;
+ }
+ }
+
+ *inbuf = inptr;
+ *inbytesleft = inptr_end - inptr;
+# if !defined _LIBICONV_VERSION && !defined __GLIBC__
+ /* Irix iconv() inserts a NUL byte if it cannot convert.
+ NetBSD iconv() inserts a question mark if it cannot convert.
+ Only GNU libiconv and GNU libc are known to prefer to fail rather
+ than doing a lossy conversion. */
+ if (res != (size_t)(-1) && res > 0)
+ {
+ /* iconv() has already incremented INPTR. We cannot go back to a
+ previous INPTR, otherwise the state inside CD would become invalid,
+ if FROM_CODESET is a stateful encoding. So, tell the caller that
+ *INBUF has already been incremented. */
+ *incremented = (inptr > inptr_before);
+ errno = EILSEQ;
+ return (size_t)(-1);
+ }
+# endif
+
+ if (res != (size_t)(-1))
+ {
+ *outbuf = outptr;
+ *outbytesleft = outsize;
+ }
+ *incremented = false;
+ return res;
+}
+
+/* utf8conv_carefully is like iconv, except that
+ - it converts from UTF-8 to UTF-8,
+ - it stops as soon as it encounters a conversion error, and it returns
+ in *INCREMENTED a boolean telling whether it has incremented the input
+ pointers past the error location,
+ - if one_character_only is true, it stops after converting one
+ character. */
+static size_t
+utf8conv_carefully (bool one_character_only,
+ const char **inbuf, size_t *inbytesleft,
+ char **outbuf, size_t *outbytesleft,
+ bool *incremented)
+{
+ const char *inptr = *inbuf;
+ size_t insize = *inbytesleft;
+ char *outptr = *outbuf;
+ size_t outsize = *outbytesleft;
+ size_t res;
+
+ res = 0;
+ do
+ {
+ ucs4_t uc;
+ int n;
+ int m;
+
+ n = u8_mbtoucr (&uc, (const uint8_t *) inptr, insize);
+ if (n < 0)
+ {
+ errno = (n == -2 ? EINVAL : EILSEQ);
+ n = u8_mbtouc (&uc, (const uint8_t *) inptr, insize);
+ inptr += n;
+ insize -= n;
+ res = (size_t)(-1);
+ *incremented = true;
+ break;
+ }
+ if (outsize == 0)
+ {
+ errno = E2BIG;
+ res = (size_t)(-1);
+ *incremented = false;
+ break;
+ }
+ m = u8_uctomb ((uint8_t *) outptr, uc, outsize);
+ if (m == -2)
+ {
+ errno = E2BIG;
+ res = (size_t)(-1);
+ *incremented = false;
+ break;
+ }
+ inptr += n;
+ insize -= n;
+ if (m == -1)
+ {
+ errno = EILSEQ;
+ res = (size_t)(-1);
+ *incremented = true;
+ break;
+ }
+ outptr += m;
+ outsize -= m;
+ }
+ while (!one_character_only && insize > 0);
+
+ *inbuf = inptr;
+ *inbytesleft = insize;
+ *outbuf = outptr;
+ *outbytesleft = outsize;
+ return res;
+}
+
+static int
+mem_cd_iconveh_internal (const char *src, size_t srclen,
+ iconv_t cd, iconv_t cd1, iconv_t cd2,
+ enum iconv_ilseq_handler handler,
+ size_t extra_alloc,
+ size_t *offsets,
+ char **resultp, size_t *lengthp)
+{
+ /* When a conversion error occurs, we cannot start using CD1 and CD2 at
+ this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR.
+ Instead, we have to start afresh from the beginning of SRC. */
+ /* Use a temporary buffer, so that for small strings, a single malloc()
+ call will be sufficient. */
+# define tmpbufsize 4096
+ /* The alignment is needed when converting e.g. to glibc's WCHAR_T or
+ libiconv's UCS-4-INTERNAL encoding. */
+ union { unsigned int align; char buf[tmpbufsize]; } tmp;
+# define tmpbuf tmp.buf
+
+ char *initial_result;
+ char *result;
+ size_t allocated;
+ size_t length;
+ size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */
+
+ if (*resultp != NULL && *lengthp >= sizeof (tmpbuf))
+ {
+ initial_result = *resultp;
+ allocated = *lengthp;
+ }
+ else
+ {
+ initial_result = tmpbuf;
+ allocated = sizeof (tmpbuf);
+ }
+ result = initial_result;
+
+ /* Test whether a direct conversion is possible at all. */
+ if (cd == (iconv_t)(-1))
+ goto indirectly;
+
+ if (offsets != NULL)
+ {
+ size_t i;
+
+ for (i = 0; i < srclen; i++)
+ offsets[i] = (size_t)(-1);
+
+ last_length = (size_t)(-1);
+ }
+ length = 0;
+
+ /* First, try a direct conversion, and see whether a conversion error
+ occurs at all. */
+ {
+ const char *inptr = src;
+ size_t insize = srclen;
+
+ /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */
+# if defined _LIBICONV_VERSION \
+ || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
+ /* Set to the initial state. */
+ iconv (cd, NULL, NULL, NULL, NULL);
+# endif
+
+ while (insize > 0)
+ {
+ char *outptr = result + length;
+ size_t outsize = allocated - extra_alloc - length;
+ bool incremented;
+ size_t res;
+ bool grow;
+
+ if (offsets != NULL)
+ {
+ if (length != last_length) /* ensure that offset[] be increasing */
+ {
+ offsets[inptr - src] = length;
+ last_length = length;
+ }
+ res = iconv_carefully_1 (cd,
+ &inptr, &insize,
+ &outptr, &outsize,
+ &incremented);
+ }
+ else
+ /* Use iconv_carefully instead of iconv here, because:
+ - If TO_CODESET is UTF-8, we can do the error handling in this
+ loop, no need for a second loop,
+ - With iconv() implementations other than GNU libiconv and GNU
+ libc, if we use iconv() in a big swoop, checking for an E2BIG
+ return, we lose the number of irreversible conversions. */
+ res = iconv_carefully (cd,
+ &inptr, &insize,
+ &outptr, &outsize,
+ &incremented);
+
+ length = outptr - result;
+ grow = (length + extra_alloc > allocated / 2);
+ if (res == (size_t)(-1))
+ {
+ if (errno == E2BIG)
+ grow = true;
+ else if (errno == EINVAL)
+ break;
+ else if (errno == EILSEQ && handler != iconveh_error)
+ {
+ if (cd2 == (iconv_t)(-1))
+ {
+ /* TO_CODESET is UTF-8. */
+ /* Error handling can produce up to 1 byte of output. */
+ if (length + 1 + extra_alloc > allocated)
+ {
+ char *memory;
+
+ allocated = 2 * allocated;
+ if (length + 1 + extra_alloc > allocated)
+ abort ();
+ if (result == initial_result)
+ memory = (char *) malloc (allocated);
+ else
+ memory = (char *) realloc (result, allocated);
+ if (memory == NULL)
+ {
+ if (result != initial_result)
+ free (result);
+ errno = ENOMEM;
+ return -1;
+ }
+ if (result == initial_result)
+ memcpy (memory, initial_result, length);
+ result = memory;
+ grow = false;
+ }
+ /* The input is invalid in FROM_CODESET. Eat up one byte
+ and emit a question mark. */
+ if (!incremented)
+ {
+ if (insize == 0)
+ abort ();
+ inptr++;
+ insize--;
+ }
+ result[length] = '?';
+ length++;
+ }
+ else
+ goto indirectly;
+ }
+ else
+ {
+ if (result != initial_result)
+ {
+ int saved_errno = errno;
+ free (result);
+ errno = saved_errno;
+ }
+ return -1;
+ }
+ }
+ if (insize == 0)
+ break;
+ if (grow)
+ {
+ char *memory;
+
+ allocated = 2 * allocated;
+ if (result == initial_result)
+ memory = (char *) malloc (allocated);
+ else
+ memory = (char *) realloc (result, allocated);
+ if (memory == NULL)
+ {
+ if (result != initial_result)
+ free (result);
+ errno = ENOMEM;
+ return -1;
+ }
+ if (result == initial_result)
+ memcpy (memory, initial_result, length);
+ result = memory;
+ }
+ }
+ }
+
+ /* Now get the conversion state back to the initial state.
+ But avoid glibc-2.1 bug and Solaris 2.7 bug. */
+#if defined _LIBICONV_VERSION \
+ || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
+ for (;;)
+ {
+ char *outptr = result + length;
+ size_t outsize = allocated - extra_alloc - length;
+ size_t res;
+
+ res = iconv (cd, NULL, NULL, &outptr, &outsize);
+ length = outptr - result;
+ if (res == (size_t)(-1))
+ {
+ if (errno == E2BIG)
+ {
+ char *memory;
+
+ allocated = 2 * allocated;
+ if (result == initial_result)
+ memory = (char *) malloc (allocated);
+ else
+ memory = (char *) realloc (result, allocated);
+ if (memory == NULL)
+ {
+ if (result != initial_result)
+ free (result);
+ errno = ENOMEM;
+ return -1;
+ }
+ if (result == initial_result)
+ memcpy (memory, initial_result, length);
+ result = memory;
+ }
+ else
+ {
+ if (result != initial_result)
+ {
+ int saved_errno = errno;
+ free (result);
+ errno = saved_errno;
+ }
+ return -1;
+ }
+ }
+ else
+ break;
+ }
+#endif
+
+ /* The direct conversion succeeded. */
+ goto done;
+
+ indirectly:
+ /* The direct conversion failed.
+ Use a conversion through UTF-8. */
+ if (offsets != NULL)
+ {
+ size_t i;
+
+ for (i = 0; i < srclen; i++)
+ offsets[i] = (size_t)(-1);
+
+ last_length = (size_t)(-1);
+ }
+ length = 0;
+ {
+ const bool slowly = (offsets != NULL || handler == iconveh_error);
+# define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
+ char utf8buf[utf8bufsize + 1];
+ size_t utf8len = 0;
+ const char *in1ptr = src;
+ size_t in1size = srclen;
+ bool do_final_flush1 = true;
+ bool do_final_flush2 = true;
+
+ /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */
+# if defined _LIBICONV_VERSION \
+ || !((__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) || defined __sun)
+ /* Set to the initial state. */
+ if (cd1 != (iconv_t)(-1))
+ iconv (cd1, NULL, NULL, NULL, NULL);
+ if (cd2 != (iconv_t)(-1))
+ iconv (cd2, NULL, NULL, NULL, NULL);
+# endif
+
+ while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2)
+ {
+ char *out1ptr = utf8buf + utf8len;
+ size_t out1size = utf8bufsize - utf8len;
+ bool incremented1;
+ size_t res1;
+ int errno1;
+
+ /* Conversion step 1: from FROM_CODESET to UTF-8. */
+ if (in1size > 0)
+ {
+ if (offsets != NULL
+ && length != last_length) /* ensure that offset[] be increasing */
+ {
+ offsets[in1ptr - src] = length;
+ last_length = length;
+ }
+ if (cd1 != (iconv_t)(-1))
+ {
+ if (slowly)
+ res1 = iconv_carefully_1 (cd1,
+ &in1ptr, &in1size,
+ &out1ptr, &out1size,
+ &incremented1);
+ else
+ res1 = iconv_carefully (cd1,
+ &in1ptr, &in1size,
+ &out1ptr, &out1size,
+ &incremented1);
+ }
+ else
+ {
+ /* FROM_CODESET is UTF-8. */
+ res1 = utf8conv_carefully (slowly,
+ &in1ptr, &in1size,
+ &out1ptr, &out1size,
+ &incremented1);
+ }
+ }
+ else if (do_final_flush1)
+ {
+ /* Now get the conversion state of CD1 back to the initial state.
+ But avoid glibc-2.1 bug and Solaris 2.7 bug. */
+# if defined _LIBICONV_VERSION \
+ || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
+ if (cd1 != (iconv_t)(-1))
+ res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size);
+ else
+# endif
+ res1 = 0;
+ do_final_flush1 = false;
+ incremented1 = true;
+ }
+ else
+ {
+ res1 = 0;
+ incremented1 = true;
+ }
+ if (res1 == (size_t)(-1)
+ && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))
+ {
+ if (result != initial_result)
+ {
+ int saved_errno = errno;
+ free (result);
+ errno = saved_errno;
+ }
+ return -1;
+ }
+ if (res1 == (size_t)(-1)
+ && errno == EILSEQ && handler != iconveh_error)
+ {
+ /* The input is invalid in FROM_CODESET. Eat up one byte and
+ emit a question mark. Room for the question mark was allocated
+ at the end of utf8buf. */
+ if (!incremented1)
+ {
+ if (in1size == 0)
+ abort ();
+ in1ptr++;
+ in1size--;
+ }
+ utf8buf[utf8len++] = '?';
+ }
+ errno1 = errno;
+ utf8len = out1ptr - utf8buf;
+
+ if (offsets != NULL
+ || in1size == 0
+ || utf8len > utf8bufsize / 2
+ || (res1 == (size_t)(-1) && errno1 == E2BIG))
+ {
+ /* Conversion step 2: from UTF-8 to TO_CODESET. */
+ const char *in2ptr = utf8buf;
+ size_t in2size = utf8len;
+
+ while (in2size > 0
+ || (in1size == 0 && !do_final_flush1 && do_final_flush2))
+ {
+ char *out2ptr = result + length;
+ size_t out2size = allocated - extra_alloc - length;
+ bool incremented2;
+ size_t res2;
+ bool grow;
+
+ if (in2size > 0)
+ {
+ if (cd2 != (iconv_t)(-1))
+ res2 = iconv_carefully (cd2,
+ &in2ptr, &in2size,
+ &out2ptr, &out2size,
+ &incremented2);
+ else
+ /* TO_CODESET is UTF-8. */
+ res2 = utf8conv_carefully (false,
+ &in2ptr, &in2size,
+ &out2ptr, &out2size,
+ &incremented2);
+ }
+ else /* in1size == 0 && !do_final_flush1
+ && in2size == 0 && do_final_flush2 */
+ {
+ /* Now get the conversion state of CD1 back to the initial
+ state. But avoid glibc-2.1 bug and Solaris 2.7 bug. */
+# if defined _LIBICONV_VERSION \
+ || !((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) || defined __sun)
+ if (cd2 != (iconv_t)(-1))
+ res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size);
+ else
+# endif
+ res2 = 0;
+ do_final_flush2 = false;
+ incremented2 = true;
+ }
+
+ length = out2ptr - result;
+ grow = (length + extra_alloc > allocated / 2);
+ if (res2 == (size_t)(-1))
+ {
+ if (errno == E2BIG)
+ grow = true;
+ else if (errno == EINVAL)
+ break;
+ else if (errno == EILSEQ && handler != iconveh_error)
+ {
+ /* Error handling can produce up to 10 bytes of ASCII
+ output. But TO_CODESET may be UCS-2, UTF-16 or
+ UCS-4, so use CD2 here as well. */
+ char scratchbuf[10];
+ size_t scratchlen;
+ ucs4_t uc;
+ const char *inptr;
+ size_t insize;
+ size_t res;
+
+ if (incremented2)
+ {
+ if (u8_prev (&uc, (const uint8_t *) in2ptr,
+ (const uint8_t *) utf8buf)
+ == NULL)
+ abort ();
+ }
+ else
+ {
+ int n;
+ if (in2size == 0)
+ abort ();
+ n = u8_mbtouc_unsafe (&uc, (const uint8_t *) in2ptr,
+ in2size);
+ in2ptr += n;
+ in2size -= n;
+ }
+
+ if (handler == iconveh_escape_sequence)
+ {
+ static char hex[16] = "0123456789ABCDEF";
+ scratchlen = 0;
+ scratchbuf[scratchlen++] = '\\';
+ if (uc < 0x10000)
+ scratchbuf[scratchlen++] = 'u';
+ else
+ {
+ scratchbuf[scratchlen++] = 'U';
+ scratchbuf[scratchlen++] = hex[(uc>>28) & 15];
+ scratchbuf[scratchlen++] = hex[(uc>>24) & 15];
+ scratchbuf[scratchlen++] = hex[(uc>>20) & 15];
+ scratchbuf[scratchlen++] = hex[(uc>>16) & 15];
+ }
+ scratchbuf[scratchlen++] = hex[(uc>>12) & 15];
+ scratchbuf[scratchlen++] = hex[(uc>>8) & 15];
+ scratchbuf[scratchlen++] = hex[(uc>>4) & 15];
+ scratchbuf[scratchlen++] = hex[uc & 15];
+ }
+ else
+ {
+ scratchbuf[0] = '?';
+ scratchlen = 1;
+ }
+
+ inptr = scratchbuf;
+ insize = scratchlen;
+ if (cd2 != (iconv_t)(-1))
+ res = iconv (cd2,
+ (ICONV_CONST char **) &inptr, &insize,
+ &out2ptr, &out2size);
+ else
+ {
+ /* TO_CODESET is UTF-8. */
+ if (out2size >= insize)
+ {
+ memcpy (out2ptr, inptr, insize);
+ out2ptr += insize;
+ out2size -= insize;
+ inptr += insize;
+ insize = 0;
+ res = 0;
+ }
+ else
+ {
+ errno = E2BIG;
+ res = (size_t)(-1);
+ }
+ }
+ length = out2ptr - result;
+ if (res == (size_t)(-1) && errno == E2BIG)
+ {
+ char *memory;
+
+ allocated = 2 * allocated;
+ if (length + 1 + extra_alloc > allocated)
+ abort ();
+ if (result == initial_result)
+ memory = (char *) malloc (allocated);
+ else
+ memory = (char *) realloc (result, allocated);
+ if (memory == NULL)
+ {
+ if (result != initial_result)
+ free (result);
+ errno = ENOMEM;
+ return -1;
+ }
+ if (result == initial_result)
+ memcpy (memory, initial_result, length);
+ result = memory;
+ grow = false;
+
+ out2ptr = result + length;
+ out2size = allocated - extra_alloc - length;
+ if (cd2 != (iconv_t)(-1))
+ res = iconv (cd2,
+ (ICONV_CONST char **) &inptr,
+ &insize,
+ &out2ptr, &out2size);
+ else
+ {
+ /* TO_CODESET is UTF-8. */
+ if (!(out2size >= insize))
+ abort ();
+ memcpy (out2ptr, inptr, insize);
+ out2ptr += insize;
+ out2size -= insize;
+ inptr += insize;
+ insize = 0;
+ res = 0;
+ }
+ length = out2ptr - result;
+ }
+# if !defined _LIBICONV_VERSION && !defined __GLIBC__
+ /* Irix iconv() inserts a NUL byte if it cannot convert.
+ NetBSD iconv() inserts a question mark if it cannot
+ convert.
+ Only GNU libiconv and GNU libc are known to prefer
+ to fail rather than doing a lossy conversion. */
+ if (res != (size_t)(-1) && res > 0)
+ {
+ errno = EILSEQ;
+ res = (size_t)(-1);
+ }
+# endif
+ if (res == (size_t)(-1))
+ {
+ /* Failure converting the ASCII replacement. */
+ if (result != initial_result)
+ {
+ int saved_errno = errno;
+ free (result);
+ errno = saved_errno;
+ }
+ return -1;
+ }
+ }
+ else
+ {
+ if (result != initial_result)
+ {
+ int saved_errno = errno;
+ free (result);
+ errno = saved_errno;
+ }
+ return -1;
+ }
+ }
+ if (!(in2size > 0
+ || (in1size == 0 && !do_final_flush1 && do_final_flush2)))
+ break;
+ if (grow)
+ {
+ char *memory;
+
+ allocated = 2 * allocated;
+ if (result == initial_result)
+ memory = (char *) malloc (allocated);
+ else
+ memory = (char *) realloc (result, allocated);
+ if (memory == NULL)
+ {
+ if (result != initial_result)
+ free (result);
+ errno = ENOMEM;
+ return -1;
+ }
+ if (result == initial_result)
+ memcpy (memory, initial_result, length);
+ result = memory;
+ }
+ }
+
+ /* Move the remaining bytes to the beginning of utf8buf. */
+ if (in2size > 0)
+ memmove (utf8buf, in2ptr, in2size);
+ utf8len = in2size;
+ }
+
+ if (res1 == (size_t)(-1))
+ {
+ if (errno1 == EINVAL)
+ in1size = 0;
+ else if (errno1 == EILSEQ)
+ {
+ if (result != initial_result)
+ free (result);
+ errno = errno1;
+ return -1;
+ }
+ }
+ }
+# undef utf8bufsize
+ }
+
+ done:
+ /* Now the final memory allocation. */
+ if (result == tmpbuf)
+ {
+ size_t memsize = length + extra_alloc;
+ char *memory;
+
+ memory = (char *) malloc (memsize > 0 ? memsize : 1);
+ if (memory != NULL)
+ {
+ memcpy (memory, tmpbuf, length);
+ result = memory;
+ }
+ else
+ {
+ errno = ENOMEM;
+ return -1;
+ }
+ }
+ else if (result != *resultp && length + extra_alloc < allocated)
+ {
+ /* Shrink the allocated memory if possible. */
+ size_t memsize = length + extra_alloc;
+ char *memory;
+
+ memory = (char *) realloc (result, memsize > 0 ? memsize : 1);
+ if (memory != NULL)
+ result = memory;
+ }
+ *resultp = result;
+ *lengthp = length;
+ return 0;
+# undef tmpbuf
+# undef tmpbufsize
+}
+
+int
+mem_cd_iconveh (const char *src, size_t srclen,
+ iconv_t cd, iconv_t cd1, iconv_t cd2,
+ enum iconv_ilseq_handler handler,
+ size_t *offsets,
+ char **resultp, size_t *lengthp)
+{
+ return mem_cd_iconveh_internal (src, srclen, cd, cd1, cd2, handler, 0,
+ offsets, resultp, lengthp);
+}
+
+char *
+str_cd_iconveh (const char *src,
+ iconv_t cd, iconv_t cd1, iconv_t cd2,
+ enum iconv_ilseq_handler handler)
+{
+ /* For most encodings, a trailing NUL byte in the input will be converted
+ to a trailing NUL byte in the output. But not for UTF-7. So that this
+ function is usable for UTF-7, we have to exclude the NUL byte from the
+ conversion and add it by hand afterwards. */
+ char *result = NULL;
+ size_t length = 0;
+ int retval = mem_cd_iconveh_internal (src, strlen (src),
+ cd, cd1, cd2, handler, 1, NULL,
+ &result, &length);
+
+ if (retval < 0)
+ {
+ if (result != NULL)
+ {
+ int saved_errno = errno;
+ free (result);
+ errno = saved_errno;
+ }
+ return NULL;
+ }
+
+ /* Add the terminating NUL byte. */
+ result[length] = '\0';
+
+ return result;
+}
+
+#endif
+
+int
+mem_iconveh (const char *src, size_t srclen,
+ const char *from_codeset, const char *to_codeset,
+ enum iconv_ilseq_handler handler,
+ size_t *offsets,
+ char **resultp, size_t *lengthp)
+{
+ if (srclen == 0)
+ {
+ /* Nothing to convert. */
+ *lengthp = 0;
+ return 0;
+ }
+ else if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0)
+ {
+ char *result;
+
+ if (*resultp != NULL && *lengthp >= srclen)
+ result = *resultp;
+ else
+ {
+ result = (char *) malloc (srclen);
+ if (result == NULL)
+ {
+ errno = ENOMEM;
+ return -1;
+ }
+ }
+ memcpy (result, src, srclen);
+ *resultp = result;
+ *lengthp = srclen;
+ return 0;
+ }
+ else
+ {
+#if HAVE_ICONV
+ iconv_t cd;
+ iconv_t cd1;
+ iconv_t cd2;
+ char *result;
+ size_t length;
+ int retval;
+
+ /* Avoid glibc-2.1 bug with EUC-KR. */
+# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
+ if (c_strcasecmp (from_codeset, "EUC-KR") == 0
+ || c_strcasecmp (to_codeset, "EUC-KR") == 0)
+ {
+ errno = EINVAL;
+ return -1;
+ }
+# endif
+
+ cd = iconv_open (to_codeset, from_codeset);
+
+ if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
+ cd1 = (iconv_t)(-1);
+ else
+ {
+ cd1 = iconv_open ("UTF-8", from_codeset);
+ if (cd1 == (iconv_t)(-1))
+ {
+ int saved_errno = errno;
+ if (cd != (iconv_t)(-1))
+ iconv_close (cd);
+ errno = saved_errno;
+ return -1;
+ }
+ }
+
+ if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)
+# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
+ || c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0
+# endif
+ )
+ cd2 = (iconv_t)(-1);
+ else
+ {
+ cd2 = iconv_open (to_codeset, "UTF-8");
+ if (cd2 == (iconv_t)(-1))
+ {
+ int saved_errno = errno;
+ if (cd1 != (iconv_t)(-1))
+ iconv_close (cd1);
+ if (cd != (iconv_t)(-1))
+ iconv_close (cd);
+ errno = saved_errno;
+ return -1;
+ }
+ }
+
+ result = *resultp;
+ length = *lengthp;
+ retval = mem_cd_iconveh (src, srclen, cd, cd1, cd2, handler, offsets,
+ &result, &length);
+
+ if (retval < 0)
+ {
+ /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv. */
+ int saved_errno = errno;
+ if (cd2 != (iconv_t)(-1))
+ iconv_close (cd2);
+ if (cd1 != (iconv_t)(-1))
+ iconv_close (cd1);
+ if (cd != (iconv_t)(-1))
+ iconv_close (cd);
+ errno = saved_errno;
+ }
+ else
+ {
+ if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
+ {
+ /* Return -1, but free the allocated memory, and while doing
+ that, preserve the errno from iconv_close. */
+ int saved_errno = errno;
+ if (cd1 != (iconv_t)(-1))
+ iconv_close (cd1);
+ if (cd != (iconv_t)(-1))
+ iconv_close (cd);
+ if (result != *resultp && result != NULL)
+ free (result);
+ errno = saved_errno;
+ return -1;
+ }
+ if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
+ {
+ /* Return -1, but free the allocated memory, and while doing
+ that, preserve the errno from iconv_close. */
+ int saved_errno = errno;
+ if (cd != (iconv_t)(-1))
+ iconv_close (cd);
+ if (result != *resultp && result != NULL)
+ free (result);
+ errno = saved_errno;
+ return -1;
+ }
+ if (cd != (iconv_t)(-1) && iconv_close (cd) < 0)
+ {
+ /* Return -1, but free the allocated memory, and while doing
+ that, preserve the errno from iconv_close. */
+ int saved_errno = errno;
+ if (result != *resultp && result != NULL)
+ free (result);
+ errno = saved_errno;
+ return -1;
+ }
+ *resultp = result;
+ *lengthp = length;
+ }
+ return retval;
+#else
+ /* This is a different error code than if iconv_open existed but didn't
+ support from_codeset and to_codeset, so that the caller can emit
+ an error message such as
+ "iconv() is not supported. Installing GNU libiconv and
+ then reinstalling this package would fix this." */
+ errno = ENOSYS;
+ return -1;
+#endif
+ }
+}
+
+char *
+str_iconveh (const char *src,
+ const char *from_codeset, const char *to_codeset,
+ enum iconv_ilseq_handler handler)
+{
+ if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
+ {
+ char *result = strdup (src);
+
+ if (result == NULL)
+ errno = ENOMEM;
+ return result;
+ }
+ else
+ {
+#if HAVE_ICONV
+ iconv_t cd;
+ iconv_t cd1;
+ iconv_t cd2;
+ char *result;
+
+ /* Avoid glibc-2.1 bug with EUC-KR. */
+# if (__GLIBC__ - 0 == 2 && __GLIBC_MINOR__ - 0 <= 1) && !defined _LIBICONV_VERSION
+ if (c_strcasecmp (from_codeset, "EUC-KR") == 0
+ || c_strcasecmp (to_codeset, "EUC-KR") == 0)
+ {
+ errno = EINVAL;
+ return NULL;
+ }
+# endif
+
+ cd = iconv_open (to_codeset, from_codeset);
+
+ if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0))
+ cd1 = (iconv_t)(-1);
+ else
+ {
+ cd1 = iconv_open ("UTF-8", from_codeset);
+ if (cd1 == (iconv_t)(-1))
+ {
+ int saved_errno = errno;
+ if (cd != (iconv_t)(-1))
+ iconv_close (cd);
+ errno = saved_errno;
+ return NULL;
+ }
+ }
+
+ if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)
+# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
+ || c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0
+# endif
+ )
+ cd2 = (iconv_t)(-1);
+ else
+ {
+ cd2 = iconv_open (to_codeset, "UTF-8");
+ if (cd2 == (iconv_t)(-1))
+ {
+ int saved_errno = errno;
+ if (cd1 != (iconv_t)(-1))
+ iconv_close (cd1);
+ if (cd != (iconv_t)(-1))
+ iconv_close (cd);
+ errno = saved_errno;
+ return NULL;
+ }
+ }
+
+ result = str_cd_iconveh (src, cd, cd1, cd2, handler);
+
+ if (result == NULL)
+ {
+ /* Close cd, cd1, cd2, but preserve the errno from str_cd_iconv. */
+ int saved_errno = errno;
+ if (cd2 != (iconv_t)(-1))
+ iconv_close (cd2);
+ if (cd1 != (iconv_t)(-1))
+ iconv_close (cd1);
+ if (cd != (iconv_t)(-1))
+ iconv_close (cd);
+ errno = saved_errno;
+ }
+ else
+ {
+ if (cd2 != (iconv_t)(-1) && iconv_close (cd2) < 0)
+ {
+ /* Return NULL, but free the allocated memory, and while doing
+ that, preserve the errno from iconv_close. */
+ int saved_errno = errno;
+ if (cd1 != (iconv_t)(-1))
+ iconv_close (cd1);
+ if (cd != (iconv_t)(-1))
+ iconv_close (cd);
+ free (result);
+ errno = saved_errno;
+ return NULL;
+ }
+ if (cd1 != (iconv_t)(-1) && iconv_close (cd1) < 0)
+ {
+ /* Return NULL, but free the allocated memory, and while doing
+ that, preserve the errno from iconv_close. */
+ int saved_errno = errno;
+ if (cd != (iconv_t)(-1))
+ iconv_close (cd);
+ free (result);
+ errno = saved_errno;
+ return NULL;
+ }
+ if (cd != (iconv_t)(-1) && iconv_close (cd) < 0)
+ {
+ /* Return NULL, but free the allocated memory, and while doing
+ that, preserve the errno from iconv_close. */
+ int saved_errno = errno;
+ free (result);
+ errno = saved_errno;
+ return NULL;
+ }
+ }
+ return result;
+#else
+ /* This is a different error code than if iconv_open existed but didn't
+ support from_codeset and to_codeset, so that the caller can emit
+ an error message such as
+ "iconv() is not supported. Installing GNU libiconv and
+ then reinstalling this package would fix this." */
+ errno = ENOSYS;
+ return NULL;
+#endif
+ }
+}