/* Character set conversion with error handling. Copyright (C) 2001-2018 Free Software Foundation, Inc. Written by Bruno Haible and Simon Josefsson. This program is free software: you can redistribute it and/or modify it under the terms of either: * the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. or * the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. or both in parallel, as here. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . */ #include /* Specification. */ #include "striconveh.h" #include #include #include #include #if HAVE_ICONV # include # include "unistr.h" #endif #include "c-strcase.h" #include "c-strcaseeq.h" #ifndef SIZE_MAX # define SIZE_MAX ((size_t) -1) #endif #if HAVE_ICONV /* The caller must provide an iconveh_t, not just an iconv_t, because when a conversion error occurs, we may have to determine the Unicode representation of the inconvertible character. */ int iconveh_open (const char *to_codeset, const char *from_codeset, iconveh_t *cdp) { iconv_t cd; iconv_t cd1; iconv_t cd2; /* Avoid glibc-2.1 bug with EUC-KR. */ # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ && !defined _LIBICONV_VERSION if (c_strcasecmp (from_codeset, "EUC-KR") == 0 || c_strcasecmp (to_codeset, "EUC-KR") == 0) { errno = EINVAL; return -1; } # endif cd = iconv_open (to_codeset, from_codeset); if (STRCASEEQ (from_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0)) cd1 = (iconv_t)(-1); else { cd1 = iconv_open ("UTF-8", from_codeset); if (cd1 == (iconv_t)(-1)) { int saved_errno = errno; if (cd != (iconv_t)(-1)) iconv_close (cdp->cd); errno = saved_errno; return -1; } } if (STRCASEEQ (to_codeset, "UTF-8", 'U','T','F','-','8',0,0,0,0) # if (((__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2) \ && !defined __UCLIBC__) \ || _LIBICONV_VERSION >= 0x0105 || c_strcasecmp (to_codeset, "UTF-8//TRANSLIT") == 0 # endif ) cd2 = (iconv_t)(-1); else { cd2 = iconv_open (to_codeset, "UTF-8"); if (cd2 == (iconv_t)(-1)) { int saved_errno = errno; if (cd1 != (iconv_t)(-1)) iconv_close (cd1); if (cd != (iconv_t)(-1)) iconv_close (cd); errno = saved_errno; return -1; } } cdp->cd = cd; cdp->cd1 = cd1; cdp->cd2 = cd2; return 0; } int iconveh_close (const iconveh_t *cd) { if (cd->cd2 != (iconv_t)(-1) && iconv_close (cd->cd2) < 0) { /* Return -1, but preserve the errno from iconv_close. */ int saved_errno = errno; if (cd->cd1 != (iconv_t)(-1)) iconv_close (cd->cd1); if (cd->cd != (iconv_t)(-1)) iconv_close (cd->cd); errno = saved_errno; return -1; } if (cd->cd1 != (iconv_t)(-1) && iconv_close (cd->cd1) < 0) { /* Return -1, but preserve the errno from iconv_close. */ int saved_errno = errno; if (cd->cd != (iconv_t)(-1)) iconv_close (cd->cd); errno = saved_errno; return -1; } if (cd->cd != (iconv_t)(-1) && iconv_close (cd->cd) < 0) return -1; return 0; } /* iconv_carefully is like iconv, except that it stops as soon as it encounters a conversion error, and it returns in *INCREMENTED a boolean telling whether it has incremented the input pointers past the error location. */ # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__) /* Irix iconv() inserts a NUL byte if it cannot convert. NetBSD iconv() inserts a question mark if it cannot convert. Only GNU libiconv and GNU libc are known to prefer to fail rather than doing a lossy conversion. */ static size_t iconv_carefully (iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, bool *incremented) { const char *inptr = *inbuf; const char *inptr_end = inptr + *inbytesleft; char *outptr = *outbuf; size_t outsize = *outbytesleft; const char *inptr_before; size_t res; do { size_t insize; inptr_before = inptr; res = (size_t)(-1); for (insize = 1; inptr + insize <= inptr_end; insize++) { res = iconv (cd, (ICONV_CONST char **) &inptr, &insize, &outptr, &outsize); if (!(res == (size_t)(-1) && errno == EINVAL)) break; /* iconv can eat up a shift sequence but give EINVAL while attempting to convert the first character. E.g. libiconv does this. */ if (inptr > inptr_before) { res = 0; break; } } if (res == 0) { *outbuf = outptr; *outbytesleft = outsize; } } while (res == 0 && inptr < inptr_end); *inbuf = inptr; *inbytesleft = inptr_end - inptr; if (res != (size_t)(-1) && res > 0) { /* iconv() has already incremented INPTR. We cannot go back to a previous INPTR, otherwise the state inside CD would become invalid, if FROM_CODESET is a stateful encoding. So, tell the caller that *INBUF has already been incremented. */ *incremented = (inptr > inptr_before); errno = EILSEQ; return (size_t)(-1); } else { *incremented = false; return res; } } # else # define iconv_carefully(cd, inbuf, inbytesleft, outbuf, outbytesleft, incremented) \ (*(incremented) = false, \ iconv (cd, (ICONV_CONST char **) (inbuf), inbytesleft, outbuf, outbytesleft)) # endif /* iconv_carefully_1 is like iconv_carefully, except that it stops after converting one character or one shift sequence. */ static size_t iconv_carefully_1 (iconv_t cd, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, bool *incremented) { const char *inptr_before = *inbuf; const char *inptr = inptr_before; const char *inptr_end = inptr_before + *inbytesleft; char *outptr = *outbuf; size_t outsize = *outbytesleft; size_t res = (size_t)(-1); size_t insize; for (insize = 1; inptr_before + insize <= inptr_end; insize++) { inptr = inptr_before; res = iconv (cd, (ICONV_CONST char **) &inptr, &insize, &outptr, &outsize); if (!(res == (size_t)(-1) && errno == EINVAL)) break; /* iconv can eat up a shift sequence but give EINVAL while attempting to convert the first character. E.g. libiconv does this. */ if (inptr > inptr_before) { res = 0; break; } } *inbuf = inptr; *inbytesleft = inptr_end - inptr; # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__) /* Irix iconv() inserts a NUL byte if it cannot convert. NetBSD iconv() inserts a question mark if it cannot convert. Only GNU libiconv and GNU libc are known to prefer to fail rather than doing a lossy conversion. */ if (res != (size_t)(-1) && res > 0) { /* iconv() has already incremented INPTR. We cannot go back to a previous INPTR, otherwise the state inside CD would become invalid, if FROM_CODESET is a stateful encoding. So, tell the caller that *INBUF has already been incremented. */ *incremented = (inptr > inptr_before); errno = EILSEQ; return (size_t)(-1); } # endif if (res != (size_t)(-1)) { *outbuf = outptr; *outbytesleft = outsize; } *incremented = false; return res; } /* utf8conv_carefully is like iconv, except that - it converts from UTF-8 to UTF-8, - it stops as soon as it encounters a conversion error, and it returns in *INCREMENTED a boolean telling whether it has incremented the input pointers past the error location, - if one_character_only is true, it stops after converting one character. */ static size_t utf8conv_carefully (bool one_character_only, const char **inbuf, size_t *inbytesleft, char **outbuf, size_t *outbytesleft, bool *incremented) { const char *inptr = *inbuf; size_t insize = *inbytesleft; char *outptr = *outbuf; size_t outsize = *outbytesleft; size_t res; res = 0; do { ucs4_t uc; int n; int m; n = u8_mbtoucr (&uc, (const uint8_t *) inptr, insize); if (n < 0) { errno = (n == -2 ? EINVAL : EILSEQ); n = u8_mbtouc (&uc, (const uint8_t *) inptr, insize); inptr += n; insize -= n; res = (size_t)(-1); *incremented = true; break; } if (outsize == 0) { errno = E2BIG; res = (size_t)(-1); *incremented = false; break; } m = u8_uctomb ((uint8_t *) outptr, uc, outsize); if (m == -2) { errno = E2BIG; res = (size_t)(-1); *incremented = false; break; } inptr += n; insize -= n; if (m == -1) { errno = EILSEQ; res = (size_t)(-1); *incremented = true; break; } outptr += m; outsize -= m; } while (!one_character_only && insize > 0); *inbuf = inptr; *inbytesleft = insize; *outbuf = outptr; *outbytesleft = outsize; return res; } static int mem_cd_iconveh_internal (const char *src, size_t srclen, iconv_t cd, iconv_t cd1, iconv_t cd2, enum iconv_ilseq_handler handler, size_t extra_alloc, size_t *offsets, char **resultp, size_t *lengthp) { /* When a conversion error occurs, we cannot start using CD1 and CD2 at this point: FROM_CODESET may be a stateful encoding like ISO-2022-KR. Instead, we have to start afresh from the beginning of SRC. */ /* Use a temporary buffer, so that for small strings, a single malloc() call will be sufficient. */ # define tmpbufsize 4096 /* The alignment is needed when converting e.g. to glibc's WCHAR_T or libiconv's UCS-4-INTERNAL encoding. */ union { unsigned int align; char buf[tmpbufsize]; } tmp; # define tmpbuf tmp.buf char *initial_result; char *result; size_t allocated; size_t length; size_t last_length = (size_t)(-1); /* only needed if offsets != NULL */ if (*resultp != NULL && *lengthp >= sizeof (tmpbuf)) { initial_result = *resultp; allocated = *lengthp; } else { initial_result = tmpbuf; allocated = sizeof (tmpbuf); } result = initial_result; /* Test whether a direct conversion is possible at all. */ if (cd == (iconv_t)(-1)) goto indirectly; if (offsets != NULL) { size_t i; for (i = 0; i < srclen; i++) offsets[i] = (size_t)(-1); last_length = (size_t)(-1); } length = 0; /* First, try a direct conversion, and see whether a conversion error occurs at all. */ { const char *inptr = src; size_t insize = srclen; /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ # if defined _LIBICONV_VERSION \ || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ || defined __sun) /* Set to the initial state. */ iconv (cd, NULL, NULL, NULL, NULL); # endif while (insize > 0) { char *outptr = result + length; size_t outsize = allocated - extra_alloc - length; bool incremented; size_t res; bool grow; if (offsets != NULL) { if (length != last_length) /* ensure that offset[] be increasing */ { offsets[inptr - src] = length; last_length = length; } res = iconv_carefully_1 (cd, &inptr, &insize, &outptr, &outsize, &incremented); } else /* Use iconv_carefully instead of iconv here, because: - If TO_CODESET is UTF-8, we can do the error handling in this loop, no need for a second loop, - With iconv() implementations other than GNU libiconv and GNU libc, if we use iconv() in a big swoop, checking for an E2BIG return, we lose the number of irreversible conversions. */ res = iconv_carefully (cd, &inptr, &insize, &outptr, &outsize, &incremented); length = outptr - result; grow = (length + extra_alloc > allocated / 2); if (res == (size_t)(-1)) { if (errno == E2BIG) grow = true; else if (errno == EINVAL) break; else if (errno == EILSEQ && handler != iconveh_error) { if (cd2 == (iconv_t)(-1)) { /* TO_CODESET is UTF-8. */ /* Error handling can produce up to 1 byte of output. */ if (length + 1 + extra_alloc > allocated) { char *memory; allocated = 2 * allocated; if (length + 1 + extra_alloc > allocated) abort (); if (result == initial_result) memory = (char *) malloc (allocated); else memory = (char *) realloc (result, allocated); if (memory == NULL) { if (result != initial_result) free (result); errno = ENOMEM; return -1; } if (result == initial_result) memcpy (memory, initial_result, length); result = memory; grow = false; } /* The input is invalid in FROM_CODESET. Eat up one byte and emit a question mark. */ if (!incremented) { if (insize == 0) abort (); inptr++; insize--; } result[length] = '?'; length++; } else goto indirectly; } else { if (result != initial_result) { int saved_errno = errno; free (result); errno = saved_errno; } return -1; } } if (insize == 0) break; if (grow) { char *memory; allocated = 2 * allocated; if (result == initial_result) memory = (char *) malloc (allocated); else memory = (char *) realloc (result, allocated); if (memory == NULL) { if (result != initial_result) free (result); errno = ENOMEM; return -1; } if (result == initial_result) memcpy (memory, initial_result, length); result = memory; } } } /* Now get the conversion state back to the initial state. But avoid glibc-2.1 bug and Solaris 2.7 bug. */ #if defined _LIBICONV_VERSION \ || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ || defined __sun) for (;;) { char *outptr = result + length; size_t outsize = allocated - extra_alloc - length; size_t res; res = iconv (cd, NULL, NULL, &outptr, &outsize); length = outptr - result; if (res == (size_t)(-1)) { if (errno == E2BIG) { char *memory; allocated = 2 * allocated; if (result == initial_result) memory = (char *) malloc (allocated); else memory = (char *) realloc (result, allocated); if (memory == NULL) { if (result != initial_result) free (result); errno = ENOMEM; return -1; } if (result == initial_result) memcpy (memory, initial_result, length); result = memory; } else { if (result != initial_result) { int saved_errno = errno; free (result); errno = saved_errno; } return -1; } } else break; } #endif /* The direct conversion succeeded. */ goto done; indirectly: /* The direct conversion failed. Use a conversion through UTF-8. */ if (offsets != NULL) { size_t i; for (i = 0; i < srclen; i++) offsets[i] = (size_t)(-1); last_length = (size_t)(-1); } length = 0; { const bool slowly = (offsets != NULL || handler == iconveh_error); # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */ char utf8buf[utf8bufsize + 1]; size_t utf8len = 0; const char *in1ptr = src; size_t in1size = srclen; bool do_final_flush1 = true; bool do_final_flush2 = true; /* Avoid glibc-2.1 bug and Solaris 2.7-2.9 bug. */ # if defined _LIBICONV_VERSION \ || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ || defined __sun) /* Set to the initial state. */ if (cd1 != (iconv_t)(-1)) iconv (cd1, NULL, NULL, NULL, NULL); if (cd2 != (iconv_t)(-1)) iconv (cd2, NULL, NULL, NULL, NULL); # endif while (in1size > 0 || do_final_flush1 || utf8len > 0 || do_final_flush2) { char *out1ptr = utf8buf + utf8len; size_t out1size = utf8bufsize - utf8len; bool incremented1; size_t res1; int errno1; /* Conversion step 1: from FROM_CODESET to UTF-8. */ if (in1size > 0) { if (offsets != NULL && length != last_length) /* ensure that offset[] be increasing */ { offsets[in1ptr - src] = length; last_length = length; } if (cd1 != (iconv_t)(-1)) { if (slowly) res1 = iconv_carefully_1 (cd1, &in1ptr, &in1size, &out1ptr, &out1size, &incremented1); else res1 = iconv_carefully (cd1, &in1ptr, &in1size, &out1ptr, &out1size, &incremented1); } else { /* FROM_CODESET is UTF-8. */ res1 = utf8conv_carefully (slowly, &in1ptr, &in1size, &out1ptr, &out1size, &incremented1); } } else if (do_final_flush1) { /* Now get the conversion state of CD1 back to the initial state. But avoid glibc-2.1 bug and Solaris 2.7 bug. */ # if defined _LIBICONV_VERSION \ || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ || defined __sun) if (cd1 != (iconv_t)(-1)) res1 = iconv (cd1, NULL, NULL, &out1ptr, &out1size); else # endif res1 = 0; do_final_flush1 = false; incremented1 = true; } else { res1 = 0; incremented1 = true; } if (res1 == (size_t)(-1) && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ)) { if (result != initial_result) { int saved_errno = errno; free (result); errno = saved_errno; } return -1; } if (res1 == (size_t)(-1) && errno == EILSEQ && handler != iconveh_error) { /* The input is invalid in FROM_CODESET. Eat up one byte and emit a question mark. Room for the question mark was allocated at the end of utf8buf. */ if (!incremented1) { if (in1size == 0) abort (); in1ptr++; in1size--; } *out1ptr++ = '?'; res1 = 0; } errno1 = errno; utf8len = out1ptr - utf8buf; if (offsets != NULL || in1size == 0 || utf8len > utf8bufsize / 2 || (res1 == (size_t)(-1) && errno1 == E2BIG)) { /* Conversion step 2: from UTF-8 to TO_CODESET. */ const char *in2ptr = utf8buf; size_t in2size = utf8len; while (in2size > 0 || (in1size == 0 && !do_final_flush1 && do_final_flush2)) { char *out2ptr = result + length; size_t out2size = allocated - extra_alloc - length; bool incremented2; size_t res2; bool grow; if (in2size > 0) { if (cd2 != (iconv_t)(-1)) res2 = iconv_carefully (cd2, &in2ptr, &in2size, &out2ptr, &out2size, &incremented2); else /* TO_CODESET is UTF-8. */ res2 = utf8conv_carefully (false, &in2ptr, &in2size, &out2ptr, &out2size, &incremented2); } else /* in1size == 0 && !do_final_flush1 && in2size == 0 && do_final_flush2 */ { /* Now get the conversion state of CD1 back to the initial state. But avoid glibc-2.1 bug and Solaris 2.7 bug. */ # if defined _LIBICONV_VERSION \ || !(((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \ || defined __sun) if (cd2 != (iconv_t)(-1)) res2 = iconv (cd2, NULL, NULL, &out2ptr, &out2size); else # endif res2 = 0; do_final_flush2 = false; incremented2 = true; } length = out2ptr - result; grow = (length + extra_alloc > allocated / 2); if (res2 == (size_t)(-1)) { if (errno == E2BIG) grow = true; else if (errno == EINVAL) break; else if (errno == EILSEQ && handler != iconveh_error) { /* Error handling can produce up to 10 bytes of ASCII output. But TO_CODESET may be UCS-2, UTF-16 or UCS-4, so use CD2 here as well. */ char scratchbuf[10]; size_t scratchlen; ucs4_t uc; const char *inptr; size_t insize; size_t res; if (incremented2) { if (u8_prev (&uc, (const uint8_t *) in2ptr, (const uint8_t *) utf8buf) == NULL) abort (); } else { int n; if (in2size == 0) abort (); n = u8_mbtouc_unsafe (&uc, (const uint8_t *) in2ptr, in2size); in2ptr += n; in2size -= n; } if (handler == iconveh_escape_sequence) { static char hex[16] = "0123456789ABCDEF"; scratchlen = 0; scratchbuf[scratchlen++] = '\\'; if (uc < 0x10000) scratchbuf[scratchlen++] = 'u'; else { scratchbuf[scratchlen++] = 'U'; scratchbuf[scratchlen++] = hex[(uc>>28) & 15]; scratchbuf[scratchlen++] = hex[(uc>>24) & 15]; scratchbuf[scratchlen++] = hex[(uc>>20) & 15]; scratchbuf[scratchlen++] = hex[(uc>>16) & 15]; } scratchbuf[scratchlen++] = hex[(uc>>12) & 15]; scratchbuf[scratchlen++] = hex[(uc>>8) & 15]; scratchbuf[scratchlen++] = hex[(uc>>4) & 15]; scratchbuf[scratchlen++] = hex[uc & 15]; } else { scratchbuf[0] = '?'; scratchlen = 1; } inptr = scratchbuf; insize = scratchlen; if (cd2 != (iconv_t)(-1)) res = iconv (cd2, (ICONV_CONST char **) &inptr, &insize, &out2ptr, &out2size); else { /* TO_CODESET is UTF-8. */ if (out2size >= insize) { memcpy (out2ptr, inptr, insize); out2ptr += insize; out2size -= insize; inptr += insize; insize = 0; res = 0; } else { errno = E2BIG; res = (size_t)(-1); } } length = out2ptr - result; if (res == (size_t)(-1) && errno == E2BIG) { char *memory; allocated = 2 * allocated; if (length + 1 + extra_alloc > allocated) abort (); if (result == initial_result) memory = (char *) malloc (allocated); else memory = (char *) realloc (result, allocated); if (memory == NULL) { if (result != initial_result) free (result); errno = ENOMEM; return -1; } if (result == initial_result) memcpy (memory, initial_result, length); result = memory; grow = false; out2ptr = result + length; out2size = allocated - extra_alloc - length; if (cd2 != (iconv_t)(-1)) res = iconv (cd2, (ICONV_CONST char **) &inptr, &insize, &out2ptr, &out2size); else { /* TO_CODESET is UTF-8. */ if (!(out2size >= insize)) abort (); memcpy (out2ptr, inptr, insize); out2ptr += insize; out2size -= insize; inptr += insize; insize = 0; res = 0; } length = out2ptr - result; } # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__) /* Irix iconv() inserts a NUL byte if it cannot convert. NetBSD iconv() inserts a question mark if it cannot convert. Only GNU libiconv and GNU libc are known to prefer to fail rather than doing a lossy conversion. */ if (res != (size_t)(-1) && res > 0) { errno = EILSEQ; res = (size_t)(-1); } # endif if (res == (size_t)(-1)) { /* Failure converting the ASCII replacement. */ if (result != initial_result) { int saved_errno = errno; free (result); errno = saved_errno; } return -1; } } else { if (result != initial_result) { int saved_errno = errno; free (result); errno = saved_errno; } return -1; } } if (!(in2size > 0 || (in1size == 0 && !do_final_flush1 && do_final_flush2))) break; if (grow) { char *memory; allocated = 2 * allocated; if (result == initial_result) memory = (char *) malloc (allocated); else memory = (char *) realloc (result, allocated); if (memory == NULL) { if (result != initial_result) free (result); errno = ENOMEM; return -1; } if (result == initial_result) memcpy (memory, initial_result, length); result = memory; } } /* Move the remaining bytes to the beginning of utf8buf. */ if (in2size > 0) memmove (utf8buf, in2ptr, in2size); utf8len = in2size; } if (res1 == (size_t)(-1)) { if (errno1 == EINVAL) in1size = 0; else if (errno1 == EILSEQ) { if (result != initial_result) free (result); errno = errno1; return -1; } } } # undef utf8bufsize } done: /* Now the final memory allocation. */ if (result == tmpbuf) { size_t memsize = length + extra_alloc; if (*resultp != NULL && *lengthp >= memsize) result = *resultp; else { char *memory; memory = (char *) malloc (memsize > 0 ? memsize : 1); if (memory != NULL) result = memory; else { errno = ENOMEM; return -1; } } memcpy (result, tmpbuf, length); } else if (result != *resultp && length + extra_alloc < allocated) { /* Shrink the allocated memory if possible. */ size_t memsize = length + extra_alloc; char *memory; memory = (char *) realloc (result, memsize > 0 ? memsize : 1); if (memory != NULL) result = memory; } *resultp = result; *lengthp = length; return 0; # undef tmpbuf # undef tmpbufsize } int mem_cd_iconveh (const char *src, size_t srclen, const iconveh_t *cd, enum iconv_ilseq_handler handler, size_t *offsets, char **resultp, size_t *lengthp) { return mem_cd_iconveh_internal (src, srclen, cd->cd, cd->cd1, cd->cd2, handler, 0, offsets, resultp, lengthp); } char * str_cd_iconveh (const char *src, const iconveh_t *cd, enum iconv_ilseq_handler handler) { /* For most encodings, a trailing NUL byte in the input will be converted to a trailing NUL byte in the output. But not for UTF-7. So that this function is usable for UTF-7, we have to exclude the NUL byte from the conversion and add it by hand afterwards. */ char *result = NULL; size_t length = 0; int retval = mem_cd_iconveh_internal (src, strlen (src), cd->cd, cd->cd1, cd->cd2, handler, 1, NULL, &result, &length); if (retval < 0) { if (result != NULL) { int saved_errno = errno; free (result); errno = saved_errno; } return NULL; } /* Add the terminating NUL byte. */ result[length] = '\0'; return result; } #endif int mem_iconveh (const char *src, size_t srclen, const char *from_codeset, const char *to_codeset, enum iconv_ilseq_handler handler, size_t *offsets, char **resultp, size_t *lengthp) { if (srclen == 0) { /* Nothing to convert. */ *lengthp = 0; return 0; } else if (offsets == NULL && c_strcasecmp (from_codeset, to_codeset) == 0) { char *result; if (*resultp != NULL && *lengthp >= srclen) result = *resultp; else { result = (char *) malloc (srclen); if (result == NULL) { errno = ENOMEM; return -1; } } memcpy (result, src, srclen); *resultp = result; *lengthp = srclen; return 0; } else { #if HAVE_ICONV iconveh_t cd; char *result; size_t length; int retval; if (iconveh_open (to_codeset, from_codeset, &cd) < 0) return -1; result = *resultp; length = *lengthp; retval = mem_cd_iconveh (src, srclen, &cd, handler, offsets, &result, &length); if (retval < 0) { /* Close cd, but preserve the errno from str_cd_iconv. */ int saved_errno = errno; iconveh_close (&cd); errno = saved_errno; } else { if (iconveh_close (&cd) < 0) { /* Return -1, but free the allocated memory, and while doing that, preserve the errno from iconveh_close. */ int saved_errno = errno; if (result != *resultp && result != NULL) free (result); errno = saved_errno; return -1; } *resultp = result; *lengthp = length; } return retval; #else /* This is a different error code than if iconv_open existed but didn't support from_codeset and to_codeset, so that the caller can emit an error message such as "iconv() is not supported. Installing GNU libiconv and then reinstalling this package would fix this." */ errno = ENOSYS; return -1; #endif } } char * str_iconveh (const char *src, const char *from_codeset, const char *to_codeset, enum iconv_ilseq_handler handler) { if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0) { char *result = strdup (src); if (result == NULL) errno = ENOMEM; return result; } else { #if HAVE_ICONV iconveh_t cd; char *result; if (iconveh_open (to_codeset, from_codeset, &cd) < 0) return NULL; result = str_cd_iconveh (src, &cd, handler); if (result == NULL) { /* Close cd, but preserve the errno from str_cd_iconv. */ int saved_errno = errno; iconveh_close (&cd); errno = saved_errno; } else { if (iconveh_close (&cd) < 0) { /* Return NULL, but free the allocated memory, and while doing that, preserve the errno from iconveh_close. */ int saved_errno = errno; free (result); errno = saved_errno; return NULL; } } return result; #else /* This is a different error code than if iconv_open existed but didn't support from_codeset and to_codeset, so that the caller can emit an error message such as "iconv() is not supported. Installing GNU libiconv and then reinstalling this package would fix this." */ errno = ENOSYS; return NULL; #endif } }