Update upstream source from tag 'upstream/1.0'

Update to upstream version '1.0' with Debian dir 4875e7dc9f7277205f0086a63ee21ccdb1d54593
author: Jörg Frings-Fürst <debian@jff.email> 2022-01-08 11:51:39 +0100
committer: Jörg Frings-Fürst <debian@jff.email> 2022-01-08 11:51:39 +0100
commit: 2959e59fab3bab834368adefd90bd4b1b094366b (patch)
tree: 7d0ae09775ea950056193eaa2ca93844299d46f1 /lib/striconveh.c
parent: c78359d9542c86b972aac373efcf7bc7a8a560e5 (diff)
parent: be8efac78d067c138ad8dda03df4336e73f94887 (diff)
1 files changed, 103 insertions, 75 deletions
diff --git a/lib/striconveh.c b/lib/striconveh.c
index 45d76f8..5b60a7e 100644
--- a/lib/striconveh.c
+++ b/lib/striconveh.c
@@ -1,27 +1,18 @@
 /* Character set conversion with error handling.
-   Copyright (C) 2001-2018 Free Software Foundation, Inc.
+   Copyright (C) 2001-2022 Free Software Foundation, Inc.
    Written by Bruno Haible and Simon Josefsson.
 
-   This program is free software: you can redistribute it and/or
-   modify it under the terms of either:
+   This file is free software: you can redistribute it and/or modify
+   it under the terms of the GNU Lesser General Public License as
+   published by the Free Software Foundation; either version 2.1 of the
+   License, or (at your option) any later version.
 
-     * the GNU Lesser General Public License as published by the Free
-       Software Foundation; either version 3 of the License, or (at your
-       option) any later version.
-
-   or
-
-     * the GNU General Public License as published by the Free
-       Software Foundation; either version 2 of the License, or (at your
-       option) any later version.
-
-   or both in parallel, as here.
-   This program is distributed in the hope that it will be useful,
+   This file is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
+   GNU Lesser General Public License for more details.
 
-   You should have received a copy of the GNU General Public License
+   You should have received a copy of the GNU Lesser General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
 
 #include <config.h>
@@ -82,7 +73,7 @@ iconveh_open (const char *to_codeset, const char *from_codeset, iconveh_t *cdp)
         {
           int saved_errno = errno;
           if (cd != (iconv_t)(-1))
-            iconv_close (cdp->cd);
+            iconv_close (cd);
           errno = saved_errno;
           return -1;
         }
@@ -466,13 +457,18 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
                 if (cd2 == (iconv_t)(-1))
                   {
                     /* TO_CODESET is UTF-8.  */
-                    /* Error handling can produce up to 1 byte of output.  */
-                    if (length + 1 + extra_alloc > allocated)
+                    /* Error handling can produce up to 3 bytes of output
+                       (U+FFFD in UTF-8), otherwise 1 byte ('?').  */
+                    size_t extra_need =
+                      (handler == iconveh_replacement_character ? 3 : 1);
+                    if (length + extra_need + extra_alloc > allocated)
                       {
                         char *memory;
 
                         allocated = 2 * allocated;
-                        if (length + 1 + extra_alloc > allocated)
+                        if (length + extra_need + extra_alloc > allocated)
+                          allocated = 2 * allocated;
+                        if (length + extra_need + extra_alloc > allocated)
                           abort ();
                         if (result == initial_result)
                           memory = (char *) malloc (allocated);
@@ -491,7 +487,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
                         grow = false;
                       }
                     /* The input is invalid in FROM_CODESET.  Eat up one byte
-                       and emit a question mark.  */
+                       and emit a replacement character or a question mark.  */
                     if (!incremented)
                       {
                         if (insize == 0)
@@ -499,8 +495,19 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
                         inptr++;
                         insize--;
                       }
-                    result[length] = '?';
-                    length++;
+                    if (handler == iconveh_replacement_character)
+                      {
+                        /* U+FFFD in UTF-8 encoding.  */
+                        result[length+0] = '\357';
+                        result[length+1] = '\277';
+                        result[length+2] = '\275';
+                        length += 3;
+                      }
+                    else
+                      {
+                        result[length] = '?';
+                        length++;
+                      }
                   }
                 else
                   goto indirectly;
@@ -508,11 +515,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
             else
               {
                 if (result != initial_result)
-                  {
-                    int saved_errno = errno;
-                    free (result);
-                    errno = saved_errno;
-                  }
+                  free (result);
                 return -1;
               }
           }
@@ -579,11 +582,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
           else
             {
               if (result != initial_result)
-                {
-                  int saved_errno = errno;
-                  free (result);
-                  errno = saved_errno;
-                }
+                free (result);
               return -1;
             }
         }
@@ -611,7 +610,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
   {
     const bool slowly = (offsets != NULL || handler == iconveh_error);
 # define utf8bufsize 4096 /* may also be smaller or larger than tmpbufsize */
-    char utf8buf[utf8bufsize + 1];
+    char utf8buf[utf8bufsize + 3];
     size_t utf8len = 0;
     const char *in1ptr = src;
     size_t in1size = srclen;
@@ -692,19 +691,15 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
             && !(errno == E2BIG || errno == EINVAL || errno == EILSEQ))
           {
             if (result != initial_result)
-              {
-                int saved_errno = errno;
-                free (result);
-                errno = saved_errno;
-              }
+              free (result);
             return -1;
           }
         if (res1 == (size_t)(-1)
             && errno == EILSEQ && handler != iconveh_error)
           {
             /* The input is invalid in FROM_CODESET.  Eat up one byte and
-               emit a question mark.  Room for the question mark was allocated
-               at the end of utf8buf.  */
+               emit a U+FFFD character or a question mark.  Room for this
+               character was allocated at the end of utf8buf.  */
             if (!incremented1)
               {
                 if (in1size == 0)
@@ -712,7 +707,16 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
                 in1ptr++;
                 in1size--;
               }
-            *out1ptr++ = '?';
+            if (handler == iconveh_replacement_character)
+              {
+                /* U+FFFD in UTF-8 encoding.  */
+                out1ptr[0] = '\357';
+                out1ptr[1] = '\277';
+                out1ptr[2] = '\275';
+                out1ptr += 3;
+              }
+            else
+              *out1ptr++ = '?';
             res1 = 0;
           }
         errno1 = errno;
@@ -777,7 +781,7 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
                       break;
                     else if (errno == EILSEQ && handler != iconveh_error)
                       {
-                        /* Error handling can produce up to 10 bytes of ASCII
+                        /* Error handling can produce up to 10 bytes of UTF-8
                            output.  But TO_CODESET may be UCS-2, UTF-16 or
                            UCS-4, so use CD2 here as well.  */
                         char scratchbuf[10];
@@ -825,6 +829,14 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
                             scratchbuf[scratchlen++] = hex[(uc>>4) & 15];
                             scratchbuf[scratchlen++] = hex[uc & 15];
                           }
+                        else if (handler == iconveh_replacement_character)
+                          {
+                            /* U+FFFD in UTF-8 encoding.  */
+                            scratchbuf[0] = '\357';
+                            scratchbuf[1] = '\277';
+                            scratchbuf[2] = '\275';
+                            scratchlen = 3;
+                          }
                         else
                           {
                             scratchbuf[0] = '?';
@@ -834,9 +846,45 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
                         inptr = scratchbuf;
                         insize = scratchlen;
                         if (cd2 != (iconv_t)(-1))
-                          res = iconv (cd2,
-                                       (ICONV_CONST char **) &inptr, &insize,
-                                       &out2ptr, &out2size);
+                          {
+                            char *out2ptr_try = out2ptr;
+                            size_t out2size_try = out2size;
+                            res = iconv (cd2,
+                                         (ICONV_CONST char **) &inptr, &insize,
+                                         &out2ptr_try, &out2size_try);
+                            if (handler == iconveh_replacement_character
+                                && (res == (size_t)(-1)
+                                    ? errno == EILSEQ
+                                    /* FreeBSD iconv(), NetBSD iconv(), and
+                                       Solaris 11 iconv() insert a '?' if they
+                                       cannot convert.  This is what we want.
+                                       But IRIX iconv() inserts a NUL byte if it
+                                       cannot convert.
+                                       And musl libc iconv() inserts a '*' if it
+                                       cannot convert.  */
+                                    : (res > 0
+                                       && !(out2ptr_try - out2ptr == 1
+                                            && *out2ptr == '?'))))
+                              {
+                                /* iconv() failed or converted lossily.
+                                   U+FFFD can't be converted to TO_CODESET.
+                                   Use '?' instead.  */
+                                scratchbuf[0] = '?';
+                                scratchlen = 1;
+                                inptr = scratchbuf;
+                                insize = scratchlen;
+                                res = iconv (cd2,
+                                             (ICONV_CONST char **) &inptr, &insize,
+                                             &out2ptr, &out2size);
+                              }
+                            else
+                              {
+                                /* Accept the results of the iconv() call.  */
+                                out2ptr = out2ptr_try;
+                                out2size = out2size_try;
+                                res = 0;
+                              }
+                          }
                         else
                           {
                             /* TO_CODESET is UTF-8.  */
@@ -901,9 +949,10 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
                             length = out2ptr - result;
                           }
 # if !defined _LIBICONV_VERSION && !(defined __GLIBC__ && !defined __UCLIBC__)
-                        /* Irix iconv() inserts a NUL byte if it cannot convert.
-                           NetBSD iconv() inserts a question mark if it cannot
-                           convert.
+                        /* IRIX iconv() inserts a NUL byte if it cannot convert.
+                           FreeBSD iconv(), NetBSD iconv(), and Solaris 11
+                           iconv() insert a '?' if they cannot convert.
+                           musl libc iconv() inserts a '*' if it cannot convert.
                            Only GNU libiconv and GNU libc are known to prefer
                            to fail rather than doing a lossy conversion.  */
                         if (res != (size_t)(-1) && res > 0)
@@ -916,22 +965,14 @@ mem_cd_iconveh_internal (const char *src, size_t srclen,
                           {
                             /* Failure converting the ASCII replacement.  */
                             if (result != initial_result)
-                              {
-                                int saved_errno = errno;
-                                free (result);
-                                errno = saved_errno;
-                              }
+                              free (result);
                             return -1;
                           }
                       }
                     else
                       {
                         if (result != initial_result)
-                          {
-                            int saved_errno = errno;
-                            free (result);
-                            errno = saved_errno;
-                          }
+                          free (result);
                         return -1;
                       }
                   }
@@ -1050,12 +1091,7 @@ str_cd_iconveh (const char *src,
 
   if (retval < 0)
     {
-      if (result != NULL)
-        {
-          int saved_errno = errno;
-          free (result);
-          errno = saved_errno;
-        }
+      free (result);
       return NULL;
     }
 
@@ -1127,12 +1163,8 @@ mem_iconveh (const char *src, size_t srclen,
         {
           if (iconveh_close (&cd) < 0)
             {
-              /* Return -1, but free the allocated memory, and while doing
-                 that, preserve the errno from iconveh_close.  */
-              int saved_errno = errno;
-              if (result != *resultp && result != NULL)
+              if (result != *resultp)
                 free (result);
-              errno = saved_errno;
               return -1;
             }
           *resultp = result;
@@ -1186,11 +1218,7 @@ str_iconveh (const char *src,
         {
           if (iconveh_close (&cd) < 0)
             {
-              /* Return NULL, but free the allocated memory, and while doing
-                 that, preserve the errno from iconveh_close.  */
-              int saved_errno = errno;
               free (result);
-              errno = saved_errno;
               return NULL;
             }
         }
author	Jörg Frings-Fürst <debian@jff.email>	2022-01-08 11:51:39 +0100
committer	Jörg Frings-Fürst <debian@jff.email>	2022-01-08 11:51:39 +0100
commit	2959e59fab3bab834368adefd90bd4b1b094366b (patch)
tree	7d0ae09775ea950056193eaa2ca93844299d46f1 /lib/striconveh.c
parent	c78359d9542c86b972aac373efcf7bc7a8a560e5 (diff)
parent	be8efac78d067c138ad8dda03df4336e73f94887 (diff)