summaryrefslogtreecommitdiff
path: root/lib/uniconv/u8-conv-from-enc.c
blob: 9de7fed7957c6342a63a0d00b0215cc77c84839c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/* Conversion to UTF-8 from legacy encodings.
   Copyright (C) 2002, 2006-2007, 2009-2016 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or
   modify it under the terms of either:

     * the GNU Lesser General Public License as published by the Free
       Software Foundation; either version 3 of the License, or (at your
       option) any later version.

   or

     * the GNU General Public License as published by the Free
       Software Foundation; either version 2 of the License, or (at your
       option) any later version.

   or both in parallel, as here.
   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* Written by Bruno Haible <bruno@clisp.org>.  */

#include <config.h>

/* Specification.  */
#include "uniconv.h"

#include <errno.h>
#include <stdlib.h>
#include <string.h>

#include "c-strcaseeq.h"
#include "striconveha.h"
#include "unistr.h"

uint8_t *
u8_conv_from_encoding (const char *fromcode,
                       enum iconv_ilseq_handler handler,
                       const char *src, size_t srclen,
                       size_t *offsets,
                       uint8_t *resultbuf, size_t *lengthp)
{
  if (STRCASEEQ (fromcode, "UTF-8", 'U','T','F','-','8',0,0,0,0))
    {
      /* Conversion from UTF-8 to UTF-8.  No need to go through iconv().  */
      uint8_t *result;

      if (u8_check ((const uint8_t *) src, srclen))
        {
          errno = EILSEQ;
          return NULL;
        }

      if (offsets != NULL)
        {
          size_t i;

          for (i = 0; i < srclen; )
            {
              int count = u8_mblen ((const uint8_t *) src + i, srclen - i);
              /* We can rely on count > 0 because of the previous u8_check.  */
              if (count <= 0)
                abort ();
              offsets[i] = i;
              i++;
              while (--count > 0)
                offsets[i++] = (size_t)(-1);
            }
        }

      /* Memory allocation.  */
      if (resultbuf != NULL && *lengthp >= srclen)
        result = resultbuf;
      else
        {
          result = (uint8_t *) malloc (srclen > 0 ? srclen : 1);
          if (result == NULL)
            {
              errno = ENOMEM;
              return NULL;
            }
        }

      memcpy ((char *) result, src, srclen);
      *lengthp = srclen;
      return result;
    }
  else
    {
      char *result = (char *) resultbuf;
      size_t length = *lengthp;

      if (mem_iconveha (src, srclen, fromcode, "UTF-8", true, handler,
                        offsets, &result, &length) < 0)
        return NULL;

      if (result == NULL) /* when (resultbuf == NULL && length == 0)  */
        {
          result = (char *) malloc (1);
          if (result == NULL)
            {
              errno = ENOMEM;
              return NULL;
            }
        }
      *lengthp = length;
      return (uint8_t *) result;
    }
}