summary | refs | log | tree | commit | diff
path: root/lib/localcharset.c
diff options
context:
space:
mode:
author: Manuel A. Fernandez Montecelo <manuel.montezelo@gmail.com> 2016-05-27 14:28:30 +0100
committer: Manuel A. Fernandez Montecelo <manuel.montezelo@gmail.com> 2016-05-27 14:28:30 +0100
commit: a9a31b1de5776a3b08a82101a4fa711294f0dd1d (patch)
tree: 159134a624e51509f40ed8823249f09a70d1dda3 /lib/localcharset.c
parent: 5f2b09982312c98863eb9a8dfe2c608b81f58259 (diff)
Imported Upstream version 0.9.6+really0.9.3 [ref: upstream/0.9.6+really0.9.3]
Diffstat (limited to 'lib/localcharset.c')
-rw-r--r-- lib/localcharset.c 130
1 files changed, 33 insertions, 97 deletions
diff --git a/lib/localcharset.c b/lib/localcharset.c
index 5bbe2c8..29de23d 100644
--- a/lib/localcharset.c
+++ b/lib/localcharset.c
@@ -1,6 +1,6 @@
/* Determine a canonical name for the current locale's character encoding.
- Copyright (C) 2000-2006, 2008-2015 Free Software Foundation, Inc.
+ Copyright (C) 2000-2006, 2008-2010 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
@@ -13,7 +13,8 @@
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License along
- with this program; if not, see <http://www.gnu.org/licenses/>. */
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
/* Written by Bruno Haible <bruno@clisp.org>. */
@@ -29,12 +30,11 @@
#include <stdlib.h>
#if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
-# define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
+# define DARWIN7 /* Darwin 7 or newer, i.e. MacOS X 10.3 or newer */
#endif
#if defined _WIN32 || defined __WIN32__
-# define WINDOWS_NATIVE
-# include <locale.h>
+# define WIN32_NATIVE
#endif
#if defined __EMX__
@@ -44,7 +44,7 @@
# endif
#endif
-#if !defined WINDOWS_NATIVE
+#if !defined WIN32_NATIVE
# include <unistd.h>
# if HAVE_LANGINFO_CODESET
# include <langinfo.h>
@@ -57,7 +57,7 @@
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
# endif
-#elif defined WINDOWS_NATIVE
+#elif defined WIN32_NATIVE
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
#endif
@@ -66,11 +66,6 @@
# include <os2.h>
#endif
-/* For MB_CUR_MAX_L */
-#if defined DARWIN7
-# include <xlocale.h>
-#endif
-
#if ENABLE_RELOCATABLE
# include "relocatable.h"
#else
@@ -88,7 +83,7 @@
#endif
#if defined _WIN32 || defined __WIN32__ || defined __CYGWIN__ || defined __EMX__ || defined __DJGPP__
- /* Native Windows, Cygwin, OS/2, DOS */
+ /* Win32, Cygwin, OS/2, DOS */
# define ISSLASH(C) ((C) == '/' || (C) == '\\')
#endif
@@ -128,7 +123,7 @@ get_charset_aliases (void)
cp = charset_aliases;
if (cp == NULL)
{
-#if !(defined DARWIN7 || defined VMS || defined WINDOWS_NATIVE || defined __CYGWIN__ || defined OS2)
+#if !(defined DARWIN7 || defined VMS || defined WIN32_NATIVE || defined __CYGWIN__)
const char *dir;
const char *base = "charset.alias";
char *file_name;
@@ -233,7 +228,8 @@ get_charset_aliases (void)
{
/* Out of memory. */
res_size = 0;
- free (old_res_ptr);
+ if (old_res_ptr != NULL)
+ free (old_res_ptr);
break;
}
strcpy (res_ptr + res_size - (l2 + 1) - (l1 + 1), buf1);
@@ -313,7 +309,7 @@ get_charset_aliases (void)
"DECKOREAN" "\0" "EUC-KR" "\0";
# endif
-# if defined WINDOWS_NATIVE || defined __CYGWIN__
+# if defined WIN32_NATIVE || defined __CYGWIN__
/* To avoid the troubles of installing a separate file in the same
directory as the DLL and of retrieving the DLL's directory at
runtime, simply inline the aliases here. */
@@ -342,36 +338,6 @@ get_charset_aliases (void)
"CP54936" "\0" "GB18030" "\0"
"CP65001" "\0" "UTF-8" "\0";
# endif
-# if defined OS2
- /* To avoid the troubles of installing a separate file in the same
- directory as the DLL and of retrieving the DLL's directory at
- runtime, simply inline the aliases here. */
-
- /* The list of encodings is taken from "List of OS/2 Codepages"
- by Alex Taylor:
- <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
- See also "IBM Globalization - Code page identifiers":
- <http://www-01.ibm.com/software/globalization/cp/cp_cpgid.html>. */
- cp = "CP813" "\0" "ISO-8859-7" "\0"
- "CP878" "\0" "KOI8-R" "\0"
- "CP819" "\0" "ISO-8859-1" "\0"
- "CP912" "\0" "ISO-8859-2" "\0"
- "CP913" "\0" "ISO-8859-3" "\0"
- "CP914" "\0" "ISO-8859-4" "\0"
- "CP915" "\0" "ISO-8859-5" "\0"
- "CP916" "\0" "ISO-8859-8" "\0"
- "CP920" "\0" "ISO-8859-9" "\0"
- "CP921" "\0" "ISO-8859-13" "\0"
- "CP923" "\0" "ISO-8859-15" "\0"
- "CP954" "\0" "EUC-JP" "\0"
- "CP964" "\0" "EUC-TW" "\0"
- "CP970" "\0" "EUC-KR" "\0"
- "CP1089" "\0" "ISO-8859-6" "\0"
- "CP1208" "\0" "UTF-8" "\0"
- "CP1381" "\0" "GB2312" "\0"
- "CP1386" "\0" "GBK" "\0"
- "CP3372" "\0" "EUC-JP" "\0";
-# endif
#endif
charset_aliases = cp;
@@ -395,7 +361,7 @@ locale_charset (void)
const char *codeset;
const char *aliases;
-#if !(defined WINDOWS_NATIVE || defined OS2)
+#if !(defined WIN32_NATIVE || defined OS2)
# if HAVE_LANGINFO_CODESET
@@ -403,9 +369,10 @@ locale_charset (void)
codeset = nl_langinfo (CODESET);
# ifdef __CYGWIN__
- /* Cygwin < 1.7 does not have locales. nl_langinfo (CODESET) always
- returns "US-ASCII". Return the suffix of the locale name from the
- environment variables (if present) or the codepage as a number. */
+ /* Cygwin 1.5.x does not have locales. nl_langinfo (CODESET) always
+ returns "US-ASCII". As long as this is not fixed, return the suffix
+ of the locale name from the environment variables (if present) or
+ the codepage as a number. */
if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
{
const char *locale;
@@ -442,10 +409,10 @@ locale_charset (void)
}
}
- /* The Windows API has a function returning the locale's codepage as a
- number: GetACP(). This encoding is used by Cygwin, unless the user
- has set the environment variable CYGWIN=codepage:oem (which very few
- people do).
+ /* Woe32 has a function returning the locale's codepage as a number:
+ GetACP(). This encoding is used by Cygwin, unless the user has set
+ the environment variable CYGWIN=codepage:oem (which very few people
+ do).
Output directed to console windows needs to be converted (to
GetOEMCP() if the console is using a raster font, or to
GetConsoleOutputCP() if it is using a TrueType font). Cygwin does
@@ -488,38 +455,18 @@ locale_charset (void)
# endif
-#elif defined WINDOWS_NATIVE
+#elif defined WIN32_NATIVE
static char buf[2 + 10 + 1];
- /* The Windows API has a function returning the locale's codepage as
- a number, but the value doesn't change according to what the
- 'setlocale' call specified. So we use it as a last resort, in
- case the string returned by 'setlocale' doesn't specify the
- codepage. */
- char *current_locale = setlocale (LC_ALL, NULL);
- char *pdot;
-
- /* If they set different locales for different categories,
- 'setlocale' will return a semi-colon separated list of locale
- values. To make sure we use the correct one, we choose LC_CTYPE. */
- if (strchr (current_locale, ';'))
- current_locale = setlocale (LC_CTYPE, NULL);
-
- pdot = strrchr (current_locale, '.');
- if (pdot)
- sprintf (buf, "CP%s", pdot + 1);
- else
- {
- /* The Windows API has a function returning the locale's codepage as a
- number: GetACP().
- When the output goes to a console window, it needs to be provided in
- GetOEMCP() encoding if the console is using a raster font, or in
- GetConsoleOutputCP() encoding if it is using a TrueType font.
- But in GUI programs and for output sent to files and pipes, GetACP()
- encoding is the best bet. */
- sprintf (buf, "CP%u", GetACP ());
- }
+ /* Woe32 has a function returning the locale's codepage as a number:
+ GetACP().
+ When the output goes to a console window, it needs to be provided in
+ GetOEMCP() encoding if the console is using a raster font, or in
+ GetConsoleOutputCP() encoding if it is using a TrueType font.
+ But in GUI programs and for output sent to files and pipes, GetACP()
+ encoding is the best bet. */
+ sprintf (buf, "CP%u", GetACP ());
codeset = buf;
#elif defined OS2
@@ -529,8 +476,6 @@ locale_charset (void)
ULONG cp[3];
ULONG cplen;
- codeset = NULL;
-
/* Allow user to override the codeset, as set in the operating system,
with standard language environment variables. */
locale = getenv ("LC_ALL");
@@ -562,12 +507,10 @@ locale_charset (void)
}
}
- /* For the POSIX locale, don't use the system's codepage. */
- if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
- codeset = "";
+ /* Resolve through the charset.alias file. */
+ codeset = locale;
}
-
- if (codeset == NULL)
+ else
{
/* OS/2 has a function returning the locale's codepage as a number. */
if (DosQueryCp (sizeof (cp), cp, &cplen))
@@ -602,12 +545,5 @@ locale_charset (void)
if (codeset[0] == '\0')
codeset = "ASCII";
-#ifdef DARWIN7
- /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
- (the default codeset) does not work when MB_CUR_MAX is 1. */
- if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
- codeset = "ASCII";
-#endif
-
return codeset;
}