diff options
Diffstat (limited to 'tests/setlocale.c')
-rw-r--r-- | tests/setlocale.c | 787 |
1 files changed, 757 insertions, 30 deletions
diff --git a/tests/setlocale.c b/tests/setlocale.c index 1ac3f4d..3ea3559 100644 --- a/tests/setlocale.c +++ b/tests/setlocale.c @@ -1,17 +1,17 @@ /* Set the current locale. -*- coding: utf-8 -*- - Copyright (C) 2009, 2011-2018 Free Software Foundation, Inc. + Copyright (C) 2009, 2011-2022 Free Software Foundation, Inc. - This program is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3 of the License, or - (at your option) any later version. + This file is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 3 of the + License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + This file is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + GNU Lesser General Public License for more details. - You should have received a copy of the GNU General Public License + You should have received a copy of the GNU Lesser General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. */ /* Written by Bruno Haible <bruno@clisp.org>, 2009. */ @@ -29,15 +29,59 @@ /* Specification. */ #include <locale.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> #include "localename.h" +#if HAVE_CFLOCALECOPYPREFERREDLANGUAGES || HAVE_CFPREFERENCESCOPYAPPVALUE +# if HAVE_CFLOCALECOPYPREFERREDLANGUAGES +# include <CoreFoundation/CFLocale.h> +# elif HAVE_CFPREFERENCESCOPYAPPVALUE +# include <CoreFoundation/CFPreferences.h> +# endif +# include <CoreFoundation/CFPropertyList.h> +# include <CoreFoundation/CFArray.h> +# include <CoreFoundation/CFString.h> +extern void gl_locale_name_canonicalize (char *name); +#endif + #if 1 # undef setlocale +/* Which of the replacements to activate? */ +# if NEED_SETLOCALE_IMPROVED +# define setlocale_improved rpl_setlocale +# elif NEED_SETLOCALE_MTSAFE +# define setlocale_mtsafe rpl_setlocale +# else +# error "This file should only be compiled if NEED_SETLOCALE_IMPROVED || NEED_SETLOCALE_MTSAFE." +# endif + +/* Like setlocale, but guaranteed to be multithread-safe if LOCALE == NULL. */ +# if !SETLOCALE_NULL_ALL_MTSAFE || !SETLOCALE_NULL_ONE_MTSAFE /* i.e. if NEED_SETLOCALE_MTSAFE */ + +# if NEED_SETLOCALE_IMPROVED +static +# endif +char * +setlocale_mtsafe (int category, const char *locale) +{ + if (locale == NULL) + return (char *) setlocale_null (category); + else + return setlocale (category, locale); +} +# else /* !NEED_SETLOCALE_MTSAFE */ + +# define setlocale_mtsafe setlocale + +# endif /* NEED_SETLOCALE_MTSAFE */ + +# if NEED_SETLOCALE_IMPROVED + /* Return string representation of locale category CATEGORY. */ static const char * category_to_name (int category) @@ -72,7 +116,7 @@ category_to_name (int category) return retval; } -# if defined _WIN32 && ! defined __CYGWIN__ +# if defined _WIN32 && ! defined __CYGWIN__ /* The native Windows setlocale() function expects locale names of the form "German" or "German_Germany" or "DEU", but not "de" or "de_DE". We need @@ -604,7 +648,7 @@ search (const struct table_entry *table, size_t table_size, const char *string, { size_t i; - for (i = mid; i < hi; i++) + for (i = mid + 1; i < hi; i++) { if (strcmp (table[i].code, string) > 0) { @@ -635,13 +679,11 @@ setlocale_unixlike (int category, const char *locale) /* The native Windows implementation of setlocale understands the special locale name "C", but not "POSIX". Therefore map "POSIX" to "C". */ -#if defined _WIN32 && !defined __CYGWIN__ if (locale != NULL && strcmp (locale, "POSIX") == 0) locale = "C"; -#endif /* First, try setlocale with the original argument unchanged. */ - result = setlocale (category, locale); + result = setlocale_mtsafe (category, locale); if (result != NULL) return result; @@ -783,11 +825,44 @@ setlocale_unixlike (int category, const char *locale) return NULL; } -# else -# define setlocale_unixlike setlocale -# endif +# elif defined __ANDROID__ + +/* Like setlocale, but accept also the locale names "C" and "POSIX". */ +static char * +setlocale_unixlike (int category, const char *locale) +{ + char *result = setlocale_mtsafe (category, locale); + if (result == NULL) + switch (category) + { + case LC_CTYPE: + case LC_NUMERIC: + case LC_TIME: + case LC_COLLATE: + case LC_MONETARY: + case LC_MESSAGES: + case LC_ALL: + case LC_PAPER: + case LC_NAME: + case LC_ADDRESS: + case LC_TELEPHONE: + case LC_MEASUREMENT: + if (locale == NULL + || strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0) + result = (char *) "C"; + break; + default: + break; + } + return result; +} +# define setlocale setlocale_unixlike -# if LC_MESSAGES == 1729 +# else +# define setlocale_unixlike setlocale_mtsafe +# endif + +# if LC_MESSAGES == 1729 /* The system does not store an LC_MESSAGES locale category. Do it here. */ static char lc_messages_name[64] = "C"; @@ -809,12 +884,533 @@ setlocale_single (int category, const char *locale) return setlocale_unixlike (category, locale); } -# else -# define setlocale_single setlocale_unixlike -# endif +# else +# define setlocale_single setlocale_unixlike +# endif + +# if defined __APPLE__ && defined __MACH__ + +/* Mapping from language to main territory where that language is spoken. */ +static char const locales_with_principal_territory[][6 + 1] = + { + /* Language Main territory */ + "ace_ID", /* Achinese Indonesia */ + "af_ZA", /* Afrikaans South Africa */ + "ak_GH", /* Akan Ghana */ + "am_ET", /* Amharic Ethiopia */ + "an_ES", /* Aragonese Spain */ + "ang_GB", /* Old English Britain */ + "arn_CL", /* Mapudungun Chile */ + "as_IN", /* Assamese India */ + "ast_ES", /* Asturian Spain */ + "av_RU", /* Avaric Russia */ + "awa_IN", /* Awadhi India */ + "az_AZ", /* Azerbaijani Azerbaijan */ + "ban_ID", /* Balinese Indonesia */ + "be_BY", /* Belarusian Belarus */ + "bej_SD", /* Beja Sudan */ + "bem_ZM", /* Bemba Zambia */ + "bg_BG", /* Bulgarian Bulgaria */ + "bho_IN", /* Bhojpuri India */ + "bi_VU", /* Bislama Vanuatu */ + "bik_PH", /* Bikol Philippines */ + "bin_NG", /* Bini Nigeria */ + "bm_ML", /* Bambara Mali */ + "bn_IN", /* Bengali India */ + "bo_CN", /* Tibetan China */ + "br_FR", /* Breton France */ + "bs_BA", /* Bosnian Bosnia */ + "bug_ID", /* Buginese Indonesia */ + "ca_ES", /* Catalan Spain */ + "ce_RU", /* Chechen Russia */ + "ceb_PH", /* Cebuano Philippines */ + "co_FR", /* Corsican France */ + "cr_CA", /* Cree Canada */ + /* Don't put "crh_UZ" or "crh_UA" here. That would be asking for fruitless + political discussion. */ + "cs_CZ", /* Czech Czech Republic */ + "csb_PL", /* Kashubian Poland */ + "cy_GB", /* Welsh Britain */ + "da_DK", /* Danish Denmark */ + "de_DE", /* German Germany */ + "din_SD", /* Dinka Sudan */ + "doi_IN", /* Dogri India */ + "dsb_DE", /* Lower Sorbian Germany */ + "dv_MV", /* Divehi Maldives */ + "dz_BT", /* Dzongkha Bhutan */ + "ee_GH", /* Éwé Ghana */ + "el_GR", /* Greek Greece */ + /* Don't put "en_GB" or "en_US" here. That would be asking for fruitless + political discussion. */ + "es_ES", /* Spanish Spain */ + "et_EE", /* Estonian Estonia */ + "fa_IR", /* Persian Iran */ + "fi_FI", /* Finnish Finland */ + "fil_PH", /* Filipino Philippines */ + "fj_FJ", /* Fijian Fiji */ + "fo_FO", /* Faroese Faeroe Islands */ + "fon_BJ", /* Fon Benin */ + "fr_FR", /* French France */ + "fur_IT", /* Friulian Italy */ + "fy_NL", /* Western Frisian Netherlands */ + "ga_IE", /* Irish Ireland */ + "gd_GB", /* Scottish Gaelic Britain */ + "gon_IN", /* Gondi India */ + "gsw_CH", /* Swiss German Switzerland */ + "gu_IN", /* Gujarati India */ + "he_IL", /* Hebrew Israel */ + "hi_IN", /* Hindi India */ + "hil_PH", /* Hiligaynon Philippines */ + "hr_HR", /* Croatian Croatia */ + "hsb_DE", /* Upper Sorbian Germany */ + "ht_HT", /* Haitian Haiti */ + "hu_HU", /* Hungarian Hungary */ + "hy_AM", /* Armenian Armenia */ + "id_ID", /* Indonesian Indonesia */ + "ig_NG", /* Igbo Nigeria */ + "ii_CN", /* Sichuan Yi China */ + "ilo_PH", /* Iloko Philippines */ + "is_IS", /* Icelandic Iceland */ + "it_IT", /* Italian Italy */ + "ja_JP", /* Japanese Japan */ + "jab_NG", /* Hyam Nigeria */ + "jv_ID", /* Javanese Indonesia */ + "ka_GE", /* Georgian Georgia */ + "kab_DZ", /* Kabyle Algeria */ + "kaj_NG", /* Jju Nigeria */ + "kam_KE", /* Kamba Kenya */ + "kmb_AO", /* Kimbundu Angola */ + "kcg_NG", /* Tyap Nigeria */ + "kdm_NG", /* Kagoma Nigeria */ + "kg_CD", /* Kongo Democratic Republic of Congo */ + "kk_KZ", /* Kazakh Kazakhstan */ + "kl_GL", /* Kalaallisut Greenland */ + "km_KH", /* Central Khmer Cambodia */ + "kn_IN", /* Kannada India */ + "ko_KR", /* Korean Korea (South) */ + "kok_IN", /* Konkani India */ + "kr_NG", /* Kanuri Nigeria */ + "kru_IN", /* Kurukh India */ + "ky_KG", /* Kyrgyz Kyrgyzstan */ + "lg_UG", /* Ganda Uganda */ + "li_BE", /* Limburgish Belgium */ + "lo_LA", /* Laotian Laos */ + "lt_LT", /* Lithuanian Lithuania */ + "lu_CD", /* Luba-Katanga Democratic Republic of Congo */ + "lua_CD", /* Luba-Lulua Democratic Republic of Congo */ + "luo_KE", /* Luo Kenya */ + "lv_LV", /* Latvian Latvia */ + "mad_ID", /* Madurese Indonesia */ + "mag_IN", /* Magahi India */ + "mai_IN", /* Maithili India */ + "mak_ID", /* Makasar Indonesia */ + "man_ML", /* Mandingo Mali */ + "men_SL", /* Mende Sierra Leone */ + "mfe_MU", /* Mauritian Creole Mauritius */ + "mg_MG", /* Malagasy Madagascar */ + "mi_NZ", /* Maori New Zealand */ + "min_ID", /* Minangkabau Indonesia */ + "mk_MK", /* Macedonian North Macedonia */ + "ml_IN", /* Malayalam India */ + "mn_MN", /* Mongolian Mongolia */ + "mni_IN", /* Manipuri India */ + "mos_BF", /* Mossi Burkina Faso */ + "mr_IN", /* Marathi India */ + "ms_MY", /* Malay Malaysia */ + "mt_MT", /* Maltese Malta */ + "mwr_IN", /* Marwari India */ + "my_MM", /* Burmese Myanmar */ + "na_NR", /* Nauru Nauru */ + "nah_MX", /* Nahuatl Mexico */ + "nap_IT", /* Neapolitan Italy */ + "nb_NO", /* Norwegian Bokmål Norway */ + "nds_DE", /* Low Saxon Germany */ + "ne_NP", /* Nepali Nepal */ + "nl_NL", /* Dutch Netherlands */ + "nn_NO", /* Norwegian Nynorsk Norway */ + "no_NO", /* Norwegian Norway */ + "nr_ZA", /* South Ndebele South Africa */ + "nso_ZA", /* Northern Sotho South Africa */ + "ny_MW", /* Chichewa Malawi */ + "nym_TZ", /* Nyamwezi Tanzania */ + "nyn_UG", /* Nyankole Uganda */ + "oc_FR", /* Occitan France */ + "oj_CA", /* Ojibwa Canada */ + "or_IN", /* Oriya India */ + "pa_IN", /* Punjabi India */ + "pag_PH", /* Pangasinan Philippines */ + "pam_PH", /* Pampanga Philippines */ + "pap_AN", /* Papiamento Netherlands Antilles - this line can be removed in 2018 */ + "pbb_CO", /* Páez Colombia */ + "pl_PL", /* Polish Poland */ + "ps_AF", /* Pashto Afghanistan */ + "pt_PT", /* Portuguese Portugal */ + "raj_IN", /* Rajasthani India */ + "rm_CH", /* Romansh Switzerland */ + "rn_BI", /* Kirundi Burundi */ + "ro_RO", /* Romanian Romania */ + "ru_RU", /* Russian Russia */ + "rw_RW", /* Kinyarwanda Rwanda */ + "sa_IN", /* Sanskrit India */ + "sah_RU", /* Yakut Russia */ + "sas_ID", /* Sasak Indonesia */ + "sat_IN", /* Santali India */ + "sc_IT", /* Sardinian Italy */ + "scn_IT", /* Sicilian Italy */ + "sg_CF", /* Sango Central African Republic */ + "shn_MM", /* Shan Myanmar */ + "si_LK", /* Sinhala Sri Lanka */ + "sid_ET", /* Sidamo Ethiopia */ + "sk_SK", /* Slovak Slovakia */ + "sl_SI", /* Slovenian Slovenia */ + "sm_WS", /* Samoan Samoa */ + "smn_FI", /* Inari Sami Finland */ + "sms_FI", /* Skolt Sami Finland */ + "so_SO", /* Somali Somalia */ + "sq_AL", /* Albanian Albania */ + "sr_RS", /* Serbian Serbia */ + "srr_SN", /* Serer Senegal */ + "suk_TZ", /* Sukuma Tanzania */ + "sus_GN", /* Susu Guinea */ + "sv_SE", /* Swedish Sweden */ + "te_IN", /* Telugu India */ + "tem_SL", /* Timne Sierra Leone */ + "tet_ID", /* Tetum Indonesia */ + "tg_TJ", /* Tajik Tajikistan */ + "th_TH", /* Thai Thailand */ + "ti_ER", /* Tigrinya Eritrea */ + "tiv_NG", /* Tiv Nigeria */ + "tk_TM", /* Turkmen Turkmenistan */ + "tl_PH", /* Tagalog Philippines */ + "to_TO", /* Tonga Tonga */ + "tpi_PG", /* Tok Pisin Papua New Guinea */ + "tr_TR", /* Turkish Turkey */ + "tum_MW", /* Tumbuka Malawi */ + "ug_CN", /* Uighur China */ + "uk_UA", /* Ukrainian Ukraine */ + "umb_AO", /* Umbundu Angola */ + "ur_PK", /* Urdu Pakistan */ + "uz_UZ", /* Uzbek Uzbekistan */ + "ve_ZA", /* Venda South Africa */ + "vi_VN", /* Vietnamese Vietnam */ + "wa_BE", /* Walloon Belgium */ + "wal_ET", /* Walamo Ethiopia */ + "war_PH", /* Waray Philippines */ + "wen_DE", /* Sorbian Germany */ + "yao_MW", /* Yao Malawi */ + "zap_MX" /* Zapotec Mexico */ + }; + +/* Compare just the language part of two locale names. */ +static int +langcmp (const char *locale1, const char *locale2) +{ + size_t locale1_len; + size_t locale2_len; + int cmp; + + { + const char *locale1_end = strchr (locale1, '_'); + if (locale1_end != NULL) + locale1_len = locale1_end - locale1; + else + locale1_len = strlen (locale1); + } + { + const char *locale2_end = strchr (locale2, '_'); + if (locale2_end != NULL) + locale2_len = locale2_end - locale2; + else + locale2_len = strlen (locale2); + } + + if (locale1_len < locale2_len) + { + cmp = memcmp (locale1, locale2, locale1_len); + if (cmp == 0) + cmp = -1; + } + else + { + cmp = memcmp (locale1, locale2, locale2_len); + if (locale1_len > locale2_len && cmp == 0) + cmp = 1; + } + + return cmp; +} + +/* Given a locale name, return the main locale with the same language, + or NULL if not found. + For example: "fr_DE" -> "fr_FR". */ +static const char * +get_main_locale_with_same_language (const char *locale) +{ +# define table locales_with_principal_territory + /* The table is sorted. Perform a binary search. */ + size_t hi = sizeof (table) / sizeof (table[0]); + size_t lo = 0; + while (lo < hi) + { + /* Invariant: + for i < lo, langcmp (table[i], locale) < 0, + for i >= hi, langcmp (table[i], locale) > 0. */ + size_t mid = (hi + lo) >> 1; /* >= lo, < hi */ + int cmp = langcmp (table[mid], locale); + if (cmp < 0) + lo = mid + 1; + else if (cmp > 0) + hi = mid; + else + { + /* Found an i with + langcmp (language_table[i], locale) == 0. + Verify that it is the only such i. */ + if (mid > lo && langcmp (table[mid - 1], locale) >= 0) + abort (); + if (mid + 1 < hi && langcmp (table[mid + 1], locale) <= 0) + abort (); + return table[mid]; + } + } +# undef table + return NULL; +} + +/* Mapping from territory to main language that is spoken in that territory. */ +static char const locales_with_principal_language[][6 + 1] = + { + /* This is based on the set of existing locales in glibc, with duplicates + removed, and on the Wikipedia pages named "Languages of <territory>". + If in doubt, use the locale that exists in macOS. For example, the only + "*_IN" locale in macOS 10.13 is "hi_IN", so use that. */ + /* A useful shell function for producing a line of this table is: + func_line () + { + # Usage: func_line ll_CC + ll=`echo "$1" | sed -e 's|_.*||'` + cc=`echo "$1" | sed -e 's|^.*_||'` + llx=`sed -n -e "s|^${ll} ||p" < gettext-tools/doc/ISO_639` + ccx=`expand gettext-tools/doc/ISO_3166 | sed -n -e "s|^${cc} *||p"` + echo " \"$1\", /$X* ${llx} ${ccx} *$X/" + } + */ + /* Main language Territory */ + "ca_AD", /* Catalan Andorra */ + "ar_AE", /* Arabic United Arab Emirates */ + "ps_AF", /* Pashto Afghanistan */ + "en_AG", /* English Antigua and Barbuda */ + "sq_AL", /* Albanian Albania */ + "hy_AM", /* Armenian Armenia */ + "pap_AN", /* Papiamento Netherlands Antilles - this line can be removed in 2018 */ + "pt_AO", /* Portuguese Angola */ + "es_AR", /* Spanish Argentina */ + "de_AT", /* German Austria */ + "en_AU", /* English Australia */ + /* Aruba has two official languages: "nl_AW", "pap_AW". */ + "az_AZ", /* Azerbaijani Azerbaijan */ + "bs_BA", /* Bosnian Bosnia */ + "bn_BD", /* Bengali Bangladesh */ + "nl_BE", /* Dutch Belgium */ + "fr_BF", /* French Burkina Faso */ + "bg_BG", /* Bulgarian Bulgaria */ + "ar_BH", /* Arabic Bahrain */ + "rn_BI", /* Kirundi Burundi */ + "fr_BJ", /* French Benin */ + "es_BO", /* Spanish Bolivia */ + "pt_BR", /* Portuguese Brazil */ + "dz_BT", /* Dzongkha Bhutan */ + "en_BW", /* English Botswana */ + "be_BY", /* Belarusian Belarus */ + "en_CA", /* English Canada */ + "fr_CD", /* French Democratic Republic of Congo */ + "sg_CF", /* Sango Central African Republic */ + "de_CH", /* German Switzerland */ + "es_CL", /* Spanish Chile */ + "zh_CN", /* Chinese China */ + "es_CO", /* Spanish Colombia */ + "es_CR", /* Spanish Costa Rica */ + "es_CU", /* Spanish Cuba */ + /* Curaçao has three official languages: "nl_CW", "pap_CW", "en_CW". */ + "el_CY", /* Greek Cyprus */ + "cs_CZ", /* Czech Czech Republic */ + "de_DE", /* German Germany */ + /* Djibouti has two official languages: "ar_DJ" and "fr_DJ". */ + "da_DK", /* Danish Denmark */ + "es_DO", /* Spanish Dominican Republic */ + "ar_DZ", /* Arabic Algeria */ + "es_EC", /* Spanish Ecuador */ + "et_EE", /* Estonian Estonia */ + "ar_EG", /* Arabic Egypt */ + "ti_ER", /* Tigrinya Eritrea */ + "es_ES", /* Spanish Spain */ + "am_ET", /* Amharic Ethiopia */ + "fi_FI", /* Finnish Finland */ + /* Fiji has three official languages: "en_FJ", "fj_FJ", "hif_FJ". */ + "fo_FO", /* Faroese Faeroe Islands */ + "fr_FR", /* French France */ + "en_GB", /* English Britain */ + "ka_GE", /* Georgian Georgia */ + "en_GH", /* English Ghana */ + "kl_GL", /* Kalaallisut Greenland */ + "fr_GN", /* French Guinea */ + "el_GR", /* Greek Greece */ + "es_GT", /* Spanish Guatemala */ + "zh_HK", /* Chinese Hong Kong */ + "es_HN", /* Spanish Honduras */ + "hr_HR", /* Croatian Croatia */ + "ht_HT", /* Haitian Haiti */ + "hu_HU", /* Hungarian Hungary */ + "id_ID", /* Indonesian Indonesia */ + "en_IE", /* English Ireland */ + "he_IL", /* Hebrew Israel */ + "hi_IN", /* Hindi India */ + "ar_IQ", /* Arabic Iraq */ + "fa_IR", /* Persian Iran */ + "is_IS", /* Icelandic Iceland */ + "it_IT", /* Italian Italy */ + "ar_JO", /* Arabic Jordan */ + "ja_JP", /* Japanese Japan */ + "sw_KE", /* Swahili Kenya */ + "ky_KG", /* Kyrgyz Kyrgyzstan */ + "km_KH", /* Central Khmer Cambodia */ + "ko_KR", /* Korean Korea (South) */ + "ar_KW", /* Arabic Kuwait */ + "kk_KZ", /* Kazakh Kazakhstan */ + "lo_LA", /* Laotian Laos */ + "ar_LB", /* Arabic Lebanon */ + "de_LI", /* German Liechtenstein */ + "si_LK", /* Sinhala Sri Lanka */ + "lt_LT", /* Lithuanian Lithuania */ + /* Luxembourg has three official languages: "lb_LU", "fr_LU", "de_LU". */ + "lv_LV", /* Latvian Latvia */ + "ar_LY", /* Arabic Libya */ + "ar_MA", /* Arabic Morocco */ + "sr_ME", /* Serbian Montenegro */ + "mg_MG", /* Malagasy Madagascar */ + "mk_MK", /* Macedonian North Macedonia */ + "fr_ML", /* French Mali */ + "my_MM", /* Burmese Myanmar */ + "mn_MN", /* Mongolian Mongolia */ + "mt_MT", /* Maltese Malta */ + "mfe_MU", /* Mauritian Creole Mauritius */ + "dv_MV", /* Divehi Maldives */ + "ny_MW", /* Chichewa Malawi */ + "es_MX", /* Spanish Mexico */ + "ms_MY", /* Malay Malaysia */ + "en_NG", /* English Nigeria */ + "es_NI", /* Spanish Nicaragua */ + "nl_NL", /* Dutch Netherlands */ + "no_NO", /* Norwegian Norway */ + "ne_NP", /* Nepali Nepal */ + "na_NR", /* Nauru Nauru */ + "niu_NU", /* Niuean Niue */ + "en_NZ", /* English New Zealand */ + "ar_OM", /* Arabic Oman */ + "es_PA", /* Spanish Panama */ + "es_PE", /* Spanish Peru */ + "tpi_PG", /* Tok Pisin Papua New Guinea */ + "fil_PH", /* Filipino Philippines */ + "pa_PK", /* Punjabi Pakistan */ + "pl_PL", /* Polish Poland */ + "es_PR", /* Spanish Puerto Rico */ + "pt_PT", /* Portuguese Portugal */ + "es_PY", /* Spanish Paraguay */ + "ar_QA", /* Arabic Qatar */ + "ro_RO", /* Romanian Romania */ + "sr_RS", /* Serbian Serbia */ + "ru_RU", /* Russian Russia */ + "rw_RW", /* Kinyarwanda Rwanda */ + "ar_SA", /* Arabic Saudi Arabia */ + "en_SC", /* English Seychelles */ + "ar_SD", /* Arabic Sudan */ + "sv_SE", /* Swedish Sweden */ + "en_SG", /* English Singapore */ + "sl_SI", /* Slovenian Slovenia */ + "sk_SK", /* Slovak Slovakia */ + "en_SL", /* English Sierra Leone */ + "fr_SN", /* French Senegal */ + "so_SO", /* Somali Somalia */ + "ar_SS", /* Arabic South Sudan */ + "es_SV", /* Spanish El Salvador */ + "ar_SY", /* Arabic Syria */ + "th_TH", /* Thai Thailand */ + "tg_TJ", /* Tajik Tajikistan */ + "tk_TM", /* Turkmen Turkmenistan */ + "ar_TN", /* Arabic Tunisia */ + "to_TO", /* Tonga Tonga */ + "tr_TR", /* Turkish Turkey */ + "zh_TW", /* Chinese Taiwan */ + "sw_TZ", /* Swahili Tanzania */ + "uk_UA", /* Ukrainian Ukraine */ + "lg_UG", /* Ganda Uganda */ + "en_US", /* English United States of America */ + "es_UY", /* Spanish Uruguay */ + "uz_UZ", /* Uzbek Uzbekistan */ + "es_VE", /* Spanish Venezuela */ + "vi_VN", /* Vietnamese Vietnam */ + "bi_VU", /* Bislama Vanuatu */ + "sm_WS", /* Samoan Samoa */ + "ar_YE", /* Arabic Yemen */ + "en_ZA", /* English South Africa */ + "en_ZM", /* English Zambia */ + "en_ZW" /* English Zimbabwe */ + }; + +/* Compare just the territory part of two locale names. */ +static int +terrcmp (const char *locale1, const char *locale2) +{ + const char *territory1 = strrchr (locale1, '_') + 1; + const char *territory2 = strrchr (locale2, '_') + 1; + + return strcmp (territory1, territory2); +} + +/* Given a locale name, return the locale corresponding to the main language + with the same territory, or NULL if not found. + For example: "fr_DE" -> "de_DE". */ +static const char * +get_main_locale_with_same_territory (const char *locale) +{ + if (strrchr (locale, '_') != NULL) + { +# define table locales_with_principal_language + /* The table is sorted. Perform a binary search. */ + size_t hi = sizeof (table) / sizeof (table[0]); + size_t lo = 0; + while (lo < hi) + { + /* Invariant: + for i < lo, terrcmp (table[i], locale) < 0, + for i >= hi, terrcmp (table[i], locale) > 0. */ + size_t mid = (hi + lo) >> 1; /* >= lo, < hi */ + int cmp = terrcmp (table[mid], locale); + if (cmp < 0) + lo = mid + 1; + else if (cmp > 0) + hi = mid; + else + { + /* Found an i with + terrcmp (language_table[i], locale) == 0. + Verify that it is the only such i. */ + if (mid > lo && terrcmp (table[mid - 1], locale) >= 0) + abort (); + if (mid + 1 < hi && terrcmp (table[mid + 1], locale) <= 0) + abort (); + return table[mid]; + } + } +# undef table + } + return NULL; +} + +# endif char * -rpl_setlocale (int category, const char *locale) +setlocale_improved (int category, const char *locale) { if (locale != NULL && locale[0] == '\0') { @@ -824,6 +1420,7 @@ rpl_setlocale (int category, const char *locale) /* Set LC_CTYPE first. Then the other categories. */ static int const categories[] = { + LC_CTYPE, LC_NUMERIC, LC_TIME, LC_COLLATE, @@ -850,18 +1447,31 @@ rpl_setlocale (int category, const char *locale) if (base_name == NULL) base_name = gl_locale_name_default (); - if (setlocale_unixlike (LC_ALL, base_name) == NULL) - goto fail; -# if defined _WIN32 && ! defined __CYGWIN__ + if (setlocale_unixlike (LC_ALL, base_name) != NULL) + { + /* LC_CTYPE category already set. */ + i = 1; + } + else + { + /* On Mac OS X, "UTF-8" is a valid locale name for LC_CTYPE but + not for LC_ALL. Therefore this call may fail. So, try + another base_name. */ + base_name = "C"; + if (setlocale_unixlike (LC_ALL, base_name) == NULL) + goto fail; + i = 0; + } +# if defined _WIN32 && ! defined __CYGWIN__ /* On native Windows, setlocale(LC_ALL,...) may succeed but set the LC_CTYPE category to an invalid value ("C") when it does not support the specified encoding. Report a failure instead. */ if (strchr (base_name, '.') != NULL && strcmp (setlocale (LC_CTYPE, NULL), "C") == 0) goto fail; -# endif +# endif - for (i = 0; i < sizeof (categories) / sizeof (categories[0]); i++) + for (; i < sizeof (categories) / sizeof (categories[0]); i++) { int cat = categories[i]; const char *name; @@ -873,12 +1483,127 @@ rpl_setlocale (int category, const char *locale) /* If name is the same as base_name, it has already been set through the setlocale call before the loop. */ if (strcmp (name, base_name) != 0 -# if LC_MESSAGES == 1729 +# if LC_MESSAGES == 1729 || cat == LC_MESSAGES -# endif +# endif ) if (setlocale_single (cat, name) == NULL) +# if defined __APPLE__ && defined __MACH__ + { + /* On Mac OS X 10.13, some locales can be set through + System Preferences > Language & Region, that are not + supported by libc. The system's setlocale() falls + back to "C" for these locale categories. We can do + better, by trying an existing locale with the same + language or an existing locale with the same territory. + If we can't, print a warning, to limit user + expectations. */ + int warn = 0; + + if (cat == LC_CTYPE) + warn = (setlocale_single (cat, "UTF-8") == NULL); + else if (cat == LC_MESSAGES) + { +# if HAVE_CFLOCALECOPYPREFERREDLANGUAGES || HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */ + /* Take the primary language preference. */ +# if HAVE_CFLOCALECOPYPREFERREDLANGUAGES /* MacOS X 10.5 or newer */ + CFArrayRef prefArray = CFLocaleCopyPreferredLanguages (); +# elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */ + CFTypeRef preferences = + CFPreferencesCopyAppValue (CFSTR ("AppleLanguages"), + kCFPreferencesCurrentApplication); + if (preferences != NULL + && CFGetTypeID (preferences) == CFArrayGetTypeID ()) + { + CFArrayRef prefArray = (CFArrayRef)preferences; +# endif + int n = CFArrayGetCount (prefArray); + if (n > 0) + { + char buf[256]; + CFTypeRef element = CFArrayGetValueAtIndex (prefArray, 0); + if (element != NULL + && CFGetTypeID (element) == CFStringGetTypeID () + && CFStringGetCString ((CFStringRef)element, + buf, sizeof (buf), + kCFStringEncodingASCII)) + { + /* Remove the country. + E.g. "zh-Hans-DE" -> "zh-Hans". */ + char *last_minus = strrchr (buf, '-'); + if (last_minus != NULL) + *last_minus = '\0'; + + /* Convert to Unix locale name. + E.g. "zh-Hans" -> "zh_CN". */ + gl_locale_name_canonicalize (buf); + + /* Try setlocale with this value. */ + if (setlocale_single (cat, buf) == NULL) + { + const char *last_try = + get_main_locale_with_same_language (buf); + + if (last_try == NULL + || setlocale_single (cat, last_try) == NULL) + warn = 1; + } + } + } +# if HAVE_CFLOCALECOPYPREFERREDLANGUAGES /* MacOS X 10.5 or newer */ + CFRelease (prefArray); +# elif HAVE_CFPREFERENCESCOPYAPPVALUE /* MacOS X 10.4 or newer */ + } +# endif +# else + const char *last_try = + get_main_locale_with_same_language (name); + + if (last_try == NULL + || setlocale_single (cat, last_try) == NULL) + warn = 1; +# endif + } + else + { + /* For LC_NUMERIC, the application should use the locale + properties kCFLocaleDecimalSeparator, + kCFLocaleGroupingSeparator. + For LC_TIME, the application should use the locale + property kCFLocaleCalendarIdentifier. + For LC_COLLATE, the application should use the locale + properties kCFLocaleCollationIdentifier, + kCFLocaleCollatorIdentifier. + For LC_MONETARY, the applicationshould use the locale + properties kCFLocaleCurrencySymbol, + kCFLocaleCurrencyCode. + But since most applications don't have macOS specific + code like this, try an existing locale with the same + territory. */ + const char *last_try = + get_main_locale_with_same_territory (name); + + if (last_try == NULL + || setlocale_single (cat, last_try) == NULL) + warn = 1; + } + + if (warn) + { + /* Warn only if the environment variable + SETLOCALE_VERBOSE is set. Otherwise these warnings + are just annoyances, since normal users won't invoke + 'localedef'. */ + const char *verbose = getenv ("SETLOCALE_VERBOSE"); + if (verbose != NULL && verbose[0] != '\0') + fprintf (stderr, + "Warning: Failed to set locale category %s to %s.\n", + category_to_name (cat), name); + } + } +# else goto fail; +# endif } /* All steps were successful. */ @@ -903,7 +1628,7 @@ rpl_setlocale (int category, const char *locale) } else { -# if defined _WIN32 && ! defined __CYGWIN__ +# if defined _WIN32 && ! defined __CYGWIN__ if (category == LC_ALL && locale != NULL && strchr (locale, '.') != NULL) { char *saved_locale; @@ -938,9 +1663,11 @@ rpl_setlocale (int category, const char *locale) return setlocale (LC_ALL, NULL); } else -# endif +# endif return setlocale_single (category, locale); } } +# endif /* NEED_SETLOCALE_IMPROVED */ + #endif |