| /* |
| * R : A Computer Language for Statistical Data Analysis |
| * Copyright (C) 2005-2014 The R Core Team |
| * |
| * This program is free software; you can redistribute it and/or modify |
| * it under the terms of the GNU General Public License as published by |
| * the Free Software Foundation; either version 2 of the License, or |
| * (at your option) any later version. |
| * |
| * This program is distributed in the hope that it will be useful, |
| * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| * GNU General Public License for more details. |
| * |
| * You should have received a copy of the GNU General Public License |
| * along with this program; if not, a copy is available at |
| * https://www.R-project.org/Licenses/ |
| */ |
| |
| /* This file was contributed by Ei-ji Nakama. |
| * It exports locale2charset for use in gram.y, and rlocale.c on macOS. |
| * And sysutils.c, grDevices/src/devPS.c |
| */ |
| |
| /* setlocale(LC_CTYPE,NULL) to encodingname cf nl_langinfo(LC_CTYPE) */ |
| |
| |
| /********************************************************************* |
| * usage : char *locale2charset(const char *locale) * |
| * return : ASCII - default and undefine * |
| * other - encodename * |
| * * |
| * cc -o localecharset -DDEBUG_TEST=1 localecharset.c * |
| * or * |
| * cc -o localecharset -DDEBUG_TEST=2 localecharset.c * |
| *********************************************************************/ |
| |
| #ifdef HAVE_CONFIG_H |
| # include <config.h> |
| #endif |
| |
| #ifdef DEBUG_TEST |
| #define SPRINT(x) printf("%6d:" #x "=%s\n", __LINE__, x) |
| #define DPRINT(x) printf("%6d:" #x "=%d\n", __LINE__, x) |
| //#define HAVE_STRING_H |
| #endif |
| |
| #include <string.h> |
| #include <memory.h> |
| #include <locale.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <ctype.h> |
| |
| //#include <rlocale.h> /* To get the correct linkage for locale2charset */ |
| |
| /* name_value struct */ |
| typedef struct { |
| char *name; |
| char *value; |
| } name_value; |
| |
| |
| #ifndef __APPLE__ |
| /* |
| * codeset name defined. |
| * |
| cat /usr/X11R6/lib/X11/locale/locale.alias | \ |
| sed -e '/#.*$/d' -e 's/://' | \ |
| awk '{gsub(/^[^.]+\./, "", $2); |
| $2=toupper($2); |
| gsub(/^EUC/, "EUC-",$2); |
| gsub(/^BIG5HKSCS$/, "BIG5-HKSCS",$2); |
| if (($2!="")&&(!system("iconv --list|grep " $2 ))) print $2 |
| }' | \ |
| sed -e '/\/$/d' | \ |
| sort | uniq | \ |
| awk '{NAME=$1;gsub(/-/,"_",NAME); |
| printf("static const char ENC_%-20s\"%s\";\n", |
| NAME "[]=" , |
| $1)}' |
| */ |
| static char ENC_ARMSCII_8[]= "ARMSCII-8"; |
| static char ENC_BIG5[]= "BIG5"; |
| static char ENC_BIG5_HKSCS[]= "BIG5-HKSCS"; |
| static char ENC_C[]= "C"; |
| static char ENC_CP1251[]= "CP1251"; |
| static char ENC_CP1255[]= "CP1255"; |
| static char ENC_CP1256[]= "CP1256"; |
| static char ENC_EUC_CN[]= "EUC-CN"; |
| static char ENC_EUC_JP[]= "EUC-JP"; |
| static char ENC_EUC_KR[]= "EUC-KR"; |
| static char ENC_EUC_TW[]= "EUC-TW"; |
| static char ENC_GB2312[]= "GB2312"; |
| static char ENC_GBK[]= "GBK"; |
| static char ENC_GEORGIAN_ACADEMY[]= "GEORGIAN-ACADEMY"; |
| /* static char ENC_GEORGIAN_PS[]= "GEORGIAN-PS"; */ |
| /* static char ENC_ISIRI_3342[]= "ISIRI-3342"; */ |
| static char ENC_ISO8859_1[]= "ISO8859-1"; |
| static char ENC_ISO8859_10[]= "ISO8859-10"; |
| static char ENC_ISO8859_11[]= "ISO8859-11"; |
| static char ENC_ISO8859_13[]= "ISO8859-13"; |
| /* static char ENC_ISO8859_14[]= "ISO8859-14"; */ |
| static char ENC_ISO8859_15[]= "ISO8859-15"; |
| static char ENC_ISO8859_2[]= "ISO8859-2"; |
| static char ENC_ISO8859_3[]= "ISO8859-3"; |
| /* static char ENC_ISO8859_4[]= "ISO8859-4"; */ |
| static char ENC_ISO8859_5[]= "ISO8859-5"; |
| static char ENC_ISO8859_6[]= "ISO8859-6"; |
| static char ENC_ISO8859_7[]= "ISO8859-7"; |
| static char ENC_ISO8859_8[]= "ISO8859-8"; |
| static char ENC_ISO8859_9[]= "ISO8859-9"; |
| static char ENC_KOI8_R[]= "KOI8-R"; |
| static char ENC_KOI8_U[]= "KOI8-U"; |
| /* static char ENC_SJIS[]= "SJIS"; */ |
| static char ENC_TCVN[]= "TCVN"; |
| /* static char ENC_TIS620[]= "TIS620"; */ |
| static char ENC_UTF_8[]= "UTF-8"; |
| /* static char ENC_VISCII[]= "VISCII"; */ |
| |
| /* |
| # charset getscript. iconv list output line is backslant. |
| cat /usr/X11R6/lib/X11/locale/locale.alias | \ |
| sed -e '/#.*$/d ; /^[A-z]*\./d' -e 's/://' | \ |
| awk '{gsub(/^[^.]+\./, "", $2); |
| $2=toupper($2); |
| gsub(/^EUC/, "EUC-",$2); |
| gsub(/^BIG5HKSCS$/, "BIG5-HKSCS",$2); |
| NAME=$2; |
| gsub(/\xe7/,"\"\"\\xe7\"\"",$1); |
| gsub(/\xe5/,"\"\"\\xe5\"\"",$1); |
| gsub(/-/, "_",NAME); |
| NAME="ENC_" NAME; |
| if (($2!="")&&(!system("iconv --list|grep " $2 ))) print $1 " " NAME |
| }' | \ |
| sed -e '/\/$/d' | \ |
| sort -k 1 | uniq | \ |
| awk '{printf (" {%-34s%s},\n", "\"" $1 "\",", $2)}' |
| */ |
| |
| static const name_value guess[] = { |
| {"Cextend", ENC_ISO8859_1}, |
| {"English_United-States.437", ENC_C}, |
| {"ISO-8859-1", ENC_ISO8859_1}, |
| {"ISO8859-1", ENC_ISO8859_1}, |
| {"Japanese-EUC", ENC_EUC_JP}, |
| {"Jp_JP", ENC_EUC_JP}, |
| {"POSIX", ENC_C}, |
| {"POSIX-UTF2", ENC_C}, |
| {"aa_DJ", ENC_ISO8859_1}, |
| {"aa_ER", ENC_UTF_8}, |
| {"aa_ER@saaho", ENC_UTF_8}, |
| {"aa_ET", ENC_UTF_8}, |
| {"af", ENC_ISO8859_1}, |
| {"af_ZA", ENC_ISO8859_1}, |
| {"am", ENC_UTF_8}, |
| {"am_ET", ENC_UTF_8}, |
| {"an_ES", ENC_ISO8859_15}, |
| {"ar", ENC_ISO8859_6}, |
| {"ar_AA", ENC_ISO8859_6}, |
| {"ar_AE", ENC_ISO8859_6}, |
| {"ar_BH", ENC_ISO8859_6}, |
| {"ar_DZ", ENC_ISO8859_6}, |
| {"ar_EG", ENC_ISO8859_6}, |
| {"ar_IN", ENC_UTF_8}, |
| {"ar_IQ", ENC_ISO8859_6}, |
| {"ar_JO", ENC_ISO8859_6}, |
| {"ar_KW", ENC_ISO8859_6}, |
| {"ar_LB", ENC_ISO8859_6}, |
| {"ar_LY", ENC_ISO8859_6}, |
| {"ar_MA", ENC_ISO8859_6}, |
| {"ar_OM", ENC_ISO8859_6}, |
| {"ar_QA", ENC_ISO8859_6}, |
| {"ar_SA", ENC_ISO8859_6}, |
| {"ar_SD", ENC_ISO8859_6}, |
| {"ar_SY", ENC_ISO8859_6}, |
| {"ar_TN", ENC_ISO8859_6}, |
| {"ar_YE", ENC_ISO8859_6}, |
| {"be", ENC_CP1251}, |
| {"be_BY", ENC_CP1251}, |
| {"bg", ENC_CP1251}, |
| {"bg_BG", ENC_CP1251}, |
| {"bn_BD", ENC_UTF_8}, |
| {"bn_IN", ENC_UTF_8}, |
| {"bokm""\xe5""l", ENC_ISO8859_1}, |
| {"bokmal", ENC_ISO8859_1}, |
| {"br", ENC_ISO8859_1}, |
| {"br_FR", ENC_ISO8859_1}, |
| {"br_FR@euro", ENC_ISO8859_15}, |
| {"bs_BA", ENC_ISO8859_2}, |
| {"bulgarian", ENC_CP1251}, |
| {"byn_ER", ENC_UTF_8}, |
| {"c-french.iso88591", ENC_ISO8859_1}, |
| {"ca", ENC_ISO8859_1}, |
| {"ca_ES", ENC_ISO8859_1}, |
| {"ca_ES@euro", ENC_ISO8859_15}, |
| {"catalan", ENC_ISO8859_1}, |
| {"chinese-s", ENC_EUC_CN}, |
| {"chinese-t", ENC_EUC_TW}, |
| {"croatian", ENC_ISO8859_2}, |
| {"cs", ENC_ISO8859_2}, |
| {"cs_CS", ENC_ISO8859_2}, |
| {"cs_CZ", ENC_ISO8859_2}, |
| {"cy", ENC_ISO8859_1}, |
| {"cy_GB", ENC_ISO8859_1}, |
| {"cz", ENC_ISO8859_2}, |
| {"cz_CZ", ENC_ISO8859_2}, |
| {"czech", ENC_ISO8859_2}, |
| {"da", ENC_ISO8859_1}, |
| {"da_DK", ENC_ISO8859_1}, |
| {"danish", ENC_ISO8859_1}, |
| {"dansk", ENC_ISO8859_1}, |
| {"de", ENC_ISO8859_1}, |
| {"de_AT", ENC_ISO8859_1}, |
| {"de_AT@euro", ENC_ISO8859_15}, |
| {"de_BE", ENC_ISO8859_1}, |
| {"de_BE@euro", ENC_ISO8859_15}, |
| {"de_CH", ENC_ISO8859_1}, |
| {"de_DE", ENC_ISO8859_1}, |
| {"de_DE@euro", ENC_ISO8859_15}, |
| {"de_LI", ENC_ISO8859_1}, |
| {"de_LI@euro", ENC_ISO8859_15}, |
| {"de_LU", ENC_ISO8859_1}, |
| {"de_LU@euro", ENC_ISO8859_15}, |
| {"deutsch", ENC_ISO8859_1}, |
| {"dutch", ENC_ISO8859_1}, |
| {"eesti", ENC_ISO8859_1}, |
| {"el", ENC_ISO8859_7}, |
| {"el_GR", ENC_ISO8859_7}, |
| {"en", ENC_ISO8859_1}, |
| {"en_AU", ENC_ISO8859_1}, |
| {"en_BW", ENC_ISO8859_1}, |
| {"en_CA", ENC_ISO8859_1}, |
| {"en_DK", ENC_ISO8859_1}, |
| {"en_GB", ENC_ISO8859_1}, |
| {"en_HK", ENC_ISO8859_1}, |
| {"en_IE", ENC_ISO8859_1}, |
| {"en_IE@euro", ENC_ISO8859_15}, |
| {"en_IN", ENC_UTF_8}, |
| {"en_NZ", ENC_ISO8859_1}, |
| {"en_PH", ENC_ISO8859_1}, |
| {"en_SG", ENC_ISO8859_1}, |
| {"en_UK", ENC_ISO8859_1}, |
| {"en_US", ENC_ISO8859_1}, |
| {"en_ZA", ENC_ISO8859_1}, |
| {"en_ZW", ENC_ISO8859_1}, |
| {"es", ENC_ISO8859_1}, |
| {"es_AR", ENC_ISO8859_1}, |
| {"es_BO", ENC_ISO8859_1}, |
| {"es_CL", ENC_ISO8859_1}, |
| {"es_CO", ENC_ISO8859_1}, |
| {"es_CR", ENC_ISO8859_1}, |
| {"es_DO", ENC_ISO8859_1}, |
| {"es_EC", ENC_ISO8859_1}, |
| {"es_ES", ENC_ISO8859_1}, |
| {"es_ES@euro", ENC_ISO8859_15}, |
| {"es_GT", ENC_ISO8859_1}, |
| {"es_HN", ENC_ISO8859_1}, |
| {"es_MX", ENC_ISO8859_1}, |
| {"es_NI", ENC_ISO8859_1}, |
| {"es_PA", ENC_ISO8859_1}, |
| {"es_PE", ENC_ISO8859_1}, |
| {"es_PR", ENC_ISO8859_1}, |
| {"es_PY", ENC_ISO8859_1}, |
| {"es_SV", ENC_ISO8859_1}, |
| {"es_US", ENC_ISO8859_1}, |
| {"es_UY", ENC_ISO8859_1}, |
| {"es_VE", ENC_ISO8859_1}, |
| {"estonian", ENC_ISO8859_1}, |
| {"et", ENC_ISO8859_15}, |
| {"et_EE", ENC_ISO8859_15}, |
| {"eu", ENC_ISO8859_1}, |
| {"eu_ES", ENC_ISO8859_1}, |
| {"eu_ES@euro", ENC_ISO8859_15}, |
| {"eu_FR", ENC_ISO8859_1}, |
| {"eu_FR@euro", ENC_ISO8859_15}, |
| {"fa", ENC_UTF_8}, |
| {"fa_IR", ENC_UTF_8}, |
| {"fi", ENC_ISO8859_1}, |
| {"fi_FI", ENC_ISO8859_1}, |
| {"fi_FI@euro", ENC_ISO8859_15}, |
| {"finnish", ENC_ISO8859_1}, |
| {"fo", ENC_ISO8859_1}, |
| {"fo_FO", ENC_ISO8859_1}, |
| {"fr", ENC_ISO8859_1}, |
| {"fr_BE", ENC_ISO8859_1}, |
| {"fr_BE@euro", ENC_ISO8859_15}, |
| {"fr_CA", ENC_ISO8859_1}, |
| {"fr_CH", ENC_ISO8859_1}, |
| {"fr_FR", ENC_ISO8859_1}, |
| {"fr_FR@euro", ENC_ISO8859_15}, |
| {"fr_LU", ENC_ISO8859_1}, |
| {"fr_LU@euro", ENC_ISO8859_15}, |
| {"fran""\xe7""ais", ENC_ISO8859_1}, |
| {"french", ENC_ISO8859_1}, |
| {"ga", ENC_ISO8859_1}, |
| {"ga_IE", ENC_ISO8859_1}, |
| {"ga_IE@euro", ENC_ISO8859_15}, |
| {"galego", ENC_ISO8859_1}, |
| {"galician", ENC_ISO8859_1}, |
| {"gd", ENC_ISO8859_1}, |
| {"gd_GB", ENC_ISO8859_1}, |
| {"german", ENC_ISO8859_1}, |
| {"gez_ER", ENC_UTF_8}, |
| {"gez_ER@abegede", ENC_UTF_8}, |
| {"gez_ET", ENC_UTF_8}, |
| {"gez_ET@abegede", ENC_UTF_8}, |
| {"gl", ENC_ISO8859_1}, |
| {"gl_ES", ENC_ISO8859_1}, |
| {"gl_ES@euro", ENC_ISO8859_15}, |
| {"greek", ENC_ISO8859_7}, |
| {"gu_IN", ENC_UTF_8}, |
| {"gv", ENC_ISO8859_1}, |
| {"gv_GB", ENC_ISO8859_1}, |
| {"he", ENC_ISO8859_8}, |
| {"he_IL", ENC_ISO8859_8}, |
| {"hebrew", ENC_ISO8859_8}, |
| {"hr", ENC_ISO8859_2}, |
| {"hr_HR", ENC_ISO8859_2}, |
| {"hrvatski", ENC_ISO8859_2}, |
| {"hu", ENC_ISO8859_2}, |
| {"hu_HU", ENC_ISO8859_2}, |
| {"hungarian", ENC_ISO8859_2}, |
| {"hy", ENC_ARMSCII_8}, |
| {"hy_AM", ENC_ARMSCII_8}, |
| {"icelandic", ENC_ISO8859_1}, |
| {"id", ENC_ISO8859_1}, |
| {"id_ID", ENC_ISO8859_1}, |
| {"in", ENC_ISO8859_1}, |
| {"in_ID", ENC_ISO8859_1}, |
| {"is", ENC_ISO8859_1}, |
| {"is_IS", ENC_ISO8859_1}, |
| {"iso_8859_1", ENC_ISO8859_1}, |
| {"it", ENC_ISO8859_1}, |
| {"it_CH", ENC_ISO8859_1}, |
| {"it_IT", ENC_ISO8859_1}, |
| {"it_IT@euro", ENC_ISO8859_15}, |
| {"italian", ENC_ISO8859_1}, |
| {"iw", ENC_ISO8859_8}, |
| {"iw_IL", ENC_ISO8859_8}, |
| {"ja", ENC_EUC_JP}, |
| {"ja_JP", ENC_EUC_JP}, |
| {"japan", ENC_EUC_JP}, |
| {"japanese", ENC_EUC_JP}, |
| {"ka", ENC_GEORGIAN_ACADEMY}, |
| {"ka_GE", ENC_GEORGIAN_ACADEMY}, |
| {"kl", ENC_ISO8859_1}, |
| {"kl_GL", ENC_ISO8859_1}, |
| {"kn_IN", ENC_UTF_8}, |
| {"ko", ENC_EUC_KR}, |
| {"ko_KR", ENC_EUC_KR}, |
| {"korean", ENC_EUC_KR}, |
| {"kw", ENC_ISO8859_1}, |
| {"kw_GB", ENC_ISO8859_1}, |
| {"lg_UG", ENC_ISO8859_10}, |
| {"lithuanian", ENC_ISO8859_13}, |
| {"lt", ENC_ISO8859_13}, |
| {"lt_LT", ENC_ISO8859_13}, |
| {"lv", ENC_ISO8859_13}, |
| {"lv_LV", ENC_ISO8859_13}, |
| {"mi", ENC_ISO8859_13}, |
| {"mi_NZ", ENC_ISO8859_13}, |
| {"mk", ENC_ISO8859_5}, |
| {"mk_MK", ENC_ISO8859_5}, |
| {"ml_IN", ENC_UTF_8}, |
| {"mn_MN", ENC_UTF_8}, |
| {"mr_IN", ENC_UTF_8}, |
| {"ms", ENC_ISO8859_1}, |
| {"ms_MY", ENC_ISO8859_1}, |
| {"mt", ENC_ISO8859_3}, |
| {"mt_MT", ENC_ISO8859_3}, |
| {"nb", ENC_ISO8859_1}, |
| {"nb_NO", ENC_ISO8859_1}, |
| {"ne_NP", ENC_UTF_8}, |
| {"nl", ENC_ISO8859_1}, |
| {"nl_BE", ENC_ISO8859_1}, |
| {"nl_BE@euro", ENC_ISO8859_15}, |
| {"nl_NL", ENC_ISO8859_1}, |
| {"nl_NL@euro", ENC_ISO8859_15}, |
| {"nn", ENC_ISO8859_1}, |
| {"nn_NO", ENC_ISO8859_1}, |
| {"no", ENC_ISO8859_1}, |
| {"no@nynorsk", ENC_ISO8859_1}, |
| {"no_NO", ENC_ISO8859_1}, |
| {"norwegian", ENC_ISO8859_1}, |
| {"nynorsk", ENC_ISO8859_1}, |
| {"oc", ENC_ISO8859_1}, |
| {"oc_FR", ENC_ISO8859_1}, |
| {"oc_FR@euro", ENC_ISO8859_15}, |
| {"om_ET", ENC_UTF_8}, |
| {"om_KE", ENC_ISO8859_1}, |
| {"pa_IN", ENC_UTF_8}, |
| {"ph", ENC_ISO8859_1}, |
| {"ph_PH", ENC_ISO8859_1}, |
| {"pl", ENC_ISO8859_2}, |
| {"pl_PL", ENC_ISO8859_2}, |
| {"polish", ENC_ISO8859_2}, |
| {"portuguese", ENC_ISO8859_1}, |
| {"pp", ENC_ISO8859_1}, |
| {"pp_AN", ENC_ISO8859_1}, |
| {"pt", ENC_ISO8859_1}, |
| {"pt_BR", ENC_ISO8859_1}, |
| {"pt_PT", ENC_ISO8859_1}, |
| {"pt_PT@euro", ENC_ISO8859_15}, |
| {"ro", ENC_ISO8859_2}, |
| {"ro_RO", ENC_ISO8859_2}, |
| {"romanian", ENC_ISO8859_2}, |
| {"ru", ENC_KOI8_R}, |
| {"ru_RU", ENC_KOI8_R}, |
| {"ru_UA", ENC_KOI8_U}, |
| {"rumanian", ENC_ISO8859_2}, |
| {"russian", ENC_ISO8859_5}, |
| {"se_NO", ENC_UTF_8}, |
| {"serbocroatian", ENC_ISO8859_2}, |
| {"sh", ENC_ISO8859_2}, |
| {"sh_SP", ENC_ISO8859_2}, |
| {"sh_YU", ENC_ISO8859_2}, |
| {"sid_ET", ENC_UTF_8}, |
| {"sk", ENC_ISO8859_2}, |
| {"sk_SK", ENC_ISO8859_2}, |
| {"sl", ENC_ISO8859_2}, |
| {"sl_SI", ENC_ISO8859_2}, |
| {"slovak", ENC_ISO8859_2}, |
| {"slovene", ENC_ISO8859_2}, |
| {"slovenian", ENC_ISO8859_2}, |
| {"so_DJ", ENC_ISO8859_1}, |
| {"so_ET", ENC_UTF_8}, |
| {"so_KE", ENC_ISO8859_1}, |
| {"so_SO", ENC_ISO8859_1}, |
| {"sp", ENC_ISO8859_5}, |
| {"sp_YU", ENC_ISO8859_5}, |
| {"spanish", ENC_ISO8859_1}, |
| {"sq", ENC_ISO8859_2}, |
| {"sq_AL", ENC_ISO8859_2}, |
| {"sr", ENC_ISO8859_5}, |
| {"sr@cyrillic", ENC_ISO8859_5}, |
| {"sr_SP", ENC_ISO8859_2}, |
| {"sr_YU", ENC_ISO8859_5}, |
| {"sr_YU@cyrillic", ENC_ISO8859_5}, |
| {"st_ZA", ENC_ISO8859_1}, |
| {"sv", ENC_ISO8859_1}, |
| {"sv_FI", ENC_ISO8859_1}, |
| {"sv_FI@euro", ENC_ISO8859_15}, |
| {"sv_SE", ENC_ISO8859_1}, |
| {"sv_SE@euro", ENC_ISO8859_15}, |
| {"swedish", ENC_ISO8859_1}, |
| {"te_IN", ENC_UTF_8}, |
| {"th", ENC_ISO8859_11}, |
| {"th_TH", ENC_ISO8859_11}, |
| {"thai", ENC_ISO8859_11}, |
| {"ti_ER", ENC_UTF_8}, |
| {"ti_ET", ENC_UTF_8}, |
| {"tig_ER", ENC_UTF_8}, |
| {"tl", ENC_ISO8859_1}, |
| {"tl_PH", ENC_ISO8859_1}, |
| {"tr", ENC_ISO8859_9}, |
| {"tr_TR", ENC_ISO8859_9}, |
| {"turkish", ENC_ISO8859_9}, |
| {"uk", ENC_KOI8_U}, |
| {"uk_UA", ENC_KOI8_U}, |
| {"ur", ENC_CP1256}, |
| {"ur_PK", ENC_CP1256}, |
| {"uz_UZ", ENC_ISO8859_1}, |
| {"uz_UZ@cyrillic", ENC_UTF_8}, |
| {"vi", ENC_TCVN}, |
| {"vi_VN", ENC_TCVN}, |
| {"wa", ENC_ISO8859_1}, |
| {"wa_BE", ENC_ISO8859_1}, |
| {"wa_BE@euro", ENC_ISO8859_15}, |
| {"xh_ZA", ENC_ISO8859_1}, |
| {"yi", ENC_CP1255}, |
| {"yi_US", ENC_CP1255}, |
| {"zh_CN", ENC_GBK}, |
| {"zh_HK", ENC_BIG5_HKSCS}, |
| {"zh_SG", ENC_GB2312}, |
| {"zh_TW", ENC_BIG5}, |
| {"zu_ZA", ENC_ISO8859_1}, |
| }; |
| static const int guess_count = (sizeof(guess)/sizeof(name_value)); |
| #endif |
| |
| static const name_value known[] = { |
| {"iso88591", "ISO8859-1"}, |
| {"iso88592", "ISO8859-2"}, |
| {"iso88593", "ISO8859-3"}, |
| {"iso88596", "ISO8859-6"}, |
| {"iso88597", "ISO8859-7"}, |
| {"iso88598", "ISO8859-8"}, |
| {"iso88599", "ISO8859-9"}, |
| {"iso885910", "ISO8859-10"}, |
| {"iso885913", "ISO8859-13"}, |
| {"iso885914", "ISO8859-14"}, |
| {"iso885915", "ISO8859-15"}, |
| {"cp1251", "CP1251"}, |
| {"cp1255", "CP1255"}, |
| {"eucjp", "EUC-JP"}, |
| {"euckr", "EUC-KR"}, |
| {"euctw", "EUC-TW"}, |
| {"georgianps", "GEORGIAN-PS"}, |
| {"koi8u", "KOI8-U"}, |
| {"tcvn", "TCVN"}, |
| {"big5", "BIG5"}, |
| {"gb2312", "GB2312"}, |
| {"gb18030", "GB18030"}, |
| {"gbk", "GBK"}, |
| {"tis-620", "TIS-620"}, |
| {"sjis", "SHIFT_JIS"}, |
| {"euccn", "GB2312"}, |
| {"big5-hkscs", "BIG5-HKSCS"}, |
| #ifdef __APPLE__ |
| /* known additional Apple encodings (see locale -a) up to macOS 10.5, |
| unlike other systems they correspond directly */ |
| {"iso8859-1", "ISO8859-1"}, |
| {"iso8859-2", "ISO8859-2"}, |
| {"iso8859-4", "ISO8859-4"}, |
| {"iso8859-7", "ISO8859-7"}, |
| {"iso8859-9", "ISO8859-9"}, |
| {"iso8859-13", "ISO8859-13"}, |
| {"iso8859-15", "ISO8859-15"}, |
| {"koi8-u", "KOI8-U"}, |
| {"koi8-r", "KOI8-R"}, |
| {"pt154", "PT154"}, |
| {"us-ascii", "ASCII"}, |
| {"armscii-8", "ARMSCII-8"}, |
| {"iscii-dev", "ISCII-DEV"}, |
| {"big5hkscs", "BIG5-HKSCS"}, |
| #endif |
| }; |
| static const int known_count = (sizeof(known)/sizeof(name_value)); |
| |
| |
| #ifndef __APPLE__ |
| static char* name_value_search(const char *name, const name_value table[], |
| const int table_count) |
| { |
| int min, mid, max; |
| |
| #if defined(DEBUG_TEST) |
| static last; |
| DPRINT(last); |
| last = 0; |
| #endif |
| |
| min = 0; |
| max = table_count - 1; |
| |
| if ( 0 > strcmp(name,table[min].name) || |
| 0 < strcmp(name,table[max].name) ) { |
| #if defined(DEBUG_TEST) && DEBUG_TEST > 1 |
| DPRINT(strcmp(name, table[min].name)); |
| DPRINT(strcmp(name, table[max].name)); |
| #endif |
| return (NULL); |
| } |
| while (max >= min) { |
| #if defined(DEBUG_TEST) |
| last++; |
| #endif |
| mid = (min + max) / 2; |
| #if defined(DEBUG_TEST) && DEBUG_TEST > 1 |
| SPRINT(table[mid].name); |
| #endif |
| if (0 < strcmp(name,table[mid].name)) { |
| #if defined(DEBUG_TEST) && DEBUG_TEST > 1 |
| DPRINT(strcmp(name, table[mid].name)); |
| #endif |
| min = mid + 1; |
| } else if (0 > strcmp(name, table[mid].name)) { |
| #if defined(DEBUG_TEST) && DEBUG_TEST > 1 |
| DPRINT(strcmp(name, table[mid].name)); |
| #endif |
| max = mid - 1; |
| } else { |
| #if defined(DEBUG_TEST) && DEBUG_TEST > 1 |
| DPRINT(strcmp(name, table[mid].name)); |
| #endif |
| return(table[mid].value); |
| } |
| } |
| return (NULL); |
| } |
| #endif |
| |
| const char *locale2charset(const char *locale) |
| { |
| static char charset[128]; |
| |
| char la_loc[128]; |
| char enc[128], *p; |
| int i; |
| int cp; |
| #ifndef __APPLE__ |
| char *value; |
| #endif |
| |
| if ((locale == NULL) || (0 == strcmp(locale, "NULL"))) |
| locale = setlocale(LC_CTYPE,NULL); |
| |
| /* in some rare circumstances Darwin may return NULL */ |
| if (!locale || !strcmp(locale, "C") || !strcmp(locale, "POSIX")) |
| return ("ASCII"); |
| |
| memset(charset,0,sizeof(charset)); |
| |
| /* separate language_locale.encoding |
| NB, under Windows 'locale' may contains dots |
| */ |
| memset(la_loc, 0, sizeof(la_loc)); |
| memset(enc, 0, sizeof(enc)); |
| p = strrchr(locale, '.'); |
| if(p) { |
| strncpy(enc, p+1, sizeof(enc)-1); |
| enc[sizeof(enc) - 1] = '\0'; |
| strncpy(la_loc, locale, sizeof(la_loc)-1); |
| la_loc[sizeof(la_loc) - 1] = '\0'; |
| p = strrchr(la_loc, '.'); |
| if(p) *p = '\0'; |
| } |
| |
| #ifdef Win32 |
| /* |
| ## PUTTY suggests mapping Windows code pages as |
| ## 1250 -> ISO 8859-2: this is WRONG |
| ## 1251 -> KOI8-U |
| ## 1252 -> ISO 8859-1 |
| ## 1253 -> ISO 8859-7 |
| ## 1254 -> ISO 8859-9 |
| ## 1255 -> ISO 8859-8 |
| ## 1256 -> ISO 8859-6 |
| ## 1257 -> ISO 8859-13 |
| */ |
| switch(cp = atoi(enc)) { |
| /* case 1250: return "ISO8859-2"; */ |
| /* case 1251: return "KOI8-U"; This is not anywhere near the same */ |
| case 1252: return "ISO8859-1"; |
| /* |
| case 1253: return "ISO8859-7"; |
| case 1254: return "ISO8859-9"; |
| case 1255: return "ISO8859-8"; |
| case 1256: return "ISO8859-6"; |
| */ |
| case 1257: return "ISO8859-13"; |
| default: |
| snprintf(charset, 128, "CP%u", cp); |
| return charset; |
| } |
| #endif |
| |
| /* |
| * Assume locales are like en_US[.utf8[@euro]] |
| */ |
| /* cut encoding @hoge no use. |
| for(i=0;enc[i] && enc[i]!='@' && i<sizeof(enc)-1;i++); |
| enc[i]='\0'; |
| */ |
| |
| /* for AIX */ |
| if (0 == strcmp(enc, "UTF-8")) strcpy(enc, "utf8"); |
| |
| if(strcmp(enc, "") && strcmp(enc, "utf8")) { |
| for(i = 0; enc[i]; i++) enc[i] = (char) tolower(enc[i]); |
| |
| for(i = 0; i < known_count; i++) |
| if (0 == strcmp(known[i].name,enc)) return known[i].value; |
| |
| /* cut encoding old linux cp- */ |
| if (0 == strncmp(enc, "cp-", 3)){ |
| snprintf(charset, 128, "CP%s", enc+3); |
| return charset; |
| } |
| /* cut encoding IBM ibm- */ |
| if (0 == strncmp(enc, "ibm", 3)){ |
| cp = atoi(enc + 3); |
| snprintf(charset, 128, "IBM-%d", abs(cp)); |
| /* IBM-[0-9]+ case */ |
| if(cp != 0) return charset; |
| /* IBM-eucXX case */ |
| strncpy(charset, (enc[3] == '-') ? enc+4: enc+3, sizeof(charset)); |
| charset[sizeof(charset) - 1] = '\0'; |
| if(strncmp(charset, "euc", 3)) { |
| if (charset[3] != '-') { |
| for(i = (int) strlen(charset)-3; 0 < i; i--) |
| charset[i+1] = charset[i]; |
| charset[3] = '-'; |
| } |
| for(i = 0; charset[i]; i++) |
| charset[i] = (char) toupper(charset[i]); |
| return charset; |
| } |
| } |
| |
| /* let's hope it is a ll_* name */ |
| if (0 == strcmp(enc, "euc")) { |
| /* This is OK as encoding names are ASCII */ |
| if(isalpha((int)la_loc[0]) && isalpha((int)la_loc[1]) |
| && (la_loc[2] == '_')) { |
| if (0 == strncmp("ja", la_loc, 2)) return "EUC-JP"; |
| if (0 == strncmp("ko", la_loc, 2)) return "EUC-KR"; |
| if (0 == strncmp("zh", la_loc, 2)) return "GB2312"; |
| } |
| } |
| |
| } |
| |
| #ifdef __APPLE__ |
| /* on macOS *all* real locales w/o encoding part are UTF-8 locales |
| (C and POSIX are virtual and taken care of previously) */ |
| return "UTF-8"; |
| #else |
| |
| if(0 == strcmp(enc, "utf8")) return "UTF-8"; |
| |
| value = name_value_search(la_loc, guess, guess_count); |
| return value == NULL ? "ASCII" : value; |
| #endif |
| } |
| |
| /***************************************************** |
| * Test !! |
| *****************************************************/ |
| #ifdef DEBUG_TEST |
| main() |
| { |
| int i; |
| i=0; |
| setlocale(LC_CTYPE,""); |
| DPRINT(guess_count); |
| #ifndef Win32 |
| SPRINT(locale2charset(NULL)); |
| SPRINT(locale2charset("ja")); |
| SPRINT(locale2charset("ja_JP")); |
| SPRINT(locale2charset("ja_JP.eucJP")); |
| SPRINT(locale2charset("ja_JP.ujis")); |
| SPRINT(locale2charset("ja_JP.IBM-eucJP")); |
| SPRINT(locale2charset("ja_JP.sjis")); |
| SPRINT(locale2charset("ja_JP.IBM-932")); |
| /* cannot encoding only zh */ |
| SPRINT(locale2charset("zh""\0""BIG5")); |
| SPRINT(locale2charset("zh_CN")); |
| SPRINT(locale2charset("zh_CN.BIG5")); |
| SPRINT(locale2charset("zh_TW")); |
| SPRINT(locale2charset("zh_TW.eucTW")); |
| SPRINT(locale2charset("zh_TW.big5")); |
| SPRINT(locale2charset("zh_SG")); |
| SPRINT(locale2charset("zh_HK")); |
| SPRINT(locale2charset("ko")); |
| SPRINT(locale2charset("en")); |
| SPRINT(locale2charset("en_IE@euro")); |
| SPRINT(locale2charset("en_IN")); |
| SPRINT(locale2charset("C")); |
| SPRINT(locale2charset("fran""\xe7""ais")); |
| for(i=0;i<guess_count;i++){ |
| locale2charset(guess[i].name); |
| } |
| #else |
| SPRINT(locale2charset("japanese_JAPAN.932")); |
| SPRINT(locale2charset("japanese_JAPAN.932")); |
| #endif |
| } |
| |
| #endif /* DEBUG_TEST */ |