blob: 6bafd950624b5ccbab5d6d198b2b7d5844004f8c [file] [log] [blame] [edit]
/* Message list charset and locale charset handling.
Copyright (C) 2001-2003, 2005-2009, 2019-2021 Free Software Foundation, Inc.
Written by Bruno Haible <haible@clisp.cons.org>, 2001.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifdef HAVE_CONFIG_H
# include "config.h"
#endif
#include <alloca.h>
/* Specification. */
#include "msgl-iconv.h"
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#if HAVE_ICONV
# include <iconv.h>
#endif
#include "noreturn.h"
#include "progname.h"
#include "basename-lgpl.h"
#include "message.h"
#include "po-charset.h"
#include "xstriconv.h"
#include "xstriconveh.h"
#include "msgl-ascii.h"
#include "msgl-ofn.h"
#include "xalloc.h"
#include "xmalloca.h"
#include "c-strstr.h"
#include "xvasprintf.h"
#include "po-xerror.h"
#include "gettext.h"
#define _(str) gettext (str)
#if HAVE_ICONV
/* Report a fatal charset conversion failure for the message and encodings
   recorded in CONTEXT.  Declared as non-returning; ends in abort ().  */
_GL_NORETURN_FUNC static void conversion_error (const struct conversion_context* context);
static void
conversion_error (const struct conversion_context* context)
{
  char *description;

  if (context->to_code == po_charset_utf8)
    /* A failing conversion to UTF-8 means that the input itself is bad.  */
    description =
      xasprintf (_("%s: input is not valid in \"%s\" encoding"),
                 context->from_filename, context->from_code);
  else
    description =
      xasprintf (_("%s: error while converting from \"%s\" encoding to \"%s\" encoding"),
                 context->from_filename, context->from_code,
                 context->to_code);

  po_xerror (PO_SEVERITY_FATAL_ERROR, context->message, NULL, 0, 0, false,
             description);

  /* NOTREACHED */
  abort ();
}
/* Convert the NUL terminated STRING through the iconv descriptor CD and
   return the freshly allocated result.  The terminating NUL is converted
   together with the string.  If the conversion fails, or the output does not
   contain exactly one NUL byte located at its end, a fatal error is reported
   through conversion_error (CONTEXT).  */
char *
convert_string_directly (iconv_t cd, const char *string,
                         const struct conversion_context* context)
{
  char *converted = NULL;
  size_t converted_len = 0;
  size_t input_len = strlen (string) + 1;

  if (xmem_cd_iconv (string, input_len, cd, &converted, &converted_len) == 0
      /* The output must end in a NUL byte...  */
      && converted_len > 0
      && converted[converted_len - 1] == '\0'
      /* ... and must not contain any other NUL byte.  */
      && strlen (converted) == converted_len - 1)
    return converted;

  conversion_error (context);
  /* NOTREACHED */
  return NULL;
}
/* Convert the NUL terminated STRING through the conversion descriptor CD and
   return the freshly allocated result.  The terminating NUL is converted
   together with the string.  If the conversion fails, or the output does not
   contain exactly one NUL byte located at its end, a fatal error is reported
   through conversion_error (CONTEXT).  */
static char *
convert_string (const iconveh_t *cd, const char *string,
                const struct conversion_context* context)
{
  char *converted = NULL;
  size_t converted_len = 0;
  size_t input_len = strlen (string) + 1;

  if (xmem_cd_iconveh (string, input_len, cd, iconveh_error, NULL,
                       &converted, &converted_len) == 0
      /* The output must end in a NUL byte...  */
      && converted_len > 0
      && converted[converted_len - 1] == '\0'
      /* ... and must not contain any other NUL byte.  */
      && strlen (converted) == converted_len - 1)
    return converted;

  conversion_error (context);
  /* NOTREACHED */
  return NULL;
}
/* Replace every element of the string list SLP by its conversion through CD.
   A NULL list is accepted and left alone.  Conversion failures are fatal
   (see convert_string).  */
static void
convert_string_list (const iconveh_t *cd, string_list_ty *slp,
                     const struct conversion_context* context)
{
  size_t idx;

  if (slp == NULL)
    return;

  for (idx = 0; idx < slp->nitems; idx++)
    {
      slp->item[idx] = convert_string (cd, slp->item[idx], context);
    }
}
/* Convert the "previous" fields of MP (prev_msgctxt, prev_msgid and
   prev_msgid_plural) through CD.  Fields that are NULL stay NULL.  */
static void
convert_prev_msgid (const iconveh_t *cd, message_ty *mp,
                    const struct conversion_context* context)
{
  const char *old_ctxt = mp->prev_msgctxt;
  if (old_ctxt != NULL)
    {
      mp->prev_msgctxt = convert_string (cd, old_ctxt, context);
    }

  const char *old_id = mp->prev_msgid;
  if (old_id != NULL)
    {
      mp->prev_msgid = convert_string (cd, old_id, context);
    }

  const char *old_plural = mp->prev_msgid_plural;
  if (old_plural != NULL)
    {
      mp->prev_msgid_plural = convert_string (cd, old_plural, context);
    }
}
/* Convert msgctxt, msgid and msgid_plural of MP through CD.  The msgid is
   always converted; msgctxt and msgid_plural only when they are non-NULL.  */
static void
convert_msgid (const iconveh_t *cd, message_ty *mp,
               const struct conversion_context* context)
{
  const char *old_ctxt = mp->msgctxt;
  if (old_ctxt != NULL)
    {
      mp->msgctxt = convert_string (cd, old_ctxt, context);
    }

  mp->msgid = convert_string (cd, mp->msgid, context);

  const char *old_plural = mp->msgid_plural;
  if (old_plural != NULL)
    {
      mp->msgid_plural = convert_string (cd, old_plural, context);
    }
}
/* Convert the msgstr of MP through CD.  The msgstr is a sequence of
   msgstr_len bytes made of one or more NUL terminated pieces; it is converted
   as a whole, and the result is accepted only if it ends in a NUL byte and
   contains as many NUL terminated pieces as the original.  On success
   mp->msgstr and mp->msgstr_len are replaced; otherwise a fatal error is
   reported through conversion_error (CONTEXT).  */
static void
convert_msgstr (const iconveh_t *cd, message_ty *mp,
                const struct conversion_context* context)
{
  char *converted = NULL;
  size_t converted_len = 0;

  /* The input must be non-empty and NUL terminated.  */
  if (!(mp->msgstr_len > 0) || mp->msgstr[mp->msgstr_len - 1] != '\0')
    abort ();

  if (xmem_cd_iconveh (mp->msgstr, mp->msgstr_len, cd, iconveh_error, NULL,
                       &converted, &converted_len) == 0
      /* The result must have a NUL byte at the end.  */
      && converted_len > 0
      && converted[converted_len - 1] == '\0')
    {
      /* The result must have the same number of NUL bytes as the input.  */
      int pieces_before = 0;
      int pieces_after = 0;
      const char *cursor;
      const char *limit;

      cursor = mp->msgstr;
      limit = cursor + mp->msgstr_len;
      while (cursor < limit)
        {
          cursor += strlen (cursor) + 1;
          pieces_before++;
        }

      cursor = converted;
      limit = cursor + converted_len;
      while (cursor < limit)
        {
          cursor += strlen (cursor) + 1;
          pieces_after++;
        }

      if (pieces_before == pieces_after)
        {
          mp->msgstr = converted;
          mp->msgstr_len = converted_len;
          return;
        }
    }

  conversion_error (context);
}
#endif
/* Convert all messages of MLP from the encoding CANON_FROM_CODE to the
   encoding CANON_TO_CODE.
   If CANON_FROM_CODE is NULL, the source encoding is taken from the
   "charset=" field of the non-obsolete header entries; if no usable charset
   is found there, ASCII is assumed provided that the whole list is ASCII.
   If UPDATE_HEADER is true, the charset value in each such header entry is
   replaced by CANON_TO_CODE.
   FROM_FILENAME is used for the ".pot" exception below and is stored in the
   conversion context; it may be NULL as far as the ".pot" test is concerned.
   All failures are reported through po_xerror with PO_SEVERITY_FATAL_ERROR.
   Return value: false if the list is empty or if both encodings are the same
   pointer; otherwise true if some msgctxt or msgid was non-ASCII before the
   conversion.  */
static bool
iconv_message_list_internal (message_list_ty *mlp,
const char *canon_from_code,
const char *canon_to_code,
bool update_header,
const char *from_filename)
{
/* Remember whether the caller imposed a source encoding, because
   canon_from_code itself may be assigned from the header below.  */
bool canon_from_code_overridden = (canon_from_code != NULL);
bool msgids_changed;
size_t j;
/* If the list is empty, nothing to do. */
if (mlp->nitems == 0)
return false;
/* Search the header entry, and extract and replace the charset name. */
for (j = 0; j < mlp->nitems; j++)
if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
{
const char *header = mlp->item[j]->msgstr;
if (header != NULL)
{
const char *charsetstr = c_strstr (header, "charset=");
if (charsetstr != NULL)
{
size_t len;
char *charset;
const char *canon_charset;
/* Skip the "charset=" key; the value extends up to the next space, tab
   or newline (or the end of the string).  */
charsetstr += strlen ("charset=");
len = strcspn (charsetstr, " \t\n");
/* Temporary NUL terminated copy of the value; released with freea below.  */
charset = (char *) xmalloca (len + 1);
memcpy (charset, charsetstr, len);
charset[len] = '\0';
canon_charset = po_charset_canonicalize (charset);
if (canon_charset == NULL)
{
/* An unrecognized charset name is only examined when the caller did not
   impose a source encoding.  */
if (!canon_from_code_overridden)
{
/* Don't give an error for POT files, because POT
files usually contain only ASCII msgids. */
const char *filename = from_filename;
size_t filenamelen;
if (filename != NULL
&& (filenamelen = strlen (filename)) >= 4
&& memcmp (filename + filenamelen - 4, ".pot", 4)
== 0
&& strcmp (charset, "CHARSET") == 0)
/* canon_from_code is not assigned on this path, so the ASCII check after
   the loop still applies.  */
canon_charset = po_charset_ascii;
else
po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0,
false,
xasprintf (_("present charset \"%s\" is not a portable encoding name"),
charset));
}
}
else
{
/* Canonical names are compared by pointer here; NOTE(review): this
   presumably relies on po_charset_canonicalize returning a unique pointer
   per encoding - confirm.  */
if (canon_from_code == NULL)
canon_from_code = canon_charset;
else if (canon_from_code != canon_charset)
po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0,
false,
xasprintf (_("two different charsets \"%s\" and \"%s\" in input file"),
canon_from_code, canon_charset));
}
freea (charset);
if (update_header)
{
/* Build a new header: the part before the charset value (len1 bytes),
   the new charset name (len2 bytes), and the part after the old charset
   value (len3 bytes).  */
size_t len1, len2, len3;
char *new_header;
len1 = charsetstr - header;
len2 = strlen (canon_to_code);
len3 = (header + strlen (header)) - (charsetstr + len);
new_header = XNMALLOC (len1 + len2 + len3 + 1, char);
memcpy (new_header, header, len1);
memcpy (new_header + len1, canon_to_code, len2);
/* len3 + 1 also copies the terminating NUL byte.  */
memcpy (new_header + len1 + len2, charsetstr + len,
len3 + 1);
mlp->item[j]->msgstr = new_header;
mlp->item[j]->msgstr_len = len1 + len2 + len3 + 1;
}
}
}
}
/* No source encoding known yet: accept pure ASCII input, reject anything
   else.  */
if (canon_from_code == NULL)
{
if (is_ascii_message_list (mlp))
canon_from_code = po_charset_ascii;
else
po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
_("input file doesn't contain a header entry with a charset specification"));
}
msgids_changed = false;
/* If the two encodings are the same, nothing to do. */
if (canon_from_code != canon_to_code)
{
#if HAVE_ICONV
iconveh_t cd;
struct conversion_context context;
if (iconveh_open (canon_to_code, canon_from_code, &cd) < 0)
po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
xasprintf (_("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), and iconv() does not support this conversion."),
canon_from_code, canon_to_code,
last_component (program_name)));
/* The context is what conversion_error uses to describe a failure.  */
context.from_code = canon_from_code;
context.to_code = canon_to_code;
context.from_filename = from_filename;
for (j = 0; j < mlp->nitems; j++)
{
message_ty *mp = mlp->item[j];
/* This test looks at the strings before they are converted: only
   non-ASCII msgctxt/msgid values are flagged as changed.  */
if ((mp->msgctxt != NULL && !is_ascii_string (mp->msgctxt))
|| !is_ascii_string (mp->msgid))
msgids_changed = true;
context.message = mp;
convert_string_list (&cd, mp->comment, &context);
convert_string_list (&cd, mp->comment_dot, &context);
convert_prev_msgid (&cd, mp, &context);
convert_msgid (&cd, mp, &context);
convert_msgstr (&cd, mp, &context);
}
iconveh_close (&cd);
/* Only when some msgid was flagged above is the list checked through
   message_list_msgids_changed; a true result is reported as a fatal
   duplicate-msgid error.  */
if (msgids_changed)
if (message_list_msgids_changed (mlp))
po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
xasprintf (_("Conversion from \"%s\" to \"%s\" introduces duplicates: some different msgids become equal."),
canon_from_code, canon_to_code));
#else
po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
xasprintf (_("Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). This version was built without iconv()."),
canon_from_code, canon_to_code,
last_component (program_name)));
#endif
}
return msgids_changed;
}
/* Convert the message list MLP from CANON_FROM_CODE to CANON_TO_CODE, always
   updating the charset in the header entry.  FROM_FILENAME is passed through
   unchanged.  Returns the result of iconv_message_list_internal.  */
bool
iconv_message_list (message_list_ty *mlp,
                    const char *canon_from_code, const char *canon_to_code,
                    const char *from_filename)
{
  bool msgids_changed;

  msgids_changed =
    iconv_message_list_internal (mlp, canon_from_code, canon_to_code,
                                 true, from_filename);
  return msgids_changed;
}
/* Convert every message list in MDLP from the encoding recorded in
   mdlp->encoding to TO_CODE.  TO_CODE is canonicalized first; a name that
   cannot be canonicalized is reported as a fatal error.  UPDATE_HEADER and
   FROM_FILENAME are passed on to iconv_message_list_internal.  Afterwards
   mdlp->encoding is set to the canonical target encoding.  Returns MDLP.  */
msgdomain_list_ty *
iconv_msgdomain_list (msgdomain_list_ty *mdlp,
                      const char *to_code,
                      bool update_header,
                      const char *from_filename)
{
  size_t domain_idx;
  /* Canonicalize target encoding.  */
  const char *canon_to_code = po_charset_canonicalize (to_code);

  if (canon_to_code == NULL)
    {
      po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
                 xasprintf (_("target charset \"%s\" is not a portable encoding name."),
                            to_code));
    }

  /* Test whether the control characters required for escaping file names
     with spaces are present in the target encoding.  Only UTF-8 and GB18030
     are accepted in that case.  */
  if (msgdomain_list_has_filenames_with_spaces (mdlp)
      && canon_to_code != po_charset_utf8
      && strcmp (canon_to_code, "GB18030") != 0)
    {
      po_xerror (PO_SEVERITY_FATAL_ERROR, NULL, NULL, 0, 0, false,
                 xasprintf (_("Cannot write the control characters that protect file names with spaces in the %s encoding"),
                            canon_to_code));
    }

  for (domain_idx = 0; domain_idx < mdlp->nitems; domain_idx++)
    {
      iconv_message_list_internal (mdlp->item[domain_idx]->messages,
                                   mdlp->encoding, canon_to_code,
                                   update_header, from_filename);
    }

  mdlp->encoding = canon_to_code;

  return mdlp;
}
#if HAVE_ICONV
/* Test whether the NUL terminated STRING can be converted through CD such
   that the output contains exactly one NUL byte, located at its end.
   The converted text itself is discarded.  */
static bool
iconvable_string (const iconveh_t *cd, const char *string)
{
  char *converted = NULL;
  size_t converted_len = 0;
  size_t input_len = strlen (string) + 1;
  bool ok;

  if (xmem_cd_iconveh (string, input_len, cd, iconveh_error, NULL,
                       &converted, &converted_len) != 0)
    return false;

  /* Test if the result has exactly one NUL byte, at the end.  */
  ok = (converted_len > 0
        && converted[converted_len - 1] == '\0'
        && strlen (converted) == converted_len - 1);

  free (converted);
  return ok;
}
/* Test whether every element of the string list SLP passes
   iconvable_string.  A NULL list counts as convertible.  */
static bool
iconvable_string_list (const iconveh_t *cd, string_list_ty *slp)
{
  size_t idx;

  if (slp == NULL)
    return true;

  for (idx = 0; idx < slp->nitems; idx++)
    {
      if (!iconvable_string (cd, slp->item[idx]))
        return false;
    }

  return true;
}
/* Test whether the "previous" fields of MP (prev_msgctxt, prev_msgid and
   prev_msgid_plural) are convertible through CD.  NULL fields are skipped.
   The fields are examined in this order, stopping at the first failure.  */
static bool
iconvable_prev_msgid (const iconveh_t *cd, message_ty *mp)
{
  return ((mp->prev_msgctxt == NULL
           || iconvable_string (cd, mp->prev_msgctxt))
          && (mp->prev_msgid == NULL
              || iconvable_string (cd, mp->prev_msgid))
          && (mp->prev_msgid_plural == NULL
              || iconvable_string (cd, mp->prev_msgid_plural)));
}
/* Test whether msgctxt, msgid and msgid_plural of MP are convertible
   through CD.  The msgid is always tested; msgctxt and msgid_plural only
   when non-NULL.  The fields are examined in this order, stopping at the
   first failure.  */
static bool
iconvable_msgid (const iconveh_t *cd, message_ty *mp)
{
  return ((mp->msgctxt == NULL
           || iconvable_string (cd, mp->msgctxt))
          && iconvable_string (cd, mp->msgid)
          && (mp->msgid_plural == NULL
              || iconvable_string (cd, mp->msgid_plural)));
}
/* Test whether the msgstr of MP (msgstr_len bytes, made of NUL terminated
   pieces) is convertible through CD: the conversion must succeed, and the
   output must end in a NUL byte and contain as many NUL terminated pieces as
   the input.  The converted text itself is discarded.  Aborts if the msgstr
   is empty or not NUL terminated.  */
static bool
iconvable_msgstr (const iconveh_t *cd, message_ty *mp)
{
  char *converted = NULL;
  size_t converted_len = 0;
  bool ok;

  /* The input must be non-empty and NUL terminated.  */
  if (!(mp->msgstr_len > 0) || mp->msgstr[mp->msgstr_len - 1] != '\0')
    abort ();

  if (xmem_cd_iconveh (mp->msgstr, mp->msgstr_len, cd, iconveh_error, NULL,
                       &converted, &converted_len) != 0)
    return false;

  ok = false;
  /* Test if the result has a NUL byte at the end.  */
  if (converted_len > 0 && converted[converted_len - 1] == '\0')
    {
      /* Test if the result has the same number of NUL bytes.  */
      int pieces_before = 0;
      int pieces_after = 0;
      const char *cursor;
      const char *limit;

      cursor = mp->msgstr;
      limit = cursor + mp->msgstr_len;
      while (cursor < limit)
        {
          cursor += strlen (cursor) + 1;
          pieces_before++;
        }

      cursor = converted;
      limit = cursor + converted_len;
      while (cursor < limit)
        {
          cursor += strlen (cursor) + 1;
          pieces_after++;
        }

      if (pieces_before == pieces_after)
        ok = true;
    }

  free (converted);
  return ok;
}
#endif
/* Test whether the messages of MLP can be converted from CANON_FROM_CODE to
   CANON_TO_CODE.
   If CANON_FROM_CODE is NULL, the source encoding is taken from the
   "charset=" field of the non-obsolete header entries; if no usable charset
   is found there, ASCII is assumed provided that the whole list is ASCII.
   Returns true if the list is empty, if both encodings are the same pointer,
   or if every comment, previous msgid, msgid and msgstr converts cleanly.
   Returns false if the header names an unknown or conflicting charset, if
   the source encoding cannot be determined, if the conversion cannot be
   opened, if some string does not convert, or if this file was built
   without iconv.
   Unlike iconv_message_list_internal, this function reports nothing through
   po_xerror; the visible code only inspects MLP and stores nothing into
   it.  */
bool
is_message_list_iconvable (message_list_ty *mlp,
                           const char *canon_from_code,
                           const char *canon_to_code)
{
  /* Remember whether the caller imposed a source encoding, because
     canon_from_code itself may be assigned from the header below.  */
  bool canon_from_code_overridden = (canon_from_code != NULL);
  size_t j;

  /* If the list is empty, nothing to check.  */
  if (mlp->nitems == 0)
    return true;

  /* Search the header entry, and extract the charset name.  */
  for (j = 0; j < mlp->nitems; j++)
    if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
      {
        const char *header = mlp->item[j]->msgstr;

        if (header != NULL)
          {
            const char *charsetstr = c_strstr (header, "charset=");

            if (charsetstr != NULL)
              {
                size_t len;
                char *charset;
                const char *canon_charset;

                /* The value extends up to the next space, tab or newline.  */
                charsetstr += strlen ("charset=");
                len = strcspn (charsetstr, " \t\n");
                /* Temporary NUL terminated copy; released with freea on
                   every path below.  */
                charset = (char *) xmalloca (len + 1);
                memcpy (charset, charsetstr, len);
                charset[len] = '\0';

                canon_charset = po_charset_canonicalize (charset);
                if (canon_charset == NULL)
                  {
                    if (!canon_from_code_overridden)
                      {
                        /* Don't give an error for POT files, because POT
                           files usually contain only ASCII msgids.
                           canon_from_code is not assigned on this path, so
                           the ASCII check after the loop still applies.  */
                        if (strcmp (charset, "CHARSET") == 0)
                          canon_charset = po_charset_ascii;
                        else
                          {
                            /* charset is not a portable encoding name.  */
                            freea (charset);
                            return false;
                          }
                      }
                  }
                else
                  {
                    if (canon_from_code == NULL)
                      canon_from_code = canon_charset;
                    else if (canon_from_code != canon_charset)
                      {
                        /* Two different charsets in input file.  */
                        freea (charset);
                        return false;
                      }
                  }
                freea (charset);
              }
          }
      }

  if (canon_from_code == NULL)
    {
      if (is_ascii_message_list (mlp))
        canon_from_code = po_charset_ascii;
      else
        /* Input file lacks a header entry with a charset specification.  */
        return false;
    }

  /* If the two encodings are the same, nothing to check.  */
  if (canon_from_code != canon_to_code)
    {
#if HAVE_ICONV
      iconveh_t cd;
      bool convertible = true;

      if (iconveh_open (canon_to_code, canon_from_code, &cd) < 0)
        /* iconv() doesn't support this conversion.  */
        return false;

      /* Stop at the first message that cannot be converted, but fall
         through to the iconveh_close call instead of returning from inside
         the loop.  */
      for (j = 0; convertible && j < mlp->nitems; j++)
        {
          message_ty *mp = mlp->item[j];

          convertible = (iconvable_string_list (&cd, mp->comment)
                         && iconvable_string_list (&cd, mp->comment_dot)
                         && iconvable_prev_msgid (&cd, mp)
                         && iconvable_msgid (&cd, mp)
                         && iconvable_msgstr (&cd, mp));
        }

      /* Release the conversion descriptor on the failure path as well;
         returning early here would leak it.  */
      iconveh_close (&cd);

      if (!convertible)
        return false;
#else
      /* This version was built without iconv().  */
      return false;
#endif
    }

  return true;
}