/* GNU gettext - internationalization aids
Copyright (C) 1995-1998, 2000-2010, 2012, 2014-2016, 2018-2020 Free Software
Foundation, Inc.
This file was written by Peter Miller <millerp@canb.auug.org.au>
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
#include <alloca.h>
#include <getopt.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <locale.h>
#ifdef _OPENMP
# include <omp.h>
#endif
#include <textstyle.h>
#include "noreturn.h"
#include "closeout.h"
#include "dir-list.h"
#include "error.h"
#include "error-progname.h"
#include "progname.h"
#include "relocatable.h"
#include "basename-lgpl.h"
#include "message.h"
#include "read-catalog.h"
#include "read-po.h"
#include "read-properties.h"
#include "read-stringtable.h"
#include "write-catalog.h"
#include "write-po.h"
#include "write-properties.h"
#include "write-stringtable.h"
#include "format.h"
#include "xalloc.h"
#include "xmalloca.h"
#include "obstack.h"
#include "c-strstr.h"
#include "c-strcase.h"
#include "po-charset.h"
#include "msgl-iconv.h"
#include "msgl-equal.h"
#include "msgl-fsearch.h"
#include "glthread/lock.h"
#include "lang-table.h"
#include "plural-exp.h"
#include "plural-count.h"
#include "msgl-check.h"
#include "po-xerror.h"
#include "backupfile.h"
#include "copy-file.h"
#include "propername.h"
#include "gettext.h"
/* Shorthand for marking and translating user-visible strings. */
#define _(str) gettext (str)
/* Allocation hooks used by the obstack macros in this file. */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
/* If true do not print unneeded messages. Set by -q/--quiet/--silent and
forced on by --for-msgfmt. */
static bool quiet;
/* Verbosity level. Incremented once per -v/--verbose. */
static int verbosity_level;
/* Force output of PO file even if empty. This is an int rather than a bool
because getopt_long stores into it directly through the long_options table. */
static int force_po;
/* Apply the .pot file to each of the domains in the PO file. */
static bool multi_domain_mode = false;
/* Produce output for msgfmt, not for a translator.
msgfmt ignores
- untranslated messages,
- fuzzy messages, except the header entry,
- obsolete messages.
Therefore output for msgfmt does not need to include such messages. */
static bool for_msgfmt = false;
/* Determines whether to use fuzzy matching. Cleared by -N and by
--for-msgfmt. */
static bool use_fuzzy_matching = true;
/* Determines whether to keep old msgids as previous msgids. */
static bool keep_previous = false;
/* Language (ISO-639 code) and optional territory (ISO-3166 code).
Set from --lang; NULL when the option was not given. */
static const char *catalogname = NULL;
/* List of user-specified compendiums. NULL until the first -C option. */
static message_list_list_ty *compendiums;
/* List of corresponding filenames: one entry per entry of 'compendiums'. */
static string_list_ty *compendium_filenames;
/* Update mode. */
static bool update_mode = false;
/* Argument of --backup (backup method), or NULL if not given. */
static const char *version_control_string;
/* Argument of --suffix (backup suffix), or NULL if not given. */
static const char *backup_suffix_string;
/* Long options.
Each entry maps a long option name to the value getopt_long returns for it.
Options that have a one-letter equivalent return that letter. Options
without one return a code of the form CHAR_MAX + N, which cannot collide
with any option character. The exception is --force-po, which makes
getopt_long store 1 into force_po instead of returning a code. */
static const struct option long_options[] =
{
{ "add-location", optional_argument, NULL, 'n' },
{ "backup", required_argument, NULL, CHAR_MAX + 1 },
{ "color", optional_argument, NULL, CHAR_MAX + 9 },
{ "compendium", required_argument, NULL, 'C' },
{ "directory", required_argument, NULL, 'D' },
{ "escape", no_argument, NULL, 'E' },
{ "for-msgfmt", no_argument, NULL, CHAR_MAX + 12 },
{ "force-po", no_argument, &force_po, 1 },
{ "help", no_argument, NULL, 'h' },
{ "indent", no_argument, NULL, 'i' },
{ "lang", required_argument, NULL, CHAR_MAX + 8 },
{ "multi-domain", no_argument, NULL, 'm' },
{ "no-escape", no_argument, NULL, 'e' },
{ "no-fuzzy-matching", no_argument, NULL, 'N' },
{ "no-location", no_argument, NULL, CHAR_MAX + 11 },
{ "no-wrap", no_argument, NULL, CHAR_MAX + 4 },
{ "output-file", required_argument, NULL, 'o' },
{ "previous", no_argument, NULL, CHAR_MAX + 7 },
{ "properties-input", no_argument, NULL, 'P' },
{ "properties-output", no_argument, NULL, 'p' },
{ "quiet", no_argument, NULL, 'q' },
{ "sort-by-file", no_argument, NULL, 'F' },
{ "sort-output", no_argument, NULL, 's' },
{ "silent", no_argument, NULL, 'q' },
{ "strict", no_argument, NULL, CHAR_MAX + 2 },
{ "stringtable-input", no_argument, NULL, CHAR_MAX + 5 },
{ "stringtable-output", no_argument, NULL, CHAR_MAX + 6 },
{ "style", required_argument, NULL, CHAR_MAX + 10 },
{ "suffix", required_argument, NULL, CHAR_MAX + 3 },
{ "update", no_argument, NULL, 'U' },
{ "verbose", no_argument, NULL, 'v' },
{ "version", no_argument, NULL, 'V' },
{ "width", required_argument, NULL, 'w' },
/* End-of-table marker. */
{ NULL, 0, NULL, 0 }
};
/* A set of message counters.
NOTE(review): the code that updates and reports these counters is not in
this part of the file; the field descriptions below are inferred from the
field names only and should be confirmed against that code. */
struct statistics
{
/* Presumably: messages merged. */
size_t merged;
/* Presumably: messages marked fuzzy. */
size_t fuzzied;
/* Presumably: messages with no translation found. */
size_t missing;
/* Presumably: messages that became obsolete. */
size_t obsolete;
};
/* Forward declaration of local functions. */
/* Prints the usage hint or the full help text, then exits with STATUS. */
_GL_NORETURN_FUNC static void usage (int status);
/* Registers the PO file FILENAME as a compendium (option -C). */
static void compendium (const char *filename);
/* Moves the obsolete messages of every domain to the end of its list. */
static void msgdomain_list_stablesort_by_obsolete (msgdomain_list_ty *mdlp);
/* Merges the files FN1 and FN2, read with INPUT_SYNTAX; main() passes def.po
as FN1 and ref.pot as FN2. Returns the merged list; main() treats the list
stored through DEFP as the original content of FN1. */
static msgdomain_list_ty *merge (const char *fn1, const char *fn2,
catalog_input_format_ty input_syntax,
msgdomain_list_ty **defp);
/* Program entry point.

   Parses the command line, rejects inconsistent option combinations, merges
   the two input files named on the command line (def.po and ref.pot),
   optionally sorts the result, and writes it out:
     - normally to the file given by -o, or wherever msgdomain_list_print
       sends output when that is NULL;
     - in update mode (-U) back to def.po, after an optional backup, and only
       if the result differs from the original.

   The function never returns; it always terminates through exit().  */
int
main (int argc, char **argv)
{
  int opt;
  bool do_help;
  bool do_version;
  char *output_file;
  /* True once --color has been seen.  --color takes an OPTIONAL argument,
     so optarg is NULL for a bare "--color"; testing the saved argument for
     NULL would therefore miss that form.  */
  bool color_option_seen;
  msgdomain_list_ty *def;
  msgdomain_list_ty *result;
  catalog_input_format_ty input_syntax = &input_format_po;
  catalog_output_format_ty output_syntax = &output_format_po;
  bool sort_by_filepos = false;
  bool sort_by_msgid = false;

  /* Set program name for messages.  */
  set_program_name (argv[0]);
  error_print_progname = maybe_print_progname;
  verbosity_level = 0;
  quiet = false;
  gram_max_allowed_errors = UINT_MAX;

  /* Set locale via LC_ALL.  */
  setlocale (LC_ALL, "");

  /* Set the text message domain.  */
  bindtextdomain (PACKAGE, relocate (LOCALEDIR));
  bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
  textdomain (PACKAGE);

  /* Ensure that write errors on stdout are detected.  */
  atexit (close_stdout);

  /* Set default values for variables.  */
  do_help = false;
  do_version = false;
  output_file = NULL;
  color_option_seen = false;

  while ((opt = getopt_long (argc, argv, "C:D:eEFhimn:No:pPqsUvVw:",
                             long_options, NULL))
         != EOF)
    switch (opt)
      {
      case '\0':                /* Long option.  */
        /* getopt_long has already stored the value into the flag variable
           (this is how --force-po is handled).  */
        break;

      case 'C':
        compendium (optarg);
        break;

      case 'D':
        dir_list_append (optarg);
        break;

      case 'e':
        message_print_style_escape (false);
        break;

      case 'E':
        message_print_style_escape (true);
        break;

      case 'F':
        sort_by_filepos = true;
        break;

      case 'h':
        do_help = true;
        break;

      case 'i':
        message_print_style_indent ();
        break;

      case 'm':
        multi_domain_mode = true;
        break;

      case 'n':
        /* A true return value is treated as an invalid argument.  */
        if (handle_filepos_comment_option (optarg))
          usage (EXIT_FAILURE);
        break;

      case 'N':
        use_fuzzy_matching = false;
        break;

      case 'o':
        output_file = optarg;
        break;

      case 'p':
        output_syntax = &output_format_properties;
        break;

      case 'P':
        input_syntax = &input_format_properties;
        break;

      case 'q':
        quiet = true;
        break;

      case 's':
        sort_by_msgid = true;
        break;

      case 'U':
        update_mode = true;
        break;

      case 'v':
        ++verbosity_level;
        break;

      case 'V':
        do_version = true;
        break;

      case 'w':
        {
          int value;
          char *endp;

          /* An argument that does not start with a number is ignored.  */
          value = strtol (optarg, &endp, 10);
          if (endp != optarg)
            message_page_width_set (value);
        }
        break;

      case CHAR_MAX + 1: /* --backup */
        version_control_string = optarg;
        break;

      case CHAR_MAX + 2: /* --strict */
        message_print_style_uniforum ();
        break;

      case CHAR_MAX + 3: /* --suffix */
        backup_suffix_string = optarg;
        break;

      case CHAR_MAX + 4: /* --no-wrap */
        message_page_width_ignore ();
        break;

      case CHAR_MAX + 5: /* --stringtable-input */
        input_syntax = &input_format_stringtable;
        break;

      case CHAR_MAX + 6: /* --stringtable-output */
        output_syntax = &output_format_stringtable;
        break;

      case CHAR_MAX + 7: /* --previous */
        keep_previous = true;
        break;

      case CHAR_MAX + 8: /* --lang */
        catalogname = optarg;
        break;

      case CHAR_MAX + 9: /* --color */
        if (handle_color_option (optarg) || color_test_mode)
          usage (EXIT_FAILURE);
        color_option_seen = true;
        break;

      case CHAR_MAX + 10: /* --style */
        handle_style_option (optarg);
        break;

      case CHAR_MAX + 11: /* --no-location */
        message_print_style_filepos (filepos_comment_none);
        break;

      case CHAR_MAX + 12: /* --for-msgfmt */
        for_msgfmt = true;
        break;

      default:
        usage (EXIT_FAILURE);
        break;
      }

  /* Version information is requested.  */
  if (do_version)
    {
      printf ("%s (GNU %s) %s\n", last_component (program_name),
              PACKAGE, VERSION);
      /* xgettext: no-wrap */
      printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
License GPLv3+: GNU GPL version 3 or later <%s>\n\
This is free software: you are free to change and redistribute it.\n\
There is NO WARRANTY, to the extent permitted by law.\n\
"),
              "1995-2020", "https://gnu.org/licenses/gpl.html");
      printf (_("Written by %s.\n"), proper_name ("Peter Miller"));
      exit (EXIT_SUCCESS);
    }

  /* Help is requested.  */
  if (do_help)
    usage (EXIT_SUCCESS);

  /* Test whether we have a .po file name as argument.  The error() calls
     below use status EXIT_SUCCESS so that they only print the message;
     usage() then terminates the program.  */
  if (optind >= argc)
    {
      error (EXIT_SUCCESS, 0, _("no input files given"));
      usage (EXIT_FAILURE);
    }
  if (optind + 2 != argc)
    {
      error (EXIT_SUCCESS, 0, _("exactly 2 input files required"));
      usage (EXIT_FAILURE);
    }

  /* Verify selected options.  */
  if (update_mode)
    {
      if (output_file != NULL)
        {
          error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
                 "--update", "--output-file");
        }
      if (for_msgfmt)
        {
          error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
                 "--update", "--for-msgfmt");
        }
      if (color_option_seen)
        {
          error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
                 "--update", "--color");
        }
      if (style_file_name != NULL)
        {
          error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
                 "--update", "--style");
        }
    }
  else
    {
      if (version_control_string != NULL)
        {
          error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
                 "--backup", "--update");
          usage (EXIT_FAILURE);
        }
      if (backup_suffix_string != NULL)
        {
          error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
                 "--suffix", "--update");
          usage (EXIT_FAILURE);
        }
    }

  if (sort_by_msgid && sort_by_filepos)
    error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
           "--sort-output", "--sort-by-file");

  /* In update mode, --properties-input implies --properties-output.  */
  if (update_mode && input_syntax == &input_format_properties)
    output_syntax = &output_format_properties;
  /* In update mode, --stringtable-input implies --stringtable-output.  */
  if (update_mode && input_syntax == &input_format_stringtable)
    output_syntax = &output_format_stringtable;

  if (for_msgfmt)
    {
      /* With --for-msgfmt, no fuzzy matching.  */
      use_fuzzy_matching = false;

      /* With --for-msgfmt, merging is fast, therefore no need for a progress
         indicator.  */
      quiet = true;

      /* With --for-msgfmt, no need for comments.  */
      message_print_style_comment (false);

      /* With --for-msgfmt, no need for source location lines.  */
      message_print_style_filepos (filepos_comment_none);
    }

  /* Initialize OpenMP.  */
#ifdef _OPENMP
  openmp_init ();
#endif

  /* Merge the two files.  */
  result = merge (argv[optind], argv[optind + 1], input_syntax, &def);

  /* Sort the results.  */
  if (sort_by_filepos)
    msgdomain_list_sort_by_filepos (result);
  else if (sort_by_msgid)
    msgdomain_list_sort_by_msgid (result);

  if (update_mode)
    {
      /* Before comparing result with def, sort the result into the same order
         as would be done implicitly by output_syntax->print.  */
      if (output_syntax->sorts_obsoletes_to_end)
        msgdomain_list_stablesort_by_obsolete (result);

      /* Do nothing if the original file and the result are equal.  Also do
         nothing if the original file and the result differ only by the
         POT-Creation-Date in the header entry; this is needed for projects
         which don't put the .pot file under CVS.  */
      if (!msgdomain_list_equal (def, result, true))
        {
          /* Back up def.po.  */
          enum backup_type backup_type;
          char *backup_file;

          output_file = argv[optind];

          /* Without --suffix, fall back to the SIMPLE_BACKUP_SUFFIX
             environment variable; an empty value counts as unset.  */
          if (backup_suffix_string == NULL)
            {
              backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX");
              if (backup_suffix_string != NULL
                  && backup_suffix_string[0] == '\0')
                backup_suffix_string = NULL;
            }
          if (backup_suffix_string != NULL)
            simple_backup_suffix = backup_suffix_string;

          backup_type = xget_version (_("backup type"), version_control_string);
          if (backup_type != none)
            {
              backup_file = find_backup_file_name (output_file, backup_type);
              copy_file_preserving (output_file, backup_file);
            }

          /* Write the merged message list out.  */
          msgdomain_list_print (result, output_file, output_syntax, true,
                                false);
        }
    }
  else
    {
      /* Write the merged message list out.  */
      msgdomain_list_print (result, output_file, output_syntax,
                            for_msgfmt || force_po, false);
    }

  exit (EXIT_SUCCESS);
}
/* Display usage information and exit.
If STATUS is not EXIT_SUCCESS, only a one-line hint pointing to --help is
written to stderr. Otherwise the full help text is written to stdout.
In both cases the process then exits with STATUS; this function does not
return.
The translatable strings below are message ids: their exact bytes must not
be changed, or existing translations will no longer match. */
static void
usage (int status)
{
if (status != EXIT_SUCCESS)
fprintf (stderr, _("Try '%s --help' for more information.\n"),
program_name);
else
{
printf (_("\
Usage: %s [OPTION] def.po ref.pot\n\
"), program_name);
printf ("\n");
/* xgettext: no-wrap */
printf (_("\
Merges two Uniforum style .po files together. The def.po file is an\n\
existing PO file with translations which will be taken over to the newly\n\
created file as long as they still match; comments will be preserved,\n\
but extracted comments and file positions will be discarded. The ref.pot\n\
file is the last created PO file with up-to-date source references but\n\
old translations, or a PO Template file (generally created by xgettext);\n\
any translations or comments in the file will be discarded, however dot\n\
comments and file positions will be preserved. Where an exact match\n\
cannot be found, fuzzy matching is used to produce better results.\n\
"));
printf ("\n");
printf (_("\
Mandatory arguments to long options are mandatory for short options too.\n"));
/* Section: where the input comes from. */
printf ("\n");
printf (_("\
Input file location:\n"));
printf (_("\
def.po translations referring to old sources\n"));
printf (_("\
ref.pot references to new sources\n"));
printf (_("\
-D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
printf (_("\
-C, --compendium=FILE additional library of message translations,\n\
may be specified more than once\n"));
/* Section: update mode. */
printf ("\n");
printf (_("\
Operation mode:\n"));
printf (_("\
-U, --update update def.po,\n\
do nothing if def.po already up to date\n"));
/* Section: output destination in normal mode. */
printf ("\n");
printf (_("\
Output file location:\n"));
printf (_("\
-o, --output-file=FILE write output to specified file\n"));
printf (_("\
The results are written to standard output if no output file is specified\n\
or if it is -.\n"));
/* Section: output destination and backups in update mode. */
printf ("\n");
printf (_("\
Output file location in update mode:\n"));
printf (_("\
The result is written back to def.po.\n"));
printf (_("\
--backup=CONTROL make a backup of def.po\n"));
printf (_("\
--suffix=SUFFIX override the usual backup suffix\n"));
printf (_("\
The version control method may be selected via the --backup option or through\n\
the VERSION_CONTROL environment variable. Here are the values:\n\
none, off never make backups (even if --backup is given)\n\
numbered, t make numbered backups\n\
existing, nil numbered if numbered backups exist, simple otherwise\n\
simple, never always make simple backups\n"));
printf (_("\
The backup suffix is '~', unless set with --suffix or the SIMPLE_BACKUP_SUFFIX\n\
environment variable.\n\
"));
/* Section: options that change how merging is done. */
printf ("\n");
printf (_("\
Operation modifiers:\n"));
printf (_("\
-m, --multi-domain apply ref.pot to each of the domains in def.po\n"));
printf (_("\
--for-msgfmt produce output for '%s', not for a translator\n"),
"msgfmt");
printf (_("\
-N, --no-fuzzy-matching do not use fuzzy matching\n"));
printf (_("\
--previous keep previous msgids of translated messages\n"));
/* Section: input syntax selection. */
printf ("\n");
printf (_("\
Input file syntax:\n"));
printf (_("\
-P, --properties-input input files are in Java .properties syntax\n"));
printf (_("\
--stringtable-input input files are in NeXTstep/GNUstep .strings\n\
syntax\n"));
/* Section: output formatting. */
printf ("\n");
printf (_("\
Output details:\n"));
printf (_("\
--lang=CATALOGNAME set 'Language' field in the header entry\n"));
printf (_("\
--color use colors and other text attributes always\n\
--color=WHEN use colors and other text attributes if WHEN.\n\
WHEN may be 'always', 'never', 'auto', or 'html'.\n"));
printf (_("\
--style=STYLEFILE specify CSS style rule file for --color\n"));
printf (_("\
-e, --no-escape do not use C escapes in output (default)\n"));
printf (_("\
-E, --escape use C escapes in output, no extended chars\n"));
printf (_("\
--force-po write PO file even if empty\n"));
printf (_("\
-i, --indent indented output style\n"));
printf (_("\
--no-location suppress '#: filename:line' lines\n"));
printf (_("\
-n, --add-location preserve '#: filename:line' lines (default)\n"));
printf (_("\
--strict strict Uniforum output style\n"));
printf (_("\
-p, --properties-output write out a Java .properties file\n"));
printf (_("\
--stringtable-output write out a NeXTstep/GNUstep .strings file\n"));
printf (_("\
-w, --width=NUMBER set output page width\n"));
printf (_("\
--no-wrap do not break long message lines, longer than\n\
the output page width, into several lines\n"));
printf (_("\
-s, --sort-output generate sorted output\n"));
printf (_("\
-F, --sort-by-file sort output by file location\n"));
/* Section: help, version and verbosity. */
printf ("\n");
printf (_("\
Informative output:\n"));
printf (_("\
-h, --help display this help and exit\n"));
printf (_("\
-V, --version output version information and exit\n"));
printf (_("\
-v, --verbose increase verbosity level\n"));
printf (_("\
-q, --quiet, --silent suppress progress indicators\n"));
printf ("\n");
/* TRANSLATORS: The first placeholder is the web address of the Savannah
project of this package. The second placeholder is the bug-reporting
email address for this package. Please add _another line_ saying
"Report translation bugs to <...>\n" with the address for translation
bugs (typically your translation team's web or email address). */
printf(_("\
Report bugs in the bug tracker at <%s>\n\
or by email to <%s>.\n"),
"https://savannah.gnu.org/projects/gettext",
"bug-gettext@gnu.org");
}
exit (status);
}
/* Register the PO file FILENAME as a compendium (option -C).

   The file is read in PO syntax.  The message list of every domain found in
   it is appended to the global 'compendiums', and FILENAME is appended to
   'compendium_filenames' once per appended list, so that both lists keep
   the same number of entries.  Both globals are created on first use.  */
static void
compendium (const char *filename)
{
  msgdomain_list_ty *catalog = read_catalog_file (filename, &input_format_po);
  size_t domain_idx = 0;

  /* First compendium: set up the two parallel lists.  */
  if (compendiums == NULL)
    {
      compendiums = message_list_list_alloc ();
      compendium_filenames = string_list_alloc ();
    }

  while (domain_idx < catalog->nitems)
    {
      message_list_list_append (compendiums,
                                catalog->item[domain_idx]->messages);
      string_list_append (compendium_filenames, filename);
      domain_idx++;
    }
}
/* For every domain of MDLP, reorder its message list in place so that all
   non-obsolete messages come first and all obsolete messages come last.
   The relative order inside each of the two groups is kept, i.e. the
   reordering is stable.  */
static void
msgdomain_list_stablesort_by_obsolete (msgdomain_list_ty *mdlp)
{
  size_t domain;

  for (domain = 0; domain < mdlp->nitems; domain++)
    {
      message_list_ty *mlp = mdlp->item[domain]->messages;
      size_t total = mlp->nitems;
      message_ty **live;
      message_ty **dead;
      size_t n_live = 0;
      size_t n_dead = 0;
      size_t pos;

      /* Nothing to do for an empty list.  */
      if (total == 0)
        continue;

      /* Scratch arrays, each large enough to hold every message.  */
      live = XNMALLOC (total, message_ty *);
      dead = XNMALLOC (total, message_ty *);

      /* Partition: non-obsolete messages go to 'live', obsolete ones to
         'dead', both in their original order.  */
      for (pos = 0; pos < total; pos++)
        {
          message_ty *mp = mlp->item[pos];

          if (!mp->obsolete)
            live[n_live++] = mp;
          else
            dead[n_dead++] = mp;
        }

      /* If either group is empty the list is already in the wanted order,
         so it is only rewritten when both groups have members.  */
      if (n_live != 0 && n_dead != 0)
        {
          memcpy (mlp->item, live, n_live * sizeof (message_ty *));
          memcpy (mlp->item + n_live, dead, n_dead * sizeof (message_ty *));
        }

      free (dead);
      free (live);
    }
}
/* Data structure representing the messages with known translations.
They are composed of
- A message list from def.po,
- The compendiums.
The data structure is optimized for exact and fuzzy searches.
The two fuzzy indexes are created lazily, on the first fuzzy search that
needs them, each under its own lock. */
typedef struct definitions_ty definitions_ty;
struct definitions_ty
{
/* A list of message lists. The first comes from def.po, the other ones
from the compendiums. Each message list has a built-in hash table,
for speed when doing the exact searches.
Element 0 is the "current list"; it can be replaced through
definitions_set_current_list. */
message_list_list_ty *lists;
/* A fuzzy index of the current list of non-compendium messages, for speed
when doing fuzzy searches. Used only if use_fuzzy_matching is true.
NULL while no index has been built for the current list. */
message_fuzzy_index_ty *curr_findex;
/* A once-only execution guard for the initialization of the fuzzy index.
Needed for OpenMP. */
gl_lock_define(, curr_findex_init_lock)
/* A fuzzy index of the compendiums, for speed when doing fuzzy searches.
Used only if use_fuzzy_matching is true and compendiums != NULL.
NULL until it has been built. */
message_fuzzy_index_ty *comp_findex;
/* A once-only execution guard for the initialization of the fuzzy index.
Needed for OpenMP. */
gl_lock_define(, comp_findex_init_lock)
/* The canonical encoding of the definitions and the compendiums.
Only used for fuzzy matching. */
const char *canon_charset;
};
/* Initialize DEFINITIONS.

   The list of message lists starts with a NULL placeholder in slot 0 (the
   current list, to be supplied later through definitions_set_current_list),
   followed by all compendium lists if any were given.  Both fuzzy indexes
   start out unbuilt and their locks are initialized.  CANON_CHARSET is
   stored for the later creation of the fuzzy indexes.  */
static inline void
definitions_init (definitions_ty *definitions, const char *canon_charset)
{
  message_list_list_ty *all_lists = message_list_list_alloc ();

  /* Slot 0: placeholder for the current (non-compendium) list.  */
  message_list_list_append (all_lists, NULL);
  if (compendiums != NULL)
    message_list_list_append_list (all_lists, compendiums);
  definitions->lists = all_lists;

  /* The fuzzy indexes are built on demand.  */
  definitions->curr_findex = NULL;
  gl_lock_init (definitions->curr_findex_init_lock);
  definitions->comp_findex = NULL;
  gl_lock_init (definitions->comp_findex_init_lock);

  definitions->canon_charset = canon_charset;
}
/* Return the current list of non-compendium messages. */
static inline message_list_ty *
definitions_current_list (const definitions_ty *definitions)
{
return definitions->lists->item[0];
}
/* Set the current list of non-compendium messages to MLP.
   A fuzzy index built for the previous current list is freed and reset, so
   that a new one gets created on the next fuzzy search.  */
static inline void
definitions_set_current_list (definitions_ty *definitions, message_list_ty *mlp)
{
  message_fuzzy_index_ty *stale_index = definitions->curr_findex;

  definitions->lists->item[0] = mlp;

  /* No index was built for the old list: nothing to discard.  */
  if (stale_index == NULL)
    return;

  message_fuzzy_index_free (stale_index);
  definitions->curr_findex = NULL;
}
/* Create the fuzzy index for the current list of non-compendium messages,
   unless it exists already.
   Used only if use_fuzzy_matching is true.  */
static inline void
definitions_init_curr_findex (definitions_ty *definitions)
{
  /* The lock keeps concurrent callers from building the index twice; the
     NULL test is repeated here, inside the locked region.  */
  gl_lock_lock (definitions->curr_findex_init_lock);
  if (definitions->curr_findex == NULL)
    {
      message_list_ty *current = definitions_current_list (definitions);

      definitions->curr_findex =
        message_fuzzy_index_alloc (current, definitions->canon_charset);
    }
  gl_lock_unlock (definitions->curr_findex_init_lock);
}
/* Create the fuzzy index for the compendium messages, unless it exists
   already.
   Used only if use_fuzzy_matching is true and compendiums != NULL.  */
static inline void
definitions_init_comp_findex (definitions_ty *definitions)
{
  /* The lock keeps concurrent callers from building the index twice.  */
  gl_lock_lock (definitions->comp_findex_init_lock);
  if (definitions->comp_findex == NULL)
    {
      /* Gather the messages of all compendiums in one list.  Duplicates
         are not filtered out.  */
      message_list_ty *merged = message_list_alloc (false);
      size_t list_idx;

      for (list_idx = 0; list_idx < compendiums->nitems; list_idx++)
        {
          message_list_ty *source = compendiums->item[list_idx];
          size_t msg_idx = 0;

          while (msg_idx < source->nitems)
            {
              message_list_append (merged, source->item[msg_idx]);
              msg_idx++;
            }
        }

      /* Build the index over the combined list.  */
      definitions->comp_findex =
        message_fuzzy_index_alloc (merged, definitions->canon_charset);
    }
  gl_lock_unlock (definitions->comp_findex_init_lock);
}
/* Exact search: look up the message with context MSGCTXT and id MSGID in
   all message lists of DEFINITIONS and return what
   message_list_list_search finds.  */
static inline message_ty *
definitions_search (const definitions_ty *definitions,
                    const char *msgctxt, const char *msgid)
{
  message_ty *found =
    message_list_list_search (definitions->lists, msgctxt, msgid);

  return found;
}
/* Fuzzy search.
   Used only if use_fuzzy_matching is true.

   Looks for an approximate match of (MSGCTXT, MSGID), first among the
   current non-compendium messages and then, if compendiums were given,
   among the compendium messages.  A compendium candidate replaces the
   candidate from the current list only if there is no such candidate or if
   the compendium candidate scores strictly better.

   Returns the chosen message, or NULL if neither search produced one.

   The lookup in the current list goes through the hashed fuzzy index (early
   abort in fstrcmp(), combined with pre-sorting of the messages).  It
   replaces the older, slower direct call
     message_list_search_fuzzy (definitions_current_list (definitions),
                                msgctxt, msgid)
   which used to be kept here in a branch that was never executed.  */
static inline message_ty *
definitions_search_fuzzy (definitions_ty *definitions,
                          const char *msgctxt, const char *msgid)
{
  message_ty *mp1;

  /* Create the fuzzy index lazily.  */
  if (definitions->curr_findex == NULL)
    definitions_init_curr_findex (definitions);

  mp1 = message_fuzzy_index_search (definitions->curr_findex,
                                    msgctxt, msgid,
                                    FUZZY_THRESHOLD, false);

  if (compendiums != NULL)
    {
      double lower_bound_for_mp2;
      message_ty *mp2;

      /* A compendium match is only interesting if it beats the match found
         so far; without one, it merely has to reach the threshold.  */
      lower_bound_for_mp2 =
        (mp1 != NULL
         ? fuzzy_search_goal_function (mp1, msgctxt, msgid, 0.0)
         : FUZZY_THRESHOLD);
      /* This lower bound must be >= FUZZY_THRESHOLD.  The test is written
         in negated form so that a NaN also aborts.  */
      if (!(lower_bound_for_mp2 >= FUZZY_THRESHOLD))
        abort ();

      /* Create the fuzzy index lazily.  */
      if (definitions->comp_findex == NULL)
        definitions_init_comp_findex (definitions);

      mp2 = message_fuzzy_index_search (definitions->comp_findex,
                                        msgctxt, msgid,
                                        lower_bound_for_mp2, true);

      /* Choose the best among mp1, mp2.  */
      if (mp1 == NULL
          || (mp2 != NULL
              && (fuzzy_search_goal_function (mp2, msgctxt, msgid,
                                              lower_bound_for_mp2)
                  > lower_bound_for_mp2)))
        mp1 = mp2;
    }

  return mp1;
}
/* Release what DEFINITIONS owns: the list of message lists and whichever
   fuzzy indexes have been built.
   NOTE(review): the keep level 2 passed to message_list_list_free
   presumably leaves the contained message lists and messages alone;
   confirm against its declaration.  */
static inline void
definitions_destroy (definitions_ty *definitions)
{
  message_fuzzy_index_ty *current_index;
  message_fuzzy_index_ty *compendium_index;

  message_list_list_free (definitions->lists, 2);

  current_index = definitions->curr_findex;
  if (current_index != NULL)
    message_fuzzy_index_free (current_index);

  compendium_index = definitions->comp_findex;
  if (compendium_index != NULL)
    message_fuzzy_index_free (compendium_index);
}
/* An error logger that discards its message.  It is meant for callers that
   only want to know whether errors occurred at all, not what they were.
   FORMAT and the following arguments are accepted like those of printf
   (and checked by the compiler as such) but never used.  */
static void
silent_error_logger (const char *format, ...)
     __attribute__ ((__format__ (__printf__, 1, 2)));

static void
silent_error_logger (const char *format, ...)
{
  /* Intentionally a no-op.  */
  (void) format;
}
/* Another error logger that discards its message.  It takes a severity, a
   message, a file position and the message text, and ignores all of
   them.  */
static void
silent_xerror (int severity,
               const struct message_ty *message,
               const char *filename, size_t lineno, size_t column,
               int multiline_p, const char *message_text)
{
  /* Intentionally a no-op; every parameter is unused.  */
  (void) severity;
  (void) message;
  (void) filename;
  (void) lineno;
  (void) column;
  (void) multiline_p;
  (void) message_text;
}
static message_ty *
message_merge (message_ty *def, message_ty *ref, bool force_fuzzy,
const struct plural_distribution *distribution)
{
const char *msgstr;
size_t msgstr_len;
const char *prev_msgctxt;
const char *prev_msgid;
const char *prev_msgid_plural;
message_ty *result;
size_t j, i;
/* Take the msgid from the reference. When fuzzy matches are made,
the definition will not be unique, but the reference will be -
usually because it has only been slightly changed. */
/* Take the msgstr from the definition. The msgstr of the reference
is usually empty, as it was generated by xgettext. If we currently
process the header entry we have to merge the msgstr by using the
Report-Msgid-Bugs-To and POT-Creation-Date fields from the reference. */
if (is_header (ref))
{
/* Oh, oh. The header entry and we have something to fill in. */
static const struct
{
const char *name;
size_t len;
} known_fields[] =
{
{ "Project-Id-Version:", sizeof ("Project-Id-Version:") - 1 },
#define PROJECT_ID 0
{ "Report-Msgid-Bugs-To:", sizeof ("Report-Msgid-Bugs-To:") - 1 },
#define REPORT_MSGID_BUGS_TO 1
{ "POT-Creation-Date:", sizeof ("POT-Creation-Date:") - 1 },
#define POT_CREATION_DATE 2
{ "PO-Revision-Date:", sizeof ("PO-Revision-Date:") - 1 },
#define PO_REVISION_DATE 3
{ "Last-Translator:", sizeof ("Last-Translator:") - 1 },
#define LAST_TRANSLATOR 4
{ "Language-Team:", sizeof ("Language-Team:") - 1 },
#define LANGUAGE_TEAM 5
{ "Language:", sizeof ("Language:") - 1 },
#define LANGUAGE 6
{ "MIME-Version:", sizeof ("MIME-Version:") - 1 },
#define MIME_VERSION 7
{ "Content-Type:", sizeof ("Content-Type:") - 1 },
#define CONTENT_TYPE 8
{ "Content-Transfer-Encoding:",
sizeof ("Content-Transfer-Encoding:") - 1 }
#define CONTENT_TRANSFER 9
};
#define UNKNOWN 10
struct
{
const char *string;
size_t len;
} header_fields[UNKNOWN + 1];
struct obstack pool;
const char *cp;
char *newp;
size_t len, cnt;
/* Clear all fields. */
memset (header_fields, '\0', sizeof (header_fields));
/* Prepare a temporary memory pool. */
obstack_init (&pool);
cp = def->msgstr;
while (*cp != '\0')
{
const char *endp = strchr (cp, '\n');
int terminated = endp != NULL;
if (!terminated)
{
/* Add a trailing newline. */
char *copy;
endp = strchr (cp, '\0');
len = endp - cp + 1;
copy = (char *) obstack_alloc (&pool, len + 1);
stpcpy (stpcpy (copy, cp), "\n");
cp = copy;
}
else
{
len = (endp - cp) + 1;
++endp;
}
/* Compare with any of the known fields. */
for (cnt = 0;
cnt < sizeof (known_fields) / sizeof (known_fields[0]);
++cnt)
if (c_strncasecmp (cp, known_fields[cnt].name, known_fields[cnt].len)
== 0)
break;
if (cnt < sizeof (known_fields) / sizeof (known_fields[0]))
{
header_fields[cnt].string = &cp[known_fields[cnt].len];
header_fields[cnt].len = len - known_fields[cnt].len;
}
else
{
/* It's an unknown field. Append content to what is already
known. */
char *extended =
(char *) obstack_alloc (&pool,
header_fields[UNKNOWN].len + len + 1);
if (header_fields[UNKNOWN].string)
memcpy (extended, header_fields[UNKNOWN].string,
header_fields[UNKNOWN].len);
memcpy (&extended[header_fields[UNKNOWN].len], cp, len);
extended[header_fields[UNKNOWN].len + len] = '\0';
header_fields[UNKNOWN].string = extended;
header_fields[UNKNOWN].len += len;
}
cp = endp;
}
/* Set the Language field if specified on the command line. */
if (catalogname != NULL)
{
/* Prepend a space and append a newline. */
size_t len = strlen (catalogname);
char *copy = (char *) obstack_alloc (&pool, 1 + len + 1 + 1);
stpcpy (stpcpy (stpcpy (copy, " "), catalogname), "\n");
header_fields[LANGUAGE].string = copy;
header_fields[LANGUAGE].len = strlen (header_fields[LANGUAGE].string);
}
/* Add a Language field to PO files that don't have one. The Language
field was introduced in gettext-0.18. */
else if (header_fields[LANGUAGE].string == NULL)
{
const char *language_team_ptr = header_fields[LANGUAGE_TEAM].string;
if (language_team_ptr != NULL)
{
size_t language_team_len = header_fields[LANGUAGE_TEAM].len;
/* Trim leading blanks. */
while (language_team_len > 0
&& (*language_team_ptr == ' '
|| *language_team_ptr == '\t'))
{
language_team_ptr++;
language_team_len--;
}
/* Trim trailing blanks. */
while (language_team_len > 0
&& (language_team_ptr[language_team_len - 1] == ' '
|| language_team_ptr[language_team_len - 1] == '\t'))
language_team_len--;
/* Trim last word, if it looks like an URL or email address. */
{
size_t i;
for (i = language_team_len; i > 0; i--)
if (language_team_ptr[i - 1] == ' '
|| language_team_ptr[i - 1] == '\t')
break;
/* The last word: language_team_ptr[i..language_team_len-1]. */
if (i < language_team_len
&& (language_team_ptr[i] == '<'
|| language_team_ptr[language_team_len - 1] == '>'
|| memchr (language_team_ptr, '@', language_team_len)
!= NULL
|| memchr (language_team_ptr, '/', language_team_len)
!= NULL))
{
/* Trim last word and blanks before it. */
while (i > 0
&& (language_team_ptr[i - 1] == ' '
|| language_team_ptr[i - 1] == '\t'))
i--;
language_team_len = i;
}
}
/* The rest of the Language-Team field should be the English name
of the language. Convert to ISO 639 and ISO 3166 syntax. */
{
size_t i;
for (i = 0; i < language_variant_table_size; i++)
if (strlen (language_variant_table[i].english)
== language_team_len
&& memcmp (language_variant_table[i].english,
language_team_ptr, language_team_len) == 0)
{
header_fields[LANGUAGE].string =
language_variant_table[i].code;
break;
}
}
if (header_fields[LANGUAGE].string == NULL)
{
size_t i;
for (i = 0; i < language_table_size; i++)
if (strlen (language_table[i].english) == language_team_len
&& memcmp (language_table[i].english,
language_team_ptr, language_team_len) == 0)
{
header_fields[LANGUAGE].string = language_table[i].code;
break;
}
}
if (header_fields[LANGUAGE].string != NULL)
{
/* Prepend a space and append a newline. */
const char *str = header_fields[LANGUAGE].string;
size_t len = strlen (str);
char *copy = (char *) obstack_alloc (&pool, 1 + len + 1 + 1);
stpcpy (stpcpy (stpcpy (copy, " "), str), "\n");
header_fields[LANGUAGE].string = copy;
}
else
header_fields[LANGUAGE].string = " \n";
header_fields[LANGUAGE].len =
strlen (header_fields[LANGUAGE].string);
}
}
{
const char *msgid_bugs_ptr;
msgid_bugs_ptr = c_strstr (ref->msgstr, "Report-Msgid-Bugs-To:");
if (msgid_bugs_ptr != NULL)
{
size_t msgid_bugs_len;
const char *endp;
msgid_bugs_ptr += sizeof ("Report-Msgid-Bugs-To:") - 1;
endp = strchr (msgid_bugs_ptr, '\n');
if (endp == NULL)
{
/* Add a trailing newline. */
char *extended;
endp = strchr (msgid_bugs_ptr, '\0');
msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
extended = (char *) obstack_alloc (&pool, msgid_bugs_len + 1);
stpcpy (stpcpy (extended, msgid_bugs_ptr), "\n");
msgid_bugs_ptr = extended;
}
else
msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
header_fields[REPORT_MSGID_BUGS_TO].string = msgid_bugs_ptr;
header_fields[REPORT_MSGID_BUGS_TO].len = msgid_bugs_len;
}
}
{
const char *pot_date_ptr;
pot_date_ptr = c_strstr (ref->msgstr, "POT-Creation-Date:");
if (pot_date_ptr != NULL)
{
size_t pot_date_len;
const char *endp;
pot_date_ptr += sizeof ("POT-Creation-Date:") - 1;
endp = strchr (pot_date_ptr, '\n');
if (endp == NULL)
{
/* Add a trailing newline. */
char *extended;
endp = strchr (pot_date_ptr, '\0');
pot_date_len = (endp - pot_date_ptr) + 1;
extended = (char *) obstack_alloc (&pool, pot_date_len + 1);
stpcpy (stpcpy (extended, pot_date_ptr), "\n");
pot_date_ptr = extended;
}
else
pot_date_len = (endp - pot_date_ptr) + 1;
header_fields[POT_CREATION_DATE].string = pot_date_ptr;
header_fields[POT_CREATION_DATE].len = pot_date_len;
}
}
/* Concatenate all the various fields. */
len = 0;
for (cnt = 0; cnt < UNKNOWN; ++cnt)
if (header_fields[cnt].string != NULL)
len += known_fields[cnt].len + header_fields[cnt].len;
len += header_fields[UNKNOWN].len;
cp = newp = XNMALLOC (len + 1, char);
newp[len] = '\0';
#define IF_FILLED(idx) \
if (header_fields[idx].string) \
newp = stpncpy (stpcpy (newp, known_fields[idx].name), \
header_fields[idx].string, header_fields[idx].len)
IF_FILLED (PROJECT_ID);
IF_FILLED (REPORT_MSGID_BUGS_TO);
IF_FILLED (POT_CREATION_DATE);
IF_FILLED (PO_REVISION_DATE);
IF_FILLED (LAST_TRANSLATOR);
IF_FILLED (LANGUAGE_TEAM);
IF_FILLED (LANGUAGE);
IF_FILLED (MIME_VERSION);
IF_FILLED (CONTENT_TYPE);
IF_FILLED (CONTENT_TRANSFER);
if (header_fields[UNKNOWN].string != NULL)
stpcpy (newp, header_fields[UNKNOWN].string);
#undef IF_FILLED
/* Free the temporary memory pool. */
obstack_free (&pool, NULL);
msgstr = cp;
msgstr_len = strlen (cp) + 1;
prev_msgctxt = NULL;
prev_msgid = NULL;
prev_msgid_plural = NULL;
}
else
{
msgstr = def->msgstr;
msgstr_len = def->msgstr_len;
if (def->is_fuzzy)
{
prev_msgctxt = def->prev_msgctxt;
prev_msgid = def->prev_msgid;
prev_msgid_plural = def->prev_msgid_plural;
}
else
{
prev_msgctxt = def->msgctxt;
prev_msgid = def->msgid;
prev_msgid_plural = def->msgid_plural;
}
}
result = message_alloc (ref->msgctxt != NULL ? xstrdup (ref->msgctxt) : NULL,
xstrdup (ref->msgid), ref->msgid_plural,
msgstr, msgstr_len, &def->pos);
/* Take the comments from the definition file. There will be none at
all in the reference file, as it was generated by xgettext. */
if (def->comment)
for (j = 0; j < def->comment->nitems; ++j)
message_comment_append (result, def->comment->item[j]);
/* Take the dot comments from the reference file, as they are
generated by xgettext. Any in the definition file are old ones
collected by previous runs of xgettext and msgmerge. */
if (ref->comment_dot)
for (j = 0; j < ref->comment_dot->nitems; ++j)
message_comment_dot_append (result, ref->comment_dot->item[j]);
/* The flags are mixed in a special way. Some information comes
from the reference message (such as format/no-format), other
information comes from the definition file (fuzzy or not). */
result->is_fuzzy = def->is_fuzzy | force_fuzzy;
/* If ref and def have the same msgid but different msgid_plural, it's
a reason to mark the result fuzzy. */
if (!result->is_fuzzy
&& (ref->msgid_plural != NULL
? def->msgid_plural == NULL
|| strcmp (ref->msgid_plural, def->msgid_plural) != 0
: def->msgid_plural != NULL))
result->is_fuzzy = true;
for (i = 0; i < NFORMATS; i++)
{
result->is_format[i] = ref->is_format[i];
/* If the reference message is marked as being a format specifier,
but the definition message is not, we check if the resulting
message would pass "msgfmt -c". If yes, then all is fine. If
not, we add a fuzzy marker, because
1. the message needs the translator's attention,
2. msgmerge must not transform a PO file which passes "msgfmt -c"
into a PO file which doesn't. */
if (!result->is_fuzzy
&& possible_format_p (ref->is_format[i])
&& !possible_format_p (def->is_format[i])
&& check_msgid_msgstr_format_i (ref->msgid, ref->msgid_plural,
msgstr, msgstr_len, i, ref->range,
distribution, silent_error_logger)
> 0)
result->is_fuzzy = true;
}
result->range = ref->range;
/* If the definition message was assuming a certain range, but the reference
message does not specify a range any more or specifies a range that is
not the same or a subset, we add a fuzzy marker, because
1. the message needs the translator's attention,
2. msgmerge must not transform a PO file which passes "msgfmt -c"
into a PO file which doesn't. */
if (!result->is_fuzzy
&& has_range_p (def->range)
&& !(has_range_p (ref->range)
&& ref->range.min >= def->range.min
&& ref->range.max <= def->range.max))
result->is_fuzzy = true;
result->do_wrap = ref->do_wrap;
for (i = 0; i < NSYNTAXCHECKS; i++)
result->do_syntax_check[i] = ref->do_syntax_check[i];
/* Insert previous msgid, commented out with "#|".
Do so only when --previous is specified, for backward compatibility.
Since the "previous msgid" represents the original msgid that led to
the current msgstr,
- we can omit it if the resulting message is not fuzzy or is
untranslated (but do this in a later pass, since result->is_fuzzy
is not finalized at this point),
- otherwise, if the corresponding message from the definition file
was translated (not fuzzy), we use that message's msgid,
- otherwise, we use that message's prev_msgid. */
if (keep_previous)
{
result->prev_msgctxt = prev_msgctxt;
result->prev_msgid = prev_msgid;
result->prev_msgid_plural = prev_msgid_plural;
}
/* If the reference message was obsolete, make the resulting message
obsolete. This case doesn't occur for POT files, but users sometimes
use PO files that are themselves the result of msgmerge instead of POT
files. */
result->obsolete = ref->obsolete;
/* Take the file position comments from the reference file, as they
are generated by xgettext. Any in the definition file are old ones
collected by previous runs of xgettext and msgmerge. */
for (j = 0; j < ref->filepos_count; ++j)
{
lex_pos_ty *pp = &ref->filepos[j];
message_comment_filepos (result, pp->file_name, pp->line_number);
}
/* Special postprocessing is needed if the reference message is a
plural form and the definition message isn't, or vice versa. */
if (ref->msgid_plural != NULL)
{
if (def->msgid_plural == NULL)
result->used = 1;
}
else
{
if (def->msgid_plural != NULL)
result->used = 2;
}
/* All done, return the merged message to the caller. */
return result;
}
/* A progress dot is printed whenever the running count of examined
   reference messages is a multiple of this value.  */
#define DOT_FREQUENCY 10

/* Match every message of REFMLP against the current message list of
   DEFINITIONS and append the outcome to RESULTMLP.

   FN1 is only used in the "used but not defined in %s" diagnostic; FN2 is
   not used by this function.  STATS->merged, STATS->fuzzied and
   STATS->missing are incremented according to how each reference message
   was resolved.  *PROCESSED is incremented once per reference message and
   drives the progress dots written to stderr.

   The work is done in four passes:
     1. look up every reference message (exactly, then fuzzily), in a loop
        that an OpenMP capable compiler may run in parallel;
     2. merge or copy each reference message into RESULTMLP;
     3. repair messages whose plural-ness differs between reference and
        definition (flagged by message_merge through the 'used' field);
     4. drop the "previous msgid" data from messages that end up not fuzzy
        or untranslated.  */
static void
match_domain (const char *fn1, const char *fn2,
              definitions_ty *definitions, message_list_ty *refmlp,
              message_list_ty *resultmlp,
              struct statistics *stats, unsigned int *processed)
{
  /* Outcome of the lookup of one reference message.  'fuzzy' is only
     meaningful (and only assigned) when 'found' is non-NULL.  */
  struct lookup { message_ty *found; bool fuzzy; };
  struct lookup *lookups;
  struct plural_distribution distribution;
  const struct expression *plural_expr;
  unsigned long int nplurals;
  char *empty_plural_msgstr;
  message_ty *def_header;
  size_t idx;

  /* The plural expression and count come from the header entry of the
     definitions' current list, if there is one.  */
  def_header =
    message_list_search (definitions_current_list (definitions), NULL, "");
  extract_plural_expression (def_header != NULL ? def_header->msgstr : NULL,
                             &plural_expr, &nplurals);

  /* NPLURALS NUL bytes, i.e. NPLURALS consecutive empty strings.  Messages
     appended to RESULTMLP may end up pointing at this buffer, so it is not
     freed here.  */
  empty_plural_msgstr = XNMALLOC (nplurals, char);
  memset (empty_plural_msgstr, '\0', nplurals);

  /* Determine the plural distribution of the plural_expr formula.  */
  {
    /* Disable error output temporarily.  */
    void (*saved_po_xerror) (int, const struct message_ty *, const char *,
                             size_t, size_t, int, const char *)
      = po_xerror;

    po_xerror = silent_xerror;
    if (check_plural_eval (plural_expr, nplurals, def_header,
                           &distribution) > 0)
      {
        /* Evaluation failed: fall back to an empty distribution.  */
        distribution.expr = NULL;
        distribution.often = NULL;
        distribution.often_length = 0;
        distribution.histogram = NULL;
      }
    po_xerror = saved_po_xerror;
  }

  /* Pass 1.  Most of the time is spent in definitions_search_fuzzy.
     Perform it in a separate loop that can be parallelized by an OpenMP
     capable compiler.  */
  lookups = XNMALLOC (refmlp->nitems, struct lookup);
  {
    long int total = refmlp->nitems;
    long int pos;

    /* Tell the OpenMP capable compiler to distribute this loop across
       several threads.  The schedule is dynamic, because for some messages
       the loop body can be executed very quickly, whereas for others it
       takes a long time.
       Note: The Sun Workshop 6.2 C compiler does not allow a space between
       '#' and 'pragma'.  */
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
    for (pos = 0; pos < total; pos++)
      {
        message_ty *refmsg = refmlp->item[pos];
        struct lookup *slot = &lookups[pos];

        /* Because merging can take a while we print something to signal
           we are not dead.  */
        if (!quiet && verbosity_level <= 1 && *processed % DOT_FREQUENCY == 0)
          fputc ('.', stderr);
#ifdef _OPENMP
#pragma omp atomic
#endif
        (*processed)++;

        /* See if it is in the other file.  */
        slot->found =
          definitions_search (definitions, refmsg->msgctxt, refmsg->msgid);
        if (slot->found != NULL)
          slot->fuzzy = false;
        else if (!is_header (refmsg) && use_fuzzy_matching)
          {
            /* If the message was not defined at all, try to find a very
               similar message, it could be a typo, or the suggestion may
               help.  */
            slot->found =
              definitions_search_fuzzy (definitions,
                                        refmsg->msgctxt, refmsg->msgid);
            if (slot->found != NULL)
              slot->fuzzy = true;
          }
      }
  }

  /* Pass 2.  Turn every lookup outcome into a result message.  */
  for (idx = 0; idx < refmlp->nitems; idx++)
    {
      message_ty *refmsg = refmlp->item[idx];
      message_ty *defmsg = lookups[idx].found;

      if (defmsg != NULL && !lookups[idx].fuzzy)
        {
          /* Exact match, found by definitions_search.
             Merge the reference with the definition: take the #. and
             #: comments from the reference, take the # comments from
             the definition, take the msgstr from the definition.  */
          message_ty *merged =
            message_merge (defmsg, refmsg, false, &distribution);
          /* When producing output for msgfmt, omit messages that are
             untranslated or fuzzy (except the header entry).  */
          bool omit =
            for_msgfmt
            && (merged->msgstr[0] == '\0'
                || (merged->is_fuzzy && !is_header (merged)));

          if (!omit)
            {
              message_list_append (resultmlp, merged);
              /* Remember that this message has been used, when we scan
                 later to see if anything was omitted.  */
              defmsg->used = 1;
            }
          stats->merged++;
          continue;
        }

      /* A header entry without an exact match produces nothing.  */
      if (is_header (refmsg))
        continue;

      if (defmsg != NULL)
        {
          /* Fuzzy match, found by definitions_search_fuzzy (which is only
             consulted when use_fuzzy_matching is set).  */
          message_ty *merged;

          if (verbosity_level > 1)
            {
              po_gram_error_at_line (&refmsg->pos,
                                     _("this message is used but not defined..."));
              error_message_count--;
              po_gram_error_at_line (&defmsg->pos,
                                     _("...but this definition is similar"));
            }

          /* Same merge as for an exact match, but force the fuzzy mark.  */
          merged = message_merge (defmsg, refmsg, true, &distribution);
          message_list_append (resultmlp, merged);
          /* Remember that this message has been used, when we scan
             later to see if anything was omitted.  */
          defmsg->used = 1;
          stats->fuzzied++;

          if (!quiet && verbosity_level <= 1)
            /* Always print a dot if we handled a fuzzy match.  */
            fputc ('.', stderr);
          continue;
        }

      /* No definition at all: carry over a copy of the reference message.  */
      {
        message_ty *copy;
        const char *scan;
        const char *scan_end;
        bool is_untranslated;

        if (verbosity_level > 1)
          po_gram_error_at_line (&refmsg->pos,
                                 _("this message is used but not defined in %s"),
                                 fn1);

        copy = message_copy (refmsg);

        /* Test if the copy is untranslated, i.e. its msgstr consists of
           NUL bytes only.  (It most likely is.)  */
        scan = copy->msgstr;
        scan_end = scan + copy->msgstr_len;
        while (scan < scan_end && *scan == '\0')
          scan++;
        is_untranslated = (scan == scan_end);

        if (copy->msgid_plural != NULL && is_untranslated)
          {
            /* Change copy->msgstr_len consecutive empty strings into
               nplurals consecutive empty strings.  The existing buffer is
               reused unless it is too short.  */
            if (nplurals > copy->msgstr_len)
              copy->msgstr = empty_plural_msgstr;
            copy->msgstr_len = nplurals;
          }

        /* When producing output for msgfmt, omit messages that are
           untranslated or fuzzy (except the header entry).  */
        if (!for_msgfmt || (!is_untranslated && !copy->is_fuzzy))
          message_list_append (resultmlp, copy);
        stats->missing++;
      }
    }

  free (lookups);

  /* Pass 3.  Postprocess the problematic merges.  This is needed because we
     want the result to pass the "msgfmt -c -v" check.  */
  {
    /* message_merge sets mp->used to 1 or 2, depending on the problem.
       Compute the bitwise OR of all these.  */
    int problems = 0;

    for (idx = 0; idx < resultmlp->nitems; idx++)
      problems |= resultmlp->item[idx]->used;

    if (problems)
      {
        unsigned long int result_nplurals = 0;

        if (problems & 1)
          {
            /* Need to know nplurals of the result domain.  */
            message_ty *result_header =
              message_list_search (resultmlp, NULL, "");

            result_nplurals =
              get_plural_count (result_header != NULL
                                ? result_header->msgstr
                                : NULL);
          }

        for (idx = 0; idx < resultmlp->nitems; idx++)
          {
            message_ty *mp = resultmlp->item[idx];

            if ((mp->used & 1) && result_nplurals > 0)
              {
                /* ref->msgid_plural != NULL but def->msgid_plural == NULL.
                   Use a copy of def->msgstr for each possible plural
                   form.  */
                size_t repeated_len;
                char *repeated;
                unsigned long form;

                if (verbosity_level > 1)
                  po_gram_error_at_line (&mp->pos,
                                         _("this message should define plural forms"));

                repeated_len = result_nplurals * mp->msgstr_len;
                repeated = XNMALLOC (repeated_len, char);
                for (form = 0; form < result_nplurals; form++)
                  memcpy (repeated + form * mp->msgstr_len,
                          mp->msgstr, mp->msgstr_len);

                mp->msgstr = repeated;
                mp->msgstr_len = repeated_len;
                mp->is_fuzzy = true;
              }

            if ((mp->used & 2) && mp->msgstr_len > strlen (mp->msgstr) + 1)
              {
                /* ref->msgid_plural == NULL but def->msgid_plural != NULL.
                   Use only the first among the plural forms.  */
                if (verbosity_level > 1)
                  po_gram_error_at_line (&mp->pos,
                                         _("this message should not define plural forms"));

                mp->msgstr_len = strlen (mp->msgstr) + 1;
                mp->is_fuzzy = true;
              }

            /* Postprocessing of this message is done.  */
            mp->used = 0;
          }
      }
  }

  /* Pass 4.  Now that mp->is_fuzzy is finalized for all messages, remove
     the "previous msgid" information from all messages that are not fuzzy
     or are untranslated.  */
  for (idx = 0; idx < resultmlp->nitems; idx++)
    {
      message_ty *mp = resultmlp->item[idx];
      bool keep_prev = mp->is_fuzzy && mp->msgstr[0] != '\0';

      if (!keep_prev)
        {
          mp->prev_msgctxt = NULL;
          mp->prev_msgid = NULL;
          mp->prev_msgid_plural = NULL;
        }
    }
}
static msgdomain_list_ty *
merge (const char *fn1, const char *fn2, catalog_input_format_ty input_syntax,
msgdomain_list_ty **defp)
{
msgdomain_list_ty *def;
msgdomain_list_ty *ref;
size_t j, k;
unsigned int processed;
struct statistics stats;
msgdomain_list_ty *result;
const char *def_canon_charset;
definitions_ty definitions;
message_list_ty *empty_list;
stats.merged = stats.fuzzied = stats.missing = stats.obsolete = 0;
/* This is the definitions file, created by a human. */
def = read_catalog_file (fn1, input_syntax);
/* This is the references file, created by groping the sources with
the xgettext program. */
ref = read_catalog_file (fn2, input_syntax);
/* Add a dummy header entry, if the references file contains none. */
for (k = 0; k < ref->nitems; k++)
if (message_list_search (ref->item[k]->messages, NULL, "") == NULL)
{
static lex_pos_ty pos = { __FILE__, __LINE__ };
message_ty *refheader = message_alloc (NULL, "", NULL, "", 1, &pos);
message_list_prepend (ref->item[k]->messages, refheader);
}
/* The references file can be either in ASCII or in UTF-8. If it is
in UTF-8, we have to convert the definitions and the compendiums to
UTF-8 as well. */
{
bool was_utf8 = false;
for (k = 0; k < ref->nitems; k++)
{
message_list_ty *mlp = ref->item[k]->messages;
for (j = 0; j < mlp->nitems; j++)
if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
{
const char *header = mlp->item[j]->msgstr;
if (header != NULL)
{
const char *charsetstr = c_strstr (header, "charset=");
if (charsetstr != NULL)
{
size_t len;
charsetstr += strlen ("charset=");
len = strcspn (charsetstr, " \t\n");
if (len == strlen ("UTF-8")
&& c_strncasecmp (charsetstr, "UTF-8", len) == 0)
was_utf8 = true;
}
}
}
}
if (was_utf8)
{
def = iconv_msgdomain_list (def, "UTF-8", true, fn1);
if (compendiums != NULL)
for (k = 0; k < compendiums->nitems; k++)
iconv_message_list (compendiums->item[k], NULL, po_charset_utf8,
compendium_filenames->item[k]);
}
else if (compendiums != NULL && compendiums->nitems > 0)
{
/* Ensure that the definitions and the compendiums are in the same
encoding. Prefer the encoding of the definitions file, if
possible; otherwise, if the definitions file is empty and the
compendiums are all in the same encoding, use that encoding;
otherwise, use UTF-8. */
bool conversion_done = false;
{
char *charset = NULL;
/* Get the encoding of the definitions file. */
for (k = 0; k < def->nitems; k++)
{
message_list_ty *mlp = def->item[k]->messages;
for (j = 0; j < mlp->nitems; j++)
if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
{
const char *header = mlp->item[j]->msgstr;
if (header != NULL)
{
const char *charsetstr = c_strstr (header, "charset=");
if (charsetstr != NULL)
{
size_t len;
charsetstr += strlen ("charset=");
len = strcspn (charsetstr, " \t\n");
charset = (char *) xmalloca (len + 1);
memcpy (charset, charsetstr, len);
charset[len] = '\0';
break;
}
}
}
if (charset != NULL)
break;
}
if (charset != NULL)
{
const char *canon_charset = po_charset_canonicalize (charset);
if (canon_charset != NULL)
{
bool all_compendiums_iconvable = true;
if (compendiums != NULL)
for (k = 0; k < compendiums->nitems; k++)
if (!is_message_list_iconvable (compendiums->item[k],
NULL, canon_charset))
{
all_compendiums_iconvable = false;
break;
}
if (all_compendiums_iconvable)
{
/* Convert the compendiums to def's encoding. */
if (compendiums != NULL)
for (k = 0; k < compendiums->nitems; k++)
iconv_message_list (compendiums->item[k],
NULL, canon_charset,
compendium_filenames->item[k]);
conversion_done = true;
}
}
freea (charset);
}
}
if (!conversion_done)
{
if (def->nitems == 0
|| (def->nitems == 1 && def->item[0]->messages->nitems == 0))
{
/* The definitions file is empty.
Compare the encodings of the compendiums. */
const char *common_canon_charset = NULL;
for (k = 0; k < compendiums->nitems; k++)
{
message_list_ty *mlp = compendiums->item[k];
char *charset = NULL;
const char *canon_charset = NULL;
for (j = 0; j < mlp->nitems; j++)
if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
{
const char *header = mlp->item[j]->msgstr;
if (header != NULL)
{
const char *charsetstr =
c_strstr (header, "charset=");
if (charsetstr != NULL)
{
size_t len;
charsetstr += strlen ("charset=");
len = strcspn (charsetstr, " \t\n");
charset = (char *) xmalloca (len + 1);
memcpy (charset, charsetstr, len);
charset[len] = '\0';
break;
}
}
}
if (charset != NULL)
{
canon_charset = po_charset_canonicalize (charset);
freea (charset);
}
/* If no charset declaration was found in this file,
or if it is not a valid encoding name, or if it
differs from the common charset found so far,
we have no common charset. */
if (canon_charset == NULL
|| (common_canon_charset != NULL
&& canon_charset != common_canon_charset))
{
common_canon_charset = NULL;
break;
}
common_canon_charset = canon_charset;
}
if (common_canon_charset != NULL)
/* No conversion needed in this case. */
conversion_done = true;
}
if (!conversion_done)
{
/* It's too hairy to find out what would be the optimal target
encoding. So, convert everything to UTF-8. */
def = iconv_msgdomain_list (def, "UTF-8", true, fn1);
if (compendiums != NULL)
for (k = 0; k < compendiums->nitems; k++)
iconv_message_list (compendiums->item[k],
NULL, po_charset_utf8,
compendium_filenames->item[k]);
}
}
}
}
/* Determine canonicalized encoding name of the definitions now, after
conversion. Only used for fuzzy matching. */
if (use_fuzzy_matching)
{
def_canon_charset = def->encoding;
if (def_canon_charset == NULL)
{
char *charset = NULL;
/* Get the encoding of the definitions file. */
for (k = 0; k < def->nitems; k++)
{
message_list_ty *mlp = def->item[k]->messages;
for (j = 0; j < mlp->nitems; j++)
if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
{
const char *header = mlp->item[j]->msgstr;
if (header != NULL)
{
const char *charsetstr = c_strstr (header, "charset=");
if (charsetstr != NULL)
{
size_t len;
charsetstr += strlen ("charset=");
len = strcspn (charsetstr, " \t\n");
charset = (char *) xmalloca (len + 1);
memcpy (charset, charsetstr, len);
charset[len] = '\0';
break;
}
}
}
if (charset != NULL)
break;
}
if (charset != NULL)
def_canon_charset = po_charset_canonicalize (charset);
if (def_canon_charset == NULL)
/* Unspecified encoding. Assume unibyte encoding. */
def_canon_charset = po_charset_ascii;
}
}
else
def_canon_charset = NULL;
/* Initialize and preprocess the total set of message definitions. */
definitions_init (&definitions, def_canon_charset);
empty_list = message_list_alloc (false);
result = msgdomain_list_alloc (false);
processed = 0;
/* Every reference must be matched with its definition. */
if (!multi_domain_mode)
for (k = 0; k < ref->nitems; k++)
{
const char *domain = ref->item[k]->domain;
message_list_ty *refmlp = ref->item[k]->messages;
message_list_ty *resultmlp =
msgdomain_list_sublist (result, domain, true);
message_list_ty *defmlp;
defmlp = msgdomain_list_sublist (def, domain, false);
if (defmlp == NULL)
defmlp = empty_list;
definitions_set_current_list (&definitions, defmlp);
match_domain (fn1, fn2, &definitions, refmlp, resultmlp,
&stats, &processed);
}
else
{
/* Apply the references messages in the default domain to each of
the definition domains. */
message_list_ty *refmlp = ref->item[0]->messages;
for (k = 0; k < def->nitems; k++)
{
const char *domain = def->item[k]->domain;
message_list_ty *defmlp = def->item[k]->messages;
/* Ignore the default message domain if it has no messages. */
if (k > 0 || defmlp->nitems > 0)
{
message_list_ty *resultmlp =
msgdomain_list_sublist (result, domain, true);
definitions_set_current_list (&definitions, defmlp);
match_domain (fn1, fn2, &definitions, refmlp, resultmlp,
&stats, &processed);
}
}
}
definitions_destroy (&definitions);
if (!for_msgfmt)
{
/* Look for messages in the definition file, which are not present
in the reference file, indicating messages which defined but not
used in the program. Don't scan the compendium(s). */
for (k = 0; k < def->nitems; ++k)
{
const char *domain = def->item[k]->domain;
message_list_ty *defmlp = def->item[k]->messages;
for (j = 0; j < defmlp->nitems; j++)
{
message_ty *defmsg = defmlp->item[j];
if (!defmsg->used)
{
/* Remember the old translation although it is not used anymore.
But we mark it as obsolete. */
message_ty *mp;
mp = message_copy (defmsg);
/* Clear the extracted comments. */
if (mp->comment_dot != NULL)
{
string_list_free (mp->comment_dot);
mp->comment_dot = NULL;
}
/* Clear the file position comments. */
if (mp->filepos != NULL)
{
size_t i;
for (i = 0; i < mp->filepos_count; i++)
free ((char *) mp->filepos[i].file_name);
mp->filepos_count = 0;
free (mp->filepos);
mp->filepos = NULL;
}
/* Mark as obsolete. */
mp->obsolete = true;
message_list_append (msgdomain_list_sublist (result, domain, true),
mp);
stats.obsolete++;
}
}
}
}
/* Determine the known a-priori encoding, if any. */
if (def->encoding == ref->encoding)
result->encoding = def->encoding;
/* Report some statistics. */
if (verbosity_level > 0)
fprintf (stderr, _("%s\
Read %ld old + %ld reference, \
merged %ld, fuzzied %ld, missing %ld, obsolete %ld.\n"),
!quiet && verbosity_level <= 1 ? "\n" : "",
(long) def->nitems, (long) ref->nitems,
(long) stats.merged, (long) stats.fuzzied, (long) stats.missing,
(long) stats.obsolete);
else if (!quiet)
fputs (_(" done.\n"), stderr);
/* Return results. */
*defp = def;
return result;
}