| /* Copyright (C) 1996-2014 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| Contributed by Ulrich Drepper <drepper@redhat.com>, 1996. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published |
| by the Free Software Foundation; version 2 of the License, or |
| (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, see <http://www.gnu.org/licenses/>. */ |
| |
| #ifdef HAVE_CONFIG_H |
| # include "config.h" |
| #endif |
| |
| #include <argp.h> |
| #include <assert.h> |
| #include <ctype.h> |
| #include <endian.h> |
| #include <errno.h> |
| #include <error.h> |
| #include <fcntl.h> |
| #include <iconv.h> |
| #include <langinfo.h> |
| #include <locale.h> |
| #include <libintl.h> |
| #include <limits.h> |
| #include <nl_types.h> |
| #include <obstack.h> |
| #include <stdint.h> |
| #include <stdio.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <unistd.h> |
| #include <wchar.h> |
| |
| #include "version.h" |
| |
| #include "catgetsinfo.h" |
| |
| |
/* Reverse the byte order of the 32-bit quantity W.

   The operand is narrowed to uint32_t before any shift is applied, so
   a signed argument cannot overflow the left shift and an argument
   wider than 32 bits cannot leak its upper bits into the result.  W
   is expanded several times: do not pass an expression with side
   effects.  */
#define SWAPU32(w) \
  ((uint32_t) ((((uint32_t) (w)) << 24)                                       \
               | ((((uint32_t) (w)) & 0xff00) << 8)                           \
               | ((((uint32_t) (w)) >> 8) & 0xff00)                           \
               | (((uint32_t) (w)) >> 24)))
| |
/* A single message.  The messages of one set form a singly linked
   list through NEXT.  */
struct message_list
{
  /* Message number inside its set.  */
  int number;
  /* Text of the message.  */
  const char *message;

  /* File name and line used when diagnostics about this message are
     printed and in the comments of the generated header.  */
  const char *fname;
  size_t line;
  /* Symbolic name of the message, or NULL if it was given by number.  */
  const char *symbol;

  /* Following message of the same set, or NULL.  */
  struct message_list *next;
};
| |
| |
/* A message set.  All sets of a catalog form a singly linked list
   through NEXT.  */
struct set_list
{
  /* Internal set number.  find_set stores the requested number plus
     one so that zero can mark an unused slot in the output tables.  */
  int number;
  /* Set to 1 by the `$delset' directive.
     NOTE(review): nothing in this file ever reads this flag.  */
  int deleted;
  /* Head of the list of messages belonging to this set.  */
  struct message_list *messages;
  /* Highest message number used in this set so far; symbolic
     messages are numbered by incrementing it.  */
  int last_message;

  /* File name and line recorded by the `$set' directive, used in
     diagnostics and in the generated header.  */
  const char *fname;
  size_t line;
  /* Symbolic name of the set, or NULL.  */
  const char *symbol;

  /* Following set, or NULL.  */
  struct set_list *next;
};
| |
| |
/* Everything collected from the input files (and, optionally, from
   an already existing catalog).  */
struct catalog
{
  /* Head of the list of all sets, and the set selected by the most
     recent `$set' directive.  */
  struct set_list *all_sets, *current_set;
  /* Running count of messages; only used to pick the first table
     size tried when the output is written.  */
  size_t total_messages;
  /* Current quote character as a wide character; zero means none.  */
  wint_t quote_char;
  /* Highest set number seen so far; symbolic sets are numbered by
     incrementing it.  */
  int last_set;

  /* Storage for the text of the input lines that are kept.  */
  struct obstack mem_pool;
};
| |
| |
/* Set by --new: when non-zero an existing catalog is not merged into
   the output.  */
static int force_new = 0;

/* Output catalog file selected with -o, or NULL if not given.  */
static const char *output_name = NULL;

/* C header file selected with -H, or NULL if none is wanted.  */
static const char *header_name = NULL;

/* argp calls this hook to print the program name and version.  */
static void print_version (FILE *stream, struct argp_state *state);
void (*argp_program_version_hook) (FILE *, struct argp_state *) = print_version;
| |
| #define OPT_NEW 1 |
| |
| /* Definitions of arguments for argp functions. */ |
| static const struct argp_option options[] = |
| { |
| { "header", 'H', N_("NAME"), 0, |
| N_("Create C header file NAME containing symbol definitions") }, |
| { "new", OPT_NEW, NULL, 0, |
| N_("Do not use existing catalog, force new output file") }, |
| { "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") }, |
| { NULL, 0, NULL, 0, NULL } |
| }; |
| |
| /* Short description of program. */ |
| static const char doc[] = N_("Generate message catalog.\ |
| \vIf INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\ |
| is -, output is written to standard output.\n"); |
| |
| /* Strings for arguments in help texts. */ |
| static const char args_doc[] = N_("\ |
| -o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]"); |
| |
| /* Prototype for option handler. */ |
| static error_t parse_opt (int key, char *arg, struct argp_state *state); |
| |
| /* Function to print some extra text in the help message. */ |
| static char *more_help (int key, const char *text, void *input); |
| |
| /* Data structure to communicate with argp functions. */ |
| static struct argp argp = |
| { |
| options, parse_opt, args_doc, doc, NULL, more_help |
| }; |
| |
| |
| /* Wrapper functions with error checking for standard functions. */ |
| #include <programs/xmalloc.h> |
| |
/* Forward declarations of the file-local functions defined below.  */
static void error_print (void);
static struct catalog *read_input_file (struct catalog *current,
                                        const char *fname);
static void write_out (struct catalog *catalog,
                       const char *output_name,
                       const char *header_name);
static struct set_list *find_set (struct catalog *current, int number);
static void normalize_line (const char *fname, size_t line,
                            iconv_t cd, wchar_t *string,
                            wchar_t quote_char, wchar_t escape_char);
static void read_old (struct catalog *catalog, const char *file_name);
static int open_conversion (const char *codesetp,
                            iconv_t *cd_towcp, iconv_t *cd_tombp,
                            wchar_t *escape_charp);
| |
| |
| int |
| main (int argc, char *argv[]) |
| { |
| struct catalog *result; |
| int remaining; |
| |
| /* Set program name for messages. */ |
| error_print_progname = error_print; |
| |
| /* Set locale via LC_ALL. */ |
| setlocale (LC_ALL, ""); |
| |
| /* Set the text message domain. */ |
| textdomain (PACKAGE); |
| |
| /* Initialize local variables. */ |
| result = NULL; |
| |
| /* Parse and process arguments. */ |
| argp_parse (&argp, argc, argv, 0, &remaining, NULL); |
| |
| /* Determine output file. */ |
| if (output_name == NULL) |
| output_name = remaining < argc ? argv[remaining++] : "-"; |
| |
| /* Process all input files. */ |
| setlocale (LC_CTYPE, "C"); |
| if (remaining < argc) |
| do |
| result = read_input_file (result, argv[remaining]); |
| while (++remaining < argc); |
| else |
| result = read_input_file (NULL, "-"); |
| |
| /* Write out the result. */ |
| if (result != NULL) |
| write_out (result, output_name, header_name); |
| |
| return error_message_count != 0; |
| } |
| |
| |
| /* Handle program arguments. */ |
| static error_t |
| parse_opt (int key, char *arg, struct argp_state *state) |
| { |
| switch (key) |
| { |
| case 'H': |
| header_name = arg; |
| break; |
| case OPT_NEW: |
| force_new = 1; |
| break; |
| case 'o': |
| output_name = arg; |
| break; |
| default: |
| return ARGP_ERR_UNKNOWN; |
| } |
| return 0; |
| } |
| |
| |
| static char * |
| more_help (int key, const char *text, void *input) |
| { |
| char *tp = NULL; |
| switch (key) |
| { |
| case ARGP_KEY_HELP_EXTRA: |
| /* We print some extra information. */ |
| if (asprintf (&tp, gettext ("\ |
| For bug reporting instructions, please see:\n\ |
| %s.\n"), REPORT_BUGS_TO) < 0) |
| return NULL; |
| return tp; |
| default: |
| break; |
| } |
| return (char *) text; |
| } |
| |
| /* Print the version information. */ |
| static void |
| print_version (FILE *stream, struct argp_state *state) |
| { |
| fprintf (stream, "gencat %s%s\n", PKGVERSION, VERSION); |
| fprintf (stream, gettext ("\ |
| Copyright (C) %s Free Software Foundation, Inc.\n\ |
| This is free software; see the source for copying conditions. There is NO\n\ |
| warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\ |
| "), "2014"); |
| fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper"); |
| } |
| |
| |
/* Replacement for the program-name printer of the error functions;
   main stores its address in error_print_progname.  */
static void
error_print (void)
{
  /* Deliberately empty: messages must not start with the program
     name because Emacs' compile.el cannot cope with that.  */
}
| |
| |
| static struct catalog * |
| read_input_file (struct catalog *current, const char *fname) |
| { |
| FILE *fp; |
| char *buf; |
| size_t len; |
| size_t line_number; |
| wchar_t *wbuf; |
| size_t wbufsize; |
| iconv_t cd_towc = (iconv_t) -1; |
| iconv_t cd_tomb = (iconv_t) -1; |
| wchar_t escape_char = L'\\'; |
| char *codeset = NULL; |
| |
| if (strcmp (fname, "-") == 0 || strcmp (fname, "/dev/stdin") == 0) |
| { |
| fp = stdin; |
| fname = gettext ("*standard input*"); |
| } |
| else |
| fp = fopen (fname, "r"); |
| if (fp == NULL) |
| { |
| error (0, errno, gettext ("cannot open input file `%s'"), fname); |
| return current; |
| } |
| |
| /* If we haven't seen anything yet, allocate result structure. */ |
| if (current == NULL) |
| { |
| current = (struct catalog *) xcalloc (1, sizeof (*current)); |
| |
| #define obstack_chunk_alloc malloc |
| #define obstack_chunk_free free |
| obstack_init (¤t->mem_pool); |
| |
| current->current_set = find_set (current, NL_SETD); |
| } |
| |
| buf = NULL; |
| len = 0; |
| line_number = 0; |
| |
| wbufsize = 1024; |
| wbuf = (wchar_t *) xmalloc (wbufsize); |
| |
| while (!feof (fp)) |
| { |
| int continued; |
| int used; |
| size_t start_line = line_number + 1; |
| char *this_line; |
| |
| do |
| { |
| int act_len; |
| |
| act_len = getline (&buf, &len, fp); |
| if (act_len <= 0) |
| break; |
| ++line_number; |
| |
| /* It the line continued? */ |
| continued = 0; |
| if (buf[act_len - 1] == '\n') |
| { |
| --act_len; |
| |
| /* There might be more than one backslash at the end of |
| the line. Only if there is an odd number of them is |
| the line continued. */ |
| if (act_len > 0 && buf[act_len - 1] == '\\') |
| { |
| int temp_act_len = act_len; |
| |
| do |
| { |
| --temp_act_len; |
| continued = !continued; |
| } |
| while (temp_act_len > 0 && buf[temp_act_len - 1] == '\\'); |
| |
| if (continued) |
| --act_len; |
| } |
| } |
| |
| /* Append to currently selected line. */ |
| obstack_grow (¤t->mem_pool, buf, act_len); |
| } |
| while (continued); |
| |
| obstack_1grow (¤t->mem_pool, '\0'); |
| this_line = (char *) obstack_finish (¤t->mem_pool); |
| |
| used = 0; |
| if (this_line[0] == '$') |
| { |
| if (isblank (this_line[1])) |
| { |
| int cnt = 1; |
| while (isblank (this_line[cnt])) |
| ++cnt; |
| if (strncmp (&this_line[cnt], "codeset=", 8) != 0) |
| /* This is a comment line. Do nothing. */; |
| else if (codeset != NULL) |
| /* Ignore multiple codeset. */; |
| else |
| { |
| int start = cnt + 8; |
| cnt = start; |
| while (this_line[cnt] != '\0' && !isspace (this_line[cnt])) |
| ++cnt; |
| if (cnt != start) |
| { |
| int len = cnt - start; |
| codeset = xmalloc (len + 1); |
| *((char *) mempcpy (codeset, &this_line[start], len)) |
| = '\0'; |
| } |
| } |
| } |
| else if (strncmp (&this_line[1], "set", 3) == 0) |
| { |
| int cnt = sizeof ("set"); |
| int set_number; |
| const char *symbol = NULL; |
| while (isspace (this_line[cnt])) |
| ++cnt; |
| |
| if (isdigit (this_line[cnt])) |
| { |
| set_number = atol (&this_line[cnt]); |
| |
| /* If the given number for the character set is |
| higher than any we used for symbolic set names |
| avoid clashing by using only higher numbers for |
| the following symbolic definitions. */ |
| if (set_number > current->last_set) |
| current->last_set = set_number; |
| } |
| else |
| { |
| /* See whether it is a reasonable identifier. */ |
| int start = cnt; |
| while (isalnum (this_line[cnt]) || this_line[cnt] == '_') |
| ++cnt; |
| |
| if (cnt == start) |
| { |
| /* No correct character found. */ |
| error_at_line (0, 0, fname, start_line, |
| gettext ("illegal set number")); |
| set_number = 0; |
| } |
| else |
| { |
| /* We have found seomthing that looks like a |
| correct identifier. */ |
| struct set_list *runp; |
| |
| this_line[cnt] = '\0'; |
| used = 1; |
| symbol = &this_line[start]; |
| |
| /* Test whether the identifier was already used. */ |
| runp = current->all_sets; |
| while (runp != 0) |
| if (runp->symbol != NULL |
| && strcmp (runp->symbol, symbol) == 0) |
| break; |
| else |
| runp = runp->next; |
| |
| if (runp != NULL) |
| { |
| /* We cannot allow duplicate identifiers for |
| message sets. */ |
| error_at_line (0, 0, fname, start_line, |
| gettext ("duplicate set definition")); |
| error_at_line (0, 0, runp->fname, runp->line, |
| gettext ("\ |
| this is the first definition")); |
| set_number = 0; |
| } |
| else |
| /* Allocate next free message set for identifier. */ |
| set_number = ++current->last_set; |
| } |
| } |
| |
| if (set_number != 0) |
| { |
| /* We found a legal set number. */ |
| current->current_set = find_set (current, set_number); |
| if (symbol != NULL) |
| used = 1; |
| current->current_set->symbol = symbol; |
| current->current_set->fname = fname; |
| current->current_set->line = start_line; |
| } |
| } |
| else if (strncmp (&this_line[1], "delset", 6) == 0) |
| { |
| int cnt = sizeof ("delset"); |
| while (isspace (this_line[cnt])) |
| ++cnt; |
| |
| if (isdigit (this_line[cnt])) |
| { |
| size_t set_number = atol (&this_line[cnt]); |
| struct set_list *set; |
| |
| /* Mark the message set with the given number as |
| deleted. */ |
| set = find_set (current, set_number); |
| set->deleted = 1; |
| } |
| else |
| { |
| /* See whether it is a reasonable identifier. */ |
| int start = cnt; |
| while (isalnum (this_line[cnt]) || this_line[cnt] == '_') |
| ++cnt; |
| |
| if (cnt == start) |
| error_at_line (0, 0, fname, start_line, |
| gettext ("illegal set number")); |
| else |
| { |
| const char *symbol; |
| struct set_list *runp; |
| |
| this_line[cnt] = '\0'; |
| used = 1; |
| symbol = &this_line[start]; |
| |
| /* We have a symbolic set name. This name must |
| appear somewhere else in the catalogs read so |
| far. */ |
| for (runp = current->all_sets; runp != NULL; |
| runp = runp->next) |
| { |
| if (strcmp (runp->symbol, symbol) == 0) |
| { |
| runp->deleted = 1; |
| break; |
| } |
| } |
| if (runp == NULL) |
| /* Name does not exist before. */ |
| error_at_line (0, 0, fname, start_line, |
| gettext ("unknown set `%s'"), symbol); |
| } |
| } |
| } |
| else if (strncmp (&this_line[1], "quote", 5) == 0) |
| { |
| char buf[2]; |
| char *bufptr; |
| size_t buflen; |
| char *wbufptr; |
| size_t wbuflen; |
| int cnt; |
| |
| cnt = sizeof ("quote"); |
| while (isspace (this_line[cnt])) |
| ++cnt; |
| |
| /* We need the conversion. */ |
| if (cd_towc == (iconv_t) -1 |
| && open_conversion (codeset, &cd_towc, &cd_tomb, |
| &escape_char) != 0) |
| /* Something is wrong. */ |
| goto out; |
| |
| /* Yes, the quote char can be '\0'; this means no quote |
| char. The function using the information works on |
| wide characters so we have to convert it here. */ |
| buf[0] = this_line[cnt]; |
| buf[1] = '\0'; |
| bufptr = buf; |
| buflen = 2; |
| |
| wbufptr = (char *) wbuf; |
| wbuflen = wbufsize; |
| |
| /* Flush the state. */ |
| iconv (cd_towc, NULL, NULL, NULL, NULL); |
| |
| iconv (cd_towc, &bufptr, &buflen, &wbufptr, &wbuflen); |
| if (buflen != 0 || (wchar_t *) wbufptr != &wbuf[2]) |
| error_at_line (0, 0, fname, start_line, |
| gettext ("invalid quote character")); |
| else |
| /* Use the converted wide character. */ |
| current->quote_char = wbuf[0]; |
| } |
| else |
| { |
| int cnt; |
| cnt = 2; |
| while (this_line[cnt] != '\0' && !isspace (this_line[cnt])) |
| ++cnt; |
| this_line[cnt] = '\0'; |
| error_at_line (0, 0, fname, start_line, |
| gettext ("unknown directive `%s': line ignored"), |
| &this_line[1]); |
| } |
| } |
| else if (isalnum (this_line[0]) || this_line[0] == '_') |
| { |
| const char *ident = this_line; |
| char *line = this_line; |
| int message_number; |
| |
| do |
| ++line; |
| while (line[0] != '\0' && !isspace (line[0])); |
| if (line[0] != '\0') |
| *line++ = '\0'; /* Terminate the identifier. */ |
| |
| /* Now we found the beginning of the message itself. */ |
| |
| if (isdigit (ident[0])) |
| { |
| struct message_list *runp; |
| struct message_list *lastp; |
| |
| message_number = atoi (ident); |
| |
| /* Find location to insert the new message. */ |
| runp = current->current_set->messages; |
| lastp = NULL; |
| while (runp != NULL) |
| if (runp->number == message_number) |
| break; |
| else |
| { |
| lastp = runp; |
| runp = runp->next; |
| } |
| if (runp != NULL) |
| { |
| /* Oh, oh. There is already a message with this |
| number in the message set. */ |
| if (runp->symbol == NULL) |
| { |
| /* The existing message had its number specified |
| by the user. Fatal collision type uh, oh. */ |
| error_at_line (0, 0, fname, start_line, |
| gettext ("duplicated message number")); |
| error_at_line (0, 0, runp->fname, runp->line, |
| gettext ("this is the first definition")); |
| message_number = 0; |
| } |
| else |
| { |
| /* Collision was with number auto-assigned to a |
| symbolic. Change existing symbolic number |
| and move to end the list (if not already there). */ |
| runp->number = ++current->current_set->last_message; |
| |
| if (runp->next != NULL) |
| { |
| struct message_list *endp; |
| |
| if (lastp == NULL) |
| current->current_set->messages=runp->next; |
| else |
| lastp->next=runp->next; |
| |
| endp = runp->next; |
| while (endp->next != NULL) |
| endp = endp->next; |
| |
| endp->next = runp; |
| runp->next = NULL; |
| } |
| } |
| } |
| ident = NULL; /* We don't have a symbol. */ |
| |
| if (message_number != 0 |
| && message_number > current->current_set->last_message) |
| current->current_set->last_message = message_number; |
| } |
| else if (ident[0] != '\0') |
| { |
| struct message_list *runp; |
| |
| /* Test whether the symbolic name was not used for |
| another message in this message set. */ |
| runp = current->current_set->messages; |
| while (runp != NULL) |
| if (runp->symbol != NULL && strcmp (ident, runp->symbol) == 0) |
| break; |
| else |
| runp = runp->next; |
| if (runp != NULL) |
| { |
| /* The name is already used. */ |
| error_at_line (0, 0, fname, start_line, gettext ("\ |
| duplicated message identifier")); |
| error_at_line (0, 0, runp->fname, runp->line, |
| gettext ("this is the first definition")); |
| message_number = 0; |
| } |
| else |
| /* Give the message the next unused number. */ |
| message_number = ++current->current_set->last_message; |
| } |
| else |
| message_number = 0; |
| |
| if (message_number != 0) |
| { |
| char *inbuf; |
| size_t inlen; |
| char *outbuf; |
| size_t outlen; |
| struct message_list *newp; |
| size_t line_len = strlen (line) + 1; |
| size_t ident_len = 0; |
| |
| /* We need the conversion. */ |
| if (cd_towc == (iconv_t) -1 |
| && open_conversion (codeset, &cd_towc, &cd_tomb, |
| &escape_char) != 0) |
| /* Something is wrong. */ |
| goto out; |
| |
| /* Convert to a wide character string. We have to |
| interpret escape sequences which will be impossible |
| without doing the conversion if the codeset of the |
| message is stateful. */ |
| while (1) |
| { |
| inbuf = line; |
| inlen = line_len; |
| outbuf = (char *) wbuf; |
| outlen = wbufsize; |
| |
| /* Flush the state. */ |
| iconv (cd_towc, NULL, NULL, NULL, NULL); |
| |
| iconv (cd_towc, &inbuf, &inlen, &outbuf, &outlen); |
| if (inlen == 0) |
| { |
| /* The string is converted. */ |
| assert (outlen < wbufsize); |
| assert (wbuf[(wbufsize - outlen) / sizeof (wchar_t) - 1] |
| == L'\0'); |
| break; |
| } |
| |
| if (outlen != 0) |
| { |
| /* Something is wrong with this string, we ignore it. */ |
| error_at_line (0, 0, fname, start_line, gettext ("\ |
| invalid character: message ignored")); |
| goto ignore; |
| } |
| |
| /* The output buffer is too small. */ |
| wbufsize *= 2; |
| wbuf = (wchar_t *) xrealloc (wbuf, wbufsize); |
| } |
| |
| /* Strip quote characters, change escape sequences into |
| correct characters etc. */ |
| normalize_line (fname, start_line, cd_towc, wbuf, |
| current->quote_char, escape_char); |
| |
| if (ident) |
| ident_len = line - this_line; |
| |
| /* Now the string is free of escape sequences. Convert it |
| back into a multibyte character string. First free the |
| memory allocated for the original string. */ |
| obstack_free (¤t->mem_pool, this_line); |
| |
| used = 1; /* Yes, we use the line. */ |
| |
| /* Now fill in the new string. It should never happen that |
| the replaced string is longer than the original. */ |
| inbuf = (char *) wbuf; |
| inlen = (wcslen (wbuf) + 1) * sizeof (wchar_t); |
| |
| outlen = obstack_room (¤t->mem_pool); |
| obstack_blank (¤t->mem_pool, outlen); |
| this_line = (char *) obstack_base (¤t->mem_pool); |
| outbuf = this_line + ident_len; |
| outlen -= ident_len; |
| |
| /* Flush the state. */ |
| iconv (cd_tomb, NULL, NULL, NULL, NULL); |
| |
| iconv (cd_tomb, &inbuf, &inlen, &outbuf, &outlen); |
| if (inlen != 0) |
| { |
| error_at_line (0, 0, fname, start_line, |
| gettext ("invalid line")); |
| goto ignore; |
| } |
| assert (outbuf[-1] == '\0'); |
| |
| /* Free the memory in the obstack we don't use. */ |
| obstack_blank (¤t->mem_pool, -(int) outlen); |
| line = obstack_finish (¤t->mem_pool); |
| |
| newp = (struct message_list *) xmalloc (sizeof (*newp)); |
| newp->number = message_number; |
| newp->message = line + ident_len; |
| /* Remember symbolic name; is NULL if no is given. */ |
| newp->symbol = ident ? line : NULL; |
| /* Remember where we found the character. */ |
| newp->fname = fname; |
| newp->line = start_line; |
| |
| /* Find place to insert to message. We keep them in a |
| sorted single linked list. */ |
| if (current->current_set->messages == NULL |
| || current->current_set->messages->number > message_number) |
| { |
| newp->next = current->current_set->messages; |
| current->current_set->messages = newp; |
| } |
| else |
| { |
| struct message_list *runp; |
| runp = current->current_set->messages; |
| while (runp->next != NULL) |
| if (runp->next->number > message_number) |
| break; |
| else |
| runp = runp->next; |
| newp->next = runp->next; |
| runp->next = newp; |
| } |
| } |
| ++current->total_messages; |
| } |
| else |
| { |
| size_t cnt; |
| |
| cnt = 0; |
| /* See whether we have any non-white space character in this |
| line. */ |
| while (this_line[cnt] != '\0' && isspace (this_line[cnt])) |
| ++cnt; |
| |
| if (this_line[cnt] != '\0') |
| /* Yes, some unknown characters found. */ |
| error_at_line (0, 0, fname, start_line, |
| gettext ("malformed line ignored")); |
| } |
| |
| ignore: |
| /* We can save the memory for the line if it was not used. */ |
| if (!used) |
| obstack_free (¤t->mem_pool, this_line); |
| } |
| |
| /* Close the conversion modules. */ |
| iconv_close (cd_towc); |
| iconv_close (cd_tomb); |
| free (codeset); |
| |
| out: |
| free (wbuf); |
| |
| if (fp != stdin) |
| fclose (fp); |
| return current; |
| } |
| |
| |
| static void |
| write_out (struct catalog *catalog, const char *output_name, |
| const char *header_name) |
| { |
| /* Computing the "optimal" size. */ |
| struct set_list *set_run; |
| size_t best_total, best_size, best_depth; |
| size_t act_size, act_depth; |
| struct catalog_obj obj; |
| struct obstack string_pool; |
| const char *strings; |
| size_t strings_size; |
| uint32_t *array1, *array2; |
| size_t cnt; |
| int fd; |
| |
| /* If not otherwise told try to read file with existing |
| translations. */ |
| if (!force_new) |
| read_old (catalog, output_name); |
| |
| /* Initialize best_size with a very high value. */ |
| best_total = best_size = best_depth = UINT_MAX; |
| |
| /* We need some start size for testing. Let's start with |
| TOTAL_MESSAGES / 5, which theoretically provides a mean depth of |
| 5. */ |
| act_size = 1 + catalog->total_messages / 5; |
| |
| /* We determine the size of a hash table here. Because the message |
| numbers can be chosen arbitrary by the programmer we cannot use |
| the simple method of accessing the array using the message |
| number. The algorithm is based on the trivial hash function |
| NUMBER % TABLE_SIZE, where collisions are stored in a second |
| dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that |
| the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. */ |
| while (act_size <= best_total) |
| { |
| size_t deep[act_size]; |
| |
| act_depth = 1; |
| memset (deep, '\0', act_size * sizeof (size_t)); |
| set_run = catalog->all_sets; |
| while (set_run != NULL) |
| { |
| struct message_list *message_run; |
| |
| message_run = set_run->messages; |
| while (message_run != NULL) |
| { |
| size_t idx = (message_run->number * set_run->number) % act_size; |
| |
| ++deep[idx]; |
| if (deep[idx] > act_depth) |
| { |
| act_depth = deep[idx]; |
| if (act_depth * act_size > best_total) |
| break; |
| } |
| message_run = message_run->next; |
| } |
| set_run = set_run->next; |
| } |
| |
| if (act_depth * act_size <= best_total) |
| { |
| /* We have found a better solution. */ |
| best_total = act_depth * act_size; |
| best_size = act_size; |
| best_depth = act_depth; |
| } |
| |
| ++act_size; |
| } |
| |
| /* let's be prepared for an empty message file. */ |
| if (best_size == UINT_MAX) |
| { |
| best_size = 1; |
| best_depth = 1; |
| } |
| |
| /* OK, now we have the size we will use. Fill in the header, build |
| the table and the second one with swapped byte order. */ |
| obj.magic = CATGETS_MAGIC; |
| obj.plane_size = best_size; |
| obj.plane_depth = best_depth; |
| |
| /* Allocate room for all needed arrays. */ |
| array1 = |
| (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3); |
| memset (array1, '\0', best_size * best_depth * sizeof (uint32_t) * 3); |
| array2 |
| = (uint32_t *) alloca (best_size * best_depth * sizeof (uint32_t) * 3); |
| obstack_init (&string_pool); |
| |
| set_run = catalog->all_sets; |
| while (set_run != NULL) |
| { |
| struct message_list *message_run; |
| |
| message_run = set_run->messages; |
| while (message_run != NULL) |
| { |
| size_t idx = (((message_run->number * set_run->number) % best_size) |
| * 3); |
| /* Determine collision depth. */ |
| while (array1[idx] != 0) |
| idx += best_size * 3; |
| |
| /* Store set number, message number and pointer into string |
| space, relative to the first string. */ |
| array1[idx + 0] = set_run->number; |
| array1[idx + 1] = message_run->number; |
| array1[idx + 2] = obstack_object_size (&string_pool); |
| |
| /* Add current string to the continuous space containing all |
| strings. */ |
| obstack_grow0 (&string_pool, message_run->message, |
| strlen (message_run->message)); |
| |
| message_run = message_run->next; |
| } |
| |
| set_run = set_run->next; |
| } |
| strings_size = obstack_object_size (&string_pool); |
| strings = obstack_finish (&string_pool); |
| |
| /* Compute ARRAY2 by changing the byte order. */ |
| for (cnt = 0; cnt < best_size * best_depth * 3; ++cnt) |
| array2[cnt] = SWAPU32 (array1[cnt]); |
| |
| /* Now we can write out the whole data. */ |
| if (strcmp (output_name, "-") == 0 |
| || strcmp (output_name, "/dev/stdout") == 0) |
| fd = STDOUT_FILENO; |
| else |
| { |
| fd = creat (output_name, 0666); |
| if (fd < 0) |
| error (EXIT_FAILURE, errno, gettext ("cannot open output file `%s'"), |
| output_name); |
| } |
| |
| /* Write out header. */ |
| write (fd, &obj, sizeof (obj)); |
| |
| /* We always write out the little endian version of the index |
| arrays. */ |
| #if __BYTE_ORDER == __LITTLE_ENDIAN |
| write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3); |
| write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3); |
| #elif __BYTE_ORDER == __BIG_ENDIAN |
| write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3); |
| write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3); |
| #else |
| # error Cannot handle __BYTE_ORDER byte order |
| #endif |
| |
| /* Finally write the strings. */ |
| write (fd, strings, strings_size); |
| |
| if (fd != STDOUT_FILENO) |
| close (fd); |
| |
| /* If requested now write out the header file. */ |
| if (header_name != NULL) |
| { |
| int first = 1; |
| FILE *fp; |
| |
| /* Open output file. "-" or "/dev/stdout" means write to |
| standard output. */ |
| if (strcmp (header_name, "-") == 0 |
| || strcmp (header_name, "/dev/stdout") == 0) |
| fp = stdout; |
| else |
| { |
| fp = fopen (header_name, "w"); |
| if (fp == NULL) |
| error (EXIT_FAILURE, errno, |
| gettext ("cannot open output file `%s'"), header_name); |
| } |
| |
| /* Iterate over all sets and all messages. */ |
| set_run = catalog->all_sets; |
| while (set_run != NULL) |
| { |
| struct message_list *message_run; |
| |
| /* If the current message set has a symbolic name write this |
| out first. */ |
| if (set_run->symbol != NULL) |
| fprintf (fp, "%s#define %sSet %#x\t/* %s:%Zu */\n", |
| first ? "" : "\n", set_run->symbol, set_run->number - 1, |
| set_run->fname, set_run->line); |
| first = 0; |
| |
| message_run = set_run->messages; |
| while (message_run != NULL) |
| { |
| /* If the current message has a symbolic name write |
| #define out. But we have to take care for the set |
| not having a symbolic name. */ |
| if (message_run->symbol != NULL) |
| { |
| if (set_run->symbol == NULL) |
| fprintf (fp, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n", |
| set_run->number, message_run->symbol, |
| message_run->number, message_run->fname, |
| message_run->line); |
| else |
| fprintf (fp, "#define %s%s %#x\t/* %s:%Zu */\n", |
| set_run->symbol, message_run->symbol, |
| message_run->number, message_run->fname, |
| message_run->line); |
| } |
| |
| message_run = message_run->next; |
| } |
| |
| set_run = set_run->next; |
| } |
| |
| if (fp != stdout) |
| fclose (fp); |
| } |
| } |
| |
| |
| static struct set_list * |
| find_set (struct catalog *current, int number) |
| { |
| struct set_list *result = current->all_sets; |
| |
| /* We must avoid set number 0 because a set of this number signals |
| in the tables that the entry is not occupied. */ |
| ++number; |
| |
| while (result != NULL) |
| if (result->number == number) |
| return result; |
| else |
| result = result->next; |
| |
| /* Prepare new message set. */ |
| result = (struct set_list *) xcalloc (1, sizeof (*result)); |
| result->number = number; |
| result->next = current->all_sets; |
| current->all_sets = result; |
| |
| return result; |
| } |
| |
| |
/* Rewrite STRING in place: drop the enclosing quote characters and
   replace escape sequences by the characters they stand for.

   QUOTE_CHAR is the active quote character (L'\0' for none) and
   ESCAPE_CHAR the character introducing an escape sequence.  CD must
   convert from the input codeset to wchar_t; it is used for octal
   escapes, which denote a byte of the input codeset.  FNAME and LINE
   are only used in diagnostics.  */
static void
normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
                wchar_t quote_char, wchar_t escape_char)
{
  wchar_t *src = string;
  wchar_t *dst = string;
  const int is_quoted = quote_char != L'\0' && *src == quote_char;

  if (is_quoted)
    ++src;

  /* The first quote character which is not escaped ends the string.  */
  while (*src != L'\0' && *src != quote_char)
    {
      wchar_t simple;

      if (*src != escape_char)
        {
          /* Ordinary character.  */
          *dst++ = *src++;
          continue;
        }

      /* Skip the escape character and look at what follows.  */
      ++src;

      if (quote_char != L'\0' && *src == quote_char)
        {
          /* An escaped quote character; this is an extension to XPG.  */
          *dst++ = *src++;
          continue;
        }

      /* Escape sequences which stand for exactly one character.  */
      switch (*src)
        {
        case L'n':
          simple = L'\n';
          break;
        case L't':
          simple = L'\t';
          break;
        case L'v':
          simple = L'\v';
          break;
        case L'b':
          simple = L'\b';
          break;
        case L'r':
          simple = L'\r';
          break;
        case L'f':
          simple = L'\f';
          break;
        default:
          simple = L'\0';
          break;
        }

      if (simple != L'\0')
        {
          *dst++ = simple;
          ++src;
        }
      else if (*src >= L'0' && *src <= L'7')
        {
          /* Octal escape.  Digits are consumed only as long as the
             value cannot exceed 255.  */
          char mb[2];
          char *mbptr = mb;
          size_t mbleft = 2;
          wchar_t wc[2];
          char *wcptr = (char *) wc;
          size_t wcleft = sizeof (wc);
          int value = *src++ - L'0';

          while (value <= (255 / 8) && *src >= L'0' && *src <= L'7')
            {
              value *= 8;
              value += *src++ - L'0';
            }

          mb[0] = (char) value;
          mb[1] = '\0';

          /* Flush the state.  */
          iconv (cd, NULL, NULL, NULL, NULL);

          iconv (cd, &mbptr, &mbleft, &wcptr, &wcleft);
          if (mbptr != &mb[2] || (wchar_t *) wcptr != &wc[2])
            error_at_line (0, 0, fname, line,
                           gettext ("invalid escape sequence"));
          else
            *dst++ = wc[0];
        }
      else if (*src == escape_char)
        {
          /* A doubled escape character stands for itself.  */
          *dst++ = escape_char;
          ++src;
        }
      /* Otherwise the escape character is simply dropped; the
         character after it is handled by the next iteration.  */
    }

  /* If we saw a quote character at the beginning we expect another
     one at the end.  */
  if (is_quoted && *src != quote_char)
    error_at_line (0, 0, fname, line, gettext ("unterminated message"));

  /* Terminate string.  */
  *dst = L'\0';
}
| |
| |
| static void |
| read_old (struct catalog *catalog, const char *file_name) |
| { |
| struct catalog_info old_cat_obj; |
| struct set_list *set = NULL; |
| int last_set = -1; |
| size_t cnt; |
| |
| /* Try to open catalog, but don't look through the NLSPATH. */ |
| if (__open_catalog (file_name, NULL, NULL, &old_cat_obj) != 0) |
| { |
| if (errno == ENOENT) |
| /* No problem, the catalog simply does not exist. */ |
| return; |
| else |
| error (EXIT_FAILURE, errno, |
| gettext ("while opening old catalog file")); |
| } |
| |
| /* OK, we have the catalog loaded. Now read all messages and merge |
| them. When set and message number clash for any message the new |
| one is used. If the new one is empty it indicates that the |
| message should be deleted. */ |
| for (cnt = 0; cnt < old_cat_obj.plane_size * old_cat_obj.plane_depth; ++cnt) |
| { |
| struct message_list *message, *last; |
| |
| if (old_cat_obj.name_ptr[cnt * 3 + 0] == 0) |
| /* No message in this slot. */ |
| continue; |
| |
| if (old_cat_obj.name_ptr[cnt * 3 + 0] - 1 != (uint32_t) last_set) |
| { |
| last_set = old_cat_obj.name_ptr[cnt * 3 + 0] - 1; |
| set = find_set (catalog, old_cat_obj.name_ptr[cnt * 3 + 0] - 1); |
| } |
| |
| last = NULL; |
| message = set->messages; |
| while (message != NULL) |
| { |
| if ((uint32_t) message->number >= old_cat_obj.name_ptr[cnt * 3 + 1]) |
| break; |
| last = message; |
| message = message->next; |
| } |
| |
| if (message == NULL |
| || (uint32_t) message->number > old_cat_obj.name_ptr[cnt * 3 + 1]) |
| { |
| /* We have found a message which is not yet in the catalog. |
| Insert it at the right position. */ |
| struct message_list *newp; |
| |
| newp = (struct message_list *) xmalloc (sizeof(*newp)); |
| newp->number = old_cat_obj.name_ptr[cnt * 3 + 1]; |
| newp->message = |
| &old_cat_obj.strings[old_cat_obj.name_ptr[cnt * 3 + 2]]; |
| newp->fname = NULL; |
| newp->line = 0; |
| newp->symbol = NULL; |
| newp->next = message; |
| |
| if (last == NULL) |
| set->messages = newp; |
| else |
| last->next = newp; |
| |
| ++catalog->total_messages; |
| } |
| else if (*message->message == '\0') |
| { |
| /* The new empty message has overridden the old one thus |
| "deleting" it as required. Now remove the empty remains. */ |
| if (last == NULL) |
| set->messages = message->next; |
| else |
| last->next = message->next; |
| } |
| } |
| } |
| |
| |
/* Open the conversions between CODESET and wchar_t.

   If CODESET is NULL the codeset of the LC_CTYPE locale selected by
   the environment is used.  On success 0 is returned, *CD_TOWCP
   converts from CODESET to wchar_t, *CD_TOMBP converts back, and
   *ESCAPE_CHARP is the wide character the byte 0x5c converts to
   (L'\\' if that cannot be determined).  On failure a message is
   printed, 1 is returned, no descriptor is left open and both
   *CD_TOWCP and *CD_TOMBP are (iconv_t) -1.  */
static int
open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp,
                 wchar_t *escape_charp)
{
  char buf[2];
  char *bufptr;
  size_t bufsize;
  wchar_t wbuf[2];
  char *wbufptr;
  size_t wbufsize;
  const int use_locale = codeset == NULL;

  /* If the input file does not specify the codeset use the locale's.
     Only LC_CTYPE is needed for this, and it is switched back to "C"
     only after the name has been used: the string nl_langinfo returns
     may be invalidated by the next setlocale call.  */
  if (use_locale)
    {
      setlocale (LC_CTYPE, "");
      codeset = nl_langinfo (CODESET);
    }

  /* Get the conversion modules.  */
  *cd_towcp = iconv_open ("WCHAR_T", codeset);
  *cd_tombp = iconv_open (codeset, "WCHAR_T");

  if (use_locale)
    setlocale (LC_CTYPE, "C");

  if (*cd_towcp == (iconv_t) -1 || *cd_tombp == (iconv_t) -1)
    {
      error (0, 0, gettext ("conversion modules not available"));

      /* Close whichever of the two could be opened and leave no stale
         descriptor behind for the caller.  */
      if (*cd_towcp != (iconv_t) -1)
        iconv_close (*cd_towcp);
      if (*cd_tombp != (iconv_t) -1)
        iconv_close (*cd_tombp);
      *cd_towcp = (iconv_t) -1;
      *cd_tombp = (iconv_t) -1;

      return 1;
    }

  /* One special case for historical reasons is the backslash
     character.  In some codesets the byte value 0x5c is not mapped to
     U005c in Unicode.  These charsets then don't have a backslash
     character at all.  Therefore we have to live with whatever the
     codeset provides and recognize, instead of the U005c, the character
     the byte value 0x5c is mapped to.  */
  buf[0] = '\\';
  buf[1] = '\0';
  bufptr = buf;
  bufsize = 2;

  wbufptr = (char *) wbuf;
  wbufsize = sizeof (wbuf);

  iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize);
  if (bufsize != 0 || wbufsize != 0)
    {
      /* Something went wrong, we couldn't convert the byte 0x5c.  Go
         on with using U005c.  */
      error (0, 0, gettext ("cannot determine escape character"));
      *escape_charp = L'\\';
    }
  else
    *escape_charp = wbuf[0];

  return 0;
}