| /* Copyright (C) 1995-2014 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| Contributed by Ulrich Drepper <drepper@gnu.org>, 1995. |
| |
| This program is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published |
| by the Free Software Foundation; version 2 of the License, or |
| (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program; if not, see <http://www.gnu.org/licenses/>. */ |
| |
| #ifdef HAVE_CONFIG_H |
| # include <config.h> |
| #endif |
| |
| #include <alloca.h> |
| #include <byteswap.h> |
| #include <endian.h> |
| #include <errno.h> |
| #include <limits.h> |
| #include <obstack.h> |
| #include <stdlib.h> |
| #include <string.h> |
| #include <wchar.h> |
| #include <wctype.h> |
| #include <stdint.h> |
| #include <sys/uio.h> |
| |
| #include "localedef.h" |
| #include "charmap.h" |
| #include "localeinfo.h" |
| #include "langinfo.h" |
| #include "linereader.h" |
| #include "locfile-token.h" |
| #include "locfile.h" |
| |
| #include <assert.h> |
| |
| |
| #ifdef PREDEFINED_CLASSES |
| /* These are the extra bits not in wctype.h since these are not preallocated |
| classes.  The constants are unsigned on purpose: `1 << 31' would shift a |
| set bit into the sign bit of an `int', which is undefined behaviour, |
| whereas `1U << 31' is well defined and yields 0x80000000. */ |
| # define _ISwspecial1 (1U << 29) |
| # define _ISwspecial2 (1U << 30) |
| # define _ISwspecial3 (1U << 31) |
| #endif |
| |
| |
| /* The bit used for representing a special class.  BITPOS turns a class |
| token into a zero-based bit number by subtracting tok_upper.  BIT and BITw |
| pass that number to _ISbit and _ISwbit respectively; in this file the |
| _ISbit form is applied to class256_collection words and the _ISwbit form to |
| class_collection words. |
| |
| ELEM expands to an lvalue: it dereferences the pointer which find_idx |
| returns for VALUE in ctype->COLLECTION (passing the matching _max and _act |
| members along).  IDX is pasted textually after each member name, so it can |
| be left empty, as the callers in ctype_finish do; presumably array members |
| pass a subscript here -- TODO confirm against the other callers. */ |
| #define BITPOS(class) ((class) - tok_upper) |
| #define BIT(class) (_ISbit (BITPOS (class))) |
| #define BITw(class) (_ISwbit (BITPOS (class))) |
| |
| #define ELEM(ctype, collection, idx, value) \ |
| *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \ |
| &ctype->collection##_act idx, value) |
| |
| |
| /* To stay compatible with former implementations the number of bits |
| for character classes is restricted to 16 for now.  Once compatibility |
| is no longer necessary the number can be increased to 32. |
| char_class_t is the element type of the ctype_b array and char_class32_t |
| the element type of the ctype32_b array in struct locale_ctype_t. */ |
| #define char_class_t uint16_t |
| #define char_class32_t uint32_t |
| |
| |
| /* Type to describe a transliteration action. We have a possibly |
| multiple character from-string and a set of multiple character |
| to-strings. All are 32bit values since this is what is used in |
| the gconv functions.  struct translit_to_t is one element of the set of |
| to-strings; the elements are chained through `next'. */ |
| struct translit_to_t |
| { |
| uint32_t *str; /* One to-string, stored as 32-bit values. */ |
| |
| struct translit_to_t *next; /* Next to-string of the same rule. */ |
| }; |
| |
| struct translit_t /* One transliteration rule; rules form a list via `next'. */ |
| { |
| uint32_t *from; /* The from-string, stored as 32-bit values. */ |
| |
| const char *fname; /* presumably the file in which the rule was read -- TODO confirm */ |
| size_t lineno; /* presumably the line of the rule in FNAME -- TODO confirm */ |
| |
| struct translit_to_t *to; /* List of to-strings belonging to FROM. */ |
| |
| struct translit_t *next; /* Next rule. */ |
| }; |
| |
| struct translit_ignore_t /* One entry of the translit_ignore list; ctype_finish orders the list by `from'. */ |
| { |
| uint32_t from; /* presumably the first character of the ignored range -- TODO confirm */ |
| uint32_t to; /* presumably the last character of the ignored range -- TODO confirm */ |
| uint32_t step; /* presumably the increment used to walk from FROM to TO -- TODO confirm */ |
| |
| const char *fname; /* presumably the defining file -- TODO confirm */ |
| size_t lineno; /* presumably the defining line in FNAME -- TODO confirm */ |
| |
| struct translit_ignore_t *next; /* Next entry of the list. */ |
| }; |
| |
| |
| /* Type to describe a transliteration include statement.  The entries |
| form a singly linked list through `next'. */ |
| struct translit_include_t |
| { |
| const char *copy_locale; /* presumably the name of the locale to include from -- TODO confirm */ |
| const char *copy_repertoire; /* presumably the repertoire to use for that locale -- TODO confirm */ |
| |
| struct translit_include_t *next; /* Next include statement. */ |
| }; |
| |
| |
| /* Sparse table of uint32_t.  "3level.h" is included three times below, |
| each time after TABLE, ELEMENT and DEFAULT have been defined anew. |
| Presumably every inclusion generates `struct TABLE' together with |
| functions named TABLE_xxx (idx_table_init is called in ctype_startup) and |
| undefines its parameter macros again -- TODO confirm in 3level.h. */ |
| #define TABLE idx_table |
| #define ELEMENT uint32_t |
| #define DEFAULT ((uint32_t) ~0) |
| #define NO_ADD_LOCALE /* presumably suppresses the add_locale_ function for this table -- TODO confirm */ |
| #include "3level.h" |
| |
| #define TABLE wcwidth_table /* Table with uint8_t elements; 0xff is the default entry. */ |
| #define ELEMENT uint8_t |
| #define DEFAULT 0xff |
| #include "3level.h" |
| |
| #define TABLE wctrans_table /* Table with int32_t elements; 0 is the default entry. */ |
| #define ELEMENT int32_t |
| #define DEFAULT 0 |
| #define wctrans_table_add wctrans_table_add_internal /* Rename the generated function; a wrapper named wctrans_table_add follows. */ |
| #include "3level.h" |
| #undef wctrans_table_add |
| /* Record in T that WC is mapped to MAPPED_WC.  The table does not hold |
|    the target character itself but its distance from the argument, which |
|    is why the difference is computed before calling the internal function.  */ |
| static inline void |
| wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc) |
| { |
|   const uint32_t delta = mapped_wc - wc; |
| |
|   wctrans_table_add_internal (t, wc, delta); |
| } |
| |
| /* Construction of sparse 3-level tables. |
| See wchar-lookup.h for their structure and the meaning of p and q. |
| Each level has an xxx_alloc, an xxx_size and a pointer member; presumably |
| these are the number of allocated entries, the number of used entries and |
| the storage itself -- TODO confirm against the functions using them. */ |
| |
| struct wctype_table |
| { |
| /* Parameters. */ |
| unsigned int p; |
| unsigned int q; |
| /* Working representation. */ |
| size_t level1_alloc; |
| size_t level1_size; |
| uint32_t *level1; |
| size_t level2_alloc; |
| size_t level2_size; |
| uint32_t *level2; |
| size_t level3_alloc; |
| size_t level3_size; |
| uint32_t *level3; |
| size_t result_size; /* presumably the size of the finished table -- TODO confirm */ |
| }; |
| |
| static void add_locale_wctype_table (struct locale_file *file, |
| struct wctype_table *t); /* Only declared here; the definition is further down in the file. */ |
| |
| /* The real definition of the struct for the LC_CTYPE locale.  A fresh |
| object is allocated zero-filled and seeded by ctype_startup, completed and |
| checked by ctype_finish and written out by ctype_output. */ |
| struct locale_ctype_t |
| { |
| uint32_t *charnames; /* Wide character values seen so far; entries 0..255 start out as 0..255. */ |
| size_t charnames_max; /* Number of entries allocated for charnames. */ |
| size_t charnames_act; /* Number of entries in use (256 after ctype_startup). */ |
| /* An index lookup table, to speedup find_idx. */ |
| struct idx_table charnames_idx; |
| |
| struct repertoire_t *repertoire; /* Set from repertoire_read in ctype_finish. */ |
| |
| /* We will allow up to 8 * sizeof (uint32_t) character classes. */ |
| #define MAX_NR_CHARCLASS (8 * sizeof (uint32_t)) |
| size_t nr_charclass; /* Number of entries used in classnames. */ |
| const char *classnames[MAX_NR_CHARCLASS]; |
| uint32_t last_class_char; /* Starts out as ILLEGAL_CHAR_VALUE. */ |
| uint32_t class256_collection[256]; /* Class bits (_ISbit layout) per 8-bit character. */ |
| uint32_t *class_collection; /* Class bits (_ISwbit layout), indexed like charnames. */ |
| size_t class_collection_max; |
| size_t class_collection_act; |
| uint32_t class_done; |
| uint32_t class_offset; /* Written as _NL_CTYPE_CLASS_OFFSET. */ |
| |
| struct charseq **mbdigits; /* Multibyte input digits, kept in groups of ten. */ |
| size_t mbdigits_act; |
| size_t mbdigits_max; |
| uint32_t *wcdigits; /* Wide character input digits, kept in groups of ten. */ |
| size_t wcdigits_act; |
| size_t wcdigits_max; |
| |
| struct charseq *mboutdigits[10]; /* Multibyte output digits 0..9. */ |
| uint32_t wcoutdigits[10]; /* Wide character output digits 0..9. */ |
| size_t outdigits_act; |
| |
| /* If the following number ever turns out to be too small simply |
| increase it. But I doubt it will. --drepper@gnu */ |
| #define MAX_NR_CHARMAP 16 |
| const char *mapnames[MAX_NR_CHARMAP]; |
| uint32_t *map_collection[MAX_NR_CHARMAP]; /* One table per map; [0] is toupper, [1] is tolower. */ |
| uint32_t map256_collection[2][256]; /* 8-bit toupper ([0]) and tolower ([1]) tables. */ |
| size_t map_collection_max[MAX_NR_CHARMAP]; |
| size_t map_collection_act[MAX_NR_CHARMAP]; |
| size_t map_collection_nr; /* Number of entries used in mapnames. */ |
| size_t last_map_idx; /* Starts out as MAX_NR_CHARMAP. */ |
| int tomap_done[MAX_NR_CHARMAP]; |
| uint32_t map_offset; /* Written as _NL_CTYPE_MAP_OFFSET. */ |
| |
| /* Transliteration information. */ |
| struct translit_include_t *translit_include; |
| struct translit_t *translit; |
| struct translit_ignore_t *translit_ignore; /* List ordered by `from' in ctype_finish. */ |
| uint32_t ntranslit_ignore; /* Length of the translit_ignore list. */ |
| |
| uint32_t *default_missing; /* Zero-terminated; ctype_output measures it with wcslen. */ |
| const char *default_missing_file; |
| size_t default_missing_lineno; |
| |
| uint32_t to_nonascii; /* Set to 1 when enc_not_ascii_compatible is set. */ |
| uint32_t nonascii_case; /* Set to 1 when the 8-bit case tables differ from the ASCII rules. */ |
| |
| /* The arrays for the binary representation. */ |
| char_class_t *ctype_b; |
| char_class32_t *ctype32_b; |
| uint32_t **map_b; |
| uint32_t **map32_b; |
| uint32_t **class_b; |
| struct wctype_table *class_3level; |
| struct wctrans_table *map_3level; |
| uint32_t *class_name_ptr; |
| uint32_t *map_name_ptr; |
| struct wcwidth_table width; |
| uint32_t mb_cur_max; |
| const char *codeset_name; /* Taken from the charmap, or "//UNKNOWN//" if it has none. */ |
| uint32_t *translit_from_idx; |
| uint32_t *translit_from_tbl; |
| uint32_t *translit_to_idx; |
| uint32_t *translit_to_tbl; |
| uint32_t translit_idx_size; |
| size_t translit_from_tbl_size; /* Size in bytes; ctype_output divides by sizeof (uint32_t). */ |
| size_t translit_to_tbl_size; /* Size in bytes; ctype_output divides by sizeof (uint32_t). */ |
| |
| struct obstack mempool; /* Initialized at the end of ctype_startup. */ |
| }; |
| |
| |
| /* Marker for an empty slot. This has the value 0xFFFFFFFF, regardless |
| of whether 'int' is 16 bit, 32 bit, or 64 bit, because the complement is |
| converted to uint32_t. */ |
| #define EMPTY ((uint32_t) ~0) |
| |
| |
| #define obstack_chunk_alloc xmalloc /* Chunk allocator used by the obstack macros. */ |
| #define obstack_chunk_free free /* Matching chunk deallocator. */ |
| |
| |
| /* Prototypes for local functions.  ctype_startup is defined right below; |
| the other functions are defined later in the file. */ |
| static void ctype_startup (struct linereader *lr, struct localedef_t *locale, |
| const struct charmap_t *charmap, |
| struct localedef_t *copy_locale, |
| int ignore_content); |
| static void ctype_class_new (struct linereader *lr, |
| struct locale_ctype_t *ctype, const char *name); |
| static void ctype_map_new (struct linereader *lr, |
| struct locale_ctype_t *ctype, |
| const char *name, const struct charmap_t *charmap); |
| static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table, |
| size_t *max, size_t *act, uint32_t idx); /* Also called with NULL for TABLE, MAX and ACT when only its side effects are wanted. */ |
| static void set_class_defaults (struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire); |
| static void allocate_arrays (struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire); |
| |
| |
| static const char *longnames[] = /* Spelled-out digit names; ctype_finish tries them when a digit is not found under its own character. */ |
| { |
| "zero", "one", "two", "three", "four", |
| "five", "six", "seven", "eight", "nine" |
| }; |
| static const char *uninames[] = /* The digits 0..9 written as Uxxxxxxxx names. */ |
| { |
| "U00000030", "U00000031", "U00000032", "U00000033", "U00000034", |
| "U00000035", "U00000036", "U00000037", "U00000038", "U00000039" |
| }; |
| static const unsigned char digits[] = "0123456789"; /* The digit characters, indexed by value. */ |
| |
| |
| /* Make sure LOCALE owns an LC_CTYPE structure. |
| |
|    Nothing is done when IGNORE_CONTENT is nonzero or when the locale |
|    already has a structure.  If COPY_LOCALE is given, its structure is |
|    shared.  Otherwise a zero-filled structure is allocated and seeded |
|    with the predefined classes and maps and with identity tables for the |
|    first 256 characters.  LR is only handed on to ctype_class_new and |
|    ctype_map_new; CHARMAP supplies mb_cur_max and is handed on to |
|    ctype_map_new.  */ |
| static void |
| ctype_startup (struct linereader *lr, struct localedef_t *locale, |
|                const struct charmap_t *charmap, |
|                struct localedef_t *copy_locale, int ignore_content) |
| { |
|   /* The order of the names determines the bit positions of the |
|      classes, so it must not be changed.  */ |
|   static const char *const class_names[] = |
|     { |
|       "upper", "lower", "alpha", "digit", "xdigit", "space", |
|       "print", "graph", "blank", "cntrl", "punct", "alnum", |
| #ifdef PREDEFINED_CLASSES |
|       /* The following are extensions from ISO 14652.  */ |
|       "left_to_right", "right_to_left", "num_terminator", |
|       "num_separator", "segment_separator", "block_separator", |
|       "direction_control", "sym_swap_layout", "char_shape_selector", |
|       "num_shape_selector", "non_spacing", "non_spacing_level3", |
|       "normal_connect", "r_connect", "no_connect", "no_connect-space", |
|       "vowel_connect", |
| #endif |
|     }; |
|   /* The maps are created in this order, i.e. map number 0 is `toupper' |
|      and map number 1 is `tolower'.  */ |
|   static const char *const map_names[] = |
|     { |
|       "toupper", "tolower", |
| #ifdef PREDEFINED_CLASSES |
|       "tosymmetric", |
| #endif |
|     }; |
|   const size_t nclass_names = sizeof class_names / sizeof class_names[0]; |
|   const size_t nmap_names = sizeof map_names / sizeof map_names[0]; |
|   struct locale_ctype_t *ctype; |
|   size_t table_size; |
|   size_t idx; |
|   size_t ch; |
| |
|   if (ignore_content || locale->categories[LC_CTYPE].ctype != NULL) |
|     return; |
| |
|   if (copy_locale != NULL) |
|     { |
|       /* Share the definition of the locale we copy from.  */ |
|       locale->categories[LC_CTYPE].ctype |
|         = copy_locale->categories[LC_CTYPE].ctype; |
|       return; |
|     } |
| |
|   /* Allocate the needed room.  */ |
|   ctype = (struct locale_ctype_t *) xcalloc (1, |
|                                              sizeof (struct locale_ctype_t)); |
|   locale->categories[LC_CTYPE].ctype = ctype; |
| |
|   /* Single-byte charmaps start with 256 slots, all others with 512.  */ |
|   table_size = charmap->mb_cur_max == 1 ? 256 : 512; |
| |
|   /* We have seen no names yet, only the first 256 values are known.  */ |
|   ctype->charnames_max = table_size; |
|   ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max |
|                                            * sizeof (uint32_t)); |
|   for (ch = 0; ch < 256; ++ch) |
|     ctype->charnames[ch] = ch; |
|   ctype->charnames_act = 256; |
|   idx_table_init (&ctype->charnames_idx); |
| |
|   /* Fill character class information.  */ |
|   ctype->last_class_char = ILLEGAL_CHAR_VALUE; |
|   for (idx = 0; idx < nclass_names; ++idx) |
|     ctype_class_new (lr, ctype, class_names[idx]); |
| |
|   ctype->class_collection_max = table_size; |
|   ctype->class_collection |
|     = (uint32_t *) xcalloc (sizeof (unsigned long int), |
|                             ctype->class_collection_max); |
|   ctype->class_collection_act = 256; |
| |
|   /* Fill character map information.  */ |
|   ctype->last_map_idx = MAX_NR_CHARMAP; |
|   for (idx = 0; idx < nmap_names; ++idx) |
|     ctype_map_new (lr, ctype, map_names[idx], charmap); |
| |
|   /* Fill first 256 entries in `toXXX' arrays with the identity.  */ |
|   for (idx = 0; idx < nmap_names; ++idx) |
|     for (ch = 0; ch < 256; ++ch) |
|       ctype->map_collection[idx][ch] = ch; |
|   for (idx = 0; idx < 2; ++idx) |
|     for (ch = 0; ch < 256; ++ch) |
|       ctype->map256_collection[idx][ch] = ch; |
| |
|   if (enc_not_ascii_compatible) |
|     ctype->to_nonascii = 1; |
| |
|   obstack_init (&ctype->mempool); |
| } |
| |
| |
| /* Order the `translit_ignore' list of CTYPE by ascending `from' value and |
|    store the number of entries in `ntranslit_ignore'.  Entries with equal |
|    `from' values keep their relative order.  Nothing is changed when the |
|    list is empty.  */ |
| static void |
| translit_ignore_sort (struct locale_ctype_t *ctype) |
| { |
|   struct translit_ignore_t *sorted = NULL; |
|   struct translit_ignore_t *runp = ctype->translit_ignore; |
| |
|   if (runp == NULL) |
|     return; |
| |
|   ctype->ntranslit_ignore = 0; |
|   while (runp != NULL) |
|     { |
|       /* Remember the successor first, RUNP->next is overwritten below.  */ |
|       struct translit_ignore_t *nextp = runp->next; |
|       struct translit_ignore_t **linkp = &sorted; |
| |
|       ++ctype->ntranslit_ignore; |
| |
|       /* Skip all entries which have to stay in front of RUNP.  */ |
|       while (*linkp != NULL && (*linkp)->from <= runp->from) |
|         linkp = &(*linkp)->next; |
| |
|       runp->next = *linkp; |
|       *linkp = runp; |
|       runp = nextp; |
|     } |
| |
|   ctype->translit_ignore = sorted; |
| } |
| |
| |
| /* Complete and check the LC_CTYPE information of LOCALE. |
| |
|    If the category was not defined, the definition is taken from the locale |
|    named in a `copy' statement, or an empty one is created.  Afterwards the |
|    codeset name is taken from CHARMAP, class defaults are filled in, the |
|    class combinations are checked against the POSIX table, <SP> and the |
|    ASCII case rules are checked, all characters of the charmap are entered |
|    in the name array, the input and output digits are completed and the |
|    translit_ignore list is sorted.  Problems are reported through error |
|    with status 0; only an inconsistency in the internal table is fatal.  */ |
| void |
| ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap) |
| { |
|   /* See POSIX.2, table 2-6 for the meaning of the following table.  */ |
| #define NCLASS 12 |
|   static const struct |
|   { |
|     const char *name; |
|     const char allow[NCLASS]; |
|   } |
|   valid_table[NCLASS] = |
|   { |
|     /* The order is important.  See token.h for more information. |
|        M = Always, D = Default, - = Permitted, X = Mutually exclusive  */ |
|     { "upper",  "--MX-XDDXXX-" }, |
|     { "lower",  "--MX-XDDXXX-" }, |
|     { "alpha",  "---X-XDDXXX-" }, |
|     { "digit",  "XXX--XDDXXX-" }, |
|     { "xdigit", "-----XDDXXX-" }, |
|     { "space",  "XXXXX------X" }, |
|     { "print",  "---------X--" }, |
|     { "graph",  "---------X--" }, |
|     { "blank",  "XXXXXM-----X" }, |
|     { "cntrl",  "XXXXX-XX--XX" }, |
|     { "punct",  "XXXXX-DD-X-X" }, |
|     { "alnum",  "-----XDDXXX-" } |
|   }; |
|   size_t cnt; |
|   int cls1, cls2; |
|   uint32_t space_value; |
|   struct charseq *space_seq; |
|   struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; |
|   int warned; |
|   const void *key; |
|   size_t len; |
|   void *vdata; |
|   void *curs; |
| |
|   /* Now resolve copying and also handle completely missing definitions.  */ |
|   if (ctype == NULL) |
|     { |
|       const char *repertoire_name; |
| |
|       /* First see whether we were supposed to copy.  If yes, find the |
|          actual definition.  */ |
|       if (locale->copy_name[LC_CTYPE] != NULL) |
|         { |
|           /* Find the copying locale.  This has to happen transitively since |
|              the locale we are copying from might itself be copying another |
|              one.  */ |
|           struct localedef_t *from = locale; |
| |
|           do |
|             from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE], |
|                                 from->repertoire_name, charmap); |
|           while (from->categories[LC_CTYPE].ctype == NULL |
|                  && from->copy_name[LC_CTYPE] != NULL); |
| |
|           ctype = locale->categories[LC_CTYPE].ctype |
|             = from->categories[LC_CTYPE].ctype; |
|         } |
| |
|       /* If there is still no definition issue a warning and create an |
|          empty one.  */ |
|       if (ctype == NULL) |
|         { |
|           if (! be_quiet) |
|             WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"), "LC_CTYPE")); |
|           ctype_startup (NULL, locale, charmap, NULL, 0); |
|           ctype = locale->categories[LC_CTYPE].ctype; |
|         } |
| |
|       /* Get the repertoire we have to use.  */ |
|       repertoire_name = locale->repertoire_name ?: repertoire_global; |
|       if (repertoire_name != NULL) |
|         ctype->repertoire = repertoire_read (repertoire_name); |
|     } |
| |
|   /* We need the name of the currently used 8-bit character set to |
|      make correct conversion between this 8-bit representation and the |
|      ISO 10646 character set used internally for wide characters.  */ |
|   ctype->codeset_name = charmap->code_set_name; |
|   if (ctype->codeset_name == NULL) |
|     { |
|       if (! be_quiet) |
|         WITH_CUR_LOCALE (error (0, 0, _("No character set name specified in charmap"))); |
|       ctype->codeset_name = "//UNKNOWN//"; |
|     } |
| |
|   /* Set default value for classes not specified.  */ |
|   set_class_defaults (ctype, charmap, ctype->repertoire); |
| |
|   /* Check the wide character classes according to the table.  TMP is a |
|      snapshot of the class word, so bits added through a `D' entry do not |
|      influence the checks of the same character.  */ |
|   for (cnt = 0; cnt < ctype->class_collection_act; ++cnt) |
|     { |
|       uint32_t tmp = ctype->class_collection[cnt]; |
| |
|       if (tmp != 0) |
|         { |
|           for (cls1 = 0; cls1 < NCLASS; ++cls1) |
|             if ((tmp & _ISwbit (cls1)) != 0) |
|               for (cls2 = 0; cls2 < NCLASS; ++cls2) |
|                 if (valid_table[cls1].allow[cls2] != '-') |
|                   { |
|                     int eq = (tmp & _ISwbit (cls2)) != 0; |
|                     switch (valid_table[cls1].allow[cls2]) |
|                       { |
|                       case 'M': |
|                         if (!eq) |
|                           { |
|                             uint32_t value = ctype->charnames[cnt]; |
| |
|                             if (!be_quiet) |
|                               WITH_CUR_LOCALE (error (0, 0, _("character L'\\u%0*x' in class `%s' must be in class `%s'"), |
|                                                       value > 0xffff ? 8 : 4, |
|                                                       value, |
|                                                       valid_table[cls1].name, |
|                                                       valid_table[cls2].name)); |
|                           } |
|                         break; |
| |
|                       case 'X': |
|                         if (eq) |
|                           { |
|                             uint32_t value = ctype->charnames[cnt]; |
| |
|                             if (!be_quiet) |
|                               WITH_CUR_LOCALE (error (0, 0, _("character L'\\u%0*x' in class `%s' must not be in class `%s'"), |
|                                                       value > 0xffff ? 8 : 4, |
|                                                       value, |
|                                                       valid_table[cls1].name, |
|                                                       valid_table[cls2].name)); |
|                           } |
|                         break; |
| |
|                       case 'D': |
|                         ctype->class_collection[cnt] |= _ISwbit (cls2); |
|                         break; |
| |
|                       default: |
|                         WITH_CUR_LOCALE (error (5, 0, _("internal error in %s, line %u"), __FUNCTION__, __LINE__)); |
|                       } |
|                   } |
|         } |
|     } |
| |
|   /* The same checks for the classes of the 8-bit characters.  */ |
|   for (cnt = 0; cnt < 256; ++cnt) |
|     { |
|       uint32_t tmp = ctype->class256_collection[cnt]; |
| |
|       if (tmp != 0) |
|         { |
|           for (cls1 = 0; cls1 < NCLASS; ++cls1) |
|             if ((tmp & _ISbit (cls1)) != 0) |
|               for (cls2 = 0; cls2 < NCLASS; ++cls2) |
|                 if (valid_table[cls1].allow[cls2] != '-') |
|                   { |
|                     int eq = (tmp & _ISbit (cls2)) != 0; |
|                     switch (valid_table[cls1].allow[cls2]) |
|                       { |
|                       case 'M': |
|                         if (!eq) |
|                           { |
|                             char buf[17]; |
| |
|                             /* CNT is a size_t, `z' is the standard length |
|                                modifier for it.  */ |
|                             snprintf (buf, sizeof buf, "\\%zo", cnt); |
| |
|                             if (!be_quiet) |
|                               WITH_CUR_LOCALE (error (0, 0, _("character '%s' in class `%s' must be in class `%s'"), |
|                                                       buf, |
|                                                       valid_table[cls1].name, |
|                                                       valid_table[cls2].name)); |
|                           } |
|                         break; |
| |
|                       case 'X': |
|                         if (eq) |
|                           { |
|                             char buf[17]; |
| |
|                             snprintf (buf, sizeof buf, "\\%zo", cnt); |
| |
|                             if (!be_quiet) |
|                               WITH_CUR_LOCALE (error (0, 0, _("character '%s' in class `%s' must not be in class `%s'"), |
|                                                       buf, |
|                                                       valid_table[cls1].name, |
|                                                       valid_table[cls2].name)); |
|                           } |
|                         break; |
| |
|                       case 'D': |
|                         ctype->class256_collection[cnt] |= _ISbit (cls2); |
|                         break; |
| |
|                       default: |
|                         WITH_CUR_LOCALE (error (5, 0, _("internal error in %s, line %u"), __FUNCTION__, __LINE__)); |
|                       } |
|                   } |
|         } |
|     } |
| |
|   /* ... and now test <SP> as a special case.  CNT is set inside the |
|      conditions so that the message can name the offending class.  */ |
|   space_value = 32; |
|   if (((cnt = BITPOS (tok_space), |
|         (ELEM (ctype, class_collection, , space_value) |
|          & BITw (tok_space)) == 0) |
|        || (cnt = BITPOS (tok_blank), |
|            (ELEM (ctype, class_collection, , space_value) |
|             & BITw (tok_blank)) == 0))) |
|     { |
|       if (!be_quiet) |
|         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"), |
|                                 valid_table[cnt].name)); |
|     } |
|   else if (((cnt = BITPOS (tok_punct), |
|              (ELEM (ctype, class_collection, , space_value) |
|               & BITw (tok_punct)) != 0) |
|             || (cnt = BITPOS (tok_graph), |
|                 (ELEM (ctype, class_collection, , space_value) |
|                  & BITw (tok_graph)) |
|                 != 0))) |
|     { |
|       if (!be_quiet) |
|         WITH_CUR_LOCALE (error (0, 0, _("<SP> character must not be in class `%s'"), |
|                                 valid_table[cnt].name)); |
|     } |
|   else |
|     ELEM (ctype, class_collection, , space_value) |= BITw (tok_print); |
| |
|   /* The same test for the 8-bit representation of <SP>.  */ |
|   space_seq = charmap_find_value (charmap, "SP", 2); |
|   if (space_seq == NULL) |
|     space_seq = charmap_find_value (charmap, "space", 5); |
|   if (space_seq == NULL) |
|     space_seq = charmap_find_value (charmap, "U00000020", 9); |
|   if (space_seq == NULL || space_seq->nbytes != 1) |
|     { |
|       if (!be_quiet) |
|         WITH_CUR_LOCALE (error (0, 0, _("character <SP> not defined in character map"))); |
|     } |
|   else if (((cnt = BITPOS (tok_space), |
|              (ctype->class256_collection[space_seq->bytes[0]] |
|               & BIT (tok_space)) == 0) |
|             || (cnt = BITPOS (tok_blank), |
|                 (ctype->class256_collection[space_seq->bytes[0]] |
|                  & BIT (tok_blank)) == 0))) |
|     { |
|       if (!be_quiet) |
|         WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"), |
|                                 valid_table[cnt].name)); |
|     } |
|   else if (((cnt = BITPOS (tok_punct), |
|              (ctype->class256_collection[space_seq->bytes[0]] |
|               & BIT (tok_punct)) != 0) |
|             || (cnt = BITPOS (tok_graph), |
|                 (ctype->class256_collection[space_seq->bytes[0]] |
|                  & BIT (tok_graph)) != 0))) |
|     { |
|       if (!be_quiet) |
|         WITH_CUR_LOCALE (error (0, 0, _("<SP> character must not be in class `%s'"), |
|                                 valid_table[cnt].name)); |
|     } |
|   else |
|     ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print); |
| |
|   /* Check whether all single-byte characters map to their upper/lowercase |
|      equivalent according to the ASCII rules.  */ |
|   for (cnt = 'A'; cnt <= 'Z'; ++cnt) |
|     { |
|       uint32_t uppval = ctype->map256_collection[0][cnt]; |
|       uint32_t lowval = ctype->map256_collection[1][cnt]; |
|       uint32_t lowuppval = ctype->map256_collection[0][lowval]; |
|       uint32_t lowlowval = ctype->map256_collection[1][lowval]; |
| |
|       if (uppval != cnt |
|           || lowval != cnt + 0x20 |
|           || lowuppval != cnt |
|           || lowlowval != cnt + 0x20) |
|         ctype->nonascii_case = 1; |
|     } |
|   for (cnt = 0; cnt < 256; ++cnt) |
|     if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z') |
|       if (ctype->map256_collection[0][cnt] != cnt |
|           || ctype->map256_collection[1][cnt] != cnt) |
|         ctype->nonascii_case = 1; |
| |
|   /* Now that the tests are done make sure the name array contains all |
|      characters which are handled in the WIDTH section of the |
|      character set definition file.  */ |
|   if (charmap->width_rules != NULL) |
|     for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt) |
|       { |
|         unsigned char bytes[charmap->mb_cur_max]; |
|         int nbytes = charmap->width_rules[cnt].from->nbytes; |
| |
|         /* We have the range of character for which the width is |
|            specified described using byte sequences of the multibyte |
|            charset.  We have to convert this to UCS4 now.  And we |
|            cannot simply convert the beginning and the end of the |
|            sequence, we have to iterate over the byte sequence and |
|            convert it for every single character.  */ |
|         memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes); |
| |
|         while (nbytes < charmap->width_rules[cnt].to->nbytes |
|                || memcmp (bytes, charmap->width_rules[cnt].to->bytes, |
|                           nbytes) <= 0) |
|           { |
|             /* Find the UCS value for `bytes'.  */ |
|             int inner; |
|             uint32_t wch; |
|             struct charseq *seq |
|               = charmap_find_symbol (charmap, (char *) bytes, nbytes); |
| |
|             if (seq == NULL) |
|               wch = ILLEGAL_CHAR_VALUE; |
|             else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE) |
|               wch = seq->ucs4; |
|             else |
|               wch = repertoire_find_value (ctype->repertoire, seq->name, |
|                                            strlen (seq->name)); |
| |
|             if (wch != ILLEGAL_CHAR_VALUE) |
|               /* We are only interested in the side-effects of the |
|                  `find_idx' call.  It will add appropriate entries in |
|                  the name array if this is necessary.  */ |
|               (void) find_idx (ctype, NULL, NULL, NULL, wch); |
| |
|             /* "Increment" the bytes sequence.  */ |
|             inner = nbytes - 1; |
|             while (inner >= 0 && bytes[inner] == 0xff) |
|               --inner; |
| |
|             if (inner < 0) |
|               { |
|                 /* We have to extend the byte sequence.  */ |
|                 if (nbytes >= charmap->width_rules[cnt].to->nbytes) |
|                   break; |
| |
|                 bytes[0] = 1; |
|                 memset (&bytes[1], 0, nbytes); |
|                 ++nbytes; |
|               } |
|             else |
|               { |
|                 ++bytes[inner]; |
|                 while (++inner < nbytes) |
|                   bytes[inner] = 0; |
|               } |
|           } |
|       } |
| |
|   /* Now set all the other characters of the character set to the |
|      default width.  */ |
|   curs = NULL; |
|   while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0) |
|     { |
|       struct charseq *data = (struct charseq *) vdata; |
| |
|       if (data->ucs4 == UNINITIALIZED_CHAR_VALUE) |
|         data->ucs4 = repertoire_find_value (ctype->repertoire, |
|                                             data->name, len); |
| |
|       if (data->ucs4 != ILLEGAL_CHAR_VALUE) |
|         (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4); |
|     } |
| |
|   /* There must be a multiple of 10 digits.  */ |
|   if (ctype->mbdigits_act % 10 != 0) |
|     { |
|       assert (ctype->mbdigits_act == ctype->wcdigits_act); |
|       ctype->wcdigits_act -= ctype->mbdigits_act % 10; |
|       ctype->mbdigits_act -= ctype->mbdigits_act % 10; |
|       WITH_CUR_LOCALE (error (0, 0, _("`digit' category has not entries in groups of ten"))); |
|     } |
| |
|   /* Check the input digits.  There must be a multiple of ten available. |
|      In each group it could be that one or the other character is missing. |
|      In this case the whole group must be removed.  */ |
|   cnt = 0; |
|   while (cnt < ctype->mbdigits_act) |
|     { |
|       size_t inner; |
|       for (inner = 0; inner < 10; ++inner) |
|         if (ctype->mbdigits[cnt + inner] == NULL) |
|           break; |
| |
|       if (inner == 10) |
|         cnt += 10; |
|       else |
|         { |
|           /* Remove the group.  The number of entries behind the group |
|              is derived from the multibyte counter; the wide character |
|              counter belongs to a different array.  */ |
|           memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10], |
|                    ((ctype->mbdigits_act - cnt - 10) |
|                     * sizeof (ctype->mbdigits[0]))); |
|           ctype->mbdigits_act -= 10; |
|         } |
|     } |
| |
|   /* If no input digits are given use the default.  */ |
|   if (ctype->mbdigits_act == 0) |
|     { |
|       if (ctype->mbdigits_max == 0) |
|         { |
|           ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, |
|                                            10 * sizeof (struct charseq *)); |
|           ctype->mbdigits_max = 10; |
|         } |
| |
|       for (cnt = 0; cnt < 10; ++cnt) |
|         { |
|           ctype->mbdigits[cnt] = charmap_find_symbol (charmap, |
|                                                       (char *) digits + cnt, 1); |
|           if (ctype->mbdigits[cnt] == NULL) |
|             { |
|               ctype->mbdigits[cnt] = charmap_find_symbol (charmap, |
|                                                           longnames[cnt], |
|                                                           strlen (longnames[cnt])); |
|               if (ctype->mbdigits[cnt] == NULL) |
|                 { |
|                   /* Hum, this ain't good.  */ |
|                   WITH_CUR_LOCALE (error (0, 0, _("no input digits defined and none of the standard names in the charmap"))); |
| |
|                   ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, |
|                                                         sizeof (struct charseq) + 1); |
| |
|                   /* This is better than nothing.  */ |
|                   ctype->mbdigits[cnt]->bytes[0] = digits[cnt]; |
|                   ctype->mbdigits[cnt]->nbytes = 1; |
|                 } |
|             } |
|         } |
| |
|       ctype->mbdigits_act = 10; |
|     } |
| |
|   /* Check the wide character input digits.  There must be a multiple |
|      of ten available.  In each group it could be that one or the other |
|      character is missing.  In this case the whole group must be |
|      removed.  */ |
|   cnt = 0; |
|   while (cnt < ctype->wcdigits_act) |
|     { |
|       size_t inner; |
|       for (inner = 0; inner < 10; ++inner) |
|         if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE) |
|           break; |
| |
|       if (inner == 10) |
|         cnt += 10; |
|       else |
|         { |
|           /* Remove the group.  */ |
|           memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10], |
|                    ((ctype->wcdigits_act - cnt - 10) |
|                     * sizeof (ctype->wcdigits[0]))); |
|           ctype->wcdigits_act -= 10; |
|         } |
|     } |
| |
|   /* If no wide character input digits are given use the default.  */ |
|   if (ctype->wcdigits_act == 0) |
|     { |
|       if (ctype->wcdigits_max == 0) |
|         { |
|           ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool, |
|                                            10 * sizeof (uint32_t)); |
|           ctype->wcdigits_max = 10; |
|         } |
| |
|       for (cnt = 0; cnt < 10; ++cnt) |
|         ctype->wcdigits[cnt] = L'0' + cnt; |
| |
|       /* It is the wide character array which has just been filled, so |
|          its counter has to be set; the multibyte counter is left alone.  */ |
|       ctype->wcdigits_act = 10; |
|     } |
| |
|   /* Check the outdigits.  Missing multibyte digits are replaced by `?'.  */ |
|   warned = 0; |
|   for (cnt = 0; cnt < 10; ++cnt) |
|     if (ctype->mboutdigits[cnt] == NULL) |
|       { |
|         static struct charseq replace[2]; |
| |
|         if (!warned) |
|           { |
|             WITH_CUR_LOCALE (error (0, 0, _("not all characters used in `outdigit' are available in the charmap"))); |
|             warned = 1; |
|           } |
| |
|         replace[0].nbytes = 1; |
|         replace[0].bytes[0] = '?'; |
|         replace[0].bytes[1] = '\0'; |
|         ctype->mboutdigits[cnt] = &replace[0]; |
|       } |
| |
|   /* Likewise for the wide character output digits.  */ |
|   warned = 0; |
|   for (cnt = 0; cnt < 10; ++cnt) |
|     if (ctype->wcoutdigits[cnt] == 0) |
|       { |
|         if (!warned) |
|           { |
|             WITH_CUR_LOCALE (error (0, 0, _("not all characters used in `outdigit' are available in the repertoire"))); |
|             warned = 1; |
|           } |
| |
|         ctype->wcoutdigits[cnt] = L'?'; |
|       } |
| |
|   /* Sort the entries in the translit_ignore list and count them.  */ |
|   translit_ignore_sort (ctype); |
| } |
| |
| |
| void |
| ctype_output (struct localedef_t *locale, const struct charmap_t *charmap, |
| const char *output_path) |
| { |
| struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype; |
| const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1) |
| + ctype->nr_charclass + ctype->map_collection_nr); |
| struct locale_file file; |
| uint32_t default_missing_len; |
| size_t elem, cnt; |
| |
| /* Now prepare the output: Find the sizes of the table we can use. */ |
| allocate_arrays (ctype, charmap, ctype->repertoire); |
| |
| default_missing_len = (ctype->default_missing |
| ? wcslen ((wchar_t *) ctype->default_missing) |
| : 0); |
| |
| init_locale_data (&file, nelems); |
| for (elem = 0; elem < nelems; ++elem) |
| { |
| if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)) |
| switch (elem) |
| { |
| #define CTYPE_EMPTY(name) \ |
| case name: \ |
| add_locale_empty (&file); \ |
| break |
| |
| CTYPE_EMPTY(_NL_CTYPE_GAP1); |
| CTYPE_EMPTY(_NL_CTYPE_GAP2); |
| CTYPE_EMPTY(_NL_CTYPE_GAP3); |
| CTYPE_EMPTY(_NL_CTYPE_GAP4); |
| CTYPE_EMPTY(_NL_CTYPE_GAP5); |
| CTYPE_EMPTY(_NL_CTYPE_GAP6); |
| |
| #define CTYPE_RAW_DATA(name, base, size) \ |
| case _NL_ITEM_INDEX (name): \ |
| add_locale_raw_data (&file, base, size); \ |
| break |
| |
| CTYPE_RAW_DATA (_NL_CTYPE_CLASS, |
| ctype->ctype_b, |
| (256 + 128) * sizeof (char_class_t)); |
| |
| #define CTYPE_UINT32_ARRAY(name, base, n_elems) \ |
| case _NL_ITEM_INDEX (name): \ |
| add_locale_uint32_array (&file, base, n_elems); \ |
| break |
| |
| CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[0], 256 + 128); |
| CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[1], 256 + 128); |
| CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[0], 256); |
| CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[1], 256); |
| CTYPE_RAW_DATA (_NL_CTYPE_CLASS32, |
| ctype->ctype32_b, |
| 256 * sizeof (char_class32_t)); |
| |
| #define CTYPE_UINT32(name, value) \ |
| case _NL_ITEM_INDEX (name): \ |
| add_locale_uint32 (&file, value); \ |
| break |
| |
| CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset); |
| CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset); |
| CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size); |
| |
| CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX, |
| ctype->translit_from_idx, |
| ctype->translit_idx_size); |
| |
| CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL, |
| ctype->translit_from_tbl, |
| ctype->translit_from_tbl_size |
| / sizeof (uint32_t)); |
| |
| CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX, |
| ctype->translit_to_idx, |
| ctype->translit_idx_size); |
| |
| CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL, |
| ctype->translit_to_tbl, |
| ctype->translit_to_tbl_size / sizeof (uint32_t)); |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES): |
| /* The class name array. */ |
| start_locale_structure (&file); |
| for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) |
| add_locale_string (&file, ctype->classnames[cnt]); |
| add_locale_char (&file, 0); |
| align_locale_data (&file, LOCFILE_ALIGN); |
| end_locale_structure (&file); |
| break; |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES): |
| /* The class name array. */ |
| start_locale_structure (&file); |
| for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) |
| add_locale_string (&file, ctype->mapnames[cnt]); |
| add_locale_char (&file, 0); |
| align_locale_data (&file, LOCFILE_ALIGN); |
| end_locale_structure (&file); |
| break; |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH): |
| add_locale_wcwidth_table (&file, &ctype->width); |
| break; |
| |
| CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max); |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME): |
| add_locale_string (&file, ctype->codeset_name); |
| break; |
| |
| CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii); |
| |
| CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case); |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN): |
| add_locale_uint32 (&file, ctype->mbdigits_act / 10); |
| break; |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN): |
| add_locale_uint32 (&file, ctype->wcdigits_act / 10); |
| break; |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB): |
| start_locale_structure (&file); |
| for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB); |
| cnt < ctype->mbdigits_act; cnt += 10) |
| { |
| add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes, |
| ctype->mbdigits[cnt]->nbytes); |
| add_locale_char (&file, 0); |
| } |
| end_locale_structure (&file); |
| break; |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB): |
| start_locale_structure (&file); |
| cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB); |
| add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes, |
| ctype->mboutdigits[cnt]->nbytes); |
| add_locale_char (&file, 0); |
| end_locale_structure (&file); |
| break; |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC): |
| start_locale_structure (&file); |
| for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC); |
| cnt < ctype->wcdigits_act; cnt += 10) |
| add_locale_uint32 (&file, ctype->wcdigits[cnt]); |
| end_locale_structure (&file); |
| break; |
| |
| case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC): |
| cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC); |
| add_locale_uint32 (&file, ctype->wcoutdigits[cnt]); |
| break; |
| |
| case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN): |
| add_locale_uint32 (&file, default_missing_len); |
| break; |
| |
| case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING): |
| add_locale_uint32_array (&file, ctype->default_missing, |
| default_missing_len); |
| break; |
| |
| case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN): |
| add_locale_uint32 (&file, ctype->ntranslit_ignore); |
| break; |
| |
| case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE): |
| start_locale_structure (&file); |
| { |
| struct translit_ignore_t *runp; |
| for (runp = ctype->translit_ignore; runp != NULL; |
| runp = runp->next) |
| { |
| add_locale_uint32 (&file, runp->from); |
| add_locale_uint32 (&file, runp->to); |
| add_locale_uint32 (&file, runp->step); |
| } |
| } |
| end_locale_structure (&file); |
| break; |
| |
| default: |
| assert (! "unknown CTYPE element"); |
| } |
| else |
| { |
| /* Handle extra maps. */ |
| size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1); |
| if (nr < ctype->nr_charclass) |
| { |
| start_locale_prelude (&file); |
| add_locale_uint32_array (&file, ctype->class_b[nr], 256 / 32); |
| end_locale_prelude (&file); |
| add_locale_wctype_table (&file, &ctype->class_3level[nr]); |
| } |
| else |
| { |
| nr -= ctype->nr_charclass; |
| assert (nr < ctype->map_collection_nr); |
| add_locale_wctrans_table (&file, &ctype->map_3level[nr]); |
| } |
| } |
| } |
| |
| write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file); |
| } |
| |
| |
| /* Local functions. */ |
| static void |
| ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype, |
| const char *name) |
| { |
| size_t cnt; |
| |
| for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) |
| if (strcmp (ctype->classnames[cnt], name) == 0) |
| break; |
| |
| if (cnt < ctype->nr_charclass) |
| { |
| lr_error (lr, _("character class `%s' already defined"), name); |
| return; |
| } |
| |
| if (ctype->nr_charclass == MAX_NR_CHARCLASS) |
| /* Exit code 2 is prescribed in P1003.2b. */ |
| WITH_CUR_LOCALE (error (2, 0, _("\ |
| implementation limit: no more than %Zd character classes allowed"), |
| MAX_NR_CHARCLASS)); |
| |
| ctype->classnames[ctype->nr_charclass++] = name; |
| } |
| |
| |
| static void |
| ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype, |
| const char *name, const struct charmap_t *charmap) |
| { |
| size_t max_chars = 0; |
| size_t cnt; |
| |
| for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) |
| { |
| if (strcmp (ctype->mapnames[cnt], name) == 0) |
| break; |
| |
| if (max_chars < ctype->map_collection_max[cnt]) |
| max_chars = ctype->map_collection_max[cnt]; |
| } |
| |
| if (cnt < ctype->map_collection_nr) |
| { |
| lr_error (lr, _("character map `%s' already defined"), name); |
| return; |
| } |
| |
| if (ctype->map_collection_nr == MAX_NR_CHARMAP) |
| /* Exit code 2 is prescribed in P1003.2b. */ |
| WITH_CUR_LOCALE (error (2, 0, _("\ |
| implementation limit: no more than %d character maps allowed"), |
| MAX_NR_CHARMAP)); |
| |
| ctype->mapnames[cnt] = name; |
| |
| if (max_chars == 0) |
| ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512; |
| else |
| ctype->map_collection_max[cnt] = max_chars; |
| |
| ctype->map_collection[cnt] = (uint32_t *) |
| xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]); |
| ctype->map_collection_act[cnt] = 256; |
| |
| ++ctype->map_collection_nr; |
| } |
| |
| |
| /* We have to be prepared that TABLE, MAX, and ACT can be NULL. This |
| is possible if we only want to extend the name array. */ |
| static uint32_t * |
| find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max, |
| size_t *act, uint32_t idx) |
| { |
| size_t cnt; |
| |
| if (idx < 256) |
| return table == NULL ? NULL : &(*table)[idx]; |
| |
| /* Use the charnames_idx lookup table instead of the slow search loop. */ |
| #if 1 |
| cnt = idx_table_get (&ctype->charnames_idx, idx); |
| if (cnt == EMPTY) |
| /* Not found. */ |
| cnt = ctype->charnames_act; |
| #else |
| for (cnt = 256; cnt < ctype->charnames_act; ++cnt) |
| if (ctype->charnames[cnt] == idx) |
| break; |
| #endif |
| |
| /* We have to distinguish two cases: the name is found or not. */ |
| if (cnt == ctype->charnames_act) |
| { |
| /* Extend the name array. */ |
| if (ctype->charnames_act == ctype->charnames_max) |
| { |
| ctype->charnames_max *= 2; |
| ctype->charnames = (uint32_t *) |
| xrealloc (ctype->charnames, |
| sizeof (uint32_t) * ctype->charnames_max); |
| } |
| ctype->charnames[ctype->charnames_act++] = idx; |
| idx_table_add (&ctype->charnames_idx, idx, cnt); |
| } |
| |
| if (table == NULL) |
| /* We have done everything we are asked to do. */ |
| return NULL; |
| |
| if (max == NULL) |
| /* The caller does not want to extend the table. */ |
| return (cnt >= *act ? NULL : &(*table)[cnt]); |
| |
| if (cnt >= *act) |
| { |
| if (cnt >= *max) |
| { |
| size_t old_max = *max; |
| do |
| *max *= 2; |
| while (*max <= cnt); |
| |
| *table = |
| (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t)); |
| memset (&(*table)[old_max], '\0', |
| (*max - old_max) * sizeof (uint32_t)); |
| } |
| |
| *act = cnt + 1; |
| } |
| |
| return &(*table)[cnt]; |
| } |
| |
| |
| static int |
| get_character (struct token *now, const struct charmap_t *charmap, |
| struct repertoire_t *repertoire, |
| struct charseq **seqp, uint32_t *wchp) |
| { |
| if (now->tok == tok_bsymbol) |
| { |
| /* This will hopefully be the normal case. */ |
| *wchp = repertoire_find_value (repertoire, now->val.str.startmb, |
| now->val.str.lenmb); |
| *seqp = charmap_find_value (charmap, now->val.str.startmb, |
| now->val.str.lenmb); |
| } |
| else if (now->tok == tok_ucs4) |
| { |
| char utmp[10]; |
| |
| snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4); |
| *seqp = charmap_find_value (charmap, utmp, 9); |
| |
| if (*seqp == NULL) |
| *seqp = repertoire_find_seq (repertoire, now->val.ucs4); |
| |
| if (*seqp == NULL) |
| { |
| /* Compute the value in the charmap from the UCS value. */ |
| const char *symbol = repertoire_find_symbol (repertoire, |
| now->val.ucs4); |
| |
| if (symbol == NULL) |
| *seqp = NULL; |
| else |
| *seqp = charmap_find_value (charmap, symbol, strlen (symbol)); |
| |
| if (*seqp == NULL) |
| { |
| if (repertoire != NULL) |
| { |
| /* Insert a negative entry. */ |
| static const struct charseq negative |
| = { .ucs4 = ILLEGAL_CHAR_VALUE }; |
| uint32_t *newp = obstack_alloc (&repertoire->mem_pool, |
| sizeof (uint32_t)); |
| *newp = now->val.ucs4; |
| |
| insert_entry (&repertoire->seq_table, newp, |
| sizeof (uint32_t), (void *) &negative); |
| } |
| } |
| else |
| (*seqp)->ucs4 = now->val.ucs4; |
| } |
| else if ((*seqp)->ucs4 != now->val.ucs4) |
| *seqp = NULL; |
| |
| *wchp = now->val.ucs4; |
| } |
| else if (now->tok == tok_charcode) |
| { |
| /* We must map from the byte code to UCS4. */ |
| *seqp = charmap_find_symbol (charmap, now->val.str.startmb, |
| now->val.str.lenmb); |
| |
| if (*seqp == NULL) |
| *wchp = ILLEGAL_CHAR_VALUE; |
| else |
| { |
| if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE) |
| (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name, |
| strlen ((*seqp)->name)); |
| *wchp = (*seqp)->ucs4; |
| } |
| } |
| else |
| return 1; |
| |
| return 0; |
| } |
| |
| |
| /* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and |
| the .(2). counterparts. */ |
| static void |
| charclass_symbolic_ellipsis (struct linereader *ldfile, |
| struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire, |
| struct token *now, |
| const char *last_str, |
| unsigned long int class256_bit, |
| unsigned long int class_bit, int base, |
| int ignore_content, int handle_digits, int step) |
| { |
| const char *nowstr = now->val.str.startmb; |
| char tmp[now->val.str.lenmb + 1]; |
| const char *cp; |
| char *endp; |
| unsigned long int from; |
| unsigned long int to; |
| |
| /* We have to compute the ellipsis values using the symbolic names. */ |
| assert (last_str != NULL); |
| |
| if (strlen (last_str) != now->val.str.lenmb) |
| { |
| invalid_range: |
| lr_error (ldfile, |
| _("`%s' and `%.*s' are not valid names for symbolic range"), |
| last_str, (int) now->val.str.lenmb, nowstr); |
| return; |
| } |
| |
| if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0) |
| /* Nothing to do, the names are the same. */ |
| return; |
| |
| for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp) |
| ; |
| |
| errno = 0; |
| from = strtoul (cp, &endp, base); |
| if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0') |
| goto invalid_range; |
| |
| to = strtoul (nowstr + (cp - last_str), &endp, base); |
| if ((to == UINT_MAX && errno == ERANGE) |
| || (endp - nowstr) != now->val.str.lenmb || from >= to) |
| goto invalid_range; |
| |
| /* OK, we have a range FROM - TO. Now we can create the symbolic names. */ |
| if (!ignore_content) |
| { |
| now->val.str.startmb = tmp; |
| while ((from += step) <= to) |
| { |
| struct charseq *seq; |
| uint32_t wch; |
| |
| sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"), |
| (int) (cp - last_str), last_str, |
| (int) (now->val.str.lenmb - (cp - last_str)), |
| from); |
| |
| get_character (now, charmap, repertoire, &seq, &wch); |
| |
| if (seq != NULL && seq->nbytes == 1) |
| /* Yep, we can store information about this byte sequence. */ |
| ctype->class256_collection[seq->bytes[0]] |= class256_bit; |
| |
| if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0) |
| /* We have the UCS4 position. */ |
| *find_idx (ctype, &ctype->class_collection, |
| &ctype->class_collection_max, |
| &ctype->class_collection_act, wch) |= class_bit; |
| |
| if (handle_digits == 1) |
| { |
| /* We must store the digit values. */ |
| if (ctype->mbdigits_act == ctype->mbdigits_max) |
| { |
| ctype->mbdigits_max *= 2; |
| ctype->mbdigits = xrealloc (ctype->mbdigits, |
| (ctype->mbdigits_max |
| * sizeof (char *))); |
| ctype->wcdigits_max *= 2; |
| ctype->wcdigits = xrealloc (ctype->wcdigits, |
| (ctype->wcdigits_max |
| * sizeof (uint32_t))); |
| } |
| |
| ctype->mbdigits[ctype->mbdigits_act++] = seq; |
| ctype->wcdigits[ctype->wcdigits_act++] = wch; |
| } |
| else if (handle_digits == 2) |
| { |
| /* We must store the digit values. */ |
| if (ctype->outdigits_act >= 10) |
| { |
| lr_error (ldfile, _("\ |
| %s: field `%s' does not contain exactly ten entries"), |
| "LC_CTYPE", "outdigit"); |
| return; |
| } |
| |
| ctype->mboutdigits[ctype->outdigits_act] = seq; |
| ctype->wcoutdigits[ctype->outdigits_act] = wch; |
| ++ctype->outdigits_act; |
| } |
| } |
| } |
| } |
| |
| |
| /* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */ |
| static void |
| charclass_ucs4_ellipsis (struct linereader *ldfile, |
| struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire, |
| struct token *now, uint32_t last_wch, |
| unsigned long int class256_bit, |
| unsigned long int class_bit, int ignore_content, |
| int handle_digits, int step) |
| { |
| if (last_wch > now->val.ucs4) |
| { |
| lr_error (ldfile, _("\ |
| to-value <U%0*X> of range is smaller than from-value <U%0*X>"), |
| (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4, |
| (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch); |
| return; |
| } |
| |
| if (!ignore_content) |
| while ((last_wch += step) <= now->val.ucs4) |
| { |
| /* We have to find out whether there is a byte sequence corresponding |
| to this UCS4 value. */ |
| struct charseq *seq; |
| char utmp[10]; |
| |
| snprintf (utmp, sizeof (utmp), "U%08X", last_wch); |
| seq = charmap_find_value (charmap, utmp, 9); |
| if (seq == NULL) |
| { |
| snprintf (utmp, sizeof (utmp), "U%04X", last_wch); |
| seq = charmap_find_value (charmap, utmp, 5); |
| } |
| |
| if (seq == NULL) |
| /* Try looking in the repertoire map. */ |
| seq = repertoire_find_seq (repertoire, last_wch); |
| |
| /* If this is the first time we look for this sequence create a new |
| entry. */ |
| if (seq == NULL) |
| { |
| static const struct charseq negative |
| = { .ucs4 = ILLEGAL_CHAR_VALUE }; |
| |
| /* Find the symbolic name for this UCS4 value. */ |
| if (repertoire != NULL) |
| { |
| const char *symbol = repertoire_find_symbol (repertoire, |
| last_wch); |
| uint32_t *newp = obstack_alloc (&repertoire->mem_pool, |
| sizeof (uint32_t)); |
| *newp = last_wch; |
| |
| if (symbol != NULL) |
| /* We have a name, now search the multibyte value. */ |
| seq = charmap_find_value (charmap, symbol, strlen (symbol)); |
| |
| if (seq == NULL) |
| /* We have to create a fake entry. */ |
| seq = (struct charseq *) &negative; |
| else |
| seq->ucs4 = last_wch; |
| |
| insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t), |
| seq); |
| } |
| else |
| /* We have to create a fake entry. */ |
| seq = (struct charseq *) &negative; |
| } |
| |
| /* We have a name, now search the multibyte value. */ |
| if (seq->ucs4 == last_wch && seq->nbytes == 1) |
| /* Yep, we can store information about this byte sequence. */ |
| ctype->class256_collection[(size_t) seq->bytes[0]] |
| |= class256_bit; |
| |
| /* And of course we have the UCS4 position. */ |
| if (class_bit != 0) |
| *find_idx (ctype, &ctype->class_collection, |
| &ctype->class_collection_max, |
| &ctype->class_collection_act, last_wch) |= class_bit; |
| |
| if (handle_digits == 1) |
| { |
| /* We must store the digit values. */ |
| if (ctype->mbdigits_act == ctype->mbdigits_max) |
| { |
| ctype->mbdigits_max *= 2; |
| ctype->mbdigits = xrealloc (ctype->mbdigits, |
| (ctype->mbdigits_max |
| * sizeof (char *))); |
| ctype->wcdigits_max *= 2; |
| ctype->wcdigits = xrealloc (ctype->wcdigits, |
| (ctype->wcdigits_max |
| * sizeof (uint32_t))); |
| } |
| |
| ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch |
| ? seq : NULL); |
| ctype->wcdigits[ctype->wcdigits_act++] = last_wch; |
| } |
| else if (handle_digits == 2) |
| { |
| /* We must store the digit values. */ |
| if (ctype->outdigits_act >= 10) |
| { |
| lr_error (ldfile, _("\ |
| %s: field `%s' does not contain exactly ten entries"), |
| "LC_CTYPE", "outdigit"); |
| return; |
| } |
| |
| ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch |
| ? seq : NULL); |
| ctype->wcoutdigits[ctype->outdigits_act] = last_wch; |
| ++ctype->outdigits_act; |
| } |
| } |
| } |
| |
| |
| /* Ellipsis as in `/xea/x12.../xea/x34'. */ |
| static void |
| charclass_charcode_ellipsis (struct linereader *ldfile, |
| struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire, |
| struct token *now, char *last_charcode, |
| uint32_t last_charcode_len, |
| unsigned long int class256_bit, |
| unsigned long int class_bit, int ignore_content, |
| int handle_digits) |
| { |
| /* First check whether the to-value is larger. */ |
| if (now->val.charcode.nbytes != last_charcode_len) |
| { |
| lr_error (ldfile, _("\ |
| start and end character sequence of range must have the same length")); |
| return; |
| } |
| |
| if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0) |
| { |
| lr_error (ldfile, _("\ |
| to-value character sequence is smaller than from-value sequence")); |
| return; |
| } |
| |
| if (!ignore_content) |
| { |
| do |
| { |
| /* Increment the byte sequence value. */ |
| struct charseq *seq; |
| uint32_t wch; |
| int i; |
| |
| for (i = last_charcode_len - 1; i >= 0; --i) |
| if (++last_charcode[i] != 0) |
| break; |
| |
| if (last_charcode_len == 1) |
| /* Of course we have the charcode value. */ |
| ctype->class256_collection[(size_t) last_charcode[0]] |
| |= class256_bit; |
| |
| /* Find the symbolic name. */ |
| seq = charmap_find_symbol (charmap, last_charcode, |
| last_charcode_len); |
| if (seq != NULL) |
| { |
| if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) |
| seq->ucs4 = repertoire_find_value (repertoire, seq->name, |
| strlen (seq->name)); |
| wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4; |
| |
| if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0) |
| *find_idx (ctype, &ctype->class_collection, |
| &ctype->class_collection_max, |
| &ctype->class_collection_act, wch) |= class_bit; |
| } |
| else |
| wch = ILLEGAL_CHAR_VALUE; |
| |
| if (handle_digits == 1) |
| { |
| /* We must store the digit values. */ |
| if (ctype->mbdigits_act == ctype->mbdigits_max) |
| { |
| ctype->mbdigits_max *= 2; |
| ctype->mbdigits = xrealloc (ctype->mbdigits, |
| (ctype->mbdigits_max |
| * sizeof (char *))); |
| ctype->wcdigits_max *= 2; |
| ctype->wcdigits = xrealloc (ctype->wcdigits, |
| (ctype->wcdigits_max |
| * sizeof (uint32_t))); |
| } |
| |
| seq = xmalloc (sizeof (struct charseq) + last_charcode_len); |
| memcpy ((char *) (seq + 1), last_charcode, last_charcode_len); |
| seq->nbytes = last_charcode_len; |
| |
| ctype->mbdigits[ctype->mbdigits_act++] = seq; |
| ctype->wcdigits[ctype->wcdigits_act++] = wch; |
| } |
| else if (handle_digits == 2) |
| { |
| struct charseq *seq; |
| /* We must store the digit values. */ |
| if (ctype->outdigits_act >= 10) |
| { |
| lr_error (ldfile, _("\ |
| %s: field `%s' does not contain exactly ten entries"), |
| "LC_CTYPE", "outdigit"); |
| return; |
| } |
| |
| seq = xmalloc (sizeof (struct charseq) + last_charcode_len); |
| memcpy ((char *) (seq + 1), last_charcode, last_charcode_len); |
| seq->nbytes = last_charcode_len; |
| |
| ctype->mboutdigits[ctype->outdigits_act] = seq; |
| ctype->wcoutdigits[ctype->outdigits_act] = wch; |
| ++ctype->outdigits_act; |
| } |
| } |
| while (memcmp (last_charcode, now->val.charcode.bytes, |
| last_charcode_len) != 0); |
| } |
| } |
| |
| |
| static uint32_t * |
| find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap, |
| uint32_t wch) |
| { |
| struct translit_t *trunp = ctype->translit; |
| struct translit_ignore_t *tirunp = ctype->translit_ignore; |
| |
| while (trunp != NULL) |
| { |
| /* XXX We simplify things here. The transliterations we look |
| for are only allowed to have one character. */ |
| if (trunp->from[0] == wch && trunp->from[1] == 0) |
| { |
| /* Found it. Now look for a transliteration which can be |
| represented with the character set. */ |
| struct translit_to_t *torunp = trunp->to; |
| |
| while (torunp != NULL) |
| { |
| int i; |
| |
| for (i = 0; torunp->str[i] != 0; ++i) |
| { |
| char utmp[10]; |
| |
| snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]); |
| if (charmap_find_value (charmap, utmp, 9) == NULL) |
| /* This character cannot be represented. */ |
| break; |
| } |
| |
| if (torunp->str[i] == 0) |
| return torunp->str; |
| |
| torunp = torunp->next; |
| } |
| |
| break; |
| } |
| |
| trunp = trunp->next; |
| } |
| |
| /* Check for ignored chars. */ |
| while (tirunp != NULL) |
| { |
| if (tirunp->from <= wch && tirunp->to >= wch) |
| { |
| uint32_t wi; |
| |
| for (wi = tirunp->from; wi <= wch; wi += tirunp->step) |
| if (wi == wch) |
| return (uint32_t []) { 0 }; |
| } |
| } |
| |
| /* Nothing found. */ |
| return NULL; |
| } |
| |
| |
| uint32_t * |
| find_translit (struct localedef_t *locale, const struct charmap_t *charmap, |
| uint32_t wch) |
| { |
| struct locale_ctype_t *ctype; |
| uint32_t *result = NULL; |
| |
| assert (locale != NULL); |
| ctype = locale->categories[LC_CTYPE].ctype; |
| |
| if (ctype == NULL) |
| return NULL; |
| |
| if (ctype->translit != NULL) |
| result = find_translit2 (ctype, charmap, wch); |
| |
| if (result == NULL) |
| { |
| struct translit_include_t *irunp = ctype->translit_include; |
| |
| while (irunp != NULL && result == NULL) |
| { |
| result = find_translit (find_locale (CTYPE_LOCALE, |
| irunp->copy_locale, |
| irunp->copy_repertoire, |
| charmap), |
| charmap, wch); |
| irunp = irunp->next; |
| } |
| } |
| |
| return result; |
| } |
| |
| |
| /* Read one transliteration entry. */ |
| static uint32_t * |
| read_widestring (struct linereader *ldfile, struct token *now, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire) |
| { |
| uint32_t *wstr; |
| |
| if (now->tok == tok_default_missing) |
| /* The special name "" will denote this case. */ |
| wstr = ((uint32_t *) { 0 }); |
| else if (now->tok == tok_bsymbol) |
| { |
| /* Get the value from the repertoire. */ |
| wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t)); |
| wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb, |
| now->val.str.lenmb); |
| if (wstr[0] == ILLEGAL_CHAR_VALUE) |
| { |
| /* We cannot proceed, we don't know the UCS4 value. */ |
| free (wstr); |
| return NULL; |
| } |
| |
| wstr[1] = 0; |
| } |
| else if (now->tok == tok_ucs4) |
| { |
| wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t)); |
| wstr[0] = now->val.ucs4; |
| wstr[1] = 0; |
| } |
| else if (now->tok == tok_charcode) |
| { |
| /* Argh, we have to convert to the symbol name first and then to the |
| UCS4 value. */ |
| struct charseq *seq = charmap_find_symbol (charmap, |
| now->val.str.startmb, |
| now->val.str.lenmb); |
| if (seq == NULL) |
| /* Cannot find the UCS4 value. */ |
| return NULL; |
| |
| if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE) |
| seq->ucs4 = repertoire_find_value (repertoire, seq->name, |
| strlen (seq->name)); |
| if (seq->ucs4 == ILLEGAL_CHAR_VALUE) |
| /* We cannot proceed, we don't know the UCS4 value. */ |
| return NULL; |
| |
| wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t)); |
| wstr[0] = seq->ucs4; |
| wstr[1] = 0; |
| } |
| else if (now->tok == tok_string) |
| { |
| wstr = now->val.str.startwc; |
| if (wstr == NULL || wstr[0] == 0) |
| return NULL; |
| } |
| else |
| { |
| if (now->tok != tok_eol && now->tok != tok_eof) |
| lr_ignore_rest (ldfile, 0); |
| SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); |
| return (uint32_t *) -1l; |
| } |
| |
| return wstr; |
| } |
| |
| |
| static void |
| read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype, |
| struct token *now, const struct charmap_t *charmap, |
| struct repertoire_t *repertoire) |
| { |
| uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire); |
| struct translit_t *result; |
| struct translit_to_t **top; |
| struct obstack *ob = &ctype->mempool; |
| int first; |
| int ignore; |
| |
| if (from_wstr == NULL) |
| /* There is no valid from string. */ |
| return; |
| |
| result = (struct translit_t *) obstack_alloc (ob, |
| sizeof (struct translit_t)); |
| result->from = from_wstr; |
| result->fname = ldfile->fname; |
| result->lineno = ldfile->lineno; |
| result->next = NULL; |
| result->to = NULL; |
| top = &result->to; |
| first = 1; |
| ignore = 0; |
| |
| while (1) |
| { |
| uint32_t *to_wstr; |
| |
| /* Next we have one or more transliterations. They are |
| separated by semicolons. */ |
| now = lr_token (ldfile, charmap, NULL, repertoire, verbose); |
| |
| if (!first && (now->tok == tok_semicolon || now->tok == tok_eol)) |
| { |
| /* One string read. */ |
| const uint32_t zero = 0; |
| |
| if (!ignore) |
| { |
| obstack_grow (ob, &zero, 4); |
| to_wstr = obstack_finish (ob); |
| |
| *top = obstack_alloc (ob, sizeof (struct translit_to_t)); |
| (*top)->str = to_wstr; |
| (*top)->next = NULL; |
| } |
| |
| if (now->tok == tok_eol) |
| { |
| result->next = ctype->translit; |
| ctype->translit = result; |
| return; |
| } |
| |
| if (!ignore) |
| top = &(*top)->next; |
| ignore = 0; |
| } |
| else |
| { |
| to_wstr = read_widestring (ldfile, now, charmap, repertoire); |
| if (to_wstr == (uint32_t *) -1l) |
| { |
| /* An error occurred. */ |
| obstack_free (ob, result); |
| return; |
| } |
| |
| if (to_wstr == NULL) |
| ignore = 1; |
| else |
| /* This value is usable. */ |
| obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4); |
| |
| first = 0; |
| } |
| } |
| } |
| |
| |
| static void |
| read_translit_ignore_entry (struct linereader *ldfile, |
| struct locale_ctype_t *ctype, |
| const struct charmap_t *charmap, |
| struct repertoire_t *repertoire) |
| { |
| /* We expect a semicolon-separated list of characters we ignore. We are |
| only interested in the wide character definitions. These must be |
| single characters, possibly defining a range when an ellipsis is used. */ |
| while (1) |
| { |
| struct token *now = lr_token (ldfile, charmap, NULL, repertoire, |
| verbose); |
| struct translit_ignore_t *newp; |
| uint32_t from; |
| |
| if (now->tok == tok_eol || now->tok == tok_eof) |
| { |
| lr_error (ldfile, |
| _("premature end of `translit_ignore' definition")); |
| return; |
| } |
| |
| if (now->tok != tok_bsymbol && now->tok != tok_ucs4) |
| { |
| lr_error (ldfile, _("syntax error")); |
| lr_ignore_rest (ldfile, 0); |
| return; |
| } |
| |
| if (now->tok == tok_ucs4) |
| from = now->val.ucs4; |
| else |
| /* Try to get the value. */ |
| from = repertoire_find_value (repertoire, now->val.str.startmb, |
| now->val.str.lenmb); |
| |
| if (from == ILLEGAL_CHAR_VALUE) |
| { |
| lr_error (ldfile, "invalid character name"); |
| newp = NULL; |
| } |
| else |
| { |
| newp = (struct translit_ignore_t *) |
| obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t)); |
| newp->from = from; |
| newp->to = from; |
| newp->step = 1; |
| |
| newp->next = ctype->translit_ignore; |
| ctype->translit_ignore = newp; |
| } |
| |
| /* Now we expect either a semicolon, an ellipsis, or the end of the |
| line. */ |
| now = lr_token (ldfile, charmap, NULL, repertoire, verbose); |
| |
| if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2) |
| { |
| /* XXX Should we bother implementing `....'? `...' certainly |
| will not be implemented. */ |
| uint32_t to; |
| int step = now->tok == tok_ellipsis2_2 ? 2 : 1; |
| |
| now = lr_token (ldfile, charmap, NULL, repertoire, verbose); |
| |
| if (now->tok == tok_eol || now->tok == tok_eof) |
| { |
| lr_error (ldfile, |
| _("premature end of `translit_ignore' definition")); |
| return; |
| } |
| |
| if (now->tok != tok_bsymbol && now->tok != tok_ucs4) |
| { |
| lr_error (ldfile, _("syntax error")); |
| lr_ignore_rest (ldfile, 0); |
| return; |
| } |
| |
| if (now->tok == tok_ucs4) |
| to = now->val.ucs4; |
| else |
| /* Try to get the value. */ |
| to = repertoire_find_value (repertoire, now->val.str.startmb, |
| now->val.str.lenmb); |
| |
| if (to == ILLEGAL_CHAR_VALUE) |
| lr_error (ldfile, "invalid character name"); |
| else |
| { |
| /* Make sure the `to'-value is larger. */ |
| if (to >= from) |
| { |
| newp->to = to; |
| newp->step = step; |
| } |
| else |
| lr_error (ldfile, _("\ |
| to-value <U%0*X> of range is smaller than from-value <U%0*X>"), |
| (to | from) < 65536 ? 4 : 8, to, |
| (to | from) < 65536 ? 4 : 8, from); |
| } |
| |
| /* And the next token. */ |
| now = lr_token (ldfile, charmap, NULL, repertoire, verbose); |
| } |
| |
| if (now->tok == tok_eol || now->tok == tok_eof) |
| /* We are done. */ |
| return; |
| |
| if (now->tok == tok_semicolon) |
| /* Next round. */ |
| continue; |
| |
| /* If we come here something is wrong. */ |
| lr_error (ldfile, _("syntax error")); |
| lr_ignore_rest (ldfile, 0); |
| return; |
| } |
| } |
| |
| |
| /* The parser for the LC_CTYPE section of the locale definition. */ |
| void |
| ctype_read (struct linereader *ldfile, struct localedef_t *result, |
| const struct charmap_t *charmap, const char *repertoire_name, |
| int ignore_content) |
| { |
| struct repertoire_t *repertoire = NULL; |
| struct locale_ctype_t *ctype; |
| struct token *now; |
| enum token_t nowtok; |
| size_t cnt; |
| uint32_t last_wch = 0; |
| enum token_t last_token; |
| enum token_t ellipsis_token; |
| int step; |
| char last_charcode[16]; |
| size_t last_charcode_len = 0; |
| const char *last_str = NULL; |
| int mapidx; |
| struct localedef_t *copy_locale = NULL; |
| |
| /* Get the repertoire we have to use. */ |
| if (repertoire_name != NULL) |
| repertoire = repertoire_read (repertoire_name); |
| |
| /* The rest of the line containing `LC_CTYPE' must be free. */ |
| lr_ignore_rest (ldfile, 1); |
| |
| |
| do |
| { |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| nowtok = now->tok; |
| } |
| while (nowtok == tok_eol); |
| |
| /* If we see `copy' now we are almost done. */ |
| if (nowtok == tok_copy) |
| { |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_string) |
| { |
| SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE"); |
| |
| skip_category: |
| do |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| while (now->tok != tok_eof && now->tok != tok_end); |
| |
| if (now->tok != tok_eof |
| || (now = lr_token (ldfile, charmap, NULL, NULL, verbose), |
| now->tok == tok_eof)) |
| lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE"); |
| else if (now->tok != tok_lc_ctype) |
| { |
| lr_error (ldfile, _("\ |
| %1$s: definition does not end with `END %1$s'"), "LC_CTYPE"); |
| lr_ignore_rest (ldfile, 0); |
| } |
| else |
| lr_ignore_rest (ldfile, 1); |
| |
| return; |
| } |
| |
| if (! ignore_content) |
| { |
| /* Get the locale definition. */ |
| copy_locale = load_locale (LC_CTYPE, now->val.str.startmb, |
| repertoire_name, charmap, NULL); |
| if ((copy_locale->avail & CTYPE_LOCALE) == 0) |
| { |
| /* Not yet loaded. So do it now. */ |
| if (locfile_read (copy_locale, charmap) != 0) |
| goto skip_category; |
| } |
| |
| if (copy_locale->categories[LC_CTYPE].ctype == NULL) |
| return; |
| } |
| |
| lr_ignore_rest (ldfile, 1); |
| |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| nowtok = now->tok; |
| } |
| |
| /* Prepare the data structures. */ |
| ctype_startup (ldfile, result, charmap, copy_locale, ignore_content); |
| ctype = result->categories[LC_CTYPE].ctype; |
| |
| /* Remember the repertoire we use. */ |
| if (!ignore_content) |
| ctype->repertoire = repertoire; |
| |
| while (1) |
| { |
| unsigned long int class_bit = 0; |
| unsigned long int class256_bit = 0; |
| int handle_digits = 0; |
| |
| /* Of course we don't proceed beyond the end of file. */ |
| if (nowtok == tok_eof) |
| break; |
| |
| /* Ignore empty lines. */ |
| if (nowtok == tok_eol) |
| { |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| nowtok = now->tok; |
| continue; |
| } |
| |
| switch (nowtok) |
| { |
| case tok_charclass: |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| while (now->tok == tok_ident || now->tok == tok_string) |
| { |
| ctype_class_new (ldfile, ctype, now->val.str.startmb); |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_semicolon) |
| break; |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| } |
| if (now->tok != tok_eol) |
| SYNTAX_ERROR (_("\ |
| %s: syntax error in definition of new character class"), "LC_CTYPE"); |
| break; |
| |
| case tok_charconv: |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| while (now->tok == tok_ident || now->tok == tok_string) |
| { |
| ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap); |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_semicolon) |
| break; |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| } |
| if (now->tok != tok_eol) |
| SYNTAX_ERROR (_("\ |
| %s: syntax error in definition of new character map"), "LC_CTYPE"); |
| break; |
| |
| case tok_class: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| /* We simply forget the `class' keyword and use the following |
| operand to determine the bit. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok == tok_ident || now->tok == tok_string) |
| { |
| /* Must be one of the predefined class names. */ |
| for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) |
| if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0) |
| break; |
| if (cnt >= ctype->nr_charclass) |
| { |
| #ifdef PREDEFINED_CLASSES |
| if (now->val.str.lenmb == 8 |
| && memcmp ("special1", now->val.str.startmb, 8) == 0) |
| class_bit = _ISwspecial1; |
| else if (now->val.str.lenmb == 8 |
| && memcmp ("special2", now->val.str.startmb, 8) == 0) |
| class_bit = _ISwspecial2; |
| else if (now->val.str.lenmb == 8 |
| && memcmp ("special3", now->val.str.startmb, 8) == 0) |
| class_bit = _ISwspecial3; |
| else |
| #endif |
| { |
| /* OK, it's a new class. */ |
| ctype_class_new (ldfile, ctype, now->val.str.startmb); |
| |
| class_bit = _ISwbit (ctype->nr_charclass - 1); |
| } |
| } |
| else |
| { |
| class_bit = _ISwbit (cnt); |
| |
| free (now->val.str.startmb); |
| } |
| } |
| else if (now->tok == tok_digit) |
| goto handle_tok_digit; |
| else if (now->tok < tok_upper || now->tok > tok_blank) |
| goto err_label; |
| else |
| { |
| class_bit = BITw (now->tok); |
| class256_bit = BIT (now->tok); |
| } |
| |
| /* The next character must be a semicolon. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_semicolon) |
| goto err_label; |
| goto read_charclass; |
| |
| case tok_upper: |
| case tok_lower: |
| case tok_alpha: |
| case tok_alnum: |
| case tok_space: |
| case tok_cntrl: |
| case tok_punct: |
| case tok_graph: |
| case tok_print: |
| case tok_xdigit: |
| case tok_blank: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| class_bit = BITw (now->tok); |
| class256_bit = BIT (now->tok); |
| handle_digits = 0; |
| read_charclass: |
| ctype->class_done |= class_bit; |
| last_token = tok_none; |
| ellipsis_token = tok_none; |
| step = 1; |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| while (now->tok != tok_eol && now->tok != tok_eof) |
| { |
| uint32_t wch; |
| struct charseq *seq; |
| |
| if (ellipsis_token == tok_none) |
| { |
| if (get_character (now, charmap, repertoire, &seq, &wch)) |
| goto err_label; |
| |
| if (!ignore_content && seq != NULL && seq->nbytes == 1) |
| /* Yep, we can store information about this byte |
| sequence. */ |
| ctype->class256_collection[seq->bytes[0]] |= class256_bit; |
| |
| if (!ignore_content && wch != ILLEGAL_CHAR_VALUE |
| && class_bit != 0) |
| /* We have the UCS4 position. */ |
| *find_idx (ctype, &ctype->class_collection, |
| &ctype->class_collection_max, |
| &ctype->class_collection_act, wch) |= class_bit; |
| |
| last_token = now->tok; |
| /* Terminate the string. */ |
| if (last_token == tok_bsymbol) |
| { |
| now->val.str.startmb[now->val.str.lenmb] = '\0'; |
| last_str = now->val.str.startmb; |
| } |
| else |
| last_str = NULL; |
| last_wch = wch; |
| memcpy (last_charcode, now->val.charcode.bytes, 16); |
| last_charcode_len = now->val.charcode.nbytes; |
| |
| if (!ignore_content && handle_digits == 1) |
| { |
| /* We must store the digit values. */ |
| if (ctype->mbdigits_act == ctype->mbdigits_max) |
| { |
| ctype->mbdigits_max += 10; |
| ctype->mbdigits = xrealloc (ctype->mbdigits, |
| (ctype->mbdigits_max |
| * sizeof (char *))); |
| ctype->wcdigits_max += 10; |
| ctype->wcdigits = xrealloc (ctype->wcdigits, |
| (ctype->wcdigits_max |
| * sizeof (uint32_t))); |
| } |
| |
| ctype->mbdigits[ctype->mbdigits_act++] = seq; |
| ctype->wcdigits[ctype->wcdigits_act++] = wch; |
| } |
| else if (!ignore_content && handle_digits == 2) |
| { |
| /* We must store the digit values. */ |
| if (ctype->outdigits_act >= 10) |
| { |
| lr_error (ldfile, _("\ |
| %s: field `%s' does not contain exactly ten entries"), |
| "LC_CTYPE", "outdigit"); |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| ctype->mboutdigits[ctype->outdigits_act] = seq; |
| ctype->wcoutdigits[ctype->outdigits_act] = wch; |
| ++ctype->outdigits_act; |
| } |
| } |
| else |
| { |
| /* Now it gets complicated. We have to resolve the |
| ellipsis problem. First we must distinguish between |
| the different kind of ellipsis and this must match the |
| tokens we have seen. */ |
| assert (last_token != tok_none); |
| |
| if (last_token != now->tok) |
| { |
| lr_error (ldfile, _("\ |
| ellipsis range must be marked by two operands of same type")); |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| if (last_token == tok_bsymbol) |
| { |
| if (ellipsis_token == tok_ellipsis3) |
| lr_error (ldfile, _("with symbolic name range values \ |
| the absolute ellipsis `...' must not be used")); |
| |
| charclass_symbolic_ellipsis (ldfile, ctype, charmap, |
| repertoire, now, last_str, |
| class256_bit, class_bit, |
| (ellipsis_token |
| == tok_ellipsis4 |
| ? 10 : 16), |
| ignore_content, |
| handle_digits, step); |
| } |
| else if (last_token == tok_ucs4) |
| { |
| if (ellipsis_token != tok_ellipsis2) |
| lr_error (ldfile, _("\ |
| with UCS range values one must use the hexadecimal symbolic ellipsis `..'")); |
| |
| charclass_ucs4_ellipsis (ldfile, ctype, charmap, |
| repertoire, now, last_wch, |
| class256_bit, class_bit, |
| ignore_content, handle_digits, |
| step); |
| } |
| else |
| { |
| assert (last_token == tok_charcode); |
| |
| if (ellipsis_token != tok_ellipsis3) |
| lr_error (ldfile, _("\ |
| with character code range values one must use the absolute ellipsis `...'")); |
| |
| charclass_charcode_ellipsis (ldfile, ctype, charmap, |
| repertoire, now, |
| last_charcode, |
| last_charcode_len, |
| class256_bit, class_bit, |
| ignore_content, |
| handle_digits); |
| } |
| |
| /* Now we have used the last value. */ |
| last_token = tok_none; |
| } |
| |
| /* Next we expect a semicolon or the end of the line. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok == tok_eol || now->tok == tok_eof) |
| break; |
| |
| if (last_token != tok_none |
| && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2) |
| { |
| if (now->tok == tok_ellipsis2_2) |
| { |
| now->tok = tok_ellipsis2; |
| step = 2; |
| } |
| else if (now->tok == tok_ellipsis4_2) |
| { |
| now->tok = tok_ellipsis4; |
| step = 2; |
| } |
| |
| ellipsis_token = now->tok; |
| |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| continue; |
| } |
| |
| if (now->tok != tok_semicolon) |
| goto err_label; |
| |
| /* And get the next character. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| |
| ellipsis_token = tok_none; |
| step = 1; |
| } |
| break; |
| |
| case tok_digit: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| handle_tok_digit: |
| class_bit = _ISwdigit; |
| class256_bit = _ISdigit; |
| handle_digits = 1; |
| goto read_charclass; |
| |
| case tok_outdigit: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| if (ctype->outdigits_act != 0) |
| lr_error (ldfile, _("\ |
| %s: field `%s' declared more than once"), |
| "LC_CTYPE", "outdigit"); |
| class_bit = 0; |
| class256_bit = 0; |
| handle_digits = 2; |
| goto read_charclass; |
| |
| case tok_toupper: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| mapidx = 0; |
| goto read_mapping; |
| |
| case tok_tolower: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| mapidx = 1; |
| goto read_mapping; |
| |
| case tok_map: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| /* We simply forget the `map' keyword and use the following |
| operand to determine the mapping. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok == tok_ident || now->tok == tok_string) |
| { |
| size_t cnt; |
| |
| for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt) |
| if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0) |
| break; |
| |
| if (cnt < ctype->map_collection_nr) |
| free (now->val.str.startmb); |
| else |
| /* OK, it's a new map. */ |
| ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap); |
| |
| mapidx = cnt; |
| } |
| else if (now->tok < tok_toupper || now->tok > tok_tolower) |
| goto err_label; |
| else |
| mapidx = now->tok - tok_toupper; |
| |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| /* This better should be a semicolon. */ |
| if (now->tok != tok_semicolon) |
| goto err_label; |
| |
| read_mapping: |
| /* Test whether this mapping was already defined. */ |
| if (ctype->tomap_done[mapidx]) |
| { |
| lr_error (ldfile, _("duplicated definition for mapping `%s'"), |
| ctype->mapnames[mapidx]); |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| ctype->tomap_done[mapidx] = 1; |
| |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| while (now->tok != tok_eol && now->tok != tok_eof) |
| { |
| struct charseq *from_seq; |
| uint32_t from_wch; |
| struct charseq *to_seq; |
| uint32_t to_wch; |
| |
| /* Every pair starts with an opening brace. */ |
| if (now->tok != tok_open_brace) |
| goto err_label; |
| |
| /* Next comes the from-value. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (get_character (now, charmap, repertoire, &from_seq, |
| &from_wch) != 0) |
| goto err_label; |
| |
| /* The next is a comma. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_comma) |
| goto err_label; |
| |
| /* And the other value. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (get_character (now, charmap, repertoire, &to_seq, |
| &to_wch) != 0) |
| goto err_label; |
| |
| /* And the last thing is the closing brace. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_close_brace) |
| goto err_label; |
| |
| if (!ignore_content) |
| { |
| /* Check whether the mapping converts from an ASCII value |
| to a non-ASCII value. */ |
| if (from_seq != NULL && from_seq->nbytes == 1 |
| && isascii (from_seq->bytes[0]) |
| && to_seq != NULL && (to_seq->nbytes != 1 |
| || !isascii (to_seq->bytes[0]))) |
| ctype->to_nonascii = 1; |
| |
| if (mapidx < 2 && from_seq != NULL && to_seq != NULL |
| && from_seq->nbytes == 1 && to_seq->nbytes == 1) |
| /* We can use this value. */ |
| ctype->map256_collection[mapidx][from_seq->bytes[0]] |
| = to_seq->bytes[0]; |
| |
| if (from_wch != ILLEGAL_CHAR_VALUE |
| && to_wch != ILLEGAL_CHAR_VALUE) |
| /* Both correct values. */ |
| *find_idx (ctype, &ctype->map_collection[mapidx], |
| &ctype->map_collection_max[mapidx], |
| &ctype->map_collection_act[mapidx], |
| from_wch) = to_wch; |
| } |
| |
| /* Now comes a semicolon or the end of the line/file. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok == tok_semicolon) |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| } |
| break; |
| |
| case tok_translit_start: |
| /* Ignore the entire translit section with its peculiar syntax |
| if we don't need the input. */ |
| if (ignore_content) |
| { |
| do |
| { |
| lr_ignore_rest (ldfile, 0); |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| } |
| while (now->tok != tok_translit_end && now->tok != tok_eof); |
| |
| if (now->tok == tok_eof) |
| lr_error (ldfile, _(\ |
| "%s: `translit_start' section does not end with `translit_end'"), |
| "LC_CTYPE"); |
| |
| break; |
| } |
| |
| /* The rest of the line better should be empty. */ |
| lr_ignore_rest (ldfile, 1); |
| |
| /* We count here the number of allocated entries in the `translit' |
| array. */ |
| cnt = 0; |
| |
| ldfile->translate_strings = 1; |
| ldfile->return_widestr = 1; |
| |
| /* We proceed until we see the `translit_end' token. */ |
| while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose), |
| now->tok != tok_translit_end && now->tok != tok_eof) |
| { |
| if (now->tok == tok_eol) |
| /* Ignore empty lines. */ |
| continue; |
| |
| if (now->tok == tok_include) |
| { |
| /* We have to include locale. */ |
| const char *locale_name; |
| const char *repertoire_name; |
| struct translit_include_t *include_stmt, **include_ptr; |
| |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| /* This should be a string or an identifier. In any |
| case something to name a locale. */ |
| if (now->tok != tok_string && now->tok != tok_ident) |
| { |
| translit_syntax: |
| lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE"); |
| lr_ignore_rest (ldfile, 0); |
| continue; |
| } |
| locale_name = now->val.str.startmb; |
| |
| /* Next should be a semicolon. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok != tok_semicolon) |
| goto translit_syntax; |
| |
| /* Now the repertoire name. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if ((now->tok != tok_string && now->tok != tok_ident) |
| || now->val.str.startmb == NULL) |
| goto translit_syntax; |
| repertoire_name = now->val.str.startmb; |
| if (repertoire_name[0] == '\0') |
| /* Ignore the empty string. */ |
| repertoire_name = NULL; |
| |
| /* Save the include statement for later processing. */ |
| include_stmt = (struct translit_include_t *) |
| xmalloc (sizeof (struct translit_include_t)); |
| include_stmt->copy_locale = locale_name; |
| include_stmt->copy_repertoire = repertoire_name; |
| include_stmt->next = NULL; |
| |
| include_ptr = &ctype->translit_include; |
| while (*include_ptr != NULL) |
| include_ptr = &(*include_ptr)->next; |
| *include_ptr = include_stmt; |
| |
| /* The rest of the line must be empty. */ |
| lr_ignore_rest (ldfile, 1); |
| |
| /* Make sure the locale is read. */ |
| add_to_readlist (LC_CTYPE, locale_name, repertoire_name, |
| 1, NULL); |
| continue; |
| } |
| else if (now->tok == tok_default_missing) |
| { |
| uint32_t *wstr; |
| |
| while (1) |
| { |
| /* We expect a single character or string as the |
| argument. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| wstr = read_widestring (ldfile, now, charmap, |
| repertoire); |
| |
| if (wstr != NULL) |
| { |
| if (ctype->default_missing != NULL) |
| { |
| lr_error (ldfile, _("\ |
| %s: duplicate `default_missing' definition"), "LC_CTYPE"); |
| WITH_CUR_LOCALE (error_at_line (0, 0, |
| ctype->default_missing_file, |
| ctype->default_missing_lineno, |
| _("\ |
| previous definition was here"))); |
| } |
| else |
| { |
| ctype->default_missing = wstr; |
| ctype->default_missing_file = ldfile->fname; |
| ctype->default_missing_lineno = ldfile->lineno; |
| } |
| /* We can have more entries, ignore them. */ |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| else if (wstr == (uint32_t *) -1l) |
| /* This was a syntax error. */ |
| break; |
| |
| /* Maybe there is another replacement we can use. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok == tok_eol || now->tok == tok_eof) |
| { |
| /* Nothing found. We tell the user. */ |
| lr_error (ldfile, _("\ |
| %s: no representable `default_missing' definition found"), "LC_CTYPE"); |
| break; |
| } |
| if (now->tok != tok_semicolon) |
| goto translit_syntax; |
| } |
| |
| continue; |
| } |
| else if (now->tok == tok_translit_ignore) |
| { |
| read_translit_ignore_entry (ldfile, ctype, charmap, |
| repertoire); |
| continue; |
| } |
| |
| read_translit_entry (ldfile, ctype, now, charmap, repertoire); |
| } |
| ldfile->return_widestr = 0; |
| |
| if (now->tok == tok_eof) |
| lr_error (ldfile, _(\ |
| "%s: `translit_start' section does not end with `translit_end'"), |
| "LC_CTYPE"); |
| |
| break; |
| |
| case tok_ident: |
| /* Ignore the rest of the line if we don't need the input of |
| this line. */ |
| if (ignore_content) |
| { |
| lr_ignore_rest (ldfile, 0); |
| break; |
| } |
| |
| /* This could mean one of several things. First test whether |
| it's a character class name. */ |
| for (cnt = 0; cnt < ctype->nr_charclass; ++cnt) |
| if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0) |
| break; |
| if (cnt < ctype->nr_charclass) |
| { |
| class_bit = _ISwbit (cnt); |
| class256_bit = cnt <= 11 ? _ISbit (cnt) : 0; |
| free (now->val.str.startmb); |
| goto read_charclass; |
| } |
| for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt) |
| if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0) |
| break; |
| if (cnt < ctype->map_collection_nr) |
| { |
| mapidx = cnt; |
| free (now->val.str.startmb); |
| goto read_mapping; |
| } |
| #ifdef PREDEFINED_CLASSES |
| if (strcmp (now->val.str.startmb, "special1") == 0) |
| { |
| class_bit = _ISwspecial1; |
| free (now->val.str.startmb); |
| goto read_charclass; |
| } |
| if (strcmp (now->val.str.startmb, "special2") == 0) |
| { |
| class_bit = _ISwspecial2; |
| free (now->val.str.startmb); |
| goto read_charclass; |
| } |
| if (strcmp (now->val.str.startmb, "special3") == 0) |
| { |
| class_bit = _ISwspecial3; |
| free (now->val.str.startmb); |
| goto read_charclass; |
| } |
| if (strcmp (now->val.str.startmb, "tosymmetric") == 0) |
| { |
| mapidx = 2; |
| goto read_mapping; |
| } |
| #endif |
| break; |
| |
| case tok_end: |
| /* Next we assume `LC_CTYPE'. */ |
| now = lr_token (ldfile, charmap, NULL, NULL, verbose); |
| if (now->tok == tok_eof) |
| break; |
| if (now->tok == tok_eol) |
| lr_error (ldfile, _("%s: incomplete `END' line"), |
|