| /* Python brace format strings. |
| Copyright (C) 2004, 2006-2007, 2013-2014, 2016, 2019 Free Software Foundation, |
| Inc. |
| Written by Daiki Ueno <ueno@gnu.org>, 2013. |
| |
| This program is free software: you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3 of the License, or |
| (at your option) any later version. |
| |
| This program is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with this program. If not, see <https://www.gnu.org/licenses/>. */ |
| |
| #ifdef HAVE_CONFIG_H |
| # include <config.h> |
| #endif |
| |
| #include <stdbool.h> |
| #include <stdlib.h> |
| #include <string.h> |
| |
| #include "format.h" |
| #include "c-ctype.h" |
| #include "xalloc.h" |
| #include "xvasprintf.h" |
| #include "format-invalid.h" |
| #include "gettext.h" |
| |
| #define _(str) gettext (str) |
| |
| /* Python brace format strings are defined by PEP 3101 together with the |
| 'format' method of the string class. |
| A format string directive here consists of |
| - an opening brace '{', |
| - an identifier [_A-Za-z][_0-9A-Za-z]*|[0-9]+, |
| - zero or more getattr ('.') or getitem ('['..']') operators, each with |
| an identifier as argument, |
| - an optional format specifier starting with ':', with a |
| (unnested) format string as argument, |
| - a closing brace '}'. |
| Brace characters '{' and '}' can be escaped by doubling them: '{{' and '}}'. |
| */ |
| |
/* One argument reference collected from a top-level directive.  */
struct named_arg
{
  /* NUL-terminated copy of the directive text found between the opening
     '{' and the closing '}'.  Allocated by parse_directive and released
     by free_named_args (or by format_parse when it is a duplicate).  */
  char *name;
};
| |
/* Result of parsing one format string.  */
struct spec
{
  /* Number of top-level directives seen; escaped "{{" and directives
     nested inside a format specifier are not counted.  */
  unsigned int directives;

  /* Number of entries of NAMED that are in use.  */
  unsigned int named_arg_count;

  /* Number of entries NAMED has room for.  */
  unsigned int allocated;

  /* Growable array of argument references, or NULL while empty.  */
  struct named_arg *named;
};
| |
| |
/* Forward declarations of helpers that are used before their
   definitions further down in this file.  */

/* Scans the format string up to TERMINATOR or the end of the string.  */
static bool parse_upto (struct spec *spec,
                        const char **formatp,
                        bool is_toplevel,
                        char terminator,
                        bool translated,
                        char *fdi,
                        char **invalid_reason);

/* Releases the argument names held by SPEC and the array itself.  */
static void free_named_args (struct spec *spec);
| |
| |
| /* All the parse_* functions (except parse_upto) follow the same |
| calling convention. FORMATP shall point to the beginning of a token. |
| If parsing succeeds, FORMATP will point to the next character after |
| the token, and true is returned. Otherwise, FORMATP will be |
| unchanged and false is returned. */ |
| |
/* Parse an identifier of the form [_A-Za-z][_0-9A-Za-z]*.
   On success, *FORMATP is advanced past the identifier and true is
   returned.  Otherwise *FORMATP is left untouched and false is returned.
   SPEC, TRANSLATED, FDI and INVALID_REASON are accepted for uniformity
   with the other parse_* functions but are not used here.  */
static bool
parse_named_field (struct spec *spec,
                   const char **formatp, bool translated, char *fdi,
                   char **invalid_reason)
{
  const char *p = *formatp;

  /* The first character must be a letter or an underscore.  */
  if (!((*p >= 'A' && *p <= 'Z') || (*p >= 'a' && *p <= 'z') || *p == '_'))
    return false;

  /* The remaining characters may additionally be digits.  */
  for (p++;
       (*p >= 'A' && *p <= 'Z') || (*p >= 'a' && *p <= 'z')
       || (*p >= '0' && *p <= '9') || *p == '_';
       p++)
    continue;

  *formatp = p;
  return true;
}
| |
/* Parse a non-empty run of decimal digits, [0-9]+.
   On success, *FORMATP is advanced past the digits and true is returned.
   Otherwise *FORMATP is left untouched and false is returned.
   SPEC, TRANSLATED, FDI and INVALID_REASON are accepted for uniformity
   with the other parse_* functions but are not used here.  */
static bool
parse_numeric_field (struct spec *spec,
                     const char **formatp, bool translated, char *fdi,
                     char **invalid_reason)
{
  const char *p = *formatp;

  if (*p < '0' || *p > '9')
    return false;

  /* Skip the first digit and every digit that follows it.  */
  p++;
  while (*p >= '0' && *p <= '9')
    p++;

  *formatp = p;
  return true;
}
| |
| static bool |
| parse_directive (struct spec *spec, |
| const char **formatp, bool is_toplevel, |
| bool translated, char *fdi, char **invalid_reason) |
| { |
| const char *format = *formatp; |
| const char *const format_start = format; |
| const char *name_start; |
| char c; |
| |
| c = *++format; |
| if (c == '{') |
| { |
| *formatp = ++format; |
| return true; |
| } |
| |
| name_start = format; |
| if (!parse_named_field (spec, &format, translated, fdi, invalid_reason) |
| && !parse_numeric_field (spec, &format, translated, fdi, invalid_reason)) |
| { |
| *invalid_reason = |
| xasprintf (_("In the directive number %u, '%c' cannot start a field name."), spec->directives, *format); |
| FDI_SET (format, FMTDIR_ERROR); |
| return false; |
| } |
| |
| /* Parse '.' (getattr) or '[..]' (getitem) operators followed by a |
| name. If must not recurse, but can be specifed in a chain, such |
| as "foo.bar.baz[0]". */ |
| for (;;) |
| { |
| c = *format; |
| |
| if (c == '.') |
| { |
| format++; |
| if (!parse_named_field (spec, &format, translated, fdi, |
| invalid_reason)) |
| { |
| *invalid_reason = |
| xasprintf (_("In the directive number %u, '%c' cannot start a getattr argument."), spec->directives, *format); |
| FDI_SET (format, FMTDIR_ERROR); |
| return false; |
| } |
| } |
| else if (c == '[') |
| { |
| format++; |
| if (!parse_named_field (spec, &format, translated, fdi, |
| invalid_reason) |
| && !parse_numeric_field (spec, &format, translated, fdi, |
| invalid_reason)) |
| { |
| *invalid_reason = |
| xasprintf (_("In the directive number %u, '%c' cannot start a getitem argument."), spec->directives, *format); |
| FDI_SET (format, FMTDIR_ERROR); |
| return false; |
| } |
| |
| c = *format++; |
| if (c != ']') |
| { |
| *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); |
| FDI_SET (format, FMTDIR_ERROR); |
| return false; |
| } |
| } |
| else |
| break; |
| } |
| |
| if (c == ':') |
| { |
| if (!is_toplevel) |
| { |
| *invalid_reason = |
| xasprintf (_("In the directive number %u, no more nesting is allowed in a format specifier."), spec->directives); |
| FDI_SET (format, FMTDIR_ERROR); |
| return false; |
| } |
| |
| /* Format specifiers. Although a format specifier can be any |
| string in theory, we can only recognize two types of format |
| specifiers below, because otherwise we would need to evaluate |
| Python expressions by ourselves: |
| |
| - A nested format directive expanding to the whole string |
| - The Standard Format Specifiers, as described in PEP3101, |
| not including a nested format directive */ |
| format++; |
| if (*format == '{') |
| { |
| /* Nested format directive. */ |
| if (!parse_directive (spec, &format, false, translated, fdi, |
| invalid_reason)) |
| { |
| /* FDI and INVALID_REASON will be set by a recursive call of |
| parse_directive. */ |
| return false; |
| } |
| |
| if (*format != '}') |
| { |
| *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); |
| FDI_SET (format, FMTDIR_ERROR); |
| return false; |
| } |
| } |
| else |
| { |
| /* Standard format specifiers is in the form: |
| [[fill]align][sign][#][0][minimumwidth][.precision][type] */ |
| |
| /* Look ahead two characters to skip [[fill]align]. */ |
| int c1, c2; |
| |
| c1 = format[0]; |
| c2 = format[1]; |
| |
| if (c2 == '<' || c2 == '>' || c2 == '=' || c2 == '^') |
| format += 2; |
| else if (c1 == '<' || c1 == '>' || c1 == '=' || c1 == '^') |
| format++; |
| if (*format == '+' || *format == '-' || *format == ' ') |
| format++; |
| if (*format == '#') |
| format++; |
| if (*format == '0') |
| format++; |
| while (c_isdigit (*format)) |
| format++; |
| if (*format == '.') |
| { |
| format++; |
| while (c_isdigit (*format)) |
| format++; |
| } |
| switch (*format) |
| { |
| case 'b': case 'c': case 'd': case 'o': case 'x': case 'X': |
| case 'n': |
| case 'e': case 'E': case 'f': case 'F': case 'g': case 'G': |
| case '%': |
| format++; |
| break; |
| default: |
| break; |
| } |
| if (*format != '}') |
| { |
| *invalid_reason = INVALID_UNTERMINATED_DIRECTIVE (); |
| FDI_SET (format, FMTDIR_ERROR); |
| return false; |
| } |
| } |
| c = *format; |
| } |
| |
| if (c != '}') |
| { |
| *invalid_reason = |
| xasprintf (_("In the directive number %u, there is an unterminated format directive."), spec->directives); |
| FDI_SET (format, FMTDIR_ERROR); |
| return false; |
| } |
| |
| if (is_toplevel) |
| { |
| char *name; |
| size_t n = format - name_start; |
| |
| FDI_SET (name_start - 1, FMTDIR_START); |
| |
| name = XNMALLOC (n + 1, char); |
| memcpy (name, name_start, n); |
| name[n] = '\0'; |
| |
| spec->directives++; |
| |
| if (spec->allocated == spec->named_arg_count) |
| { |
| spec->allocated = 2 * spec->allocated + 1; |
| spec->named = (struct named_arg *) xrealloc (spec->named, spec->allocated * sizeof (struct named_arg)); |
| } |
| spec->named[spec->named_arg_count].name = name; |
| spec->named_arg_count++; |
| |
| FDI_SET (format, FMTDIR_END); |
| } |
| |
| *formatp = ++format; |
| return true; |
| } |
| |
/* Scan the format string from *FORMATP until TERMINATOR or the end of
   the string is reached, handing every '{' to parse_directive.
   On success, *FORMATP is set to the position where scanning stopped and
   true is returned.  If a directive is invalid, false is returned and
   *FORMATP is left unchanged; parse_directive has then filled in
   *INVALID_REASON.  */
static bool
parse_upto (struct spec *spec,
            const char **formatp, bool is_toplevel, char terminator,
            bool translated, char *fdi, char **invalid_reason)
{
  const char *cursor = *formatp;

  while (*cursor != terminator && *cursor != '\0')
    {
      if (*cursor != '{')
        {
          /* Ordinary character.  */
          cursor++;
          continue;
        }

      /* parse_directive advances CURSOR past the directive.  */
      if (!parse_directive (spec, &cursor, is_toplevel, translated, fdi,
                            invalid_reason))
        return false;
    }

  *formatp = cursor;
  return true;
}
| |
| static int |
| named_arg_compare (const void *p1, const void *p2) |
| { |
| return strcmp (((const struct named_arg *) p1)->name, |
| ((const struct named_arg *) p2)->name); |
| } |
| |
| static void * |
| format_parse (const char *format, bool translated, char *fdi, |
| char **invalid_reason) |
| { |
| struct spec spec; |
| struct spec *result; |
| |
| spec.directives = 0; |
| spec.named_arg_count = 0; |
| spec.allocated = 0; |
| spec.named = NULL; |
| |
| if (!parse_upto (&spec, &format, true, '\0', translated, fdi, invalid_reason)) |
| { |
| free_named_args (&spec); |
| return NULL; |
| } |
| |
| /* Sort the named argument array, and eliminate duplicates. */ |
| if (spec.named_arg_count > 1) |
| { |
| unsigned int i, j; |
| |
| qsort (spec.named, spec.named_arg_count, sizeof (struct named_arg), |
| named_arg_compare); |
| |
| /* Remove duplicates: Copy from i to j, keeping 0 <= j <= i. */ |
| for (i = j = 0; i < spec.named_arg_count; i++) |
| if (j > 0 && strcmp (spec.named[i].name, spec.named[j-1].name) == 0) |
| free (spec.named[i].name); |
| else |
| { |
| if (j < i) |
| spec.named[j].name = spec.named[i].name; |
| j++; |
| } |
| spec.named_arg_count = j; |
| } |
| |
| result = XMALLOC (struct spec); |
| *result = spec; |
| return result; |
| } |
| |
| static void |
| free_named_args (struct spec *spec) |
| { |
| if (spec->named != NULL) |
| { |
| unsigned int i; |
| for (i = 0; i < spec->named_arg_count; i++) |
| free (spec->named[i].name); |
| free (spec->named); |
| } |
| } |
| |
/* Release a parse result returned by format_parse: first the argument
   names and their array, then the struct spec itself.  */
static void
format_free (void *descr)
{
  struct spec *parsed = (struct spec *) descr;

  free_named_args (parsed);
  free (parsed);
}
| |
| static int |
| format_get_number_of_directives (void *descr) |
| { |
| struct spec *spec = (struct spec *) descr; |
| |
| return spec->directives; |
| } |
| |
| static bool |
| format_check (void *msgid_descr, void *msgstr_descr, bool equality, |
| formatstring_error_logger_t error_logger, |
| const char *pretty_msgid, const char *pretty_msgstr) |
| { |
| struct spec *spec1 = (struct spec *) msgid_descr; |
| struct spec *spec2 = (struct spec *) msgstr_descr; |
| bool err = false; |
| |
| if (spec1->named_arg_count + spec2->named_arg_count > 0) |
| { |
| unsigned int i, j; |
| unsigned int n1 = spec1->named_arg_count; |
| unsigned int n2 = spec2->named_arg_count; |
| |
| /* Check the argument names in spec1 are contained in those of spec2. |
| Both arrays are sorted. We search for the differences. */ |
| for (i = 0, j = 0; i < n1 || j < n2; ) |
| { |
| int cmp = (i >= n1 ? 1 : |
| j >= n2 ? -1 : |
| strcmp (spec1->named[i].name, spec2->named[j].name)); |
| |
| if (cmp > 0) |
| { |
| if (equality) |
| { |
| if (error_logger) |
| error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"), |
| spec2->named[i].name, pretty_msgid); |
| err = true; |
| break; |
| } |
| else |
| j++; |
| } |
| else if (cmp < 0) |
| { |
| if (equality) |
| { |
| if (error_logger) |
| error_logger (_("a format specification for argument '%s' doesn't exist in '%s'"), |
| spec1->named[i].name, pretty_msgstr); |
| err = true; |
| break; |
| } |
| else |
| i++; |
| } |
| else |
| j++, i++; |
| } |
| } |
| |
| return err; |
| } |
| |
| |
| struct formatstring_parser formatstring_python_brace = |
| { |
| format_parse, |
| format_free, |
| format_get_number_of_directives, |
| NULL, |
| format_check |
| }; |
| |
| |
| #ifdef TEST |
| |
| /* Test program: Print the argument list specification returned by |
| format_parse for strings read from standard input. */ |
| |
| #include <stdio.h> |
| |
| static void |
| format_print (void *descr) |
| { |
| struct spec *spec = (struct spec *) descr; |
| unsigned int i; |
| |
| if (spec == NULL) |
| { |
| printf ("INVALID"); |
| return; |
| } |
| |
| printf ("{"); |
| for (i = 0; i < spec->named_arg_count; i++) |
| { |
| if (i > 0) |
| printf (", "); |
| printf ("'%s'", spec->named[i].name); |
| } |
| printf ("}"); |
| } |
| |
| int |
| main () |
| { |
| for (;;) |
| { |
| char *line = NULL; |
| size_t line_size = 0; |
| int line_len; |
| char *invalid_reason; |
| void *descr; |
| |
| line_len = getline (&line, &line_size, stdin); |
| if (line_len < 0) |
| break; |
| if (line_len > 0 && line[line_len - 1] == '\n') |
| line[--line_len] = '\0'; |
| |
| invalid_reason = NULL; |
| descr = format_parse (line, false, NULL, &invalid_reason); |
| |
| format_print (descr); |
| printf ("\n"); |
| if (descr == NULL) |
| printf ("%s\n", invalid_reason); |
| |
| free (invalid_reason); |
| free (line); |
| } |
| |
| return 0; |
| } |
| |
| /* |
| * For Emacs M-x compile |
| * Local Variables: |
| * compile-command: "/bin/sh ../libtool --tag=CC --mode=link gcc -o a.out -static -O -g -Wall -I.. -I../gnulib-lib -I../../gettext-runtime/intl -DHAVE_CONFIG_H -DTEST format-python-brace.c ../gnulib-lib/libgettextlib.la" |
| * End: |
| */ |
| |
| #endif /* TEST */ |