blob: 0ab49bd6d5ddd2e456b261800bca85641fa85fc6 [file] [log] [blame]
/* Charset handling while reading PO files.
Copyright (C) 2001-2003, 2006, 2021 Free Software Foundation, Inc.
Written by Bruno Haible <haible@clisp.cons.org>, 2001.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>. */
#ifndef _PO_CHARSET_H
#define _PO_CHARSET_H
#include <stdbool.h>
#include <stddef.h>
#if HAVE_ICONV
#include <iconv.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* Canonicalize an encoding name.
   CHARSET is the encoding name to canonicalize.
   The results of this function are statically allocated and can be
   compared using ==, so callers can test for a particular encoding by
   pointer comparison instead of a string comparison.
   Return NULL if CHARSET is not a valid encoding name.  */
extern const char *po_charset_canonicalize (const char *charset);
/* The canonicalized encoding name for ASCII.
   NOTE(review): DLL_VARIABLE is not defined in this header; presumably it
   is provided by the build configuration included before this file --
   confirm.  */
extern DLL_VARIABLE const char *po_charset_ascii;
/* The canonicalized encoding name for UTF-8.  */
extern DLL_VARIABLE const char *po_charset_utf8;
/* Test for ASCII compatibility.
   CANON_CHARSET is, going by its name, expected to be a canonicalized
   encoding name.
   Return true if the encoding is ASCII compatible, false otherwise.  */
extern bool po_charset_ascii_compatible (const char *canon_charset);
/* Test for a weird encoding, i.e. an encoding which has double-byte
   characters ending in 0x5C.
   CANON_CHARSET is, going by its name, expected to be a canonicalized
   encoding name.
   Return true if the encoding is weird in this sense, false otherwise.  */
extern bool po_is_charset_weird (const char *canon_charset);
/* Test for a weird CJK encoding, i.e. a weird encoding with CJK structure.
   An encoding has CJK structure if every valid character stream is composed
   of single bytes in the range 0x{00..7F} and of byte pairs in the range
   0x{80..FF}{30..FF}.
   CANON_CHARSET is, going by its name, expected to be a canonicalized
   encoding name.
   Return true if the encoding is a weird CJK encoding, false otherwise.  */
extern bool po_is_charset_weird_cjk (const char *canon_charset);
/* A character iterator for a given encoding.
   Given a pointer S into a string, it returns the number of bytes occupied
   by the next single character.  If the piece of string is not valid or if
   *S == '\0', it returns 1.  */
typedef size_t (*character_iterator_t) (const char *s);
/* Return a character iterator for the encoding named CANON_CHARSET
   (going by its name, expected to be a canonicalized encoding name).  */
extern character_iterator_t po_charset_character_iterator (const char *canon_charset);
/* The encoding of the PO file currently being read, as specified in its
   header entry.  */
extern DLL_VARIABLE const char *po_lex_charset;
/* Representation of U+2068 FIRST STRONG ISOLATE (FSI) in the PO file's
   encoding, or NULL if not available.  Check for NULL before use.  */
extern DLL_VARIABLE const char *po_lex_isolate_start;
/* Representation of U+2069 POP DIRECTIONAL ISOLATE (PDI) in the PO file's
   encoding, or NULL if not available.  Check for NULL before use.  */
extern DLL_VARIABLE const char *po_lex_isolate_end;
#if HAVE_ICONV
/* Converter from the PO file's encoding to UTF-8.
   Only declared when HAVE_ICONV is set, so code that uses it must be
   guarded by the same condition.  */
extern DLL_VARIABLE iconv_t po_lex_iconv;
#endif
/* If no converter is available, some information about the structure of the
   PO file's encoding.
   NOTE(review): presumably true when the PO file's encoding is a weird CJK
   encoding in the sense of po_is_charset_weird_cjk -- confirm in the
   implementation.  */
extern DLL_VARIABLE bool po_lex_weird_cjk;
/* Initialize the PO file's encoding.
   Takes no arguments and returns nothing.
   NOTE(review): presumably resets the po_lex_* variables declared in this
   header -- confirm in the implementation.  */
extern void po_lex_charset_init (void);
/* Set the PO file's encoding from the header entry.
   HEADER_ENTRY is the text of the PO file's header entry.
   FILENAME is the name of the PO file.
   NOTE(review): FILENAME is presumably used only for diagnostics --
   confirm in the implementation.  */
extern void po_lex_charset_set (const char *header_entry,
const char *filename);
/* Finish up with the PO file's encoding.
   Takes no arguments and returns nothing.
   NOTE(review): presumably releases resources such as the iconv converter
   when one is in use -- confirm in the implementation.  */
extern void po_lex_charset_close (void);
#ifdef __cplusplus
}
#endif
#endif /* _PO_CHARSET_H */