blob: d18a1d47aea39cc72719391040ba57596307d650 [file] [log] [blame]
/*
* R : A Computer Language for Statistical Data Analysis
* Copyright (C) 2005-2017 The R Core Team
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, a copy is available at
* https://www.R-project.org/Licenses/
*/
/* Internal header, not installed */
/* This file was contributed by Ei-ji Nakama.
* See also the comments in ../main/rlocale.c.
* It does 2 things:
* (a) supplies wrapper/substitute wc[s]width functions for use in
* character.c, errors.c, printutils.c, devPS.c, RGui console.
* (b) Defines a replacement for iswctype to be used on Windows, macOS and AIX
* in gram.c.
*
* It is not an installed header.
*/
#ifndef R_LOCALE_H
#define R_LOCALE_H
#include <wchar.h>
#include <ctype.h>
#include <wctype.h>
/*
* Rwchar_t is the type used to hold a single Unicode code point.
* By default it is just wchar_t. On Windows, where wchar_t is too small
* and UTF-16 is used, it is an unsigned int instead.
*/
#if !defined(Win32)
typedef wchar_t Rwchar_t;
#else
typedef unsigned int Rwchar_t;
#endif
/*
* Windows CJK
* In Unicode, there is not a rule about character width.
* A letter of breadth is used in a CJK (China, Japan, Korea,
* Taiwan, Hong Kong, and Singapore) area, and there are a
* letter and a standard (character width is not still prescribed)
* of a cord in a country.
* Letter width is a problem of a font, but it is a rule route
* besides a alphanumeric character that use a breadth letter.
* It is generally defined as a breadth letter for a font such
* as Japanese.
* - Win32
* Attempted explanation by BDR
* The display widths of characters are not prescribed in Unicode.
* Double-width characters are used in the CJK area: their width can
* be font-specific, with different fonts in use in different parts
* of the CJK area. The tables supplied in many OSes and by Markus
* Kuhn do not take the exact locale into account. The
* tables supplied in rlocale_data.h allow different widths for
* different parts of the CJK area, and also where needed different
* widths on Windows. (The Windows differences are in zh_CN, and
* apply to European characters.)
*
* The differences are mainly (but not exclusively) in the
* Unicode 'East Asian Ambiguous' class.
*
*/
extern int Ri18n_wcwidth(Rwchar_t);
extern int Ri18n_wcswidth (const wchar_t *, size_t);
/* macOS CJK and Windows XP (Japanese)
* The iswctype functions of macOS call the isctype ones: no i18n.
* For example, iswprint on Windows does not accept the macron in
* Japanese "a-ru" of R as a letter.
* Therefore Japanese "Buraian.Ripuri-" of "Brian Ripley" is
* shown as a hex string. :-)
* We define alternatives to be used if
* defined(Win32) || defined(__APPLE__) || defined(_AIX)
*/
/* Substitutes for wctype() and iswctype(). Only declared in this header;
* the definitions live elsewhere. */
wctype_t Ri18n_wctype(const char *name);
int Ri18n_iswctype(wint_t wc, wctype_t desc);
#ifndef IN_RLOCALE_C
/* Route the standard wide-character classification names through the
* Ri18n_* substitutes. Skipped when IN_RLOCALE_C is defined, because
* rlocale.c itself must not see these redefinitions.
* Each name is first #undef'd in case <wctype.h> supplied it as a macro.
* Every isw<class>() expands to a class lookup by name followed by the test. */
#undef iswupper
#define iswupper(wc) Ri18n_iswctype(wc, Ri18n_wctype("upper"))
#undef iswlower
#define iswlower(wc) Ri18n_iswctype(wc, Ri18n_wctype("lower"))
#undef iswalpha
#define iswalpha(wc) Ri18n_iswctype(wc, Ri18n_wctype("alpha"))
#undef iswdigit
#define iswdigit(wc) Ri18n_iswctype(wc, Ri18n_wctype("digit"))
#undef iswxdigit
#define iswxdigit(wc) Ri18n_iswctype(wc, Ri18n_wctype("xdigit"))
#undef iswspace
#define iswspace(wc) Ri18n_iswctype(wc, Ri18n_wctype("space"))
#undef iswprint
#define iswprint(wc) Ri18n_iswctype(wc, Ri18n_wctype("print"))
#undef iswgraph
#define iswgraph(wc) Ri18n_iswctype(wc, Ri18n_wctype("graph"))
#undef iswblank
#define iswblank(wc) Ri18n_iswctype(wc, Ri18n_wctype("blank"))
#undef iswcntrl
#define iswcntrl(wc) Ri18n_iswctype(wc, Ri18n_wctype("cntrl"))
#undef iswpunct
#define iswpunct(wc) Ri18n_iswctype(wc, Ri18n_wctype("punct"))
#undef iswalnum
#define iswalnum(wc) Ri18n_iswctype(wc, Ri18n_wctype("alnum"))
/* The generic entry points map straight onto the substitutes. */
#undef wctype
#define wctype(name) Ri18n_wctype(name)
#undef iswctype
#define iswctype(wc,desc) Ri18n_iswctype(wc,desc)
#endif /* IN_RLOCALE_C */
/* Bounds of the high and low surrogate ranges, and tests against them.
* These definitions are from winnls.h in Mingw_w64. We don't need the rest of that file. */
#define HIGH_SURROGATE_START 0xd800
#define HIGH_SURROGATE_END 0xdbff
#define LOW_SURROGATE_START 0xdc00
#define LOW_SURROGATE_END 0xdfff
/* IS_HIGH_SURROGATE and IS_LOW_SURROGATE each expand their argument twice
* (once per bound comparison), which is bad: an argument with side effects,
* such as *p++, would be evaluated twice. They are nevertheless kept here in
* the original form for consistency with Mingw_w64, so users must make sure
* that evaluating the argument doesn't result in side effects.
* IS_SURROGATE_PAIR is built from the other two and expands each of its
* arguments twice as well.
* NOTE(review): presumably the macro text has to stay identical to winnls.h
* so that including both headers is only a benign redefinition - confirm
* before rewriting any of these.
*/
#define IS_HIGH_SURROGATE(wch) (((wch) >= HIGH_SURROGATE_START) && ((wch) <= HIGH_SURROGATE_END))
#define IS_LOW_SURROGATE(wch) (((wch) >= LOW_SURROGATE_START) && ((wch) <= LOW_SURROGATE_END))
#define IS_SURROGATE_PAIR(hs, ls) (IS_HIGH_SURROGATE (hs) && IS_LOW_SURROGATE (ls))
/* utf8toucs32 is remapped to the prefixed name Rf_utf8toucs32, so the
* prototype below actually declares Rf_utf8toucs32. It returns an Rwchar_t.
* NOTE(review): presumably `high` is a high surrogate and `s` points at UTF-8
* bytes from which the rest of the code point is taken - confirm against the
* definition. */
#define utf8toucs32 Rf_utf8toucs32
Rwchar_t utf8toucs32(wchar_t high, const char *s);
#endif /* R_LOCALE_H */