/* src/include/rlocale.h - R - Git at Google */

 /*
  *  R : A Computer Language for Statistical Data Analysis
  *  Copyright (C) 2005-2017   The R Core Team
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
  *  the Free Software Foundation; either version 2 of the License, or
  *  (at your option) any later version.
  *
  *  This program is distributed in the hope that it will be useful,
  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  *  GNU General Public License for more details.
  *
  *  You should have received a copy of the GNU General Public License
  *  along with this program; if not, a copy is available at
  *  https://www.R-project.org/Licenses/
  */

 /* Internal header, not installed */

 /*  This file was contributed by Ei-ji Nakama.
  *  See also the comments in  ../main/rlocale.c.

  *  It does 2 things:
  * (a) supplies wrapper/substitute wc[s]width functions for use in
  *    character.c, errors.c, printutils.c, devPS.c, RGui console.
  * (b) defines a replacement for iswctype, to be used in gram.c on
  *    Windows, macOS and AIX.
  *
  * It is not an installed header.
  */

 #ifndef R_LOCALE_H
 #define R_LOCALE_H

 #include <wchar.h>
 #include <ctype.h>
 #include <wctype.h>

 /*
  * Rwchar_t: an integer type that holds a single Unicode code point.
  *
  * Except when Win32 is defined this is simply wchar_t.  On Windows
  * wchar_t is too small (UTF-16 is used there), so unsigned int is
  * used instead.
  */

 #ifndef Win32
 typedef wchar_t Rwchar_t;
 #else
 typedef unsigned int Rwchar_t;
 #endif

 /*
  * Windows CJK
  * In Unicode, there is not a rule about character width.
  * A letter of breadth is used in a CJK (China, Japan, Korea,
  * Taiwan, Hong Kong, and Singapore) area, and there are a
  * letter and a standard (character width is not still prescribed)
  * of a cord in a country.
  * Letter width is a problem of a font, but it is a rule route
  * besides a alphanumeric character that use a breadth letter.
  * It is generally defined as a breadth letter for a font such
  * as Japanese.
  * - Win32

  *  Attempted explanation by BDR
  *  The display widths of characters are not prescribed in Unicode.
  *  Double-width characters are used in the CJK area: their width can
  *  be font-specific, with different fonts in use in different parts
  *  of the CJK area.  The tables supplied in many OSes and by Markus
  *  Kuhn do not take the exact locale into account.  The
  *  tables supplied in rlocale_data.h allow different widths for
  *  different parts of the CJK area, and also where needed different
  *  widths on Windows.  (The Windows differences are in zh_CN, and
  *  apply to European characters.)
  *
  * The differences are mainly (but not exclusively) in the
  * Unicode 'East Asian Ambiguous' class.
  *
  */

 extern int Ri18n_wcwidth(Rwchar_t);
 extern int Ri18n_wcswidth (const wchar_t *, size_t);

 /* macOS CJK and Windows XP (Japanese)
  * The iswctype functions of macOS call the isctype functions: no i18n.
  * For example, iswprint on Windows does not accept the macron in
  * Japanese "a-ru" of R as a letter.
  * Therefore the Japanese "Buraian.Ripuri-" for "Brian Ripley" is
  * shown as a hex string. :-)
  * We define alternatives to be used if
  * defined(Win32) || defined(__APPLE__) || defined(_AIX)
  */
 wctype_t Ri18n_wctype(const char *name);
 int      Ri18n_iswctype(wint_t wc, wctype_t desc);

 #if !defined(IN_RLOCALE_C)
 /* Route the standard <wctype.h> classification names to the Ri18n_
  * replacements.  rlocale.c defines IN_RLOCALE_C to avoid these
  * redefinitions in its own translation unit.
  */

 #undef  iswupper
 #define iswupper(wc)        Ri18n_iswctype(wc, Ri18n_wctype("upper"))

 #undef  iswlower
 #define iswlower(wc)        Ri18n_iswctype(wc, Ri18n_wctype("lower"))

 #undef  iswalpha
 #define iswalpha(wc)        Ri18n_iswctype(wc, Ri18n_wctype("alpha"))

 #undef  iswdigit
 #define iswdigit(wc)        Ri18n_iswctype(wc, Ri18n_wctype("digit"))

 #undef  iswxdigit
 #define iswxdigit(wc)       Ri18n_iswctype(wc, Ri18n_wctype("xdigit"))

 #undef  iswspace
 #define iswspace(wc)        Ri18n_iswctype(wc, Ri18n_wctype("space"))

 #undef  iswprint
 #define iswprint(wc)        Ri18n_iswctype(wc, Ri18n_wctype("print"))

 #undef  iswgraph
 #define iswgraph(wc)        Ri18n_iswctype(wc, Ri18n_wctype("graph"))

 #undef  iswblank
 #define iswblank(wc)        Ri18n_iswctype(wc, Ri18n_wctype("blank"))

 #undef  iswcntrl
 #define iswcntrl(wc)        Ri18n_iswctype(wc, Ri18n_wctype("cntrl"))

 #undef  iswpunct
 #define iswpunct(wc)        Ri18n_iswctype(wc, Ri18n_wctype("punct"))

 #undef  iswalnum
 #define iswalnum(wc)        Ri18n_iswctype(wc, Ri18n_wctype("alnum"))

 /* The generic entry points are redirected as well. */
 #undef  wctype
 #define wctype(name)        Ri18n_wctype(name)

 #undef  iswctype
 #define iswctype(wc, desc)  Ri18n_iswctype(wc, desc)

 #endif /* !IN_RLOCALE_C */

 /* These definitions are from winnls.h in Mingw_w64.  We don't need the rest of that file.
  * They give the inclusive bounds of the two UTF-16 surrogate ranges:
  * high surrogates 0xd800..0xdbff and low surrogates 0xdc00..0xdfff.
  */

 #define HIGH_SURROGATE_START 0xd800
 #define HIGH_SURROGATE_END 0xdbff
 #define LOW_SURROGATE_START 0xdc00
 #define LOW_SURROGATE_END 0xdfff

 /* The first two of these definitions use the argument twice which is bad, but we include them here in
  * the original form for consistency with Mingw_w64.  Users should be careful that evaluating
  * the argument doesn't result in side effects.
  *
  * IS_HIGH_SURROGATE(wch) / IS_LOW_SURROGATE(wch): non-zero when wch lies within the
  * high / low surrogate range given by the constants in this section (bounds inclusive).
  * IS_SURROGATE_PAIR(hs, ls): non-zero when hs is a high surrogate and ls is a low
  * surrogate.  It forwards each argument to one of the first two macros, so the same
  * caution about side effects applies to it.
  *
  * NOTE(review): presumably the text is kept token-identical to Mingw_w64 so that a
  * duplicate definition coming from winnls.h is harmless -- confirm before editing.
  */

 #define IS_HIGH_SURROGATE(wch) (((wch) >= HIGH_SURROGATE_START) && ((wch) <= HIGH_SURROGATE_END))
 #define IS_LOW_SURROGATE(wch) (((wch) >= LOW_SURROGATE_START) && ((wch) <= LOW_SURROGATE_END))
 #define IS_SURROGATE_PAIR(hs, ls) (IS_HIGH_SURROGATE (hs) && IS_LOW_SURROGATE (ls))

 # define utf8toucs32		Rf_utf8toucs32
 Rwchar_t utf8toucs32(wchar_t high, const char *s);

 #endif /* R_LOCALE_H */
	/*
	* R : A Computer Language for Statistical Data Analysis
	* Copyright (C) 2005-2017 The R Core Team
	*
	* This program is free software; you can redistribute it and/or modify
	* it under the terms of the GNU General Public License as published by
	* the Free Software Foundation; either version 2 of the License, or
	* (at your option) any later version.
	*
	* This program is distributed in the hope that it will be useful,
	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	* GNU General Public License for more details.
	*
	* You should have received a copy of the GNU General Public License
	* along with this program; if not, a copy is available at
	* https://www.R-project.org/Licenses/
	*/

	/* Internal header, not installed */

	/* This file was contributed by Ei-ji Nakama.
	* See also the comments in ../main/rlocale.c.

	* It does 2 things:
	* (a) supplies wrapper/substitute wc[s]width functions for use in
	* character.c, errors.c, printutils.c, devPS.c, RGui console.
	* (b) defines a replacement for iswctype, to be used in gram.c on
	* Windows, macOS and AIX.
	*
	* It is not an installed header.
	*/

	#ifndef R_LOCALE_H
	#define R_LOCALE_H

	#include <wchar.h>
	#include <ctype.h>
	#include <wctype.h>

	/*
	* Rwchar_t: an integer type that holds a single Unicode code point.
	*
	* Except when Win32 is defined this is simply wchar_t. On Windows
	* wchar_t is too small (UTF-16 is used there), so unsigned int is
	* used instead.
	*/

	#ifndef Win32
	typedef wchar_t Rwchar_t;
	#else
	typedef unsigned int Rwchar_t;
	#endif

	/*
	* Windows CJK
	* In Unicode, there is not a rule about character width.
	* A letter of breadth is used in a CJK (China, Japan, Korea,
	* Taiwan, Hong Kong, and Singapore) area, and there are a
	* letter and a standard (character width is not still prescribed)
	* of a cord in a country.
	* Letter width is a problem of a font, but it is a rule route
	* besides a alphanumeric character that use a breadth letter.
	* It is generally defined as a breadth letter for a font such
	* as Japanese.
	* - Win32

	* Attempted explanation by BDR
	* The display widths of characters are not prescribed in Unicode.
	* Double-width characters are used in the CJK area: their width can
	* be font-specific, with different fonts in use in different parts
	* of the CJK area. The tables supplied in many OSes and by Markus
	* Kuhn do not take the exact locale into account. The
	* tables supplied in rlocale_data.h allow different widths for
	* different parts of the CJK area, and also where needed different
	* widths on Windows. (The Windows differences are in zh_CN, and
	* apply to European characters.)
	*
	* The differences are mainly (but not exclusively) in the
	* Unicode 'East Asian Ambiguous' class.
	*
	*/

	extern int Ri18n_wcwidth(Rwchar_t);
	extern int Ri18n_wcswidth (const wchar_t *, size_t);

	/* macOS CJK and Windows XP (Japanese)
	* The iswctype functions of macOS call the isctype functions: no i18n.
	* For example, iswprint on Windows does not accept the macron in
	* Japanese "a-ru" of R as a letter.
	* Therefore the Japanese "Buraian.Ripuri-" for "Brian Ripley" is
	* shown as a hex string. :-)
	* We define alternatives to be used if
	* defined(Win32) || defined(__APPLE__) || defined(_AIX)
	*/
	wctype_t Ri18n_wctype(const char *name);
	int Ri18n_iswctype(wint_t wc, wctype_t desc);

	#if !defined(IN_RLOCALE_C)
	/* Route the standard <wctype.h> classification names to the Ri18n_
	* replacements. rlocale.c defines IN_RLOCALE_C to avoid these
	* redefinitions in its own translation unit.
	*/

	#undef iswupper
	#define iswupper(wc) Ri18n_iswctype(wc, Ri18n_wctype("upper"))

	#undef iswlower
	#define iswlower(wc) Ri18n_iswctype(wc, Ri18n_wctype("lower"))

	#undef iswalpha
	#define iswalpha(wc) Ri18n_iswctype(wc, Ri18n_wctype("alpha"))

	#undef iswdigit
	#define iswdigit(wc) Ri18n_iswctype(wc, Ri18n_wctype("digit"))

	#undef iswxdigit
	#define iswxdigit(wc) Ri18n_iswctype(wc, Ri18n_wctype("xdigit"))

	#undef iswspace
	#define iswspace(wc) Ri18n_iswctype(wc, Ri18n_wctype("space"))

	#undef iswprint
	#define iswprint(wc) Ri18n_iswctype(wc, Ri18n_wctype("print"))

	#undef iswgraph
	#define iswgraph(wc) Ri18n_iswctype(wc, Ri18n_wctype("graph"))

	#undef iswblank
	#define iswblank(wc) Ri18n_iswctype(wc, Ri18n_wctype("blank"))

	#undef iswcntrl
	#define iswcntrl(wc) Ri18n_iswctype(wc, Ri18n_wctype("cntrl"))

	#undef iswpunct
	#define iswpunct(wc) Ri18n_iswctype(wc, Ri18n_wctype("punct"))

	#undef iswalnum
	#define iswalnum(wc) Ri18n_iswctype(wc, Ri18n_wctype("alnum"))

	/* The generic entry points are redirected as well. */
	#undef wctype
	#define wctype(name) Ri18n_wctype(name)

	#undef iswctype
	#define iswctype(wc, desc) Ri18n_iswctype(wc, desc)

	#endif /* !IN_RLOCALE_C */

	/* These definitions are from winnls.h in Mingw_w64. We don't need the rest of that file.
	* They give the inclusive bounds of the two UTF-16 surrogate ranges:
	* high surrogates 0xd800..0xdbff and low surrogates 0xdc00..0xdfff.
	*/

	#define HIGH_SURROGATE_START 0xd800
	#define HIGH_SURROGATE_END 0xdbff
	#define LOW_SURROGATE_START 0xdc00
	#define LOW_SURROGATE_END 0xdfff

	/* The first two of these definitions use the argument twice which is bad, but we include them here in
	* the original form for consistency with Mingw_w64. Users should be careful that evaluating
	* the argument doesn't result in side effects.
	*
	* IS_HIGH_SURROGATE(wch) / IS_LOW_SURROGATE(wch): non-zero when wch lies within the
	* high / low surrogate range given by the constants in this section (bounds inclusive).
	* IS_SURROGATE_PAIR(hs, ls): non-zero when hs is a high surrogate and ls is a low
	* surrogate. It forwards each argument to one of the first two macros, so the same
	* caution about side effects applies to it.
	*
	* NOTE(review): presumably the text is kept token-identical to Mingw_w64 so that a
	* duplicate definition coming from winnls.h is harmless -- confirm before editing.
	*/

	#define IS_HIGH_SURROGATE(wch) (((wch) >= HIGH_SURROGATE_START) && ((wch) <= HIGH_SURROGATE_END))
	#define IS_LOW_SURROGATE(wch) (((wch) >= LOW_SURROGATE_START) && ((wch) <= LOW_SURROGATE_END))
	#define IS_SURROGATE_PAIR(hs, ls) (IS_HIGH_SURROGATE (hs) && IS_LOW_SURROGATE (ls))

	# define utf8toucs32 Rf_utf8toucs32
	Rwchar_t utf8toucs32(wchar_t high, const char *s);

	#endif /* R_LOCALE_H */