common/loadinfo.c - glibc_locales - Git at Google

 /* Copyright (C) 1995-2014 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.

    The GNU C Library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    The GNU C Library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with the GNU C Library; if not, see
    <http://www.gnu.org/licenses/>.  */

 /* Extracted from glibc's intl/l10nflist.c and intl/explodename.c.  */

 #include "third_party/glibc_locales/common/loadinfo.h"

 #include <ctype.h>
 #include <locale.h>
 #include <stdlib.h>
 #include <string.h>

 /* Return NAME with its codeset part replaced by the normalized codeset.

    The codeset is the text after the first '.' up to an optional '@'
    modifier (or the end of the string).  It is normalized with
    google_nl_normalize_codeset().

    Returns:
      - NAME itself when there is no codeset, the codeset is empty, or the
        codeset is already in normalized form;
      - a newly malloc'ed string otherwise (the caller must free it; compare
        the result against NAME to tell the two cases apart);
      - NULL when NAME is NULL or an allocation fails.  */
 const char *google_nl_normalize_locale_name(const char *name) {
   const char *normalized_name = name;
   const char *dot;
   const char *codeset;
   const char *rest;
   const char *normalized_codeset;
   size_t codeset_len;
   size_t normlen;

   if (name == NULL) return NULL;

   /* Determine if the name contains a non-empty codeset.  */
   dot = strchr(name, '.');
   if (dot == NULL || dot[1] == '@' || dot[1] == '\0') return name;

   /* REST points at the '@' that starts the modifier, or at the
      terminating NUL when there is no modifier.  */
   codeset = dot + 1;
   rest = strchr(codeset, '@');
   if (rest == NULL) rest = codeset + strlen(codeset);
   codeset_len = (size_t)(rest - codeset);

   normalized_codeset = google_nl_normalize_codeset(codeset, codeset_len);
   if (normalized_codeset == NULL) return NULL;

   normlen = strlen(normalized_codeset);
   if (normlen != codeset_len ||
       memcmp(normalized_codeset, codeset, codeset_len) != 0) {
     /* The normalized codeset differs from what was specified;
        reconstruct a new locale name as <prefix incl. '.'><norm><rest>.  */
     size_t prefix_len = (size_t)(codeset - name);
     size_t rest_size = strlen(rest) + 1; /* Includes the NUL.  */
     char *newname = malloc(prefix_len + normlen + rest_size);

     if (newname != NULL) {
       memcpy(newname, name, prefix_len);
       memcpy(newname + prefix_len, normalized_codeset, normlen);
       memcpy(newname + prefix_len + normlen, rest, rest_size);
     }
     /* NULL here reports the allocation failure to the caller.  */
     normalized_name = newname;
   }

   /* The temporary is released on every path, including the failure path.  */
   free((char *)normalized_codeset);
   return normalized_name;
 }

 /* Build the normalized form of the codeset name CODESET[0..NAME_LEN).

    Only ASCII letters and digits are kept; letters are converted to lower
    case and every other byte is dropped.  If the name contains no letters
    at all (digits only, or nothing), the result is prefixed with "iso".

    The classification is done with explicit ASCII range checks, so the
    result does not depend on the process locale and no locale object has
    to be created or released.

    Returns a newly malloc'ed NUL-terminated string that the caller must
    free, or NULL if the allocation fails.  */
 const char *google_nl_normalize_codeset(const char *codeset, size_t name_len) {
   size_t len = 0;
   int only_digit = 1;
   char *retval;
   char *wp;
   size_t cnt;

   /* First pass: count the bytes that survive and detect letters.  */
   for (cnt = 0; cnt < name_len; ++cnt) {
     unsigned char ch = (unsigned char)codeset[cnt];

     if (ch >= '0' && ch <= '9') {
       ++len;
     } else if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
       ++len;
       only_digit = 0;
     }
   }

   /* Room for the optional "iso" prefix, the kept bytes and the NUL.  */
   retval = malloc((only_digit ? 3 : 0) + len + 1);
   if (retval == NULL) return NULL;

   wp = retval;
   if (only_digit) {
     memcpy(wp, "iso", 3);
     wp += 3;
   }

   /* Second pass: copy the kept bytes, lower-casing letters.  */
   for (cnt = 0; cnt < name_len; ++cnt) {
     unsigned char ch = (unsigned char)codeset[cnt];

     if (ch >= 'A' && ch <= 'Z')
       *wp++ = (char)(ch - 'A' + 'a');
     else if ((ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9'))
       *wp++ = (char)ch;
   }
   *wp = '\0';

   return retval;
 }

 /* Return a pointer to the end of the language part of NAME: the first
    '_', '@' or '.' in the string, or its terminating NUL when none of
    these separators occurs.  */
 static char *google_nl_find_language(const char *name) {
   /* strcspn yields the length of the leading run free of separators.  */
   size_t language_len = strcspn(name, "_@.");

   return (char *)(name + language_len);
 }

 /* Split the locale name NAME, of the form
      language[_territory][.codeset][@modifier]
    into its parts.

    NAME is modified in place: each separator that is consumed is
    overwritten with a NUL byte, and *LANGUAGE, *TERRITORY, *CODESET and
    *MODIFIER point into NAME (or are NULL when the part is absent).

    *NORMALIZED_CODESET receives a malloc'ed string (to be freed by the
    caller) only when the normalized codeset differs from the codeset as
    written, in which case XPG_NORM_CODESET is set in the result; otherwise
    it is NULL.

    Returns a bit mask of XPG_TERRITORY, XPG_CODESET, XPG_NORM_CODESET and
    XPG_MODIFIER for the non-empty parts found, 0 when NAME does not start
    with a language, or -1 when the codeset could not be normalized
    (allocation failure).  */
 int google_nl_explode_name(char *name, const char **language,
                            const char **modifier, const char **territory,
                            const char **codeset,
                            const char **normalized_codeset) {
   char *cp;
   int mask = 0;

   *modifier = NULL;
   *territory = NULL;
   *codeset = NULL;
   *normalized_codeset = NULL;

   /* First look for the language.  Termination symbols are '_', '.'
      and '@'.  */
   *language = name;
   cp = google_nl_find_language(name);

   if (cp == name)
     /* This does not make sense: language has to be specified.  Use
        this entry as it is without exploding.  Perhaps it is an alias.  */
     return 0;

   if (cp[0] == '_') {
     /* Next is the territory.  */
     cp[0] = '\0';
     *territory = ++cp;

     while (cp[0] != '\0' && cp[0] != '.' && cp[0] != '@') ++cp;

     mask |= XPG_TERRITORY;
   }

   if (cp[0] == '.') {
     /* Next is the codeset.  */
     cp[0] = '\0';
     *codeset = ++cp;

     while (cp[0] != '\0' && cp[0] != '@') ++cp;

     mask |= XPG_CODESET;

     if (cp != *codeset) {
       size_t codeset_len = (size_t)(cp - *codeset);
       const char *normalized =
           google_nl_normalize_codeset(*codeset, codeset_len);

       if (normalized == NULL) return -1;

       /* Compare only the codeset itself: a following "@modifier" has
          not been cut off yet and must not take part in the comparison.  */
       if (strlen(normalized) == codeset_len &&
           memcmp(normalized, *codeset, codeset_len) == 0) {
         /* Already normalized: nothing extra to hand out, and the output
            pointer stays NULL rather than dangling.  */
         free((char *)normalized);
       } else {
         *normalized_codeset = normalized;
         mask |= XPG_NORM_CODESET;
       }
     }
   }

   if (cp[0] == '@') {
     /* Next is the modifier.  */
     cp[0] = '\0';
     *modifier = ++cp;

     if (cp[0] != '\0') mask |= XPG_MODIFIER;
   }

   /* Parts that turned out to be empty (e.g. "en_.utf8" or "en.@euro")
      are not reported.  These checks must run after the separators above
      have been replaced by NUL bytes.  */
   if (*territory != NULL && (*territory)[0] == '\0') mask &= ~XPG_TERRITORY;

   if (*codeset != NULL && (*codeset)[0] == '\0') mask &= ~XPG_CODESET;

   return mask;
 }
	/* Copyright (C) 1995-2014 Free Software Foundation, Inc.
	This file is part of the GNU C Library.
	Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1995.

	The GNU C Library is free software; you can redistribute it and/or
	modify it under the terms of the GNU Lesser General Public
	License as published by the Free Software Foundation; either
	version 2.1 of the License, or (at your option) any later version.

	The GNU C Library is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	Lesser General Public License for more details.

	You should have received a copy of the GNU Lesser General Public
	License along with the GNU C Library; if not, see
	<http://www.gnu.org/licenses/>. */

	/* Extracted from glibc's intl/l10nflist.c and intl/explodename.c. */

	#include "third_party/glibc_locales/common/loadinfo.h"

	#include <ctype.h>
	#include <locale.h>
	#include <stdlib.h>
	#include <string.h>

	/* Return NAME with its codeset part replaced by the normalized codeset.

	   The codeset is the text after the first '.' up to an optional '@'
	   modifier (or the end of the string).  It is normalized with
	   google_nl_normalize_codeset().

	   Returns:
	     - NAME itself when there is no codeset, the codeset is empty, or the
	       codeset is already in normalized form;
	     - a newly malloc'ed string otherwise (the caller must free it; compare
	       the result against NAME to tell the two cases apart);
	     - NULL when NAME is NULL or an allocation fails.  */
	const char *google_nl_normalize_locale_name(const char *name) {
	  const char *normalized_name = name;
	  const char *dot;
	  const char *codeset;
	  const char *rest;
	  const char *normalized_codeset;
	  size_t codeset_len;
	  size_t normlen;

	  if (name == NULL) return NULL;

	  /* Determine if the name contains a non-empty codeset.  */
	  dot = strchr(name, '.');
	  if (dot == NULL || dot[1] == '@' || dot[1] == '\0') return name;

	  /* REST points at the '@' that starts the modifier, or at the
	     terminating NUL when there is no modifier.  */
	  codeset = dot + 1;
	  rest = strchr(codeset, '@');
	  if (rest == NULL) rest = codeset + strlen(codeset);
	  codeset_len = (size_t)(rest - codeset);

	  normalized_codeset = google_nl_normalize_codeset(codeset, codeset_len);
	  if (normalized_codeset == NULL) return NULL;

	  normlen = strlen(normalized_codeset);
	  if (normlen != codeset_len ||
	      memcmp(normalized_codeset, codeset, codeset_len) != 0) {
	    /* The normalized codeset differs from what was specified;
	       reconstruct a new locale name as <prefix incl. '.'><norm><rest>.  */
	    size_t prefix_len = (size_t)(codeset - name);
	    size_t rest_size = strlen(rest) + 1; /* Includes the NUL.  */
	    char *newname = malloc(prefix_len + normlen + rest_size);

	    if (newname != NULL) {
	      memcpy(newname, name, prefix_len);
	      memcpy(newname + prefix_len, normalized_codeset, normlen);
	      memcpy(newname + prefix_len + normlen, rest, rest_size);
	    }
	    /* NULL here reports the allocation failure to the caller.  */
	    normalized_name = newname;
	  }

	  /* The temporary is released on every path, including the failure path.  */
	  free((char *)normalized_codeset);
	  return normalized_name;
	}

	/* Build the normalized form of the codeset name CODESET[0..NAME_LEN).

	   Only ASCII letters and digits are kept; letters are converted to lower
	   case and every other byte is dropped.  If the name contains no letters
	   at all (digits only, or nothing), the result is prefixed with "iso".

	   The classification is done with explicit ASCII range checks, so the
	   result does not depend on the process locale and no locale object has
	   to be created or released.

	   Returns a newly malloc'ed NUL-terminated string that the caller must
	   free, or NULL if the allocation fails.  */
	const char *google_nl_normalize_codeset(const char *codeset, size_t name_len) {
	  size_t len = 0;
	  int only_digit = 1;
	  char *retval;
	  char *wp;
	  size_t cnt;

	  /* First pass: count the bytes that survive and detect letters.  */
	  for (cnt = 0; cnt < name_len; ++cnt) {
	    unsigned char ch = (unsigned char)codeset[cnt];

	    if (ch >= '0' && ch <= '9') {
	      ++len;
	    } else if ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) {
	      ++len;
	      only_digit = 0;
	    }
	  }

	  /* Room for the optional "iso" prefix, the kept bytes and the NUL.  */
	  retval = malloc((only_digit ? 3 : 0) + len + 1);
	  if (retval == NULL) return NULL;

	  wp = retval;
	  if (only_digit) {
	    memcpy(wp, "iso", 3);
	    wp += 3;
	  }

	  /* Second pass: copy the kept bytes, lower-casing letters.  */
	  for (cnt = 0; cnt < name_len; ++cnt) {
	    unsigned char ch = (unsigned char)codeset[cnt];

	    if (ch >= 'A' && ch <= 'Z')
	      *wp++ = (char)(ch - 'A' + 'a');
	    else if ((ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9'))
	      *wp++ = (char)ch;
	  }
	  *wp = '\0';

	  return retval;
	}

	/* Return a pointer to the end of the language part of NAME: the first
	   '_', '@' or '.' in the string, or its terminating NUL when none of
	   these separators occurs.  */
	static char *google_nl_find_language(const char *name) {
	  while (name[0] != '\0' && name[0] != '_' && name[0] != '@' &&
	         name[0] != '.')
	    ++name;

	  /* The const is dropped so callers can write through the result when
	     NAME refers to a modifiable buffer.  */
	  return (char *)name;
	}

	/* Split the locale name NAME, of the form
	     language[_territory][.codeset][@modifier]
	   into its parts.

	   NAME is modified in place: each separator that is consumed is
	   overwritten with a NUL byte, and *LANGUAGE, *TERRITORY, *CODESET and
	   *MODIFIER point into NAME (or are NULL when the part is absent).

	   *NORMALIZED_CODESET receives a malloc'ed string (to be freed by the
	   caller) only when the normalized codeset differs from the codeset as
	   written, in which case XPG_NORM_CODESET is set in the result; otherwise
	   it is NULL.

	   Returns a bit mask of XPG_TERRITORY, XPG_CODESET, XPG_NORM_CODESET and
	   XPG_MODIFIER for the non-empty parts found, 0 when NAME does not start
	   with a language, or -1 when the codeset could not be normalized
	   (allocation failure).  */
	int google_nl_explode_name(char *name, const char **language,
	                           const char **modifier, const char **territory,
	                           const char **codeset,
	                           const char **normalized_codeset) {
	  char *cp;
	  int mask = 0;

	  *modifier = NULL;
	  *territory = NULL;
	  *codeset = NULL;
	  *normalized_codeset = NULL;

	  /* First look for the language.  Termination symbols are '_', '.'
	     and '@'.  */
	  *language = name;
	  cp = google_nl_find_language(name);

	  if (cp == name)
	    /* This does not make sense: language has to be specified.  Use
	       this entry as it is without exploding.  Perhaps it is an alias.  */
	    return 0;

	  if (cp[0] == '_') {
	    /* Next is the territory.  */
	    cp[0] = '\0';
	    *territory = ++cp;

	    while (cp[0] != '\0' && cp[0] != '.' && cp[0] != '@') ++cp;

	    mask |= XPG_TERRITORY;
	  }

	  if (cp[0] == '.') {
	    /* Next is the codeset.  */
	    cp[0] = '\0';
	    *codeset = ++cp;

	    while (cp[0] != '\0' && cp[0] != '@') ++cp;

	    mask |= XPG_CODESET;

	    if (cp != *codeset) {
	      size_t codeset_len = (size_t)(cp - *codeset);
	      const char *normalized =
	          google_nl_normalize_codeset(*codeset, codeset_len);

	      if (normalized == NULL) return -1;

	      /* Compare only the codeset itself: a following "@modifier" has
	         not been cut off yet and must not take part in the comparison.  */
	      if (strlen(normalized) == codeset_len &&
	          memcmp(normalized, *codeset, codeset_len) == 0) {
	        /* Already normalized: nothing extra to hand out, and the output
	           pointer stays NULL rather than dangling.  */
	        free((char *)normalized);
	      } else {
	        *normalized_codeset = normalized;
	        mask |= XPG_NORM_CODESET;
	      }
	    }
	  }

	  if (cp[0] == '@') {
	    /* Next is the modifier.  */
	    cp[0] = '\0';
	    *modifier = ++cp;

	    if (cp[0] != '\0') mask |= XPG_MODIFIER;
	  }

	  /* Parts that turned out to be empty (e.g. "en_.utf8" or "en.@euro")
	     are not reported.  These checks must run after the separators above
	     have been replaced by NUL bytes.  */
	  if (*territory != NULL && (*territory)[0] == '\0') mask &= ~XPG_TERRITORY;

	  if (*codeset != NULL && (*codeset)[0] == '\0') mask &= ~XPG_CODESET;

	  return mask;
	}