mirror of
https://git.savannah.gnu.org/git/gnulib.git
synced 2025-08-17 12:41:05 +03:00
* build-aux/gendocs.sh (version): * doc/gendocs_template: * doc/gendocs_template_min: * doc/gnulib.texi: * lib/version-etc.c (COPYRIGHT_YEAR): Update copyright dates by hand in templates and the like. * all files: Run 'make update-copyright'.
391 lines
10 KiB
C
391 lines
10 KiB
C
/* Test the Unicode character name functions.
   Copyright (C) 2000-2003, 2005, 2007, 2009-2017 Free Software Foundation,
   Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
|
|
|
|
#include <config.h>
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "xalloc.h"
|
|
#include "uniname.h"
|
|
|
|
/* Table of character names, indexed by code point; it has one slot for each
   of the 0x110000 possible values.  The contents come from the
   UnicodeData.txt file, modified to contain the Hangul syllable names, as
   described in the Unicode 3.0 book.  A slot for which no name has been
   stored remains NULL.  */
static const char *unicode_names[0x110000];
|
|
|
|
/* Capacity of the unicode_aliases[] table.  */
#define ALIASLEN 0x200

/* One entry taken from the NameAliases.txt file: an alias NAME and the code
   point UC it stands for.  */
struct unicode_alias
{
  const char *name;
  unsigned int uc;
};

/* The alias entries, in the order in which fill_aliases stored them.  */
static struct unicode_alias unicode_aliases[ALIASLEN];

/* Number of entries stored so far at the beginning of unicode_aliases[].  */
static int aliases_count;
|
|
|
|
/* Stores in unicode_names[] the relevant contents of the UnicodeData.txt
|
|
file. */
|
|
static void
|
|
fill_names (const char *unicodedata_filename)
|
|
{
|
|
FILE *stream;
|
|
char *field0;
|
|
char *field1;
|
|
char line[1024];
|
|
int lineno = 0;
|
|
|
|
stream = fopen (unicodedata_filename, "r");
|
|
if (stream == NULL)
|
|
{
|
|
fprintf (stderr, "error during fopen of '%s'\n", unicodedata_filename);
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
|
|
while (fgets (line, sizeof line, stream))
|
|
{
|
|
char *p;
|
|
char *comment;
|
|
unsigned int i;
|
|
|
|
lineno++;
|
|
|
|
comment = strchr (line, '#');
|
|
if (comment != NULL)
|
|
*comment = '\0';
|
|
if (line[strspn (line, " \t\r\n")] == '\0')
|
|
continue;
|
|
|
|
field0 = p = line;
|
|
p = strchr (p, ';');
|
|
if (!p)
|
|
{
|
|
fprintf (stderr, "short line in '%s':%d\n",
|
|
unicodedata_filename, lineno);
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
*p++ = '\0';
|
|
|
|
field1 = p;
|
|
if (*field1 == '<')
|
|
continue;
|
|
p = strchr (p, ';');
|
|
if (!p)
|
|
{
|
|
fprintf (stderr, "short line in '%s':%d\n",
|
|
unicodedata_filename, lineno);
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
*p = '\0';
|
|
i = strtoul (field0, NULL, 16);
|
|
if (i >= 0x110000)
|
|
{
|
|
fprintf (stderr, "index too large\n");
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
unicode_names[i] = xstrdup (field1);
|
|
}
|
|
if (ferror (stream) || fclose (stream))
|
|
{
|
|
fprintf (stderr, "error reading from '%s'\n", unicodedata_filename);
|
|
exit (1);
|
|
}
|
|
}
|
|
|
|
/* Stores in unicode_aliases[] the relevant contents of the NameAliases.txt
|
|
file. */
|
|
static void
|
|
fill_aliases (const char *namealiases_filename)
|
|
{
|
|
FILE *stream;
|
|
char *field0;
|
|
char *field1;
|
|
char line[1024];
|
|
int lineno = 0;
|
|
|
|
stream = fopen (namealiases_filename, "r");
|
|
if (stream == NULL)
|
|
{
|
|
fprintf (stderr, "error during fopen of '%s'\n", namealiases_filename);
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
|
|
while (fgets (line, sizeof line, stream))
|
|
{
|
|
char *p;
|
|
char *comment;
|
|
unsigned int uc;
|
|
|
|
comment = strchr (line, '#');
|
|
if (comment != NULL)
|
|
*comment = '\0';
|
|
if (line[strspn (line, " \t\r\n")] == '\0')
|
|
continue;
|
|
|
|
lineno++;
|
|
|
|
field0 = p = line;
|
|
p = strchr (p, ';');
|
|
if (!p)
|
|
{
|
|
fprintf (stderr, "short line in '%s':%d\n",
|
|
namealiases_filename, lineno);
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
*p++ = '\0';
|
|
|
|
field1 = p;
|
|
p = strchr (p, ';');
|
|
if (!p)
|
|
{
|
|
fprintf (stderr, "short line in '%s':%d\n",
|
|
namealiases_filename, lineno);
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
*p = '\0';
|
|
|
|
uc = strtoul (field0, NULL, 16);
|
|
if (uc >= 0x110000)
|
|
{
|
|
fprintf (stderr, "index too large\n");
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
|
|
if (aliases_count == ALIASLEN)
|
|
{
|
|
fprintf (stderr, "too many aliases\n");
|
|
exit (EXIT_FAILURE);
|
|
}
|
|
unicode_aliases[aliases_count].name = xstrdup (field1);
|
|
unicode_aliases[aliases_count].uc = uc;
|
|
aliases_count++;
|
|
}
|
|
if (ferror (stream) || fclose (stream))
|
|
{
|
|
fprintf (stderr, "error reading from '%s'\n", namealiases_filename);
|
|
exit (1);
|
|
}
|
|
}
|
|
|
|
static int
|
|
name_has_alias (unsigned int uc)
|
|
{
|
|
int i;
|
|
for (i = 0; i < ALIASLEN; i++)
|
|
if (unicode_aliases[i].uc == uc)
|
|
return 1;
|
|
return 0;
|
|
}
|
|
|
|
/* Perform an exhaustive test of the unicode_character_name function. */
|
|
static int
|
|
test_name_lookup ()
|
|
{
|
|
int error = 0;
|
|
unsigned int i;
|
|
char buf[UNINAME_MAX];
|
|
|
|
for (i = 0; i < 0x11000; i++)
|
|
{
|
|
char *result = unicode_character_name (i, buf);
|
|
|
|
if (unicode_names[i] != NULL)
|
|
{
|
|
if (result == NULL)
|
|
{
|
|
fprintf (stderr, "\\u%04X name lookup failed!\n", i);
|
|
error = 1;
|
|
}
|
|
else if (strcmp (result, unicode_names[i]) != 0)
|
|
{
|
|
fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
|
|
i, result);
|
|
error = 1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (result != NULL)
|
|
{
|
|
fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
|
|
i, result);
|
|
error = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i = 0x110000; i < 0x1000000; i++)
|
|
{
|
|
char *result = unicode_character_name (i, buf);
|
|
|
|
if (result != NULL)
|
|
{
|
|
fprintf (stderr, "\\u%04X name lookup returned wrong name: %s\n",
|
|
i, result);
|
|
error = 1;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
/* Perform a test of the unicode_name_character function. */
|
|
static int
|
|
test_inverse_lookup ()
|
|
{
|
|
int error = 0;
|
|
unsigned int i;
|
|
|
|
/* First, verify all valid character names are recognized. */
|
|
for (i = 0; i < 0x110000; i++)
|
|
if (unicode_names[i] != NULL)
|
|
{
|
|
unsigned int result = unicode_name_character (unicode_names[i]);
|
|
if (result != i)
|
|
{
|
|
if (result == UNINAME_INVALID)
|
|
fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
|
|
unicode_names[i]);
|
|
else
|
|
fprintf (stderr,
|
|
"inverse name lookup of \"%s\" returned 0x%04X\n",
|
|
unicode_names[i], result);
|
|
error = 1;
|
|
}
|
|
}
|
|
|
|
/* Second, generate random but likely names and verify they are not
|
|
recognized unless really valid. */
|
|
for (i = 0; i < 10000; i++)
|
|
{
|
|
unsigned int i1, i2;
|
|
const char *s1;
|
|
const char *s2;
|
|
unsigned int l1, l2, j1, j2;
|
|
char buf[2*UNINAME_MAX];
|
|
unsigned int result;
|
|
|
|
do i1 = ((rand () % 0x11) << 16)
|
|
+ ((rand () & 0xff) << 8)
|
|
+ (rand () & 0xff);
|
|
while (unicode_names[i1] == NULL);
|
|
|
|
do i2 = ((rand () % 0x11) << 16)
|
|
+ ((rand () & 0xff) << 8)
|
|
+ (rand () & 0xff);
|
|
while (unicode_names[i2] == NULL);
|
|
|
|
s1 = unicode_names[i1];
|
|
l1 = strlen (s1);
|
|
s2 = unicode_names[i2];
|
|
l2 = strlen (s2);
|
|
|
|
/* Concatenate a starting piece of s1 with an ending piece of s2. */
|
|
for (j1 = 1; j1 <= l1; j1++)
|
|
if (j1 == l1 || s1[j1] == ' ')
|
|
for (j2 = 0; j2 < l2; j2++)
|
|
if (j2 == 0 || s2[j2-1] == ' ')
|
|
{
|
|
memcpy (buf, s1, j1);
|
|
buf[j1] = ' ';
|
|
memcpy (buf + j1 + 1, s2 + j2, l2 - j2 + 1);
|
|
|
|
result = unicode_name_character (buf);
|
|
if (result != UNINAME_INVALID
|
|
&& !name_has_alias (result)
|
|
&& !(unicode_names[result] != NULL
|
|
&& strcmp (unicode_names[result], buf) == 0))
|
|
{
|
|
fprintf (stderr,
|
|
"inverse name lookup of \"%s\" returned 0x%04X\n",
|
|
unicode_names[i], result);
|
|
error = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Third, some extreme case that used to loop. */
|
|
if (unicode_name_character ("A A") != UNINAME_INVALID)
|
|
error = 1;
|
|
|
|
return error;
|
|
}
|
|
|
|
/* Perform a test of the unicode_name_character function for aliases. */
|
|
static int
|
|
test_alias_lookup ()
|
|
{
|
|
int error = 0;
|
|
unsigned int i;
|
|
char buf[UNINAME_MAX];
|
|
|
|
/* Verify all valid character names are recognized. */
|
|
for (i = 0; i < ALIASLEN; i++)
|
|
if (unicode_aliases[i].uc != UNINAME_INVALID
|
|
/* Skip if the character has no canonical name (e.g. control
|
|
characters). */
|
|
&& unicode_character_name (unicode_aliases[i].uc, buf))
|
|
{
|
|
unsigned int result = unicode_name_character (unicode_aliases[i].name);
|
|
if (result != unicode_aliases[i].uc)
|
|
{
|
|
if (result == UNINAME_INVALID)
|
|
fprintf (stderr, "inverse name lookup of \"%s\" failed\n",
|
|
unicode_aliases[i].name);
|
|
else
|
|
fprintf (stderr,
|
|
"inverse name lookup of \"%s\" returned 0x%04X\n",
|
|
unicode_aliases[i].name, result);
|
|
error = 1;
|
|
}
|
|
}
|
|
|
|
return error;
|
|
}
|
|
|
|
int
|
|
main (int argc, char *argv[])
|
|
{
|
|
int error = 0;
|
|
int i;
|
|
|
|
for (i = 1; i < argc && strcmp (argv[i], "--") != 0; i++)
|
|
fill_names (argv[i]);
|
|
|
|
if (i < argc)
|
|
{
|
|
int j;
|
|
for (j = 0; j < ALIASLEN; j++)
|
|
unicode_aliases[j].uc = UNINAME_INVALID;
|
|
|
|
i++;
|
|
for (; i < argc; i++)
|
|
fill_aliases (argv[i]);
|
|
}
|
|
|
|
error |= test_name_lookup ();
|
|
error |= test_inverse_lookup ();
|
|
|
|
if (aliases_count > 0)
|
|
error |= test_alias_lookup ();
|
|
|
|
return error;
|
|
}
|