mirror of
https://git.savannah.gnu.org/git/gnulib.git
synced 2025-08-17 12:41:05 +03:00
Tests for module 'uninorm/nfc'.
This commit is contained in:
@@ -1,5 +1,14 @@
|
||||
2009-02-21 Bruno Haible <bruno@clisp.org>
|
||||
|
||||
Tests for module 'uninorm/nfc'.
|
||||
* tests/uninorm/test-nfc.c: New file.
|
||||
* tests/uninorm/test-u8-nfc.c: New file.
|
||||
* tests/uninorm/test-u16-nfc.c: New file.
|
||||
* tests/uninorm/test-u32-nfc.c: New file.
|
||||
* tests/uninorm/test-u32-nfc-big.sh: New file.
|
||||
* tests/uninorm/test-u32-nfc-big.c: New file.
|
||||
* modules/uninorm/nfc-tests: New file.
|
||||
|
||||
New module 'uninorm/nfc'.
|
||||
* lib/uninorm/nfc.c: New file.
|
||||
* modules/uninorm/nfc: New file.
|
||||
|
33
modules/uninorm/nfc-tests
Normal file
33
modules/uninorm/nfc-tests
Normal file
@@ -0,0 +1,33 @@
|
||||
Files:
|
||||
tests/uninorm/test-nfc.c
|
||||
tests/uninorm/test-u8-nfc.c
|
||||
tests/uninorm/test-u16-nfc.c
|
||||
tests/uninorm/test-u32-nfc.c
|
||||
tests/uninorm/test-u32-nfc-big.sh
|
||||
tests/uninorm/test-u32-nfc-big.c
|
||||
tests/uninorm/test-u32-normalize-big.h
|
||||
tests/uninorm/test-u32-normalize-big.c
|
||||
tests/uninorm/NormalizationTest.txt
|
||||
|
||||
Depends-on:
|
||||
unistr/u8-cmp
|
||||
unistr/u16-cmp
|
||||
unistr/u32-cmp
|
||||
unistr/u32-strlen
|
||||
xalloc
|
||||
progname
|
||||
|
||||
configure.ac:
|
||||
AC_CHECK_DECLS_ONCE([alarm])
|
||||
|
||||
Makefile.am:
|
||||
TESTS += test-nfc uninorm/test-u32-nfc-big.sh
|
||||
check_PROGRAMS += test-nfc test-u32-nfc-big
|
||||
test_nfc_SOURCES = \
|
||||
uninorm/test-nfc.c \
|
||||
uninorm/test-u8-nfc.c \
|
||||
uninorm/test-u16-nfc.c \
|
||||
uninorm/test-u32-nfc.c
|
||||
test_u32_nfc_big_SOURCES = \
|
||||
uninorm/test-u32-nfc-big.c \
|
||||
uninorm/test-u32-normalize-big.c
|
38
tests/uninorm/test-nfc.c
Normal file
38
tests/uninorm/test-nfc.c
Normal file
@@ -0,0 +1,38 @@
|
||||
/* Test of canonical normalization of Unicode strings.
|
||||
Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written by Bruno Haible <bruno@clisp.org>, 2009. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#include "uninorm.h"
|
||||
|
||||
/* Check that UNINORM_NFC is defined and links. */
|
||||
uninorm_t n = UNINORM_NFC;
|
||||
|
||||
extern void test_u8_nfc (void);
|
||||
extern void test_u16_nfc (void);
|
||||
extern void test_u32_nfc (void);
|
||||
|
||||
/* Entry point of the combined NFC test program.

   Runs the UTF-32, UTF-16 and UTF-8 test functions, in that order.  The
   test functions do not return a status; this function only sequences
   them and returns 0 once all three have returned.  */
int
main (void)
{
  test_u32_nfc ();
  test_u16_nfc ();
  test_u8_nfc ();

  return 0;
}
|
342
tests/uninorm/test-u16-nfc.c
Normal file
342
tests/uninorm/test-u16-nfc.c
Normal file
@@ -0,0 +1,342 @@
|
||||
/* Test of canonical normalization of UTF-16 strings.
|
||||
Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written by Bruno Haible <bruno@clisp.org>, 2009. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#if GNULIB_UNINORM_U16_NORMALIZE
|
||||
|
||||
#include "uninorm.h"
|
||||
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "unistr.h"
|
||||
|
||||
/* Number of elements of ARRAY.  ARRAY must be a real array object, not a
   pointer.  The argument is parenthesized before indexing so that any
   expression can be passed safely.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))

/* Evaluate EXPR once; if it is false, print "FILE:LINE: assertion failed"
   to stderr, flush stderr and abort the process.  Wrapped in
   do { } while (0) so that it behaves like a single statement.  */
#define ASSERT(expr) \
  do                                                                         \
    {                                                                        \
      if (!(expr))                                                           \
        {                                                                    \
          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
          fflush (stderr);                                                   \
          abort ();                                                          \
        }                                                                    \
    }                                                                        \
  while (0)
|
||||
|
||||
static int
|
||||
check (const uint16_t *input, size_t input_length,
|
||||
const uint16_t *expected, size_t expected_length)
|
||||
{
|
||||
size_t length;
|
||||
uint16_t *result;
|
||||
|
||||
/* Test return conventions with resultbuf == NULL. */
|
||||
result = u16_normalize (UNINORM_NFC, input, input_length, NULL, &length);
|
||||
if (!(result != NULL))
|
||||
return 1;
|
||||
if (!(length == expected_length))
|
||||
return 2;
|
||||
if (!(u16_cmp (result, expected, expected_length) == 0))
|
||||
return 3;
|
||||
free (result);
|
||||
|
||||
/* Test return conventions with resultbuf too small. */
|
||||
if (expected_length > 0)
|
||||
{
|
||||
uint16_t *preallocated;
|
||||
|
||||
length = expected_length - 1;
|
||||
preallocated = (uint16_t *) malloc (length * sizeof (uint16_t));
|
||||
result = u16_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
|
||||
if (!(result != NULL))
|
||||
return 4;
|
||||
if (!(result != preallocated))
|
||||
return 5;
|
||||
if (!(length == expected_length))
|
||||
return 6;
|
||||
if (!(u16_cmp (result, expected, expected_length) == 0))
|
||||
return 7;
|
||||
free (result);
|
||||
free (preallocated);
|
||||
}
|
||||
|
||||
/* Test return conventions with resultbuf large enough. */
|
||||
{
|
||||
uint16_t *preallocated;
|
||||
|
||||
length = expected_length;
|
||||
preallocated = (uint16_t *) malloc (length * sizeof (uint16_t));
|
||||
result = u16_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
|
||||
if (!(result != NULL))
|
||||
return 8;
|
||||
if (!(result == preallocated))
|
||||
return 9;
|
||||
if (!(length == expected_length))
|
||||
return 10;
|
||||
if (!(u16_cmp (result, expected, expected_length) == 0))
|
||||
return 11;
|
||||
free (preallocated);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Store COUNT copies of UNIT starting at DEST and return the position just
   past the last unit written.  */
static uint16_t *
append_u16_run (uint16_t *dest, uint16_t unit, size_t count)
{
  while (count-- > 0)
    *dest++ = unit;
  return dest;
}

/* Run all UTF-16 NFC checks: a set of short fixed strings, then three
   large inputs made of one base letter followed by many combining marks.
   Any failing check aborts through ASSERT.  */
void
test_u16_nfc (void)
{
  /* SPACE */
  {
    static const uint16_t space[] = { 0x0020 };
    ASSERT (check (space, SIZEOF (space), space, SIZEOF (space)) == 0);
  }

  /* LATIN CAPITAL LETTER A WITH DIAERESIS */
  {
    static const uint16_t composed[] = { 0x00C4 };
    static const uint16_t decomposed[] = { 0x0041, 0x0308 };
    ASSERT (check (composed, SIZEOF (composed),
                   composed, SIZEOF (composed)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed),
                   composed, SIZEOF (composed)) == 0);
  }

  /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
  {
    static const uint16_t composed[] = { 0x01DE };
    static const uint16_t decomposed[] = { 0x0041, 0x0308, 0x0304 };
    ASSERT (check (composed, SIZEOF (composed),
                   composed, SIZEOF (composed)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed),
                   composed, SIZEOF (composed)) == 0);
  }

  /* ANGSTROM SIGN: the sign itself, its decomposition and the expected
     result must all normalize to the same single unit.  */
  {
    static const uint16_t sign[] = { 0x212B };
    static const uint16_t decomposed[] = { 0x0041, 0x030A };
    static const uint16_t composed[] = { 0x00C5 };
    ASSERT (check (sign, SIZEOF (sign),
                   composed, SIZEOF (composed)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed),
                   composed, SIZEOF (composed)) == 0);
    ASSERT (check (composed, SIZEOF (composed),
                   composed, SIZEOF (composed)) == 0);
  }

  /* Single units that are expected to come back unchanged.  Each one is
     checked on its own, as a one-unit string.  */
  {
    static const uint16_t unchanged[] =
      {
        0x1FC1, /* GREEK DIALYTIKA AND PERISPOMENI */
        0x2113, /* SCRIPT SMALL L */
        0x00A0, /* NO-BREAK SPACE */
        0xFB6C, /* ARABIC LETTER VEH INITIAL FORM */
        0xFB6D, /* ARABIC LETTER VEH MEDIAL FORM */
        0xFB6B, /* ARABIC LETTER VEH FINAL FORM */
        0xFB6A, /* ARABIC LETTER VEH ISOLATED FORM */
        0x246E, /* CIRCLED NUMBER FIFTEEN */
        0x2122, /* TRADE MARK SIGN */
        0x1D62, /* LATIN SUBSCRIPT SMALL LETTER I */
        0xFE35, /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
        0xFF21, /* FULLWIDTH LATIN CAPITAL LETTER A */
        0xFF64, /* HALFWIDTH IDEOGRAPHIC COMMA */
        0xFE51, /* SMALL IDEOGRAPHIC COMMA */
        0x3392, /* SQUARE MHZ */
        0x215C, /* VULGAR FRACTION THREE EIGHTHS */
        0x00B5, /* MICRO SIGN */
        0xFDFA  /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
      };
    size_t k;

    for (k = 0; k < SIZEOF (unchanged); k++)
      ASSERT (check (&unchanged[k], 1, &unchanged[k], 1) == 0);
  }

  /* HANGUL SYLLABLE GEUL */
  {
    static const uint16_t syllable[] = { 0xAE00 };
    static const uint16_t jamos[] = { 0x1100, 0x1173, 0x11AF };
    ASSERT (check (syllable, SIZEOF (syllable),
                   syllable, SIZEOF (syllable)) == 0);
    ASSERT (check (jamos, SIZEOF (jamos),
                   syllable, SIZEOF (syllable)) == 0);
  }

  /* HANGUL SYLLABLE GEU */
  {
    static const uint16_t syllable[] = { 0xADF8 };
    static const uint16_t jamos[] = { 0x1100, 0x1173 };
    ASSERT (check (syllable, SIZEOF (syllable),
                   syllable, SIZEOF (syllable)) == 0);
    ASSERT (check (jamos, SIZEOF (jamos),
                   syllable, SIZEOF (syllable)) == 0);
  }

  /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
  {
    static const uint16_t composed[] =
      { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
        0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
        0x0439, 0x0442, 0x0435, '!', ' ',
        'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
        '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
        0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
      };
    static const uint16_t decomposed[] =
      { 'G', 'r', 0x0075, 0x0308, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
        0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
        0x0438, 0x0306, 0x0442, 0x0435, '!', ' ',
        'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
        '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
        0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',',
        0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF, '\n'
      };
    ASSERT (check (composed, SIZEOF (composed),
                   composed, SIZEOF (composed)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed),
                   composed, SIZEOF (composed)) == 0);
  }

#if HAVE_DECL_ALARM
  /* Restore the default SIGALRM action and arm a 50 second alarm, so that
     a run that takes too long is terminated and counts as a failure.  */
  signal (SIGALRM, SIG_DFL);
  alarm (50);
#endif

  /* Check that the sorting is not O(n^2) but O(n log n): feed one base
     letter followed by m - 1 combining marks, in three different orders,
     and expect the same result each time.  */
  {
    const size_t m = 100000;
    const size_t m1 = m / 2;
    const size_t m2 = (m - 1) / 2;
    /* NB: m1 + m2 == m - 1.  */
    int pass;

    for (pass = 0; pass < 3; pass++)
      {
        size_t repeat = 1;
        /* One allocation holds both strings: input in the first m units,
           expected in the second m units.  */
        uint16_t *input = (uint16_t *) malloc (2 * m * sizeof (uint16_t));
        uint16_t *expected;
        uint16_t *tail;
        size_t i;

        /* Without memory this pass is silently skipped.  */
        if (input == NULL)
          continue;
        expected = input + m;

        input[0] = 0x0041;
        tail = input + 1;
        switch (pass)
          {
          case 0:
            /* All 0x0319 first, then all 0x0300.  */
            tail = append_u16_run (tail, 0x0319, m1);
            tail = append_u16_run (tail, 0x0300, m2);
            break;

          case 1:
            /* All 0x0300 first, then all 0x0319.  */
            tail = append_u16_run (tail, 0x0300, m2);
            tail = append_u16_run (tail, 0x0319, m1);
            break;

          case 2:
            /* Alternating pairs, then the remaining 0x0319 units.  */
            for (i = 0; i < m2; i++)
              {
                *tail++ = 0x0319;
                *tail++ = 0x0300;
              }
            tail = append_u16_run (tail, 0x0319, m1 - m2);
            break;

          default:
            abort ();
          }

        /* Expected: 0x00C0, then m1 times 0x0319, then m2 - 1 times
           0x0300, for a total of m - 1 units.  */
        expected[0] = 0x00C0;
        tail = append_u16_run (expected + 1, 0x0319, m1);
        tail = append_u16_run (tail, 0x0300, m2 - 1);

        while (repeat-- > 0)
          {
            ASSERT (check (input, m, expected, m - 1) == 0);
            ASSERT (check (expected, m - 1, expected, m - 1) == 0);
          }

        free (input);
      }
  }
}
|
||||
|
||||
#else
|
||||
|
||||
/* Variant compiled when GNULIB_UNINORM_U16_NORMALIZE is not set: there is
   nothing to test, but the symbol must still exist for the caller.  */
void
test_u16_nfc (void)
{
  /* Intentionally empty.  */
}
|
||||
|
||||
#endif
|
125
tests/uninorm/test-u32-nfc-big.c
Normal file
125
tests/uninorm/test-u32-nfc-big.c
Normal file
@@ -0,0 +1,125 @@
|
||||
/* Test of Unicode compliance of canonical normalization of UTF-32 strings.
|
||||
Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written by Bruno Haible <bruno@clisp.org>, 2009. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#if GNULIB_UNINORM_U32_NORMALIZE
|
||||
|
||||
#include "uninorm.h"
|
||||
|
||||
#include <stdio.h>
#include <stdlib.h>
|
||||
|
||||
#include "unistr.h"
|
||||
#include "progname.h"
|
||||
#include "test-u32-normalize-big.h"
|
||||
|
||||
static int
|
||||
check (const uint32_t *c1, size_t c1_length,
|
||||
const uint32_t *c2, size_t c2_length,
|
||||
const uint32_t *c3, size_t c3_length,
|
||||
const uint32_t *c4, size_t c4_length,
|
||||
const uint32_t *c5, size_t c5_length)
|
||||
{
|
||||
/* Check
|
||||
c2 == NFC(c1) == NFC(c2) == NFC(c3)
|
||||
c4 == NFC(c4) == NFC(c5)
|
||||
*/
|
||||
{
|
||||
size_t length;
|
||||
uint32_t *result;
|
||||
|
||||
result = u32_normalize (UNINORM_NFC, c1, c1_length, NULL, &length);
|
||||
if (!(result != NULL
|
||||
&& length == c2_length
|
||||
&& u32_cmp (result, c2, c2_length) == 0))
|
||||
return 1;
|
||||
free (result);
|
||||
}
|
||||
{
|
||||
size_t length;
|
||||
uint32_t *result;
|
||||
|
||||
result = u32_normalize (UNINORM_NFC, c2, c2_length, NULL, &length);
|
||||
if (!(result != NULL
|
||||
&& length == c2_length
|
||||
&& u32_cmp (result, c2, c2_length) == 0))
|
||||
return 2;
|
||||
free (result);
|
||||
}
|
||||
{
|
||||
size_t length;
|
||||
uint32_t *result;
|
||||
|
||||
result = u32_normalize (UNINORM_NFC, c3, c3_length, NULL, &length);
|
||||
if (!(result != NULL
|
||||
&& length == c2_length
|
||||
&& u32_cmp (result, c2, c2_length) == 0))
|
||||
return 3;
|
||||
free (result);
|
||||
}
|
||||
{
|
||||
size_t length;
|
||||
uint32_t *result;
|
||||
|
||||
result = u32_normalize (UNINORM_NFC, c4, c4_length, NULL, &length);
|
||||
if (!(result != NULL
|
||||
&& length == c4_length
|
||||
&& u32_cmp (result, c4, c4_length) == 0))
|
||||
return 4;
|
||||
free (result);
|
||||
}
|
||||
{
|
||||
size_t length;
|
||||
uint32_t *result;
|
||||
|
||||
result = u32_normalize (UNINORM_NFC, c5, c5_length, NULL, &length);
|
||||
if (!(result != NULL
|
||||
&& length == c4_length
|
||||
&& u32_cmp (result, c4, c4_length) == 0))
|
||||
return 5;
|
||||
free (result);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
struct normalization_test_file file;
|
||||
|
||||
set_program_name (argv[0]);
|
||||
read_normalization_test_file (argv[1], &file);
|
||||
|
||||
test_specific (&file, check);
|
||||
test_other (&file, UNINORM_NFC);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
/* Variant compiled when GNULIB_UNINORM_U32_NORMALIZE is not set: report
   that the test is skipped and exit with status 77.  */
int
main (void)
{
  fprintf (stderr, "Skipping test: uninorm/u32-normalize module not included.\n");
  return 77;
}
|
||||
|
||||
#endif
|
2
tests/uninorm/test-u32-nfc-big.sh
Executable file
2
tests/uninorm/test-u32-nfc-big.sh
Executable file
@@ -0,0 +1,2 @@
|
||||
#!/bin/sh
# Run the NFC conformance test program from the current directory on the
# NormalizationTest.txt data file located under $srcdir.  The program
# replaces this shell (exec), so its exit status is the script's status.
exec ./test-u32-nfc-big${EXEEXT} "${srcdir}/uninorm/NormalizationTest.txt"
|
342
tests/uninorm/test-u32-nfc.c
Normal file
342
tests/uninorm/test-u32-nfc.c
Normal file
@@ -0,0 +1,342 @@
|
||||
/* Test of canonical normalization of UTF-32 strings.
|
||||
Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written by Bruno Haible <bruno@clisp.org>, 2009. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#if GNULIB_UNINORM_U32_NORMALIZE
|
||||
|
||||
#include "uninorm.h"
|
||||
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "unistr.h"
|
||||
|
||||
/* Number of elements of ARRAY.  ARRAY must be a real array object, not a
   pointer.  The argument is parenthesized before indexing so that any
   expression can be passed safely.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))

/* Evaluate EXPR once; if it is false, print "FILE:LINE: assertion failed"
   to stderr, flush stderr and abort the process.  Wrapped in
   do { } while (0) so that it behaves like a single statement.  */
#define ASSERT(expr) \
  do                                                                         \
    {                                                                        \
      if (!(expr))                                                           \
        {                                                                    \
          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
          fflush (stderr);                                                   \
          abort ();                                                          \
        }                                                                    \
    }                                                                        \
  while (0)
|
||||
|
||||
static int
|
||||
check (const uint32_t *input, size_t input_length,
|
||||
const uint32_t *expected, size_t expected_length)
|
||||
{
|
||||
size_t length;
|
||||
uint32_t *result;
|
||||
|
||||
/* Test return conventions with resultbuf == NULL. */
|
||||
result = u32_normalize (UNINORM_NFC, input, input_length, NULL, &length);
|
||||
if (!(result != NULL))
|
||||
return 1;
|
||||
if (!(length == expected_length))
|
||||
return 2;
|
||||
if (!(u32_cmp (result, expected, expected_length) == 0))
|
||||
return 3;
|
||||
free (result);
|
||||
|
||||
/* Test return conventions with resultbuf too small. */
|
||||
if (expected_length > 0)
|
||||
{
|
||||
uint32_t *preallocated;
|
||||
|
||||
length = expected_length - 1;
|
||||
preallocated = (uint32_t *) malloc (length * sizeof (uint32_t));
|
||||
result = u32_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
|
||||
if (!(result != NULL))
|
||||
return 4;
|
||||
if (!(result != preallocated))
|
||||
return 5;
|
||||
if (!(length == expected_length))
|
||||
return 6;
|
||||
if (!(u32_cmp (result, expected, expected_length) == 0))
|
||||
return 7;
|
||||
free (result);
|
||||
free (preallocated);
|
||||
}
|
||||
|
||||
/* Test return conventions with resultbuf large enough. */
|
||||
{
|
||||
uint32_t *preallocated;
|
||||
|
||||
length = expected_length;
|
||||
preallocated = (uint32_t *) malloc (length * sizeof (uint32_t));
|
||||
result = u32_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
|
||||
if (!(result != NULL))
|
||||
return 8;
|
||||
if (!(result == preallocated))
|
||||
return 9;
|
||||
if (!(length == expected_length))
|
||||
return 10;
|
||||
if (!(u32_cmp (result, expected, expected_length) == 0))
|
||||
return 11;
|
||||
free (preallocated);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Store COUNT copies of UNIT starting at DEST and return the position just
   past the last unit written.  */
static uint32_t *
append_u32_run (uint32_t *dest, uint32_t unit, size_t count)
{
  while (count-- > 0)
    *dest++ = unit;
  return dest;
}

/* Run all UTF-32 NFC checks: a set of short fixed strings, then three
   large inputs made of one base letter followed by many combining marks.
   Any failing check aborts through ASSERT.  */
void
test_u32_nfc (void)
{
  /* SPACE */
  {
    static const uint32_t space[] = { 0x0020 };
    ASSERT (check (space, SIZEOF (space), space, SIZEOF (space)) == 0);
  }

  /* LATIN CAPITAL LETTER A WITH DIAERESIS */
  {
    static const uint32_t composed[] = { 0x00C4 };
    static const uint32_t decomposed[] = { 0x0041, 0x0308 };
    ASSERT (check (composed, SIZEOF (composed),
                   composed, SIZEOF (composed)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed),
                   composed, SIZEOF (composed)) == 0);
  }

  /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
  {
    static const uint32_t composed[] = { 0x01DE };
    static const uint32_t decomposed[] = { 0x0041, 0x0308, 0x0304 };
    ASSERT (check (composed, SIZEOF (composed),
                   composed, SIZEOF (composed)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed),
                   composed, SIZEOF (composed)) == 0);
  }

  /* ANGSTROM SIGN: the sign itself, its decomposition and the expected
     result must all normalize to the same single unit.  */
  {
    static const uint32_t sign[] = { 0x212B };
    static const uint32_t decomposed[] = { 0x0041, 0x030A };
    static const uint32_t composed[] = { 0x00C5 };
    ASSERT (check (sign, SIZEOF (sign),
                   composed, SIZEOF (composed)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed),
                   composed, SIZEOF (composed)) == 0);
    ASSERT (check (composed, SIZEOF (composed),
                   composed, SIZEOF (composed)) == 0);
  }

  /* Single units that are expected to come back unchanged.  Each one is
     checked on its own, as a one-unit string.  */
  {
    static const uint32_t unchanged[] =
      {
        0x1FC1, /* GREEK DIALYTIKA AND PERISPOMENI */
        0x2113, /* SCRIPT SMALL L */
        0x00A0, /* NO-BREAK SPACE */
        0xFB6C, /* ARABIC LETTER VEH INITIAL FORM */
        0xFB6D, /* ARABIC LETTER VEH MEDIAL FORM */
        0xFB6B, /* ARABIC LETTER VEH FINAL FORM */
        0xFB6A, /* ARABIC LETTER VEH ISOLATED FORM */
        0x246E, /* CIRCLED NUMBER FIFTEEN */
        0x2122, /* TRADE MARK SIGN */
        0x1D62, /* LATIN SUBSCRIPT SMALL LETTER I */
        0xFE35, /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
        0xFF21, /* FULLWIDTH LATIN CAPITAL LETTER A */
        0xFF64, /* HALFWIDTH IDEOGRAPHIC COMMA */
        0xFE51, /* SMALL IDEOGRAPHIC COMMA */
        0x3392, /* SQUARE MHZ */
        0x215C, /* VULGAR FRACTION THREE EIGHTHS */
        0x00B5, /* MICRO SIGN */
        0xFDFA  /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
      };
    size_t k;

    for (k = 0; k < SIZEOF (unchanged); k++)
      ASSERT (check (&unchanged[k], 1, &unchanged[k], 1) == 0);
  }

  /* HANGUL SYLLABLE GEUL */
  {
    static const uint32_t syllable[] = { 0xAE00 };
    static const uint32_t jamos[] = { 0x1100, 0x1173, 0x11AF };
    ASSERT (check (syllable, SIZEOF (syllable),
                   syllable, SIZEOF (syllable)) == 0);
    ASSERT (check (jamos, SIZEOF (jamos),
                   syllable, SIZEOF (syllable)) == 0);
  }

  /* HANGUL SYLLABLE GEU */
  {
    static const uint32_t syllable[] = { 0xADF8 };
    static const uint32_t jamos[] = { 0x1100, 0x1173 };
    ASSERT (check (syllable, SIZEOF (syllable),
                   syllable, SIZEOF (syllable)) == 0);
    ASSERT (check (jamos, SIZEOF (jamos),
                   syllable, SIZEOF (syllable)) == 0);
  }

  /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
  {
    static const uint32_t composed[] =
      { 'G', 'r', 0x00FC, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
        0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
        0x0439, 0x0442, 0x0435, '!', ' ',
        'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
        '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
        0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',', 0xD55C, 0xAE00, '\n'
      };
    static const uint32_t decomposed[] =
      { 'G', 'r', 0x0075, 0x0308, 0x00DF, ' ', 'G', 'o', 't', 't', '.', ' ',
        0x0417, 0x0434, 0x0440, 0x0430, 0x0432, 0x0441, 0x0442, 0x0432, 0x0443,
        0x0438, 0x0306, 0x0442, 0x0435, '!', ' ',
        'x', '=', '(', '-', 'b', 0x00B1, 's', 'q', 'r', 't', '(', 'b', 0x00B2,
        '-', '4', 'a', 'c', ')', ')', '/', '(', '2', 'a', ')', ' ', ' ',
        0x65E5, 0x672C, 0x8A9E, ',', 0x4E2D, 0x6587, ',',
        0x1112, 0x1161, 0x11AB, 0x1100, 0x1173, 0x11AF, '\n'
      };
    ASSERT (check (composed, SIZEOF (composed),
                   composed, SIZEOF (composed)) == 0);
    ASSERT (check (decomposed, SIZEOF (decomposed),
                   composed, SIZEOF (composed)) == 0);
  }

#if HAVE_DECL_ALARM
  /* Restore the default SIGALRM action and arm a 50 second alarm, so that
     a run that takes too long is terminated and counts as a failure.  */
  signal (SIGALRM, SIG_DFL);
  alarm (50);
#endif

  /* Check that the sorting is not O(n^2) but O(n log n): feed one base
     letter followed by m - 1 combining marks, in three different orders,
     and expect the same result each time.  */
  {
    const size_t m = 100000;
    const size_t m1 = m / 2;
    const size_t m2 = (m - 1) / 2;
    /* NB: m1 + m2 == m - 1.  */
    int pass;

    for (pass = 0; pass < 3; pass++)
      {
        size_t repeat = 1;
        /* One allocation holds both strings: input in the first m units,
           expected in the second m units.  */
        uint32_t *input = (uint32_t *) malloc (2 * m * sizeof (uint32_t));
        uint32_t *expected;
        uint32_t *tail;
        size_t i;

        /* Without memory this pass is silently skipped.  */
        if (input == NULL)
          continue;
        expected = input + m;

        input[0] = 0x0041;
        tail = input + 1;
        switch (pass)
          {
          case 0:
            /* All 0x0319 first, then all 0x0300.  */
            tail = append_u32_run (tail, 0x0319, m1);
            tail = append_u32_run (tail, 0x0300, m2);
            break;

          case 1:
            /* All 0x0300 first, then all 0x0319.  */
            tail = append_u32_run (tail, 0x0300, m2);
            tail = append_u32_run (tail, 0x0319, m1);
            break;

          case 2:
            /* Alternating pairs, then the remaining 0x0319 units.  */
            for (i = 0; i < m2; i++)
              {
                *tail++ = 0x0319;
                *tail++ = 0x0300;
              }
            tail = append_u32_run (tail, 0x0319, m1 - m2);
            break;

          default:
            abort ();
          }

        /* Expected: 0x00C0, then m1 times 0x0319, then m2 - 1 times
           0x0300, for a total of m - 1 units.  */
        expected[0] = 0x00C0;
        tail = append_u32_run (expected + 1, 0x0319, m1);
        tail = append_u32_run (tail, 0x0300, m2 - 1);

        while (repeat-- > 0)
          {
            ASSERT (check (input, m, expected, m - 1) == 0);
            ASSERT (check (expected, m - 1, expected, m - 1) == 0);
          }

        free (input);
      }
  }
}
|
||||
|
||||
#else
|
||||
|
||||
/* Variant compiled when GNULIB_UNINORM_U32_NORMALIZE is not set: there is
   nothing to test, but the symbol must still exist for the caller.  */
void
test_u32_nfc (void)
{
  /* Intentionally empty.  */
}
|
||||
|
||||
#endif
|
372
tests/uninorm/test-u8-nfc.c
Normal file
372
tests/uninorm/test-u8-nfc.c
Normal file
@@ -0,0 +1,372 @@
|
||||
/* Test of canonical normalization of UTF-8 strings.
|
||||
Copyright (C) 2009 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Written by Bruno Haible <bruno@clisp.org>, 2009. */
|
||||
|
||||
#include <config.h>
|
||||
|
||||
#if GNULIB_UNINORM_U8_NORMALIZE
|
||||
|
||||
#include "uninorm.h"
|
||||
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "unistr.h"
|
||||
|
||||
/* Number of elements of ARRAY.  ARRAY must be an actual array object, not a
   pointer.  The argument is parenthesized before indexing so that
   expressions such as *p expand correctly.  */
#define SIZEOF(array) (sizeof (array) / sizeof ((array)[0]))

/* Evaluate EXPR; if it is false, report the source position on stderr and
   terminate the program through abort ().  */
#define ASSERT(expr) \
  do                                                                         \
    {                                                                        \
      if (!(expr))                                                           \
        {                                                                    \
          fprintf (stderr, "%s:%d: assertion failed\n", __FILE__, __LINE__); \
          fflush (stderr);                                                   \
          abort ();                                                          \
        }                                                                    \
    }                                                                        \
  while (0)
|
||||
|
||||
static int
|
||||
check (const uint8_t *input, size_t input_length,
|
||||
const uint8_t *expected, size_t expected_length)
|
||||
{
|
||||
size_t length;
|
||||
uint8_t *result;
|
||||
|
||||
/* Test return conventions with resultbuf == NULL. */
|
||||
result = u8_normalize (UNINORM_NFC, input, input_length, NULL, &length);
|
||||
if (!(result != NULL))
|
||||
return 1;
|
||||
if (!(length == expected_length))
|
||||
return 2;
|
||||
if (!(u8_cmp (result, expected, expected_length) == 0))
|
||||
return 3;
|
||||
free (result);
|
||||
|
||||
/* Test return conventions with resultbuf too small. */
|
||||
if (expected_length > 0)
|
||||
{
|
||||
uint8_t *preallocated;
|
||||
|
||||
length = expected_length - 1;
|
||||
preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
|
||||
result = u8_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
|
||||
if (!(result != NULL))
|
||||
return 4;
|
||||
if (!(result != preallocated))
|
||||
return 5;
|
||||
if (!(length == expected_length))
|
||||
return 6;
|
||||
if (!(u8_cmp (result, expected, expected_length) == 0))
|
||||
return 7;
|
||||
free (result);
|
||||
free (preallocated);
|
||||
}
|
||||
|
||||
/* Test return conventions with resultbuf large enough. */
|
||||
{
|
||||
uint8_t *preallocated;
|
||||
|
||||
length = expected_length;
|
||||
preallocated = (uint8_t *) malloc (length * sizeof (uint8_t));
|
||||
result = u8_normalize (UNINORM_NFC, input, input_length, preallocated, &length);
|
||||
if (!(result != NULL))
|
||||
return 8;
|
||||
if (!(result == preallocated))
|
||||
return 9;
|
||||
if (!(length == expected_length))
|
||||
return 10;
|
||||
if (!(u8_cmp (result, expected, expected_length) == 0))
|
||||
return 11;
|
||||
free (preallocated);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Store COUNT copies of the two-byte sequence BYTE0 BYTE1 starting at DEST
   and return the position just past the last byte written.  */
static uint8_t *
append_pairs (uint8_t *dest, size_t count, uint8_t byte0, uint8_t byte1)
{
  while (count > 0)
    {
      *dest++ = byte0;
      *dest++ = byte1;
      count--;
    }
  return dest;
}

/* Assert that check () accepts the array OUT as the normalization of the
   array IN.  Both arguments must be arrays, because SIZEOF is applied to
   them.  */
#define CHECK_NFC(in, out) \
  ASSERT (check (in, SIZEOF (in), out, SIZEOF (out)) == 0)

/* Run all UTF-8 NFC checks: a list of short fixed strings, followed by
   three large inputs made of one base letter and many combining marks.  */
void
test_u8_nfc (void)
{
  { /* SPACE */
    static const uint8_t text[] = { 0x20 };
    CHECK_NFC (text, text);
  }

  { /* LATIN CAPITAL LETTER A WITH DIAERESIS */
    static const uint8_t composed[] = { 0xC3, 0x84 };
    static const uint8_t sequence[] = { 0x41, 0xCC, 0x88 };
    CHECK_NFC (composed, composed);
    CHECK_NFC (sequence, composed);
  }

  { /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
    static const uint8_t composed[] = { 0xC7, 0x9E };
    static const uint8_t sequence[] = { 0x41, 0xCC, 0x88, 0xCC, 0x84 };
    CHECK_NFC (composed, composed);
    CHECK_NFC (sequence, composed);
  }

  { /* ANGSTROM SIGN */
    static const uint8_t sign[] = { 0xE2, 0x84, 0xAB };
    static const uint8_t sequence[] = { 0x41, 0xCC, 0x8A };
    static const uint8_t composed[] = { 0xC3, 0x85 };
    CHECK_NFC (sign, composed);
    CHECK_NFC (sequence, composed);
    CHECK_NFC (composed, composed);
  }

  /* The following characters are expected to come back unchanged.  */

  { /* GREEK DIALYTIKA AND PERISPOMENI */
    static const uint8_t text[] = { 0xE1, 0xBF, 0x81 };
    CHECK_NFC (text, text);
  }

  { /* SCRIPT SMALL L */
    static const uint8_t text[] = { 0xE2, 0x84, 0x93 };
    CHECK_NFC (text, text);
  }

  { /* NO-BREAK SPACE */
    static const uint8_t text[] = { 0xC2, 0xA0 };
    CHECK_NFC (text, text);
  }

  { /* ARABIC LETTER VEH INITIAL FORM */
    static const uint8_t text[] = { 0xEF, 0xAD, 0xAC };
    CHECK_NFC (text, text);
  }

  { /* ARABIC LETTER VEH MEDIAL FORM */
    static const uint8_t text[] = { 0xEF, 0xAD, 0xAD };
    CHECK_NFC (text, text);
  }

  { /* ARABIC LETTER VEH FINAL FORM */
    static const uint8_t text[] = { 0xEF, 0xAD, 0xAB };
    CHECK_NFC (text, text);
  }

  { /* ARABIC LETTER VEH ISOLATED FORM */
    static const uint8_t text[] = { 0xEF, 0xAD, 0xAA };
    CHECK_NFC (text, text);
  }

  { /* CIRCLED NUMBER FIFTEEN */
    static const uint8_t text[] = { 0xE2, 0x91, 0xAE };
    CHECK_NFC (text, text);
  }

  { /* TRADE MARK SIGN */
    static const uint8_t text[] = { 0xE2, 0x84, 0xA2 };
    CHECK_NFC (text, text);
  }

  { /* LATIN SUBSCRIPT SMALL LETTER I */
    static const uint8_t text[] = { 0xE1, 0xB5, 0xA2 };
    CHECK_NFC (text, text);
  }

  { /* PRESENTATION FORM FOR VERTICAL LEFT PARENTHESIS */
    static const uint8_t text[] = { 0xEF, 0xB8, 0xB5 };
    CHECK_NFC (text, text);
  }

  { /* FULLWIDTH LATIN CAPITAL LETTER A */
    static const uint8_t text[] = { 0xEF, 0xBC, 0xA1 };
    CHECK_NFC (text, text);
  }

  { /* HALFWIDTH IDEOGRAPHIC COMMA */
    static const uint8_t text[] = { 0xEF, 0xBD, 0xA4 };
    CHECK_NFC (text, text);
  }

  { /* SMALL IDEOGRAPHIC COMMA */
    static const uint8_t text[] = { 0xEF, 0xB9, 0x91 };
    CHECK_NFC (text, text);
  }

  { /* SQUARE MHZ */
    static const uint8_t text[] = { 0xE3, 0x8E, 0x92 };
    CHECK_NFC (text, text);
  }

  { /* VULGAR FRACTION THREE EIGHTHS */
    static const uint8_t text[] = { 0xE2, 0x85, 0x9C };
    CHECK_NFC (text, text);
  }

  { /* MICRO SIGN */
    static const uint8_t text[] = { 0xC2, 0xB5 };
    CHECK_NFC (text, text);
  }

  { /* ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM */
    static const uint8_t text[] = { 0xEF, 0xB7, 0xBA };
    CHECK_NFC (text, text);
  }

  { /* HANGUL SYLLABLE GEUL */
    static const uint8_t syllable[] = { 0xEA, 0xB8, 0x80 };
    static const uint8_t jamos[] =
      { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF };
    CHECK_NFC (syllable, syllable);
    CHECK_NFC (jamos, syllable);
  }

  { /* HANGUL SYLLABLE GEU */
    static const uint8_t syllable[] = { 0xEA, 0xB7, 0xB8 };
    static const uint8_t jamos[] = { 0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3 };
    CHECK_NFC (syllable, syllable);
    CHECK_NFC (jamos, syllable);
  }

  { /* "Grüß Gott. Здравствуйте! x=(-b±sqrt(b²-4ac))/(2a) 日本語,中文,한글" */
    static const uint8_t composed[] =
      { 'G', 'r', 0xC3, 0xBC, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
        ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
        0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB9,
        0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
        's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
        '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
        0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
        0xED, 0x95, 0x9C,
        0xEA, 0xB8, 0x80, '\n'
      };
    static const uint8_t sequence[] =
      { 'G', 'r', 0x75, 0xCC, 0x88, 0xC3, 0x9F, ' ', 'G', 'o', 't', 't', '.',
        ' ', 0xD0, 0x97, 0xD0, 0xB4, 0xD1, 0x80, 0xD0, 0xB0, 0xD0, 0xB2, 0xD1,
        0x81, 0xD1, 0x82, 0xD0, 0xB2, 0xD1, 0x83, 0xD0, 0xB8, 0xCC, 0x86,
        0xD1, 0x82, 0xD0, 0xB5, '!', ' ', 'x', '=', '(', '-', 'b', 0xC2, 0xB1,
        's', 'q', 'r', 't', '(', 'b', 0xC2, 0xB2, '-', '4', 'a', 'c', ')', ')',
        '/', '(', '2', 'a', ')', ' ', ' ', 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC,
        0xE8, 0xAA, 0x9E, ',', 0xE4, 0xB8, 0xAD, 0xE6, 0x96, 0x87, ',',
        0xE1, 0x84, 0x92, 0xE1, 0x85, 0xA1, 0xE1, 0x86, 0xAB,
        0xE1, 0x84, 0x80, 0xE1, 0x85, 0xB3, 0xE1, 0x86, 0xAF, '\n'
      };
    CHECK_NFC (composed, composed);
    CHECK_NFC (sequence, composed);
  }

#if HAVE_DECL_ALARM
  /* Restore the default SIGALRM action and arm a 50 second timer, so that a
     run that takes too long is terminated and thereby counted as a
     failure.  */
  signal (SIGALRM, SIG_DFL);
  alarm (50);
#endif

  /* Check that the sorting is not O(n²) but O(n log n).
     Each pass builds 'A' followed by m1 copies of CC 99 (U+0319 in UTF-8)
     and m2 copies of CC 80 (U+0300 in UTF-8), arranged differently:
       pass 0: all U+0319 first, then all U+0300,
       pass 1: all U+0300 first, then all U+0319,
       pass 2: alternating pairs, then the remaining U+0319.
     The expected output is the same every time: C3 80 (U+00C0), then all
     U+0319, then the remaining m2 - 1 copies of U+0300.  */
  {
    int pass;

    for (pass = 0; pass < 3; pass++)
      {
        size_t repeat = 1;
        size_t m = 100000;
        size_t m1 = m / 2;
        size_t m2 = (m - 1) / 2;
        /* NB: m1 + m2 == m - 1.  */
        size_t input_length = 2 * m - 1;
        size_t expected_length = 2 * m - 2;
        /* One allocation holds the input followed by the expected output.  */
        uint8_t *input = (uint8_t *) malloc (2 * (2 * m - 1) * sizeof (uint8_t));
        uint8_t *expected;
        uint8_t *end;

        /* Out of memory: silently skip this pass.  */
        if (input == NULL)
          continue;

        expected = input + (2 * m - 1);

        input[0] = 0x41;
        end = input + 1;
        if (pass == 0)
          {
            end = append_pairs (end, m1, 0xCC, 0x99);
            end = append_pairs (end, m2, 0xCC, 0x80);
          }
        else if (pass == 1)
          {
            end = append_pairs (end, m2, 0xCC, 0x80);
            end = append_pairs (end, m1, 0xCC, 0x99);
          }
        else if (pass == 2)
          {
            size_t k;

            for (k = 0; k < m2; k++)
              {
                *end++ = 0xCC;
                *end++ = 0x99;
                *end++ = 0xCC;
                *end++ = 0x80;
              }
            /* m1 >= m2 always holds, so this cannot wrap around.  */
            end = append_pairs (end, m1 - m2, 0xCC, 0x99);
          }
        else
          abort ();

        expected[0] = 0xC3;
        expected[1] = 0x80;
        end = expected + 2;
        end = append_pairs (end, m1, 0xCC, 0x99);
        end = append_pairs (end, m2 - 1, 0xCC, 0x80);

        while (repeat > 0)
          {
            ASSERT (check (input, input_length, expected, expected_length) == 0);
            ASSERT (check (expected, expected_length, expected, expected_length) == 0);
            repeat--;
          }

        free (input);
      }
  }
}

#undef CHECK_NFC
|
||||
|
||||
#else
|
||||
|
||||
/* Fallback used when GNULIB_UNINORM_U8_NORMALIZE is not set: nothing is
   exercised, but the entry point is still defined so that callers link.  */
void
test_u8_nfc (void)
{
  /* Intentionally empty.  */
}
|
||||
|
||||
#endif
|
Reference in New Issue
Block a user