mirror of
https://sourceware.org/git/glibc.git
synced 2025-07-30 22:43:12 +03:00
Fix memory handling in strxfrm_l [BZ #16009]
[Modified from the original email by Siddhesh Poyarekar] This patch solves bug #16009 by implementing an additional path in strxfrm that does not depend on caching the weight and rule indices. In detail the following changed: * The old main loop was factored out of strxfrm_l into the function do_xfrm_cached to be able to alternativly use the non-caching version do_xfrm. * strxfrm_l allocates a a fixed size array on the stack. If this is not sufficiant to store the weight and rule indices, the non-caching path is taken. As the cache size is not dependent on the input there can be no problems with integer overflows or stack allocations greater than __MAX_ALLOCA_CUTOFF. Note that malloc-ing is not possible because the definition of strxfrm does not allow an oom errorhandling. * The uncached path determines the weight and rule index for every char and for every pass again. * Passing all the locale data array by array resulted in very long parameter lists, so I introduced a structure that holds them. * Checking for zero src string has been moved a bit upwards, it is before the locale data initialization now. * To verify that the non-caching path works correct I added a test run to localedata/sort-test.sh & localedata/xfrm-test.c where all strings are patched up with spaces so that they are too large for the caching path.
This commit is contained in:
committed by
Siddhesh Poyarekar
parent
c60ec0e016
commit
0f9e585480
16
ChangeLog
16
ChangeLog
@ -1,3 +1,19 @@
|
|||||||
|
2015-01-13 Leonhard Holz <leonhard.holz@web.de>
|
||||||
|
|
||||||
|
[BZ #16009]
|
||||||
|
* string/strxfrm_l.c (STRXFRM): Allocate fixed size cache for
|
||||||
|
weights and rules. Use do_xfrm_cached if data fits in cache,
|
||||||
|
do_xfrm otherwise. Moved former main loop to...
|
||||||
|
* (do_xfrm_cached): New function.
|
||||||
|
* (do_xfrm): Non-caching version of do_xfrm_cached. Uses
|
||||||
|
find_idx, find_position and stack_push.
|
||||||
|
* (find_idx): New function.
|
||||||
|
* (find_position): Likewise.
|
||||||
|
* localedata/sort-test.sh: Added test run for do_xfrm.
|
||||||
|
* localedata/xfrm-test.c (main): Added command line option
|
||||||
|
-nocache to run the test with strings that are too large for
|
||||||
|
the STRXFRM cache.
|
||||||
|
|
||||||
2015-01-13 Torvald Riegel <triegel@redhat.com>
|
2015-01-13 Torvald Riegel <triegel@redhat.com>
|
||||||
|
|
||||||
* sysdeps/nptl/fork.c (__libc_fork): Provide address of futex
|
* sysdeps/nptl/fork.c (__libc_fork): Provide address of futex
|
||||||
|
16
NEWS
16
NEWS
@ -10,14 +10,14 @@ Version 2.21
|
|||||||
* The following bugs are resolved with this release:
|
* The following bugs are resolved with this release:
|
||||||
|
|
||||||
6652, 10672, 12847, 12926, 13862, 14132, 14138, 14171, 14498, 15215,
|
6652, 10672, 12847, 12926, 13862, 14132, 14138, 14171, 14498, 15215,
|
||||||
15884, 16191, 16469, 16617, 16619, 16657, 16740, 16857, 17192, 17266,
|
15884, 16009, 16191, 16469, 16617, 16619, 16657, 16740, 16857, 17192,
|
||||||
17273, 17344, 17363, 17370, 17371, 17411, 17460, 17475, 17485, 17501,
|
17266, 17273, 17344, 17363, 17370, 17371, 17411, 17460, 17475, 17485,
|
||||||
17506, 17508, 17522, 17555, 17570, 17571, 17572, 17573, 17574, 17582,
|
17501, 17506, 17508, 17522, 17555, 17570, 17571, 17572, 17573, 17574,
|
||||||
17583, 17584, 17585, 17589, 17594, 17601, 17608, 17616, 17625, 17630,
|
17582, 17583, 17584, 17585, 17589, 17594, 17601, 17608, 17616, 17625,
|
||||||
17633, 17634, 17635, 17647, 17653, 17657, 17658, 17664, 17665, 17668,
|
17630, 17633, 17634, 17635, 17647, 17653, 17657, 17658, 17664, 17665,
|
||||||
17682, 17717, 17719, 17722, 17723, 17724, 17725, 17732, 17733, 17744,
|
17668, 17682, 17717, 17719, 17722, 17723, 17724, 17725, 17732, 17733,
|
||||||
17745, 17746, 17747, 17748, 17775, 17777, 17780, 17781, 17782, 17791,
|
17744, 17745, 17746, 17747, 17748, 17775, 17777, 17780, 17781, 17782,
|
||||||
17793, 17796, 17797, 17803, 17806, 17834
|
17791, 17793, 17796, 17797, 17803, 17806, 17834
|
||||||
|
|
||||||
* Added support for TSX lock elision of pthread mutexes on powerpc32, powerpc64
|
* Added support for TSX lock elision of pthread mutexes on powerpc32, powerpc64
|
||||||
and powerpc64le. This may improve lock scaling of existing programs on
|
and powerpc64le. This may improve lock scaling of existing programs on
|
||||||
|
@ -53,11 +53,18 @@ for l in $lang; do
|
|||||||
${common_objpfx}localedata/xfrm-test $id < $cns.in \
|
${common_objpfx}localedata/xfrm-test $id < $cns.in \
|
||||||
> ${common_objpfx}localedata/$cns.xout || here=1
|
> ${common_objpfx}localedata/$cns.xout || here=1
|
||||||
cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1
|
cmp -s $cns.in ${common_objpfx}localedata/$cns.xout || here=1
|
||||||
|
${test_program_prefix_before_env} \
|
||||||
|
${run_program_env} \
|
||||||
|
LC_ALL=$l ${test_program_prefix_after_env} \
|
||||||
|
${common_objpfx}localedata/xfrm-test $id -nocache < $cns.in \
|
||||||
|
> ${common_objpfx}localedata/$cns.nocache.xout || here=1
|
||||||
|
cmp -s $cns.in ${common_objpfx}localedata/$cns.nocache.xout || here=1
|
||||||
if test $here -eq 0; then
|
if test $here -eq 0; then
|
||||||
echo "$l xfrm-test OK"
|
echo "$l xfrm-test OK"
|
||||||
else
|
else
|
||||||
echo "$l xfrm-test FAIL"
|
echo "$l xfrm-test FAIL"
|
||||||
diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed 's/^/ /'
|
diff -u $cns.in ${common_objpfx}localedata/$cns.xout | sed 's/^/ /'
|
||||||
|
diff -u $cns.in ${common_objpfx}localedata/$cns.nocache.xout | sed 's/^/ /'
|
||||||
status=1
|
status=1
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
|
@ -23,7 +23,10 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <stdbool.h>
|
||||||
|
|
||||||
|
/* Keep in sync with string/strxfrm_l.c. */
|
||||||
|
#define SMALL_STR_SIZE 4095
|
||||||
|
|
||||||
struct lines
|
struct lines
|
||||||
{
|
{
|
||||||
@ -37,6 +40,7 @@ int
|
|||||||
main (int argc, char *argv[])
|
main (int argc, char *argv[])
|
||||||
{
|
{
|
||||||
int result = 0;
|
int result = 0;
|
||||||
|
bool nocache = false;
|
||||||
size_t nstrings, nstrings_max;
|
size_t nstrings, nstrings_max;
|
||||||
struct lines *strings;
|
struct lines *strings;
|
||||||
char *line = NULL;
|
char *line = NULL;
|
||||||
@ -44,7 +48,18 @@ main (int argc, char *argv[])
|
|||||||
size_t n;
|
size_t n;
|
||||||
|
|
||||||
if (argc < 2)
|
if (argc < 2)
|
||||||
error (1, 0, "usage: %s <random seed>", argv[0]);
|
error (1, 0, "usage: %s <random seed> [-nocache]", argv[0]);
|
||||||
|
|
||||||
|
if (argc == 3)
|
||||||
|
{
|
||||||
|
if (strcmp (argv[2], "-nocache") == 0)
|
||||||
|
nocache = true;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
printf ("Unknown option %s!\n", argv[2]);
|
||||||
|
exit (1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
setlocale (LC_ALL, "");
|
setlocale (LC_ALL, "");
|
||||||
|
|
||||||
@ -59,9 +74,9 @@ main (int argc, char *argv[])
|
|||||||
|
|
||||||
while (1)
|
while (1)
|
||||||
{
|
{
|
||||||
char saved, *newp;
|
char saved, *word, *newp;
|
||||||
int needed;
|
size_t l, line_len, needed;
|
||||||
int l;
|
|
||||||
if (getline (&line, &len, stdin) < 0)
|
if (getline (&line, &len, stdin) < 0)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -83,10 +98,35 @@ main (int argc, char *argv[])
|
|||||||
|
|
||||||
saved = line[l];
|
saved = line[l];
|
||||||
line[l] = '\0';
|
line[l] = '\0';
|
||||||
needed = strxfrm (NULL, line, 0);
|
|
||||||
|
if (nocache)
|
||||||
|
{
|
||||||
|
line_len = strlen (line);
|
||||||
|
word = malloc (line_len + SMALL_STR_SIZE + 1);
|
||||||
|
if (word == NULL)
|
||||||
|
{
|
||||||
|
printf ("malloc failed: %m\n");
|
||||||
|
exit (1);
|
||||||
|
}
|
||||||
|
memset (word, ' ', SMALL_STR_SIZE);
|
||||||
|
memcpy (word + SMALL_STR_SIZE, line, line_len);
|
||||||
|
word[line_len + SMALL_STR_SIZE] = '\0';
|
||||||
|
}
|
||||||
|
else
|
||||||
|
word = line;
|
||||||
|
|
||||||
|
needed = strxfrm (NULL, word, 0);
|
||||||
newp = malloc (needed + 1);
|
newp = malloc (needed + 1);
|
||||||
strxfrm (newp, line, needed + 1);
|
if (newp == NULL)
|
||||||
|
{
|
||||||
|
printf ("malloc failed: %m\n");
|
||||||
|
exit (1);
|
||||||
|
}
|
||||||
|
strxfrm (newp, word, needed + 1);
|
||||||
strings[nstrings].xfrm = newp;
|
strings[nstrings].xfrm = newp;
|
||||||
|
|
||||||
|
if (nocache)
|
||||||
|
free (word);
|
||||||
line[l] = saved;
|
line[l] = saved;
|
||||||
++nstrings;
|
++nstrings;
|
||||||
}
|
}
|
||||||
|
@ -40,9 +40,24 @@
|
|||||||
#define CONCAT(a,b) CONCAT1(a,b)
|
#define CONCAT(a,b) CONCAT1(a,b)
|
||||||
#define CONCAT1(a,b) a##b
|
#define CONCAT1(a,b) a##b
|
||||||
|
|
||||||
|
/* Maximum string size that is calculated with cached indices. Right now this
|
||||||
|
is an arbitrary value open to optimizations. SMALL_STR_SIZE * 4 has to be
|
||||||
|
lower than __MAX_ALLOCA_CUTOFF. Keep localedata/xfrm-test.c in sync. */
|
||||||
|
#define SMALL_STR_SIZE 4095
|
||||||
|
|
||||||
#include "../locale/localeinfo.h"
|
#include "../locale/localeinfo.h"
|
||||||
#include WEIGHT_H
|
#include WEIGHT_H
|
||||||
|
|
||||||
|
/* Group locale data for shorter parameter lists. */
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
uint_fast32_t nrules;
|
||||||
|
unsigned char *rulesets;
|
||||||
|
USTRING_TYPE *weights;
|
||||||
|
int32_t *table;
|
||||||
|
USTRING_TYPE *extra;
|
||||||
|
int32_t *indirect;
|
||||||
|
} locale_data_t;
|
||||||
|
|
||||||
#ifndef WIDE_CHAR_VERSION
|
#ifndef WIDE_CHAR_VERSION
|
||||||
|
|
||||||
@ -81,113 +96,325 @@ utf8_encode (char *buf, int val)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Find next weight and rule index. Inlined since called for every char. */
|
||||||
size_t
|
static __always_inline size_t
|
||||||
STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
|
find_idx (const USTRING_TYPE **us, int32_t *weight_idx,
|
||||||
|
unsigned char *rule_idx, const locale_data_t *l_data, const int pass)
|
||||||
{
|
{
|
||||||
struct __locale_data *current = l->__locales[LC_COLLATE];
|
int32_t tmp = findidx (l_data->table, l_data->indirect, l_data->extra, us,
|
||||||
uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
|
-1);
|
||||||
/* We don't assign the following values right away since it might be
|
*rule_idx = tmp >> 24;
|
||||||
unnecessary in case there are no rules. */
|
int32_t idx = tmp & 0xffffff;
|
||||||
const unsigned char *rulesets;
|
size_t len = l_data->weights[idx++];
|
||||||
const int32_t *table;
|
|
||||||
const USTRING_TYPE *weights;
|
/* Skip over indices of previous levels. */
|
||||||
const USTRING_TYPE *extra;
|
for (int i = 0; i < pass; i++)
|
||||||
const int32_t *indirect;
|
{
|
||||||
|
idx += len;
|
||||||
|
len = l_data->weights[idx++];
|
||||||
|
}
|
||||||
|
|
||||||
|
*weight_idx = idx;
|
||||||
|
return len;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
find_position (const USTRING_TYPE *us, const locale_data_t *l_data,
|
||||||
|
const int pass)
|
||||||
|
{
|
||||||
|
int32_t weight_idx;
|
||||||
|
unsigned char rule_idx;
|
||||||
|
const USTRING_TYPE *usrc = us;
|
||||||
|
|
||||||
|
find_idx (&usrc, &weight_idx, &rule_idx, l_data, pass);
|
||||||
|
return l_data->rulesets[rule_idx * l_data->nrules + pass] & sort_position;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Do the transformation. */
|
||||||
|
static size_t
|
||||||
|
do_xfrm (const USTRING_TYPE *usrc, STRING_TYPE *dest, size_t n,
|
||||||
|
const locale_data_t *l_data)
|
||||||
|
{
|
||||||
|
int32_t weight_idx;
|
||||||
|
unsigned char rule_idx;
|
||||||
uint_fast32_t pass;
|
uint_fast32_t pass;
|
||||||
size_t needed;
|
size_t needed = 0;
|
||||||
|
size_t last_needed;
|
||||||
|
|
||||||
|
/* Now the passes over the weights. */
|
||||||
|
for (pass = 0; pass < l_data->nrules; ++pass)
|
||||||
|
{
|
||||||
|
size_t backw_len = 0;
|
||||||
|
last_needed = needed;
|
||||||
|
const USTRING_TYPE *cur = usrc;
|
||||||
|
const USTRING_TYPE *backw_start = NULL;
|
||||||
|
|
||||||
|
/* We assume that if a rule has defined `position' in one section
|
||||||
|
this is true for all of them. */
|
||||||
|
int position = find_position (cur, l_data, pass);
|
||||||
|
|
||||||
|
if (position == 0)
|
||||||
|
{
|
||||||
|
while (*cur != L('\0'))
|
||||||
|
{
|
||||||
|
const USTRING_TYPE *pos = cur;
|
||||||
|
size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data,
|
||||||
|
pass);
|
||||||
|
int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
|
||||||
|
|
||||||
|
if ((rule & sort_forward) != 0)
|
||||||
|
{
|
||||||
|
/* Handle the pushed backward sequence. */
|
||||||
|
if (backw_start != NULL)
|
||||||
|
{
|
||||||
|
for (size_t i = backw_len; i > 0; )
|
||||||
|
{
|
||||||
|
int32_t weight_idx;
|
||||||
|
unsigned char rule_idx;
|
||||||
|
size_t len = find_idx (&backw_start, &weight_idx,
|
||||||
|
&rule_idx, l_data, pass);
|
||||||
|
if (needed + i < n)
|
||||||
|
for (size_t j = len; j > 0; j--)
|
||||||
|
dest[needed + i - j] =
|
||||||
|
l_data->weights[weight_idx++];
|
||||||
|
|
||||||
|
i -= len;
|
||||||
|
}
|
||||||
|
|
||||||
|
needed += backw_len;
|
||||||
|
backw_start = NULL;
|
||||||
|
backw_len = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now handle the forward element. */
|
||||||
|
if (needed + len < n)
|
||||||
|
while (len-- > 0)
|
||||||
|
dest[needed++] = l_data->weights[weight_idx++];
|
||||||
|
else
|
||||||
|
/* No more characters fit into the buffer. */
|
||||||
|
needed += len;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Remember start of the backward sequence & track length. */
|
||||||
|
if (backw_start == NULL)
|
||||||
|
backw_start = pos;
|
||||||
|
backw_len += len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Handle the pushed backward sequence. */
|
||||||
|
if (backw_start != NULL)
|
||||||
|
{
|
||||||
|
for (size_t i = backw_len; i > 0; )
|
||||||
|
{
|
||||||
|
size_t len = find_idx (&backw_start, &weight_idx, &rule_idx,
|
||||||
|
l_data, pass);
|
||||||
|
if (needed + i < n)
|
||||||
|
for (size_t j = len; j > 0; j--)
|
||||||
|
dest[needed + i - j] =
|
||||||
|
l_data->weights[weight_idx++];
|
||||||
|
|
||||||
|
i -= len;
|
||||||
|
}
|
||||||
|
|
||||||
|
needed += backw_len;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int val = 1;
|
||||||
|
#ifndef WIDE_CHAR_VERSION
|
||||||
|
char buf[7];
|
||||||
|
size_t buflen;
|
||||||
|
#endif
|
||||||
|
size_t i;
|
||||||
|
|
||||||
|
while (*cur != L('\0'))
|
||||||
|
{
|
||||||
|
const USTRING_TYPE *pos = cur;
|
||||||
|
size_t len = find_idx (&cur, &weight_idx, &rule_idx, l_data,
|
||||||
|
pass);
|
||||||
|
int rule = l_data->rulesets[rule_idx * l_data->nrules + pass];
|
||||||
|
|
||||||
|
if ((rule & sort_forward) != 0)
|
||||||
|
{
|
||||||
|
/* Handle the pushed backward sequence. */
|
||||||
|
if (backw_start != NULL)
|
||||||
|
{
|
||||||
|
for (size_t p = backw_len; p > 0; p--)
|
||||||
|
{
|
||||||
|
size_t len;
|
||||||
|
int32_t weight_idx;
|
||||||
|
unsigned char rule_idx;
|
||||||
|
const USTRING_TYPE *backw_cur = backw_start;
|
||||||
|
|
||||||
|
/* To prevent a warning init the used vars. */
|
||||||
|
len = find_idx (&backw_cur, &weight_idx,
|
||||||
|
&rule_idx, l_data, pass);
|
||||||
|
|
||||||
|
for (i = 1; i < p; i++)
|
||||||
|
len = find_idx (&backw_cur, &weight_idx,
|
||||||
|
&rule_idx, l_data, pass);
|
||||||
|
|
||||||
|
if (len != 0)
|
||||||
|
{
|
||||||
|
#ifdef WIDE_CHAR_VERSION
|
||||||
|
if (needed + 1 + len < n)
|
||||||
|
{
|
||||||
|
dest[needed] = val;
|
||||||
|
for (i = 0; i < len; ++i)
|
||||||
|
dest[needed + 1 + i] =
|
||||||
|
l_data->weights[weight_idx + i];
|
||||||
|
}
|
||||||
|
needed += 1 + len;
|
||||||
|
#else
|
||||||
|
buflen = utf8_encode (buf, val);
|
||||||
|
if (needed + buflen + len < n)
|
||||||
|
{
|
||||||
|
for (i = 0; i < buflen; ++i)
|
||||||
|
dest[needed + i] = buf[i];
|
||||||
|
for (i = 0; i < len; ++i)
|
||||||
|
dest[needed + buflen + i] =
|
||||||
|
l_data->weights[weight_idx + i];
|
||||||
|
}
|
||||||
|
needed += buflen + len;
|
||||||
|
#endif
|
||||||
|
val = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
++val;
|
||||||
|
}
|
||||||
|
|
||||||
|
backw_start = NULL;
|
||||||
|
backw_len = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now handle the forward element. */
|
||||||
|
if (len != 0)
|
||||||
|
{
|
||||||
|
#ifdef WIDE_CHAR_VERSION
|
||||||
|
if (needed + 1 + len < n)
|
||||||
|
{
|
||||||
|
dest[needed] = val;
|
||||||
|
for (i = 0; i < len; ++i)
|
||||||
|
dest[needed + 1 + i] =
|
||||||
|
l_data->weights[weight_idx + i];
|
||||||
|
}
|
||||||
|
needed += 1 + len;
|
||||||
|
#else
|
||||||
|
buflen = utf8_encode (buf, val);
|
||||||
|
if (needed + buflen + len < n)
|
||||||
|
{
|
||||||
|
for (i = 0; i < buflen; ++i)
|
||||||
|
dest[needed + i] = buf[i];
|
||||||
|
for (i = 0; i < len; ++i)
|
||||||
|
dest[needed + buflen + i] =
|
||||||
|
l_data->weights[weight_idx + i];
|
||||||
|
}
|
||||||
|
needed += buflen + len;
|
||||||
|
#endif
|
||||||
|
val = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
++val;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Remember start of the backward sequence & track length. */
|
||||||
|
if (backw_start == NULL)
|
||||||
|
backw_start = pos;
|
||||||
|
backw_len++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle the pushed backward sequence. */
|
||||||
|
if (backw_start != NULL)
|
||||||
|
{
|
||||||
|
for (size_t p = backw_len; p > 0; p--)
|
||||||
|
{
|
||||||
|
size_t len;
|
||||||
|
int32_t weight_idx;
|
||||||
|
unsigned char rule_idx;
|
||||||
|
const USTRING_TYPE *backw_cur = backw_start;
|
||||||
|
|
||||||
|
/* To prevent a warning init the used vars. */
|
||||||
|
len = find_idx (&backw_cur, &weight_idx,
|
||||||
|
&rule_idx, l_data, pass);
|
||||||
|
|
||||||
|
for (i = 1; i < p; i++)
|
||||||
|
len = find_idx (&backw_cur, &weight_idx,
|
||||||
|
&rule_idx, l_data, pass);
|
||||||
|
|
||||||
|
if (len != 0)
|
||||||
|
{
|
||||||
|
#ifdef WIDE_CHAR_VERSION
|
||||||
|
if (needed + 1 + len < n)
|
||||||
|
{
|
||||||
|
dest[needed] = val;
|
||||||
|
for (i = 0; i < len; ++i)
|
||||||
|
dest[needed + 1 + i] =
|
||||||
|
l_data->weights[weight_idx + i];
|
||||||
|
}
|
||||||
|
needed += 1 + len;
|
||||||
|
#else
|
||||||
|
buflen = utf8_encode (buf, val);
|
||||||
|
if (needed + buflen + len < n)
|
||||||
|
{
|
||||||
|
for (i = 0; i < buflen; ++i)
|
||||||
|
dest[needed + i] = buf[i];
|
||||||
|
for (i = 0; i < len; ++i)
|
||||||
|
dest[needed + buflen + i] =
|
||||||
|
l_data->weights[weight_idx + i];
|
||||||
|
}
|
||||||
|
needed += buflen + len;
|
||||||
|
#endif
|
||||||
|
val = 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
++val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Finally store the byte to separate the passes or terminate
|
||||||
|
the string. */
|
||||||
|
if (needed < n)
|
||||||
|
dest[needed] = pass + 1 < l_data->nrules ? L('\1') : L('\0');
|
||||||
|
++needed;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is a little optimization: many collation specifications have
|
||||||
|
a `position' rule at the end and if no non-ignored character
|
||||||
|
is found the last \1 byte is immediately followed by a \0 byte
|
||||||
|
signalling this. We can avoid the \1 byte(s). */
|
||||||
|
if (needed > 2 && needed == last_needed + 1)
|
||||||
|
{
|
||||||
|
/* Remove the \1 byte. */
|
||||||
|
if (--needed <= n)
|
||||||
|
dest[needed - 1] = L('\0');
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Return the number of bytes/words we need, but don't count the NUL
|
||||||
|
byte/word at the end. */
|
||||||
|
return needed - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Do the transformation using weight-index and rule cache. */
|
||||||
|
static size_t
|
||||||
|
do_xfrm_cached (STRING_TYPE *dest, size_t n, const locale_data_t *l_data,
|
||||||
|
size_t idxmax, int32_t *idxarr, const unsigned char *rulearr)
|
||||||
|
{
|
||||||
|
uint_fast32_t nrules = l_data->nrules;
|
||||||
|
unsigned char *rulesets = l_data->rulesets;
|
||||||
|
USTRING_TYPE *weights = l_data->weights;
|
||||||
|
uint_fast32_t pass;
|
||||||
|
size_t needed = 0;
|
||||||
size_t last_needed;
|
size_t last_needed;
|
||||||
const USTRING_TYPE *usrc;
|
|
||||||
size_t srclen = STRLEN (src);
|
|
||||||
int32_t *idxarr;
|
|
||||||
unsigned char *rulearr;
|
|
||||||
size_t idxmax;
|
|
||||||
size_t idxcnt;
|
size_t idxcnt;
|
||||||
int use_malloc;
|
|
||||||
|
|
||||||
if (nrules == 0)
|
/* Now the passes over the weights. */
|
||||||
{
|
|
||||||
if (n != 0)
|
|
||||||
STPNCPY (dest, src, MIN (srclen + 1, n));
|
|
||||||
|
|
||||||
return srclen;
|
|
||||||
}
|
|
||||||
|
|
||||||
rulesets = (const unsigned char *)
|
|
||||||
current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
|
|
||||||
table = (const int32_t *)
|
|
||||||
current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
|
|
||||||
weights = (const USTRING_TYPE *)
|
|
||||||
current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
|
|
||||||
extra = (const USTRING_TYPE *)
|
|
||||||
current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
|
|
||||||
indirect = (const int32_t *)
|
|
||||||
current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
|
|
||||||
use_malloc = 0;
|
|
||||||
|
|
||||||
assert (((uintptr_t) table) % __alignof__ (table[0]) == 0);
|
|
||||||
assert (((uintptr_t) weights) % __alignof__ (weights[0]) == 0);
|
|
||||||
assert (((uintptr_t) extra) % __alignof__ (extra[0]) == 0);
|
|
||||||
assert (((uintptr_t) indirect) % __alignof__ (indirect[0]) == 0);
|
|
||||||
|
|
||||||
/* Handle an empty string as a special case. */
|
|
||||||
if (srclen == 0)
|
|
||||||
{
|
|
||||||
if (n != 0)
|
|
||||||
*dest = L('\0');
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We need the elements of the string as unsigned values since they
|
|
||||||
are used as indeces. */
|
|
||||||
usrc = (const USTRING_TYPE *) src;
|
|
||||||
|
|
||||||
/* Perform the first pass over the string and while doing this find
|
|
||||||
and store the weights for each character. Since we want this to
|
|
||||||
be as fast as possible we are using `alloca' to store the temporary
|
|
||||||
values. But since there is no limit on the length of the string
|
|
||||||
we have to use `malloc' if the string is too long. We should be
|
|
||||||
very conservative here. */
|
|
||||||
if (! __libc_use_alloca ((srclen + 1) * (sizeof (int32_t) + 1)))
|
|
||||||
{
|
|
||||||
idxarr = (int32_t *) malloc ((srclen + 1) * (sizeof (int32_t) + 1));
|
|
||||||
rulearr = (unsigned char *) &idxarr[srclen];
|
|
||||||
|
|
||||||
if (idxarr == NULL)
|
|
||||||
/* No memory. Well, go with the stack then.
|
|
||||||
|
|
||||||
XXX Once this implementation is stable we will handle this
|
|
||||||
differently. Instead of precomputing the indeces we will
|
|
||||||
do this in time. This means, though, that this happens for
|
|
||||||
every pass again. */
|
|
||||||
goto try_stack;
|
|
||||||
use_malloc = 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
try_stack:
|
|
||||||
idxarr = (int32_t *) alloca (srclen * sizeof (int32_t));
|
|
||||||
rulearr = (unsigned char *) alloca (srclen + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
idxmax = 0;
|
|
||||||
do
|
|
||||||
{
|
|
||||||
int32_t tmp = findidx (table, indirect, extra, &usrc, -1);
|
|
||||||
rulearr[idxmax] = tmp >> 24;
|
|
||||||
idxarr[idxmax] = tmp & 0xffffff;
|
|
||||||
|
|
||||||
++idxmax;
|
|
||||||
}
|
|
||||||
while (*usrc != L('\0'));
|
|
||||||
|
|
||||||
/* This element is only read, the value never used but to determine
|
|
||||||
another value which then is ignored. */
|
|
||||||
rulearr[idxmax] = '\0';
|
|
||||||
|
|
||||||
/* Now the passes over the weights. We now use the indeces we found
|
|
||||||
before. */
|
|
||||||
needed = 0;
|
|
||||||
for (pass = 0; pass < nrules; ++pass)
|
for (pass = 0; pass < nrules; ++pass)
|
||||||
{
|
{
|
||||||
size_t backw_stop = ~0ul;
|
size_t backw_stop = ~0ul;
|
||||||
@ -433,14 +660,87 @@ STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
|
|||||||
dest[needed - 1] = L('\0');
|
dest[needed - 1] = L('\0');
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Free the memory if needed. */
|
|
||||||
if (use_malloc)
|
|
||||||
free (idxarr);
|
|
||||||
|
|
||||||
/* Return the number of bytes/words we need, but don't count the NUL
|
/* Return the number of bytes/words we need, but don't count the NUL
|
||||||
byte/word at the end. */
|
byte/word at the end. */
|
||||||
return needed - 1;
|
return needed - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t
|
||||||
|
STRXFRM (STRING_TYPE *dest, const STRING_TYPE *src, size_t n, __locale_t l)
|
||||||
|
{
|
||||||
|
locale_data_t l_data;
|
||||||
|
struct __locale_data *current = l->__locales[LC_COLLATE];
|
||||||
|
l_data.nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
|
||||||
|
|
||||||
|
/* Handle byte comparison case. */
|
||||||
|
if (l_data.nrules == 0)
|
||||||
|
{
|
||||||
|
size_t srclen = STRLEN (src);
|
||||||
|
|
||||||
|
if (n != 0)
|
||||||
|
STPNCPY (dest, src, MIN (srclen + 1, n));
|
||||||
|
|
||||||
|
return srclen;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle an empty string, code hereafter relies on strlen (src) > 0. */
|
||||||
|
if (*src == L('\0'))
|
||||||
|
{
|
||||||
|
if (n != 0)
|
||||||
|
*dest = L('\0');
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get the locale data. */
|
||||||
|
l_data.rulesets = (unsigned char *)
|
||||||
|
current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
|
||||||
|
l_data.table = (int32_t *)
|
||||||
|
current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
|
||||||
|
l_data.weights = (USTRING_TYPE *)
|
||||||
|
current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
|
||||||
|
l_data.extra = (USTRING_TYPE *)
|
||||||
|
current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
|
||||||
|
l_data.indirect = (int32_t *)
|
||||||
|
current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
|
||||||
|
|
||||||
|
assert (((uintptr_t) l_data.table) % __alignof__ (l_data.table[0]) == 0);
|
||||||
|
assert (((uintptr_t) l_data.weights) % __alignof__ (l_data.weights[0]) == 0);
|
||||||
|
assert (((uintptr_t) l_data.extra) % __alignof__ (l_data.extra[0]) == 0);
|
||||||
|
assert (((uintptr_t) l_data.indirect) % __alignof__ (l_data.indirect[0]) == 0);
|
||||||
|
|
||||||
|
/* We need the elements of the string as unsigned values since they
|
||||||
|
are used as indeces. */
|
||||||
|
const USTRING_TYPE *usrc = (const USTRING_TYPE *) src;
|
||||||
|
|
||||||
|
/* Allocate cache for small strings on the stack and fill it with weight and
|
||||||
|
rule indices. If the cache size is not sufficient, continue with the
|
||||||
|
uncached xfrm version. */
|
||||||
|
size_t idxmax = 0;
|
||||||
|
const USTRING_TYPE *cur = usrc;
|
||||||
|
int32_t *idxarr = alloca (SMALL_STR_SIZE * sizeof (int32_t));
|
||||||
|
unsigned char *rulearr = alloca (SMALL_STR_SIZE + 1);
|
||||||
|
|
||||||
|
do
|
||||||
|
{
|
||||||
|
int32_t tmp = findidx (l_data.table, l_data.indirect, l_data.extra, &cur,
|
||||||
|
-1);
|
||||||
|
rulearr[idxmax] = tmp >> 24;
|
||||||
|
idxarr[idxmax] = tmp & 0xffffff;
|
||||||
|
|
||||||
|
++idxmax;
|
||||||
|
}
|
||||||
|
while (*cur != L('\0') && idxmax < SMALL_STR_SIZE);
|
||||||
|
|
||||||
|
/* This element is only read, the value never used but to determine
|
||||||
|
another value which then is ignored. */
|
||||||
|
rulearr[idxmax] = '\0';
|
||||||
|
|
||||||
|
/* Do the transformation. */
|
||||||
|
if (*cur == L('\0'))
|
||||||
|
return do_xfrm_cached (dest, n, &l_data, idxmax, idxarr, rulearr);
|
||||||
|
else
|
||||||
|
return do_xfrm (usrc, dest, n, &l_data);
|
||||||
|
}
|
||||||
libc_hidden_def (STRXFRM)
|
libc_hidden_def (STRXFRM)
|
||||||
|
|
||||||
#ifndef WIDE_CHAR_VERSION
|
#ifndef WIDE_CHAR_VERSION
|
||||||
|
Reference in New Issue
Block a user