mirror of
https://sourceware.org/git/glibc.git
synced 2025-07-30 22:43:12 +03:00
Update.
2003-11-24 Jakub Jelinek <jakub@redhat.com> * posix/regex_internal.h (re_token_t): Add word_char bit. Add comment. (re_dfa_t): Add sb_char field. (bitset_mask): New function. * posix/regcomp.c (free_dfa_content): Free sb_char. (init_dfa): Don't initialize word_char unnecessarily. Initialize sb_char. (duplicate_node): Don't duplicate !word_char CHARACTERs with NEXT_WORD_CONSTRAINT constraint or word_char CHARACTERs with NEXT_NOTWORD_CONSTRAINT. Return -1 in *new_idx instead. (duplicate_node_closure): Handle clone_dest == -1 from duplicate_node. (peek_token): Initialize word_char bit. (parse_expression, parse_dup_op): Add comments. (parse_bracket_exp): Don't set bitmask bits for multi-byte char starting bytes here at the beginning. Mask off the bits right before creating SIMPLE_BRACKET. (build_charclass_op): Likewise. * posix/regexec.c (group_nodes_into_DFAstates) <case OP_PERIOD>: Only set accept bits for single-byte characters. (group_nodes_into_DFAstates): Don't rely on characters 0 .. 127 being single byte encoded and the rest multi-byte. * posix/bug-regex19.c (tests): Add new tests. (do_mb_tests): Initialize t to *test. (main): Fail even on do_mb_tests errors.
This commit is contained in:
28
ChangeLog
28
ChangeLog
@ -1,3 +1,31 @@
|
|||||||
|
2003-11-24 Jakub Jelinek <jakub@redhat.com>
|
||||||
|
|
||||||
|
* posix/regex_internal.h (re_token_t): Add word_char bit. Add
|
||||||
|
comment.
|
||||||
|
(re_dfa_t): Add sb_char field.
|
||||||
|
(bitset_mask): New function.
|
||||||
|
* posix/regcomp.c (free_dfa_content): Free sb_char.
|
||||||
|
(init_dfa): Don't initialize word_char unnecessarily.
|
||||||
|
Initialize sb_char.
|
||||||
|
(duplicate_node): Don't duplicate !word_char CHARACTERs with
|
||||||
|
NEXT_WORD_CONSTRAINT constraint or word_char CHARACTERs with
|
||||||
|
NEXT_NOTWORD_CONSTRAINT. Return -1 in *new_idx instead.
|
||||||
|
(duplicate_node_closure): Handle clone_dest == -1 from
|
||||||
|
duplicate_node.
|
||||||
|
(peek_token): Initialize word_char bit.
|
||||||
|
(parse_expression, parse_dup_op): Add comments.
|
||||||
|
(parse_bracket_exp): Don't set bitmask bits for multi-byte char
|
||||||
|
starting bytes here at the beginning. Mask off the bits right
|
||||||
|
before creating SIMPLE_BRACKET.
|
||||||
|
(build_charclass_op): Likewise.
|
||||||
|
* posix/regexec.c (group_nodes_into_DFAstates) <case OP_PERIOD>: Only
|
||||||
|
set accept bits for single-byte characters.
|
||||||
|
(group_nodes_into_DFAstates): Don't rely on characters 0 .. 127
|
||||||
|
being single byte encoded and the rest multi-byte.
|
||||||
|
* posix/bug-regex19.c (tests): Add new tests.
|
||||||
|
(do_mb_tests): Initialize t to *test.
|
||||||
|
(main): Fail even on do_mb_tests errors.
|
||||||
|
|
||||||
2003-11-23 Ulrich Drepper <drepper@redhat.com>
|
2003-11-23 Ulrich Drepper <drepper@redhat.com>
|
||||||
|
|
||||||
* posix/regexec.c: Correct several memory allocation problems.
|
* posix/regexec.c: Correct several memory allocation problems.
|
||||||
|
@ -102,6 +102,20 @@ static struct test_s
|
|||||||
{ERE, ".\\b.", "=A=", 0, 0},
|
{ERE, ".\\b.", "=A=", 0, 0},
|
||||||
{ERE, ".\\b.", "==", 0, -1},
|
{ERE, ".\\b.", "==", 0, -1},
|
||||||
{ERE, ".\\b.", "ABA", 0, -1},
|
{ERE, ".\\b.", "ABA", 0, -1},
|
||||||
|
{ERE, "[^k]\\b[^k]", "AA~", 0, 1},
|
||||||
|
{ERE, "[^k]\\b[^k]", "=A=", 0, 0},
|
||||||
|
{ERE, "[^k]\\b[^k]", "Ak~kA~", 0, 4},
|
||||||
|
{ERE, "[^k]\\b[^k]", "==", 0, -1},
|
||||||
|
{ERE, "[^k]\\b[^k]", "ABA", 0, -1},
|
||||||
|
{ERE, "[^k]\\b[^k]", "Ak~", 0, -1},
|
||||||
|
{ERE, "[^k]\\b[^k]", "k=k", 0, -1},
|
||||||
|
{ERE, "[^C]\\b[^C]", "AA~", 0, 1},
|
||||||
|
{ERE, "[^C]\\b[^C]", "=A=", 0, 0},
|
||||||
|
{ERE, "[^C]\\b[^C]", "AC~CA~", 0, 4},
|
||||||
|
{ERE, "[^C]\\b[^C]", "==", 0, -1},
|
||||||
|
{ERE, "[^C]\\b[^C]", "ABA", 0, -1},
|
||||||
|
{ERE, "[^C]\\b[^C]", "AC~", 0, -1},
|
||||||
|
{ERE, "[^C]\\b[^C]", "C=C", 0, -1},
|
||||||
{ERE, "\\<(A|!|.B)", "A=AC", 0, 0},
|
{ERE, "\\<(A|!|.B)", "A=AC", 0, 0},
|
||||||
{ERE, "\\<(A|!|.B)", "=AC", 0, 1},
|
{ERE, "\\<(A|!|.B)", "=AC", 0, 1},
|
||||||
{ERE, "\\<(A|!|.B)", "!AC", 0, 1},
|
{ERE, "\\<(A|!|.B)", "!AC", 0, 1},
|
||||||
@ -140,12 +154,38 @@ static struct test_s
|
|||||||
{ERE, ".\\<.", "AA~", 0, -1},
|
{ERE, ".\\<.", "AA~", 0, -1},
|
||||||
{ERE, ".\\<.", "==", 0, -1},
|
{ERE, ".\\<.", "==", 0, -1},
|
||||||
{ERE, ".\\<.", "ABA", 0, -1},
|
{ERE, ".\\<.", "ABA", 0, -1},
|
||||||
|
{ERE, "[^k]\\<[^k]", "=k=A=", 0, 2},
|
||||||
|
{ERE, "[^k]\\<[^k]", "kk~", 0, -1},
|
||||||
|
{ERE, "[^k]\\<[^k]", "==", 0, -1},
|
||||||
|
{ERE, "[^k]\\<[^k]", "ABA", 0, -1},
|
||||||
|
{ERE, "[^k]\\<[^k]", "=k=", 0, -1},
|
||||||
|
{ERE, "[^C]\\<[^C]", "=C=A=", 0, 2},
|
||||||
|
{ERE, "[^C]\\<[^C]", "CC~", 0, -1},
|
||||||
|
{ERE, "[^C]\\<[^C]", "==", 0, -1},
|
||||||
|
{ERE, "[^C]\\<[^C]", "ABA", 0, -1},
|
||||||
|
{ERE, "[^C]\\<[^C]", "=C=", 0, -1},
|
||||||
{ERE, ".\\B.", "ABA", 0, 0},
|
{ERE, ".\\B.", "ABA", 0, 0},
|
||||||
{ERE, ".\\B.", "=BDC", 0, 1},
|
{ERE, ".\\B.", "=BDC", 0, 1},
|
||||||
|
{ERE, "[^k]\\B[^k]", "kkkABA", 0, 3},
|
||||||
|
{ERE, "[^k]\\B[^k]", "kBk", 0, -1},
|
||||||
|
{ERE, "[^C]\\B[^C]", "CCCABA", 0, 3},
|
||||||
|
{ERE, "[^C]\\B[^C]", "CBC", 0, -1},
|
||||||
{ERE, ".(\\b|\\B).", "=~AB", 0, 1},
|
{ERE, ".(\\b|\\B).", "=~AB", 0, 1},
|
||||||
{ERE, ".(\\b|\\B).", "A=C", 0, 0},
|
{ERE, ".(\\b|\\B).", "A=C", 0, 0},
|
||||||
{ERE, ".(\\b|\\B).", "ABC", 0, 0},
|
{ERE, ".(\\b|\\B).", "ABC", 0, 0},
|
||||||
{ERE, ".(\\b|\\B).", "=~\\!", 0, -1},
|
{ERE, ".(\\b|\\B).", "=~\\!", 0, -1},
|
||||||
|
{ERE, "[^k](\\b|\\B)[^k]", "=~AB", 0, 1},
|
||||||
|
{ERE, "[^k](\\b|\\B)[^k]", "A=C", 0, 0},
|
||||||
|
{ERE, "[^k](\\b|\\B)[^k]", "ABC", 0, 0},
|
||||||
|
{ERE, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 3},
|
||||||
|
{ERE, "[^k](\\b|\\B)[^k]", "=~\\!", 0, -1},
|
||||||
|
{ERE, "[^k](\\b|\\B)[^k]", "=~kB", 0, -1},
|
||||||
|
{ERE, "[^C](\\b|\\B)[^C]", "=~AB", 0, 1},
|
||||||
|
{ERE, "[^C](\\b|\\B)[^C]", "A=C", 0, 0},
|
||||||
|
{ERE, "[^C](\\b|\\B)[^C]", "ABC", 0, 0},
|
||||||
|
{ERE, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 3},
|
||||||
|
{ERE, "[^C](\\b|\\B)[^C]", "=~\\!", 0, -1},
|
||||||
|
{ERE, "[^C](\\b|\\B)[^C]", "=~CB", 0, -1},
|
||||||
{ERE, "\\b([A]|[!]|.B)", "A=AC", 0, 0},
|
{ERE, "\\b([A]|[!]|.B)", "A=AC", 0, 0},
|
||||||
{ERE, "\\b([A]|[!]|.B)", "=AC", 0, 1},
|
{ERE, "\\b([A]|[!]|.B)", "=AC", 0, 1},
|
||||||
{ERE, "\\b([A]|[!]|.B)", "!AC", 0, 1},
|
{ERE, "\\b([A]|[!]|.B)", "!AC", 0, 1},
|
||||||
@ -288,6 +328,7 @@ do_mb_tests (const struct test_s *test)
|
|||||||
char string[strlen (test->string) * 4 + 1];
|
char string[strlen (test->string) * 4 + 1];
|
||||||
char fail[8 + sizeof ("UTF-8 ")];
|
char fail[8 + sizeof ("UTF-8 ")];
|
||||||
|
|
||||||
|
t = *test;
|
||||||
t.pattern = pattern;
|
t.pattern = pattern;
|
||||||
t.string = string;
|
t.string = string;
|
||||||
strcpy (fail, "UTF-8 ");
|
strcpy (fail, "UTF-8 ");
|
||||||
@ -367,9 +408,7 @@ main (void)
|
|||||||
ret = 1;
|
ret = 1;
|
||||||
}
|
}
|
||||||
ret |= do_one_test (&tests[i], "UTF-8 ");
|
ret |= do_one_test (&tests[i], "UTF-8 ");
|
||||||
// Until the implementation is fixed, ignore the results of the
|
ret |= do_mb_tests (&tests[i]);
|
||||||
// MB tests.
|
|
||||||
/* ret |= */do_mb_tests (&tests[i]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
|
112
posix/regcomp.c
112
posix/regcomp.c
@ -607,6 +607,9 @@ free_dfa_content (re_dfa_t *dfa)
|
|||||||
}
|
}
|
||||||
re_free (dfa->state_table);
|
re_free (dfa->state_table);
|
||||||
re_free (dfa->word_char);
|
re_free (dfa->word_char);
|
||||||
|
#ifdef RE_ENABLE_I18N
|
||||||
|
re_free (dfa->sb_char);
|
||||||
|
#endif
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
re_free (dfa->re_str);
|
re_free (dfa->re_str);
|
||||||
#endif
|
#endif
|
||||||
@ -831,7 +834,7 @@ init_dfa (dfa, pat_len)
|
|||||||
|
|
||||||
dfa->subexps_alloc = 1;
|
dfa->subexps_alloc = 1;
|
||||||
dfa->subexps = re_malloc (re_subexp_t, dfa->subexps_alloc);
|
dfa->subexps = re_malloc (re_subexp_t, dfa->subexps_alloc);
|
||||||
dfa->word_char = NULL;
|
/* dfa->word_char = NULL; */
|
||||||
|
|
||||||
dfa->mb_cur_max = MB_CUR_MAX;
|
dfa->mb_cur_max = MB_CUR_MAX;
|
||||||
#ifdef _LIBC
|
#ifdef _LIBC
|
||||||
@ -841,6 +844,25 @@ init_dfa (dfa, pat_len)
|
|||||||
dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
|
dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
|
||||||
!= 0);
|
!= 0);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef RE_ENABLE_I18N
|
||||||
|
if (dfa->mb_cur_max > 1)
|
||||||
|
{
|
||||||
|
int i, j, ch;
|
||||||
|
|
||||||
|
dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
|
||||||
|
if (BE (dfa->sb_char == NULL, 0))
|
||||||
|
return REG_ESPACE;
|
||||||
|
#ifdef _LIBC
|
||||||
|
if (dfa->is_utf8)
|
||||||
|
memset (dfa->sb_char, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
|
||||||
|
for (j = 0; j < UINT_BITS; ++j, ++ch)
|
||||||
|
if (btowc (ch) != WEOF)
|
||||||
|
dfa->sb_char[i] |= 1 << j;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
if (BE (dfa->nodes == NULL || dfa->state_table == NULL
|
if (BE (dfa->nodes == NULL || dfa->state_table == NULL
|
||||||
|| dfa->subexps == NULL, 0))
|
|| dfa->subexps == NULL, 0))
|
||||||
@ -1311,6 +1333,8 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
|
|||||||
if (BE (err != REG_NOERROR, 0))
|
if (BE (err != REG_NOERROR, 0))
|
||||||
return err;
|
return err;
|
||||||
dfa->nexts[clone_node] = dfa->nexts[org_node];
|
dfa->nexts[clone_node] = dfa->nexts[org_node];
|
||||||
|
if (clone_dest == -1)
|
||||||
|
break;
|
||||||
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
|
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
|
||||||
if (BE (ret < 0, 0))
|
if (BE (ret < 0, 0))
|
||||||
return REG_ESPACE;
|
return REG_ESPACE;
|
||||||
@ -1348,6 +1372,8 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
|
|||||||
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
|
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
|
||||||
if (BE (err != REG_NOERROR, 0))
|
if (BE (err != REG_NOERROR, 0))
|
||||||
return err;
|
return err;
|
||||||
|
if (clone_dest == -1)
|
||||||
|
break;
|
||||||
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
|
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
|
||||||
if (BE (ret < 0, 0))
|
if (BE (ret < 0, 0))
|
||||||
return REG_ESPACE;
|
return REG_ESPACE;
|
||||||
@ -1366,13 +1392,16 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
|
|||||||
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
|
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
|
||||||
if (BE (err != REG_NOERROR, 0))
|
if (BE (err != REG_NOERROR, 0))
|
||||||
return err;
|
return err;
|
||||||
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
|
if (clone_dest != -1)
|
||||||
if (BE (ret < 0, 0))
|
{
|
||||||
return REG_ESPACE;
|
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
|
||||||
err = duplicate_node_closure (dfa, org_dest, clone_dest,
|
if (BE (ret < 0, 0))
|
||||||
root_node, constraint);
|
return REG_ESPACE;
|
||||||
if (BE (err != REG_NOERROR, 0))
|
err = duplicate_node_closure (dfa, org_dest, clone_dest,
|
||||||
return err;
|
root_node, constraint);
|
||||||
|
if (BE (err != REG_NOERROR, 0))
|
||||||
|
return err;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1387,6 +1416,8 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
|
|||||||
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
|
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
|
||||||
if (BE (err != REG_NOERROR, 0))
|
if (BE (err != REG_NOERROR, 0))
|
||||||
return err;
|
return err;
|
||||||
|
if (clone_dest == -1)
|
||||||
|
break;
|
||||||
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
|
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
|
||||||
if (BE (ret < 0, 0))
|
if (BE (ret < 0, 0))
|
||||||
return REG_ESPACE;
|
return REG_ESPACE;
|
||||||
@ -1426,7 +1457,21 @@ duplicate_node (new_idx, dfa, org_idx, constraint)
|
|||||||
int *new_idx, org_idx;
|
int *new_idx, org_idx;
|
||||||
unsigned int constraint;
|
unsigned int constraint;
|
||||||
{
|
{
|
||||||
int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx], 1);
|
int dup_idx;
|
||||||
|
|
||||||
|
if (dfa->nodes[org_idx].type == CHARACTER
|
||||||
|
&& (((constraint & NEXT_WORD_CONSTRAINT)
|
||||||
|
&& !dfa->nodes[org_idx].word_char)
|
||||||
|
|| ((constraint & NEXT_NOTWORD_CONSTRAINT)
|
||||||
|
&& dfa->nodes[org_idx].word_char)))
|
||||||
|
{
|
||||||
|
/* \<!, \>W etc. can never match. Don't duplicate them, instead
|
||||||
|
tell the caller they shouldn't be added to edests. */
|
||||||
|
*new_idx = -1;
|
||||||
|
return REG_NOERROR;
|
||||||
|
}
|
||||||
|
|
||||||
|
dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx], 1);
|
||||||
if (BE (dup_idx == -1, 0))
|
if (BE (dup_idx == -1, 0))
|
||||||
return REG_ESPACE;
|
return REG_ESPACE;
|
||||||
dfa->nodes[dup_idx].constraint = constraint;
|
dfa->nodes[dup_idx].constraint = constraint;
|
||||||
@ -1614,6 +1659,7 @@ peek_token (token, input, syntax)
|
|||||||
c = re_string_peek_byte (input, 0);
|
c = re_string_peek_byte (input, 0);
|
||||||
token->opr.c = c;
|
token->opr.c = c;
|
||||||
|
|
||||||
|
token->word_char = 0;
|
||||||
#ifdef RE_ENABLE_I18N
|
#ifdef RE_ENABLE_I18N
|
||||||
token->mb_partial = 0;
|
token->mb_partial = 0;
|
||||||
if (input->mb_cur_max > 1 &&
|
if (input->mb_cur_max > 1 &&
|
||||||
@ -1636,6 +1682,17 @@ peek_token (token, input, syntax)
|
|||||||
c2 = re_string_peek_byte_case (input, 1);
|
c2 = re_string_peek_byte_case (input, 1);
|
||||||
token->opr.c = c2;
|
token->opr.c = c2;
|
||||||
token->type = CHARACTER;
|
token->type = CHARACTER;
|
||||||
|
#ifdef RE_ENABLE_I18N
|
||||||
|
if (input->mb_cur_max > 1)
|
||||||
|
{
|
||||||
|
wint_t wc = re_string_wchar_at (input,
|
||||||
|
re_string_cur_idx (input) + 1);
|
||||||
|
token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
token->word_char = IS_WORD_CHAR (c2) != 0;
|
||||||
|
|
||||||
switch (c2)
|
switch (c2)
|
||||||
{
|
{
|
||||||
case '|':
|
case '|':
|
||||||
@ -1739,6 +1796,16 @@ peek_token (token, input, syntax)
|
|||||||
}
|
}
|
||||||
|
|
||||||
token->type = CHARACTER;
|
token->type = CHARACTER;
|
||||||
|
#ifdef RE_ENABLE_I18N
|
||||||
|
if (input->mb_cur_max > 1)
|
||||||
|
{
|
||||||
|
wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
|
||||||
|
token->word_char = IS_WIDE_WORD_CHAR (wc) != 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
token->word_char = IS_WORD_CHAR (token->opr.c);
|
||||||
|
|
||||||
switch (c)
|
switch (c)
|
||||||
{
|
{
|
||||||
case '\n':
|
case '\n':
|
||||||
@ -2140,6 +2207,8 @@ parse_expression (regexp, preg, token, syntax, nest, err)
|
|||||||
|
|
||||||
/* Then we can these characters as normal characters. */
|
/* Then we can these characters as normal characters. */
|
||||||
token->type = CHARACTER;
|
token->type = CHARACTER;
|
||||||
|
/* mb_partial and word_char bits should be initialized already
|
||||||
|
by peek_token. */
|
||||||
tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
|
tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
|
||||||
if (BE (tree == NULL, 0))
|
if (BE (tree == NULL, 0))
|
||||||
{
|
{
|
||||||
@ -2475,6 +2544,8 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
|
|||||||
re_string_set_index (regexp, start_idx);
|
re_string_set_index (regexp, start_idx);
|
||||||
*token = start_token;
|
*token = start_token;
|
||||||
token->type = CHARACTER;
|
token->type = CHARACTER;
|
||||||
|
/* mb_partial and word_char bits should be already initialized by
|
||||||
|
peek_token. */
|
||||||
return dup_elem;
|
return dup_elem;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2952,7 +3023,6 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
|
|||||||
if (token->type == OP_NON_MATCH_LIST)
|
if (token->type == OP_NON_MATCH_LIST)
|
||||||
{
|
{
|
||||||
#ifdef RE_ENABLE_I18N
|
#ifdef RE_ENABLE_I18N
|
||||||
int i;
|
|
||||||
mbcset->non_match = 1;
|
mbcset->non_match = 1;
|
||||||
#else /* not RE_ENABLE_I18N */
|
#else /* not RE_ENABLE_I18N */
|
||||||
non_match = 1;
|
non_match = 1;
|
||||||
@ -2966,12 +3036,6 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
|
|||||||
*err = REG_BADPAT;
|
*err = REG_BADPAT;
|
||||||
goto parse_bracket_exp_free_return;
|
goto parse_bracket_exp_free_return;
|
||||||
}
|
}
|
||||||
#ifdef RE_ENABLE_I18N
|
|
||||||
if (dfa->mb_cur_max > 1)
|
|
||||||
for (i = 0; i < SBC_MAX; ++i)
|
|
||||||
if (__btowc (i) == WEOF)
|
|
||||||
bitset_set (sbcset, i);
|
|
||||||
#endif /* RE_ENABLE_I18N */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* We treat the first ']' as a normal character. */
|
/* We treat the first ']' as a normal character. */
|
||||||
@ -3126,6 +3190,11 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
|
|||||||
if (non_match)
|
if (non_match)
|
||||||
#endif /* not RE_ENABLE_I18N */
|
#endif /* not RE_ENABLE_I18N */
|
||||||
bitset_not (sbcset);
|
bitset_not (sbcset);
|
||||||
|
#ifdef RE_ENABLE_I18N
|
||||||
|
/* Ensure only single byte characters are set. */
|
||||||
|
if (dfa->mb_cur_max > 1)
|
||||||
|
bitset_mask (sbcset, dfa->sb_char);
|
||||||
|
#endif /* RE_ENABLE_I18N */
|
||||||
|
|
||||||
/* Build a tree for simple bracket. */
|
/* Build a tree for simple bracket. */
|
||||||
br_token.type = SIMPLE_BRACKET;
|
br_token.type = SIMPLE_BRACKET;
|
||||||
@ -3493,16 +3562,11 @@ build_charclass_op (dfa, trans, class_name, extra, not, err)
|
|||||||
if (not)
|
if (not)
|
||||||
{
|
{
|
||||||
#ifdef RE_ENABLE_I18N
|
#ifdef RE_ENABLE_I18N
|
||||||
int i;
|
|
||||||
/*
|
/*
|
||||||
if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
|
if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
|
||||||
bitset_set(cset->sbcset, '\0');
|
bitset_set(cset->sbcset, '\0');
|
||||||
*/
|
*/
|
||||||
mbcset->non_match = 1;
|
mbcset->non_match = 1;
|
||||||
if (dfa->mb_cur_max > 1)
|
|
||||||
for (i = 0; i < SBC_MAX; ++i)
|
|
||||||
if (__btowc (i) == WEOF)
|
|
||||||
bitset_set (sbcset, i);
|
|
||||||
#else /* not RE_ENABLE_I18N */
|
#else /* not RE_ENABLE_I18N */
|
||||||
non_match = 1;
|
non_match = 1;
|
||||||
#endif /* not RE_ENABLE_I18N */
|
#endif /* not RE_ENABLE_I18N */
|
||||||
@ -3536,6 +3600,12 @@ build_charclass_op (dfa, trans, class_name, extra, not, err)
|
|||||||
#endif /* not RE_ENABLE_I18N */
|
#endif /* not RE_ENABLE_I18N */
|
||||||
bitset_not (sbcset);
|
bitset_not (sbcset);
|
||||||
|
|
||||||
|
#ifdef RE_ENABLE_I18N
|
||||||
|
/* Ensure only single byte characters are set. */
|
||||||
|
if (dfa->mb_cur_max > 1)
|
||||||
|
bitset_mask (sbcset, dfa->sb_char);
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Build a tree for simple bracket. */
|
/* Build a tree for simple bracket. */
|
||||||
br_token.type = SIMPLE_BRACKET;
|
br_token.type = SIMPLE_BRACKET;
|
||||||
br_token.opr.sbcset = sbcset;
|
br_token.opr.sbcset = sbcset;
|
||||||
|
@ -133,6 +133,7 @@ typedef unsigned int *re_bitset_ptr_t;
|
|||||||
static inline void bitset_not (bitset set);
|
static inline void bitset_not (bitset set);
|
||||||
static inline void bitset_merge (bitset dest, const bitset src);
|
static inline void bitset_merge (bitset dest, const bitset src);
|
||||||
static inline void bitset_not_merge (bitset dest, const bitset src);
|
static inline void bitset_not_merge (bitset dest, const bitset src);
|
||||||
|
static inline void bitset_mask (bitset dest, const bitset src);
|
||||||
|
|
||||||
#define PREV_WORD_CONSTRAINT 0x0001
|
#define PREV_WORD_CONSTRAINT 0x0001
|
||||||
#define PREV_NOTWORD_CONSTRAINT 0x0002
|
#define PREV_NOTWORD_CONSTRAINT 0x0002
|
||||||
@ -281,8 +282,11 @@ typedef struct
|
|||||||
unsigned int constraint : 10; /* context constraint */
|
unsigned int constraint : 10; /* context constraint */
|
||||||
unsigned int duplicated : 1;
|
unsigned int duplicated : 1;
|
||||||
#ifdef RE_ENABLE_I18N
|
#ifdef RE_ENABLE_I18N
|
||||||
|
/* These 2 bits can be moved into the union if needed (e.g. if running out
|
||||||
|
of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
|
||||||
unsigned int mb_partial : 1;
|
unsigned int mb_partial : 1;
|
||||||
#endif
|
#endif
|
||||||
|
unsigned int word_char : 1;
|
||||||
} re_token_t;
|
} re_token_t;
|
||||||
|
|
||||||
#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
|
#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
|
||||||
@ -601,6 +605,7 @@ struct re_dfa_t
|
|||||||
re_dfastate_t *init_state_begbuf;
|
re_dfastate_t *init_state_begbuf;
|
||||||
bin_tree_t *str_tree;
|
bin_tree_t *str_tree;
|
||||||
bin_tree_storage_t *str_tree_storage;
|
bin_tree_storage_t *str_tree_storage;
|
||||||
|
re_bitset_ptr_t sb_char;
|
||||||
int str_tree_storage_idx;
|
int str_tree_storage_idx;
|
||||||
|
|
||||||
/* number of subexpressions `re_nsub' is in regex_t. */
|
/* number of subexpressions `re_nsub' is in regex_t. */
|
||||||
@ -711,6 +716,16 @@ bitset_not_merge (dest, src)
|
|||||||
dest[i] |= ~src[i];
|
dest[i] |= ~src[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void
|
||||||
|
bitset_mask (dest, src)
|
||||||
|
bitset dest;
|
||||||
|
const bitset src;
|
||||||
|
{
|
||||||
|
int bitset_i;
|
||||||
|
for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
|
||||||
|
dest[bitset_i] &= src[bitset_i];
|
||||||
|
}
|
||||||
|
|
||||||
#if defined RE_ENABLE_I18N && !defined RE_NO_INTERNAL_PROTOTYPES
|
#if defined RE_ENABLE_I18N && !defined RE_NO_INTERNAL_PROTOTYPES
|
||||||
/* Inline functions for re_string. */
|
/* Inline functions for re_string. */
|
||||||
static inline int
|
static inline int
|
||||||
|
@ -3341,7 +3341,12 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
|
|||||||
}
|
}
|
||||||
else if (type == OP_PERIOD)
|
else if (type == OP_PERIOD)
|
||||||
{
|
{
|
||||||
bitset_set_all (accepts);
|
#ifdef RE_ENABLE_I18N
|
||||||
|
if (dfa->mb_cur_max > 1)
|
||||||
|
bitset_merge (accepts, dfa->sb_char);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
bitset_set_all (accepts);
|
||||||
if (!(preg->syntax & RE_DOT_NEWLINE))
|
if (!(preg->syntax & RE_DOT_NEWLINE))
|
||||||
bitset_clear (accepts, '\n');
|
bitset_clear (accepts, '\n');
|
||||||
if (preg->syntax & RE_DOT_NOT_NULL)
|
if (preg->syntax & RE_DOT_NOT_NULL)
|
||||||
@ -3362,8 +3367,6 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
|
|||||||
match it the context. */
|
match it the context. */
|
||||||
if (constraint)
|
if (constraint)
|
||||||
{
|
{
|
||||||
int word_char_max;
|
|
||||||
|
|
||||||
if (constraint & NEXT_NEWLINE_CONSTRAINT)
|
if (constraint & NEXT_NEWLINE_CONSTRAINT)
|
||||||
{
|
{
|
||||||
int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
|
int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
|
||||||
@ -3379,16 +3382,28 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This assumes ASCII compatible locale. We cannot say
|
|
||||||
anything about the non-ascii chars. */
|
|
||||||
word_char_max
|
|
||||||
= dfa->mb_cur_max > 1 ? BITSET_UINTS / 2 : BITSET_UINTS;
|
|
||||||
if (constraint & NEXT_WORD_CONSTRAINT)
|
if (constraint & NEXT_WORD_CONSTRAINT)
|
||||||
for (j = 0; j < word_char_max; ++j)
|
{
|
||||||
accepts[j] &= dfa->word_char[j];
|
#ifdef RE_ENABLE_I18N
|
||||||
|
if (dfa->mb_cur_max > 1)
|
||||||
|
for (j = 0; j < BITSET_UINTS; ++j)
|
||||||
|
accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
for (j = 0; j < BITSET_UINTS; ++j)
|
||||||
|
accepts[j] &= dfa->word_char[j];
|
||||||
|
}
|
||||||
if (constraint & NEXT_NOTWORD_CONSTRAINT)
|
if (constraint & NEXT_NOTWORD_CONSTRAINT)
|
||||||
for (j = 0; j < word_char_max; ++j)
|
{
|
||||||
accepts[j] &= ~dfa->word_char[j];
|
#ifdef RE_ENABLE_I18N
|
||||||
|
if (dfa->mb_cur_max > 1)
|
||||||
|
for (j = 0; j < BITSET_UINTS; ++j)
|
||||||
|
accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]);
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
for (j = 0; j < BITSET_UINTS; ++j)
|
||||||
|
accepts[j] &= ~dfa->word_char[j];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Then divide `accepts' into DFA states, or create a new
|
/* Then divide `accepts' into DFA states, or create a new
|
||||||
|
Reference in New Issue
Block a user