1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-12-24 17:51:17 +03:00
This commit is contained in:
Jakub Jelinek
2007-07-12 18:26:36 +00:00
parent 7d58530341
commit 0ecb606cb6
6215 changed files with 494638 additions and 305010 deletions

View File

@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -39,6 +39,20 @@
#if defined HAVE_WCTYPE_H || defined _LIBC
# include <wctype.h>
#endif /* HAVE_WCTYPE_H || _LIBC */
#if defined HAVE_STDBOOL_H || defined _LIBC
# include <stdbool.h>
#endif /* HAVE_STDBOOL_H || _LIBC */
#if defined HAVE_STDINT_H || defined _LIBC
# include <stdint.h>
#endif /* HAVE_STDINT_H || _LIBC */
#if defined _LIBC
# include <bits/libc-lock.h>
#else
# define __libc_lock_define(CLASS,NAME)
# define __libc_lock_init(NAME) do { } while (0)
# define __libc_lock_lock(NAME) do { } while (0)
# define __libc_lock_unlock(NAME) do { } while (0)
#endif
/* In case that the system doesn't have isblank(). */
#if !defined _LIBC && !defined HAVE_ISBLANK && !defined isblank
@@ -60,7 +74,7 @@
# ifdef _LIBC
# undef gettext
# define gettext(msgid) \
INTUSE(__dcgettext) (INTUSE(_libc_intl_domainname), msgid, LC_MESSAGES)
INTUSE(__dcgettext) (_libc_intl_domainname, msgid, LC_MESSAGES)
# endif
#else
# define gettext(msgid) (msgid)
@@ -72,6 +86,11 @@
# define gettext_noop(String) String
#endif
/* For loser systems without the definition. */
#ifndef SIZE_MAX
# define SIZE_MAX ((size_t) -1)
#endif
#if (defined MB_CUR_MAX && HAVE_LOCALE_H && HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_WCRTOMB && HAVE_MBRTOWC && HAVE_WCSCOLL) || _LIBC
# define RE_ENABLE_I18N
#endif
@@ -83,8 +102,6 @@
# define inline
#endif
/* Number of bits in a byte. */
#define BYTE_BITS 8
/* Number of single byte character. */
#define SBC_MAX 256
@@ -114,26 +131,28 @@
extern const char __re_error_msgid[] attribute_hidden;
extern const size_t __re_error_msgid_idx[] attribute_hidden;
/* Number of bits in an unsinged int. */
#define UINT_BITS (sizeof (unsigned int) * BYTE_BITS)
/* Number of unsigned int in an bit_set. */
#define BITSET_UINTS ((SBC_MAX + UINT_BITS - 1) / UINT_BITS)
typedef unsigned int bitset[BITSET_UINTS];
typedef unsigned int *re_bitset_ptr_t;
typedef const unsigned int *re_const_bitset_ptr_t;
/* An integer used to represent a set of bits. It must be unsigned,
and must be at least as wide as unsigned int. */
typedef unsigned long int bitset_word_t;
/* All bits set in a bitset_word_t. */
#define BITSET_WORD_MAX ULONG_MAX
/* Number of bits in a bitset_word_t. */
#define BITSET_WORD_BITS (sizeof (bitset_word_t) * CHAR_BIT)
/* Number of bitset_word_t in a bit_set. */
#define BITSET_WORDS (SBC_MAX / BITSET_WORD_BITS)
typedef bitset_word_t bitset_t[BITSET_WORDS];
typedef bitset_word_t *re_bitset_ptr_t;
typedef const bitset_word_t *re_const_bitset_ptr_t;
#define bitset_set(set,i) (set[i / UINT_BITS] |= 1 << i % UINT_BITS)
#define bitset_clear(set,i) (set[i / UINT_BITS] &= ~(1 << i % UINT_BITS))
#define bitset_contain(set,i) (set[i / UINT_BITS] & (1 << i % UINT_BITS))
#define bitset_empty(set) memset (set, 0, sizeof (unsigned int) * BITSET_UINTS)
#define bitset_set_all(set) \
memset (set, 255, sizeof (unsigned int) * BITSET_UINTS)
#define bitset_copy(dest,src) \
memcpy (dest, src, sizeof (unsigned int) * BITSET_UINTS)
static inline void bitset_not (bitset set);
static inline void bitset_merge (bitset dest, const bitset src);
static inline void bitset_not_merge (bitset dest, const bitset src);
static inline void bitset_mask (bitset dest, const bitset src);
#define bitset_set(set,i) \
(set[i / BITSET_WORD_BITS] |= (bitset_word_t) 1 << i % BITSET_WORD_BITS)
#define bitset_clear(set,i) \
(set[i / BITSET_WORD_BITS] &= ~((bitset_word_t) 1 << i % BITSET_WORD_BITS))
#define bitset_contain(set,i) \
(set[i / BITSET_WORD_BITS] & ((bitset_word_t) 1 << i % BITSET_WORD_BITS))
#define bitset_empty(set) memset (set, '\0', sizeof (bitset_t))
#define bitset_set_all(set) memset (set, '\xff', sizeof (bitset_t))
#define bitset_copy(dest,src) memcpy (dest, src, sizeof (bitset_t))
#define PREV_WORD_CONSTRAINT 0x0001
#define PREV_NOTWORD_CONSTRAINT 0x0002
@@ -143,18 +162,21 @@ static inline void bitset_mask (bitset dest, const bitset src);
#define NEXT_NEWLINE_CONSTRAINT 0x0020
#define PREV_BEGBUF_CONSTRAINT 0x0040
#define NEXT_ENDBUF_CONSTRAINT 0x0080
#define DUMMY_CONSTRAINT 0x0100
#define WORD_DELIM_CONSTRAINT 0x0100
#define NOT_WORD_DELIM_CONSTRAINT 0x0200
typedef enum
{
INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
WORD_DELIM = DUMMY_CONSTRAINT
WORD_DELIM = WORD_DELIM_CONSTRAINT,
NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
} re_context_type;
typedef struct
@@ -186,16 +208,16 @@ typedef enum
OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
OP_ALT = EPSILON_BIT | 2,
OP_DUP_ASTERISK = EPSILON_BIT | 3,
OP_DUP_PLUS = EPSILON_BIT | 4,
OP_DUP_QUESTION = EPSILON_BIT | 5,
ANCHOR = EPSILON_BIT | 6,
OP_DELETED_SUBEXP = EPSILON_BIT | 7,
ANCHOR = EPSILON_BIT | 4,
/* Tree type, these are used only by tree. */
CONCAT = 16,
SUBEXP = 17,
/* Token type, these are used only by token. */
OP_OPEN_BRACKET = 17,
OP_DUP_PLUS = 18,
OP_DUP_QUESTION,
OP_OPEN_BRACKET,
OP_CLOSE_BRACKET,
OP_CHARSET_RANGE,
OP_OPEN_DUP_NUM,
@@ -284,6 +306,7 @@ typedef struct
unsigned int duplicated : 1;
unsigned int opt_subexp : 1;
#ifdef RE_ENABLE_I18N
unsigned int accept_mb : 1;
/* These 2 bits can be moved into the union if needed (e.g. if running out
of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
unsigned int mb_partial : 1;
@@ -292,8 +315,6 @@ typedef struct
} re_token_t;
#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
#define ACCEPT_MB_NODE(type) \
((type) >= OP_PERIOD && (type) <= OP_UTF8_PERIOD)
struct re_string_t
{
@@ -337,7 +358,7 @@ struct re_string_t
the beginning of the input string. */
unsigned int tip_context;
/* The translation passed as a part of an argument of re_compile_pattern. */
unsigned RE_TRANSLATE_TYPE trans;
RE_TRANSLATE_TYPE trans;
/* Copy of re_dfa_t's word_char. */
re_const_bitset_ptr_t word_char;
/* 1 if REG_ICASE. */
@@ -364,45 +385,18 @@ typedef struct re_dfa_t re_dfa_t;
# endif
#endif
#ifndef RE_NO_INTERNAL_PROTOTYPES
static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str,
int len, int init_len,
RE_TRANSLATE_TYPE trans, int icase,
const re_dfa_t *dfa)
internal_function;
static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str,
int len, RE_TRANSLATE_TYPE trans,
int icase, const re_dfa_t *dfa)
internal_function;
static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
int eflags) internal_function;
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
int new_buf_len)
internal_function;
# ifdef RE_ENABLE_I18N
#ifdef RE_ENABLE_I18N
static void build_wcs_buffer (re_string_t *pstr) internal_function;
static int build_wcs_upper_buffer (re_string_t *pstr) internal_function;
# endif /* RE_ENABLE_I18N */
#endif /* RE_ENABLE_I18N */
static void build_upper_buffer (re_string_t *pstr) internal_function;
static void re_string_translate_buffer (re_string_t *pstr) internal_function;
static void re_string_destruct (re_string_t *pstr) internal_function;
# ifdef RE_ENABLE_I18N
static int re_string_elem_size_at (const re_string_t *pstr, int idx)
internal_function __attribute ((pure));
static inline int re_string_char_size_at (const re_string_t *pstr, int idx)
internal_function __attribute ((pure));
static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx)
internal_function __attribute ((pure));
# endif /* RE_ENABLE_I18N */
static unsigned int re_string_context_at (const re_string_t *input, int idx,
int eflags)
internal_function __attribute ((pure));
static unsigned char re_string_peek_byte_case (const re_string_t *pstr,
int idx)
internal_function __attribute ((pure));
static unsigned char re_string_fetch_byte_case (re_string_t *pstr)
internal_function __attribute ((pure));
#endif
#define re_string_peek_byte(pstr, offset) \
((pstr)->mbs[(pstr)->cur_idx + offset])
#define re_string_fetch_byte(pstr) \
@@ -420,6 +414,21 @@ static unsigned char re_string_fetch_byte_case (re_string_t *pstr)
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
#include <alloca.h>
#ifndef _LIBC
# if HAVE_ALLOCA
/* The OS usually guarantees only one guard page at the bottom of the stack,
and a page size can be as small as 4096 bytes. So we cannot safely
allocate anything larger than 4096 bytes. Also care for the possibility
of a few compiler-allocated temporary stack slots. */
# define __libc_use_alloca(n) ((n) < 4032)
# else
/* alloca is implemented with malloc, so just use malloc. */
# define __libc_use_alloca(n) 0
# endif
#endif
#define re_malloc(t,n) ((t *) malloc ((n) * sizeof (t)))
#define re_realloc(p,t,n) ((t *) realloc (p, (n) * sizeof (t)))
#define re_free(p) free (p)
@@ -429,15 +438,14 @@ struct bin_tree_t
struct bin_tree_t *parent;
struct bin_tree_t *left;
struct bin_tree_t *right;
struct bin_tree_t *first;
struct bin_tree_t *next;
re_token_t token;
/* `node_idx' is the index in dfa->nodes, if `type' == 0.
Otherwise `type' indicate the type of this node. */
re_token_type_t type;
int node_idx;
int first;
int next;
re_node_set eclosure;
};
typedef struct bin_tree_t bin_tree_t;
@@ -486,7 +494,7 @@ struct re_dfastate_t
re_node_set non_eps_nodes;
re_node_set inveclosure;
re_node_set *entrance_nodes;
struct re_dfastate_t **trtable;
struct re_dfastate_t **trtable, **word_trtable;
unsigned int context : 4;
unsigned int halt : 1;
/* If this state can accept `multi byte'.
@@ -496,7 +504,6 @@ struct re_dfastate_t
/* If this state has backreference node(s). */
unsigned int has_backref : 1;
unsigned int has_constraint : 1;
unsigned int word_trtable : 1;
};
typedef struct re_dfastate_t re_dfastate_t;
@@ -533,7 +540,6 @@ typedef struct
{
int str_idx;
int node;
int next_last_offset;
state_array_t *path;
int alasts; /* Allocation size of LASTS. */
int nlasts; /* The number of LASTS. */
@@ -556,9 +562,9 @@ typedef struct
/* The string object corresponding to the input string. */
re_string_t input;
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
re_dfa_t *const dfa;
const re_dfa_t *const dfa;
#else
re_dfa_t *dfa;
const re_dfa_t *dfa;
#endif
/* EFLAGS of the argument of regexec. */
int eflags;
@@ -605,8 +611,8 @@ struct re_fail_stack_t
struct re_dfa_t
{
re_token_t *nodes;
int nodes_alloc;
int nodes_len;
size_t nodes_alloc;
size_t nodes_len;
int *nexts;
int *org_indices;
re_node_set *edests;
@@ -624,13 +630,12 @@ struct re_dfa_t
/* number of subexpressions `re_nsub' is in regex_t. */
unsigned int state_hash_mask;
int states_alloc;
int init_node;
int nbackref; /* The number of backreference in this dfa. */
/* Bitmap expressing which backreference is used. */
unsigned int used_bkref_map;
unsigned int completed_bkref_map;
bitset_word_t used_bkref_map;
bitset_word_t completed_bkref_map;
unsigned int has_plural_match : 1;
/* If this dfa has "multibyte node", which is a backreference or
@@ -641,52 +646,20 @@ struct re_dfa_t
unsigned int map_notascii : 1;
unsigned int word_ops_used : 1;
int mb_cur_max;
bitset word_char;
bitset_t word_char;
reg_syntax_t syntax;
int *subexp_map;
#ifdef DEBUG
char* re_str;
#endif
__libc_lock_define (, lock)
};
#ifndef RE_NO_INTERNAL_PROTOTYPES
static reg_errcode_t re_node_set_alloc (re_node_set *set, int size) internal_function;
static reg_errcode_t re_node_set_init_1 (re_node_set *set, int elem) internal_function;
static reg_errcode_t re_node_set_init_2 (re_node_set *set, int elem1,
int elem2) internal_function;
#define re_node_set_init_empty(set) memset (set, '\0', sizeof (re_node_set))
static reg_errcode_t re_node_set_init_copy (re_node_set *dest,
const re_node_set *src) internal_function;
static reg_errcode_t re_node_set_add_intersect (re_node_set *dest,
const re_node_set *src1,
const re_node_set *src2) internal_function;
static reg_errcode_t re_node_set_init_union (re_node_set *dest,
const re_node_set *src1,
const re_node_set *src2) internal_function;
static reg_errcode_t re_node_set_merge (re_node_set *dest,
const re_node_set *src) internal_function;
static int re_node_set_insert (re_node_set *set, int elem) internal_function;
static int re_node_set_insert_last (re_node_set *set,
int elem) internal_function;
static int re_node_set_compare (const re_node_set *set1,
const re_node_set *set2)
internal_function __attribute ((pure));
static int re_node_set_contains (const re_node_set *set, int elem)
internal_function __attribute ((pure));
static void re_node_set_remove_at (re_node_set *set, int idx) internal_function;
#define re_node_set_remove(set,id) \
(re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
#define re_node_set_empty(p) ((p)->nelem = 0)
#define re_node_set_free(set) re_free ((set)->elems)
static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode) internal_function;
static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
const re_node_set *nodes) internal_function;
static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
re_dfa_t *dfa,
const re_node_set *nodes,
unsigned int context) internal_function;
static void free_state (re_dfastate_t *state) internal_function;
#endif
typedef enum
@@ -712,41 +685,33 @@ typedef struct
/* Inline functions for bitset operation. */
static inline void
bitset_not (bitset set)
bitset_not (bitset_t set)
{
int bitset_i;
for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
set[bitset_i] = ~set[bitset_i];
}
static inline void
bitset_merge (bitset dest, const bitset src)
bitset_merge (bitset_t dest, const bitset_t src)
{
int bitset_i;
for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
dest[bitset_i] |= src[bitset_i];
}
static inline void
bitset_not_merge (bitset dest, const bitset src)
{
int i;
for (i = 0; i < BITSET_UINTS; ++i)
dest[i] |= ~src[i];
}
static inline void
bitset_mask (bitset dest, const bitset src)
bitset_mask (bitset_t dest, const bitset_t src)
{
int bitset_i;
for (bitset_i = 0; bitset_i < BITSET_UINTS; ++bitset_i)
for (bitset_i = 0; bitset_i < BITSET_WORDS; ++bitset_i)
dest[bitset_i] &= src[bitset_i];
}
#if defined RE_ENABLE_I18N && !defined RE_NO_INTERNAL_PROTOTYPES
#ifdef RE_ENABLE_I18N
/* Inline functions for re_string. */
static inline int
internal_function
internal_function __attribute ((pure))
re_string_char_size_at (const re_string_t *pstr, int idx)
{
int byte_idx;
@@ -759,7 +724,7 @@ re_string_char_size_at (const re_string_t *pstr, int idx)
}
static inline wint_t
internal_function
internal_function __attribute ((pure))
re_string_wchar_at (const re_string_t *pstr, int idx)
{
if (pstr->mb_cur_max == 1)
@@ -768,14 +733,14 @@ re_string_wchar_at (const re_string_t *pstr, int idx)
}
static int
internal_function
internal_function __attribute ((pure))
re_string_elem_size_at (const re_string_t *pstr, int idx)
{
#ifdef _LIBC
# ifdef _LIBC
const unsigned char *p, *extra;
const int32_t *table, *indirect;
int32_t tmp;
# include <locale/weight.h>
# include <locale/weight.h>
uint_fast32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
if (nrules != 0)
@@ -790,7 +755,7 @@ re_string_elem_size_at (const re_string_t *pstr, int idx)
return p - pstr->mbs - idx;
}
else
#endif /* _LIBC */
# endif /* _LIBC */
return 1;
}
#endif /* RE_ENABLE_I18N */