mirror of
https://sourceware.org/git/glibc.git
synced 2025-07-30 22:43:12 +03:00
Update.
2002-04-22 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regcomp.c (re_compile_internal): Adapt it to new interface of buffer building functions. * posix/regex_internal.c (re_string_allocate): New function. (re_string_realloc_buffers): New function. (re_string_skip_chars): New function. (re_string_reconstruct): New function. (re_string_construct): Adapt it to new interface of buffer building functions. (re_string_construct_common): Likewise. (build_wcs_buffer): Likewise. (build_wcs_upper_buffer): Likewise. (build_upper_buffer): Likewise. (re_string_translate_buffer): Likewise. (re_string_context_at): Adapt it to variable length buffers. * posix/regex_internal.h (re_string_t): Add new fields to handle variable length buffers. (re_match_context_t): Likewise. * posix/regexec.c (re_search_internal): Adapt it to new interface of re_string_t and re_match_context_t. (acquire_init_state_context): Likewise. (check_matching): Likewise. (check_halt_state_context): Likewise. (proceed_next_node): Likewise. (set_regs): Likewise. (sift_states_backward): Likewise. (clean_state_log_if_need): Likewise. (sift_states_iter_mb): Likewise. (sift_states_iter_bkref): Likewise. (add_epsilon_backreference): Likewise. (transit_state): Likewise. (transit_state_sb): Likewise. (transit_state_mb): Likewise. (transit_state_bkref): Likewise. (transit_state_bkref_loop): Likewise. (check_node_accept): Likewise. (match_ctx_init): Likewise. (extend_buffers): New function. 2002-04-21 Bruno Haible <bruno@clisp.org> * iconvdata/tst-table.sh: For the second check, use the truncated GB18030 charmap table, like for the first check.
This commit is contained in:
@ -201,33 +201,67 @@ typedef struct
|
||||
|
||||
struct re_string_t
|
||||
{
|
||||
/* Indicate the raw buffer which is the original string passed as an
|
||||
argument of regexec(), re_search(), etc.. */
|
||||
const unsigned char *raw_mbs;
|
||||
/* Index in RAW_MBS. Each character mbs[i] corresponds to
|
||||
raw_mbs[raw_mbs_idx + i]. */
|
||||
int raw_mbs_idx;
|
||||
/* Store the multibyte string. In case of "case insensitive mode" like
|
||||
REG_ICASE, upper cases of the string are stored. */
|
||||
const unsigned char *mbs;
|
||||
REG_ICASE, upper cases of the string are stored, otherwise MBS points
|
||||
the same address that RAW_MBS points. */
|
||||
unsigned char *mbs;
|
||||
/* Store the case sensitive multibyte string. In case of
|
||||
"case insensitive mode", the original string are stored,
|
||||
otherwise MBS_CASE points the same address that MBS points. */
|
||||
const unsigned char *mbs_case;
|
||||
int cur_idx;
|
||||
int len;
|
||||
unsigned char *mbs_case;
|
||||
#ifdef RE_ENABLE_I18N
|
||||
/* Store the wide character string which is corresponding to MBS. */
|
||||
wchar_t *wcs;
|
||||
mbstate_t cur_state;
|
||||
#endif
|
||||
/* 1 if mbs is allocated by regex library. */
|
||||
unsigned int mbs_alloc : 1;
|
||||
/* 1 if mbs_case is allocated by regex library. */
|
||||
unsigned int mbs_case_alloc : 1;
|
||||
/* The length of the valid characters in the buffers. */
|
||||
int valid_len;
|
||||
/* The length of the buffers MBS, MBS_CASE, and WCS. */
|
||||
int bufs_len;
|
||||
/* The index in MBS, which is updated by re_string_fetch_byte. */
|
||||
int cur_idx;
|
||||
/* This is length_of_RAW_MBS - RAW_MBS_IDX. */
|
||||
int len;
|
||||
/* The context of mbs[0]. We store the context independently, since
|
||||
the context of mbs[0] may be different from raw_mbs[0], which is
|
||||
the beginning of the input string. */
|
||||
unsigned int tip_context;
|
||||
/* The translation passed as a part of an argument of re_compile_pattern. */
|
||||
RE_TRANSLATE_TYPE trans;
|
||||
/* 1 if REG_ICASE. */
|
||||
unsigned int icase : 1;
|
||||
};
|
||||
typedef struct re_string_t re_string_t;
|
||||
/* In case of REG_ICASE, we allocate the buffer dynamically for mbs. */
|
||||
#define MBS_ALLOCATED(pstr) (pstr->icase)
|
||||
/* In case that we need translation, we allocate the buffer dynamically
|
||||
for mbs_case. Note that mbs == mbs_case if not REG_ICASE. */
|
||||
#define MBS_CASE_ALLOCATED(pstr) (pstr->trans != NULL)
|
||||
|
||||
|
||||
static reg_errcode_t re_string_allocate (re_string_t *pstr,
|
||||
const unsigned char *str, int len,
|
||||
int init_len,
|
||||
RE_TRANSLATE_TYPE trans, int icase);
|
||||
static reg_errcode_t re_string_construct (re_string_t *pstr,
|
||||
const unsigned char *str, int len,
|
||||
RE_TRANSLATE_TYPE trans);
|
||||
static reg_errcode_t re_string_construct_toupper (re_string_t *pstr,
|
||||
const unsigned char *str,
|
||||
int len,
|
||||
RE_TRANSLATE_TYPE trans);
|
||||
RE_TRANSLATE_TYPE trans, int icase);
|
||||
static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
|
||||
int eflags, int newline);
|
||||
static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
|
||||
int new_buf_len);
|
||||
#ifdef RE_ENABLE_I18N
|
||||
static void build_wcs_buffer (re_string_t *pstr);
|
||||
static void build_wcs_upper_buffer (re_string_t *pstr);
|
||||
#endif /* RE_ENABLE_I18N */
|
||||
static void build_upper_buffer (re_string_t *pstr);
|
||||
static void re_string_translate_buffer (re_string_t *pstr);
|
||||
static void re_string_destruct (re_string_t *pstr);
|
||||
#ifdef RE_ENABLE_I18N
|
||||
static int re_string_elem_size_at (const re_string_t *pstr, int idx);
|
||||
@ -253,8 +287,7 @@ static unsigned int re_string_context_at (const re_string_t *input, int idx,
|
||||
#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
|
||||
#define re_string_get_buffer(pstr) ((pstr)->mbs)
|
||||
#define re_string_length(pstr) ((pstr)->len)
|
||||
#define re_string_byte_at(pstr,idx) \
|
||||
((pstr)->mbs[idx])
|
||||
#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
|
||||
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
|
||||
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
|
||||
|
||||
@ -279,27 +312,6 @@ struct bin_tree_t
|
||||
};
|
||||
typedef struct bin_tree_t bin_tree_t;
|
||||
|
||||
struct re_backref_cache_entry
|
||||
{
|
||||
int node;
|
||||
int from;
|
||||
int to;
|
||||
int flag;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int eflags;
|
||||
int match_first;
|
||||
int match_last;
|
||||
int state_log_top;
|
||||
/* Back reference cache. */
|
||||
int nbkref_ents;
|
||||
int abkref_ents;
|
||||
struct re_backref_cache_entry *bkref_ents;
|
||||
int max_bkref_len;
|
||||
} re_match_context_t;
|
||||
|
||||
|
||||
#define CONTEXT_WORD 1
|
||||
#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
|
||||
@ -363,6 +375,32 @@ struct re_state_table_entry
|
||||
re_dfastate_t **array;
|
||||
};
|
||||
|
||||
struct re_backref_cache_entry
|
||||
{
|
||||
int node;
|
||||
int from;
|
||||
int to;
|
||||
int flag;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
/* EFLAGS of the argument of regexec. */
|
||||
int eflags;
|
||||
/* Where the matching ends. */
|
||||
int match_last;
|
||||
/* The string object corresponding to the input string. */
|
||||
re_string_t *input;
|
||||
/* The state log used by the matcher. */
|
||||
re_dfastate_t **state_log;
|
||||
int state_log_top;
|
||||
/* Back reference cache. */
|
||||
int nbkref_ents;
|
||||
int abkref_ents;
|
||||
struct re_backref_cache_entry *bkref_ents;
|
||||
int max_bkref_len;
|
||||
} re_match_context_t;
|
||||
|
||||
struct re_dfa_t
|
||||
{
|
||||
re_bitset_ptr_t word_char;
|
||||
|
Reference in New Issue
Block a user