Update.

2002-09-10 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regexec.c (build_trtable): Fix the destination of newline to prevent wrong states from overwriting. Append break statements to optimization. 2002-09-10 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regcomp.c: Wrap #include wchar.h and wctype.h in #if. (build_range_exp): Add castings to strlen invocations. (build_collating_symbol): Restore the type of characters from "char" to "unsigned char", and supplement castings. (build_collating_symbol): Likewise. (build_equiv_class): Likewise. (build_charclass): Likewise. (seek_collating_symbol_entry): Likewise. (parse_bracket_exp): Likewise. (build_word_op): Supplement a casting. * posix/regex_internal.c: Wrap #include wchar.h and wctype.h in #if. (re_string_allocate): Fix castings. (re_string_construct): Likewise. (re_string_construct_common): Likewise. (re_string_realloc_buffers): Likewise. (build_wcs_buffer): Likewise. (build_wcs_upper_buffer): Likewise. (re_string_skip_chars): Likewise. (re_string_reconstruct): Likewise. * posix/regex_internal.h: Restore the type of characters in re_string_t and bracket_elem_t from "char" to "unsigned char". (re_string_elem_size_at): Fix castings. * posix/regexec.c: Wrap #include wchar.h and wctype.h in #if. (transit_state_bkref_loop): Restore the type of characters from "char" to "unsigned char", and append a cast to "char*" pointer in array subscript. (check_node_accept_bytes): Likewise. (find_collation_sequence_value): Likewise.
2025-12-08 02:02:23 +03:00 · 2002-09-10 18:40:35 +00:00
parent 62439eac8b
commit c202c2c505
6 changed files with 129 additions and 47 deletions
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,40 @@
 2002-09-10  Isamu Hasegawa  <isamu@yamato.ibm.com>
 	* posix/regexec.c (build_trtable): Fix the destination of
 	newline to prevent wrong states from overwriting.
 	Append break statements to optimization.
 2002-09-10  Isamu Hasegawa  <isamu@yamato.ibm.com>
 	* posix/regcomp.c: Wrap #include wchar.h and wctype.h in #if.
 	(build_range_exp): Add castings to strlen invocations.
 	(build_collating_symbol): Restore the type of characters from "char"
 	to "unsigned char", and supplement castings.
 	(build_collating_symbol): Likewise.
 	(build_equiv_class): Likewise.
 	(build_charclass): Likewise.
 	(seek_collating_symbol_entry): Likewise.
 	(parse_bracket_exp): Likewise.
 	(build_word_op): Supplement a casting.
 	* posix/regex_internal.c: Wrap #include wchar.h and wctype.h in #if.
 	(re_string_allocate): Fix castings.
 	(re_string_construct): Likewise.
 	(re_string_construct_common): Likewise.
 	(re_string_realloc_buffers): Likewise.
 	(build_wcs_buffer): Likewise.
 	(build_wcs_upper_buffer): Likewise.
 	(re_string_skip_chars): Likewise.
 	(re_string_reconstruct): Likewise.
 	* posix/regex_internal.h: Restore the type of characters in
 	re_string_t and bracket_elem_t from "char" to "unsigned char".
 	(re_string_elem_size_at): Fix castings.
 	* posix/regexec.c: Wrap #include wchar.h and wctype.h in #if.
 	(transit_state_bkref_loop): Restore the type of characters from
 	"char" to "unsigned char", and append a cast to "char*" pointer in
 	array subscript.
 	(check_node_accept_bytes): Likewise.
 	(find_collation_sequence_value): Likewise.
 2002-09-10  Hartvig Ekner  <hartvige@mips.com>
 	* sysdeps/mips/memcpy.S: New file.
--- a/localedata/ChangeLog
+++ b/localedata/ChangeLog
@@ -1,3 +1,8 @@
 2002-09-10  Ulrich Drepper  <drepper@redhat.com>
 	* localedata/locales/bg_BG: Update LC_IDENTIFICATION info.
 	Patch by Yanko Kaneti <yaneti@declera.com>.
 2002-09-01  Roland McGrath  <roland@redhat.com>
 	* tst-ctype.c (main): Use nl_langinfo instead of __ctype_b global.
--- a/localedata/locales/bg_BG
+++ b/localedata/locales/bg_BG
@@ -21,10 +21,10 @@ contact    "Delyan Toshev"
 email      "delyant@yahoo.com"
 tel        ""
 fax        ""
-language   "bg"
+language   "Bulgarian"
-territory  "BG"
+territory  "Bulgaria"
-revision   "2.0"
+revision   "2.0.1"
-date       "2001-11-16"
+date       "2002-09-10"
 category  "bg_BG:2000";LC_IDENTIFICATION
 category  "bg_BG:2000";LC_CTYPE
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -24,8 +24,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <wchar.h>
+
-#include <wctype.h>
+#if defined HAVE_WCHAR_H || defined _LIBC
 # include <wchar.h>
 #endif /* HAVE_WCHAR_H || _LIBC */
 #if defined HAVE_WCTYPE_H || defined _LIBC
 # include <wctype.h>
 #endif /* HAVE_WCTYPE_H || _LIBC */
 #ifdef _LIBC
 # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
@@ -99,7 +104,8 @@ re_string_allocate (pstr, str, len, init_len, trans, icase)
  if (BE (ret != REG_NOERROR, 0))
    return ret;
-  pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case : (char *) str);
+  pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
                    : (unsigned char *) str);
  pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
  pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
                     || MB_CUR_MAX > 1) ? pstr->valid_len : len;
@@ -127,7 +133,8 @@ re_string_construct (pstr, str, len, trans, icase)
      if (BE (ret != REG_NOERROR, 0))
        return ret;
    }
-  pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case : (char *) str);
+  pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
                    : (unsigned char *) str);
  pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
  if (icase)
@@ -176,13 +183,13 @@ re_string_realloc_buffers (pstr, new_buf_len)
 #endif /* RE_ENABLE_I18N  */
  if (MBS_ALLOCATED (pstr))
    {
-      pstr->mbs = re_realloc (pstr->mbs, char, new_buf_len);
+      pstr->mbs = re_realloc (pstr->mbs, unsigned char, new_buf_len);
      if (BE (pstr->mbs == NULL, 0))
        return REG_ESPACE;
    }
  if (MBS_CASE_ALLOCATED (pstr))
    {
-      pstr->mbs_case = re_realloc (pstr->mbs_case, char, new_buf_len);
+      pstr->mbs_case = re_realloc (pstr->mbs_case, unsigned char, new_buf_len);
      if (BE (pstr->mbs_case == NULL, 0))
        return REG_ESPACE;
      if (!MBS_ALLOCATED (pstr))
@@ -202,7 +209,7 @@ re_string_construct_common (str, len, pstr, trans, icase)
     int icase;
 {
  memset (pstr, '\0', sizeof (re_string_t));
-  pstr->raw_mbs = str;
+  pstr->raw_mbs = (const unsigned char *) str;
  pstr->len = len;
  pstr->trans = trans;
  pstr->icase = icase ? 1 : 0;
@@ -235,8 +242,8 @@ build_wcs_buffer (pstr)
      wchar_t wc;
      remain_len = end_idx - byte_idx;
      prev_st = pstr->cur_state;
-      mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
+      mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
-                        remain_len, &pstr->cur_state);
+                              + byte_idx), remain_len, &pstr->cur_state);
      if (BE (mbclen == (size_t) -2, 0))
        {
          /* The buffer doesn't have enough space, finish to build.  */
@@ -254,9 +261,8 @@ build_wcs_buffer (pstr)
      /* Apply the translation if needed.  */
      if (pstr->trans != NULL && mbclen == 1)
        {
-          int ch = *((unsigned char *) pstr->raw_mbs + pstr->raw_mbs_idx
+          int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
-                     + byte_idx);
+          pstr->mbs_case[byte_idx] = ch;
-          pstr->mbs_case[byte_idx] = pstr->trans[ch];
        }
      /* Write wide character and padding.  */
      pstr->wcs[byte_idx++] = wc;
@@ -284,8 +290,8 @@ build_wcs_upper_buffer (pstr)
      wchar_t wc;
      remain_len = end_idx - byte_idx;
      prev_st = pstr->cur_state;
-      mbclen = mbrtowc (&wc, pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx,
+      mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
-                        remain_len, &pstr->cur_state);
+                              + byte_idx), remain_len, &pstr->cur_state);
      if (BE (mbclen == (size_t) -2, 0))
        {
          /* The buffer doesn't have enough space, finish to build.  */
@@ -310,7 +316,7 @@ build_wcs_upper_buffer (pstr)
      else /* mbclen > 1 */
        {
          if (iswlower (wc))
-            wcrtomb (pstr->mbs + byte_idx, towupper (wc), &prev_st);
+            wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
          else
            memcpy (pstr->mbs + byte_idx,
                    pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
@@ -340,7 +346,7 @@ re_string_skip_chars (pstr, new_raw_idx)
    {
      int remain_len = pstr->len - rawbuf_idx;
      prev_st = pstr->cur_state;
-      mbclen = mbrlen (pstr->raw_mbs + rawbuf_idx, remain_len,
+      mbclen = mbrlen ((const char *) pstr->raw_mbs + rawbuf_idx, remain_len,
                       &pstr->cur_state);
      if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
        {
@@ -420,9 +426,9 @@ re_string_reconstruct (pstr, idx, eflags, newline)
      pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
                           : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
      if (!MBS_CASE_ALLOCATED (pstr))
-        pstr->mbs_case = (char *) pstr->raw_mbs;
+        pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
      if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
-        pstr->mbs = (char *) pstr->raw_mbs;
+        pstr->mbs = (unsigned char *) pstr->raw_mbs;
      offset = idx;
    }
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -228,15 +228,15 @@ struct re_string_t
 {
  /* Indicate the raw buffer which is the original string passed as an
     argument of regexec(), re_search(), etc..  */
-  const char *raw_mbs;
+  const unsigned char *raw_mbs;
  /* Store the multibyte string.  In case of "case insensitive mode" like
     REG_ICASE, upper cases of the string are stored, otherwise MBS points
     the same address that RAW_MBS points.  */
-  char *mbs;
+  unsigned char *mbs;
  /* Store the case sensitive multibyte string.  In case of
     "case insensitive mode", the original string are stored,
     otherwise MBS_CASE points the same address that MBS points.  */
-  char *mbs_case;
+  unsigned char *mbs_case;
 #ifdef RE_ENABLE_I18N
  /* Store the wide character string which is corresponding to MBS.  */
  wint_t *wcs;
@@ -512,7 +512,7 @@ typedef struct
  union
  {
    unsigned char ch;
-    char *name;
+    unsigned char *name;
    wchar_t wch;
  } opr;
 } bracket_elem_t;
@@ -580,7 +580,7 @@ re_string_elem_size_at (pstr, idx)
     int idx;
 {
 #ifdef _LIBC
-  const char *extra, *p;
+  const unsigned char *p, *extra;
  const int32_t *table, *indirect;
  int32_t tmp;
 # include <locale/weight.h>
@@ -589,11 +589,12 @@ re_string_elem_size_at (pstr, idx)
  if (nrules != 0)
    {
      table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
-      extra = (const char *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+      extra = (const unsigned char *)
        _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
      indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
 						_NL_COLLATE_INDIRECTMB);
      p = pstr->mbs + idx;
-      tmp = findidx ((const unsigned char **) &p);
+      tmp = findidx (&p);
      return p - pstr->mbs - idx;
    }
  else
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -23,8 +23,13 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <wchar.h>
+
-#include <wctype.h>
+#if defined HAVE_WCHAR_H || defined _LIBC
 # include <wchar.h>
 #endif /* HAVE_WCHAR_H || _LIBC */
 #if defined HAVE_WCTYPE_H || defined _LIBC
 # include <wctype.h>
 #endif /* HAVE_WCTYPE_H || _LIBC */
 #ifdef _LIBC
 # ifndef _RE_DEFINE_LOCALE_FUNCTIONS
@@ -123,7 +128,7 @@ static re_dfastate_t **build_trtable (const regex_t *dfa,
 static int check_node_accept_bytes (const regex_t *preg, int node_idx,
                                    const re_string_t *input, int idx);
 # ifdef _LIBC
-static unsigned int find_collation_sequence_value (const char *mbs,
+static unsigned int find_collation_sequence_value (const unsigned char *mbs,
                                                   size_t name_len);
 # endif /* _LIBC */
 #endif /* RE_ENABLE_I18N */
@@ -1674,7 +1679,7 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
          if (BE (err != REG_NOERROR, 0))
            return err;
        }
-      buf = re_string_get_buffer (mctx->input);
+      buf = (char *) re_string_get_buffer (mctx->input);
      if (strncmp (buf + cur_regs[subexp_idx].rm_so, buf + cur_str_idx,
                   subexp_len) != 0)
        continue;
@@ -1855,27 +1860,51 @@ build_trtable (preg, state, fl_search)
    }
  /* Update the transition table.  */
  /* For all characters ch...:  */
  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
    for (j = 0; j < UINT_BITS; ++j, ++ch)
      if ((acceptable[i] >> j) & 1)
        {
          /* The current state accepts the character ch.  */
          if (IS_WORD_CHAR (ch))
            {
              for (k = 0; k < ndests; ++k)
                if ((dests_ch[k][i] >> j) & 1)
-                  trtable[ch] = dest_states_word[k];
+                  {
                    /* k-th destination accepts the word character ch.  */
                    trtable[ch] = dest_states_word[k];
                    /* There must be only one destination which accepts
                       character ch.  See group_nodes_into_DFAstates.  */
                    break;
                  }
            }
          else /* not WORD_CHAR */
            {
              for (k = 0; k < ndests; ++k)
                if ((dests_ch[k][i] >> j) & 1)
-                  trtable[ch] = dest_states[k];
+                  {
                    /* k-th destination accepts the non-word character ch.  */
                    trtable[ch] = dest_states[k];
                    /* There must be only one destination which accepts
                       character ch.  See group_nodes_into_DFAstates.  */
                    break;
                  }
            }
        }
  /* new line */
-  for (k = 0; k < ndests; ++k)
+  if (bitset_contain (acceptable, NEWLINE_CHAR))
-    if (bitset_contain (acceptable, NEWLINE_CHAR))
+    {
-      trtable[NEWLINE_CHAR] = dest_states_nl[k];
+      /* The current state accepts newline character.  */
      for (k = 0; k < ndests; ++k)
        if (bitset_contain (dests_ch[k], NEWLINE_CHAR))
          {
            /* k-th destination accepts newline character.  */
            trtable[NEWLINE_CHAR] = dest_states_nl[k];
            /* There must be only one destination which accepts
               newline.  See group_nodes_into_DFAstates.  */
            break;
          }
    }
  re_free (dest_states_nl);
  re_free (dest_states_word);
@@ -2069,7 +2098,7 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
    {
      const re_charset_t *cset = node->opr.mbcset;
 # ifdef _LIBC
-      const char *pin = re_string_get_buffer (input) + str_idx;
+      const unsigned char *pin = re_string_get_buffer (input) + str_idx;
 # endif /* _LIBC */
      int match_len = 0;
      wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
@@ -2098,17 +2127,19 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
        {
          unsigned int in_collseq = 0;
          const int32_t *table, *indirect;
-          const char *weights, *extra, *collseqwc;
+          const unsigned char *weights, *extra;
          const char *collseqwc;
          int32_t idx;
          /* This #include defines a local function!  */
 #  include <locale/weight.h>
          /* match with collating_symbol?  */
          if (cset->ncoll_syms)
-            extra = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
+            extra = (const unsigned char *)
              _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
          for (i = 0; i < cset->ncoll_syms; ++i)
            {
-              const char *coll_sym = extra + cset->coll_syms[i];
+              const unsigned char *coll_sym = extra + cset->coll_syms[i];
              /* Compare the length of input collating element and
                 the length of current collating element.  */
              if (*coll_sym != elem_len)
@@ -2147,11 +2178,13 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
          /* match with equivalence_class?  */
          if (cset->nequiv_classes)
            {
-              const unsigned char *cp = (const unsigned char *) pin;
+              const unsigned char *cp = pin;
              table = (const int32_t *)
                _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
-              weights = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
+              weights = (const unsigned char *)
-              extra = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
+                _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
              extra = (const unsigned char *)
                _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
              indirect = (const int32_t *)
                _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
              idx = findidx (&cp);
@@ -2215,7 +2248,7 @@ check_node_accept_bytes (preg, node_idx, input, str_idx)
 # ifdef _LIBC
 static unsigned int
 find_collation_sequence_value (mbs, mbs_len)
-    const char *mbs;
+    const unsigned char *mbs;
    size_t mbs_len;
 {
  uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
@@ -2226,7 +2259,7 @@ find_collation_sequence_value (mbs, mbs_len)
          /* No valid character.  Match it as a single byte character.  */
          const unsigned char *collseq = (const unsigned char *)
            _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
-          return collseq[*(unsigned char *) mbs];
+          return collseq[mbs[0]];
        }
      return UINT_MAX;
    }