Update.

2002-02-28 Isamu Hasegawa <isamu@yamato.ibm.com> * posix/regcomp.c (regcomp): Remove a redundant condition. (init_word_char): Add a check on malloc failure. (create_initial_state): Likewise. (duplicate_node): Likewise. (calc_eclosure): Likewise. (calc_eclosure_iter): Likewise. (parse_expression): Likewise. (parse_bracket_exp): Remove unnecessary malloc invocations. (build_equiv_class): Likewise. (build_charclass): Likewise. * posix/regex_internal.c (re_node_set_intersect): Add a check on malloc failure. (re_node_set_add_intersect): Likewise. (re_node_set_merge): Likewise. (re_acquire_state): Likewise. (re_acquire_state_context): Likewise. (create_newstate_common): Likewise. (register_state): Likewise. (create_ci_newstate): Likewise. (create_cd_newstate): Likewise. * posix/regex_internal.h: Fix prototypes of re_acquire_state and re_acquire_state_context. * posix/regexec.c (regexec): Suit it to the error handling of re_search_internal. (re_match): Likewise. (re_search): Likewise. (re_search_internal): Add a check on malloc failure. (acquire_init_state_context): Likewise. (check_matching): Likewise. (proceed_next_node): Likewise. (set_regs): Likewise. (sift_states_backward): Likewise. (sift_states_iter_bkref): Likewise. (add_epsilon_backreference): Likewise. (transit_state): Likewise. (transit_state_sb): Likewise. (transit_state_mb): Likewise. (transit_state_bkref_loop): Likewise. (build_trtable): Likewise. (group_nodes_into_DFAstates): Likewise. (match_ctx_init): Likewise. (match_ctx_add_entry): Likewise.
2025-12-24 17:51:17 +03:00 · 2002-02-28 07:43:13 +00:00
parent 51f38e87b1
commit a9388965cc
5 changed files with 612 additions and 281 deletions
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -63,7 +63,7 @@ static void re_compile_fastmap_iter (regex_t *bufp,
                                     const re_dfastate_t *init_state,
                                     char *fastmap);
 static reg_errcode_t init_dfa (re_dfa_t *dfa, int pat_len);
-static void init_word_char (re_dfa_t *dfa);
+static reg_errcode_t init_word_char (re_dfa_t *dfa);
 static void free_charset (re_charset_t *cset);
 static void free_workarea_compile (regex_t *preg);
 static reg_errcode_t create_initial_state (re_dfa_t *dfa);
@@ -72,10 +72,11 @@ static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
 static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
 static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
 static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
-static int duplicate_node (re_dfa_t *dfa, int org_idx,
-                           unsigned int constraint);
+static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
+                                     unsigned int constraint);
 static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
-static re_node_set calc_eclosure_iter (re_dfa_t *dfa, int node, int root);
+static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
+                                         int node, int root);
 static void calc_inveclosure (re_dfa_t *dfa);
 static int fetch_number (re_string_t *input, re_token_t *token,
                         reg_syntax_t syntax);
@@ -446,8 +447,8 @@ regcomp (preg, pattern, cflags)
  if (ret == REG_ERPAREN)
    ret = REG_EPAREN;

-  /* XXX Why the test for preg->fastmap != NULL?  */
-  if (ret == REG_NOERROR && preg->fastmap != NULL)
+  /* We have already checked preg->fastmap != NULL.  */
+  if (ret == REG_NOERROR)
    {
      /* Compute the fastmap now, since regexec cannot modify the pattern
 	 buffer.  */
@@ -772,16 +773,19 @@ init_dfa (dfa, pat_len)
   "word".  In this case "word" means that it is the word construction
   character used by some operators like "\<", "\>", etc.  */

-static void
+static reg_errcode_t
 init_word_char (dfa)
     re_dfa_t *dfa;
 {
  int i, j, ch;
  dfa->word_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
+  if (dfa->word_char == NULL)
+    return REG_ESPACE;
  for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
    for (j = 0; j < UINT_BITS; ++j, ++ch)
      if (isalnum (ch) || ch == '_')
        dfa->word_char[i] |= 1 << j;
+  return REG_NOERROR;
 }

 /* Free the work area which are only used while compiling.  */
@@ -844,24 +848,28 @@ create_initial_state (dfa)
      }

  /* It must be the first time to invoke acquire_state.  */
-  dfa->init_state = re_acquire_state_context (dfa, &init_nodes, 0);
+  dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, 0);
+  /* We don't check ERR here, since the initial state must not be NULL.  */
+  if (dfa->init_state == NULL)
+    return err;
  if (dfa->init_state->has_constraint)
    {
-      dfa->init_state_word = re_acquire_state_context (dfa, &init_nodes,
+      dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
                                                       CONTEXT_WORD);
-      dfa->init_state_nl = re_acquire_state_context (dfa, &init_nodes,
+      dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
                                                     CONTEXT_NEWLINE);
-      dfa->init_state_begbuf = re_acquire_state_context (dfa, &init_nodes,
+      dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
+                                                         &init_nodes,
                                                         CONTEXT_NEWLINE
                                                         | CONTEXT_BEGBUF);
+      if (dfa->init_state_word == NULL || dfa->init_state_nl == NULL
+          || dfa->init_state_begbuf == NULL)
+        return err;
    }
  else
    dfa->init_state_word = dfa->init_state_nl
      = dfa->init_state_begbuf = dfa->init_state;

-  if (dfa->init_state == NULL || dfa->init_state_word == NULL
-      || dfa->init_state_nl == NULL || dfa->init_state_begbuf == NULL )
-    return REG_ESPACE;
  re_node_set_free (&init_nodes);
  return REG_NOERROR;
 }
@@ -1114,20 +1122,30 @@ calc_epsdest (dfa, node)
    }
 }

-static int
-duplicate_node (dfa, org_idx, constraint)
+/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
+   The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
+   otherwise return the error code.  */
+
+static reg_errcode_t
+duplicate_node (new_idx, dfa, org_idx, constraint)
     re_dfa_t *dfa;
-     int org_idx;
+     int *new_idx, org_idx;
     unsigned int constraint;
 {
  re_token_t dup;
  int dup_idx;
+  reg_errcode_t err;

  dup.type = OP_CONTEXT_NODE;
  if (dfa->nodes[org_idx].type == OP_CONTEXT_NODE)
    {
+      /* If the node whose index is ORG_IDX is the same as the intended
+         node, use it.  */
      if (dfa->nodes[org_idx].constraint == constraint)
-        return org_idx;
+        {
+          *new_idx = org_idx;
+          return REG_NOERROR;
+        }
      dup.constraint = constraint |
        dfa->nodes[org_idx].constraint;
    }
@@ -1137,23 +1155,32 @@ duplicate_node (dfa, org_idx, constraint)
  /* In case that `entity' points OP_CONTEXT_NODE,
     we correct `entity' to real entity in calc_inveclosures(). */
  dup.opr.ctx_info = malloc (sizeof (*dup.opr.ctx_info));
+  dup_idx = re_dfa_add_node (dfa, dup, 1);
+  if (dup.opr.ctx_info == NULL || dup_idx == -1)
+    return REG_ESPACE;
  dup.opr.ctx_info->entity = org_idx;
  dup.opr.ctx_info->bkref_eclosure = NULL;
-  dup_idx = re_dfa_add_node (dfa, dup, 1);
-  dfa->nodes[dup_idx].duplicated = 1;

+  dfa->nodes[dup_idx].duplicated = 1;
  dfa->firsts[dup_idx] = dfa->firsts[org_idx];
  dfa->nexts[dup_idx] = dfa->nexts[org_idx];
-  re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
+  err = re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
+  if (err != REG_NOERROR)
+    return err;
  /* Since we don't duplicate epsilon nodes, epsilon closure have
     only itself.  */
-  re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
-  re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
+  err = re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
+  if (err != REG_NOERROR)
+    return err;
+  err = re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
+  if (err != REG_NOERROR)
+    return err;
  /* Then we must update inveclosure for this node.
     We process them at last part of calc_eclosure(),
     since we don't complete to calculate them here.  */

-  return dup_idx;
+  *new_idx = dup_idx;
+  return REG_NOERROR;
 }

 static void
@@ -1193,6 +1220,7 @@ calc_eclosure (dfa)
  /* For each nodes, calculate epsilon closure.  */
  for (node_idx = 0, max = dfa->nodes_len; ; ++node_idx)
    {
+      reg_errcode_t err;
      re_node_set eclosure_elem;
      if (node_idx == max)
        {
@@ -1210,7 +1238,9 @@ calc_eclosure (dfa)
      if (dfa->eclosures[node_idx].nelem != 0)
        continue;
      /* Calculate epsilon closure of `node_idx'.  */
-      eclosure_elem = calc_eclosure_iter (dfa, node_idx, 1);
+      err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, 1);
+      if (err != REG_NOERROR)
+        return err;

      if (dfa->eclosures[node_idx].nelem == 0)
        {
@@ -1241,7 +1271,13 @@ calc_eclosure (dfa)
        {
          int dest_node_idx = dfa->eclosures[dfa->nexts[idx]].elems[i];
          if (!IS_EPSILON_NODE (dfa->nodes[dest_node_idx].type))
-            dest_node_idx = duplicate_node (dfa, dest_node_idx, constraint);
+            {
+              reg_errcode_t err;
+              err = duplicate_node (&dest_node_idx, dfa, dest_node_idx,
+                                    constraint);
+              if (err != REG_NOERROR)
+                return err;
+            }
          re_node_set_insert (bkref_eclosure, dest_node_idx);
        }
      dfa->nodes[idx].opr.ctx_info->bkref_eclosure = bkref_eclosure;
@@ -1252,15 +1288,19 @@ calc_eclosure (dfa)

 /* Calculate epsilon closure of NODE.  */

-static re_node_set
-calc_eclosure_iter (dfa, node, root)
+static reg_errcode_t
+calc_eclosure_iter (new_set, dfa, node, root)
+     re_node_set *new_set;
     re_dfa_t *dfa;
     int node, root;
 {
+  reg_errcode_t err;
  unsigned int constraint;
  int i, max, incomplete = 0;
  re_node_set eclosure;
-  re_node_set_alloc (&eclosure, 1);
+  err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
+  if (err != REG_NOERROR)
+    return err;

  /* This indicates that we are calculating this node now.
     We reference this value to avoid infinite loop.  */
@@ -1285,7 +1325,11 @@ calc_eclosure_iter (dfa, node, root)
        /* If we haven't calculated the epsilon closure of `edest' yet,
           calculate now. Otherwise use calculated epsilon closure.  */
        if (dfa->eclosures[edest].nelem == 0)
-          eclosure_elem = calc_eclosure_iter (dfa, edest, 0);
+          {
+            err = calc_eclosure_iter (&eclosure_elem, dfa, edest, 0);
+            if (err != REG_NOERROR)
+              return err;
+          }
        else
          eclosure_elem = dfa->eclosures[edest];
        /* Merge the epsilon closure of `edest'.  */
@@ -1307,7 +1351,11 @@ calc_eclosure_iter (dfa, node, root)
        int dest = eclosure.elems[i];
        if (!IS_EPSILON_NODE (dfa->nodes[dest].type))
          {
-            int dup_dest = duplicate_node (dfa, dest, constraint);
+            int dup_dest;
+            reg_errcode_t err;
+            err = duplicate_node (&dup_dest, dfa, dest, constraint);
+            if (err != REG_NOERROR)
+              return err;
            if (dest != dup_dest)
              {
                re_node_set_remove_at (&eclosure, i--);
@@ -1323,7 +1371,8 @@ calc_eclosure_iter (dfa, node, root)
    dfa->eclosures[node].nelem = 0;
  else
    dfa->eclosures[node] = eclosure;
-  return eclosure;
+  *new_set = eclosure;
+  return REG_NOERROR;
 }

 /* Functions for token which are used in the parser.  */
@@ -1865,7 +1914,11 @@ parse_expression (regexp, preg, token, syntax, nest, err)
      break;
    case ANCHOR:
      if (dfa->word_char == NULL)
-        init_word_char (dfa);
+        {
+          *err = init_word_char (dfa);
+          if (*err != REG_NOERROR)
+            return NULL;
+        }
      if (token->opr.ctx_type == WORD_DELIM)
        {
          bin_tree_t *tree_first, *tree_last;
@@ -2137,28 +2190,6 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
   I'm not sure, but maybe enough.  */
 #define BRACKET_NAME_BUF_SIZE 32

-static inline void *
-extend_array_for_cset (array, num, alloc, type_size)
-     void *array;
-     int num, *alloc, type_size;
-{
-  void *new_array = array;
-  if (*alloc == num)
-    {
-      if (*alloc == 0)
-        {
-          new_array = malloc (type_size);
-          *alloc = 1;
-        }
-      else
-        {
-          new_array = realloc (array, type_size * num * 2);
-          *alloc = 2 * num;
-        }
-    }
-  return new_array;
-}
-
 /* This function parse bracket expression like "[abc]", "[a-c]",
   "[[.a-a.]]" etc.  */

@@ -2299,22 +2330,15 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
          uint32_t *new_array_end;
 	  int new_nranges;

-	  /* XXX If mbcset->range_starts and mbcset->range_ends are NULL
-	     if *range_alloc == 0 then we do not need the if.  */
-          if (*range_alloc == 0)
-            {
-              new_nranges = 1;
-              new_array_start = re_malloc (uint32_t, 1);
-              new_array_end = re_malloc (uint32_t, 1);
-            }
-          else
-            {
-              new_nranges = 2 * mbcset->nranges;
-              new_array_start = re_realloc (mbcset->range_starts, uint32_t,
-                                            new_nranges);
-              new_array_end = re_realloc (mbcset->range_ends, uint32_t,
-                                          new_nranges);
-            }
+          /* +1 in case of mbcset->nranges is 0.  */
+          new_nranges = 2 * mbcset->nranges + 1;
+	  /* Use realloc since mbcset->range_starts and mbcset->range_ends
+             are NULL if *range_alloc == 0.  */
+          new_array_start = re_realloc (mbcset->range_starts, uint32_t,
+                                        new_nranges);
+          new_array_end = re_realloc (mbcset->range_ends, uint32_t,
+                                      new_nranges);
+
          if (new_array_start == NULL || new_array_end == NULL)
            return REG_ESPACE;

@@ -2394,13 +2418,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)

          /* Got valid collation sequence, add it as a new entry.  */
          /* Check the space of the arrays.  */
-          mbcset->coll_syms = extend_array_for_cset (mbcset->coll_syms,
-                                                     mbcset->ncoll_syms,
-                                                     coll_sym_alloc,
-                                                     sizeof (int32_t));
-          if (mbcset->coll_syms == NULL)
-            return REG_ESPACE;
-
+          if (*coll_sym_alloc == mbcset->ncoll_syms)
+            {
+              /* Not enough, realloc it.  */
+              /* +1 in case of mbcset->ncoll_syms is 0.  */
+              *coll_sym_alloc = 2 * mbcset->ncoll_syms + 1;
+              /* Use realloc since mbcset->coll_syms is NULL
+                 if *alloc == 0.  */
+              mbcset->coll_syms = re_realloc (mbcset->coll_syms, int32_t,
+                                              *coll_sym_alloc);
+              if (mbcset->coll_syms == NULL)
+                return REG_ESPACE;
+            }
          mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
          return REG_NOERROR;
        }
@@ -2557,12 +2586,18 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
              bitset_set (sbcset, start_elem.opr.ch);
              break;
            case MB_CHAR:
-              mbcset->mbchars = extend_array_for_cset (mbcset->mbchars,
-						       mbcset->nmbchars,
-						       &mbchar_alloc,
-						       sizeof (wchar_t));
-              if (mbcset->mbchars == NULL)
-                goto parse_bracket_exp_espace;
+              /* Check whether the array has enough space.  */
+              if (mbchar_alloc == mbcset->nmbchars)
+                {
+                  /* Not enough, realloc it.  */
+                  /* +1 in case of mbcset->nmbchars is 0.  */
+                  mbchar_alloc = 2 * mbcset->nmbchars + 1;
+                  /* Use realloc since array is NULL if *alloc == 0.  */
+                  mbcset->mbchars = re_realloc (mbcset->mbchars, wchar_t,
+                                                mbchar_alloc);
+                  if (mbcset->mbchars == NULL)
+                    goto parse_bracket_exp_espace;
+                }
              mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
              break;
            case EQUIV_CLASS:
@@ -2779,14 +2814,18 @@ build_equiv_class (mbcset, sbcset, equiv_class_alloc, name)
                bitset_set (sbcset, ch);
            }
        }
-      /* Check the space of the arrays, and extend if we need.  */
-      mbcset->equiv_classes = extend_array_for_cset (mbcset->equiv_classes,
-						     mbcset->nequiv_classes,
-						     equiv_class_alloc,
-						     sizeof (int32_t));
-      if (mbcset->equiv_classes == NULL)
-        return REG_ESPACE;
-
+      /* Check whether the array has enough space.  */
+      if (*equiv_class_alloc == mbcset->nequiv_classes)
+        {
+          /* Not enough, realloc it.  */
+          /* +1 in case of mbcset->nequiv_classes is 0.  */
+          *equiv_class_alloc = 2 * mbcset->nequiv_classes + 1;
+          /* Use realloc since the array is NULL if *alloc == 0.  */
+          mbcset->equiv_classes = re_realloc (mbcset->equiv_classes, int32_t,
+                                              *equiv_class_alloc);
+          if (mbcset->equiv_classes == NULL)
+            return REG_ESPACE;
+        }
      mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
    }
  else
@@ -2815,12 +2854,17 @@ build_charclass (mbcset, sbcset, char_class_alloc, name)
  int i;

  /* Check the space of the arrays.  */
-  mbcset->char_classes = extend_array_for_cset (mbcset->char_classes,
-						mbcset->nchar_classes,
-						char_class_alloc,
-						sizeof (wctype_t));
-  if (mbcset->char_classes == NULL)
-    return REG_ESPACE;
+  if (*char_class_alloc == mbcset->nchar_classes)
+    {
+      /* Not enough, realloc it.  */
+      /* +1 in case of mbcset->nchar_classes is 0.  */
+      *char_class_alloc = 2 * mbcset->nchar_classes + 1;
+      /* Use realloc since array is NULL if *alloc == 0.  */
+      mbcset->char_classes = re_realloc (mbcset->char_classes, wctype_t,
+                                         *char_class_alloc);
+      if (mbcset->char_classes == NULL)
+        return REG_ESPACE;
+    }

  mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);