Update.

2003-11-25 Ulrich Drepper <drepper@redhat.com> * posix/runptests.c (main): Make errors fatal. * posix/PTESTS: One test in GA135 and GA136 check functionality which seems not guaranteed. 2003-11-25 Jakub Jelinek <jakub@redhat.com> * posix/regexec.c (re_search_internal): If prune_impossible_nodes returned REG_NOMATCH, set match_last to -1. Don't initialize pmatch[0] needlessly. Fix comment. (prune_impossible_nodes): Don't segfault on NULL state_log entry. (set_regs): Fix comment. * posix/regcomp.c (parse_bracket_exp): Only set has_plural_match if adding both SIMPLE_BRACKET and COMPLEX_BRACKET. (build_charclass_op): Set has_plural_match if adding both SIMPLE_BRACKET and COMPLEX_BRACKET. * posix/bug-regex11.c (tests): Fix register values for one commented out test. Add new tests. * posix/regex_internal.c (re_string_allocate): Make sure init_len is at least dfa->mb_cur_max. (re_string_reconstruct): If is_utf8, don't fall back into re_string_skip_chars just because idx points into a middle of valid UTF-8 character. Instead, set the wcs bytes which correspond to the partial character bytes to WEOF. * posix/regexec.c (re_search_internal): Allocate input.bufs_len + 1 instead of dfa->nodes_len + 1 state_log entries initially. * posix/bug-regex20.c (main): Uncomment backwards case insensitive tests.
2025-12-24 17:51:17 +03:00 · 2003-11-26 03:24:15 +00:00
parent 65e6becf5b
commit 97fd3a3003
19 changed files with 5570 additions and 5102 deletions
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -55,7 +55,12 @@ re_string_allocate (pstr, str, len, init_len, trans, icase, dfa)
     const re_dfa_t *dfa;
 {
  reg_errcode_t ret;
-  int init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
+  int init_buf_len;
+
+  /* Ensure at least one character fits into the buffers.  */
+  if (init_len < dfa->mb_cur_max)
+    init_len = dfa->mb_cur_max;
+  init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
  re_string_construct_common (str, len, pstr, trans, icase, dfa);
  pstr->stop = pstr->len;

@@ -516,33 +521,33 @@ re_string_reconstruct (pstr, idx, eflags, newline)
 		  /* Special case UTF-8.  Multi-byte chars start with any
 		     byte other than 0x80 - 0xbf.  */
 		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
-		  end = raw + (pstr->valid_len > offset - pstr->mb_cur_max
-			       ? pstr->valid_len : offset - pstr->mb_cur_max);
+		  end = raw + (offset - pstr->mb_cur_max);
 		  for (p = raw + offset - 1; p >= end; --p)
 		    if ((*p & 0xc0) != 0x80)
 		      {
 			mbstate_t cur_state;
 			wchar_t wc2;
+			int mlen;

 			/* XXX Don't use mbrtowc, we know which conversion
 			   to use (UTF-8 -> UCS4).  */
 			memset (&cur_state, 0, sizeof (cur_state));
-			if (mbrtowc (&wc2, p, raw + offset - p, &cur_state)
-			    == raw + offset - p)
+			mlen = mbrtowc (&wc2, p, raw + pstr->len - p,
+					&cur_state) - (raw + offset - p);
+			if (mlen >= 0)
 			  {
 			    memset (&pstr->cur_state, '\0',
 				    sizeof (mbstate_t));
+			    pstr->valid_len = mlen;
 			    wc = wc2;
 			  }
 			break;
 		      }
 		}
 	      if (wc == WEOF)
-		{
-		  pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
-		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
-		    pstr->wcs[wcs_idx] = WEOF;
-		}
+		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
+	      for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
+		pstr->wcs[wcs_idx] = WEOF;
 	      if (pstr->trans && wc <= 0xff)
 		wc = pstr->trans[wc];
 	      pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD