CHARSET_INFO::instr was extended to return more substring match results:

- offset of the substring beginning
- offset of the substring end
- number of characters (multi-byte compatible)
2025-07-30 16:24:05 +03:00 · 2003-09-25 13:35:21 +05:00
parent 192fcb9cc6
commit 9b4b9f91de
5 changed files with 106 additions and 29 deletions
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@ -75,6 +75,12 @@ typedef struct my_uni_idx_st
  uchar  *tab;
 } MY_UNI_IDX;

+typedef struct        /* One segment filled in by a collation's instr() handler */
+{
+  uint beg;            /* offset at which the segment begins */
+  uint end;            /* offset at which the segment ends */
+  uint mblen;          /* segment length in characters (multi-byte compatible) */
+} my_match_t;

 enum my_lex_states
 {
@ -116,9 +122,10 @@ typedef struct my_collation_handler_st

  int  (*strcasecmp)(struct charset_info_st *, const char *, const char *);
  
-  int  (*instr)(struct charset_info_st *,
+  uint (*instr)(struct charset_info_st *,
                const char *big,   uint b_length,
-                const char *small, uint s_length);
+                const char *small, uint s_length,
+                my_match_t *match, uint nmatch);
  
  /* Hash calculation */
  void (*hash_sort)(struct charset_info_st *cs, const uchar *key, uint len,
@ -249,9 +256,10 @@ extern void my_hash_sort_simple(CHARSET_INFO *cs,

 extern uint my_lengthsp_8bit(CHARSET_INFO *cs, const char *ptr, uint length);

-extern int  my_instr_simple(struct charset_info_st *,
+extern uint my_instr_simple(struct charset_info_st *,
                            const char *big,   uint b_length,
-                            const char *small, uint s_length);
+                            const char *small, uint s_length,
+                            my_match_t *match, uint nmatch);


 /* Functions for 8bit */
@ -317,9 +325,10 @@ int my_wildcmp_mb(CHARSET_INFO *,
 		  int escape, int w_one, int w_many);
 uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e);
 uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos);
-int  my_instr_mb(struct charset_info_st *,
+uint my_instr_mb(struct charset_info_st *,
                 const char *big,   uint b_length,
-                 const char *small, uint s_length);
+                 const char *small, uint s_length,
+                 my_match_t *match, uint nmatch);


 extern my_bool my_parse_charset_xml(const char *bug, uint len,
--- a/sql/item_func.cc
+++ b/sql/item_func.cc
@ -1161,7 +1161,7 @@ longlong Item_func_locate::val_int()
  null_value=0;
  uint start=0;
  uint start0=0;
-  int  ind;
+  my_match_t match;

  if (arg_count == 3)
  {
@ -1175,11 +1175,12 @@ longlong Item_func_locate::val_int()
  if (!b->length())				// Found empty string at start
    return (longlong) (start+1);
  
-  ind= cmp_collation.collation->coll->instr(cmp_collation.collation,
+  if (!cmp_collation.collation->coll->instr(cmp_collation.collation,
                                            a->ptr()+start, a->length()-start,
-                                            b->ptr(), b->length());
-
-  return (longlong) (ind >= 0 ? ind + start0 + 1 : ind + 1);
+                                            b->ptr(), b->length(),
+                                            &match, 1))
+    return 0;
+  return (longlong) match.mblen + start0 + 1;
 }


--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@ -263,16 +263,25 @@ static int my_strnxfrm_bin(CHARSET_INFO *cs __attribute__((unused)),
 }

 static
-int my_instr_bin(CHARSET_INFO *cs __attribute__((unused)),
+uint my_instr_bin(CHARSET_INFO *cs __attribute__((unused)),
                 const char *big,   uint b_length, 
-		 const char *small, uint s_length)
+		 const char *small, uint s_length,
+		 my_match_t *match, uint nmatch)
 {
  register const uchar *str, *search, *end, *search_end;
  
  if (s_length <= b_length)
  {
    if (!s_length)
-      return 0;		/* Empty string is always found */
+    {
+      if (nmatch)
+      {
+        match->beg= 0;
+        match->end= 0;
+        match->mblen= 0;
+      }
+      return 1;		/* Empty string is always found */
+    }
    
    str= (const uchar*) big;
    search= (const uchar*) small;
@ -293,11 +302,24 @@ skipp:
 	  if ((*i++) != (*j++))
            goto skipp;
        
-	return (int) (str- (const uchar*)big) -1;
+        if (nmatch > 0)
+	{
+	  match[0].beg= 0;
+	  match[0].end= str- (const uchar*)big-1;
+	  match[0].mblen= match[0].end;
+	  
+	  if (nmatch > 1)
+	  {
+	    match[1].beg= match[0].end;
+	    match[1].end= match[0].end+s_length;
+	    match[1].mblen= match[1].end-match[1].beg;
+	  }
+	}
+	return 2;
      }
    }
  }
-  return -1;
+  return 0;
 }


--- a/strings/ctype-mb.c
+++ b/strings/ctype-mb.c
@ -274,18 +274,28 @@ uint my_charpos_mb(CHARSET_INFO *cs __attribute__((unused)),
  return b-b0;
 }

-int my_instr_mb(CHARSET_INFO *cs,
+uint my_instr_mb(CHARSET_INFO *cs,
                 const char *big,   uint b_length, 
-                const char *small, uint s_length)
+                 const char *small, uint s_length,
+                 my_match_t *match, uint nmatch)
 {
-  register const char *end;
+  register const char *end, *big0;
  int res= 0;
  
  if (s_length <= b_length)
  {
    if (!s_length)
-      return 0;		// Empty string is always found
+    {
+      if (nmatch)
+      {
+        match->beg= 0;
+        match->end= 0;
+        match->mblen= 0;
+      }
+      return 1;		// Empty string is always found
+    }
    
+    big0= big;
    end= big+b_length-s_length+1;
    
    while (big < end)
@ -294,15 +304,28 @@ int my_instr_mb(CHARSET_INFO *cs,
      
      if (!cs->coll->strnncoll(cs, (unsigned char*) big,   s_length, 
      				   (unsigned char*) small, s_length))
-        return res;
-      
+      {
+        if (nmatch)
+        {
+          match[0].beg= big0;
+          match[0].end= big-big0;
+          match[0].mblen= res;
+          if (nmatch > 1)
+          {
+            match[1].beg= match[0].end;
+            match[1].end= match[0].end+s_length;
+            match[1].mblen= 0;	/* Not computed */
+          }
+        }
+        return 2;
+      }
      mblen= (mblen= my_ismbchar(cs, big, end)) ? mblen : 1;
      big+= mblen;
      b_length-= mblen;
      res++;
    }
  }
-  return -1;
+  return 0;
 }

 /* BINARY collations handlers for MB charsets */
--- a/strings/ctype-simple.c
+++ b/strings/ctype-simple.c
@ -1030,16 +1030,25 @@ uint my_lengthsp_8bit(CHARSET_INFO *cs __attribute__((unused)),
 }


-int my_instr_simple(CHARSET_INFO *cs,
+uint my_instr_simple(CHARSET_INFO *cs,
                    const char *big,   uint b_length, 
-		    const char *small, uint s_length)
+		    const char *small, uint s_length,
+		    my_match_t *match, uint nmatch)
 {
  register const uchar *str, *search, *end, *search_end;
  
  if (s_length <= b_length)
  {
    if (!s_length)
-      return 0;		// Empty string is always found
+    {
+      if (nmatch)
+      {
+        match->beg= 0;
+        match->end= 0;
+        match->mblen= 0;
+      }
+      return 1;		/* Empty string is always found */
+    }
    
    str= (const uchar*) big;
    search= (const uchar*) small;
@ -1060,11 +1069,24 @@ skipp:
 	  if (cs->sort_order[*i++] != cs->sort_order[*j++]) 
            goto skipp;
        
-	return (int) (str- (const uchar*)big) -1;
+	if (nmatch > 0)
+	{
+	  match[0].beg= 0;
+	  match[0].end= str- (const uchar*)big-1;
+	  match[0].mblen= match[0].end;
+	  
+	  if (nmatch > 1)
+	  {
+	    match[1].beg= match[0].end;
+	    match[1].end= match[0].end+s_length;
+	    match[1].mblen= match[1].end-match[1].beg;
+	  }
+	}
+	return 2;
      }
    }
  }
-  return -1;
+  return 0;
 }