WL#1324 table name to file name encoding

- Encoding itself, implemented as a charset "filename". Originally planned to use '.' as an escape character, but now changed to '@' for two reasons: "ls" does not return file names starting with '.', considering them a kind of hidden file; some platforms do not allow several dots in a file name. - Replacing many calls to my_snprintf() and strxnmov() with the new build_table_filename(). - Adding MY_APPEND_EXT mysys flag, to append an extension rather than replace it. - Replacing all numeric constants in fn_format flag arguments with their mysys definitions, e.g. MY_UNPACK_FILENAME. - Predictability in several functions/methods: when a table name can appear with or without .frm extension. Some functions/methods were changed to accept names strictly with .frm, others strictly without .frm extensions. Several DBUG_ASSERTs were added to check whether an extension is passed. Many files: table name to file name encoding mysql_priv.h: Prototypes for new table name encoding tools. ctype-utf8.c: Implementing "filename" charset for table name to file name encoding. row0mysql.c: Fixing table name prefix. mf_format.c: Adding MY_APPEND_EXT processing. Many files: Fixing tests. my_sys.h: Adding new flag to append rather than replace an extension. m_ctype.h: Adding "filename" charset definition.
2025-07-30 16:24:05 +03:00 · 2005-12-31 09:01:26 +04:00
parent 83d8979ca2
commit 6ff211329f
42 changed files with 1803 additions and 269 deletions
--- a/sql/strfunc.cc
+++ b/sql/strfunc.cc
@@ -235,3 +235,79 @@ uint check_word(TYPELIB *lib, const char *val, const char *end,
    *end_of_word= ptr;
  return res;
 }
+
+
+/*
+  Converts a null-terminated string from one character set to another.
+
+  SYNOPSIS
+    strconvert()
+    from_cs       source character set
+    from          source, a null terminated string
+    to_cs         destination character set
+    to            destination buffer
+    to_length     destination buffer length in bytes, '\0' included
+
+  NOTES
+    'to' is always terminated with '\0', unless to_length is 0: then
+    nothing is written. If the whole string does not fit, only a prefix
+    is converted. Invalid source sequences and characters that 'to_cs'
+    cannot represent are replaced with '?'.
+
+  RETURN VALUES
+    result string length, not counting the terminating '\0'
+*/
+
+uint strconvert(CHARSET_INFO *from_cs, const char *from,
+                CHARSET_INFO *to_cs, char *to, uint to_length)
+{
+  int cnvres;
+  my_wc_t wc;
+  char *to_start= to;
+  uchar *to_end;
+  int (*mb_wc)(struct charset_info_st *, my_wc_t *, const uchar *,
+               const uchar *)= from_cs->cset->mb_wc;
+  int (*wc_mb)(struct charset_info_st *, my_wc_t, uchar *s, uchar *e)=
+    to_cs->cset->wc_mb;
+
+  if (!to_length)
+    return 0;                           // No room even for '\0'.
+  to_end= (uchar*) to + to_length - 1;  // Last byte is kept for '\0'.
+
+  while (1)
+  {
+    /*
+      Using 'from + 10' is safe:
+      - it is enough to scan a single character in any character set.
+      - if remaining string is shorter than 10, then mb_wc will return
+        with error because of unexpected '\0' character.
+    */
+    if ((cnvres= (*mb_wc)(from_cs, &wc,
+                          (uchar*) from, (uchar*) from + 10)) > 0)
+    {
+      if (!wc)
+        break;                          // End of source string.
+      from+= cnvres;
+    }
+    else if (cnvres == MY_CS_ILSEQ)
+    {
+      from++;                           // Skip one invalid byte.
+      wc= '?';
+    }
+    else
+      break; // Impossible char.
+
+outp:
+    if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
+      to+= cnvres;
+    else if (cnvres == MY_CS_ILUNI && wc != '?')
+    {
+      wc= '?';                          // Not encodable in to_cs.
+      goto outp;
+    }
+    else
+      break;                            // No space left, or '?' failed.
+  }
+  *to= '\0';
+  return (uint) (to - to_start);
+}