mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
WL#1324 table name to file name encoding
- Encoding itself, implemented as a charset "filename". Originally planned to use '.' as an escape character, but now changed to '@' for two reasons: "ls" does not return file names starting with '.', considering them a kind of hidden file; some platforms do not allow several dots in a file name. - Replacing many calls of my_snprintf() and strnxmov() with the new build_table_filename(). - Adding MY_APPEND_EXT mysys flag, to append an extension rather than replace it. - Replacing all numeric constants in fn_format flag arguments with their mysys definitions, e.g. MY_UNPACK_FILENAME. - Predictability in several functions/methods: when a table name can appear with or without the .frm extension. Some functions/methods were changed to accept names strictly with .frm, others - strictly without the .frm extension. Several DBUG_ASSERTs were added to check whether an extension is passed. Many files: table name to file name encoding mysql_priv.h: Prototypes for new table name encoding tools. ctype-utf8.c: Implementing "filename" charset for table name to file name encoding. row0mysql.c: Fixing table name prefix. mf_format.c: Adding MY_APPEND_EXT processing. Many files: Fixing tests. my_sys.h: Adding new flag to append rather than replace an extension. m_ctype.h: Adding "filename" charset definition.
This commit is contained in:
@ -235,3 +235,79 @@ uint check_word(TYPELIB *lib, const char *val, const char *end,
|
||||
*end_of_word= ptr;
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
Converts a string between character sets
|
||||
|
||||
SYNOPSIS
|
||||
strconvert()
|
||||
from_cs source character set
|
||||
from source, a null terminated string
|
||||
to destination buffer
|
||||
to_length destination buffer length
|
||||
|
||||
NOTES
|
||||
'to' is always terminated with a '\0' character.
|
||||
If there is no enough space to convert whole string,
|
||||
only prefix is converted, and terminated with '\0'.
|
||||
|
||||
RETURN VALUES
|
||||
result string length
|
||||
*/
|
||||
|
||||
|
||||
uint strconvert(CHARSET_INFO *from_cs, const char *from,
|
||||
CHARSET_INFO *to_cs, char *to, uint to_length)
|
||||
{
|
||||
int cnvres;
|
||||
my_wc_t wc;
|
||||
char *to_start= to;
|
||||
uchar *to_end= (uchar*) to + to_length - 1;
|
||||
int (*mb_wc)(struct charset_info_st *, my_wc_t *, const uchar *,
|
||||
const uchar *)= from_cs->cset->mb_wc;
|
||||
int (*wc_mb)(struct charset_info_st *, my_wc_t, uchar *s, uchar *e)=
|
||||
to_cs->cset->wc_mb;
|
||||
uint error_count= 0;
|
||||
|
||||
while (1)
|
||||
{
|
||||
/*
|
||||
Using 'from + 10' is safe:
|
||||
- it is enough to scan a single character in any character set.
|
||||
- if remaining string is shorter than 10, then mb_wc will return
|
||||
with error because of unexpected '\0' character.
|
||||
*/
|
||||
if ((cnvres= (*mb_wc)(from_cs, &wc,
|
||||
(uchar*) from, (uchar*) from + 10)) > 0)
|
||||
{
|
||||
if (!wc)
|
||||
break;
|
||||
from+= cnvres;
|
||||
}
|
||||
else if (cnvres == MY_CS_ILSEQ)
|
||||
{
|
||||
error_count++;
|
||||
from++;
|
||||
wc= '?';
|
||||
}
|
||||
else
|
||||
break; // Impossible char.
|
||||
|
||||
outp:
|
||||
|
||||
if ((cnvres= (*wc_mb)(to_cs, wc, (uchar*) to, to_end)) > 0)
|
||||
to+= cnvres;
|
||||
else if (cnvres == MY_CS_ILUNI && wc != '?')
|
||||
{
|
||||
error_count++;
|
||||
wc= '?';
|
||||
goto outp;
|
||||
}
|
||||
else
|
||||
break;
|
||||
}
|
||||
*to= '\0';
|
||||
return (uint32) (to - to_start);
|
||||
|
||||
}
|
||||
|
Reference in New Issue
Block a user