From 9ea83f7fbd6fe3f76ba0304851cfe96cd4cc1071 Mon Sep 17 00:00:00 2001 From: Vladislav Vaintroub Date: Mon, 29 Nov 2021 19:47:36 +0100 Subject: [PATCH] MDEV-26713 set console codepage to what user set in --default-character-set If someone, for whatever reason, uses --default-character-set=cp850, this will avoid incorrect display and the insertion of incorrect data. Adjusting the console codepage sometimes also needs to happen with --default-character-set=auto on older Windows. This is because autodetection is not always exact. For example, the console codepage on US editions of Windows is 437. The client autodetects it as cp850, a rather loose approximation, given 46 code point differences. We change the console codepage to cp850, so that there is no discrepancy. This fix is currently Windows-only, and serves people who used a combination of chcp to achieve a WYSIWYG effect (although this would most likely have been used with utf8 in the past). Now, --default-character-set is a replacement for that. Fix fs_character_set() detection of the current codepage. 
--- client/mysql.cc | 17 ++++++ client/mysqladmin.cc | 1 + client/mysqlcheck.c | 1 + client/mysqlimport.c | 1 + client/mysqlshow.c | 1 + include/my_sys.h | 3 ++ mysys/charset.c | 120 ++++++++++++++++++++++++++++++++++--------- 7 files changed, 121 insertions(+), 23 deletions(-) diff --git a/client/mysql.cc b/client/mysql.cc index 01a511b35b9..d15a5ebdb40 100644 --- a/client/mysql.cc +++ b/client/mysql.cc @@ -3304,6 +3304,21 @@ com_clear(String *buffer,char *line __attribute__((unused))) return 0; } +static void adjust_console_codepage(const char *name __attribute__((unused))) +{ +#ifdef _WIN32 + if (my_set_console_cp(name) < 0) + { /* no usable Windows codepage for this charset, or setting it failed */ + char buf[128]; + snprintf(buf, sizeof(buf), + "WARNING: Could not determine Windows codepage for charset '%s', " + "continue using codepage %u", name, GetConsoleOutputCP()); + put_info(buf, INFO_INFO); + } +#endif +} + + /* ARGSUSED */ static int com_charset(String *buffer __attribute__((unused)), char *line) @@ -3325,6 +3340,7 @@ com_charset(String *buffer __attribute__((unused)), char *line) mysql_set_character_set(&mysql, charset_info->cs_name.str); default_charset= (char *)charset_info->cs_name.str; put_info("Charset changed", INFO_INFO); + adjust_console_codepage(charset_info->cs_name.str); } else put_info("Charset is not found", INFO_INFO); return 0; @@ -4873,6 +4889,7 @@ sql_real_connect(char *host,char *database,char *user,char *password, put_info(buff, INFO_ERROR); return 1; } + adjust_console_codepage(charset_info->cs_name.str); connected=1; #ifndef EMBEDDED_LIBRARY mysql_options(&mysql, MYSQL_OPT_RECONNECT, &debug_info_flag); diff --git a/client/mysqladmin.cc b/client/mysqladmin.cc index 6fa5d6c73d0..a7159d2bb6a 100644 --- a/client/mysqladmin.cc +++ b/client/mysqladmin.cc @@ -438,6 +438,7 @@ int main(int argc,char *argv[]) mysql_options(&mysql,MYSQL_OPT_PROTOCOL,(char*)&opt_protocol); if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME)) default_charset= (char *)my_default_csname(); + my_set_console_cp(default_charset); 
mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset); error_flags= (myf)(opt_nobeep ? 0 : ME_BELL); diff --git a/client/mysqlcheck.c b/client/mysqlcheck.c index 480308aa015..eb063765a37 100644 --- a/client/mysqlcheck.c +++ b/client/mysqlcheck.c @@ -503,6 +503,7 @@ static int get_options(int *argc, char ***argv) printf("Unsupported character set: %s\n", default_charset); DBUG_RETURN(1); } + my_set_console_cp(default_charset); if (*argc > 0 && opt_alldbs) { printf("You should give only options, no arguments at all, with option\n"); diff --git a/client/mysqlimport.c b/client/mysqlimport.c index 8723641c74b..48f11667cd1 100644 --- a/client/mysqlimport.c +++ b/client/mysqlimport.c @@ -525,6 +525,7 @@ static MYSQL *db_connect(char *host, char *database, mysql_options(mysql, MYSQL_DEFAULT_AUTH, opt_default_auth); if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME)) default_charset= (char *)my_default_csname(); + my_set_console_cp(default_charset); mysql_options(mysql, MYSQL_SET_CHARSET_NAME, my_default_csname()); mysql_options(mysql, MYSQL_OPT_CONNECT_ATTR_RESET, 0); mysql_options4(mysql, MYSQL_OPT_CONNECT_ATTR_ADD, diff --git a/client/mysqlshow.c b/client/mysqlshow.c index 9b31d87225c..cbac1817c3c 100644 --- a/client/mysqlshow.c +++ b/client/mysqlshow.c @@ -147,6 +147,7 @@ int main(int argc, char **argv) if (!strcmp(default_charset,MYSQL_AUTODETECT_CHARSET_NAME)) default_charset= (char *)my_default_csname(); + my_set_console_cp(default_charset); mysql_options(&mysql, MYSQL_SET_CHARSET_NAME, default_charset); if (opt_plugin_dir && *opt_plugin_dir) diff --git a/include/my_sys.h b/include/my_sys.h index 5a4608155e4..7186cd03cb1 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -1086,6 +1086,9 @@ extern char *get_tty_password(const char *opt_message); #define BACKSLASH_MBTAIL /* File system character set */ extern CHARSET_INFO *fs_character_set(void); +extern int my_set_console_cp(const char *name); +#else +#define my_set_console_cp(A) do {} while (0) 
#endif extern const char *my_default_csname(void); extern size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info, diff --git a/mysys/charset.c b/mysys/charset.c index 326fea26f1a..da6180dccba 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -1209,30 +1209,17 @@ size_t escape_string_for_mysql(CHARSET_INFO *charset_info, #ifdef BACKSLASH_MBTAIL -static CHARSET_INFO *fs_cset_cache= NULL; - CHARSET_INFO *fs_character_set() { - if (!fs_cset_cache) - { - char buf[10]= "cp"; - GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE, - buf+2, sizeof(buf)-3); - /* - We cannot call get_charset_by_name here - because fs_character_set() is executed before - LOCK_THD_charset mutex initialization, which - is used inside get_charset_by_name. - As we're now interested in cp932 only, - let's just detect it using strcmp(). - */ - fs_cset_cache= - #ifdef HAVE_CHARSET_cp932 - !strcmp(buf, "cp932") ? &my_charset_cp932_japanese_ci : - #endif - &my_charset_bin; - } - return fs_cset_cache; + static CHARSET_INFO *fs_cset_cache; + if (fs_cset_cache) + return fs_cset_cache; +#ifdef HAVE_CHARSET_cp932 + else if (GetACP() == 932) + return fs_cset_cache= &my_charset_cp932_japanese_ci; +#endif + else + return fs_cset_cache= &my_charset_bin; } #endif @@ -1394,7 +1381,7 @@ static const MY_CSET_OS_NAME charsets[] = {"cp54936", "gb18030", my_cs_exact}, #endif {"cp65001", "utf8mb4", my_cs_exact}, - + {"cp65001", "utf8mb3", my_cs_approx}, #else /* not Windows */ {"646", "latin1", my_cs_approx}, /* Default on Solaris */ @@ -1534,3 +1521,90 @@ const char* my_default_csname() #endif return csname ? csname : MYSQL_DEFAULT_CHARSET_NAME; } + + +#ifdef _WIN32 +/** + Extract codepage number from "cpNNNN" string, + and check that this codepage is supported. + + @return 0 - invalid codepage(or unsupported) + > 0 - valid codepage number. 
+*/ +static UINT get_codepage(const char *s) +{ + UINT cp; + if (s[0] != 'c' || s[1] != 'p') + { + DBUG_ASSERT(0); + return 0; + } + cp= strtoul(s + 2, NULL, 10); + if (!IsValidCodePage(cp)) + { + /* + Can happen also with documented CP, i.e 51936 + Perhaps differs from one machine to another. + */ + return 0; + } + return cp; +} + +static UINT mysql_charset_to_codepage(const char *my_cs_name) +{ + const MY_CSET_OS_NAME *csp; + UINT cp=0,tmp; + for (csp= charsets; csp->os_name; csp++) + { + if (!strcasecmp(csp->my_name, my_cs_name)) + { + switch (csp->param) + { + case my_cs_exact: + tmp= get_codepage(csp->os_name); + if (tmp) + return tmp; + break; + case my_cs_approx: + /* + don't return just yet, perhaps there is a better + (exact) match later. + */ + if (!cp) + cp= get_codepage(csp->os_name); + continue; + + default: + return 0; + } + } + } + return cp; +} + +/** Set console codepage for MariaDB's charset name */ +int my_set_console_cp(const char *csname) +{ + UINT cp; + if (fileno(stdout) < 0 || !isatty(fileno(stdout))) + return 0; + cp= mysql_charset_to_codepage(csname); + if (!cp) + { + /* No compatible os charset.*/ + return -1; + } + + if (GetConsoleOutputCP() != cp && !SetConsoleOutputCP(cp)) + { + return -1; + } + + if (GetConsoleCP() != cp && !SetConsoleCP(cp)) + { + return -1; + } + return 0; +} +#endif