Bug#15376: Unassigned multibyte codes are converted to U+0000

Merging changes into 5.0 mysql-test/r/ctype_eucjpms.result: Adding test mysql-test/r/ctype_ujis.result: After merge fix mysql-test/t/ctype_eucjpms.test: Adding test strings/ctype-cp932.c: After merge fix strings/ctype-eucjpms.c: Applying changes to eucjpms, similar to ujis. Note: eucjpms appeared in 5.0 so the original patch, which was for 4.1, didn't fix eucjpms.
2025-07-30 16:24:05 +03:00 · 2006-03-23 14:03:39 +04:00
parent 1f129403af
commit 1b58717cd8
5 changed files with 45 additions and 22 deletions
--- a/mysql-test/r/ctype_eucjpms.result
+++ b/mysql-test/r/ctype_eucjpms.result
@ -9819,3 +9819,9 @@ eucjpms_bin	6109
 eucjpms_bin	61
 eucjpms_bin	6120
 drop table t1;
+select hex(convert(_eucjpms 0xA5FE41 using ucs2));
+hex(convert(_eucjpms 0xA5FE41 using ucs2))
+003F0041
+select hex(convert(_eucjpms 0x8FABF841 using ucs2));
+hex(convert(_eucjpms 0x8FABF841 using ucs2))
+003F0041
--- a/mysql-test/r/ctype_ujis.result
+++ b/mysql-test/r/ctype_ujis.result
@ -2307,6 +2307,12 @@ select c1 as c2h from t1 where c1 like 'ab#_def' escape '#';
 c2h
 ab_def
 drop table t1;
+select hex(convert(_ujis 0xA5FE41 using ucs2));
+hex(convert(_ujis 0xA5FE41 using ucs2))
+003F0041
+select hex(convert(_ujis 0x8FABF841 using ucs2));
+hex(convert(_ujis 0x8FABF841 using ucs2))
+003F0041
 DROP TABLE IF EXISTS t1, t2;
 DROP PROCEDURE IF EXISTS sp1;
 set names ujis;
@ -2337,9 +2343,3 @@ DROP TABLE t2;
 set names default;
 set character_set_database=default;
 set character_set_server=default;
-select hex(convert(_ujis 0xA5FE41 using ucs2));
-hex(convert(_ujis 0xA5FE41 using ucs2))
-003F0041
-select hex(convert(_ujis 0x8FABF841 using ucs2));
-hex(convert(_ujis 0x8FABF841 using ucs2))
-003F0041
--- a/mysql-test/t/ctype_eucjpms.test
+++ b/mysql-test/t/ctype_eucjpms.test
@ -363,3 +363,20 @@ SET collation_connection='eucjpms_japanese_ci';
 -- source include/ctype_filesort.inc
 SET collation_connection='eucjpms_bin';
 -- source include/ctype_filesort.inc
+
+
+#
+# Bugs#15375: Unassigned multibyte codes are broken
+# into parts when converting to Unicode.
+# This query should return 0x003F0041. I.e. it should
+# scan unassigned double-byte character 0xA5FE, convert
+# it as QUESTION MARK 0x003F and then scan the next
+# character, which is a single byte character 0x41.
+#
+select hex(convert(_eucjpms 0xA5FE41 using ucs2));
+# This one should return 0x003F0041:
+# scan unassigned three-byte character 0x8FABF8,
+# convert it as QUESTION MARK 0x003F and then scan
+# the next character, which is a single byte character 0x41.
+select hex(convert(_eucjpms 0x8FABF841 using ucs2));
+