mirror of
https://github.com/MariaDB/server.git
synced 2025-08-01 03:47:19 +03:00
MDEV-6255 DUPLICATE KEY Errors on SELECT .. GROUP BY that uses temporary and filesort.
The problem was that my_hash_sort didn't properly delete end-space characters, so strings that should compare identically were seen as different strings. (Space was handled correctly, but not NBSP.) This caused duplicate key errors when a heap table was converted to Aria as part of overflow in group by. Fixed by removing all characters that compare as end space when creating a hash. Other things: - Fixed that --sorted_result also works for errors in mysqltest. - Speed up hash by not comparing strings that have different hashes. - Speed up many my_hash_sort functions by using registers to calculate hash instead of pointers. This was previously done for some functions, but not for all. - Made a macro of the hash function, to simplify code and to be able to experiment with new hash functions. client/mysqltest.cc: Fixed that --sorted_result also works for error messages. mysql-test/r/ctype_partitions.result: New test to ensure that partitions on hash work mysql-test/suite/multi_source/gtid.result: Updated result mysql-test/suite/multi_source/gtid.test: Test that --sorted_result works for error messages mysql-test/suite/multi_source/gtid_ignore_duplicates.result: Updated result mysql-test/suite/multi_source/gtid_ignore_duplicates.test: Updated result mysql-test/suite/multi_source/load_data.result: Updated result mysql-test/suite/multi_source/load_data.test: Updated result mysql-test/t/ctype_partitions.test: New test to ensure that partitions on hash work storage/heap/hp_write.c: Speed up hash by not comparing strings that have different hashes. storage/maria/ma_check.c: Extra debug strings/ctype-bin.c: Use macro for hash function strings/ctype-latin1.c: Use macro for hash function Use registers to calculate hash (speedup) strings/ctype-mb.c: Use macro for hash function Use registers to calculate hash (speedup) strings/ctype-simple.c: Use macro for hash function Use same variable names as in other my_hash_sort functions. 
Update my_hash_sort_simple() to properly remove end space (patch by Bar). strings/ctype-uca.c: Ignore duplicated space inside strings and end space in my_hash_sort_uca(). This fixed MDEV-6255. Use macro for hash function. Use registers to calculate hash (speedup). strings/ctype-ucs2.c: Use macro for hash function. Use registers to calculate hash (speedup). strings/ctype-utf8.c: Use macro for hash function. Use registers to calculate hash (speedup). strings/strings_def.h: Made a macro of the hash function, to simplify code and to be able to experiment with new hash functions.
This commit is contained in:
@ -5087,12 +5087,13 @@ static size_t my_caseup_utf8(CHARSET_INFO *cs, char *src, size_t srclen,
|
||||
|
||||
|
||||
static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
||||
ulong *n1, ulong *n2)
|
||||
ulong *nr1, ulong *nr2)
|
||||
{
|
||||
my_wc_t wc;
|
||||
int res;
|
||||
const uchar *e=s+slen;
|
||||
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
||||
register ulong m1= *nr1, m2= *nr2;
|
||||
|
||||
/*
|
||||
Remove end space. We have to do this to be able to compare
|
||||
@ -5104,12 +5105,11 @@ static void my_hash_sort_utf8(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
||||
while ((s < e) && (res=my_utf8_uni(cs,&wc, (uchar *)s, (uchar*)e))>0 )
|
||||
{
|
||||
my_tosort_unicode(uni_plane, &wc, cs->state);
|
||||
n1[0]^= (((n1[0] & 63)+n2[0])*(wc & 0xFF))+ (n1[0] << 8);
|
||||
n2[0]+=3;
|
||||
n1[0]^= (((n1[0] & 63)+n2[0])*(wc >> 8))+ (n1[0] << 8);
|
||||
n2[0]+=3;
|
||||
MY_HASH_ADD_16(m1, m2, wc);
|
||||
s+=res;
|
||||
}
|
||||
*nr1= m1;
|
||||
*nr2= m2;
|
||||
}
|
||||
|
||||
|
||||
@ -7597,22 +7597,15 @@ my_caseup_utf8mb4(CHARSET_INFO *cs, char *src, size_t srclen,
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
my_hash_add(ulong *n1, ulong *n2, uint ch)
|
||||
{
|
||||
n1[0]^= (((n1[0] & 63) + n2[0]) * (ch)) + (n1[0] << 8);
|
||||
n2[0]+= 3;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
||||
ulong *n1, ulong *n2)
|
||||
ulong *nr1, ulong *nr2)
|
||||
{
|
||||
my_wc_t wc;
|
||||
int res;
|
||||
const uchar *e= s + slen;
|
||||
MY_UNICASE_INFO *uni_plane= cs->caseinfo;
|
||||
register ulong m1= *nr1, m2= *nr2;
|
||||
|
||||
/*
|
||||
Remove end space. We do this to be able to compare
|
||||
@ -7624,8 +7617,7 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
||||
while ((res= my_mb_wc_utf8mb4(cs, &wc, (uchar*) s, (uchar*) e)) > 0)
|
||||
{
|
||||
my_tosort_unicode(uni_plane, &wc, cs->state);
|
||||
my_hash_add(n1, n2, (uint) (wc & 0xFF));
|
||||
my_hash_add(n1, n2, (uint) (wc >> 8) & 0xFF);
|
||||
MY_HASH_ADD_16(m1, m2, (uint) (wc & 0xFFFF));
|
||||
if (wc > 0xFFFF)
|
||||
{
|
||||
/*
|
||||
@ -7635,10 +7627,12 @@ my_hash_sort_utf8mb4(CHARSET_INFO *cs, const uchar *s, size_t slen,
|
||||
This is useful to keep order of records in
|
||||
test results, e.g. for "SHOW GRANTS".
|
||||
*/
|
||||
my_hash_add(n1, n2, (uint) (wc >> 16) & 0xFF);
|
||||
MY_HASH_ADD(m1, m2, (uint) ((wc >> 16) & 0xFF));
|
||||
}
|
||||
s+= res;
|
||||
}
|
||||
*nr1= m1;
|
||||
*nr2= m2;
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user