mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-19522 InnoDB commit fails when FTS_DOC_ID value is greater than 4294967295
InnoDB commit fails when a consecutive FTS_DOC_ID value is greater than 4294967295. The fix is that InnoDB removes the limitation on the delta between FTS_DOC_ID values, FTS encodes full 8-byte values, and the FTS_DOC_ID_MAX_STEP constant is removed. The fts0vlc.ic file is replaced with fts0vlc.h. fts_encode_int(): is now able to encode values that need up to 10 bytes. fts_get_encoded_len(): returns the encoded length of values that need up to 10 bytes. fts_decode_vlc(): adds a debug assertion to verify that the maximum length allowed is 10 bytes. mach_read_uint64_little_endian(): reads a 64-bit value stored in little-endian format. Added a unit test case which checks the FTS encoding of the minimum and maximum value for each encoded length.
This commit is contained in:
committed by
Marko Mäkelä
parent
6b4fad9402
commit
8ce8c269f4
@ -313,9 +313,7 @@ FTS_DOC_ID
|
||||
65536
|
||||
131071
|
||||
drop table t1;
|
||||
call mtr.add_suppression("\\[ERROR\\] InnoDB: Doc ID 20030101000000 is too big. Its difference with largest used Doc ID 0 cannot exceed or equal to 65535");
|
||||
CREATE TABLE t1 (FTS_DOC_ID BIGINT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
|
||||
title VARCHAR(200), FULLTEXT(title)) ENGINE=InnoDB;
|
||||
INSERT INTO t1 VALUES (NULL, NULL), (20030101000000, 20030102000000);
|
||||
ERROR HY000: Invalid InnoDB FTS Doc ID
|
||||
DROP TABLE t1;
|
||||
|
@ -972,3 +972,24 @@ SELECT * FROM information_schema.innodb_ft_deleted;
|
||||
DOC_ID
|
||||
DROP TABLE t1;
|
||||
SET GLOBAL innodb_ft_aux_table=DEFAULT;
|
||||
#
|
||||
# MDEV-19522 InnoDB commit fails when FTS_DOC_ID value
|
||||
# is greater than 4294967295
|
||||
#
|
||||
CREATE TABLE t1(
|
||||
FTS_DOC_ID BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
|
||||
f1 TEXT, f2 TEXT, PRIMARY KEY (FTS_DOC_ID),
|
||||
FULLTEXT KEY (f1)) ENGINE=InnoDB;
|
||||
INSERT INTO t1 VALUES (1,'txt','bbb');
|
||||
UPDATE t1 SET FTS_DOC_ID = 4294967298;
|
||||
SELECT * FROM t1 WHERE match(f1) against("txt");
|
||||
FTS_DOC_ID f1 f2
|
||||
4294967298 txt bbb
|
||||
SET @@session.insert_id = 100000000000;
|
||||
INSERT INTO t1(f1, f2) VALUES ('aaa', 'bbb');
|
||||
CREATE FULLTEXT INDEX i ON t1 (f2);
|
||||
SELECT * FROM t1 WHERE match(f2) against("bbb");
|
||||
FTS_DOC_ID f1 f2
|
||||
4294967298 txt bbb
|
||||
100000000000 aaa bbb
|
||||
DROP TABLE t1;
|
||||
|
@ -277,9 +277,7 @@ insert into t1(f1, f2) values(3, "This is the third record");
|
||||
select FTS_DOC_ID from t1;
|
||||
drop table t1;
|
||||
|
||||
call mtr.add_suppression("\\[ERROR\\] InnoDB: Doc ID 20030101000000 is too big. Its difference with largest used Doc ID 0 cannot exceed or equal to 65535");
|
||||
CREATE TABLE t1 (FTS_DOC_ID BIGINT UNSIGNED AUTO_INCREMENT NOT NULL PRIMARY KEY,
|
||||
title VARCHAR(200), FULLTEXT(title)) ENGINE=InnoDB;
|
||||
--error 182
|
||||
INSERT INTO t1 VALUES (NULL, NULL), (20030101000000, 20030102000000);
|
||||
DROP TABLE t1;
|
||||
|
@ -942,3 +942,21 @@ SET GLOBAL innodb_ft_aux_table='test/t1';
|
||||
SELECT * FROM information_schema.innodb_ft_deleted;
|
||||
DROP TABLE t1;
|
||||
SET GLOBAL innodb_ft_aux_table=DEFAULT;
|
||||
|
||||
--echo #
|
||||
--echo # MDEV-19522 InnoDB commit fails when FTS_DOC_ID value
|
||||
--echo # is greater than 4294967295
|
||||
--echo #
|
||||
CREATE TABLE t1(
|
||||
FTS_DOC_ID BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
|
||||
f1 TEXT, f2 TEXT, PRIMARY KEY (FTS_DOC_ID),
|
||||
FULLTEXT KEY (f1)) ENGINE=InnoDB;
|
||||
INSERT INTO t1 VALUES (1,'txt','bbb');
|
||||
UPDATE t1 SET FTS_DOC_ID = 4294967298;
|
||||
SELECT * FROM t1 WHERE match(f1) against("txt");
|
||||
SET @@session.insert_id = 100000000000;
|
||||
INSERT INTO t1(f1, f2) VALUES ('aaa', 'bbb');
|
||||
CREATE FULLTEXT INDEX i ON t1 (f2);
|
||||
SELECT * FROM t1 WHERE match(f2) against("bbb");
|
||||
# Cleanup
|
||||
DROP TABLE t1;
|
||||
|
@ -188,3 +188,7 @@ IF(MSVC)
|
||||
ENDIF()
|
||||
|
||||
ADD_SUBDIRECTORY(${CMAKE_SOURCE_DIR}/extra/mariabackup ${CMAKE_BINARY_DIR}/extra/mariabackup)
|
||||
|
||||
IF(WITH_UNIT_TESTS)
|
||||
ADD_SUBDIRECTORY(unittest)
|
||||
ENDIF()
|
||||
|
@ -32,7 +32,7 @@ Full Text Search interface
|
||||
#include "fts0priv.h"
|
||||
#include "fts0types.h"
|
||||
#include "fts0types.ic"
|
||||
#include "fts0vlc.ic"
|
||||
#include "fts0vlc.h"
|
||||
#include "fts0plugin.h"
|
||||
#include "dict0priv.h"
|
||||
#include "dict0stats.h"
|
||||
@ -1247,7 +1247,7 @@ fts_cache_node_add_positions(
|
||||
ulint enc_len;
|
||||
ulint last_pos;
|
||||
byte* ptr_start;
|
||||
ulint doc_id_delta;
|
||||
doc_id_t doc_id_delta;
|
||||
|
||||
#ifdef UNIV_DEBUG
|
||||
if (cache) {
|
||||
@ -1258,7 +1258,7 @@ fts_cache_node_add_positions(
|
||||
ut_ad(doc_id >= node->last_doc_id);
|
||||
|
||||
/* Calculate the space required to store the ilist. */
|
||||
doc_id_delta = (ulint)(doc_id - node->last_doc_id);
|
||||
doc_id_delta = doc_id - node->last_doc_id;
|
||||
enc_len = fts_get_encoded_len(doc_id_delta);
|
||||
|
||||
last_pos = 0;
|
||||
@ -1307,14 +1307,14 @@ fts_cache_node_add_positions(
|
||||
ptr_start = ptr;
|
||||
|
||||
/* Encode the new fragment. */
|
||||
ptr += fts_encode_int(doc_id_delta, ptr);
|
||||
ptr = fts_encode_int(doc_id_delta, ptr);
|
||||
|
||||
last_pos = 0;
|
||||
for (i = 0; i < ib_vector_size(positions); i++) {
|
||||
ulint pos = *(static_cast<ulint*>(
|
||||
ib_vector_get(positions, i)));
|
||||
|
||||
ptr += fts_encode_int(pos - last_pos, ptr);
|
||||
ptr = fts_encode_int(pos - last_pos, ptr);
|
||||
last_pos = pos;
|
||||
}
|
||||
|
||||
|
@ -36,6 +36,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang
|
||||
#include "ut0list.h"
|
||||
#include "zlib.h"
|
||||
#include "fts0opt.h"
|
||||
#include "fts0vlc.h"
|
||||
|
||||
/** The FTS optimize thread's work queue. */
|
||||
ib_wqueue_t* fts_optimize_wq;
|
||||
@ -1116,7 +1117,7 @@ fts_optimize_encode_node(
|
||||
ulint pos_enc_len;
|
||||
doc_id_t doc_id_delta;
|
||||
dberr_t error = DB_SUCCESS;
|
||||
byte* src = enc->src_ilist_ptr;
|
||||
const byte* src = enc->src_ilist_ptr;
|
||||
|
||||
if (node->first_doc_id == 0) {
|
||||
ut_a(node->last_doc_id == 0);
|
||||
@ -1173,7 +1174,7 @@ fts_optimize_encode_node(
|
||||
|
||||
/* Encode the doc id delta. fts_encode_int() takes the full doc_id_t
|
||||
value, so no narrowing cast is needed and no precision is lost. */
|
||||
dst += fts_encode_int((ulint) doc_id_delta, dst);
|
||||
dst = fts_encode_int(doc_id_delta, dst);
|
||||
|
||||
/* Copy the encoded pos array. */
|
||||
memcpy(dst, src, pos_enc_len);
|
||||
@ -1220,7 +1221,8 @@ fts_optimize_node(
|
||||
doc_id_t delta;
|
||||
doc_id_t del_doc_id = FTS_NULL_DOC_ID;
|
||||
|
||||
delta = fts_decode_vlc(&enc->src_ilist_ptr);
|
||||
delta = fts_decode_vlc(
|
||||
(const byte**)&enc->src_ilist_ptr);
|
||||
|
||||
test_again:
|
||||
/* Check whether the doc id is in the delete list, if
|
||||
@ -1248,7 +1250,7 @@ test_again:
|
||||
|
||||
/* Skip the entries for this document. */
|
||||
while (*enc->src_ilist_ptr) {
|
||||
fts_decode_vlc(&enc->src_ilist_ptr);
|
||||
fts_decode_vlc((const byte**)&enc->src_ilist_ptr);
|
||||
}
|
||||
|
||||
/* Skip the end of word position marker. */
|
||||
|
@ -34,6 +34,7 @@ Completed 2011/7/10 Sunny and Jimmy Yang
|
||||
#include "fts0pars.h"
|
||||
#include "fts0types.h"
|
||||
#include "fts0plugin.h"
|
||||
#include "fts0vlc.h"
|
||||
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
@ -3224,7 +3225,7 @@ fts_query_filter_doc_ids(
|
||||
ulint len, /*!< in: doc id ilist size */
|
||||
ibool calc_doc_count) /*!< in: whether to remember doc count */
|
||||
{
|
||||
byte* ptr = static_cast<byte*>(data);
|
||||
const byte* ptr = static_cast<byte*>(data);
|
||||
doc_id_t doc_id = 0;
|
||||
ulint decoded = 0;
|
||||
ib_rbt_t* doc_freqs = word_freq->doc_freqs;
|
||||
@ -3234,8 +3235,8 @@ fts_query_filter_doc_ids(
|
||||
ulint freq = 0;
|
||||
fts_doc_freq_t* doc_freq;
|
||||
fts_match_t* match = NULL;
|
||||
ulint last_pos = 0;
|
||||
ulint pos = fts_decode_vlc(&ptr);
|
||||
doc_id_t last_pos = 0;
|
||||
doc_id_t pos = fts_decode_vlc(&ptr);
|
||||
|
||||
/* Some sanity checks. */
|
||||
if (doc_id == 0) {
|
||||
|
@ -8543,8 +8543,7 @@ calc_row_difference(
|
||||
&& prebuilt->table->fts
|
||||
&& innobase_strcasecmp(
|
||||
field->field_name, FTS_DOC_ID_COL_NAME) == 0) {
|
||||
doc_id = (doc_id_t) mach_read_from_n_little_endian(
|
||||
n_ptr, 8);
|
||||
doc_id = mach_read_uint64_little_endian(n_ptr);
|
||||
if (doc_id == 0) {
|
||||
return(DB_FTS_INVALID_DOCID);
|
||||
}
|
||||
@ -8787,16 +8786,6 @@ calc_row_difference(
|
||||
<< innodb_table->name;
|
||||
|
||||
return(DB_FTS_INVALID_DOCID);
|
||||
} else if ((doc_id
|
||||
- prebuilt->table->fts->cache->next_doc_id)
|
||||
>= FTS_DOC_ID_MAX_STEP) {
|
||||
|
||||
ib::warn() << "Doc ID " << doc_id << " is too"
|
||||
" big. Its difference with largest"
|
||||
" Doc ID used " << prebuilt->table->fts
|
||||
->cache->next_doc_id - 1
|
||||
<< " cannot exceed or equal to "
|
||||
<< FTS_DOC_ID_MAX_STEP;
|
||||
}
|
||||
|
||||
|
||||
|
@ -58,6 +58,7 @@ Modified Dec 29, 2014 Jan Lindström (Added sys_semaphore_waits)
|
||||
#include "fil0fil.h"
|
||||
#include "fil0crypt.h"
|
||||
#include "dict0crea.h"
|
||||
#include "fts0vlc.h"
|
||||
|
||||
/** The latest successfully looked up innodb_fts_aux_table */
|
||||
UNIV_INTERN table_id_t innodb_ft_aux_table_id;
|
||||
@ -2775,7 +2776,7 @@ i_s_fts_index_cache_fill_one_index(
|
||||
/* Decode the ilist, and display Doc ID and word position */
|
||||
for (ulint i = 0; i < ib_vector_size(word->nodes); i++) {
|
||||
fts_node_t* node;
|
||||
byte* ptr;
|
||||
const byte* ptr;
|
||||
ulint decoded = 0;
|
||||
doc_id_t doc_id = 0;
|
||||
|
||||
@ -2785,13 +2786,11 @@ i_s_fts_index_cache_fill_one_index(
|
||||
ptr = node->ilist;
|
||||
|
||||
while (decoded < node->ilist_size) {
|
||||
ulint pos = fts_decode_vlc(&ptr);
|
||||
|
||||
doc_id += pos;
|
||||
doc_id += fts_decode_vlc(&ptr);
|
||||
|
||||
/* Get position info */
|
||||
while (*ptr) {
|
||||
pos = fts_decode_vlc(&ptr);
|
||||
|
||||
OK(field_store_string(
|
||||
fields[I_S_FTS_WORD],
|
||||
@ -2812,7 +2811,7 @@ i_s_fts_index_cache_fill_one_index(
|
||||
doc_id, true));
|
||||
|
||||
OK(fields[I_S_FTS_ILIST_DOC_POS]->store(
|
||||
pos, true));
|
||||
fts_decode_vlc(&ptr), true));
|
||||
|
||||
OK(schema_table_store_record(
|
||||
thd, table));
|
||||
@ -3146,7 +3145,7 @@ i_s_fts_index_table_fill_one_fetch(
|
||||
/* Decode the ilist, and display Doc ID and word position */
|
||||
for (ulint i = 0; i < ib_vector_size(word->nodes); i++) {
|
||||
fts_node_t* node;
|
||||
byte* ptr;
|
||||
const byte* ptr;
|
||||
ulint decoded = 0;
|
||||
doc_id_t doc_id = 0;
|
||||
|
||||
@ -3156,13 +3155,10 @@ i_s_fts_index_table_fill_one_fetch(
|
||||
ptr = node->ilist;
|
||||
|
||||
while (decoded < node->ilist_size) {
|
||||
ulint pos = fts_decode_vlc(&ptr);
|
||||
|
||||
doc_id += pos;
|
||||
doc_id += fts_decode_vlc(&ptr);
|
||||
|
||||
/* Get position info */
|
||||
while (*ptr) {
|
||||
pos = fts_decode_vlc(&ptr);
|
||||
|
||||
OK(field_store_string(
|
||||
fields[I_S_FTS_WORD],
|
||||
@ -3181,7 +3177,7 @@ i_s_fts_index_table_fill_one_fetch(
|
||||
longlong(doc_id), true));
|
||||
|
||||
OK(fields[I_S_FTS_ILIST_DOC_POS]->store(
|
||||
pos, true));
|
||||
fts_decode_vlc(&ptr), true));
|
||||
|
||||
OK(schema_table_store_record(
|
||||
thd, table));
|
||||
|
@ -96,10 +96,6 @@ those defined in mysql file ft_global.h */
|
||||
/** Threshold where our optimize thread automatically kicks in */
|
||||
#define FTS_OPTIMIZE_THRESHOLD 10000000
|
||||
|
||||
/** Threshold to avoid exhausting of doc ids. Consecutive doc id difference
|
||||
should not exceed FTS_DOC_ID_MAX_STEP */
|
||||
#define FTS_DOC_ID_MAX_STEP 65535
|
||||
|
||||
/** Maximum possible Fulltext word length in bytes (assuming mbmaxlen=4) */
|
||||
#define FTS_MAX_WORD_LEN (HA_FT_MAXCHARLEN * 4)
|
||||
|
||||
|
@ -314,16 +314,6 @@ int fts_doc_id_cmp(
|
||||
const void* p1, /*!< in: id1 */
|
||||
const void* p2); /*!< in: id2 */
|
||||
|
||||
/******************************************************************//**
|
||||
Decode and return the integer that was encoded using our VLC scheme.*/
|
||||
UNIV_INLINE
|
||||
ulint
|
||||
fts_decode_vlc(
|
||||
/*===========*/
|
||||
/*!< out: value decoded */
|
||||
byte** ptr); /*!< in: ptr to decode from, this ptr is
|
||||
incremented by the number of bytes decoded */
|
||||
|
||||
/******************************************************************//**
|
||||
Duplicate a string. */
|
||||
UNIV_INLINE
|
||||
@ -338,28 +328,6 @@ fts_string_dup(
|
||||
const fts_string_t* src, /*!< in: src string */
|
||||
mem_heap_t* heap); /*!< in: heap to use */
|
||||
|
||||
/******************************************************************//**
|
||||
Return length of val if it were encoded using our VLC scheme. */
|
||||
UNIV_INLINE
|
||||
ulint
|
||||
fts_get_encoded_len(
|
||||
/*================*/
|
||||
/*!< out: length of value
|
||||
encoded, in bytes */
|
||||
ulint val); /*!< in: value to encode */
|
||||
|
||||
/******************************************************************//**
|
||||
Encode an integer using our VLC scheme and return the length in bytes. */
|
||||
UNIV_INLINE
|
||||
ulint
|
||||
fts_encode_int(
|
||||
/*===========*/
|
||||
/*!< out: length of value
|
||||
encoded, in bytes */
|
||||
ulint val, /*!< in: value to encode */
|
||||
byte* buf); /*!< in: buffer, must have
|
||||
enough space */
|
||||
|
||||
/******************************************************************//**
|
||||
Get the selected FTS aux INDEX suffix. */
|
||||
UNIV_INLINE
|
||||
@ -381,6 +349,5 @@ fts_select_index(
|
||||
ulint len);
|
||||
|
||||
#include "fts0types.ic"
|
||||
#include "fts0vlc.ic"
|
||||
|
||||
#endif /* INNOBASE_FTS0TYPES_H */
|
||||
|
124
storage/innobase/include/fts0vlc.h
Normal file
124
storage/innobase/include/fts0vlc.h
Normal file
@ -0,0 +1,124 @@
|
||||
/**
|
||||
|
||||
Copyright (c) 2021, MariaDB Corporation.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
|
||||
**/
|
||||
/**
|
||||
@file include/fts0vlc.h
|
||||
Full text variable length integer encoding/decoding.
|
||||
|
||||
Created 2021-10-19 Thirunarayanan Balathandayuthapani
|
||||
**/
|
||||
|
||||
/** Return length of val if it were encoded using our VLC scheme.
|
||||
@param val value to encode
|
||||
@return length of value encoded, in bytes */
|
||||
inline size_t fts_get_encoded_len(doc_id_t val)
|
||||
{
|
||||
if (val < static_cast<doc_id_t>(1) << 7)
|
||||
return 1;
|
||||
if (val < static_cast<doc_id_t>(1) << 14)
|
||||
return 2;
|
||||
if (val < static_cast<doc_id_t>(1) << 21)
|
||||
return 3;
|
||||
if (val < static_cast<doc_id_t>(1) << 28)
|
||||
return 4;
|
||||
if (val < static_cast<doc_id_t>(1) << 35)
|
||||
return 5;
|
||||
if (val < static_cast<doc_id_t>(1) << 42)
|
||||
return 6;
|
||||
if (val < static_cast<doc_id_t>(1) << 49)
|
||||
return 7;
|
||||
if (val < static_cast<doc_id_t>(1) << 56)
|
||||
return 8;
|
||||
if (val < static_cast<doc_id_t>(1) << 63)
|
||||
return 9;
|
||||
return 10;
|
||||
}
|
||||
|
||||
/** Encode an integer using our VLC scheme and return the
|
||||
length in bytes.
|
||||
@param val value to encode
|
||||
@param buf buffer, must have enough space
|
||||
@return length of value encoded, in bytes */
|
||||
inline byte *fts_encode_int(doc_id_t val, byte *buf)
|
||||
{
|
||||
if (val < static_cast<doc_id_t>(1) << 7)
|
||||
goto add_1;
|
||||
if (val < static_cast<doc_id_t>(1) << 14)
|
||||
goto add_2;
|
||||
if (val < static_cast<doc_id_t>(1) << 21)
|
||||
goto add_3;
|
||||
if (val < static_cast<doc_id_t>(1) << 28)
|
||||
goto add_4;
|
||||
if (val < static_cast<doc_id_t>(1) << 35)
|
||||
goto add_5;
|
||||
if (val < static_cast<doc_id_t>(1) << 42)
|
||||
goto add_6;
|
||||
if (val < static_cast<doc_id_t>(1) << 49)
|
||||
goto add_7;
|
||||
if (val < static_cast<doc_id_t>(1) << 56)
|
||||
goto add_8;
|
||||
if (val < static_cast<doc_id_t>(1) << 63)
|
||||
goto add_9;
|
||||
|
||||
*buf++= static_cast<byte>(val >> 63);
|
||||
add_9:
|
||||
*buf++= static_cast<byte>(val >> 56) & 0x7F;
|
||||
add_8:
|
||||
*buf++= static_cast<byte>(val >> 49) & 0x7F;
|
||||
add_7:
|
||||
*buf++= static_cast<byte>(val >> 42) & 0x7F;
|
||||
add_6:
|
||||
*buf++= static_cast<byte>(val >> 35) & 0x7F;
|
||||
add_5:
|
||||
*buf++= static_cast<byte>(val >> 28) & 0x7F;
|
||||
add_4:
|
||||
*buf++= static_cast<byte>(val >> 21) & 0x7F;
|
||||
add_3:
|
||||
*buf++= static_cast<byte>(val >> 14) & 0x7F;
|
||||
add_2:
|
||||
*buf++= static_cast<byte>(val >> 7) & 0x7F;
|
||||
add_1:
|
||||
*buf++= static_cast<byte>(val) | 0x80;
|
||||
return buf;
|
||||
}
|
||||
|
||||
/** Decode and return the integer that was encoded using
|
||||
our VLC scheme.
|
||||
@param ptr pointer to decode from, this ptr is
|
||||
incremented by the number of bytes decoded
|
||||
@return value decoded */
|
||||
inline doc_id_t fts_decode_vlc(const byte **ptr)
|
||||
{
|
||||
ut_d(const byte *const start= *ptr);
|
||||
ut_ad(*start);
|
||||
|
||||
doc_id_t val= 0;
|
||||
for (;;)
|
||||
{
|
||||
byte b= *(*ptr)++;
|
||||
val|= (b & 0x7F);
|
||||
|
||||
/* High-bit on means "last byte in the encoded integer". */
|
||||
if (b & 0x80)
|
||||
break;
|
||||
ut_ad(val < static_cast<doc_id_t>(1) << (64 - 7));
|
||||
val <<= 7;
|
||||
}
|
||||
|
||||
ut_ad(*ptr - start <= 10);
|
||||
|
||||
return(val);
|
||||
}
|
@ -1,142 +0,0 @@
|
||||
/*****************************************************************************
|
||||
|
||||
Copyright (c) 2007, 2011, Oracle and/or its affiliates. All Rights Reserved.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free Software
|
||||
Foundation; version 2 of the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful, but WITHOUT
|
||||
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along with
|
||||
this program; if not, write to the Free Software Foundation, Inc.,
|
||||
51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
|
||||
|
||||
*****************************************************************************/
|
||||
|
||||
/******************************************************************//**
|
||||
@file include/fts0vlc.ic
|
||||
Full text variable length integer encoding/decoding.
|
||||
|
||||
Created 2007-03-27 Sunny Bains
|
||||
*******************************************************/
|
||||
|
||||
#ifndef INNOBASE_FTS0VLC_IC
|
||||
#define INNOBASE_FTS0VLC_IC
|
||||
|
||||
#include "fts0types.h"
|
||||
|
||||
/******************************************************************//**
|
||||
Return length of val if it were encoded using our VLC scheme.
|
||||
FIXME: We will need to be able encode 8 bytes value
|
||||
@return length of value encoded, in bytes */
|
||||
UNIV_INLINE
|
||||
ulint
|
||||
fts_get_encoded_len(
|
||||
/*================*/
|
||||
ulint val) /* in: value to encode */
|
||||
{
|
||||
if (val <= 127) {
|
||||
return(1);
|
||||
} else if (val <= 16383) {
|
||||
return(2);
|
||||
} else if (val <= 2097151) {
|
||||
return(3);
|
||||
} else if (val <= 268435455) {
|
||||
return(4);
|
||||
} else {
|
||||
/* Possibly we should care that on 64-bit machines ulint can
|
||||
contain values that we can't encode in 5 bytes, but
|
||||
fts_encode_int doesn't handle them either so it doesn't much
|
||||
matter. */
|
||||
|
||||
return(5);
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Encode an integer using our VLC scheme and return the length in bytes.
|
||||
@return length of value encoded, in bytes */
|
||||
UNIV_INLINE
|
||||
ulint
|
||||
fts_encode_int(
|
||||
/*===========*/
|
||||
ulint val, /* in: value to encode */
|
||||
byte* buf) /* in: buffer, must have enough space */
|
||||
{
|
||||
ulint len;
|
||||
|
||||
if (val <= 127) {
|
||||
*buf = (byte) val;
|
||||
|
||||
len = 1;
|
||||
} else if (val <= 16383) {
|
||||
*buf++ = (byte)(val >> 7);
|
||||
*buf = (byte)(val & 0x7F);
|
||||
|
||||
len = 2;
|
||||
} else if (val <= 2097151) {
|
||||
*buf++ = (byte)(val >> 14);
|
||||
*buf++ = (byte)((val >> 7) & 0x7F);
|
||||
*buf = (byte)(val & 0x7F);
|
||||
|
||||
len = 3;
|
||||
} else if (val <= 268435455) {
|
||||
*buf++ = (byte)(val >> 21);
|
||||
*buf++ = (byte)((val >> 14) & 0x7F);
|
||||
*buf++ = (byte)((val >> 7) & 0x7F);
|
||||
*buf = (byte)(val & 0x7F);
|
||||
|
||||
len = 4;
|
||||
} else {
|
||||
/* Best to keep the limitations of the 32/64 bit versions
|
||||
identical, at least for the time being. */
|
||||
ut_ad(val <= 4294967295u);
|
||||
|
||||
*buf++ = (byte)(val >> 28);
|
||||
*buf++ = (byte)((val >> 21) & 0x7F);
|
||||
*buf++ = (byte)((val >> 14) & 0x7F);
|
||||
*buf++ = (byte)((val >> 7) & 0x7F);
|
||||
*buf = (byte)(val & 0x7F);
|
||||
|
||||
len = 5;
|
||||
}
|
||||
|
||||
/* High-bit on means "last byte in the encoded integer". */
|
||||
*buf |= 0x80;
|
||||
|
||||
return(len);
|
||||
}
|
||||
|
||||
/******************************************************************//**
|
||||
Decode and return the integer that was encoded using our VLC scheme.
|
||||
@return value decoded */
|
||||
UNIV_INLINE
|
||||
ulint
|
||||
fts_decode_vlc(
|
||||
/*===========*/
|
||||
byte** ptr) /* in: ptr to decode from, this ptr is
|
||||
incremented by the number of bytes decoded */
|
||||
{
|
||||
ulint val = 0;
|
||||
|
||||
for (;;) {
|
||||
byte b = **ptr;
|
||||
|
||||
++*ptr;
|
||||
val |= (b & 0x7F);
|
||||
|
||||
/* High-bit on means "last byte in the encoded integer". */
|
||||
if (b & 0x80) {
|
||||
break;
|
||||
} else {
|
||||
val <<= 7;
|
||||
}
|
||||
}
|
||||
|
||||
return(val);
|
||||
}
|
||||
|
||||
#endif
|
@ -316,6 +316,28 @@ mach_read_from_n_little_endian(
|
||||
const byte* buf, /*!< in: from where to read */
|
||||
ulint buf_size) /*!< in: from how many bytes to read */
|
||||
MY_ATTRIBUTE((warn_unused_result));
|
||||
|
||||
|
||||
/** Reads a 64 bit stored in big endian format
|
||||
@param buf From where to read
|
||||
@return uint64_t */
|
||||
UNIV_INLINE
|
||||
uint64_t
|
||||
mach_read_uint64_little_endian(const byte* buf)
|
||||
{
|
||||
#ifdef WORDS_BIGENDIAN
|
||||
return
|
||||
uint64_t(buf[0]) | uint64_t(buf[1]) << 8 |
|
||||
uint64_t(buf[2]) << 16 | uint64_t(buf[3]) << 24 |
|
||||
uint64_t(buf[4]) << 32 | uint64_t(buf[5]) << 40 |
|
||||
uint64_t(buf[6]) << 48 | uint64_t(buf[7]) << 56;
|
||||
#else
|
||||
uint64_t n;
|
||||
memcpy(&n, buf, sizeof(uint64_t));
|
||||
return n;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*********************************************************//**
|
||||
Writes a ulint in the little-endian format. */
|
||||
UNIV_INLINE
|
||||
|
@ -1465,23 +1465,6 @@ error_exit:
|
||||
trx->error_state = DB_FTS_INVALID_DOCID;
|
||||
goto error_exit;
|
||||
}
|
||||
|
||||
/* Difference between Doc IDs are restricted within
|
||||
4 bytes integer. See fts_get_encoded_len(). Consecutive
|
||||
doc_ids difference should not exceed
|
||||
FTS_DOC_ID_MAX_STEP value. */
|
||||
|
||||
if (doc_id - next_doc_id >= FTS_DOC_ID_MAX_STEP) {
|
||||
ib::error() << "Doc ID " << doc_id
|
||||
<< " is too big. Its difference with"
|
||||
" largest used Doc ID "
|
||||
<< next_doc_id - 1 << " cannot"
|
||||
" exceed or equal to "
|
||||
<< FTS_DOC_ID_MAX_STEP;
|
||||
err = DB_FTS_INVALID_DOCID;
|
||||
trx->error_state = DB_FTS_INVALID_DOCID;
|
||||
goto error_exit;
|
||||
}
|
||||
}
|
||||
|
||||
if (table->skip_alter_undo) {
|
||||
|
22
storage/innobase/unittest/CMakeLists.txt
Normal file
22
storage/innobase/unittest/CMakeLists.txt
Normal file
@ -0,0 +1,22 @@
|
||||
# Copyright (c) 2021, MariaDB Corporation.
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; version 2 of the License.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1335 USA
|
||||
|
||||
# Header search path for the unit test: the server-wide include
# directory, the mytap test library and the InnoDB headers.
INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include
|
||||
${CMAKE_SOURCE_DIR}/unittest/mytap
|
||||
${CMAKE_SOURCE_DIR}/storage/innobase/include)
|
||||
# Build the test program from its single source file.
ADD_EXECUTABLE(innodb_fts-t innodb_fts-t.cc)
|
||||
# Link against the mysys and mytap libraries.
TARGET_LINK_LIBRARIES(innodb_fts-t mysys mytap)
|
||||
# Make sure the GenError target is built before the test program.
ADD_DEPENDENCIES(innodb_fts-t GenError)
|
||||
# NOTE(review): presumably registers innodb_fts-t as a test named
# innodb_fts; MY_ADD_TEST is defined outside this file - confirm.
MY_ADD_TEST(innodb_fts)
|
52
storage/innobase/unittest/innodb_fts-t.cc
Normal file
52
storage/innobase/unittest/innodb_fts-t.cc
Normal file
@ -0,0 +1,52 @@
|
||||
#include "tap.h"
|
||||
#include "fts0fts.h"
|
||||
#include "fts0vlc.h"
|
||||
|
||||
struct fts_encode_info
|
||||
{
|
||||
const byte buf[10];
|
||||
int32_t len;
|
||||
doc_id_t val;
|
||||
};
|
||||
|
||||
/* Contains fts encoding min & max value for each length bytes */
|
||||
static const fts_encode_info fts_info[]=
|
||||
{
|
||||
{{0x80}, 1, 0},
|
||||
{{0xFF}, 1, (1 << 7) - 1},
|
||||
{{0x01, 0x80}, 2, 1 << 7},
|
||||
{{0x7F, 0XFF}, 2, (1 << 14) - 1},
|
||||
{{0x01, 0x00, 0x80}, 3, 1 << 14},
|
||||
{{0x7F, 0X7F, 0XFF}, 3, (1 << 21) - 1},
|
||||
{{0x01, 0x00, 0x00, 0x80}, 4, 1 << 21},
|
||||
{{0x7F, 0X7F, 0X7F, 0xFF}, 4, (1 << 28) - 1},
|
||||
{{0x01, 0x00, 0x00, 0x00, 0x80}, 5, 1 << 28},
|
||||
{{0x7F, 0X7F, 0X7F, 0x7F, 0xFF}, 5, (1ULL << 35) - 1},
|
||||
{{0x01, 0x00, 0x00, 0x00, 0x00, 0x80}, 6, 1ULL << 35},
|
||||
{{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0xFF}, 6, (1ULL << 42) - 1},
|
||||
{{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 7, 1ULL << 42},
|
||||
{{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0XFF}, 7, (1ULL << 49) - 1},
|
||||
{{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 8, 1ULL << 49},
|
||||
{{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0X7F, 0XFF}, 8, (1ULL << 56) -1},
|
||||
{{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 9, 1ULL << 56},
|
||||
{{0x7F, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0X7F, 0x7F, 0XFF}, 9, (1ULL << 63) -1},
|
||||
{{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80}, 10, 1ULL << 63},
|
||||
{{0x01, 0X7F, 0X7F, 0x7F, 0x7F, 0x7F, 0X7F, 0x7F, 0x7F, 0xFF}, 10, ~0ULL}
|
||||
};
|
||||
|
||||
int main(int, char**)
|
||||
{
|
||||
for (int i= array_elements(fts_info); i--;)
|
||||
{
|
||||
byte buf[10];
|
||||
const byte* fts_buf= buf;
|
||||
int32_t len= fts_encode_int(fts_info[i].val, buf) - &buf[0];
|
||||
if (fts_info[i].len == len &&
|
||||
!memcmp(&fts_info[i].buf, buf, len) &&
|
||||
fts_decode_vlc(&fts_buf) == fts_info[i].val &&
|
||||
fts_buf == &buf[len])
|
||||
ok(true, "FTS Encoded for %d bytes", fts_info[i].len);
|
||||
else
|
||||
ok(false, "FTS Encoded for %d bytes", fts_info[i].len);
|
||||
}
|
||||
}
|
Reference in New Issue
Block a user