1
0
mirror of https://github.com/MariaDB/server.git synced 2025-08-08 11:22:35 +03:00

MDEV-34705: Binlog-in-engine: Use the whole page for binlog data

Signed-off-by: Kristian Nielsen <knielsen@knielsen-hq.org>
This commit is contained in:
Kristian Nielsen
2025-03-21 21:10:25 +01:00
parent 8b3b6770f4
commit e4935b716a
6 changed files with 69 additions and 39 deletions

View File

@@ -57,10 +57,10 @@ SET SESSION binlog_format= ROW;
*** Do 1500 transactions ...
SHOW BINARY LOGS;
Log_name File_size
binlog-000010.ibb 262144
binlog-000011.ibb 262144
binlog-000012.ibb 262144
binlog-000013.ibb 262144
binlog-000014.ibb 262144
*** Test purge by date.
SET GLOBAL max_binlog_total_size= 0;
SET @old_expire= @@GLOBAL.binlog_expire_logs_seconds;
@@ -70,6 +70,7 @@ SET GLOBAL binlog_expire_logs_seconds= 0;
*** Do 1000 transactions ...
SHOW BINARY LOGS;
Log_name File_size
binlog-000012.ibb 262144
binlog-000013.ibb 262144
binlog-000014.ibb 262144
binlog-000015.ibb 262144

View File

@@ -92,11 +92,11 @@ while ($i < $num_trx) {
#
# In this case, just check that the general structure of the present binlogs
# is similar, and then update the $binlog_name waited for and the .result file.
--let $binlog_name= binlog-000010.ibb
--let $binlog_name= binlog-000009.ibb
--let $binlog_size= 262144
--let $wait_notfound= 1
--source include/wait_for_engine_binlog.inc
--let $binlog_name= binlog-000014.ibb
--let $binlog_name= binlog-000013.ibb
--let $binlog_size= 262144
--source include/wait_for_engine_binlog.inc
SHOW BINARY LOGS;
@@ -117,7 +117,7 @@ while ($i < $num_insert) {
}
COMMIT;
--enable_query_log
--let $binlog_name= binlog-000012.ibb
--let $binlog_name= binlog-000011.ibb
--let $binlog_size= 262144
--let $wait_notfound= 1
--source include/wait_for_engine_binlog.inc

View File

@@ -4836,7 +4836,22 @@ show_engine_binlog_events(THD* thd, Protocol *protocol, LEX_MASTER_INFO *lex_mi)
return true;
}
if (reader->init_legacy_pos(lex_mi->log_file_name, lex_mi->pos))
ulonglong pos= lex_mi->pos;
/*
The positions "0" and "4" are unfortunately traditionally used
interchangeably to mean "the start of the binlog". Thus, we might here
easily see a starting position of "4", which is probably not valid in
the engine, but which really means "start of the file".
So here we have this ugly hack where "4" means the same as "0". Well,
use of offsets is discouraged anyway in the new binlog (in favour of
GTID), and "4" is not going to be a valid position most likely, or if
it is, "0" will be equivalent (at least it is so for the InnoDB binlog
implementation).
*/
if (pos == 4)
pos= 0;
if (reader->init_legacy_pos(lex_mi->log_file_name, pos))
{
err= true;
goto end;

View File

@@ -642,7 +642,7 @@ fsp_log_binlog_write(mtr_t *mtr, fsp_binlog_page_entry *page,
{
uint64_t file_no= page->file_no;
uint32_t page_no= page->page_no;
if (page_offset + len >= srv_page_size - FIL_PAGE_DATA_END)
if (page_offset + len >= srv_page_size - BINLOG_PAGE_DATA_END)
page->complete= true;
if (page->flushed_clean)
{
@@ -814,7 +814,7 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type)
{
uint32_t page_size= (uint32_t)srv_page_size;
uint32_t page_size_shift= srv_page_size_shift;
const uint32_t page_end= page_size - FIL_PAGE_DATA_END;
const uint32_t page_end= page_size - BINLOG_PAGE_DATA_END;
uint32_t page_no= binlog_cur_page_no;
uint32_t page_offset= binlog_cur_page_offset;
/* ToDo: What is the lifetime of what's pointed to by binlog_cur_block, is there some locking needed around it or something? */
@@ -830,7 +830,7 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type)
*/
byte cont_flag= 0;
for (;;) {
if (page_offset == FIL_PAGE_DATA) {
if (page_offset == BINLOG_PAGE_DATA) {
uint32_t file_size_in_pages= binlog_page_fifo->size_in_pages(file_no);
if (UNIV_UNLIKELY(page_no >= file_size_in_pages)) {
/*
@@ -944,7 +944,7 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type)
binlog_page_fifo->release_page_mtr(block, mtr);
block= nullptr;
++page_no;
page_offset= FIL_PAGE_DATA;
page_offset= BINLOG_PAGE_DATA;
continue;
}
@@ -980,7 +980,7 @@ fsp_binlog_write_rec(chunk_data_base *chunk_data, mtr_t *mtr, byte chunk_type)
if (page_remain == 0) {
binlog_page_fifo->release_page_mtr(block, mtr);
block= nullptr;
page_offset= FIL_PAGE_DATA;
page_offset= BINLOG_PAGE_DATA;
++page_no;
} else {
page_offset+= size+3;
@@ -1054,8 +1054,8 @@ fsp_binlog_flush()
binlog_page_fifo->unlock();
uint32_t page_offset= binlog_cur_page_offset;
if (page_offset > FIL_PAGE_DATA ||
page_offset < srv_page_size - FIL_PAGE_DATA_END)
if (page_offset > BINLOG_PAGE_DATA ||
page_offset < srv_page_size - BINLOG_PAGE_DATA_END)
{
/*
If we are not precisely at the end of a page, fill up that page with a dummy
@@ -1283,12 +1283,23 @@ read_more_data:
if (s.chunk_len == 0)
{
byte type;
if (s.in_page_offset < FIL_PAGE_DATA)
s.in_page_offset= FIL_PAGE_DATA;
else if (s.in_page_offset >= srv_page_size - (FIL_PAGE_DATA_END + 3) ||
/*
This code gives warning "comparison of unsigned expression in < 0 is
always false" when BINLOG_PAGE_DATA is 0.
So use a static assert for now; if it ever triggers, replace it with this
code:
if (s.in_page_offset < BINLOG_PAGE_DATA)
s.in_page_offset= BINLOG_PAGE_DATA;
*/
if (0)
static_assert(BINLOG_PAGE_DATA == 0,
"Replace static_assert with code from above comment");
else if (s.in_page_offset >= srv_page_size - (BINLOG_PAGE_DATA_END + 3) ||
page_ptr[s.in_page_offset] == FSP_BINLOG_TYPE_FILLER)
{
ut_ad(s.in_page_offset >= srv_page_size - FIL_PAGE_DATA_END ||
ut_ad(s.in_page_offset >= srv_page_size - BINLOG_PAGE_DATA_END ||
page_ptr[s.in_page_offset] == FSP_BINLOG_TYPE_FILLER);
goto go_next_page;
}
@@ -1402,7 +1413,7 @@ skip_chunk:
s.skip_current= false;
}
if (s.in_page_offset >= srv_page_size - (FIL_PAGE_DATA_END + 3))
if (s.in_page_offset >= srv_page_size - (BINLOG_PAGE_DATA_END + 3))
{
go_next_page:
/* End of page reached, move to the next page. */

View File

@@ -722,7 +722,7 @@ binlog_recovery::init_recovery_from(uint64_t file_no, lsn_t file_lsn,
else
{
skipping_early_lsn= false;
if (offset <= FIL_PAGE_DATA)
if (offset <= BINLOG_PAGE_DATA)
{
update_page_from_record(offset, buf, size);
skipping_partial_page= false;
@@ -948,7 +948,7 @@ binlog_recovery::apply_redo(bool space_id, uint32_t page_no, uint16_t offset,
if (skipping_partial_page)
{
if (offset > FIL_PAGE_DATA)
if (offset > BINLOG_PAGE_DATA)
return false;
skipping_partial_page= false;
}
@@ -980,9 +980,9 @@ binlog_recovery::apply_redo(bool space_id, uint32_t page_no, uint16_t offset,
if (space_id != (cur_file_no & 1))
{
/* Check that we recovered all of this file. */
if ( ( (cur_page_offset > FIL_PAGE_DATA &&
cur_page_offset < srv_page_size - FIL_PAGE_DATA_END) ||
cur_page_no + (cur_page_offset > FIL_PAGE_DATA) <
if ( ( (cur_page_offset > BINLOG_PAGE_DATA &&
cur_page_offset < srv_page_size - BINLOG_PAGE_DATA_END) ||
cur_page_no + (cur_page_offset > BINLOG_PAGE_DATA) <
cur_phys_size >> srv_page_size_shift) &&
!srv_force_recovery)
{
@@ -992,7 +992,7 @@ binlog_recovery::apply_redo(bool space_id, uint32_t page_no, uint16_t offset,
}
/* Check that we recover from the start of the next file. */
if ((page_no > 0 || offset > FIL_PAGE_DATA) && !srv_force_recovery)
if ((page_no > 0 || offset > BINLOG_PAGE_DATA) && !srv_force_recovery)
{
sql_print_error("InnoDB: Missing recovery record at start of file_no=%"
PRIu64 ", LSN " LSN_PF, cur_file_no+1, start_lsn);
@@ -1005,7 +1005,7 @@ binlog_recovery::apply_redo(bool space_id, uint32_t page_no, uint16_t offset,
/* Test for moving to the next page. */
else if (page_no != cur_page_no)
{
if (cur_page_offset < srv_page_size - FIL_PAGE_DATA_END &&
if (cur_page_offset < srv_page_size - BINLOG_PAGE_DATA_END &&
!srv_force_recovery)
{
sql_print_error("InnoDB: Missing recovery record in file_no=%"
@@ -1014,7 +1014,7 @@ binlog_recovery::apply_redo(bool space_id, uint32_t page_no, uint16_t offset,
return true;
}
if ((page_no != cur_page_no + 1 || offset > FIL_PAGE_DATA) &&
if ((page_no != cur_page_no + 1 || offset > BINLOG_PAGE_DATA) &&
!srv_force_recovery)
{
sql_print_error("InnoDB: Missing recovery record in file_no=%"
@@ -1028,7 +1028,7 @@ binlog_recovery::apply_redo(bool space_id, uint32_t page_no, uint16_t offset,
}
/* Test no gaps in offset. */
else if (offset != cur_page_offset &&
offset > FIL_PAGE_DATA &&
offset > BINLOG_PAGE_DATA &&
!srv_force_recovery)
{
sql_print_error("InnoDB: Missing recovery record in file_no=%"
@@ -1105,7 +1105,7 @@ innodb_binlog_init_state()
total_binlog_used_size= 0;
active_binlog_file_no.store(~(uint64_t)0, std::memory_order_release);
binlog_cur_page_no= 0;
binlog_cur_page_offset= FIL_PAGE_DATA;
binlog_cur_page_offset= BINLOG_PAGE_DATA;
current_binlog_state_interval=
(uint32_t)(innodb_binlog_state_interval >> srv_page_size_shift);
ut_a(innodb_binlog_state_interval ==
@@ -1283,7 +1283,7 @@ static bool
binlog_page_empty(const byte *page)
{
/* ToDo: Here we also need to see if there is a full state record at the start of the file. If not, we have to delete the file and ignore it, it is an incomplete file. Or can we rely on the innodb crash recovery to make file creation atomic and we will never see a partially pre-allocated file? Also if the gtid state is larger than mtr max size (if there is such max?), or if we crash in the middle of pre-allocation? */
return page[FIL_PAGE_DATA] == 0;
return page[BINLOG_PAGE_DATA] == 0;
}
@@ -1312,7 +1312,7 @@ find_pos_in_binlog(uint64_t file_no, size_t file_size, byte *page_buf,
bool ret;
*out_page_no= 0;
*out_pos_in_page= FIL_PAGE_DATA;
*out_pos_in_page= BINLOG_PAGE_DATA;
binlog_name_make(file_name, file_no);
pfs_os_file_t fh= os_file_create(innodb_data_file_key, file_name,
@@ -1376,8 +1376,8 @@ find_pos_in_binlog(uint64_t file_no, size_t file_size, byte *page_buf,
}
/* Now scan the last page to find the position in it to continue. */
p= &page_buf[FIL_PAGE_DATA];
page_end= &page_buf[page_size - FIL_PAGE_DATA_END];
p= &page_buf[BINLOG_PAGE_DATA];
page_end= &page_buf[page_size - BINLOG_PAGE_DATA_END];
while (*p && p < page_end) {
if (*p == FSP_BINLOG_TYPE_FILLER) {
p= page_end;
@@ -1391,7 +1391,7 @@ find_pos_in_binlog(uint64_t file_no, size_t file_size, byte *page_buf,
*out_page_no= p_0 - 1;
*out_pos_in_page= (uint32_t)(p - page_buf);
if (*out_pos_in_page >= page_size - FIL_PAGE_DATA_END)
if (*out_pos_in_page >= page_size - BINLOG_PAGE_DATA_END)
ret= fsp_binlog_open(file_name, fh, file_no, file_size, p_0, nullptr);
else
ret= fsp_binlog_open(file_name, fh, file_no, file_size, p_0 - 1, page_buf);
@@ -1493,7 +1493,7 @@ innodb_binlog_discover()
binlog_cur_page_no= page_no;
binlog_cur_page_offset= pos_in_page;
ib::info() << "Continuing binlog number " << file_no << " from position "
<< FIL_PAGE_DATA << ".";
<< BINLOG_PAGE_DATA << ".";
return binlog_files.found_binlogs;
}
@@ -1717,11 +1717,11 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
}
const uint32_t page_size= (uint32_t)srv_page_size;
const uint32_t page_room= page_size - (FIL_PAGE_DATA + FIL_PAGE_DATA_END);
const uint32_t page_room= page_size - (BINLOG_PAGE_DATA + BINLOG_PAGE_DATA_END);
uint32_t needed_pages= (uint32_t)((used_bytes + page_room - 1) / page_room);
/* For now, GTID state always at the start of a page. */
ut_ad(page_offset == FIL_PAGE_DATA);
ut_ad(page_offset == BINLOG_PAGE_DATA);
/*
Only write the GTID state record if there is room for actual event data
@@ -1739,7 +1739,7 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
block_page_no= page_no;
block= binlog_page_fifo->create_page(file_no, block_page_no);
ut_a(block /* ToDo: error handling? */);
page_offset= FIL_PAGE_DATA;
page_offset= BINLOG_PAGE_DATA;
byte *ptr= page_offset + &block->page_buf[0];
ssize_t chunk= used_bytes;
byte last_flag= FSP_BINLOG_FLAG_LAST;
@@ -1760,12 +1760,12 @@ binlog_gtid_state(rpl_binlog_state_base *state, mtr_t *mtr,
cont_flag= FSP_BINLOG_FLAG_CONT;
}
if (page_offset == page_size - FIL_PAGE_DATA_END) {
if (page_offset == page_size - BINLOG_PAGE_DATA_END) {
if (block)
binlog_page_fifo->release_page_mtr(block, mtr);
block= nullptr;
block_page_no= ~(uint32_t)0;
page_offset= FIL_PAGE_DATA;
page_offset= BINLOG_PAGE_DATA;
++page_no;
}
}
@@ -1794,7 +1794,7 @@ static int
read_gtid_state_from_page(rpl_binlog_state_base *state, const byte *page,
uint32_t page_no, binlog_header_data *out_header_data)
{
const byte *p= page + FIL_PAGE_DATA;
const byte *p= page + BINLOG_PAGE_DATA;
byte t= *p;
if (UNIV_UNLIKELY((t & FSP_BINLOG_TYPE_MASK) != FSP_BINLOG_TYPE_GTID_STATE))
{

View File

@@ -37,6 +37,9 @@ struct chunk_data_base;
/** Store crc32 checksum at the end of the page */
#define BINLOG_PAGE_CHECKSUM 4
#define BINLOG_PAGE_DATA 0
#define BINLOG_PAGE_DATA_END BINLOG_PAGE_CHECKSUM
enum fsp_binlog_chunk_types {
/* Zero means no data, effectively EOF. */