From 301a31fddfd5f08e3e38915a9c74517116028301 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 25 Sep 2007 15:52:01 +0200 Subject: [PATCH 01/12] WL#3072 Maria recovery Minor change: decouple the writing of a recovery trace from compilation with --with-debug; that writing happens if and only if EXTRA_DEBUG. So one can enable (by defining EXTRA_DEBUG) a recovery trace in a non-debug build. storage/maria/ma_recovery.c: Small optimization. Decouple recovery trace from --with-debug. --- storage/maria/ma_recovery.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index e740e334b5f..2f9390b0036 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -123,13 +123,16 @@ static my_bool redo_phase_message_printed; /** @brief Prints to a trace file if it is not NULL */ void tprint(FILE *trace_file, const char *format, ...) ATTRIBUTE_FORMAT(printf, 2, 3); -void tprint(FILE *trace_file, const char *format, ...) +void tprint(FILE *trace_file __attribute__ ((unused)), + const char *format __attribute__ ((unused)), ...) 
{ +#ifdef EXTRA_DEBUG va_list args; va_start(args, format); if (trace_file != NULL) vfprintf(trace_file, format, args); va_end(args); +#endif } #define ALERT_USER() DBUG_ASSERT(0) @@ -157,7 +160,7 @@ int maria_recover(void) DBUG_ASSERT(!maria_in_recovery); maria_in_recovery= TRUE; -#if !defined(DBUG_OFF) && defined(EXTRA_DEBUG) +#ifdef EXTRA_DEBUG trace_file= fopen("maria_recovery.trace", "w"); #else trace_file= NULL; /* no trace file for being fast */ From c9c58163a936ccf2882aae1230018117744546b0 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 27 Sep 2007 14:18:28 +0300 Subject: [PATCH 02/12] Remove SAFE_MODE for opt_range as it disables UPDATE to use keys REDO optimization (Basically avoid moving blocks from/to pagecache) More command line arguments to maria_read_log Fixed recovery bug when recreating table sql/opt_range.cc: Remove SAFE_MODE for opt_range as it disables UPDATE to use keys storage/maria/ma_blockrec.c: REDO optimization Use new interface for pagecache_reads to avoid copying page buffers storage/maria/ma_loghandler.c: Patch from Sanja: - Added new parameter to translog_get_page to use direct links to pagecache - Changed scanner to be able to use direct links This avoids a lot of calls to bmove512() in page cache. 
storage/maria/ma_loghandler.h: Added direct link to pagecache objects storage/maria/ma_open.c: Added const to parameter Added missing braces storage/maria/ma_pagecache.c: From Sanja: - Added direct links to pagecache (from pagecache_read()) Direct link means that on pagecache_read we get back a pointer to the pagecache buffer From Monty: - Fixed arguments to init_page_cache to handle big page caches - Fixed compiler warnings - Replaced PAGECACHE_PAGE_LINK with PAGECACHE_BLOCK_LINK * to catch errors storage/maria/ma_pagecache.h: Changed block numbers from int to long to be able to handle big page caches Changed some PAGECACHE_PAGE_LINK to PAGECACHE_BLOCK_LINK storage/maria/ma_recovery.c: Fixed recovery bug when recreating table (table was kept open) Moved some variables to function start (portability) Added space to some print messages storage/maria/maria_chk.c: key_buffer_size -> page_buffer_size storage/maria/maria_def.h: Changed default page_buffer_size to 10M storage/maria/maria_read_log.c: Added more startup options: --version --undo (apply undo) --page_cache_size (to run with big cache sizes) --silent (to not get any output from --apply) storage/maria/unittest/ma_control_file-t.c: Fixed compiler warning storage/maria/unittest/ma_test_loghandler-t.c: Added new argument to translog_init_scanner() storage/maria/unittest/ma_test_loghandler_multigroup-t.c: Added new argument to translog_init_scanner() storage/maria/unittest/ma_test_loghandler_multithread-t.c: Added new argument to translog_init_scanner() --- sql/opt_range.cc | 3 - storage/maria/ma_blockrec.c | 78 ++++--- storage/maria/ma_loghandler.c | 200 ++++++++++++++---- storage/maria/ma_loghandler.h | 8 +- storage/maria/ma_open.c | 4 +- storage/maria/ma_pagecache.c | 123 ++++++----- storage/maria/ma_pagecache.h | 42 ++-- storage/maria/ma_recovery.c | 77 +++++-- storage/maria/maria_chk.c | 4 +- storage/maria/maria_def.h | 4 +- storage/maria/maria_read_log.c | 47 ++-- storage/maria/unittest/ma_control_file-t.c 
| 2 +- storage/maria/unittest/ma_test_loghandler-t.c | 2 +- .../ma_test_loghandler_multigroup-t.c | 2 +- .../ma_test_loghandler_multithread-t.c | 2 +- 15 files changed, 415 insertions(+), 183 deletions(-) diff --git a/sql/opt_range.cc b/sql/opt_range.cc index 3ee7e8ee813..fbb1a2be487 100644 --- a/sql/opt_range.cc +++ b/sql/opt_range.cc @@ -2130,9 +2130,6 @@ int SQL_SELECT::test_quick_select(THD *thd, key_map keys_to_use, quick=0; needed_reg.clear_all(); quick_keys.clear_all(); - if ((specialflag & SPECIAL_SAFE_MODE) && ! force_quick_range || - !limit) - DBUG_RETURN(0); /* purecov: inspected */ if (keys_to_use.is_clear_all()) DBUG_RETURN(0); records= head->file->stats.records; diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index b12035c9cfa..3ec18d229c2 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -4609,6 +4609,9 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, uint block_size= share->block_size; uint rec_offset; uchar *buff= info->keyread_buff, *dir; + MARIA_PINNED_PAGE page_link; + enum pagecache_page_lock unlock_method; + enum pagecache_page_pin unpin_method; DBUG_ENTER("_ma_apply_redo_insert_row_head_or_tail"); info->keyread_buff_used= 1; @@ -4635,26 +4638,31 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, empty_space= (block_size - PAGE_OVERHEAD_SIZE); rec_offset= PAGE_HEADER_SIZE; dir= buff+ block_size - PAGE_SUFFIX_SIZE - DIR_ENTRY_SIZE; + unlock_method= PAGECACHE_LOCK_LEFT_UNLOCKED; + unpin_method= PAGECACHE_PIN_LEFT_UNPINNED; } else { uint max_entry; - if (!(buff= pagecache_read(share->pagecache, - &info->dfile, - page, 0, - buff, PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + if (!(buff= pagecache_read(share->pagecache, &info->dfile, + page, 0, 0, + PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE, + &page_link.link))) DBUG_RETURN(my_errno); - if (lsn_korr(buff) >= lsn) + if (lsn_korr(buff) >= lsn) /* Test if already applied */ { - /* Already 
applied */ - + pagecache_unlock_by_link(share->pagecache, page_link.link, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, LSN_IMPOSSIBLE, + LSN_IMPOSSIBLE); /* Fix bitmap, just in case */ empty_space= uint2korr(buff + EMPTY_SPACE_OFFSET); if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) DBUG_RETURN(my_errno); DBUG_RETURN(0); } + unlock_method= PAGECACHE_LOCK_WRITE_UNLOCK; + unpin_method= PAGECACHE_UNPIN; max_entry= (uint) ((uchar*) buff)[DIR_COUNT_OFFSET]; if (((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) != page_type)) @@ -4725,8 +4733,7 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, if (pagecache_write(share->pagecache, &info->dfile, page, 0, buff, PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - PAGECACHE_PIN_LEFT_UNPINNED, + unlock_method, unpin_method, PAGECACHE_WRITE_DELAY, 0)) DBUG_RETURN(my_errno); @@ -4747,6 +4754,11 @@ uint _ma_apply_redo_insert_row_head_or_tail(MARIA_HA *info, LSN lsn, DBUG_RETURN(0); err: + if (unlock_method == PAGECACHE_LOCK_WRITE_UNLOCK) + pagecache_unlock_by_link(share->pagecache, page_link.link, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, LSN_IMPOSSIBLE, + LSN_IMPOSSIBLE); DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); } @@ -4778,6 +4790,8 @@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, uint rownr, empty_space; uint block_size= share->block_size; uchar *buff= info->keyread_buff; + int result; + MARIA_PINNED_PAGE page_link; DBUG_ENTER("_ma_apply_redo_purge_row_head_or_tail"); page= page_korr(header); @@ -4788,11 +4802,10 @@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, info->keyread_buff_used= 1; - if (!(buff= pagecache_read(share->pagecache, - &info->dfile, - page, 0, - buff, PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + if (!(buff= pagecache_read(share->pagecache, &info->dfile, + page, 0, 0, + PAGECACHE_PLAIN_PAGE, PAGECACHE_LOCK_WRITE, + &page_link.link))) DBUG_RETURN(my_errno); if (lsn_korr(buff) >= lsn) @@ -4802,6 +4815,11 
@@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, Note that in case the page is not anymore a head or tail page a future redo will fix the bitmap. */ + pagecache_unlock_by_link(share->pagecache, page_link.link, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, LSN_IMPOSSIBLE, + LSN_IMPOSSIBLE); + if ((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == page_type) { empty_space= uint2korr(buff+EMPTY_SPACE_OFFSET); @@ -4815,22 +4833,30 @@ uint _ma_apply_redo_purge_row_head_or_tail(MARIA_HA *info, LSN lsn, DBUG_ASSERT((buff[PAGE_TYPE_OFFSET] & PAGE_TYPE_MASK) == (uchar) page_type); if (delete_dir_entry(buff, block_size, rownr, &empty_space) < 0) - DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); + goto err; lsn_store(buff, lsn); + result= 0; if (pagecache_write(share->pagecache, &info->dfile, page, 0, buff, PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN, PAGECACHE_WRITE_DELAY, 0)) - DBUG_RETURN(my_errno); + result= my_errno; /* This will work even if the page was marked as UNALLOCATED_PAGE */ if (_ma_bitmap_set(info, page, page_type == HEAD_PAGE, empty_space)) - DBUG_RETURN(my_errno); + result= my_errno; + + DBUG_RETURN(result); + +err: + pagecache_unlock_by_link(share->pagecache, page_link.link, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, LSN_IMPOSSIBLE, + LSN_IMPOSSIBLE); + DBUG_RETURN(HA_ERR_WRONG_IN_RECORD); - DBUG_RETURN(0); } @@ -4872,16 +4898,21 @@ uint _ma_apply_redo_purge_blocks(MARIA_HA *info, for (i= 0; i < page_range ; i++) { + MARIA_PINNED_PAGE page_link; if (!(buff= pagecache_read(share->pagecache, &info->dfile, page+i, 0, buff, PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, 0))) + PAGECACHE_LOCK_WRITE, &page_link.link))) DBUG_RETURN(my_errno); if (lsn_korr(buff) >= lsn) { /* Already applied */ + pagecache_unlock_by_link(share->pagecache, page_link.link, + PAGECACHE_LOCK_WRITE_UNLOCK, + PAGECACHE_UNPIN, LSN_IMPOSSIBLE, + LSN_IMPOSSIBLE); continue; } 
buff[PAGE_TYPE_OFFSET]= UNALLOCATED_PAGE; @@ -4889,8 +4920,7 @@ uint _ma_apply_redo_purge_blocks(MARIA_HA *info, if (pagecache_write(share->pagecache, &info->dfile, page+i, 0, buff, PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, - PAGECACHE_PIN_LEFT_UNPINNED, + PAGECACHE_LOCK_WRITE_UNLOCK, PAGECACHE_UNPIN, PAGECACHE_WRITE_DELAY, 0)) DBUG_RETURN(my_errno); } diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index f3c90ceb1f5..e30c8c6627f 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -2296,21 +2296,21 @@ my_bool translog_unlock() } -/* - Get log page by file number and offset of the beginning of the page +/** + @brief Get log page by file number and offset of the beginning of the page - SYNOPSIS - translog_get_page() - data validator data, which contains the page address - buffer buffer for page placing + @param data validator data, which contains the page address + @param buffer buffer for page placing (might not be used in some cache implementations) + @param direct_link if it is not NULL then caller can accept direct + link to the page cache - RETURN - NULL - Error - # pointer to the page cache which should be used to read this page + @retval NULL Error + @retval # pointer to the page cache which should be used to read this page */ -static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) +static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer, + PAGECACHE_PAGE_LINK *direct_link) { TRANSLOG_ADDRESS addr= *(data->addr), in_buffers; uint cache_index; @@ -2324,6 +2324,9 @@ static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) /* it is really page address */ DBUG_ASSERT(LSN_OFFSET(addr) % TRANSLOG_PAGE_SIZE == 0); + if (direct_link) + *direct_link= NULL; + in_buffers= translog_only_in_buffers(); DBUG_PRINT("info", ("in_buffers: (%lu,0x%lx)", LSN_IN_PARTS(in_buffers))); @@ -2336,7 +2339,9 @@ static uchar 
*translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) if (cmp_translog_addr(addr, in_buffers) >= 0) { uint16 buffer_no= log_descriptor.bc.buffer_no; +#ifndef DBUG_OFF uint16 buffer_start= buffer_no; +#endif struct st_translog_buffer *buffer_unlock= log_descriptor.bc.buffer; struct st_translog_buffer *curr_buffer= log_descriptor.bc.buffer; for (;;) @@ -2437,13 +2442,23 @@ static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) } file.file= log_descriptor.log_file_num[cache_index]; - buffer= (uchar*) - pagecache_valid_read(log_descriptor.pagecache, &file, - LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE, - 3, (char*) buffer, - PAGECACHE_PLAIN_PAGE, - PAGECACHE_LOCK_LEFT_UNLOCKED, 0, - &translog_page_validator, (uchar*) data); + buffer= + (uchar*) (direct_link ? + pagecache_valid_read(log_descriptor.pagecache, &file, + LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE, + 3, NULL, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_READ, direct_link, + &translog_page_validator, (uchar*) data) : + pagecache_valid_read(log_descriptor.pagecache, &file, + LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE, + 3, (char*) buffer, + PAGECACHE_PLAIN_PAGE, + PAGECACHE_LOCK_LEFT_UNLOCKED, direct_link, + &translog_page_validator, (uchar*) data)); + DBUG_PRINT("info", ("Direct link is assigned to : 0x%lx * 0x%lx", + (ulong) direct_link, + (ulong)(direct_link ? 
*direct_link : NULL))); } else { @@ -2468,6 +2483,24 @@ static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer) DBUG_RETURN(buffer); } +/** + @brief free direct log page link + + @param direct_link the direct log page link to be freed + +*/ + +static void translog_free_link(PAGECACHE_PAGE_LINK *direct_link) +{ + DBUG_ENTER("translog_free_link"); + DBUG_PRINT("info", ("Direct link: 0x%lx", + (ulong) direct_link)); + if (direct_link) + pagecache_unlock_by_link(log_descriptor.pagecache, direct_link, + PAGECACHE_LOCK_READ_UNLOCK, PAGECACHE_UNPIN, + LSN_IMPOSSIBLE, LSN_IMPOSSIBLE); + DBUG_VOID_RETURN; +} /* Finds last page of the given log file @@ -2796,7 +2829,7 @@ my_bool translog_init(const char *directory, TRANSLOG_VALIDATOR_DATA data; uchar buffer[TRANSLOG_PAGE_SIZE], *page; data.addr= ¤t_page; - if ((page= translog_get_page(&data, buffer)) == NULL) + if ((page= translog_get_page(&data, buffer, NULL)) == NULL) DBUG_RETURN(1); if (data.was_recovered) { @@ -2848,7 +2881,7 @@ my_bool translog_init(const char *directory, /* continue old log */ DBUG_ASSERT(LSN_FILE_NO(last_valid_page)== LSN_FILE_NO(log_descriptor.horizon)); - if ((page= translog_get_page(&data, buffer)) == NULL || + if ((page= translog_get_page(&data, buffer, NULL)) == NULL || (chunk_offset= translog_get_first_chunk_offset(page)) == 0) DBUG_RETURN(1); @@ -5153,24 +5186,54 @@ static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA } -/* - Initialize reader scanner +/** + @brief Get page from page cache according to requested method - SYNOPSIS - translog_init_scanner() - lsn LSN with which it have to be inited - fixed_horizon true if it is OK do not read records which was written + @param scanner The scanner data + + @return operation status + @retval 0 OK + @retval 1 Error +*/ + +static my_bool +translog_scanner_get_page(TRANSLOG_SCANNER_DATA *scanner) +{ + TRANSLOG_VALIDATOR_DATA data; + DBUG_ENTER("translog_scanner_get_page"); + data.addr= &scanner->page_addr; + 
data.was_recovered= 0; + DBUG_RETURN((scanner->page= + translog_get_page(&data, scanner->buffer, + (scanner->use_direct_link ? + &scanner->direct_link : + NULL))) == + NULL); +} + + +/** + @brief Initialize reader scanner. + + @param lsn LSN with which it have to be inited + @param fixed_horizon true if it is OK do not read records which was written after scanning beginning - scanner scanner which have to be inited + @param scanner scanner which have to be inited + @param use_direct prefer using direct lings from page handler + where it is possible. - RETURN - 0 OK - 1 Error + @note If direct link was used translog_destroy_scanner should be + called after it using + + @return status of the operation + @retval 0 OK + @retval 1 Error */ my_bool translog_init_scanner(LSN lsn, my_bool fixed_horizon, - struct st_translog_scanner_data *scanner) + TRANSLOG_SCANNER_DATA *scanner, + my_bool use_direct) { TRANSLOG_VALIDATOR_DATA data; DBUG_ENTER("translog_init_scanner"); @@ -5184,6 +5247,8 @@ my_bool translog_init_scanner(LSN lsn, scanner->page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE; scanner->fixed_horizon= fixed_horizon; + scanner->use_direct_link= use_direct; + scanner->direct_link= NULL; scanner->horizon= translog_get_horizon(); DBUG_PRINT("info", ("horizon: (0x%lu,0x%lx)", @@ -5198,12 +5263,24 @@ my_bool translog_init_scanner(LSN lsn, if (translog_scanner_set_last_page(scanner)) DBUG_RETURN(1); - if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL) + if (translog_scanner_get_page(scanner)) DBUG_RETURN(1); DBUG_RETURN(0); } +/** + @brief Destroy scanner object; + + @param scanner The scanner object to destroy +*/ + +void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner) +{ + translog_free_link(scanner->direct_link); +} + + /* Checks End of the Log @@ -5298,7 +5375,6 @@ static my_bool translog_scanner_eof(TRANSLOG_SCANNER_DATA *scanner) scanner->last_file_page); } - /* Move scanner to the next chunk @@ -5315,7 +5391,6 @@ static my_bool 
translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner) { uint16 len; - TRANSLOG_VALIDATOR_DATA data; DBUG_ENTER("translog_get_next_chunk"); if ((len= translog_get_total_chunk_length(scanner->page, @@ -5331,6 +5406,8 @@ translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner) } if (translog_scanner_eop(scanner)) { + /* before reading next page we should unpin current one if it was pinned */ + translog_free_link(scanner->direct_link); if (translog_scanner_eof(scanner)) { DBUG_PRINT("info", ("horizon: (%lu,0x%lx) pageaddr: (%lu,0x%lx)", @@ -5350,9 +5427,7 @@ translog_get_next_chunk(TRANSLOG_SCANNER_DATA *scanner) scanner->page_addr+= TRANSLOG_PAGE_SIZE; /* offset increased */ } - data.addr= &scanner->page_addr; - data.was_recovered= 0; - if ((scanner->page= translog_get_page(&data, scanner->buffer)) == NULL) + if (translog_scanner_get_page(scanner)) DBUG_RETURN(1); scanner->page_offset= translog_get_first_chunk_offset(scanner->page); @@ -5482,7 +5557,7 @@ translog_variable_length_header(uchar *page, translog_size_t page_offset, { DBUG_PRINT("info", ("use internal scanner for header reading")); scanner= &internal_scanner; - if (translog_init_scanner(buff->lsn, 1, scanner)) + if (translog_init_scanner(buff->lsn, 1, scanner, 0)) DBUG_RETURN(RECHEADER_READ_ERROR); } if (translog_get_next_chunk(scanner)) @@ -5502,13 +5577,15 @@ translog_variable_length_header(uchar *page, translog_size_t page_offset, } base_lsn= buff->groups[0].addr; - translog_init_scanner(base_lsn, 1, scanner); + translog_init_scanner(base_lsn, 1, scanner, scanner == &internal_scanner); /* first group chunk is always chunk type 2 */ page= scanner->page; page_offset= scanner->page_offset; src= page + page_offset + 1; page_rest= TRANSLOG_PAGE_SIZE - (src - page); body_len= page_rest; + if (scanner == &internal_scanner) + translog_destroy_scanner(scanner); } if (lsns) { @@ -5615,6 +5692,7 @@ int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff) { uchar buffer[TRANSLOG_PAGE_SIZE], *page; 
translog_size_t res, page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE; + PAGECACHE_PAGE_LINK direct_link; TRANSLOG_ADDRESS addr; TRANSLOG_VALIDATOR_DATA data; DBUG_ENTER("translog_read_record_header"); @@ -5628,8 +5706,10 @@ int translog_read_record_header(LSN lsn, TRANSLOG_HEADER_BUFFER *buff) data.was_recovered= 0; addr= lsn; addr-= page_offset; /* offset decreasing */ - res= (!(page= translog_get_page(&data, buffer))) ? RECHEADER_READ_ERROR : + res= (!(page= translog_get_page(&data, buffer, &direct_link))) ? + RECHEADER_READ_ERROR : translog_read_record_header_from_buffer(page, page_offset, buff, 0); + translog_free_link(direct_link); DBUG_RETURN(res); } @@ -5774,8 +5854,9 @@ static my_bool translog_record_read_next_chunk(struct st_translog_reader_data data->current_group++; data->current_chunk= 0; DBUG_PRINT("info", ("skip to group: #%u", data->current_group)); + translog_destroy_scanner(&data->scanner); translog_init_scanner(data->header.groups[data->current_group].addr, - 1, &data->scanner); + 1, &data->scanner, 1); } else { @@ -5794,7 +5875,8 @@ static my_bool translog_record_read_next_chunk(struct st_translog_reader_data DBUG_ASSERT(data->header.groups_no - 1 == data->current_group); DBUG_ASSERT(data->header.lsn == data->scanner.page_addr + data->scanner.page_offset); - translog_init_scanner(data->header.chunk0_data_addr, 1, &data->scanner); + translog_destroy_scanner(&data->scanner); + translog_init_scanner(data->header.chunk0_data_addr, 1, &data->scanner, 1); data->chunk_size= data->header.chunk0_data_len; data->body_offset= data->scanner.page_offset; data->current_offset= new_current_offset; @@ -5844,7 +5926,7 @@ static my_bool translog_init_reader_data(LSN lsn, { int read_header; DBUG_ENTER("translog_init_reader_data"); - if (translog_init_scanner(lsn, 1, &data->scanner) || + if (translog_init_scanner(lsn, 1, &data->scanner, 1) || ((read_header= translog_read_record_header_scan(&data->scanner, &data->header, 1)) == RECHEADER_READ_ERROR)) @@ -5865,6 
+5947,16 @@ static my_bool translog_init_reader_data(LSN lsn, } +/** + @brief Destroy reader data object +*/ + +static void translog_destroy_reader_data(struct st_translog_reader_data *data) +{ + translog_destroy_scanner(&data->scanner); +} + + /* Read a part of the record. @@ -5924,7 +6016,10 @@ translog_size_t translog_read_record(LSN lsn, memcpy(buffer, data->header.header + offset, len); length-= len; if (length == 0) + { + translog_destroy_reader_data(data); DBUG_RETURN(requested_length); + } offset+= len; buffer+= len; DBUG_PRINT("info", @@ -5952,7 +6047,10 @@ translog_size_t translog_read_record(LSN lsn, (offset - data->current_offset), len); length-= len; if (length == 0) + { + translog_destroy_reader_data(data); DBUG_RETURN(requested_length); + } offset+= len; buffer+= len; DBUG_PRINT("info", @@ -5961,7 +6059,10 @@ translog_size_t translog_read_record(LSN lsn, (ulong) length)); } if (translog_record_read_next_chunk(data)) + { + translog_destroy_reader_data(data); DBUG_RETURN(requested_length - length); + } } } @@ -6624,6 +6725,7 @@ LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon) { uint chunk_type; TRANSLOG_SCANNER_DATA scanner; + LSN result; DBUG_ENTER("translog_next_LSN"); if (horizon == LSN_IMPOSSIBLE) @@ -6632,7 +6734,7 @@ LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon) if (addr == horizon) DBUG_RETURN(LSN_IMPOSSIBLE); - translog_init_scanner(addr, 0, &scanner); + translog_init_scanner(addr, 0, &scanner, 1); chunk_type= scanner.page[scanner.page_offset] & TRANSLOG_CHUNK_TYPE; DBUG_PRINT("info", ("type: %x byte: %x", (uint) chunk_type, @@ -6647,9 +6749,13 @@ LSN translog_next_LSN(TRANSLOG_ADDRESS addr, TRANSLOG_ADDRESS horizon) DBUG_PRINT("info", ("type: %x byte: %x", (uint) chunk_type, (uint) scanner.page[scanner.page_offset])); } + if (scanner.page[scanner.page_offset] == 0) - DBUG_RETURN(LSN_IMPOSSIBLE); /* reached page filler */ - DBUG_RETURN(scanner.page_addr + scanner.page_offset); + result= 
LSN_IMPOSSIBLE; /* reached page filler */ + else + result= scanner.page_addr + scanner.page_offset; + translog_destroy_scanner(&scanner); + DBUG_RETURN(result); } /** @@ -6681,7 +6787,7 @@ LSN translog_first_lsn_in_log() data.addr= &addr; { uchar buffer[TRANSLOG_PAGE_SIZE]; - if ((page= translog_get_page(&data, buffer)) == NULL || + if ((page= translog_get_page(&data, buffer, NULL)) == NULL || (chunk_offset= translog_get_first_chunk_offset(page)) == 0) DBUG_RETURN(LSN_ERROR); } @@ -6719,7 +6825,7 @@ LSN translog_first_theoretical_lsn() addr= MAKE_LSN(1, TRANSLOG_PAGE_SIZE); /* the first page of the file */ data.addr= &addr; - if ((page= translog_get_page(&data, buffer)) == NULL) + if ((page= translog_get_page(&data, buffer, NULL)) == NULL) DBUG_RETURN(LSN_ERROR); DBUG_RETURN(MAKE_LSN(1, TRANSLOG_PAGE_SIZE + diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h index 164ff013b10..c53132916ec 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -182,10 +182,14 @@ typedef struct st_translog_scanner_data TRANSLOG_ADDRESS horizon; TRANSLOG_ADDRESS last_file_page; /* Last page on in this file */ uchar *page; /* page content pointer */ + /* direct link on the current page or NULL if it is not supported/requested */ + PAGECACHE_PAGE_LINK direct_link; /* offset of the chunk in the page */ translog_size_t page_offset; /* set horizon only once at init */ my_bool fixed_horizon; + /* try to get direct link on the page if it is possible */ + my_bool use_direct_link; } TRANSLOG_SCANNER_DATA; @@ -245,7 +249,9 @@ extern my_bool translog_flush(LSN lsn); extern my_bool translog_init_scanner(LSN lsn, my_bool fixed_horizon, - struct st_translog_scanner_data *scanner); + struct st_translog_scanner_data *scanner, + my_bool use_direct_link); +extern void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner); extern int translog_read_next_record_header(TRANSLOG_SCANNER_DATA *scanner, TRANSLOG_HEADER_BUFFER *buff); diff --git 
a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 9b665cfb958..6852d5b4a6d 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -58,7 +58,7 @@ if (pos > end_pos) \ ** In MySQL the server will handle version issues. ******************************************************************************/ -MARIA_HA *_ma_test_if_reopen(char *filename) +MARIA_HA *_ma_test_if_reopen(const char *filename) { LIST *pos; @@ -1001,7 +1001,9 @@ uint _ma_state_info_write(MARIA_SHARE *share, uint pWrite) if (pWrite & 4) pthread_mutex_lock(&share->intern_lock); else if (maria_multi_threaded) + { safe_mutex_assert_owner(&share->intern_lock); + } if (share->base.born_transactional && translog_inited && !maria_in_recovery) { diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index 9f450d25c50..27c9ceaa0f1 100755 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -96,7 +96,7 @@ #define PCBLOCK_INFO(B) \ DBUG_PRINT("info", \ ("block: 0x%lx file: %lu page: %lu s: %0x hshL: 0x%lx req: %u/%u " \ - "wrlocks: %u", \ + "wrlocks: %u pins: %u", \ (ulong)(B), \ (ulong)((B)->hash_link ? \ (B)->hash_link->file.file : \ @@ -110,7 +110,8 @@ (uint)((B)->hash_link ? 
\ (B)->hash_link->requests : \ 0), \ - block->wlocks)) + block->wlocks, \ + (uint)(B)->pins)) /* TODO: put it to my_static.c */ my_bool my_disable_flush_pagecache_blocks= 0; @@ -457,8 +458,10 @@ error: #define FLUSH_CACHE 2000 /* sort this many blocks at once */ static void free_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block); +#ifndef DBUG_OFF static void test_key_cache(PAGECACHE *pagecache, const char *where, my_bool lock); +#endif #define PAGECACHE_HASH(p, f, pos) (((ulong) (pos) + \ (ulong) (f).file) & (p->hash_entries-1)) @@ -655,11 +658,11 @@ static inline uint next_power(uint value) */ -int init_pagecache(PAGECACHE *pagecache, size_t use_mem, - uint division_limit, uint age_threshold, - uint block_size) +ulong init_pagecache(PAGECACHE *pagecache, size_t use_mem, + uint division_limit, uint age_threshold, + uint block_size) { - uint blocks, hash_links, length; + ulong blocks, hash_links, length; int error; DBUG_ENTER("init_pagecache"); DBUG_ASSERT(block_size >= 512); @@ -689,10 +692,10 @@ int init_pagecache(PAGECACHE *pagecache, size_t use_mem, block_size)); DBUG_ASSERT(((uint)(1 << pagecache->shift)) == block_size); - blocks= (int) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) + - 2 * sizeof(PAGECACHE_HASH_LINK) + - sizeof(PAGECACHE_HASH_LINK*) * - 5/4 + block_size)); + blocks= (ulong) (use_mem / (sizeof(PAGECACHE_BLOCK_LINK) + + 2 * sizeof(PAGECACHE_HASH_LINK) + + sizeof(PAGECACHE_HASH_LINK*) * + 5/4 + block_size)); /* We need to support page cache with just one block to be able to do scanning of rows-in-block files @@ -714,7 +717,7 @@ int init_pagecache(PAGECACHE *pagecache, size_t use_mem, ALIGN_SIZE(hash_links * sizeof(PAGECACHE_HASH_LINK)) + ALIGN_SIZE(sizeof(PAGECACHE_HASH_LINK*) * pagecache->hash_entries))) + - (((ulong) blocks) << pagecache->shift) > use_mem) + (blocks << pagecache->shift) > use_mem) blocks--; /* Allocate memory for cache page buffers */ if ((pagecache->block_mem= @@ -726,8 +729,7 @@ int init_pagecache(PAGECACHE *pagecache, 
size_t use_mem, For each block 2 hash links are allocated */ if ((pagecache->block_root= - (PAGECACHE_BLOCK_LINK*) my_malloc((uint) length, - MYF(0)))) + (PAGECACHE_BLOCK_LINK*) my_malloc((size_t) length, MYF(0)))) break; my_large_free(pagecache->block_mem, MYF(0)); pagecache->block_mem= 0; @@ -739,8 +741,8 @@ int init_pagecache(PAGECACHE *pagecache, size_t use_mem, } blocks= blocks / 4*3; } - pagecache->blocks_unused= (ulong) blocks; - pagecache->disk_blocks= (int) blocks; + pagecache->blocks_unused= blocks; + pagecache->disk_blocks= (long) blocks; pagecache->hash_links= hash_links; pagecache->hash_root= (PAGECACHE_HASH_LINK**) ((char*) pagecache->block_root + @@ -782,8 +784,8 @@ int init_pagecache(PAGECACHE *pagecache, size_t use_mem, pagecache->waiting_for_hash_link.last_thread= NULL; pagecache->waiting_for_block.last_thread= NULL; DBUG_PRINT("exit", - ("disk_blocks: %d block_root: 0x%lx hash_entries: %d\ - hash_root: 0x%lx hash_links: %d hash_link_root: 0x%lx", + ("disk_blocks: %ld block_root: 0x%lx hash_entries: %ld\ + hash_root: 0x%lx hash_links: %ld hash_link_root: 0x%lx", pagecache->disk_blocks, (long) pagecache->block_root, pagecache->hash_entries, (long) pagecache->hash_root, pagecache->hash_links, (long) pagecache->hash_link_root)); @@ -796,7 +798,7 @@ int init_pagecache(PAGECACHE *pagecache, size_t use_mem, } pagecache->blocks= pagecache->disk_blocks > 0 ? pagecache->disk_blocks : 0; - DBUG_RETURN((int) pagecache->disk_blocks); + DBUG_RETURN((ulong) pagecache->disk_blocks); err: error= my_errno; @@ -887,11 +889,11 @@ static int flush_all_key_blocks(PAGECACHE *pagecache) So we disable it for now. */ #if NOT_USED /* keep disabled until code is fixed see above !! 
*/ -int resize_pagecache(PAGECACHE *pagecache, - size_t use_mem, uint division_limit, - uint age_threshold) +ulong resize_pagecache(PAGECACHE *pagecache, + size_t use_mem, uint division_limit, + uint age_threshold) { - int blocks; + ulong blocks; #ifdef THREAD struct st_my_thread_var *thread; WQUEUE *wqueue; @@ -1282,8 +1284,10 @@ static void unlink_block(PAGECACHE *pagecache, PAGECACHE_BLOCK_LINK *block) DBUG_ENTER("unlink_block"); DBUG_PRINT("unlink_block", ("unlink 0x%lx", (ulong)block)); if (block->next_used == block) + { /* The list contains only one member */ pagecache->used_last= pagecache->used_ins= NULL; + } else { block->next_used->prev_used= block->prev_used; @@ -2661,13 +2665,12 @@ void pagecache_unpin(PAGECACHE *pagecache, */ void pagecache_unlock_by_link(PAGECACHE *pagecache, - PAGECACHE_PAGE_LINK *link, + PAGECACHE_BLOCK_LINK *block, enum pagecache_page_lock lock, enum pagecache_page_pin pin, LSN first_REDO_LSN_for_page, LSN lsn) { - PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; DBUG_ENTER("pagecache_unlock_by_link"); DBUG_PRINT("enter", ("block: 0x%lx fd: %u page: %lu %s %s", (ulong) block, @@ -2820,16 +2823,28 @@ void pagecache_unpin_by_link(PAGECACHE *pagecache, Pin will be chosen according to lock parameter (see lock_to_pin) */ -static enum pagecache_page_pin lock_to_pin[]= +static enum pagecache_page_pin lock_to_pin[2][8]= { - PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/, - PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/, - PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/, - PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/, - PAGECACHE_PIN /*PAGECACHE_LOCK_WRITE*/, - PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/, - PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_UNLOCK*/, - PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_TO_READ*/ + { + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/, + PAGECACHE_PIN_LEFT_PINNED 
/*PAGECACHE_LOCK_LEFT_WRITELOCKED*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ*/, + PAGECACHE_PIN /*PAGECACHE_LOCK_WRITE*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/, + PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_UNLOCK*/, + PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_TO_READ*/ + }, + { + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_LEFT_UNLOCKED*/, + PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_LEFT_READLOCKED*/, + PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_LEFT_WRITELOCKED*/, + PAGECACHE_PIN /*PAGECACHE_LOCK_READ*/, + PAGECACHE_PIN /*PAGECACHE_LOCK_WRITE*/, + PAGECACHE_PIN_LEFT_UNPINNED /*PAGECACHE_LOCK_READ_UNLOCK*/, + PAGECACHE_UNPIN /*PAGECACHE_LOCK_WRITE_UNLOCK*/, + PAGECACHE_PIN_LEFT_PINNED /*PAGECACHE_LOCK_WRITE_TO_READ*/ + } }; uchar *pagecache_valid_read(PAGECACHE *pagecache, @@ -2839,24 +2854,27 @@ uchar *pagecache_valid_read(PAGECACHE *pagecache, uchar *buff, enum pagecache_page_type type, enum pagecache_page_lock lock, - PAGECACHE_PAGE_LINK *link, + PAGECACHE_BLOCK_LINK **link, pagecache_disk_read_validator validator, uchar* validator_data) { int error= 0; - enum pagecache_page_pin pin= lock_to_pin[lock]; - PAGECACHE_PAGE_LINK fake_link; + enum pagecache_page_pin pin= lock_to_pin[test(buff==0)][lock]; + PAGECACHE_BLOCK_LINK *fake_link; DBUG_ENTER("pagecache_valid_read"); - DBUG_PRINT("enter", ("fd: %u page: %lu level: %u t:%s %s %s", - (uint) file->file, (ulong) pageno, level, + DBUG_PRINT("enter", ("fd: %u page: %lu buffer: 0x%lx level: %u " + "t:%s %s %s", + (uint) file->file, (ulong) pageno, + (ulong) buff, level, page_cache_page_type_str[type], page_cache_page_lock_str[lock], page_cache_page_pin_str[pin])); + DBUG_ASSERT(buff != 0 || (buff == 0 && (pin == PAGECACHE_PIN || + pin == PAGECACHE_PIN_LEFT_PINNED))); if (!link) link= &fake_link; - else - *link= 0; + *link= 0; /* Catch errors */ restart: @@ -2910,19 +2928,25 @@ restart: goto restart; } - if (! 
((status= block->status) & PCBLOCK_ERROR)) + status= block->status; + if (!buff) + buff= block->buffer; + else { + if (!(status & PCBLOCK_ERROR)) + { #if !defined(SERIALIZED_READ_FROM_CACHE) - pagecache_pthread_mutex_unlock(&pagecache->cache_lock); + pagecache_pthread_mutex_unlock(&pagecache->cache_lock); #endif - DBUG_ASSERT((pagecache->block_size & 511) == 0); - /* Copy data from the cache buffer */ - bmove512(buff, block->buffer, pagecache->block_size); + DBUG_ASSERT((pagecache->block_size & 511) == 0); + /* Copy data from the cache buffer */ + bmove512(buff, block->buffer, pagecache->block_size); #if !defined(SERIALIZED_READ_FROM_CACHE) - pagecache_pthread_mutex_lock(&pagecache->cache_lock); + pagecache_pthread_mutex_lock(&pagecache->cache_lock); #endif + } } remove_reader(block); @@ -2934,7 +2958,7 @@ restart: if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN) unreg_request(pagecache, block, 1); else - *link= (PAGECACHE_PAGE_LINK)block; + *link= block; dec_counter_for_resize_op(pagecache); @@ -2983,7 +3007,7 @@ my_bool pagecache_delete(PAGECACHE *pagecache, my_bool flush) { int error= 0; - enum pagecache_page_pin pin= lock_to_pin[lock]; + enum pagecache_page_pin pin= lock_to_pin[0][lock]; DBUG_ENTER("pagecache_delete"); DBUG_PRINT("enter", ("fd: %u page: %lu %s %s", (uint) file->file, (ulong) pageno, @@ -3830,7 +3854,8 @@ int flush_pagecache_blocks(PAGECACHE *pagecache, 0 on success (always because it can't fail) */ -int reset_pagecache_counters(const char *name, PAGECACHE *pagecache) +int reset_pagecache_counters(const char *name __attribute__((unused)), + PAGECACHE *pagecache) { DBUG_ENTER("reset_pagecache_counters"); if (!pagecache->inited) diff --git a/storage/maria/ma_pagecache.h b/storage/maria/ma_pagecache.h index 0e2aff3644d..7318cc08040 100644 --- a/storage/maria/ma_pagecache.h +++ b/storage/maria/ma_pagecache.h @@ -73,8 +73,6 @@ enum pagecache_write_mode PAGECACHE_WRITE_DONE }; -typedef void *PAGECACHE_PAGE_LINK; - /* file descriptor 
for Maria */ typedef struct st_pagecache_file { @@ -93,6 +91,8 @@ typedef struct st_pagecache_page PAGECACHE_PAGE; struct st_pagecache_hash_link; typedef struct st_pagecache_hash_link PAGECACHE_HASH_LINK; +typedef PAGECACHE_BLOCK_LINK * PAGECACHE_PAGE_LINK; /* To be removed */ + #include typedef my_bool (*pagecache_disk_read_validator)(uchar *page, uchar *data); @@ -106,25 +106,22 @@ typedef my_bool (*pagecache_disk_read_validator)(uchar *page, uchar *data); typedef struct st_pagecache { - my_bool inited; - my_bool resize_in_flush; /* true during flush of resize operation */ - my_bool can_be_used; /* usage of cache for read/write is allowed */ - uint shift; /* block size = 2 ^ shift */ - size_t mem_size; /* specified size of the cache memory */ - uint32 block_size; /* size of the page buffer of a cache block */ + size_t mem_size; /* specified size of the cache memory */ ulong min_warm_blocks; /* min number of warm blocks; */ ulong age_threshold; /* age threshold for hot blocks */ ulonglong time; /* total number of block link operations */ - uint hash_entries; /* max number of entries in the hash table */ - int hash_links; /* max number of hash links */ - int hash_links_used; /* number of hash links taken from free links pool */ - int disk_blocks; /* max number of blocks in the cache */ + ulong hash_entries; /* max number of entries in the hash table */ + long hash_links; /* max number of hash links */ + long hash_links_used; /* number of hash links taken from free links pool */ + long disk_blocks; /* max number of blocks in the cache */ ulong blocks_used; /* maximum number of concurrently used blocks */ ulong blocks_unused; /* number of currently unused blocks */ ulong blocks_changed; /* number of currently dirty blocks */ ulong warm_blocks; /* number of blocks in warm sub-chain */ ulong cnt_for_resize_op; /* counter to block resize operation */ ulong blocks_available; /* number of blocks available in the LRU chain */ + long blocks; /* max number of blocks in the 
cache */ + uint32 block_size; /* size of the page buffer of a cache block */ PAGECACHE_HASH_LINK **hash_root;/* arr. of entries into hash table buckets */ PAGECACHE_HASH_LINK *hash_link_root;/* memory for hash table links */ PAGECACHE_HASH_LINK *free_hash_list;/* list of free hash links */ @@ -159,19 +156,22 @@ typedef struct st_pagecache ulonglong global_cache_r_requests;/* number of read requests (read hits) */ ulonglong global_cache_read; /* number of reads from files to cache */ - int blocks; /* max number of blocks in the cache */ + uint shift; /* block size = 2 ^ shift */ + my_bool inited; + my_bool resize_in_flush; /* true during flush of resize operation */ + my_bool can_be_used; /* usage of cache for read/write is allowed */ my_bool in_init; /* Set to 1 in MySQL during init/resize */ } PAGECACHE; /* The default key cache */ extern PAGECACHE dflt_pagecache_var, *dflt_pagecache; -extern int init_pagecache(PAGECACHE *pagecache, size_t use_mem, - uint division_limit, uint age_threshold, - uint block_size); -extern int resize_pagecache(PAGECACHE *pagecache, - size_t use_mem, uint division_limit, - uint age_threshold); +extern ulong init_pagecache(PAGECACHE *pagecache, size_t use_mem, + uint division_limit, uint age_threshold, + uint block_size); +extern ulong resize_pagecache(PAGECACHE *pagecache, + size_t use_mem, uint division_limit, + uint age_threshold); extern void change_pagecache_param(PAGECACHE *pagecache, uint division_limit, uint age_threshold); @@ -185,7 +185,7 @@ extern uchar *pagecache_valid_read(PAGECACHE *pagecache, uchar *buff, enum pagecache_page_type type, enum pagecache_page_lock lock, - PAGECACHE_PAGE_LINK *link, + PAGECACHE_BLOCK_LINK **link, pagecache_disk_read_validator validator, uchar* validator_data); @@ -218,7 +218,7 @@ extern void pagecache_unlock(PAGECACHE *pagecache, LSN first_REDO_LSN_for_page, LSN lsn); extern void pagecache_unlock_by_link(PAGECACHE *pagecache, - PAGECACHE_PAGE_LINK *link, + PAGECACHE_BLOCK_LINK *block, enum 
pagecache_page_lock lock, enum pagecache_page_pin pin, LSN first_REDO_LSN_for_page, diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index e740e334b5f..bb9852779c8 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -105,6 +105,7 @@ static int new_table(uint16 sid, const char *name, static int new_page(File fileid, pgcache_page_no_t pageid, LSN rec_lsn, struct st_dirty_page *dirty_page); static int close_all_tables(void); +static my_bool close_one_table(const char *name, LSN addr); static void print_redo_phase_progress(TRANSLOG_ADDRESS addr); /** @brief global [out] buffer for translog_read_record(); never shrinks */ @@ -415,6 +416,8 @@ prototype_redo_exec_hook(REDO_CREATE_TABLE) uint flags; int error= 1, create_mode= O_RDWR | O_TRUNC; MARIA_HA *info= NULL; + uint kfile_size_before_extension, keystart; + if (skip_DDLs) { tprint(tracef, "we skip DDLs\n"); @@ -431,6 +434,12 @@ prototype_redo_exec_hook(REDO_CREATE_TABLE) } name= log_record_buffer.str; tprint(tracef, "Table '%s'", name); + if (close_one_table(name, rec->lsn)) + { + tprint(tracef, " got error %d on close\n", my_errno); + ALERT_USER(); + goto end; + } /* we try hard to get create_rename_lsn, to avoid mistakes if possible */ info= maria_open(name, O_RDONLY, HA_OPEN_FOR_REPAIR); if (info) @@ -474,7 +483,7 @@ prototype_redo_exec_hook(REDO_CREATE_TABLE) info= NULL; } else /* one or two files absent, or header corrupted... 
*/ - tprint(tracef, "can't be opened, probably does not exist"); + tprint(tracef, " can't be opened, probably does not exist"); /* if does not exist, or is older, overwrite it */ /** @todo symlinks */ ptr= name + strlen(name) + 1; @@ -490,13 +499,13 @@ prototype_redo_exec_hook(REDO_CREATE_TABLE) if ((kfile= my_create_with_symlink(linkname_ptr, filename, 0, create_mode, MYF(MY_WME|create_flag))) < 0) { - tprint(tracef, "Failed to create index file\n"); + tprint(tracef, " Failed to create index file\n"); goto end; } ptr++; - uint kfile_size_before_extension= uint2korr(ptr); + kfile_size_before_extension= uint2korr(ptr); ptr+= 2; - uint keystart= uint2korr(ptr); + keystart= uint2korr(ptr); ptr+= 2; /* set create_rename_lsn (for maria_read_log to be idempotent) */ lsn_store(ptr + sizeof(info->s->state.header) + 2, rec->lsn); @@ -507,7 +516,7 @@ prototype_redo_exec_hook(REDO_CREATE_TABLE) kfile_size_before_extension, 0, MYF(MY_NABP|MY_WME)) || my_chsize(kfile, keystart, 0, MYF(MY_WME))) { - tprint(tracef, "Failed to write to index file\n"); + tprint(tracef, " Failed to write to index file\n"); goto end; } if (!(flags & HA_DONT_TOUCH_DATA)) @@ -521,7 +530,7 @@ prototype_redo_exec_hook(REDO_CREATE_TABLE) MYF(MY_WME | create_flag))) < 0) || my_close(dfile, MYF(MY_WME))) { - tprint(tracef, "Failed to create data file\n"); + tprint(tracef, " Failed to create data file\n"); goto end; } /* @@ -533,7 +542,7 @@ prototype_redo_exec_hook(REDO_CREATE_TABLE) if (((info= maria_open(name, O_RDONLY, 0)) == NULL) || _ma_initialize_data_file(info->s, info->dfile.file)) { - tprint(tracef, "Failed to open new table or write to data file\n"); + tprint(tracef, " Failed to open new table or write to data file\n"); goto end; } } @@ -1436,6 +1445,11 @@ prototype_undo_exec_hook(UNDO_ROW_UPDATE) static int run_redo_phase(LSN lsn, my_bool apply) { + TRANSLOG_HEADER_BUFFER rec; + struct st_translog_scanner_data scanner; + int len; + uint i; + /* install hooks for execution */ #define 
install_redo_exec_hook(R) \ log_record_type_descriptor[LOGREC_ ## R].record_execute_in_redo_phase= \ @@ -1467,8 +1481,6 @@ static int run_redo_phase(LSN lsn, my_bool apply) current_group_end_lsn= LSN_IMPOSSIBLE; - TRANSLOG_HEADER_BUFFER rec; - if (unlikely(lsn == LSN_IMPOSSIBLE || lsn == translog_get_horizon())) { tprint(tracef, "checkpoint address refers to the log end log or " @@ -1476,7 +1488,7 @@ static int run_redo_phase(LSN lsn, my_bool apply) return 0; } - int len= translog_read_record_header(lsn, &rec); + len= translog_read_record_header(lsn, &rec); /** @todo EOF should be detected */ if (len == RECHEADER_READ_ERROR) @@ -1484,13 +1496,11 @@ static int run_redo_phase(LSN lsn, my_bool apply) tprint(tracef, "Failed to read header of the first record.\n"); return 1; } - struct st_translog_scanner_data scanner; - if (translog_init_scanner(lsn, 1, &scanner)) + if (translog_init_scanner(lsn, 1, &scanner, 0)) { tprint(tracef, "Scanner init failed\n"); return 1; } - uint i; for (i= 1;;i++) { uint16 sid= rec.short_trid; @@ -1533,7 +1543,7 @@ static int run_redo_phase(LSN lsn, my_bool apply) tprint(tracef, "Cannot find record where it should be\n"); return 1; } - if (translog_init_scanner(rec2.lsn, 1, &scanner2)) + if (translog_init_scanner(rec2.lsn, 1, &scanner2, 0)) { tprint(tracef, "Scanner2 init failed\n"); return 1; @@ -1607,6 +1617,7 @@ static uint end_of_redo_phase(my_bool prepare_for_undo_phase) { uint sid, unfinished= 0; char llbuf[22]; + LSN addr; hash_free(&all_dirty_pages); /* @@ -1667,7 +1678,7 @@ static uint end_of_redo_phase(my_bool prepare_for_undo_phase) The UNDO phase uses some normal run-time code of ROLLBACK: generates log records, etc; prepare tables for that */ - LSN addr= translog_get_horizon(); + addr= translog_get_horizon(); for (sid= 0; sid <= SHARE_ID_MAX; sid++) { MARIA_HA *info= all_tables[sid].info; @@ -2070,6 +2081,42 @@ end: return error; } + +/* Close one table during redo phase */ + +static my_bool close_one_table(const char 
*open_name, LSN addr) +{ + my_bool res= 0; + LIST *pos; + /* There are no other threads using the tables, so we don't need any locks */ + for (pos=maria_open_list ; pos ;) + { + MARIA_HA *info= (MARIA_HA*) pos->data; + MARIA_SHARE *share= info->s; + pos= pos->next; + if (!strcmp(share->open_file_name, open_name)) + { + struct st_table_for_recovery *internal_table, *end; + + for (internal_table= all_tables, end= internal_table + SHARE_ID_MAX + 1; + internal_table < end ; + internal_table++) + { + if (internal_table->info == info) + { + internal_table->info= 0; + break; + } + } + prepare_table_for_close(info, addr); + if (maria_close(info)) + res= 1; + } + } + return res; +} + + static void print_redo_phase_progress(TRANSLOG_ADDRESS addr) { static int end_logno= FILENO_IMPOSSIBLE, end_offset, percentage_printed= 0; diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index f9ed249817e..e6ac0dcfc50 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -154,7 +154,7 @@ int main(int argc, char **argv) enum options_mc { OPT_CHARSETS_DIR=256, OPT_SET_COLLATION,OPT_START_CHECK_POS, - OPT_CORRECT_CHECKSUM, OPT_KEY_BUFFER_SIZE, + OPT_CORRECT_CHECKSUM, OPT_PAGE_BUFFER_SIZE, OPT_KEY_CACHE_BLOCK_SIZE, OPT_MARIA_BLOCK_SIZE, OPT_READ_BUFFER_SIZE, OPT_WRITE_BUFFER_SIZE, OPT_SORT_BUFFER_SIZE, OPT_SORT_KEY_BLOCKS, OPT_DECODE_BITS, OPT_FT_MIN_WORD_LEN, @@ -296,7 +296,7 @@ static struct my_option my_long_options[] = {"wait", 'w', "Wait if table is locked.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, - { "key_buffer_size", OPT_KEY_BUFFER_SIZE, "", + { "page_buffer_size", OPT_PAGE_BUFFER_SIZE, "", (uchar**) &check_param.use_buffers, (uchar**) &check_param.use_buffers, 0, GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT, (long) MALLOC_OVERHEAD, (long) ~0L, (long) MALLOC_OVERHEAD, (long) IO_SIZE, 0}, diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index 09852f4dc86..adcb35a5e0e 100644 --- a/storage/maria/maria_def.h +++ 
b/storage/maria/maria_def.h @@ -842,7 +842,7 @@ typedef struct st_maria_block_info #define UPDATE_AUTO_INC 8 #define UPDATE_OPEN_COUNT 16 -#define USE_BUFFER_INIT (((1024L*512L-MALLOC_OVERHEAD)/IO_SIZE)*IO_SIZE) +#define USE_BUFFER_INIT (((1024L*1024L*10-MALLOC_OVERHEAD)/8192)*8192) #define READ_BUFFER_INIT (1024L*256L-MALLOC_OVERHEAD) #define SORT_BUFFER_INIT (2048L*1024L-MALLOC_OVERHEAD) #define MIN_SORT_BUFFER (4096-MALLOC_OVERHEAD) @@ -906,7 +906,7 @@ my_bool _ma_check_status(void *param); void _ma_reset_status(MARIA_HA *maria); #include "ma_commit.h" -extern MARIA_HA *_ma_test_if_reopen(char *filename); +extern MARIA_HA *_ma_test_if_reopen(const char *filename); my_bool _ma_check_table_is_closed(const char *name, const char *where); int _ma_open_datafile(MARIA_HA *info, MARIA_SHARE *share, File file_to_dup); int _ma_open_keyfile(MARIA_SHARE *share); diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c index a7a6370b1c4..ec1f7697a23 100644 --- a/storage/maria/maria_read_log.c +++ b/storage/maria/maria_read_log.c @@ -17,7 +17,6 @@ #include "ma_recovery.h" #include -#define PCACHE_SIZE (1024*1024*10) #define LOG_FLAGS 0 #define LOG_FILE_SIZE (1024L*1024L) @@ -30,7 +29,8 @@ const char *default_dbug_option= "d:t:i:O,\\maria_read_log.trace"; const char *default_dbug_option= "d:t:i:o,/tmp/maria_read_log.trace"; #endif #endif /* DBUG_OFF */ -static my_bool opt_only_display, opt_display_and_apply; +static my_bool opt_only_display, opt_apply, opt_apply_undo, opt_silent; +static ulong opt_page_buffer_size; int main(int argc, char **argv) { @@ -63,7 +63,7 @@ int main(int argc, char **argv) } /* same page cache for log and data; assumes same page size... 
*/ DBUG_ASSERT(maria_block_size == TRANSLOG_PAGE_SIZE); - if (init_pagecache(maria_pagecache, PCACHE_SIZE, 0, 0, + if (init_pagecache(maria_pagecache, opt_page_buffer_size, 0, 0, TRANSLOG_PAGE_SIZE) == 0) { fprintf(stderr, "Got error in init_pagecache() (errno: %d)\n", errno); @@ -100,8 +100,8 @@ int main(int argc, char **argv) LSN_IN_PARTS(lsn)); fprintf(stdout, "TRACE of the last maria_read_log\n"); - if (maria_apply_log(lsn, opt_display_and_apply, stdout, - opt_display_and_apply, FALSE)) + if (maria_apply_log(lsn, opt_apply, opt_silent ? NULL : stdout, + opt_apply_undo, FALSE)) goto err; fprintf(stdout, "%s: SUCCESS\n", my_progname); @@ -121,19 +121,32 @@ end: static struct my_option my_long_options[] = { + {"apply", 'a', + "Apply log to tables. Will display a lot of information if not run with --silent", + (uchar **) &opt_apply, (uchar **) &opt_apply, 0, + GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, +#ifndef DBUG_OFF + {"debug", '#', "Output debug log. Often the argument is 'd:t:o,filename'.", + 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, +#endif {"help", '?', "Display this help and exit.", 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, {"only-display", 'o', "display brief info about records's header", (uchar **) &opt_only_display, (uchar **) &opt_only_display, 0, GET_BOOL, NO_ARG,0, 0, 0, 0, 0, 0}, - {"display-and-apply", 'a', - "like --only-display but displays more info and modifies tables", - (uchar **) &opt_display_and_apply, (uchar **) &opt_display_and_apply, 0, + { "page_buffer_size", 'P', "", + (uchar**) &opt_page_buffer_size, (uchar**) &opt_page_buffer_size, 0, + GET_ULONG, REQUIRED_ARG, (long) USE_BUFFER_INIT, + (long) MALLOC_OVERHEAD, (long) ~(ulong) 0, (long) MALLOC_OVERHEAD, + (long) IO_SIZE, 0}, + {"silent", 's', "Print less information during apply/undo phase", + (uchar **) &opt_silent, (uchar **) &opt_silent, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, -#ifndef DBUG_OFF - {"debug", '#', "Output debug log. 
Often this is 'd:t:o,filename'.", - 0, 0, 0, GET_STR, OPT_ARG, 0, 0, 0, 0, 0, 0}, -#endif + {"undo", 'u', "Apply undos to tables. (disable with --disable-undo)", + (uchar **) &opt_apply_undo, (uchar **) &opt_apply_undo, 0, + GET_BOOL, NO_ARG, 1, 0, 0, 0, 0, 0}, + {"version", 'V', "Print version and exit.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, { 0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} }; @@ -141,7 +154,7 @@ static struct my_option my_long_options[] = static void print_version(void) { - VOID(printf("%s Ver 1.0 for %s on %s\n", + VOID(printf("%s Ver 1.1 for %s on %s\n", my_progname, SYSTEM_TYPE, MACHINE_TYPE)); NETWARE_SET_SCREEN_MODE(1); } @@ -174,6 +187,9 @@ get_one_option(int optid __attribute__((unused)), case '?': usage(); exit(0); + case 'V': + print_version(); + exit(0); #ifndef DBUG_OFF case '#': DBUG_SET_INITIAL(argument ? argument : default_dbug_option); @@ -192,7 +208,10 @@ static void get_options(int *argc,char ***argv) if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option))) exit(ho_error); - if ((opt_only_display + opt_display_and_apply) != 1) + if (opt_apply_undo) + opt_apply= 1; + + if ((opt_only_display + opt_apply) != 1) { usage(); exit(1); diff --git a/storage/maria/unittest/ma_control_file-t.c b/storage/maria/unittest/ma_control_file-t.c index a7472361dad..db0a1690ab8 100644 --- a/storage/maria/unittest/ma_control_file-t.c +++ b/storage/maria/unittest/ma_control_file-t.c @@ -407,7 +407,7 @@ static void version() static my_bool get_one_option(int optid, const struct my_option *opt __attribute__((unused)), - char *argument) + char *argument __attribute__((unused))) { switch(optid) { case 'V': diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c index 6ea45f80433..fa7267e31d4 100644 --- a/storage/maria/unittest/ma_test_loghandler-t.c +++ b/storage/maria/unittest/ma_test_loghandler-t.c @@ -363,7 +363,7 @@ int main(int argc __attribute__((unused)), char 
*argv[]) read_ok(&rec); translog_free_record_header(&rec); lsn= first_lsn; - if (translog_init_scanner(first_lsn, 1, &scanner)) + if (translog_init_scanner(first_lsn, 1, &scanner, 0)) { fprintf(stderr, "scanner init failed\n"); goto err; diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c index d5f00bdb6fd..559cd17638f 100644 --- a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c @@ -378,7 +378,7 @@ int main(int argc __attribute__((unused)), char *argv[]) ok(1, "read record"); translog_free_record_header(&rec); lsn= first_lsn; - if (translog_init_scanner(first_lsn, 1, &scanner)) + if (translog_init_scanner(first_lsn, 1, &scanner, 0)) { fprintf(stderr, "scanner init failed\n"); goto err; diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c index 6255c11db89..a3af67c8ac2 100644 --- a/storage/maria/unittest/ma_test_loghandler_multithread-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c @@ -373,7 +373,7 @@ int main(int argc __attribute__((unused)), bzero(indeces, sizeof(indeces)); - if (translog_init_scanner(first_lsn, 1, &scanner)) + if (translog_init_scanner(first_lsn, 1, &scanner, 0)) { fprintf(stderr, "scanner init failed\n"); goto err; From 7492e2ca8e4005495963f9b5e42b1c0038c86972 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 27 Sep 2007 17:36:03 +0300 Subject: [PATCH 03/12] Use direct memory access for the log scan BUILD/SETUP.sh: compile maria by default --- BUILD/SETUP.sh | 1 + storage/maria/ma_recovery.c | 32 +++++++++++++++++++++----------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/BUILD/SETUP.sh b/BUILD/SETUP.sh index 429c0cd85b4..a07f294be28 100755 --- a/BUILD/SETUP.sh +++ b/BUILD/SETUP.sh @@ -145,6 +145,7 @@ base_configs="--prefix=$prefix --enable-assembler " base_configs="$base_configs 
--with-extra-charsets=complex " base_configs="$base_configs --enable-thread-safe-client " base_configs="$base_configs --with-big-tables" +base_configs="$base_configs --with-plugin-maria" if test -d "$path/../cmd-line-utils/readline" then diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index bb9852779c8..d3aaa64bbda 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -1496,7 +1496,7 @@ static int run_redo_phase(LSN lsn, my_bool apply) tprint(tracef, "Failed to read header of the first record.\n"); return 1; } - if (translog_init_scanner(lsn, 1, &scanner, 0)) + if (translog_init_scanner(lsn, 1, &scanner, 1)) { tprint(tracef, "Scanner init failed\n"); return 1; @@ -1529,24 +1529,25 @@ static int run_redo_phase(LSN lsn, my_bool apply) } else { + struct st_translog_scanner_data scanner2; + TRANSLOG_HEADER_BUFFER rec2; /* There is a complete group for this transaction, containing more than this event. */ tprint(tracef, " ends a group:\n"); - struct st_translog_scanner_data scanner2; - TRANSLOG_HEADER_BUFFER rec2; len= - translog_read_record_header(all_active_trans[sid].group_start_lsn, &rec2); + translog_read_record_header(all_active_trans[sid].group_start_lsn, + &rec2); if (len < 0) /* EOF or error */ { tprint(tracef, "Cannot find record where it should be\n"); - return 1; + goto err; } - if (translog_init_scanner(rec2.lsn, 1, &scanner2, 0)) + if (translog_init_scanner(rec2.lsn, 1, &scanner2, 1)) { tprint(tracef, "Scanner2 init failed\n"); - return 1; + goto err; } current_group_end_lsn= rec.lsn; do @@ -1556,13 +1557,16 @@ static int run_redo_phase(LSN lsn, my_bool apply) const LOG_DESC *log_desc2= &log_record_type_descriptor[rec2.type]; display_record_position(log_desc2, &rec2, 0); if (apply && display_and_apply_record(log_desc2, &rec2)) - return 1; + { + translog_destroy(&scanner2); + goto err; + } } len= translog_read_next_record_header(&scanner2, &rec2); if (len < 0) /* EOF or error */ { tprint(tracef, "Cannot find 
record where it should be\n"); - return 1; + goto err; } } while (rec2.lsn < rec.lsn); @@ -1571,10 +1575,11 @@ static int run_redo_phase(LSN lsn, my_bool apply) all_active_trans[sid].group_start_lsn= LSN_IMPOSSIBLE; current_group_end_lsn= LSN_IMPOSSIBLE; /* for debugging */ display_record_position(log_desc, &rec, 0); + translog_destroy_scanner(&scanner2); } } if (apply && display_and_apply_record(log_desc, &rec)) - return 1; + goto err; } else /* record does not end group */ { @@ -1595,13 +1600,18 @@ static int run_redo_phase(LSN lsn, my_bool apply) break; case RECHEADER_READ_ERROR: tprint(tracef, "Error reading log\n"); - return 1; + goto err; } break; } } + translog_destroy_scanner(&scanner); translog_free_record_header(&rec); return 0; + +err: + translog_destroy_scanner(&scanner); + return 1; } From 0de4c5562339eb0af9303eb33526a6dde717f5a7 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 27 Sep 2007 17:41:21 +0300 Subject: [PATCH 04/12] Faster translog_filename_by_fileno Fixed compiler warning PAGECACHE_PAGE_LINK -> PAGECACHE_BLOCK_LINK* storage/maria/ma_loghandler.h: PAGECACHE_PAGE_LINK -> PAGECACHE_BLOCK_LINK* storage/maria/ma_pagecache.c: PAGECACHE_PAGE_LINK -> PAGECACHE_BLOCK_LINK* storage/maria/ma_pagecache.h: PAGECACHE_PAGE_LINK -> PAGECACHE_BLOCK_LINK* storage/maria/maria_def.h: PAGECACHE_PAGE_LINK -> PAGECACHE_BLOCK_LINK* --- storage/maria/ma_loghandler.c | 30 +++++++++++++++++------------- storage/maria/ma_loghandler.h | 4 ++-- storage/maria/ma_pagecache.c | 13 ++++++------- storage/maria/ma_pagecache.h | 6 ++---- storage/maria/maria_def.h | 2 +- 5 files changed, 28 insertions(+), 27 deletions(-) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index e30c8c6627f..e52be8be427 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -640,22 +640,25 @@ static void translog_check_cursor(struct st_buffer_cursor *cursor) translog_filename_by_fileno() file_no Number of the log we want to open path Pointer to 
buffer where file name will be - stored (must be FN_REFLEN bytes at least + stored (must be FN_REFLEN bytes at least) RETURN pointer to path */ static char *translog_filename_by_fileno(uint32 file_no, char *path) { - char file_name[10 + 8 + 1]; /* See fallowing my_sprintf() call */ - char *res; + char buff[11], *end; + uint length; DBUG_ENTER("translog_filename_by_fileno"); DBUG_ASSERT(file_no <= 0xfffffff); - my_sprintf(file_name, (file_name, "maria_log.%08u", file_no)); - res= fn_format(path, file_name, log_descriptor.directory, "", MYF(MY_WME)); - DBUG_PRINT("info", ("Path: '%s' path: 0x%lx res: 0x%lx", - res, (ulong) path, (ulong) res)); - DBUG_RETURN(res); + + /* log_descriptor.directory is already formated */ + end= strxmov(path, log_descriptor.directory, "maria_log.0000000", NullS); + length= (uint) (int10_to_str(file_no, buff, 10) - buff); + strmov(end-length+1, buff); + + DBUG_PRINT("info", ("Path: '%s' path: 0x%lx", path, (ulong) res)); + DBUG_RETURN(path); } @@ -986,10 +989,11 @@ static void translog_mark_file_finished(uint32 file) { int i; struct st_file_counter *fc_ptr; - DBUG_ENTER("translog_mark_file_finished"); DBUG_PRINT("enter", ("file: %lu", (ulong) file)); + LINT_INIT(fc_ptr); + pthread_mutex_lock(&log_descriptor.unfinished_files_lock); DBUG_ASSERT(log_descriptor.unfinished_files.elements > 0); @@ -2310,7 +2314,7 @@ my_bool translog_unlock() */ static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer, - PAGECACHE_PAGE_LINK *direct_link) + PAGECACHE_BLOCK_LINK **direct_link) { TRANSLOG_ADDRESS addr= *(data->addr), in_buffers; uint cache_index; @@ -2490,7 +2494,7 @@ static uchar *translog_get_page(TRANSLOG_VALIDATOR_DATA *data, uchar *buffer, */ -static void translog_free_link(PAGECACHE_PAGE_LINK *direct_link) +static void translog_free_link(PAGECACHE_BLOCK_LINK *direct_link) { DBUG_ENTER("translog_free_link"); DBUG_PRINT("info", ("Direct link: 0x%lx", @@ -5692,7 +5696,7 @@ int translog_read_record_header(LSN lsn, 
TRANSLOG_HEADER_BUFFER *buff) { uchar buffer[TRANSLOG_PAGE_SIZE], *page; translog_size_t res, page_offset= LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE; - PAGECACHE_PAGE_LINK direct_link; + PAGECACHE_BLOCK_LINK *direct_link; TRANSLOG_ADDRESS addr; TRANSLOG_VALIDATOR_DATA data; DBUG_ENTER("translog_read_record_header"); @@ -5963,7 +5967,7 @@ static void translog_destroy_reader_data(struct st_translog_reader_data *data) SYNOPSIS translog_read_record_header() lsn log record serial number (address of the record) - offset From the beginning of the record beginning (read§ + offset From the beginning of the record beginning (read by translog_read_record_header). length Length of record part which have to be read. buffer Buffer where to read the record part (have to be at diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h index c53132916ec..b13984d88ef 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -182,8 +182,8 @@ typedef struct st_translog_scanner_data TRANSLOG_ADDRESS horizon; TRANSLOG_ADDRESS last_file_page; /* Last page on in this file */ uchar *page; /* page content pointer */ - /* direct link on the current page or NULL if it is not supported/requested */ - PAGECACHE_PAGE_LINK direct_link; + /* direct link on the current page or NULL if not supported/requested */ + PAGECACHE_BLOCK_LINK *direct_link; /* offset of the chunk in the page */ translog_size_t page_offset; /* set horizon only once at init */ diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index 27c9ceaa0f1..4b0ccd2c9c8 100755 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -2755,10 +2755,9 @@ void pagecache_unlock_by_link(PAGECACHE *pagecache, */ void pagecache_unpin_by_link(PAGECACHE *pagecache, - PAGECACHE_PAGE_LINK *link, + PAGECACHE_BLOCK_LINK *block, LSN lsn) { - PAGECACHE_BLOCK_LINK *block= (PAGECACHE_BLOCK_LINK *)link; DBUG_ENTER("pagecache_unpin_by_link"); DBUG_PRINT("enter", ("block: 0x%lx fd: %u 
page: %lu", (ulong) block, @@ -3212,13 +3211,13 @@ my_bool pagecache_write_part(PAGECACHE *pagecache, enum pagecache_page_lock lock, enum pagecache_page_pin pin, enum pagecache_write_mode write_mode, - PAGECACHE_PAGE_LINK *link, + PAGECACHE_BLOCK_LINK **link, uint offset, uint size, pagecache_disk_read_validator validator, uchar* validator_data) { PAGECACHE_BLOCK_LINK *block= NULL; - PAGECACHE_PAGE_LINK fake_link; + PAGECACHE_BLOCK_LINK *fake_link; int error= 0; int need_lock_change= write_lock_change_table[lock].need_lock_change; DBUG_ENTER("pagecache_write_part"); @@ -3234,10 +3233,10 @@ my_bool pagecache_write_part(PAGECACHE *pagecache, DBUG_ASSERT(lock != PAGECACHE_LOCK_LEFT_READLOCKED); DBUG_ASSERT(lock != PAGECACHE_LOCK_READ_UNLOCK); DBUG_ASSERT(offset + size <= pagecache->block_size); + if (!link) link= &fake_link; - else - *link= 0; + *link= 0; restart: @@ -3362,7 +3361,7 @@ restart: if (pin == PAGECACHE_PIN_LEFT_UNPINNED || pin == PAGECACHE_UNPIN) unreg_request(pagecache, block, 1); else - *link= (PAGECACHE_PAGE_LINK)block; + *link= block; if (block->status & PCBLOCK_ERROR) error= 1; diff --git a/storage/maria/ma_pagecache.h b/storage/maria/ma_pagecache.h index 7318cc08040..23a443a5b97 100644 --- a/storage/maria/ma_pagecache.h +++ b/storage/maria/ma_pagecache.h @@ -91,8 +91,6 @@ typedef struct st_pagecache_page PAGECACHE_PAGE; struct st_pagecache_hash_link; typedef struct st_pagecache_hash_link PAGECACHE_HASH_LINK; -typedef PAGECACHE_BLOCK_LINK * PAGECACHE_PAGE_LINK; /* To be removed */ - #include typedef my_bool (*pagecache_disk_read_validator)(uchar *page, uchar *data); @@ -205,7 +203,7 @@ extern my_bool pagecache_write_part(PAGECACHE *pagecache, enum pagecache_page_lock lock, enum pagecache_page_pin pin, enum pagecache_write_mode write_mode, - PAGECACHE_PAGE_LINK *link, + PAGECACHE_BLOCK_LINK **link, uint offset, uint size, pagecache_disk_read_validator validator, @@ -228,7 +226,7 @@ extern void pagecache_unpin(PAGECACHE *pagecache, pgcache_page_no_t 
pageno, LSN lsn); extern void pagecache_unpin_by_link(PAGECACHE *pagecache, - PAGECACHE_PAGE_LINK *link, + PAGECACHE_BLOCK_LINK *link, LSN lsn); extern int flush_pagecache_blocks(PAGECACHE *keycache, PAGECACHE_FILE *file, diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index adcb35a5e0e..bac83db4f51 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -632,7 +632,7 @@ typedef struct st_maria_s_param /* Used to store reference to pinned page */ typedef struct st_pinned_page { - PAGECACHE_PAGE_LINK link; + PAGECACHE_BLOCK_LINK *link; enum pagecache_page_lock unlock; } MARIA_PINNED_PAGE; From d097806323618aef56bcfb1968b036be2aa8491a Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 27 Sep 2007 21:45:45 +0300 Subject: [PATCH 05/12] fix typo --- storage/maria/ma_loghandler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index e52be8be427..3ac25879619 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -657,7 +657,7 @@ static char *translog_filename_by_fileno(uint32 file_no, char *path) length= (uint) (int10_to_str(file_no, buff, 10) - buff); strmov(end-length+1, buff); - DBUG_PRINT("info", ("Path: '%s' path: 0x%lx", path, (ulong) res)); + DBUG_PRINT("info", ("Path: '%s' path: 0x%lx", path, (ulong) path)); DBUG_RETURN(path); } From e71e5893ee1b8cf0328234667123d42727d9bf01 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 1 Oct 2007 09:59:05 +0300 Subject: [PATCH 06/12] Incorrect function call fixed. 
--- storage/maria/ma_recovery.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index d3aaa64bbda..72247728fab 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -1558,7 +1558,7 @@ static int run_redo_phase(LSN lsn, my_bool apply) display_record_position(log_desc2, &rec2, 0); if (apply && display_and_apply_record(log_desc2, &rec2)) { - translog_destroy(&scanner2); + translog_destroy_scanner(&scanner2); goto err; } } From 3c3e3648db433e11a783ea561792f523151d8b37 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 1 Oct 2007 18:48:20 +0300 Subject: [PATCH 07/12] faster way to detect last page address for the last log file. gprof build for amd64. storage/maria/ma_loghandler.c: faster way to detect last page address for the last log file. BUILD/compile-amd64-gprof-no-ndb: New BitKeeper file ``BUILD/compile-amd64-gprof-no-ndb'' --- BUILD/compile-amd64-gprof-no-ndb | 7 +++++++ storage/maria/ma_loghandler.c | 8 ++++++++ 2 files changed, 15 insertions(+) create mode 100755 BUILD/compile-amd64-gprof-no-ndb diff --git a/BUILD/compile-amd64-gprof-no-ndb b/BUILD/compile-amd64-gprof-no-ndb new file mode 100755 index 00000000000..9fd4c67155c --- /dev/null +++ b/BUILD/compile-amd64-gprof-no-ndb @@ -0,0 +1,7 @@ +#! /bin/sh +path=`dirname $0` +. "$path/SETUP.sh" +extra_flags="$amd64_cflags -pg -g" +extra_configs="$amd64_configs $max_no_ndb_configs --disable-shared $static_link" + +. 
"$path/FINISH.sh" diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 3ac25879619..0b064c5cfe2 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -5185,6 +5185,14 @@ static my_bool translog_scanner_set_last_page(TRANSLOG_SCANNER_DATA *scanner) { my_bool page_ok; + if (LSN_FILE_NO(scanner->page_addr) == LSN_FILE_NO(scanner->horizon)) + { + /* It is last file => we can easy find last page address by horizon */ + uint pagegrest= LSN_OFFSET(scanner->horizon) % TRANSLOG_PAGE_SIZE; + scanner->last_file_page= (scanner->horizon - + (pagegrest ? pagegrest : TRANSLOG_PAGE_SIZE)); + return (0); + } scanner->last_file_page= scanner->page_addr; return (translog_get_last_page_addr(&scanner->last_file_page, &page_ok)); } From ca12435fe11e558e548b5178ff69339336b7acc3 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 1 Oct 2007 19:39:16 +0200 Subject: [PATCH 08/12] Fix for "innodb_mysql" and "events" failures: we can disable transactionality in CREATE SELECT only if the table is not temporary (because re-enabling causes a commit). In the future we should disable again for temporary tables; that will probably require changing ha_enable_transaction(). sql/sql_insert.cc: When we disable transactionality in CREATE SELECT, we re-enable it at the end and this causes a commit (inside ha_enable_transaction()); but this is undesired if the created table is temporary (we don't want CREATE TEMPORARY TABLE SELECT to commit all previous statements). So we disable logging only if the table is not temporary. Ideally in the future we would want to lift this restriction which sounds stupid, but for Maria it does not matter now (temporary tables are not transactional yet). 
--- sql/sql_insert.cc | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/sql/sql_insert.cc b/sql/sql_insert.cc index d7267e6ecf6..ecaab5638cc 100644 --- a/sql/sql_insert.cc +++ b/sql/sql_insert.cc @@ -3460,10 +3460,12 @@ select_create::prepare(List &values, SELECT_LEX_UNIT *u) /* If error during the CREATE SELECT we drop the table, so no need for - engines to do logging of insertions (optimization). + engines to do logging of insertions (optimization). We don't do it for + temporary tables (yet) as re-enabling causes an undesirable commit. */ - if (ha_enable_transaction(thd, FALSE)) - DBUG_RETURN(-1); + if (((thd->lex->create_info.options & HA_LEX_CREATE_TMP_TABLE) == 0) && + ha_enable_transaction(thd, FALSE)) + DBUG_RETURN(-1); if (!(table= create_table_from_items(thd, create_info, create_table, alter_info, &values, @@ -3605,11 +3607,12 @@ bool select_create::send_eof() nevertheless. */ if (!table->s->tmp_table) + { + ha_enable_transaction(thd, TRUE); ha_commit(thd); // Can fail, but we proceed anyway - + } table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE); - ha_enable_transaction(thd, TRUE); if (thd->extra_lock) { mysql_unlock_tables(thd, thd->extra_lock); @@ -3632,6 +3635,9 @@ void select_create::abort() select_insert::abort(); reenable_binlog(thd); + if (table && !table->s->tmp_table) + ha_enable_transaction(thd, TRUE); + /* We roll back the statement, including truncating the transaction cache of the binary log, if the statement failed. @@ -3648,8 +3654,6 @@ void select_create::abort() if (thd->current_stmt_binlog_row_based) ha_rollback_stmt(thd); - ha_enable_transaction(thd, TRUE); - if (thd->extra_lock) { mysql_unlock_tables(thd, thd->extra_lock); From 48ed54c1403a1b1d5aacb68978aa59d60b4015ba Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 1 Oct 2007 23:51:57 +0300 Subject: [PATCH 09/12] Fixed problem with scanner destruction (unpinned page in the cache). 
storage/maria/ma_loghandler.c: Debug output added to control scanner freeing. Fixed problem with scanner destruction (unpinned page in the cache). --- storage/maria/ma_loghandler.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 0b064c5cfe2..3a94be3a16a 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -5249,7 +5249,8 @@ my_bool translog_init_scanner(LSN lsn, { TRANSLOG_VALIDATOR_DATA data; DBUG_ENTER("translog_init_scanner"); - DBUG_PRINT("enter", ("LSN: (0x%lu,0x%lx)", LSN_IN_PARTS(lsn))); + DBUG_PRINT("enter", ("Scanner: 0x%lx LSN: (0x%lu,0x%lx)", + (ulong) scanner, LSN_IN_PARTS(lsn))); DBUG_ASSERT(LSN_OFFSET(lsn) % TRANSLOG_PAGE_SIZE != 0); DBUG_ASSERT(translog_inited == 1); @@ -5289,7 +5290,10 @@ my_bool translog_init_scanner(LSN lsn, void translog_destroy_scanner(TRANSLOG_SCANNER_DATA *scanner) { + DBUG_ENTER("translog_destroy_scanner"); + DBUG_PRINT("enter", ("Scanner: 0x%lx", (ulong)scanner)); translog_free_link(scanner->direct_link); + DBUG_VOID_RETURN; } @@ -5587,7 +5591,10 @@ translog_variable_length_header(uchar *page, translog_size_t page_offset, DBUG_PRINT("info", ("use internal scanner")); scanner= &internal_scanner; } - + else + { + translog_destroy_scanner(scanner); + } base_lsn= buff->groups[0].addr; translog_init_scanner(base_lsn, 1, scanner, scanner == &internal_scanner); /* first group chunk is always chunk type 2 */ From d0b9387b883dc30b6c89f4e769c74338e22cfc58 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 2 Oct 2007 18:02:09 +0200 Subject: [PATCH 10/12] WL#3072 - Maria recovery. * Recovery of the table's live checksum (CREATE TABLE ... CHECKSUM=1) is achieved in this patch. The table's live checksum (info->s->state.state.checksum) is updated in inwrite_rec_hook's under the log mutex when writing UNDO_ROW_INSERT|UPDATE|DELETE and REDO_DELETE_ALL.
The checksum variation caused by the operation is stored in these UNDOs, so that the REDO phase, when it sees such UNDOs, can update the live checksum if it is older (state.is_of_lsn is lower) than the record. It is also used, as a nice add-on with no cost, to do less row checksum computation during the UNDO phase (as we have it in the record already). Doing this work, it became pressing to move in-write hooks (write_hook_for_redo() et al) to ma_blockrec.c. The 'parts' argument of inwrite_rec_hook is unpredictable (it comes mangled at this stage, for example by LSN compression) so it is replaced by a 'void* hook_arg', which is used to pass down information, currently only to write_hook_for_clr_end() (previous undo_lsn and type of undone record). * If from ha_maria, we print to stderr how many seconds (with one fractional digit) the REDO phase took, same for UNDO phase and for final table close. Just to give an indication for debugging and maybe also for Support. storage/maria/ha_maria.cc: question for Monty storage/maria/ma_blockrec.c: * log in-write hooks (write_hook_for_redo() etc) move from ma_loghandler.c to here; this is natural: the hooks are coupled to their callers (functions in ma_blockrec.c). * translog_write_record() now has a new argument "hook_arg"; using it to pass down to write_hook_for_clr_end() the transaction's previous_undo_lsn and the type of the being undone record, and also to pass down to all UNDOs the live checksum variation caused by the operation. * If table has live checksum, store in UNDO_ROW_INSERT|UPDATE|DELETE and in CLR_END the checksum variation ("delta") caused by the operation. For example if a DELETE caused the table's live checksum to change from 123 to 456, we store in the UNDO_ROW_DELETE, in 4 bytes, the value 333 (456-123). * Instead of hard-coded "1" as length of the place where we store the undone record's type in CLR_END, use a symbol CLR_TYPE_STORE_SIZE; use macros clr_type_store and clr_type_korr. 
* write_block_record() has a new parameter 'old_record_checksum' which is the pre-computed checksum of old_record; that value is used to update the table's live checksum when writing UNDO_ROW_UPDATE|CLR_END. * In allocate_and_write_block_record(), if we are executing UNDO_ROW_DELETE the row's checksum is already computed. * _ma_update_block_record2() now expects the new row's checksum into cur_row.checksum (was already true) and the old row's checksum into new_row.checksum (that's new). Its two callers, maria_update() and _ma_apply_undo_row_update(), honour this. * When executing an UNDO_ROW_INSERT|UPDATE|DELETE in UNDO phase, pick up the checksum delta from the log record. It is then used to update the table's live checksum when writing CLR_END, and saves us a computation of record. storage/maria/ma_blockrec.h: in-write hooks move from ma_loghandler.c storage/maria/ma_check.c: more straightforward size of buffer storage/maria/ma_checkpoint.c: <= is enough storage/maria/ma_commit.c: new prototype of translog_write_record() storage/maria/ma_create.c: new prototype of translog_write_record() storage/maria/ma_delete.c: The row's checksum must be computed before calling(*delete_record)(), not after, because it must be known inside _ma_delete_block_record() (to update the table's live checksum when writing UNDO_ROW_DELETE). If deleting from a transactional table, live checksum was already updated when writing UNDO_ROW_DELETE. storage/maria/ma_delete_all.c: @todo is now done (in ma_loghandler.c) storage/maria/ma_delete_table.c: new prototype of translog_write_record() storage/maria/ma_loghandler.c: * in-write hooks move to ma_blockrec.c. * translog_write_record() gets a new argument 'hook_arg' which is passed down to pre|inwrite_rec_hook. It is more useful than 'parts' for those hooks, because when those hooks are called, 'parts' has possibly been mangled (like with LSN compression) and is so unpredictable.
* fix for compiler warning (unused buffer_start when compiling without debug support) * Because checksum delta is stored into UNDO_ROW_INSERT|UPDATE|DELETE and CLR_END, but only if the table has live checksum, these records are not PSEUDOFIXEDLENGTH anymore, they are now VARIABLE_LENGTH (their length is X if no live checksum and X+4 otherwise). * add an inwrite_rec_hook for UNDO_ROW_UPDATE, which updates the table's live checksum. Update it also in hooks of UNDO_ROW_INSERT| DELETE and REDO_DELETE_ALL and CLR_END. * Bugfix: when reading a record in translog_read_record(), it happened that "length" became negative, because the function assumed that the record extended beyond the page's end, whereas it may be shorter. storage/maria/ma_loghandler.h: * Instead of hard-coded "1" and "4", use symbols and macros to store/retrieve the type of record which the CLR_END corresponds to, and the checksum variation caused by the operation which logs the record * translog_write_record() gets a new argument 'hook_arg' which is passed down to pre|inwrite_rec_hook. It is more useful than 'parts' for those hooks, because when those hooks are called, 'parts' has possibly been mangled (like with LSN compression) and is so unpredictable. storage/maria/ma_open.c: fix for "empty body in if() statement" (when compiling without safemutex) storage/maria/ma_pagecache.c: <= is enough storage/maria/ma_recovery.c: * print the time that each recovery phase (REDO/UNDO/flush) took; this is enabled only when recovering from ha_maria. It is printed as n seconds with a fractional part of one digit (like 123.4 seconds). * In the REDO phase, update the table's live checksum by using the checksum delta stored in UNDO_ROW_INSERT|DELETE|UPDATE and CLR_END. Update it too when seeing REDO_DELETE_ALL.
* In the UNDO phase, when executing UNDO_ROW_INSERT, if the table does not have live checksum then reading the record's header (as done by the master loop of run_undo_phase()) is enough; otherwise we do a translog_read_record() to have the checksum delta ready for _ma_apply_undo_row_insert(). * When at the end of the REDO phase we notice that there is an unfinished group of REDOs, don't assert in debug binaries, as I verified that it can happen in real life (with kill -9) * removing ' in #error as it confuses gcc3 storage/maria/ma_rename.c: new prototype of translog_write_record() storage/maria/ma_test_recovery.expected: Change in output of ma_test_recovery: now all live checksums of original tables equal those of tables recreated by the REDO phase and those of tables fixed by the UNDO phase. I.e. recovery of the live checksum looks like working (which was after all the only goal of this changeset). I checked by hand that it's not just all live checksums which are now 0 and that's why they match. They are the old values like 3757530372. maria.test has hard-coded checksum values in its result file so checks this too. storage/maria/ma_update.c: * It's useless to put up HA_STATE_CHANGED in 'key_changed', as we put up HA_STATE_CHANGED in info->update anyway. * We need to compute the old and new rows' checksum before calling (*update_record)(), as checksum delta must be known when logging UNDO_ROW_UPDATE which is done by _ma_update_block_record(). Note that some functions change the 'newrec' record (at least _ma_check_unique() does) so we cannot move the checksum computation too early in the function. storage/maria/ma_write.c: If inserting into a transactional table, live's checksum was already updated when writing UNDO_ROW_INSERT. The multiplication is a trick to save an if(). 
storage/maria/unittest/ma_test_loghandler-t.c: new prototype of translog_write_record() storage/maria/unittest/ma_test_loghandler_first_lsn-t.c: new prototype of translog_write_record() storage/maria/unittest/ma_test_loghandler_max_lsn-t.c: new prototype of translog_write_record() storage/maria/unittest/ma_test_loghandler_multigroup-t.c: new prototype of translog_write_record() storage/maria/unittest/ma_test_loghandler_multithread-t.c: new prototype of translog_write_record() storage/maria/unittest/ma_test_loghandler_noflush-t.c: new prototype of translog_write_record() storage/maria/unittest/ma_test_loghandler_pagecache-t.c: new prototype of translog_write_record() storage/maria/unittest/ma_test_loghandler_purge-t.c: new prototype of translog_write_record() storage/myisam/sort.c: fix for compiler warnings in pushbuild (write_merge_key* functions didn't have their declaration match MARIA_HA::write_key). --- storage/maria/ha_maria.cc | 3 + storage/maria/ma_blockrec.c | 463 +++++++++++++++--- storage/maria/ma_blockrec.h | 25 + storage/maria/ma_check.c | 10 +- storage/maria/ma_checkpoint.c | 6 +- storage/maria/ma_commit.c | 2 +- storage/maria/ma_create.c | 2 +- storage/maria/ma_delete.c | 41 +- storage/maria/ma_delete_all.c | 9 +- storage/maria/ma_delete_table.c | 2 +- storage/maria/ma_loghandler.c | 424 ++++------------ storage/maria/ma_loghandler.h | 16 +- storage/maria/ma_open.c | 2 +- storage/maria/ma_pagecache.c | 6 +- storage/maria/ma_recovery.c | 145 ++++-- storage/maria/ma_rename.c | 2 +- storage/maria/ma_test_recovery.expected | 176 ------- storage/maria/ma_update.c | 51 +- storage/maria/ma_write.c | 7 +- storage/maria/unittest/ma_test_loghandler-t.c | 21 +- .../unittest/ma_test_loghandler_first_lsn-t.c | 2 +- .../unittest/ma_test_loghandler_max_lsn-t.c | 2 +- .../ma_test_loghandler_multigroup-t.c | 24 +- .../ma_test_loghandler_multithread-t.c | 6 +- .../unittest/ma_test_loghandler_noflush-t.c | 2 +- .../unittest/ma_test_loghandler_pagecache-t.c | 2 +- 
.../unittest/ma_test_loghandler_purge-t.c | 8 +- storage/myisam/sort.c | 15 +- 28 files changed, 755 insertions(+), 719 deletions(-) diff --git a/storage/maria/ha_maria.cc b/storage/maria/ha_maria.cc index 678b88063db..2397eac068d 100644 --- a/storage/maria/ha_maria.cc +++ b/storage/maria/ha_maria.cc @@ -2185,6 +2185,9 @@ int ha_maria::create(const char *name, register TABLE *table_arg, error; ? Why fool the user? + Shouldn't this test be pushed down to maria_create()? Because currently, + ma_test1 -T crashes: it creates a table with DYNAMIC_RECORD but has + born_transactional==1, which confuses some recovery-related code. */ #endif create_info.transactional= (row_type == BLOCK_RECORD && diff --git a/storage/maria/ma_blockrec.c b/storage/maria/ma_blockrec.c index 3ec18d229c2..0d1a530dbfc 100644 --- a/storage/maria/ma_blockrec.c +++ b/storage/maria/ma_blockrec.c @@ -300,6 +300,32 @@ typedef struct st_maria_extent_cursor } MARIA_EXTENT_CURSOR; +/** + @brief Structure for passing down info to write_hook_for_clr_end(). + This hooks needs to know the variation of the live checksum caused by the + current operation to update state.checksum under log's mutex, + needs to know the transaction's previous undo_lsn to set + trn->undo_lsn under log mutex, and needs to know the type of UNDO being + undone now to modify state.records under log mutex. 
+*/ +struct st_msg_to_write_hook_for_clr_end +{ + LSN previous_undo_lsn; + enum translog_record_type undone_record_type; + ha_checksum checksum_delta; +}; +/** S:share,D:checksum_delta,E:expression,P:pointer_into_record,L:length */ +#define store_checksum_in_rec(S,D,E,P,L) do \ + { \ + D= 0; \ + if ((S)->calc_checksum != NULL) \ + { \ + D= (E); \ + ha_checksum_store(P, D); \ + L+= HA_CHECKSUM_STORE_SIZE; \ + } \ + } while (0) + static my_bool delete_tails(MARIA_HA *info, MARIA_RECORD_POS *tails); static my_bool delete_head_or_tail(MARIA_HA *info, ulonglong page, uint record_number, @@ -1387,7 +1413,7 @@ static my_bool write_tail(MARIA_HA *info, if (translog_write_record(&lsn, LOGREC_REDO_INSERT_ROW_TAIL, info->trn, info, sizeof(log_data) + length, TRANSLOG_INTERNAL_PARTS + 2, log_array, - log_data)) + log_data, NULL)) DBUG_RETURN(1); } @@ -1642,7 +1668,7 @@ static my_bool free_full_pages(MARIA_HA *info, MARIA_ROW *row) if (translog_write_record(&lsn, LOGREC_REDO_PURGE_BLOCKS, info->trn, info, sizeof(log_data) + extents_length, TRANSLOG_INTERNAL_PARTS + 2, log_array, - log_data)) + log_data, NULL)) DBUG_RETURN(1); DBUG_RETURN(_ma_bitmap_free_full_pages(info, row->extents, @@ -1689,7 +1715,7 @@ static my_bool free_full_page_range(MARIA_HA *info, ulonglong page, uint count) if (translog_write_record(&lsn, LOGREC_REDO_PURGE_BLOCKS, info->trn, info, sizeof(log_data), TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data)) + log_data, NULL)) res= 1; } @@ -1716,6 +1742,9 @@ static my_bool free_full_page_range(MARIA_HA *info, ulonglong page, uint count) @param row_pos Position on head page where to put head part of record @param undo_lsn <> LSN_ERROR if we are executing an UNDO + @param old_record_checksum Checksum of old_record: ignored if table does + not have live checksum; otherwise if + old_record==NULL it must be 0. @note On return all pinned pages are released. 
@@ -1731,7 +1760,8 @@ static my_bool write_block_record(MARIA_HA *info, MARIA_BITMAP_BLOCKS *bitmap_blocks, my_bool head_block_is_read, struct st_row_pos_info *row_pos, - LSN undo_lsn) + LSN undo_lsn, + ha_checksum old_record_checksum) { uchar *data, *end_of_data, *tmp_data_used, *tmp_data; uchar *row_extents_first_part, *row_extents_second_part; @@ -1785,7 +1815,10 @@ static my_bool write_block_record(MARIA_HA *info, if (share->base.pack_fields) store_key_length_inc(data, row->field_lengths_length); if (share->calc_checksum) + { *(data++)= (uchar) (row->checksum); /* store least significant byte */ + DBUG_ASSERT(!((old_record_checksum != 0) && (old_record == NULL))); + } memcpy(data, record, share->base.null_bytes); data+= share->base.null_bytes; memcpy(data, row->empty_bits, share->base.pack_bytes); @@ -2211,7 +2244,7 @@ static my_bool write_block_record(MARIA_HA *info, if (translog_write_record(&lsn, LOGREC_REDO_INSERT_ROW_HEAD, info->trn, info, sizeof(log_data) + data_length, TRANSLOG_INTERNAL_PARTS + 2, log_array, - log_data)) + log_data, NULL)) goto disk_err; } @@ -2328,7 +2361,7 @@ static my_bool write_block_record(MARIA_HA *info, error= translog_write_record(&lsn, LOGREC_REDO_INSERT_ROW_BLOBS, info->trn, info, log_entry_length, (uint) (log_array_pos - log_array), - log_array, log_data); + log_array, log_data, NULL); if (log_array != tmp_log_array) my_free((uchar*) log_array, MYF(0)); if (error) @@ -2343,31 +2376,44 @@ static my_bool write_block_record(MARIA_HA *info, if (undo_lsn != LSN_ERROR) { - uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + 1]; + uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + + CLR_TYPE_STORE_SIZE + HA_CHECKSUM_STORE_SIZE]; + struct st_msg_to_write_hook_for_clr_end msg; /* undo_lsn must be first for compression to work */ lsn_store(log_data, undo_lsn); /* - Store if this CLR is about an UNDO_INSERT, UNDO_DELETE or UNDO_UPDATE; - in the first/second case, Recovery, when it sees the CLR_END in the - REDO phase, may 
decrement/increment the records' count. + Store if this CLR is about UNDO_DELETE or UNDO_UPDATE; + in the first case, Recovery, when it sees the CLR_END in the + REDO phase, may decrement the records' count. */ - log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE]= old_record ? - LOGREC_UNDO_ROW_UPDATE : LOGREC_UNDO_ROW_DELETE; log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); - + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= + sizeof(log_data) - HA_CHECKSUM_STORE_SIZE; + msg.undone_record_type= + old_record ? LOGREC_UNDO_ROW_UPDATE : LOGREC_UNDO_ROW_DELETE; + clr_type_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE, + msg.undone_record_type); + msg.previous_undo_lsn= undo_lsn; + store_checksum_in_rec(share, msg.checksum_delta, + row->checksum - old_record_checksum, + log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + + CLR_TYPE_STORE_SIZE, + log_array[TRANSLOG_INTERNAL_PARTS + 0].length); if (translog_write_record(&lsn, LOGREC_CLR_END, - info->trn, info, sizeof(log_data), + info->trn, info, + log_array[TRANSLOG_INTERNAL_PARTS + 0].length, TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data + LSN_STORE_SIZE)) + log_data + LSN_STORE_SIZE, &msg)) goto disk_err; } else { uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + - PAGE_STORE_SIZE + DIRPOS_STORE_SIZE]; + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE + + HA_CHECKSUM_STORE_SIZE]; + ha_checksum checksum_delta; - /* LOGREC_UNDO_ROW_INSERT & LOGREC_UNDO_ROW_INSERT share same header */ + /* LOGREC_UNDO_ROW_INSERT & LOGREC_UNDO_ROW_UPDATE share same header */ lsn_store(log_data, info->trn->undo_lsn); page_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE, head_block->page); @@ -2376,15 +2422,24 @@ static my_bool write_block_record(MARIA_HA *info, row_pos->rownr); log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= + 
sizeof(log_data) - HA_CHECKSUM_STORE_SIZE; + store_checksum_in_rec(share, checksum_delta, + row->checksum - old_record_checksum, + log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, + log_array[TRANSLOG_INTERNAL_PARTS + 0].length); + compile_time_assert(sizeof(ha_checksum) == HA_CHECKSUM_STORE_SIZE); if (!old_record) { /* Write UNDO log record for the INSERT */ if (translog_write_record(&lsn, LOGREC_UNDO_ROW_INSERT, - info->trn, info, sizeof(log_data), + info->trn, info, + log_array[TRANSLOG_INTERNAL_PARTS + + 0].length, TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data + LSN_STORE_SIZE)) + log_data + LSN_STORE_SIZE, &checksum_delta)) goto disk_err; } else @@ -2397,10 +2452,11 @@ static my_bool write_block_record(MARIA_HA *info, TRANSLOG_INTERNAL_PARTS + 1, &row_parts_count); if (translog_write_record(&lsn, LOGREC_UNDO_ROW_UPDATE, info->trn, - info, sizeof(log_data) + row_length, + info, log_array[TRANSLOG_INTERNAL_PARTS + + 0].length + row_length, TRANSLOG_INTERNAL_PARTS + 1 + - row_parts_count, - log_array, log_data + LSN_STORE_SIZE)) + row_parts_count, log_array, + log_data + LSN_STORE_SIZE, &checksum_delta)) goto disk_err; } } @@ -2517,10 +2573,18 @@ static my_bool allocate_and_write_block_record(MARIA_HA *info, DBUG_RETURN(1); row->lastpos= ma_recordpos(blocks->block->page, row_pos.rownr); if (info->s->calc_checksum) - row->checksum= (info->s->calc_checksum)(info, record); + { + if (undo_lsn == LSN_ERROR) + row->checksum= (info->s->calc_checksum)(info, record); + else + { + /* _ma_apply_undo_row_delete() already set row's checksum. Verify it. 
*/ + DBUG_ASSERT(row->checksum == (info->s->calc_checksum)(info, record)); + } + } if (write_block_record(info, (uchar*) 0, record, row, blocks, blocks->block->org_bitmap_value != 0, - &row_pos, undo_lsn)) + &row_pos, undo_lsn, 0)) DBUG_RETURN(1); /* Error reading bitmap */ DBUG_PRINT("exit", ("Rowid: %lu (%lu:%u)", (ulong) row->lastpos, (ulong) ma_recordpos_to_page(row->lastpos), @@ -2592,6 +2656,7 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info) MARIA_BITMAP_BLOCKS *blocks= &info->cur_row.insert_blocks; MARIA_BITMAP_BLOCK *block, *end; LSN lsn= LSN_IMPOSSIBLE; + MARIA_SHARE *share= info->s; DBUG_ENTER("_ma_write_abort_block_record"); if (delete_head_or_tail(info, @@ -2619,13 +2684,14 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info) } } - if (info->s->now_transactional) + if (share->now_transactional) { - LSN previous_undo_lsn; TRANSLOG_HEADER_BUFFER rec; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + 1]; + uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + + CLR_TYPE_STORE_SIZE + HA_CHECKSUM_STORE_SIZE]; int len; + struct st_msg_to_write_hook_for_clr_end msg; /* We do need the code above (delete_head_or_tail() etc) for non-transactional tables. 
@@ -2644,15 +2710,24 @@ my_bool _ma_write_abort_block_record(MARIA_HA *info) goto end; } DBUG_ASSERT(rec.type == LOGREC_UNDO_ROW_INSERT); - previous_undo_lsn= lsn_korr(rec.header); - lsn_store(log_data, previous_undo_lsn); - log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE]= LOGREC_UNDO_ROW_INSERT; + memcpy(log_data, rec.header, LSN_STORE_SIZE); /* previous UNDO LSN */ + msg.previous_undo_lsn= lsn_korr(rec.header); + msg.undone_record_type= LOGREC_UNDO_ROW_INSERT; + clr_type_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE, + LOGREC_UNDO_ROW_INSERT); log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= + sizeof(log_data) - HA_CHECKSUM_STORE_SIZE; + store_checksum_in_rec(share, msg.checksum_delta, + - info->cur_row.checksum, + log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + + CLR_TYPE_STORE_SIZE, + log_array[TRANSLOG_INTERNAL_PARTS + 0].length); if (translog_write_record(&lsn, LOGREC_CLR_END, - info->trn, info, sizeof(log_data), + info->trn, info, + log_array[TRANSLOG_INTERNAL_PARTS + 0].length, TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data + LSN_STORE_SIZE)) + log_data + LSN_STORE_SIZE, &msg)) res= 1; } end: @@ -2685,6 +2760,8 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, uchar *dir; ulonglong page; struct st_row_pos_info row_pos; + my_bool res; + ha_checksum old_checksum; MARIA_SHARE *share= info->s; DBUG_ENTER("_ma_update_block_record2"); DBUG_PRINT("enter", ("rowid: %lu", (long) record_pos)); @@ -2694,7 +2771,11 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, DBUG_DUMP("newrec", record, share->base.reclength); #endif - /* checksum was computed by maria_update() already and put into cur_row */ + /* + Checksums of new and old rows were computed by callers already; new + row's was put into cur_row, old row's was put into new_row. 
+ */ + old_checksum= new_row->checksum; new_row->checksum= cur_row->checksum; calc_record_size(info, record, new_row); page= ma_recordpos_to_page(record_pos); @@ -2747,8 +2828,9 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, if (cur_row->extents_count && free_full_pages(info, cur_row)) goto err; - DBUG_RETURN(write_block_record(info, oldrec, record, new_row, blocks, - 1, &row_pos, undo_lsn)); + res= write_block_record(info, oldrec, record, new_row, blocks, + 1, &row_pos, undo_lsn, old_checksum); + DBUG_RETURN(res); } /* Allocate all size in block for record @@ -2781,8 +2863,9 @@ static my_bool _ma_update_block_record2(MARIA_HA *info, row_pos.dir= dir; row_pos.data= buff + uint2korr(dir); row_pos.length= head_length; - DBUG_RETURN(write_block_record(info, oldrec, record, new_row, blocks, 1, - &row_pos, undo_lsn)); + res= write_block_record(info, oldrec, record, new_row, blocks, 1, + &row_pos, undo_lsn, old_checksum); + DBUG_RETURN(res); err: _ma_unpin_all_pages_and_finalize_row(info, 0); @@ -2949,7 +3032,7 @@ static my_bool delete_head_or_tail(MARIA_HA *info, LOGREC_REDO_PURGE_ROW_TAIL), info->trn, info, sizeof(log_data), TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data)) + log_data, NULL)) DBUG_RETURN(1); } if (pagecache_write(share->pagecache, @@ -2976,7 +3059,7 @@ static my_bool delete_head_or_tail(MARIA_HA *info, if (translog_write_record(&lsn, LOGREC_REDO_PURGE_BLOCKS, info->trn, info, sizeof(log_data), TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data)) + log_data, NULL)) DBUG_RETURN(1); } /* Write the empty page (needed only for REPAIR to work) */ @@ -3044,6 +3127,7 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record) { ulonglong page; uint record_number; + MARIA_SHARE *share= info->s; DBUG_ENTER("_ma_delete_block_record"); page= ma_recordpos_to_page(info->cur_row.lastpos); @@ -3058,13 +3142,14 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record) if (info->cur_row.extents && free_full_pages(info, 
&info->cur_row)) goto err; - if (info->s->now_transactional) + if (share->now_transactional) { LSN lsn; uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + - DIR_COUNT_SIZE]; + DIRPOS_STORE_SIZE + HA_CHECKSUM_STORE_SIZE]; size_t row_length; uint row_parts_count; + ha_checksum checksum_delta; /* Write UNDO record */ lsn_store(log_data, info->trn->undo_lsn); @@ -3073,15 +3158,26 @@ my_bool _ma_delete_block_record(MARIA_HA *info, const uchar *record) PAGE_STORE_SIZE, record_number); info->log_row_parts[TRANSLOG_INTERNAL_PARTS].str= (char*) log_data; - info->log_row_parts[TRANSLOG_INTERNAL_PARTS].length= sizeof(log_data); + info->log_row_parts[TRANSLOG_INTERNAL_PARTS].length= + sizeof(log_data) - HA_CHECKSUM_STORE_SIZE; + store_checksum_in_rec(share, checksum_delta, + - info->cur_row.checksum, + log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, + info->log_row_parts[TRANSLOG_INTERNAL_PARTS + + 0].length); + row_length= fill_insert_undo_parts(info, record, info->log_row_parts + TRANSLOG_INTERNAL_PARTS + 1, &row_parts_count); if (translog_write_record(&lsn, LOGREC_UNDO_ROW_DELETE, info->trn, - info, sizeof(log_data) + row_length, + info, + info->log_row_parts[TRANSLOG_INTERNAL_PARTS + + 0].length + row_length, TRANSLOG_INTERNAL_PARTS + 1 + row_parts_count, - info->log_row_parts, log_data + LSN_STORE_SIZE)) + info->log_row_parts, log_data + LSN_STORE_SIZE, + &checksum_delta)) goto err; } @@ -3377,7 +3473,8 @@ static my_bool read_long_data(MARIA_HA *info, uchar *to, ulong length, cur_row.extents_counts contains number of extents cur_row.empty_bits is set to empty bits cur_row.field_lengths contains packed length of all fields - cur_row.blob_length contains total length of all blobs. + cur_row.blob_length contains total length of all blobs + cur_row.checksum contains checksum of read record. 
RETURN 0 ok @@ -4576,6 +4673,211 @@ static size_t fill_update_undo_parts(MARIA_HA *info, const uchar *oldrec, DBUG_RETURN(row_length); } +/*************************************************************************** + In-write hooks called under log's lock when log record is written +***************************************************************************/ + +/** + @brief Sets transaction's rec_lsn if needed + + A transaction sometimes writes a REDO even before the page is in the + pagecache (example: brand new head or tail pages; full pages). So, if + Checkpoint happens just after the REDO write, it needs to know that the + REDO phase must start before this REDO. Scanning the pagecache cannot + tell that as the page is not in the cache. So, transaction sets its rec_lsn + to the REDO's LSN or somewhere before, and Checkpoint reads the + transaction's rec_lsn. + + @return Operation status, always 0 (success) +*/ + +my_bool write_hook_for_redo(enum translog_record_type type + __attribute__ ((unused)), + TRN *trn, MARIA_HA *tbl_info + __attribute__ ((unused)), + LSN *lsn, void *hook_arg + __attribute__ ((unused))) +{ + /* + Users of dummy_transaction_object must keep this TRN clean as it + is used by many threads (like those manipulating non-transactional + tables). It might be dangerous if one user sets rec_lsn or some other + member and it is picked up by another user (like putting this rec_lsn into + a page of a non-transactional table); it's safer if all members stay 0. So + non-transactional log records (REPAIR, CREATE, RENAME, DROP) should not + call this hook; we trust them but verify ;) + */ + DBUG_ASSERT(trn->trid != 0); + /* + If the hook stays so simple, it would be faster to pass + !trn->rec_lsn ? trn->rec_lsn : some_dummy_lsn + to translog_write_record(), like Monty did in his original code, and not + have a hook. For now we keep it like this. 
+ */ + if (trn->rec_lsn == 0) + trn->rec_lsn= *lsn; + return 0; +} + + +/** + @brief Sets transaction's undo_lsn, first_undo_lsn if needed + + @return Operation status, always 0 (success) +*/ + +my_bool write_hook_for_undo(enum translog_record_type type + __attribute__ ((unused)), + TRN *trn, MARIA_HA *tbl_info + __attribute__ ((unused)), + LSN *lsn, void *hook_arg + __attribute__ ((unused))) +{ + DBUG_ASSERT(trn->trid != 0); + trn->undo_lsn= *lsn; + if (unlikely(LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn) == 0)) + trn->first_undo_lsn= + trn->undo_lsn | LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn); + DBUG_ASSERT(tbl_info->state == &tbl_info->s->state.state); + return 0; + /* + when we implement purging, we will specialize this hook: UNDO_PURGE + records will additionally set trn->undo_purge_lsn + */ +} + + +/** + @brief Sets the table's records count and checksum to 0, then calls the + generic REDO hook. + + @return Operation status, always 0 (success) +*/ + +my_bool write_hook_for_redo_delete_all(enum translog_record_type type + __attribute__ ((unused)), + TRN *trn, MARIA_HA *tbl_info + __attribute__ ((unused)), + LSN *lsn, void *hook_arg) +{ + MARIA_SHARE *share= tbl_info->s; + DBUG_ASSERT(tbl_info->state == &tbl_info->s->state.state); + share->state.state.records= share->state.state.checksum= 0; + return write_hook_for_redo(type, trn, tbl_info, lsn, hook_arg); +} + + +/** + @brief Updates "records" and "checksum" and calls the generic UNDO hook + + @return Operation status, always 0 (success) +*/ + +my_bool write_hook_for_undo_row_insert(enum translog_record_type type + __attribute__ ((unused)), + TRN *trn, MARIA_HA *tbl_info, + LSN *lsn, void *hook_arg) +{ + MARIA_SHARE *share= tbl_info->s; + share->state.state.records++; + share->state.state.checksum+= *(ha_checksum *)hook_arg; + return write_hook_for_undo(type, trn, tbl_info, lsn, hook_arg); +} + + +/** + @brief Updates "records" and "checksum" and calls the generic UNDO hook + + @return Operation status, always 0 (success) 
+*/ + +my_bool write_hook_for_undo_row_delete(enum translog_record_type type + __attribute__ ((unused)), + TRN *trn, MARIA_HA *tbl_info, + LSN *lsn, void *hook_arg) +{ + MARIA_SHARE *share= tbl_info->s; + share->state.state.records--; + share->state.state.checksum+= *(ha_checksum *)hook_arg; + return write_hook_for_undo(type, trn, tbl_info, lsn, hook_arg); +} + + +/** + @brief Updates "checksum" and calls the generic UNDO hook + + @return Operation status, always 0 (success) +*/ + +my_bool write_hook_for_undo_row_update(enum translog_record_type type + __attribute__ ((unused)), + TRN *trn, MARIA_HA *tbl_info, + LSN *lsn, void *hook_arg) +{ + MARIA_SHARE *share= tbl_info->s; + share->state.state.checksum+= *(ha_checksum *)hook_arg; + return write_hook_for_undo(type, trn, tbl_info, lsn, hook_arg); +} + + +/** + @brief Sets transaction's undo_lsn, first_undo_lsn if needed, and updates table's "records" and "checksum" + + @return Operation status, always 0 (success) +*/ + +my_bool write_hook_for_clr_end(enum translog_record_type type + __attribute__ ((unused)), + TRN *trn, MARIA_HA *tbl_info + __attribute__ ((unused)), + LSN *lsn __attribute__ ((unused)), + void *hook_arg) +{ + MARIA_SHARE *share= tbl_info->s; + struct st_msg_to_write_hook_for_clr_end *msg= + (struct st_msg_to_write_hook_for_clr_end *)hook_arg; + DBUG_ASSERT(trn->trid != 0); + trn->undo_lsn= msg->previous_undo_lsn; + share->state.state.checksum+= msg->checksum_delta; + + switch (msg->undone_record_type) { + case LOGREC_UNDO_ROW_DELETE: + share->state.state.records++; + break; + case LOGREC_UNDO_ROW_INSERT: + share->state.state.records--; + break; + case LOGREC_UNDO_ROW_UPDATE: + break; + default: + DBUG_ASSERT(0); + } + if (trn->undo_lsn == LSN_IMPOSSIBLE) /* has fully rolled back */ + trn->first_undo_lsn= LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn); + return 0; +} + + +/** + @brief Updates table's lsn_of_file_id. 
+ + @return Operation status, always 0 (success) +*/ + +my_bool write_hook_for_file_id(enum translog_record_type type + __attribute__ ((unused)), + TRN *trn + __attribute__ ((unused)), + MARIA_HA *tbl_info, + LSN *lsn __attribute__ ((unused)), + void *hook_arg + __attribute__ ((unused))) +{ + DBUG_ASSERT(cmp_translog_addr(tbl_info->s->lsn_of_file_id, *lsn) < 0); + tbl_info->s->lsn_of_file_id= *lsn; + return 0; +} + /*************************************************************************** Applying of REDO log records ***************************************************************************/ @@ -4944,19 +5246,25 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn, ulonglong page; uint rownr; LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + 1], *buff; + uchar log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE + + CLR_TYPE_STORE_SIZE + HA_CHECKSUM_STORE_SIZE], + *buff; my_bool res= 1; MARIA_PINNED_PAGE page_link; LSN lsn; + MARIA_SHARE *share= info->s; + struct st_msg_to_write_hook_for_clr_end msg; DBUG_ENTER("_ma_apply_undo_row_insert"); page= page_korr(header); - rownr= dirpos_korr(header + PAGE_STORE_SIZE); + header+= PAGE_STORE_SIZE; + rownr= dirpos_korr(header); + header+= DIRPOS_STORE_SIZE; DBUG_PRINT("enter", ("Page: %lu rownr: %u", (ulong) page, rownr)); - if (!(buff= pagecache_read(info->s->pagecache, + if (!(buff= pagecache_read(share->pagecache, &info->dfile, page, 0, - info->buff, info->s->page_type, + info->buff, share->page_type, PAGECACHE_LOCK_WRITE, &page_link.link))) DBUG_RETURN(1); @@ -4977,14 +5285,24 @@ my_bool _ma_apply_undo_row_insert(MARIA_HA *info, LSN undo_lsn, /* undo_lsn must be first for compression to work */ lsn_store(log_data, undo_lsn); - log_data[LSN_STORE_SIZE + FILEID_STORE_SIZE]= LOGREC_UNDO_ROW_INSERT; + clr_type_store(log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE, + LOGREC_UNDO_ROW_INSERT); + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= + sizeof(log_data) - 
HA_CHECKSUM_STORE_SIZE; + msg.undone_record_type= LOGREC_UNDO_ROW_INSERT; + msg.previous_undo_lsn= undo_lsn; + store_checksum_in_rec(share, msg.checksum_delta, + - ha_checksum_korr(header), + log_data + LSN_STORE_SIZE + FILEID_STORE_SIZE + + CLR_TYPE_STORE_SIZE, + log_array[TRANSLOG_INTERNAL_PARTS + 0].length); log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); if (translog_write_record(&lsn, LOGREC_CLR_END, - info->trn, info, sizeof(log_data), + info->trn, info, log_array[TRANSLOG_INTERNAL_PARTS + + 0].length, TRANSLOG_INTERNAL_PARTS + 1, log_array, - log_data + LSN_STORE_SIZE)) + log_data + LSN_STORE_SIZE, &msg)) goto err; res= 0; @@ -5014,6 +5332,16 @@ my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn, some buffers to point directly to 'header' */ memcpy(&row, &info->cur_row, sizeof(row)); + if (share->calc_checksum) + { + /* + We extract the checksum delta here, saving a recomputation in + allocate_and_write_block_record(). It's only an optimization. 
+ */ + row.checksum= - ha_checksum_korr(header); + header+= HA_CHECKSUM_STORE_SIZE; + } + null_field_lengths= row.null_field_lengths; blob_lengths= row.blob_lengths; @@ -5184,18 +5512,25 @@ my_bool _ma_apply_undo_row_update(MARIA_HA *info, LSN undo_lsn, uchar *current_record, *orig_record; int error= 1; MARIA_RECORD_POS record_pos; + ha_checksum checksum_delta; DBUG_ENTER("_ma_apply_undo_row_update"); page= page_korr(header); - rownr= dirpos_korr(header + PAGE_STORE_SIZE); + header+= PAGE_STORE_SIZE; + rownr= dirpos_korr(header); + header+= DIRPOS_STORE_SIZE; record_pos= ma_recordpos(page, rownr); DBUG_PRINT("enter", ("Page: %lu rownr: %u", (ulong) page, rownr)); + if (share->calc_checksum) + { + checksum_delta= ha_checksum_korr(header); + header+= HA_CHECKSUM_STORE_SIZE; + } /* Set header to point to old field values, generated by fill_update_undo_parts() */ - header+= PAGE_STORE_SIZE + DIRPOS_STORE_SIZE; field_length_header= ma_get_length((uchar**) &header); field_length_data= header; header+= field_length_header; @@ -5290,14 +5625,14 @@ my_bool _ma_apply_undo_row_update(MARIA_HA *info, LSN undo_lsn, if (share->calc_checksum) { - info->cur_row.checksum= (*share->calc_checksum)(info, orig_record); - info->state->checksum+= (info->cur_row.checksum - - (*share->calc_checksum)(info, current_record)); + info->new_row.checksum= checksum_delta + + (info->cur_row.checksum= (*share->calc_checksum)(info, orig_record)); + /* verify that record's content is sane */ + DBUG_ASSERT(info->new_row.checksum == + (*share->calc_checksum)(info, current_record)); } - /* - Now records are up to date, execute the update to original values - */ + /* Now records are up to date, execute the update to original values */ if (_ma_update_block_record2(info, record_pos, current_record, orig_record, undo_lsn)) goto err; diff --git a/storage/maria/ma_blockrec.h b/storage/maria/ma_blockrec.h index 30dffe1c0c0..0bce4015daf 100644 --- a/storage/maria/ma_blockrec.h +++ b/storage/maria/ma_blockrec.h 
@@ -193,3 +193,28 @@ my_bool _ma_apply_undo_row_delete(MARIA_HA *info, LSN undo_lsn, const uchar *header, size_t length); my_bool _ma_apply_undo_row_update(MARIA_HA *info, LSN undo_lsn, const uchar *header, size_t length); + +my_bool write_hook_for_redo(enum translog_record_type type, + TRN *trn, MARIA_HA *tbl_info, LSN *lsn, + void *hook_arg); +my_bool write_hook_for_undo(enum translog_record_type type, + TRN *trn, MARIA_HA *tbl_info, LSN *lsn, + void *hook_arg); +my_bool write_hook_for_redo_delete_all(enum translog_record_type type, + TRN *trn, MARIA_HA *tbl_info, + LSN *lsn, void *hook_arg); +my_bool write_hook_for_undo_row_insert(enum translog_record_type type, + TRN *trn, MARIA_HA *tbl_info, + LSN *lsn, void *hook_arg); +my_bool write_hook_for_undo_row_delete(enum translog_record_type type, + TRN *trn, MARIA_HA *tbl_info, + LSN *lsn, void *hook_arg); +my_bool write_hook_for_undo_row_update(enum translog_record_type type, + TRN *trn, MARIA_HA *tbl_info, + LSN *lsn, void *hook_arg); +my_bool write_hook_for_clr_end(enum translog_record_type type, + TRN *trn, MARIA_HA *tbl_info, LSN *lsn, + void *hook_arg); +my_bool write_hook_for_file_id(enum translog_record_type type, + TRN *trn, MARIA_HA *tbl_info, LSN *lsn, + void *hook_arg); diff --git a/storage/maria/ma_check.c b/storage/maria/ma_check.c index fa1c812daf7..8ef8d6a7e76 100644 --- a/storage/maria/ma_check.c +++ b/storage/maria/ma_check.c @@ -5602,11 +5602,10 @@ static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info) record). 
*/ LEX_STRING log_array[TRANSLOG_INTERNAL_PARTS + 1]; - uchar log_data[LSN_STORE_SIZE]; + uchar log_data[FILEID_STORE_SIZE + 4]; LSN lsn; - compile_time_assert(LSN_STORE_SIZE >= (FILEID_STORE_SIZE + 4)); log_array[TRANSLOG_INTERNAL_PARTS + 0].str= (char*) log_data; - log_array[TRANSLOG_INTERNAL_PARTS + 0].length= FILEID_STORE_SIZE + 4; + log_array[TRANSLOG_INTERNAL_PARTS + 0].length= sizeof(log_data); /* testflag gives an idea of what REPAIR did (in particular T_QUICK or not: did it touch the data file or not?). @@ -5614,10 +5613,9 @@ static int write_log_record_for_repair(const HA_CHECK *param, MARIA_HA *info) int4store(log_data + FILEID_STORE_SIZE, param->testflag); if (unlikely(translog_write_record(&lsn, LOGREC_REDO_REPAIR_TABLE, &dummy_transaction_object, info, - log_array[TRANSLOG_INTERNAL_PARTS + - 0].length, + sizeof(log_data), sizeof(log_array)/sizeof(log_array[0]), - log_array, log_data) || + log_array, log_data, NULL) || translog_flush(lsn))) return 1; /* diff --git a/storage/maria/ma_checkpoint.c b/storage/maria/ma_checkpoint.c index 4446285fce9..76eacaede0d 100644 --- a/storage/maria/ma_checkpoint.c +++ b/storage/maria/ma_checkpoint.c @@ -266,7 +266,7 @@ static int really_execute_checkpoint(void) &dummy_transaction_object, NULL, total_rec_length, sizeof(log_array)/sizeof(log_array[0]), - log_array, NULL) || + log_array, NULL, NULL) || translog_flush(lsn))) goto err; @@ -652,7 +652,7 @@ pthread_handler_t ma_checkpoint_background(void *arg __attribute__((unused))) break; #if 0 /* good for testing, to do a lot of checkpoints, finds a lot of bugs */ pthread_mutex_unlock(&LOCK_checkpoint); - my_sleep(100000); // a tenth of a second + my_sleep(100000); /* a tenth of a second */ pthread_mutex_lock(&LOCK_checkpoint); #else /* To have a killable sleep, we use timedwait like our SQL GET_LOCK() */ @@ -893,7 +893,7 @@ static int collect_tables(LEX_STRING *str, LSN checkpoint_start_log_horizon) filter_param.pages_covered_by_bitmap= share->bitmap.pages_covered; /* 
OS file descriptors are ints which we stored in 4 bytes */ - compile_time_assert(sizeof(int) == 4); + compile_time_assert(sizeof(int) <= 4); pthread_mutex_lock(&share->intern_lock); /* Tables in a normal state have their two file descriptors open. diff --git a/storage/maria/ma_commit.c b/storage/maria/ma_commit.c index 36ea2f6e6e4..8a0a4c136bb 100644 --- a/storage/maria/ma_commit.c +++ b/storage/maria/ma_commit.c @@ -64,7 +64,7 @@ int ma_commit(TRN *trn) res= (translog_write_record(&commit_lsn, LOGREC_COMMIT, trn, NULL, 0, sizeof(log_array)/sizeof(log_array[0]), - log_array, NULL) || + log_array, NULL, NULL) || translog_flush(commit_lsn) || trnman_commit_trn(trn)); /* diff --git a/storage/maria/ma_create.c b/storage/maria/ma_create.c index ba1d9a13b42..f3cee8b26e2 100644 --- a/storage/maria/ma_create.c +++ b/storage/maria/ma_create.c @@ -997,7 +997,7 @@ int maria_create(const char *name, enum data_file_type datafile_type, &dummy_transaction_object, NULL, total_rec_length, sizeof(log_array)/sizeof(log_array[0]), - log_array, NULL) || + log_array, NULL, NULL) || translog_flush(lsn))) goto err; /* diff --git a/storage/maria/ma_delete.c b/storage/maria/ma_delete.c index 56da6fd3ed3..c229e6c55d0 100644 --- a/storage/maria/ma_delete.c +++ b/storage/maria/ma_delete.c @@ -44,10 +44,10 @@ int maria_delete(MARIA_HA *info,const uchar *record) /* Test if record is in datafile */ DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", - maria_print_error(info->s, HA_ERR_CRASHED); + maria_print_error(share, HA_ERR_CRASHED); DBUG_RETURN(my_errno= HA_ERR_CRASHED);); DBUG_EXECUTE_IF("my_error_test_undefined_error", - maria_print_error(info->s, INT_MAX); + maria_print_error(share, INT_MAX); DBUG_RETURN(my_errno= INT_MAX);); if (!(info->update & HA_STATE_AKTIV)) { @@ -70,17 +70,17 @@ int maria_delete(MARIA_HA *info,const uchar *record) old_key= info->lastkey2; for (i=0 ; i < share->base.keys ; i++ ) { - if (maria_is_key_active(info->s->state.key_map, i)) + if 
(maria_is_key_active(share->state.key_map, i)) { - info->s->keyinfo[i].version++; - if (info->s->keyinfo[i].flag & HA_FULLTEXT ) + share->keyinfo[i].version++; + if (share->keyinfo[i].flag & HA_FULLTEXT ) { if (_ma_ft_del(info,i,(char*) old_key,record,info->cur_row.lastpos)) goto err; } else { - if (info->s->keyinfo[i].ck_delete(info,i,old_key, + if (share->keyinfo[i].ck_delete(info,i,old_key, _ma_make_key(info,i,old_key,record,info->cur_row.lastpos))) goto err; } @@ -89,19 +89,20 @@ int maria_delete(MARIA_HA *info,const uchar *record) } } + if (share->calc_checksum) + { + /* + We can't use the row based checksum as this doesn't have enough + precision. + */ + info->cur_row.checksum= (*share->calc_checksum)(info, record); + } + if ((*share->delete_record)(info, record)) goto err; /* Remove record from database */ - /* - We can't use the row based checksum as this doesn't have enough - precision. - */ - if (info->s->calc_checksum) - { - info->cur_row.checksum= (*info->s->calc_checksum)(info,record); - info->state->checksum-= info->cur_row.checksum; - } - + info->state->checksum+= - !share->now_transactional * + info->cur_row.checksum; info->update= HA_STATE_CHANGED+HA_STATE_DELETED+HA_STATE_ROW_CHANGED; info->state->records-= !share->now_transactional; share->state.changed|= STATE_NOT_OPTIMIZED_ROWS; @@ -111,8 +112,8 @@ int maria_delete(MARIA_HA *info,const uchar *record) allow_break(); /* Allow SIGHUP & SIGINT */ if (info->invalidator != 0) { - DBUG_PRINT("info", ("invalidator... '%s' (delete)", info->s->open_file_name)); - (*info->invalidator)(info->s->open_file_name); + DBUG_PRINT("info", ("invalidator... 
'%s' (delete)", share->open_file_name)); + (*info->invalidator)(share->open_file_name); info->invalidator=0; } DBUG_RETURN(0); @@ -122,7 +123,7 @@ err: mi_sizestore(lastpos, info->cur_row.lastpos); if (save_errno != HA_ERR_RECORD_CHANGED) { - maria_print_error(info->s, HA_ERR_CRASHED); + maria_print_error(share, HA_ERR_CRASHED); maria_mark_crashed(info); /* mark table crashed */ } VOID(_ma_writeinfo(info,WRITEINFO_UPDATE_KEYFILE)); @@ -131,7 +132,7 @@ err: my_errno=save_errno; if (save_errno == HA_ERR_KEY_NOT_FOUND) { - maria_print_error(info->s, HA_ERR_CRASHED); + maria_print_error(share, HA_ERR_CRASHED); my_errno=HA_ERR_CRASHED; } diff --git a/storage/maria/ma_delete_all.c b/storage/maria/ma_delete_all.c index 8cb4fdb8a3e..408ca31b3c6 100644 --- a/storage/maria/ma_delete_all.c +++ b/storage/maria/ma_delete_all.c @@ -64,14 +64,15 @@ int maria_delete_all_rows(MARIA_HA *info) if (unlikely(translog_write_record(&lsn, LOGREC_REDO_DELETE_ALL, info->trn, info, 0, sizeof(log_array)/sizeof(log_array[0]), - log_array, log_data) || + log_array, log_data, NULL) || translog_flush(lsn))) goto err; } /* For recovery it matters that this is called after writing the log record, - so that resetting state.records actually happens under log's mutex. + so that resetting state.records and state.checksum actually happens under + log's mutex. */ _ma_reset_status(info); @@ -147,10 +148,6 @@ void _ma_reset_status(MARIA_HA *info) info->state->key_file_length= share->base.keystart; info->state->data_file_length= 0; info->state->empty= info->state->key_empty= 0; - /** - @todo RECOVERY BUG - the line below must happen under log's mutex when writing the REDO - */ info->state->checksum= 0; /* Drop the delete key chain. 
*/ diff --git a/storage/maria/ma_delete_table.c b/storage/maria/ma_delete_table.c index 693c68c7e5f..e6cbd961b7a 100644 --- a/storage/maria/ma_delete_table.c +++ b/storage/maria/ma_delete_table.c @@ -92,7 +92,7 @@ int maria_delete_table(const char *name) log_array[TRANSLOG_INTERNAL_PARTS + 0].length, sizeof(log_array)/sizeof(log_array[0]), - log_array, NULL) || + log_array, NULL, NULL) || translog_flush(lsn))) DBUG_RETURN(1); } diff --git a/storage/maria/ma_loghandler.c b/storage/maria/ma_loghandler.c index 3a94be3a16a..a4e5404815d 100644 --- a/storage/maria/ma_loghandler.c +++ b/storage/maria/ma_loghandler.c @@ -14,8 +14,8 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ #include "maria_def.h" -#include "ma_blockrec.h" -#include "trnman.h" +#include "ma_blockrec.h" /* for some constants and in-write hooks */ +#include "trnman.h" /* for access to members of TRN */ /** @file @@ -208,31 +208,6 @@ static MARIA_SHARE **id_to_share= NULL; /* lock for id_to_share */ static my_atomic_rwlock_t LOCK_id_to_share; -static my_bool write_hook_for_redo(enum translog_record_type type, - TRN *trn, MARIA_HA *tbl_info, LSN *lsn, - struct st_translog_parts *parts); -static my_bool write_hook_for_undo(enum translog_record_type type, - TRN *trn, MARIA_HA *tbl_info, LSN *lsn, - struct st_translog_parts *parts); -static my_bool write_hook_for_redo_delete_all(enum translog_record_type type, - TRN *trn, MARIA_HA *tbl_info, - LSN *lsn, - struct st_translog_parts *parts); -static my_bool write_hook_for_undo_row_insert(enum translog_record_type type, - TRN *trn, MARIA_HA *tbl_info, - LSN *lsn, - struct st_translog_parts *parts); -static my_bool write_hook_for_undo_row_delete(enum translog_record_type type, - TRN *trn, MARIA_HA *tbl_info, - LSN *lsn, - struct st_translog_parts *parts); -static my_bool write_hook_for_clr_end(enum translog_record_type type, - TRN *trn, MARIA_HA *tbl_info, LSN *lsn, - struct st_translog_parts *parts); -static my_bool 
write_hook_for_file_id(enum translog_record_type type, - TRN *trn, MARIA_HA *tbl_info, LSN *lsn, - struct st_translog_parts *parts); - static my_bool translog_page_validator(uchar *page_addr, uchar* data_ptr); /* @@ -437,8 +412,8 @@ static LOG_DESC INIT_LOGREC_REDO_UNDELETE_ROW= "redo_undelete_row", LOGREC_NOT_LAST_IN_GROUP, NULL, NULL}; static LOG_DESC INIT_LOGREC_CLR_END= -{LOGRECTYPE_PSEUDOFIXEDLENGTH, LSN_STORE_SIZE + FILEID_STORE_SIZE + 1, - LSN_STORE_SIZE + FILEID_STORE_SIZE + 1, NULL, write_hook_for_clr_end, NULL, 1, +{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE + + CLR_TYPE_STORE_SIZE, NULL, write_hook_for_clr_end, NULL, 1, "clr_end", LOGREC_LAST_IN_GROUP, NULL, NULL}; static LOG_DESC INIT_LOGREC_PURGE_END= @@ -446,8 +421,7 @@ static LOG_DESC INIT_LOGREC_PURGE_END= "purge_end", LOGREC_LAST_IN_GROUP, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_ROW_INSERT= -{LOGRECTYPE_PSEUDOFIXEDLENGTH, - LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, +{LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, NULL, write_hook_for_undo_row_insert, NULL, 1, "undo_row_insert", LOGREC_LAST_IN_GROUP, NULL, NULL}; @@ -461,7 +435,7 @@ static LOG_DESC INIT_LOGREC_UNDO_ROW_DELETE= static LOG_DESC INIT_LOGREC_UNDO_ROW_UPDATE= {LOGRECTYPE_VARIABLE_LENGTH, 0, LSN_STORE_SIZE + FILEID_STORE_SIZE + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, - NULL, write_hook_for_undo, NULL, 1, + NULL, write_hook_for_undo_row_update, NULL, 1, "undo_row_update", LOGREC_LAST_IN_GROUP, NULL, NULL}; static LOG_DESC INIT_LOGREC_UNDO_KEY_INSERT= @@ -3678,23 +3652,23 @@ static translog_size_t translog_get_current_group_size() } -/* - Write variable record in 1 group +/** + @brief Write variable record in 1 group. 
- SYNOPSIS - translog_write_variable_record_1group() - lsn LSN of the record will be written here - type the log record type - short_trid Short transaction ID or 0 if it has no sense - parts Descriptor of record source parts - buffer_to_flush Buffer which have to be flushed if it is not 0 - header_length Calculated header length of chunk type 0 - trn Transaction structure pointer for hooks by - record log type, for short_id + @param lsn LSN of the record will be written here + @param type the log record type + @param short_trid Short transaction ID or 0 if it has no sense + @param parts Descriptor of record source parts + @param buffer_to_flush Buffer which have to be flushed if it is not 0 + @param header_length Calculated header length of chunk type 0 + @param trn Transaction structure pointer for hooks by + record log type, for short_id + @param hook_arg Argument which will be passed to pre-write and + in-write hooks of this record. - RETURN - 0 OK - 1 Error + @return Operation status + @retval 0 OK + @retval 1 Error */ static my_bool @@ -3705,7 +3679,7 @@ translog_write_variable_record_1group(LSN *lsn, struct st_translog_parts *parts, struct st_translog_buffer *buffer_to_flush, uint16 header_length, - TRN *trn) + TRN *trn, void *hook_arg) { TRANSLOG_ADDRESS horizon; struct st_buffer_cursor cursor; @@ -3721,7 +3695,7 @@ translog_write_variable_record_1group(LSN *lsn, *lsn, TRUE) || (log_record_type_descriptor[type].inwrite_hook && (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info, - lsn, parts))) + lsn, hook_arg))) { translog_unlock(); DBUG_RETURN(1); @@ -3830,23 +3804,23 @@ translog_write_variable_record_1group(LSN *lsn, } -/* - Write variable record in 1 chunk +/** + @brief Write variable record in 1 chunk. 
- SYNOPSIS - translog_write_variable_record_1chunk() - lsn LSN of the record will be written here - type the log record type - short_trid Short transaction ID or 0 if it has no sense - parts Descriptor of record source parts - buffer_to_flush Buffer which have to be flushed if it is not 0 - header_length Calculated header length of chunk type 0 - trn Transaction structure pointer for hooks by - record log type, for short_id + @param lsn LSN of the record will be written here + @param type the log record type + @param short_trid Short transaction ID or 0 if it has no sense + @param parts Descriptor of record source parts + @param buffer_to_flush Buffer which have to be flushed if it is not 0 + @param header_length Calculated header length of chunk type 0 + @param trn Transaction structure pointer for hooks by + record log type, for short_id + @param hook_arg Argument which will be passed to pre-write and + in-write hooks of this record. - RETURN - 0 OK - 1 Error + @return Operation status + @retval 0 OK + @retval 1 Error */ static my_bool @@ -3857,7 +3831,7 @@ translog_write_variable_record_1chunk(LSN *lsn, struct st_translog_parts *parts, struct st_translog_buffer *buffer_to_flush, uint16 header_length, - TRN *trn) + TRN *trn, void *hook_arg) { int rc; uchar chunk0_header[1 + 2 + 5 + 2]; @@ -3871,7 +3845,7 @@ translog_write_variable_record_1chunk(LSN *lsn, *lsn, TRUE) || (log_record_type_descriptor[type].inwrite_hook && (*log_record_type_descriptor[type].inwrite_hook)(type, trn, tbl_info, - lsn, parts))) + lsn, hook_arg))) { translog_unlock(); DBUG_RETURN(1); @@ -4196,24 +4170,24 @@ static my_bool translog_relative_LSN_encode(struct st_translog_parts *parts, } -/* - Write multi-group variable-size record +/** + @brief Write multi-group variable-size record. 
- SYNOPSIS - translog_write_variable_record_mgroup() - lsn LSN of the record will be written here - type the log record type - short_trid Short transaction ID or 0 if it has no sense - parts Descriptor of record source parts - buffer_to_flush Buffer which have to be flushed if it is not 0 - header_length Header length calculated for 1 group - buffer_rest Beginning from which we plan to write in full pages - trn Transaction structure pointer for hooks by - record log type, for short_id + @param lsn LSN of the record will be written here + @param type the log record type + @param short_trid Short transaction ID or 0 if it has no sense + @param parts Descriptor of record source parts + @param buffer_to_flush Buffer which have to be flushed if it is not 0 + @param header_length Header length calculated for 1 group + @param buffer_rest Beginning from which we plan to write in full pages + @param trn Transaction structure pointer for hooks by + record log type, for short_id + @param hook_arg Argument which will be passed to pre-write and + in-write hooks of this record. - RETURN - 0 OK - 1 Error + @return Operation status + @retval 0 OK + @retval 1 Error */ static my_bool @@ -4226,7 +4200,7 @@ translog_write_variable_record_mgroup(LSN *lsn, *buffer_to_flush, uint16 header_length, translog_size_t buffer_rest, - TRN *trn) + TRN *trn, void *hook_arg) { TRANSLOG_ADDRESS horizon; struct st_buffer_cursor cursor; @@ -4554,7 +4528,7 @@ translog_write_variable_record_mgroup(LSN *lsn, if (log_record_type_descriptor[type].inwrite_hook && (*log_record_type_descriptor[type].inwrite_hook) (type, trn, tbl_info, - lsn, parts)) + lsn, hook_arg)) goto err; } @@ -4626,21 +4600,21 @@ err: } -/* - Write the variable length log record +/** + @brief Write the variable length log record. 
- SYNOPSIS - translog_write_variable_record() - lsn LSN of the record will be written here - type the log record type - short_trid Short transaction ID or 0 if it has no sense - parts Descriptor of record source parts - trn Transaction structure pointer for hooks by - record log type, for short_id + @param lsn LSN of the record will be written here + @param type the log record type + @param short_trid Short transaction ID or 0 if it has no sense + @param parts Descriptor of record source parts + @param trn Transaction structure pointer for hooks by + record log type, for short_id + @param hook_arg Argument which will be passed to pre-write and + in-write hooks of this record. - RETURN - 0 OK - 1 Error + @return Operation status + @retval 0 OK + @retval 1 Error */ static my_bool translog_write_variable_record(LSN *lsn, @@ -4648,7 +4622,7 @@ static my_bool translog_write_variable_record(LSN *lsn, MARIA_HA *tbl_info, SHORT_TRANSACTION_ID short_trid, struct st_translog_parts *parts, - TRN *trn) + TRN *trn, void *hook_arg) { struct st_translog_buffer *buffer_to_flush= NULL; uint header_length1= 1 + 2 + 2 + @@ -4725,7 +4699,7 @@ static my_bool translog_write_variable_record(LSN *lsn, res= translog_write_variable_record_1chunk(lsn, type, tbl_info, short_trid, parts, buffer_to_flush, - header_length1, trn); + header_length1, trn, hook_arg); DBUG_RETURN(res); } @@ -4737,7 +4711,7 @@ static my_bool translog_write_variable_record(LSN *lsn, res= translog_write_variable_record_1group(lsn, type, tbl_info, short_trid, parts, buffer_to_flush, - header_length1, trn); + header_length1, trn, hook_arg); DBUG_RETURN(res); } /* following function makes translog_unlock(); */ @@ -4745,26 +4719,26 @@ static my_bool translog_write_variable_record(LSN *lsn, short_trid, parts, buffer_to_flush, header_length1, - buffer_rest, trn); + buffer_rest, trn, hook_arg); DBUG_RETURN(res); } -/* - Write the fixed and pseudo-fixed log record +/** + @brief Write the fixed and pseudo-fixed log record. 
- SYNOPSIS - translog_write_fixed_record() - lsn LSN of the record will be written here - type the log record type - short_trid Short transaction ID or 0 if it has no sense - parts Descriptor of record source parts - trn Transaction structure pointer for hooks by - record log type, for short_id + @param lsn LSN of the record will be written here + @param type the log record type + @param short_trid Short transaction ID or 0 if it has no sense + @param parts Descriptor of record source parts + @param trn Transaction structure pointer for hooks by + record log type, for short_id + @param hook_arg Argument which will be passed to pre-write and + in-write hooks of this record. - RETURN - 0 OK - 1 Error + @return Operation status + @retval 0 OK + @retval 1 Error */ static my_bool translog_write_fixed_record(LSN *lsn, @@ -4772,7 +4746,7 @@ static my_bool translog_write_fixed_record(LSN *lsn, MARIA_HA *tbl_info, SHORT_TRANSACTION_ID short_trid, struct st_translog_parts *parts, - TRN *trn) + TRN *trn, void *hook_arg) { struct st_translog_buffer *buffer_to_flush= NULL; uchar chunk1_header[1 + 2]; @@ -4824,7 +4798,7 @@ static my_bool translog_write_fixed_record(LSN *lsn, *lsn, TRUE) || (log_record_type_descriptor[type].inwrite_hook && (*log_record_type_descriptor[type].inwrite_hook) (type, trn, tbl_info, - lsn, parts))) + lsn, hook_arg))) { rc= 1; goto err; @@ -4899,6 +4873,9 @@ err: @param store_share_id if tbl_info!=NULL then share's id will automatically be stored in the two first bytes pointed (so pointer is assumed to be !=NULL) + @param hook_arg argument which will be passed to pre-write and + in-write hooks of this record. 
+ @return Operation status @retval 0 OK @retval 1 Error @@ -4910,7 +4887,8 @@ my_bool translog_write_record(LSN *lsn, translog_size_t rec_len, uint part_no, LEX_STRING *parts_data, - uchar *store_share_id) + uchar *store_share_id, + void *hook_arg) { struct st_translog_parts parts; LEX_STRING *part; @@ -4955,7 +4933,7 @@ my_bool translog_write_record(LSN *lsn, if (unlikely(translog_write_record(&dummy_lsn, LOGREC_LONG_TRANSACTION_ID, trn, NULL, sizeof(log_data), sizeof(log_array)/sizeof(log_array[0]), - log_array, NULL))) + log_array, NULL, NULL))) DBUG_RETURN(1); } @@ -5018,17 +4996,17 @@ my_bool translog_write_record(LSN *lsn, if (!(rc= (log_record_type_descriptor[type].prewrite_hook && (*log_record_type_descriptor[type].prewrite_hook) (type, trn, tbl_info, - &parts)))) + hook_arg)))) { switch (log_record_type_descriptor[type].class) { case LOGRECTYPE_VARIABLE_LENGTH: rc= translog_write_variable_record(lsn, type, tbl_info, - short_trid, &parts, trn); + short_trid, &parts, trn, hook_arg); break; case LOGRECTYPE_PSEUDOFIXEDLENGTH: case LOGRECTYPE_FIXEDLENGTH: rc= translog_write_fixed_record(lsn, type, tbl_info, - short_trid, &parts, trn); + short_trid, &parts, trn, hook_arg); break; case LOGRECTYPE_NOT_ALLOWED: default: @@ -6060,6 +6038,7 @@ translog_size_t translog_read_record(LSN lsn, if (offset < page_end) { uint len= page_end - offset; + set_if_smaller(len, length); /* in case we read beyond record's end */ DBUG_ASSERT(offset >= data->current_offset); memcpy(buffer, data->scanner.page + data->body_offset + @@ -6339,207 +6318,6 @@ out: } -/** - @brief Sets transaction's rec_lsn if needed - - A transaction sometimes writes a REDO even before the page is in the - pagecache (example: brand new head or tail pages; full pages). So, if - Checkpoint happens just after the REDO write, it needs to know that the - REDO phase must start before this REDO. Scanning the pagecache cannot - tell that as the page is not in the cache. 
So, transaction sets its rec_lsn - to the REDO's LSN or somewhere before, and Checkpoint reads the - transaction's rec_lsn. - - @todo move it to a separate file - - @return Operation status, always 0 (success) -*/ - -static my_bool write_hook_for_redo(enum translog_record_type type - __attribute__ ((unused)), - TRN *trn, MARIA_HA *tbl_info - __attribute__ ((unused)), - LSN *lsn, - struct st_translog_parts *parts - __attribute__ ((unused))) -{ - /* - Users of dummy_transaction_object must keep this TRN clean as it - is used by many threads (like those manipulating non-transactional - tables). It might be dangerous if one user sets rec_lsn or some other - member and it is picked up by another user (like putting this rec_lsn into - a page of a non-transactional table); it's safer if all members stay 0. So - non-transactional log records (REPAIR, CREATE, RENAME, DROP) should not - call this hook; we trust them but verify ;) - */ - DBUG_ASSERT(trn->trid != 0); - /* - If the hook stays so simple, it would be faster to pass - !trn->rec_lsn ? trn->rec_lsn : some_dummy_lsn - to translog_write_record(), like Monty did in his original code, and not - have a hook. For now we keep it like this. 
- */ - if (trn->rec_lsn == 0) - trn->rec_lsn= *lsn; - return 0; -} - - -/** - @brief Sets transaction's undo_lsn, first_undo_lsn if needed - - @todo move it to a separate file - - @return Operation status, always 0 (success) -*/ - -static my_bool write_hook_for_undo(enum translog_record_type type - __attribute__ ((unused)), - TRN *trn, MARIA_HA *tbl_info - __attribute__ ((unused)), - LSN *lsn, - struct st_translog_parts *parts - __attribute__ ((unused))) -{ - DBUG_ASSERT(trn->trid != 0); - trn->undo_lsn= *lsn; - if (unlikely(LSN_WITH_FLAGS_TO_LSN(trn->first_undo_lsn) == 0)) - trn->first_undo_lsn= - trn->undo_lsn | LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn); - return 0; - /* - when we implement purging, we will specialize this hook: UNDO_PURGE - records will additionally set trn->undo_purge_lsn - */ -} - - -/** - @brief Sets the table's records count to 0, then calls the generic REDO - hook. - - @todo move it to a separate file - - @return Operation status, always 0 (success) -*/ - -static my_bool write_hook_for_redo_delete_all(enum translog_record_type type - __attribute__ ((unused)), - TRN *trn, MARIA_HA *tbl_info - __attribute__ ((unused)), - LSN *lsn, - struct st_translog_parts *parts - __attribute__ ((unused))) -{ - tbl_info->s->state.state.records= 0; - return write_hook_for_redo(type, trn, tbl_info, lsn, parts); -} - - -/** - @brief Upates "records" and calls the generic UNDO hook - - @todo move it to a separate file - - @return Operation status, always 0 (success) -*/ - -static my_bool write_hook_for_undo_row_insert(enum translog_record_type type - __attribute__ ((unused)), - TRN *trn, MARIA_HA *tbl_info, - LSN *lsn, - struct st_translog_parts *parts - __attribute__ ((unused))) -{ - tbl_info->s->state.state.records++; - return write_hook_for_undo(type, trn, tbl_info, lsn, parts); -} - - -/** - @brief Upates "records" and calls the generic UNDO hook - - @todo move it to a separate file - - @return Operation status, always 0 (success) -*/ - -static my_bool 
write_hook_for_undo_row_delete(enum translog_record_type type - __attribute__ ((unused)), - TRN *trn, MARIA_HA *tbl_info, - LSN *lsn, - struct st_translog_parts *parts - __attribute__ ((unused))) -{ - tbl_info->s->state.state.records--; - return write_hook_for_undo(type, trn, tbl_info, lsn, parts); -} - - -/** - @brief Sets transaction's undo_lsn, first_undo_lsn if needed - - @todo move it to a separate file - - @return Operation status, always 0 (success) -*/ - -static my_bool write_hook_for_clr_end(enum translog_record_type type - __attribute__ ((unused)), - TRN *trn, MARIA_HA *tbl_info - __attribute__ ((unused)), - LSN *lsn - __attribute__ ((unused)), - struct st_translog_parts *parts) -{ - char *ptr= parts->parts[TRANSLOG_INTERNAL_PARTS + 0].str; - enum translog_record_type undone_record_type= - ptr[LSN_STORE_SIZE + FILEID_STORE_SIZE]; - - DBUG_ASSERT(trn->trid != 0); - trn->undo_lsn= lsn_korr(ptr); - switch (undone_record_type) { - case LOGREC_UNDO_ROW_DELETE: - tbl_info->s->state.state.records++; - break; - case LOGREC_UNDO_ROW_INSERT: - tbl_info->s->state.state.records--; - break; - case LOGREC_UNDO_ROW_UPDATE: - break; - default: - DBUG_ASSERT(0); - } - if (trn->undo_lsn == LSN_IMPOSSIBLE) /* has fully rolled back */ - trn->first_undo_lsn= LSN_WITH_FLAGS_TO_FLAGS(trn->first_undo_lsn); - return 0; -} - - -/** - @brief Updates table's lsn_of_file_id. 
- - @todo move it to a separate file - - @return Operation status, always 0 (success) -*/ - -static my_bool write_hook_for_file_id(enum translog_record_type type - __attribute__ ((unused)), - TRN *trn - __attribute__ ((unused)), - MARIA_HA *tbl_info, - LSN *lsn - __attribute__ ((unused)), - struct st_translog_parts *parts - __attribute__ ((unused))) -{ - DBUG_ASSERT(cmp_translog_addr(tbl_info->s->lsn_of_file_id, *lsn) < 0); - tbl_info->s->lsn_of_file_id= *lsn; - return 0; -} - - /** @brief Gives a 2-byte-id to MARIA_SHARE and logs this fact @@ -6610,7 +6388,7 @@ int translog_assign_id_to_share(MARIA_HA *tbl_info, TRN *trn) log_array[TRANSLOG_INTERNAL_PARTS + 1].length, sizeof(log_array)/sizeof(log_array[0]), - log_array, log_data))) + log_array, log_data, NULL))) return 1; } pthread_mutex_unlock(&share->intern_lock); diff --git a/storage/maria/ma_loghandler.h b/storage/maria/ma_loghandler.h index b13984d88ef..9ea3bfca263 100644 --- a/storage/maria/ma_loghandler.h +++ b/storage/maria/ma_loghandler.h @@ -48,6 +48,7 @@ typedef uint16 SHORT_TRANSACTION_ID; struct st_maria_info; +/* Changing one of the "SIZE" below will break backward-compatibility! 
*/ /* Length of CRC at end of pages */ #define CRC_LENGTH 4 /* Size of file id in logs */ @@ -57,16 +58,23 @@ struct st_maria_info; /* Size of page ranges in log */ #define PAGERANGE_STORE_SIZE ROW_EXTENT_COUNT_SIZE #define DIRPOS_STORE_SIZE 1 +#define CLR_TYPE_STORE_SIZE 1 +/* If table has live checksum we store its changes in UNDOs */ +#define HA_CHECKSUM_STORE_SIZE 4 /* Store methods to match the above sizes */ #define fileid_store(T,A) int2store(T,A) #define page_store(T,A) int5store(T,A) #define dirpos_store(T,A) ((*(uchar*) (T)) = A) #define pagerange_store(T,A) int2store(T,A) +#define clr_type_store(T,A) ((*(uchar*) (T)) = A) +#define ha_checksum_store(T,A) int4store(T,A) #define fileid_korr(P) uint2korr(P) #define page_korr(P) uint5korr(P) #define dirpos_korr(P) ((P)[0]) #define pagerange_korr(P) uint2korr(P) +#define clr_type_korr(P) ((P)[0]) +#define ha_checksum_korr(P) uint4korr(P) /* Length of disk drive sector size (we assume that writing it @@ -230,7 +238,8 @@ translog_write_record(LSN *lsn, enum translog_record_type type, struct st_transaction *trn, struct st_maria_info *tbl_info, translog_size_t rec_len, uint part_no, - LEX_STRING *parts_data, uchar *store_share_id); + LEX_STRING *parts_data, uchar *store_share_id, + void *hook_arg); extern void translog_destroy(); @@ -299,12 +308,11 @@ struct st_translog_parts typedef my_bool(*prewrite_rec_hook) (enum translog_record_type type, TRN *trn, struct st_maria_info *tbl_info, - struct st_translog_parts *parts); + void *hook_arg); typedef my_bool(*inwrite_rec_hook) (enum translog_record_type type, TRN *trn, struct st_maria_info *tbl_info, - LSN *lsn, - struct st_translog_parts *parts); + LSN *lsn, void *hook_arg); typedef uint16(*read_rec_hook) (enum translog_record_type type, uint16 read_length, uchar *read_buff, diff --git a/storage/maria/ma_open.c b/storage/maria/ma_open.c index 6852d5b4a6d..b736fc6e9dc 100644 --- a/storage/maria/ma_open.c +++ b/storage/maria/ma_open.c @@ -177,7 +177,7 @@ static 
MARIA_HA *maria_clone_internal(MARIA_SHARE *share, int mode, share->delay_key_write=1; info.state= &share->state.state; /* Change global values by default */ - if (!share->base.born_transactional) /* but for transactional ones ... */ + if (!share->base.born_transactional) /* For transactional ones ... */ info.trn= &dummy_transaction_object; /* ... force crash if no trn given */ pthread_mutex_unlock(&share->intern_lock); diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index 4b0ccd2c9c8..4a3fc555a8a 100755 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -3941,7 +3941,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, } } - compile_time_assert(sizeof(pagecache->blocks == 4)); + compile_time_assert(sizeof(pagecache->blocks) <= 4); str->length= 4 + /* number of dirty pages */ (4 + /* file */ 4 + /* pageno */ @@ -3963,8 +3963,8 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, { if (block->type != PAGECACHE_LSN_PAGE) continue; /* no need to store it in the checkpoint record */ - compile_time_assert((4 == sizeof(block->hash_link->file.file))); - compile_time_assert((4 == sizeof(block->hash_link->pageno))); + compile_time_assert(sizeof(block->hash_link->file.file) <= 4); + compile_time_assert(sizeof(block->hash_link->pageno) <= 4); int4store(ptr, block->hash_link->file.file); ptr+= 4; int4store(ptr, block->hash_link->pageno); diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index 6967571655e..1ae2244159c 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -50,6 +50,7 @@ static LSN current_group_end_lsn, static TrID max_long_trid= 0; /**< max long trid seen by REDO phase */ static FILE *tracef; /**< trace file for debugging */ static my_bool skip_DDLs; /**< if REDO phase should skip DDL records */ +static ulonglong now; /**< for tracking execution time of phases */ #define prototype_redo_exec_hook(R) \ static int 
exec_REDO_LOGREC_ ## R(const TRANSLOG_HEADER_BUFFER *rec) @@ -312,8 +313,12 @@ end: log_record_buffer.length= 0; if (tracef != stdout && redo_phase_message_printed) { + ulonglong old_now= now; + now= my_getsystime(); + float previous_phase_took= (now - old_now)/10000000.0; /** @todo RECOVERY BUG all prints to stderr should go to error log */ - fprintf(stderr, "\n"); + /** @todo RECOVERY BUG all prints to stderr should go to error log */ + fprintf(stderr, " (%.1f seconds)\n", previous_phase_took); } /* we don't cleanly close tables if we hit some error (may corrupt them) */ DBUG_RETURN(error); @@ -1211,12 +1216,25 @@ prototype_redo_exec_hook(UNDO_ROW_INSERT) MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); if (info == NULL) return 0; + MARIA_SHARE *share= info->s; set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); - if (cmp_translog_addr(rec->lsn, info->s->state.is_of_horizon) >= 0) + if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) { tprint(tracef, " state older than record, updating rows' count\n"); - info->s->state.state.records++; - /** @todo RECOVERY BUG Also update the table's checksum */ + share->state.state.records++; + if (share->calc_checksum) + { + uchar buff[HA_CHECKSUM_STORE_SIZE]; + if (translog_read_record(rec->lsn, LSN_STORE_SIZE + FILEID_STORE_SIZE + + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, + HA_CHECKSUM_STORE_SIZE, buff, NULL) != + HA_CHECKSUM_STORE_SIZE) + { + tprint(tracef, "Failed to read record\n"); + return 1; + } + share->state.state.checksum+= ha_checksum_korr(buff); + } /** @todo some bits below will rather be set when executing UNDOs related to keys @@ -1234,15 +1252,29 @@ prototype_redo_exec_hook(UNDO_ROW_DELETE) MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); if (info == NULL) return 0; + MARIA_SHARE *share= info->s; set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); - if (cmp_translog_addr(rec->lsn, info->s->state.is_of_horizon) >= 0) + if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) 
{ tprint(tracef, " state older than record, updating rows' count\n"); - info->s->state.state.records--; - info->s->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED | + share->state.state.records--; + if (share->calc_checksum) + { + uchar buff[HA_CHECKSUM_STORE_SIZE]; + if (translog_read_record(rec->lsn, LSN_STORE_SIZE + FILEID_STORE_SIZE + + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, + HA_CHECKSUM_STORE_SIZE, buff, NULL) != + HA_CHECKSUM_STORE_SIZE) + { + tprint(tracef, "Failed to read record\n"); + return 1; + } + share->state.state.checksum+= ha_checksum_korr(buff); + } + share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED | STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES; } - tprint(tracef, " rows' count %lu\n", (ulong)info->s->state.state.records); + tprint(tracef, " rows' count %lu\n", (ulong)share->state.state.records); return 0; } @@ -1252,10 +1284,24 @@ prototype_redo_exec_hook(UNDO_ROW_UPDATE) MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); if (info == NULL) return 0; + MARIA_SHARE *share= info->s; set_undo_lsn_for_active_trans(rec->short_trid, rec->lsn); - if (cmp_translog_addr(rec->lsn, info->s->state.is_of_horizon) >= 0) + if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) { - info->s->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED | + if (share->calc_checksum) + { + uchar buff[HA_CHECKSUM_STORE_SIZE]; + if (translog_read_record(rec->lsn, LSN_STORE_SIZE + FILEID_STORE_SIZE + + PAGE_STORE_SIZE + DIRPOS_STORE_SIZE, + HA_CHECKSUM_STORE_SIZE, buff, NULL) != + HA_CHECKSUM_STORE_SIZE) + { + tprint(tracef, "Failed to read record\n"); + return 1; + } + share->state.state.checksum+= ha_checksum_korr(buff); + } + share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED | STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES; } return 0; @@ -1306,33 +1352,46 @@ prototype_redo_exec_hook(CLR_END) MARIA_HA *info= get_MARIA_HA_from_UNDO_record(rec); if (info == NULL) return 0; + MARIA_SHARE *share= info->s; LSN previous_undo_lsn= 
lsn_korr(rec->header); enum translog_record_type undone_record_type= - (rec->header)[LSN_STORE_SIZE + FILEID_STORE_SIZE]; + clr_type_korr(rec->header + LSN_STORE_SIZE + FILEID_STORE_SIZE); const LOG_DESC *log_desc= &log_record_type_descriptor[undone_record_type]; set_undo_lsn_for_active_trans(rec->short_trid, previous_undo_lsn); tprint(tracef, " CLR_END was about %s, undo_lsn now LSN (%lu,0x%lx)\n", log_desc->name, LSN_IN_PARTS(previous_undo_lsn)); - if (cmp_translog_addr(rec->lsn, info->s->state.is_of_horizon) >= 0) + if (cmp_translog_addr(rec->lsn, share->state.is_of_horizon) >= 0) { tprint(tracef, " state older than record, updating rows' count\n"); + if (share->calc_checksum) + { + uchar buff[HA_CHECKSUM_STORE_SIZE]; + if (translog_read_record(rec->lsn, LSN_STORE_SIZE + FILEID_STORE_SIZE + + CLR_TYPE_STORE_SIZE, HA_CHECKSUM_STORE_SIZE, + buff, NULL) != HA_CHECKSUM_STORE_SIZE) + { + tprint(tracef, "Failed to read record\n"); + return 1; + } + share->state.state.checksum+= ha_checksum_korr(buff); + } switch (undone_record_type) { case LOGREC_UNDO_ROW_DELETE: - info->s->state.state.records++; + share->state.state.records++; break; case LOGREC_UNDO_ROW_INSERT: - info->s->state.state.records--; + share->state.state.records--; break; case LOGREC_UNDO_ROW_UPDATE: break; default: DBUG_ASSERT(0); } - info->s->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED | + share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED | STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES; } - tprint(tracef, " rows' count %lu\n", (ulong)info->s->state.state.records); + tprint(tracef, " rows' count %lu\n", (ulong)share->state.state.records); return 0; } @@ -1353,12 +1412,33 @@ prototype_undo_exec_hook(UNDO_ROW_INSERT) */ return 1; } - info->s->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED | + MARIA_SHARE *share= info->s; + share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED | STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES; + const uchar *record_ptr= rec->header; + if 
(share->calc_checksum) + { + /* + We need to read more of the record to put the checksum into the record + buffer used by _ma_apply_undo_row_insert(). + If the table has no live checksum, rec->header will be enough. + */ + enlarge_buffer(rec); + if (log_record_buffer.str == NULL || + translog_read_record(rec->lsn, 0, rec->record_length, + log_record_buffer.str, NULL) != + rec->record_length) + { + tprint(tracef, "Failed to read record\n"); + return 1; + } + record_ptr= log_record_buffer.str; + } + info->trn= trn; error= _ma_apply_undo_row_insert(info, previous_undo_lsn, - rec->header + LSN_STORE_SIZE + + record_ptr + LSN_STORE_SIZE + FILEID_STORE_SIZE); info->trn= 0; /* trn->undo_lsn is updated in an inwrite_hook when writing the CLR_END */ @@ -1378,7 +1458,8 @@ prototype_undo_exec_hook(UNDO_ROW_DELETE) if (info == NULL) return 1; - info->s->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED | + MARIA_SHARE *share= info->s; + share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED | STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES; enlarge_buffer(rec); @@ -1405,8 +1486,7 @@ prototype_undo_exec_hook(UNDO_ROW_DELETE) PAGE_STORE_SIZE + DIRPOS_STORE_SIZE)); info->trn= 0; tprint(tracef, " rows' count %lu\n undo_lsn now LSN (%lu,0x%lx)\n", - (ulong)info->s->state.state.records, - LSN_IN_PARTS(previous_undo_lsn)); + (ulong)share->state.state.records, LSN_IN_PARTS(previous_undo_lsn)); return error; } @@ -1419,8 +1499,8 @@ prototype_undo_exec_hook(UNDO_ROW_UPDATE) if (info == NULL) return 1; - - info->s->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED | + MARIA_SHARE *share= info->s; + share->state.changed|= STATE_CHANGED | STATE_NOT_ANALYZED | STATE_NOT_OPTIMIZED_KEYS | STATE_NOT_SORTED_PAGES; enlarge_buffer(rec); @@ -1634,7 +1714,7 @@ static uint end_of_redo_phase(my_bool prepare_for_undo_phase) hash_free(&all_dirty_pages); /* - hash_free() can be called multiple times probably, but be safe it that + hash_free() can be called multiple times probably, but be safe if 
that changes */ bzero(&all_dirty_pages, sizeof(all_dirty_pages)); @@ -1652,11 +1732,8 @@ static uint end_of_redo_phase(my_bool prepare_for_undo_phase) LSN gslsn= all_active_trans[sid].group_start_lsn; TRN *trn; if (gslsn != LSN_IMPOSSIBLE) - { tprint(tracef, "Group at LSN (%lu,0x%lx) short_trid %u aborted\n", LSN_IN_PARTS(gslsn), sid); - ALERT_USER(); - } if (all_active_trans[sid].undo_lsn != LSN_IMPOSSIBLE) { char llbuf[22]; @@ -1734,8 +1811,12 @@ static int run_undo_phase(uint unfinished) { if (tracef != stdout) { + ulonglong old_now= now; + now= my_getsystime(); + float previous_phase_took= (now - old_now)/10000000.0; /** @todo RECOVERY BUG all prints to stderr should go to error log */ - fprintf(stderr, " 100%%; transactions to roll back:"); + fprintf(stderr, " 100%% (%.1f seconds); transactions to roll back:", + previous_phase_took); } tprint(tracef, "%u transactions will be rolled back\n", unfinished); for( ; ; ) @@ -2070,8 +2151,11 @@ static int close_all_tables(void) tprint(tracef, "Closing all tables\n"); if (tracef != stdout && redo_phase_message_printed) { + ulonglong old_now= now; + now= my_getsystime(); + float previous_phase_took= (now - old_now)/10000000.0; /** @todo RECOVERY BUG all prints to stderr should go to error log */ - fprintf(stderr, "; flushing tables"); + fprintf(stderr, " (%.1f seconds); flushing tables", previous_phase_took); } /* @@ -2141,6 +2225,7 @@ static void print_redo_phase_progress(TRANSLOG_ADDRESS addr) /** @todo RECOVERY BUG all prints to stderr should go to error log */ fprintf(stderr, "Maria engine: starting recovery; recovered pages: 0%%"); redo_phase_message_printed= TRUE; + now= my_getsystime(); } if (end_logno == FILENO_IMPOSSIBLE) { @@ -2166,7 +2251,7 @@ static void print_redo_phase_progress(TRANSLOG_ADDRESS addr) } #ifdef MARIA_EXTERNAL_LOCKING -#error Maria's Checkpoint and Recovery are really not ready for it +#error Marias Checkpoint and Recovery are really not ready for it #endif /* diff --git 
a/storage/maria/ma_rename.c b/storage/maria/ma_rename.c index 44cd60711da..57d35f6c066 100644 --- a/storage/maria/ma_rename.c +++ b/storage/maria/ma_rename.c @@ -85,7 +85,7 @@ int maria_rename(const char *old_name, const char *new_name) &dummy_transaction_object, NULL, old_name_len + new_name_len, sizeof(log_array)/sizeof(log_array[0]), - log_array, NULL) || + log_array, NULL, NULL) || translog_flush(lsn))) { maria_close(info); diff --git a/storage/maria/ma_test_recovery.expected b/storage/maria/ma_test_recovery.expected index 926943b11b3..70339fc69b6 100644 --- a/storage/maria/ma_test_recovery.expected +++ b/storage/maria/ma_test_recovery.expected @@ -4,10 +4,6 @@ TEST WITH ma_test1 -s -M -T -c applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 3757530372 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -21,10 +17,6 @@ testing idempotency applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 3757530372 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -78,10 +70,6 @@ Dying on request without maria_commit()/maria_close() applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 221293111 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -95,10 +83,6 @@ testing idempotency applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 221293111 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -112,10 +96,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= -7c7 -< Checksum: 221293111 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -137,10 +117,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 3697324514 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -158,10 +134,6 @@ Dying on request without maria_commit()/maria_close() applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 2428948025 ---- -> Checksum: 3026590807 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -175,10 +147,6 @@ testing idempotency applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 2428948025 ---- -> Checksum: 3026590807 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -192,10 +160,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 2428948025 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -253,10 +217,6 @@ Dying on request without maria_commit()/maria_close() applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 221293111 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -270,10 +230,6 @@ testing idempotency applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 221293111 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -287,10 +243,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= -7c7 -< Checksum: 221293111 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -312,10 +264,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 3697324514 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -333,10 +281,6 @@ Dying on request without maria_commit()/maria_close() applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 2428948025 ---- -> Checksum: 3026590807 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -350,10 +294,6 @@ testing idempotency applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 2428948025 ---- -> Checksum: 3026590807 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -367,10 +307,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 2428948025 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -428,10 +364,6 @@ Dying on request without maria_commit()/maria_close() applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 221293111 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -445,10 +377,6 @@ testing idempotency applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 221293111 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -462,10 +390,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= -7c7 -< Checksum: 221293111 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -487,10 +411,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 3697324514 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -508,10 +428,6 @@ Dying on request without maria_commit()/maria_close() applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 2428948025 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -525,10 +441,6 @@ testing idempotency applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 2428948025 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -542,10 +454,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 2428948025 ---- -> Checksum: 0 11c11 < Datafile length: 16384 Keyfile length: 16384 --- @@ -603,10 +511,6 @@ Dying on request without maria_commit()/maria_close() applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 411409161 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -620,10 +524,6 @@ testing idempotency applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 411409161 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -637,10 +537,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= -7c7 -< Checksum: 411409161 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -662,10 +558,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 4024695312 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -683,10 +575,6 @@ Dying on request without maria_commit()/maria_close() applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 529753687 ---- -> Checksum: 800025671 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -700,10 +588,6 @@ testing idempotency applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 529753687 ---- -> Checksum: 800025671 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -717,10 +601,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 529753687 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -778,10 +658,6 @@ Dying on request without maria_commit()/maria_close() applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 411409161 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -795,10 +671,6 @@ testing idempotency applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 411409161 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -812,10 +684,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= -7c7 -< Checksum: 411409161 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -837,10 +705,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 4024695312 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -858,10 +722,6 @@ Dying on request without maria_commit()/maria_close() applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 529753687 ---- -> Checksum: 800025671 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -875,10 +735,6 @@ testing idempotency applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 529753687 ---- -> Checksum: 800025671 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -892,10 +748,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 529753687 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -953,10 +805,6 @@ Dying on request without maria_commit()/maria_close() applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 411409161 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -970,10 +818,6 @@ testing idempotency applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 411409161 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -987,10 +831,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= -7c7 -< Checksum: 411409161 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -1012,10 +852,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 4024695312 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -1033,10 +869,6 @@ Dying on request without maria_commit()/maria_close() applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 529753687 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -1050,10 +882,6 @@ testing idempotency applying log Differences in maria_chk -dvv, recovery not yet perfect ! ========DIFF START======= -7c7 -< Checksum: 529753687 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- @@ -1067,10 +895,6 @@ testing applying of CLRs to recreate table applying log Differences in maria_chk -dvv, recovery not yet perfect ! 
========DIFF START======= -7c7 -< Checksum: 529753687 ---- -> Checksum: 0 11c11 < Datafile length: 49152 Keyfile length: 16384 --- diff --git a/storage/maria/ma_update.c b/storage/maria/ma_update.c index 0cb2e2b648b..1d138c13273 100644 --- a/storage/maria/ma_update.c +++ b/storage/maria/ma_update.c @@ -27,11 +27,9 @@ int maria_update(register MARIA_HA *info, const uchar *oldrec, uchar *newrec) bool auto_key_changed=0; ulonglong changed; MARIA_SHARE *share=info->s; - ha_checksum old_checksum; DBUG_ENTER("maria_update"); LINT_INIT(new_key); LINT_INIT(changed); - LINT_INIT(old_checksum); DBUG_EXECUTE_IF("maria_pretend_crashed_table_on_usage", maria_print_error(info->s, HA_ERR_CRASHED); @@ -59,16 +57,6 @@ int maria_update(register MARIA_HA *info, const uchar *oldrec, uchar *newrec) goto err_end; /* Record has changed */ } - if (share->calc_checksum) - { - /* - We can't use the row based checksum as this doesn't have enough - precision. - */ - if (info->s->calc_checksum) - old_checksum= (*info->s->calc_checksum)(info, oldrec); - } - /* Calculate and check all unique constraints */ key_changed=0; for (i=0 ; i < share->state.header.uniques ; i++) @@ -137,19 +125,23 @@ int maria_update(register MARIA_HA *info, const uchar *oldrec, uchar *newrec) } } } - /* - If we are running with external locking, we must update the index file - that something has changed. - */ - if (changed || !my_disable_locking) - key_changed|= HA_STATE_CHANGED; if (share->calc_checksum) { - info->cur_row.checksum= (*share->calc_checksum)(info,newrec); - info->state->checksum+= (info->cur_row.checksum - old_checksum); - /* Store new checksum in index file header */ - key_changed|= HA_STATE_CHANGED; + /* + We can't use the row based checksum as this doesn't have enough + precision (one byte, while the table's is more bytes). + At least _ma_check_unique() modifies the 'newrec' record, so checksum + has to be computed _after_ it. Nobody apparently modifies 'oldrec'. 
+ We need to pass the old row's checksum down to (*update_record)(), we do + this via info->new_row.checksum (not intuitive but existing code + mandated that cur_row is the new row). + If (*update_record)() fails, table will be marked corrupted so no need + to revert the live checksum change. + */ + info->state->checksum+= !share->now_transactional * + ((info->cur_row.checksum= (*share->calc_checksum)(info, newrec)) - + (info->new_row.checksum= (*share->calc_checksum)(info, oldrec))); } { /* @@ -165,14 +157,9 @@ int maria_update(register MARIA_HA *info, const uchar *oldrec, uchar *newrec) org_delete_link= share->state.dellink; if ((*share->update_record)(info, pos, oldrec, newrec)) goto err; - if (!key_changed && - (memcmp((char*) &state, (char*) info->state, sizeof(state)) || - org_split != share->state.split || - org_delete_link != share->state.dellink)) - key_changed|= HA_STATE_CHANGED; /* Must update index file */ } if (auto_key_changed) - set_if_bigger(info->s->state.auto_increment, + set_if_bigger(share->state.auto_increment, ma_retrieve_auto_increment(info, newrec)); /* @@ -195,8 +182,8 @@ int maria_update(register MARIA_HA *info, const uchar *oldrec, uchar *newrec) allow_break(); /* Allow SIGHUP & SIGINT */ if (info->invalidator != 0) { - DBUG_PRINT("info", ("invalidator... '%s' (update)", info->s->open_file_name)); - (*info->invalidator)(info->s->open_file_name); + DBUG_PRINT("info", ("invalidator... 
'%s' (update)", share->open_file_name)); + (*info->invalidator)(share->open_file_name); info->invalidator=0; } DBUG_RETURN(0); @@ -232,7 +219,7 @@ err: } else { - maria_print_error(info->s, HA_ERR_CRASHED); + maria_print_error(share, HA_ERR_CRASHED); maria_mark_crashed(info); } info->update= (HA_STATE_CHANGED | HA_STATE_AKTIV | HA_STATE_ROW_CHANGED | @@ -243,7 +230,7 @@ err: allow_break(); /* Allow SIGHUP & SIGINT */ if (save_errno == HA_ERR_KEY_NOT_FOUND) { - maria_print_error(info->s, HA_ERR_CRASHED); + maria_print_error(share, HA_ERR_CRASHED); save_errno=HA_ERR_CRASHED; } DBUG_RETURN(my_errno=save_errno); diff --git a/storage/maria/ma_write.c b/storage/maria/ma_write.c index b034d71ef9d..18892475229 100644 --- a/storage/maria/ma_write.c +++ b/storage/maria/ma_write.c @@ -162,10 +162,6 @@ int maria_write(MARIA_HA *info, uchar *record) rw_unlock(&share->key_root_lock[i]); } } - /** - @todo RECOVERY BUG - this += must happen under log's mutex when writing the UNDO - */ if (share->calc_write_checksum) info->cur_row.checksum= (*share->calc_write_checksum)(info,record); if (filepos != HA_OFFSET_ERROR) @@ -176,7 +172,8 @@ int maria_write(MARIA_HA *info, uchar *record) @todo when we enable multiple writers, we will have to protect 'records' and 'checksum' somehow. 
*/ - info->state->checksum+= info->cur_row.checksum; + info->state->checksum+= !share->now_transactional * + info->cur_row.checksum; } if (share->base.auto_key) set_if_bigger(info->s->state.auto_increment, diff --git a/storage/maria/unittest/ma_test_loghandler-t.c b/storage/maria/unittest/ma_test_loghandler-t.c index fa7267e31d4..d9627409c80 100644 --- a/storage/maria/unittest/ma_test_loghandler-t.c +++ b/storage/maria/unittest/ma_test_loghandler-t.c @@ -198,8 +198,8 @@ int main(int argc __attribute__((unused)), char *argv[]) trn->first_undo_lsn= TRANSACTION_LOGGED_LONG_ID; if (translog_write_record(&lsn, LOGREC_FIXED_RECORD_0LSN_EXAMPLE, - trn, NULL, - 6, TRANSLOG_INTERNAL_PARTS + 1, parts, NULL)) + trn, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, + parts, NULL, NULL)) { fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); translog_destroy(); @@ -220,8 +220,8 @@ int main(int argc __attribute__((unused)), char *argv[]) /* check auto-count feature */ parts[TRANSLOG_INTERNAL_PARTS + 1].str= NULL; parts[TRANSLOG_INTERNAL_PARTS + 1].length= 0; - if (translog_write_record(&lsn, LOGREC_FIXED_RECORD_1LSN_EXAMPLE, - trn, NULL, LSN_STORE_SIZE, 0, parts, NULL)) + if (translog_write_record(&lsn, LOGREC_FIXED_RECORD_1LSN_EXAMPLE, trn, + NULL, LSN_STORE_SIZE, 0, parts, NULL, NULL)) { fprintf(stderr, "1 Can't write reference defore record #%lu\n", (ulong) i); @@ -241,7 +241,7 @@ int main(int argc __attribute__((unused)), char *argv[]) if (translog_write_record(&lsn, LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE, trn, NULL, 0, TRANSLOG_INTERNAL_PARTS + 2, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "1 Can't write var reference defore record #%lu\n", (ulong) i); @@ -259,8 +259,8 @@ int main(int argc __attribute__((unused)), char *argv[]) parts[TRANSLOG_INTERNAL_PARTS + 0].length= 23; if (translog_write_record(&lsn, LOGREC_FIXED_RECORD_2LSN_EXAMPLE, - trn, NULL, - 23, TRANSLOG_INTERNAL_PARTS + 1, parts, NULL)) + trn, NULL, 23, TRANSLOG_INTERNAL_PARTS + 1, + parts, NULL, NULL)) { 
fprintf(stderr, "0 Can't write reference defore record #%lu\n", (ulong) i); @@ -280,7 +280,8 @@ int main(int argc __attribute__((unused)), char *argv[]) if (translog_write_record(&lsn, LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE, trn, NULL, 14 + rec_len, - TRANSLOG_INTERNAL_PARTS + 2, parts, NULL)) + TRANSLOG_INTERNAL_PARTS + 2, parts, NULL, + NULL)) { fprintf(stderr, "0 Can't write var reference defore record #%lu\n", (ulong) i); @@ -297,7 +298,7 @@ int main(int argc __attribute__((unused)), char *argv[]) LOGREC_FIXED_RECORD_0LSN_EXAMPLE, trn, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "Can't write record #%lu\n", (ulong) i); translog_destroy(); @@ -316,7 +317,7 @@ int main(int argc __attribute__((unused)), char *argv[]) LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE, trn, NULL, rec_len, TRANSLOG_INTERNAL_PARTS + 1, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); translog_destroy(); diff --git a/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c b/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c index 28233ae04cb..845e33b63a5 100644 --- a/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c +++ b/storage/maria/unittest/ma_test_loghandler_first_lsn-t.c @@ -106,7 +106,7 @@ int main(int argc __attribute__((unused)), char *argv[]) LOGREC_FIXED_RECORD_0LSN_EXAMPLE, &dummy_transaction_object, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); translog_destroy(); diff --git a/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c b/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c index d6f0bde7a8e..68561443a02 100644 --- a/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c +++ b/storage/maria/unittest/ma_test_loghandler_max_lsn-t.c @@ -96,7 +96,7 @@ int main(int argc __attribute__((unused)), char *argv[]) LOGREC_FIXED_RECORD_0LSN_EXAMPLE, &dummy_transaction_object, NULL, 6, 
TRANSLOG_INTERNAL_PARTS + 1, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); translog_destroy(); diff --git a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c index 559cd17638f..e5afa0b40db 100644 --- a/storage/maria/unittest/ma_test_loghandler_multigroup-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multigroup-t.c @@ -194,8 +194,8 @@ int main(int argc __attribute__((unused)), char *argv[]) trn->short_id= 0; trn->first_undo_lsn= TRANSACTION_LOGGED_LONG_ID; if (translog_write_record(&lsn, LOGREC_FIXED_RECORD_0LSN_EXAMPLE, - trn, NULL, - 6, TRANSLOG_INTERNAL_PARTS + 1, parts, NULL)) + trn, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, parts, + NULL, NULL)) { fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); translog_destroy(); @@ -214,10 +214,9 @@ int main(int argc __attribute__((unused)), char *argv[]) parts[TRANSLOG_INTERNAL_PARTS + 0].length= LSN_STORE_SIZE; trn->short_id= i % 0xFFFF; if (translog_write_record(&lsn, - LOGREC_FIXED_RECORD_1LSN_EXAMPLE, - trn, NULL, - LSN_STORE_SIZE, - TRANSLOG_INTERNAL_PARTS + 1, parts, NULL)) + LOGREC_FIXED_RECORD_1LSN_EXAMPLE, trn, NULL, + LSN_STORE_SIZE, TRANSLOG_INTERNAL_PARTS + 1, + parts, NULL, NULL)) { fprintf(stderr, "1 Can't write reference before record #%lu\n", (ulong) i); @@ -237,7 +236,7 @@ int main(int argc __attribute__((unused)), char *argv[]) LOGREC_VARIABLE_RECORD_1LSN_EXAMPLE, trn, NULL, LSN_STORE_SIZE + rec_len, TRANSLOG_INTERNAL_PARTS + 2, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "1 Can't write var reference before record #%lu\n", (ulong) i); @@ -256,9 +255,8 @@ int main(int argc __attribute__((unused)), char *argv[]) trn->short_id= i % 0xFFFF; if (translog_write_record(&lsn, LOGREC_FIXED_RECORD_2LSN_EXAMPLE, - trn, NULL, 23, - TRANSLOG_INTERNAL_PARTS + 1, - parts, NULL)) + trn, NULL, 23, TRANSLOG_INTERNAL_PARTS + 1, + parts, NULL, NULL)) { fprintf(stderr, "0 Can't write 
reference before record #%lu\n", (ulong) i); @@ -279,7 +277,7 @@ int main(int argc __attribute__((unused)), char *argv[]) LOGREC_VARIABLE_RECORD_2LSN_EXAMPLE, trn, NULL, LSN_STORE_SIZE * 2 + rec_len, TRANSLOG_INTERNAL_PARTS + 2, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "0 Can't write var reference before record #%lu\n", (ulong) i); @@ -296,7 +294,7 @@ int main(int argc __attribute__((unused)), char *argv[]) if (translog_write_record(&lsn, LOGREC_FIXED_RECORD_0LSN_EXAMPLE, trn, NULL, 6, - TRANSLOG_INTERNAL_PARTS + 1, parts, NULL)) + TRANSLOG_INTERNAL_PARTS + 1, parts, NULL, NULL)) { fprintf(stderr, "Can't write record #%lu\n", (ulong) i); translog_destroy(); @@ -314,7 +312,7 @@ int main(int argc __attribute__((unused)), char *argv[]) if (translog_write_record(&lsn, LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE, trn, NULL, rec_len, - TRANSLOG_INTERNAL_PARTS + 1, parts, NULL)) + TRANSLOG_INTERNAL_PARTS + 1, parts, NULL, NULL)) { fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); translog_destroy(); diff --git a/storage/maria/unittest/ma_test_loghandler_multithread-t.c b/storage/maria/unittest/ma_test_loghandler_multithread-t.c index a3af67c8ac2..0a81aeac55f 100644 --- a/storage/maria/unittest/ma_test_loghandler_multithread-t.c +++ b/storage/maria/unittest/ma_test_loghandler_multithread-t.c @@ -138,7 +138,7 @@ void writer(int num) if (translog_write_record(&lsn, LOGREC_FIXED_RECORD_0LSN_EXAMPLE, &trn, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "Can't write LOGREC_FIXED_RECORD_0LSN_EXAMPLE record #%lu " "thread %i\n", (ulong) i, num); @@ -155,7 +155,7 @@ void writer(int num) LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE, &trn, NULL, len, TRANSLOG_INTERNAL_PARTS + 1, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "Can't write variable record #%lu\n", (ulong) i); translog_destroy(); @@ -307,7 +307,7 @@ int main(int argc __attribute__((unused)), LOGREC_FIXED_RECORD_0LSN_EXAMPLE, 
&dummy_transaction_object, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "Can't write the first record\n"); translog_destroy(); diff --git a/storage/maria/unittest/ma_test_loghandler_noflush-t.c b/storage/maria/unittest/ma_test_loghandler_noflush-t.c index 2c3afb9a76b..a53e6257314 100644 --- a/storage/maria/unittest/ma_test_loghandler_noflush-t.c +++ b/storage/maria/unittest/ma_test_loghandler_noflush-t.c @@ -85,7 +85,7 @@ int main(int argc __attribute__((unused)), char *argv[]) LOGREC_FIXED_RECORD_0LSN_EXAMPLE, &dummy_transaction_object, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); translog_destroy(); diff --git a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c index 276640dfd17..fa6fcd544a3 100644 --- a/storage/maria/unittest/ma_test_loghandler_pagecache-t.c +++ b/storage/maria/unittest/ma_test_loghandler_pagecache-t.c @@ -97,7 +97,7 @@ int main(int argc __attribute__((unused)), char *argv[]) LOGREC_FIXED_RECORD_0LSN_EXAMPLE, &dummy_transaction_object, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); translog_destroy(); diff --git a/storage/maria/unittest/ma_test_loghandler_purge-t.c b/storage/maria/unittest/ma_test_loghandler_purge-t.c index c638aa85ac6..d9001b9d3ad 100644 --- a/storage/maria/unittest/ma_test_loghandler_purge-t.c +++ b/storage/maria/unittest/ma_test_loghandler_purge-t.c @@ -80,7 +80,7 @@ int main(int argc __attribute__((unused)), char *argv[]) LOGREC_FIXED_RECORD_0LSN_EXAMPLE, &dummy_transaction_object, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); translog_destroy(); @@ -102,7 +102,7 @@ int main(int argc __attribute__((unused)), char *argv[]) 
LOGREC_FIXED_RECORD_0LSN_EXAMPLE, &dummy_transaction_object, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "Can't write record #%lu\n", (ulong) 0); translog_destroy(); @@ -125,7 +125,7 @@ int main(int argc __attribute__((unused)), char *argv[]) if (translog_write_record(&lsn, LOGREC_VARIABLE_RECORD_0LSN_EXAMPLE, &dummy_transaction_object, NULL, LONG_BUFFER_SIZE, - TRANSLOG_INTERNAL_PARTS + 1, parts, NULL)) + TRANSLOG_INTERNAL_PARTS + 1, parts, NULL, NULL)) { fprintf(stderr, "Can't write variable record\n"); translog_destroy(); @@ -150,7 +150,7 @@ int main(int argc __attribute__((unused)), char *argv[]) LOGREC_FIXED_RECORD_0LSN_EXAMPLE, &dummy_transaction_object, NULL, 6, TRANSLOG_INTERNAL_PARTS + 1, - parts, NULL)) + parts, NULL, NULL)) { fprintf(stderr, "Can't write last record\n"); translog_destroy(); diff --git a/storage/myisam/sort.c b/storage/myisam/sort.c index 3ab478682c6..3d7e460eea1 100644 --- a/storage/myisam/sort.c +++ b/storage/myisam/sort.c @@ -78,10 +78,10 @@ static int NEAR_F write_keys_varlen(MI_SORT_PARAM *info,uchar **sort_keys, static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile,BUFFPEK *buffpek, uint sort_length); static int NEAR_F write_merge_key(MI_SORT_PARAM *info, IO_CACHE *to_file, - char *key, uint sort_length, uint count); + uchar *key, uint sort_length, uint count); static int NEAR_F write_merge_key_varlen(MI_SORT_PARAM *info, IO_CACHE *to_file, - char* key, uint sort_length, + uchar* key, uint sort_length, uint count); static inline int my_var_write(MI_SORT_PARAM *info, IO_CACHE *to_file, uchar *bufs); @@ -858,16 +858,15 @@ static uint NEAR_F read_to_buffer_varlen(IO_CACHE *fromfile, BUFFPEK *buffpek, static int NEAR_F write_merge_key_varlen(MI_SORT_PARAM *info, - IO_CACHE *to_file,char* key, + IO_CACHE *to_file, uchar* key, uint sort_length, uint count) { uint idx; - - char *bufs = key; + uchar *bufs = key; for (idx=1;idx<=count;idx++) { int err; - if ((err= 
my_var_write(info,to_file, (uchar*) bufs))) + if ((err= my_var_write(info, to_file, bufs))) return (err); bufs=bufs+sort_length; } @@ -876,10 +875,10 @@ static int NEAR_F write_merge_key_varlen(MI_SORT_PARAM *info, static int NEAR_F write_merge_key(MI_SORT_PARAM *info __attribute__((unused)), - IO_CACHE *to_file, char* key, + IO_CACHE *to_file, uchar* key, uint sort_length, uint count) { - return my_b_write(to_file,(uchar*) key,(uint) sort_length*count); + return my_b_write(to_file, key, (uint) sort_length * count); } /* From e81426170095017c9fe9727ca6274ef2e3e4819d Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 2 Oct 2007 19:16:20 +0200 Subject: [PATCH 11/12] Fix for pushbuild failure (noticeable only on 64-bit) storage/maria/ma_pagecache.c: pagecache->blocks is now long, takes 8 bytes on some platforms. The cast to ulonglong in int8store is for those platforms where ulong is 32-bit and int8store is implemented with << shifts (x<<40 is undefined if x is 32-bit). storage/maria/ma_recovery.c: this change corresponds to the one done in ma_pagecache.c: number of dirty pages is stored in 8 bytes. 
--- storage/maria/ma_pagecache.c | 10 +++++----- storage/maria/ma_recovery.c | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/storage/maria/ma_pagecache.c b/storage/maria/ma_pagecache.c index 4a3fc555a8a..aa21a8bfacb 100755 --- a/storage/maria/ma_pagecache.c +++ b/storage/maria/ma_pagecache.c @@ -3898,7 +3898,7 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, LSN *min_rec_lsn) { my_bool error= 0; - uint stored_list_size= 0; + ulong stored_list_size= 0; uint file_hash; char *ptr; LSN minimum_rec_lsn= LSN_MAX; @@ -3941,8 +3941,8 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, } } - compile_time_assert(sizeof(pagecache->blocks) <= 4); - str->length= 4 + /* number of dirty pages */ + compile_time_assert(sizeof(pagecache->blocks) <= 8); + str->length= 8 + /* number of dirty pages */ (4 + /* file */ 4 + /* pageno */ LSN_STORE_SIZE /* rec_lsn */ @@ -3950,8 +3950,8 @@ my_bool pagecache_collect_changed_blocks_with_lsn(PAGECACHE *pagecache, if (NULL == (str->str= my_malloc(str->length, MYF(MY_WME)))) goto err; ptr= str->str; - int4store(ptr, stored_list_size); - ptr+= 4; + int8store(ptr, (ulonglong)stored_list_size); + ptr+= 8; if (!stored_list_size) goto end; for (file_hash= 0; file_hash < PAGECACHE_CHANGED_BLOCKS_HASH; file_hash++) diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index 1ae2244159c..c994860ddfd 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -2003,7 +2003,7 @@ static MARIA_HA *get_MARIA_HA_from_UNDO_record(const static LSN parse_checkpoint_record(LSN lsn) { - uint i; + ulong i; TRANSLOG_HEADER_BUFFER rec; tprint(tracef, "Loading data from checkpoint record at LSN (%lu,0x%lx)\n", @@ -2087,9 +2087,9 @@ static LSN parse_checkpoint_record(LSN lsn) } /* dirty pages */ - uint nb_dirty_pages= uint4korr(ptr); - ptr+= 4; - tprint(tracef, "%u dirty pages\n", nb_dirty_pages); + ulong nb_dirty_pages= uint8korr(ptr); + ptr+= 8; + 
tprint(tracef, "%lu dirty pages\n", nb_dirty_pages); if (hash_init(&all_dirty_pages, &my_charset_bin, nb_dirty_pages, offsetof(struct st_dirty_page, file_and_page_id), sizeof(((struct st_dirty_page *)NULL)->file_and_page_id), From 24db7ed7e13c603488c0a5e05ad9c87754b2ffaf Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 3 Oct 2007 18:10:32 +0200 Subject: [PATCH 12/12] WL#3072 Maria recovery. * Thanks to Serg's tip, we fix here the compilation issue of REDO_REPAIR_TABLE's execution, by defining versions of _ma_killed_ptr() and _ma_check_print_info|warning|error() in maria_read_log.c (we move those of maria_chk.c into an include file and include it in maria_chk.c and maria_read_log.c). Execution of such a record appears to work in my tests (it only happens in maria_read_log; recovery-from-mysqld skips DDLs and REPAIR is considered DDL here as it bypasses logging): tested ALTER TABLE ENABLE KEYS and then remove table, apply log: that did a repair. * Recent changes broke maria_read_log a bit: -a became default and -o caused error; fixing this. storage/maria/Makefile.am: adding new file storage/maria/ma_recovery.c: enable execution of REDO_REPAIR_TABLE by maria_read_log now that it compiles. No reason to keep only T_QUICK from testflag. storage/maria/maria_chk.c: moving these functions to ma_check_standalone.h for reusability storage/maria/maria_def.h: comment storage/maria/maria_read_log.c: ma_check_standalone.h needs my_progname_short. Fixing bug where "maria_read_log" would default to -a and "maria_read_log -o" would throw an error. Implemented behaviour is: - no options: usage() - -a : applies, applies UNDOs by default unless --disable-undo - -o : only prints storage/maria/ma_check_standalone.h: All standalone programs which need to use functions from ma_check.c (like maria_repair()) must define their version of _ma_killed_ptr() and _ma_check_print_info|warning|error(). 
Indeed, linking with ma_check.o brings in the dependencies of ma_check.o which are definitions of the above functions; if the program does not define them then the ones of ha_maria.o are used i.e. ha_maria.o is linked into the program, and this brings dependencies of ha_maria.o on mysqld.o into the program's linking which thus fails, as the program is not linked with mysqld.o. We put in this file the functions which maria_chk.c uses, so that they can be reused by maria_read_log (when it replays REDO_REPAIR_TABLE) as they are good enough (they just print to stdout/stderr like maria_read_log already does). --- storage/maria/Makefile.am | 2 +- storage/maria/ma_check_standalone.h | 106 ++++++++++++++++++++++++++++ storage/maria/ma_recovery.c | 12 +--- storage/maria/maria_chk.c | 82 +-------------------- storage/maria/maria_def.h | 5 +- storage/maria/maria_read_log.c | 19 ++--- 6 files changed, 124 insertions(+), 102 deletions(-) create mode 100644 storage/maria/ma_check_standalone.h diff --git a/storage/maria/Makefile.am b/storage/maria/Makefile.am index 2bd9b7db922..795784f31aa 100644 --- a/storage/maria/Makefile.am +++ b/storage/maria/Makefile.am @@ -62,7 +62,7 @@ noinst_HEADERS = maria_def.h ma_rt_index.h ma_rt_key.h ma_rt_mbr.h \ ma_control_file.h ha_maria.h ma_blockrec.h \ ma_loghandler.h ma_loghandler_lsn.h ma_pagecache.h \ ma_checkpoint.h ma_recovery.h ma_commit.h \ - trnman_public.h + trnman_public.h ma_check_standalone.h ma_test1_DEPENDENCIES= $(LIBRARIES) ma_test1_LDADD= @CLIENT_EXTRA_LDFLAGS@ libmaria.a \ $(top_builddir)/storage/myisam/libmyisam.a \ diff --git a/storage/maria/ma_check_standalone.h b/storage/maria/ma_check_standalone.h new file mode 100644 index 00000000000..3874d722d6c --- /dev/null +++ b/storage/maria/ma_check_standalone.h @@ -0,0 +1,106 @@ +/* Copyright (C) 2007 MySQL AB + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software 
Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ + +/* + All standalone programs which need to use functions from ma_check.c + (like maria_repair()) must define their version of _ma_killed_ptr() + and _ma_check_print_info|warning|error(). Indeed, linking with ma_check.o + brings in the dependencies of ma_check.o which are definitions of the above + functions; if the program does not define them then the ones of + ha_maria.o are used i.e. ha_maria.o is linked into the program, and this + brings dependencies of ha_maria.o on mysqld.o into the program's linking + which thus fails, as the program is not linked with mysqld.o. + This file contains the versions of these functions used by maria_chk and + maria_read_log. +*/ + +/* + Check if check/repair operation was killed by a signal +*/ + +static int not_killed= 0; + +volatile int *_ma_killed_ptr(HA_CHECK *param __attribute__((unused))) +{ + return &not_killed; /* always NULL */ +} + + /* print warnings and errors */ + /* VARARGS */ + +void _ma_check_print_info(HA_CHECK *param __attribute__((unused)), + const char *fmt,...) +{ + va_list args; + DBUG_ENTER("_ma_check_print_info"); + DBUG_PRINT("enter", ("format: %s", fmt)); + + va_start(args,fmt); + VOID(vfprintf(stdout, fmt, args)); + VOID(fputc('\n',stdout)); + va_end(args); + DBUG_VOID_RETURN; +} + +/* VARARGS */ + +void _ma_check_print_warning(HA_CHECK *param, const char *fmt,...) 
+{ + va_list args; + DBUG_ENTER("_ma_check_print_warning"); + DBUG_PRINT("enter", ("format: %s", fmt)); + + fflush(stdout); + if (!param->warning_printed && !param->error_printed) + { + if (param->testflag & T_SILENT) + fprintf(stderr,"%s: MARIA file %s\n",my_progname_short, + param->isam_file_name); + param->out_flag|= O_DATA_LOST; + } + param->warning_printed=1; + va_start(args,fmt); + fprintf(stderr,"%s: warning: ",my_progname_short); + VOID(vfprintf(stderr, fmt, args)); + VOID(fputc('\n',stderr)); + fflush(stderr); + va_end(args); + DBUG_VOID_RETURN; +} + +/* VARARGS */ + +void _ma_check_print_error(HA_CHECK *param, const char *fmt,...) +{ + va_list args; + DBUG_ENTER("_ma_check_print_error"); + DBUG_PRINT("enter", ("format: %s", fmt)); + + fflush(stdout); + if (!param->warning_printed && !param->error_printed) + { + if (param->testflag & T_SILENT) + fprintf(stderr,"%s: MARIA file %s\n",my_progname_short,param->isam_file_name); + param->out_flag|= O_DATA_LOST; + } + param->error_printed|=1; + va_start(args,fmt); + fprintf(stderr,"%s: error: ",my_progname_short); + VOID(vfprintf(stderr, fmt, args)); + VOID(fputc('\n',stderr)); + fflush(stderr); + va_end(args); + DBUG_VOID_RETURN; +} diff --git a/storage/maria/ma_recovery.c b/storage/maria/ma_recovery.c index c994860ddfd..631a10b8025 100644 --- a/storage/maria/ma_recovery.c +++ b/storage/maria/ma_recovery.c @@ -783,27 +783,17 @@ prototype_redo_exec_hook(REDO_REPAIR_TABLE) than the mapping, so we can repair. */ tprint(tracef, " repairing...\n"); - /** - @todo RECOVERY BUG fix this: - the maria_chk_init() call causes a heap of linker errors in ha_maria.cc! 
- */ -#if 0 HA_CHECK param; maria_chk_init(&param); param.isam_file_name= info->s->open_file_name; param.testflag= uint4korr(rec->header); - if (maria_repair(&param, info, info->s->open_file_name, - param.testflag & T_QUICK)) + if (maria_repair(&param, info, info->s->open_file_name, param.testflag)) goto end; if (_ma_update_create_rename_lsn(info->s, rec->lsn, TRUE)) goto end; error= 0; end: return error; -#else - DBUG_ASSERT("fix this table repairing" == NULL); - return error; -#endif } diff --git a/storage/maria/maria_chk.c b/storage/maria/maria_chk.c index e6ac0dcfc50..639334c3033 100644 --- a/storage/maria/maria_chk.c +++ b/storage/maria/maria_chk.c @@ -1758,84 +1758,4 @@ err: } /* sort_record_index */ - -/* - Check if maria_chk was killed by a signal - This is overloaded by other programs that want to be able to abort - sorting -*/ - -static int not_killed= 0; - -volatile int *_ma_killed_ptr(HA_CHECK *param __attribute__((unused))) -{ - return &not_killed; /* always NULL */ -} - - /* print warnings and errors */ - /* VARARGS */ - -void _ma_check_print_info(HA_CHECK *param __attribute__((unused)), - const char *fmt,...) -{ - va_list args; - DBUG_ENTER("_ma_check_print_info"); - DBUG_PRINT("enter", ("format: %s", fmt)); - - va_start(args,fmt); - VOID(vfprintf(stdout, fmt, args)); - VOID(fputc('\n',stdout)); - va_end(args); - DBUG_VOID_RETURN; -} - -/* VARARGS */ - -void _ma_check_print_warning(HA_CHECK *param, const char *fmt,...) 
-{ - va_list args; - DBUG_ENTER("_ma_check_print_warning"); - DBUG_PRINT("enter", ("format: %s", fmt)); - - fflush(stdout); - if (!param->warning_printed && !param->error_printed) - { - if (param->testflag & T_SILENT) - fprintf(stderr,"%s: MARIA file %s\n",my_progname_short, - param->isam_file_name); - param->out_flag|= O_DATA_LOST; - } - param->warning_printed=1; - va_start(args,fmt); - fprintf(stderr,"%s: warning: ",my_progname_short); - VOID(vfprintf(stderr, fmt, args)); - VOID(fputc('\n',stderr)); - fflush(stderr); - va_end(args); - DBUG_VOID_RETURN; -} - -/* VARARGS */ - -void _ma_check_print_error(HA_CHECK *param, const char *fmt,...) -{ - va_list args; - DBUG_ENTER("_ma_check_print_error"); - DBUG_PRINT("enter", ("format: %s", fmt)); - - fflush(stdout); - if (!param->warning_printed && !param->error_printed) - { - if (param->testflag & T_SILENT) - fprintf(stderr,"%s: MARIA file %s\n",my_progname_short,param->isam_file_name); - param->out_flag|= O_DATA_LOST; - } - param->error_printed|=1; - va_start(args,fmt); - fprintf(stderr,"%s: error: ",my_progname_short); - VOID(vfprintf(stderr, fmt, args)); - VOID(fputc('\n',stderr)); - fflush(stderr); - va_end(args); - DBUG_VOID_RETURN; -} +#include "ma_check_standalone.h" diff --git a/storage/maria/maria_def.h b/storage/maria/maria_def.h index bac83db4f51..c1f21a4ef67 100644 --- a/storage/maria/maria_def.h +++ b/storage/maria/maria_def.h @@ -923,7 +923,10 @@ C_MODE_START int _ma_flush_table_files(MARIA_HA *info, uint flush_data_or_index, enum flush_type flush_type_for_data, enum flush_type flush_type_for_index); -/* Functions needed by _ma_check (are overrided in MySQL) */ +/* + Functions needed by _ma_check (are overridden in MySQL/ha_maria.cc). + See ma_check_standalone.h . 
+*/ volatile int *_ma_killed_ptr(HA_CHECK *param); void _ma_check_print_error _VARARGS((HA_CHECK *param, const char *fmt, ...)); void _ma_check_print_warning _VARARGS((HA_CHECK *param, const char *fmt, ...)); diff --git a/storage/maria/maria_read_log.c b/storage/maria/maria_read_log.c index ec1f7697a23..59882f81d9a 100644 --- a/storage/maria/maria_read_log.c +++ b/storage/maria/maria_read_log.c @@ -31,12 +31,14 @@ const char *default_dbug_option= "d:t:i:o,/tmp/maria_read_log.trace"; #endif /* DBUG_OFF */ static my_bool opt_only_display, opt_apply, opt_apply_undo, opt_silent; static ulong opt_page_buffer_size; +static const char *my_progname_short; int main(int argc, char **argv) { LSN lsn; char **default_argv; MY_INIT(argv[0]); + my_progname_short= my_progname+dirname_length(my_progname); load_defaults("my", load_default_groups, &argc, &argv); default_argv= argv; @@ -103,12 +105,12 @@ int main(int argc, char **argv) if (maria_apply_log(lsn, opt_apply, opt_silent ? NULL : stdout, opt_apply_undo, FALSE)) goto err; - fprintf(stdout, "%s: SUCCESS\n", my_progname); + fprintf(stdout, "%s: SUCCESS\n", my_progname_short); goto end; err: /* don't touch anything more, in case we hit a bug */ - fprintf(stderr, "%s: FAILED\n", my_progname); + fprintf(stderr, "%s: FAILED\n", my_progname_short); exit(1); end: maria_end(); @@ -119,6 +121,9 @@ end: } +#include "ma_check_standalone.h" + + static struct my_option my_long_options[] = { {"apply", 'a', @@ -155,7 +160,7 @@ static struct my_option my_long_options[] = static void print_version(void) { VOID(printf("%s Ver 1.1 for %s on %s\n", - my_progname, SYSTEM_TYPE, MACHINE_TYPE)); + my_progname_short, SYSTEM_TYPE, MACHINE_TYPE)); NETWARE_SET_SCREEN_MODE(1); } @@ -169,7 +174,7 @@ static void usage(void) puts("Display and apply log records from a MARIA transaction log"); puts("found in the current directory (for now)"); - VOID(printf("\nUsage: %s OPTIONS\n", my_progname)); + VOID(printf("\nUsage: %s OPTIONS\n", my_progname_short)); 
puts("You need to use one of -o or -a"); my_print_help(my_long_options); print_defaults("my", load_default_groups); @@ -203,13 +208,11 @@ static void get_options(int *argc,char ***argv) { int ho_error; - my_progname= argv[0][0]; - if ((ho_error=handle_options(argc, argv, my_long_options, get_one_option))) exit(ho_error); - if (opt_apply_undo) - opt_apply= 1; + if (!opt_apply) + opt_apply_undo= FALSE; if ((opt_only_display + opt_apply) != 1) {