diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok index 4617e9d697b..092b6f3f2a5 100644 --- a/BitKeeper/etc/logging_ok +++ b/BitKeeper/etc/logging_ok @@ -24,6 +24,7 @@ heikki@donna.mysql.fi heikki@hundin.mysql.fi heikki@rescue. heikki@work.mysql.com +hf@deer.(none) hf@deer.mysql.r18.ru hf@genie.(none) igor@hundin.mysql.fi diff --git a/Docs/Makefile.am b/Docs/Makefile.am index 00eb936c408..f3df055a7dd 100644 --- a/Docs/Makefile.am +++ b/Docs/Makefile.am @@ -157,53 +157,6 @@ manual_letter.de.ps: manual.de.texi include.texi touch $@ -# -# Internals Manual -# - -# GNU Info -internals.info: internals.texi include.texi - cd $(srcdir) && $(MAKEINFO) --no-split -I $(srcdir) $< - -# Plain Text -internals.txt: internals.texi include.texi - cd $(srcdir) && \ - $(MAKEINFO) -I $(srcdir) --no-headers --no-split --output $@ $< - -# HTML, all in one file -internals.html: internals.texi include.texi $(srcdir)/Support/texi2html - cd $(srcdir) && @PERL@ $(srcdir)/Support/texi2html $(TEXI2HTML_FLAGS) $< -internals_toc.html: internals.html - -# PDF, Portable Document Format -internals.pdf: internals.texi - sed -e 's|@image{[^}]*} *||g' <$< >internals-tmp.texi - pdftex --interaction=nonstopmode internals-tmp.texi - texindex internals-tmp.?? - pdftex --interaction=nonstopmode internals-tmp.texi - texindex internals-tmp.?? 
- pdftex --interaction=nonstopmode internals-tmp.texi - mv internals-tmp.pdf $@ - rm -f internals-tmp.* - touch $@ - -# Postscript, A4 Paper -internals_a4.ps: internals.texi include.texi - TEXINPUTS=$(srcdir):$$TEXINPUTS \ - MAKEINFO='$(MAKEINFO) -I $(srcdir)' \ - $(TEXI2DVI) --batch --texinfo --quiet '@afourpaper' $< - $(DVIPS) -t a4 internals.dvi -o $@ - touch $@ - -# Postscript, US Letter Paper -internals_letter.ps: internals.texi include.texi - TEXINPUTS=$(srcdir):$$TEXINPUTS \ - MAKEINFO='$(MAKEINFO) -I $(srcdir)' \ - $(TEXI2DVI) --batch $< - $(DVIPS) -t letter internals.dvi -o $@ - touch $@ - - # # Miscellaneous # diff --git a/client/mysqltest.c b/client/mysqltest.c index f6c999b18e4..f5afa0fa0df 100644 --- a/client/mysqltest.c +++ b/client/mysqltest.c @@ -996,7 +996,8 @@ int do_sync_with_master2(const char* p) if (!(row = mysql_fetch_row(res))) die("line %u: empty result in %s", start_lineno, query_buf); if (!row[0]) - die("Error on slave while syncing with master"); + die("line %u: could not sync with master ('%s' returned NULL)", + start_lineno, query_buf); mysql_free_result(res); last_result=0; if (rpl_parse) diff --git a/include/config-win.h b/include/config-win.h index 9931d2c4b95..096c00e4574 100644 --- a/include/config-win.h +++ b/include/config-win.h @@ -130,6 +130,11 @@ typedef uint rf_SetTimer; #define SIZEOF_LONG 4 #define SIZEOF_LONG_LONG 8 #define SIZEOF_OFF_T 8 +#ifdef _WIN64 +#define SIZEOF_CHARP 8 +#else +#define SIZEOF_CHARP 4 +#endif #define HAVE_BROKEN_NETINET_INCLUDES #ifdef __NT__ #define HAVE_NAMED_PIPE /* We can only create pipes on NT */ @@ -196,6 +201,7 @@ inline double ulonglong2double(ulonglong value) /* Optimized store functions for Intel x86 */ +#ifndef _WIN64 #define sint2korr(A) (*((int16 *) (A))) #define sint3korr(A) ((int32) ((((uchar) (A)[2]) & 128) ? 
\ (((uint32) 255L << 24) | \ @@ -236,7 +242,7 @@ inline double ulonglong2double(ulonglong value) #define float8get(V,M) doubleget((V),(M)) #define float4store(V,M) memcpy((byte*) V,(byte*) (&M),sizeof(float)) #define float8store(V,M) doublestore((V),(M)) - +#endif /* _WIN64 */ #define HAVE_PERROR #define HAVE_VFPRINT diff --git a/include/my_base.h b/include/my_base.h index cd04ab971db..91a248cd401 100644 --- a/include/my_base.h +++ b/include/my_base.h @@ -255,6 +255,7 @@ enum ha_base_keytype { #define HA_ERR_CANNOT_ADD_FOREIGN 150 /* Cannot add a foreign key constr. */ #define HA_ERR_NO_REFERENCED_ROW 151 /* Cannot add a child row */ #define HA_ERR_ROW_IS_REFERENCED 152 /* Cannot delete a parent row */ +#define HA_ERR_NO_SAVEPOINT 153 /* No savepoint with that name */ /* Other constants */ diff --git a/include/my_global.h b/include/my_global.h index 51e267fc977..7a8465a0484 100644 --- a/include/my_global.h +++ b/include/my_global.h @@ -642,9 +642,6 @@ typedef long my_ptrdiff_t; typedef long long my_ptrdiff_t; #endif -/* typedef used for length of string; Should be unsigned! */ -typedef ulong size_str; - #define MY_ALIGN(A,L) (((A) + (L) - 1) & ~((L) - 1)) #define ALIGN_SIZE(A) MY_ALIGN((A),sizeof(double)) /* Size to make adressable obj. */ @@ -713,6 +710,9 @@ typedef long longlong; #endif #endif +/* typedef used for length of string; Should be unsigned! */ +typedef ulong size_str; + #ifdef USE_RAID /* The following is done with a if to not get problems with pre-processors @@ -850,7 +850,7 @@ typedef char bool; /* Ordinary boolean values 0 1 */ */ /* Optimized store functions for Intel x86 */ -#ifdef __i386__ +#if defined(__i386__) && !defined(_WIN64) #define sint2korr(A) (*((int16 *) (A))) #define sint3korr(A) ((int32) ((((uchar) (A)[2]) & 128) ? 
\ (((uint32) 255L << 24) | \ diff --git a/include/my_sys.h b/include/my_sys.h index 603b3bad6bd..7f8b8a80a1c 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -138,7 +138,7 @@ extern int NEAR my_errno; /* Last error in mysys */ #define QUICK_SAFEMALLOC sf_malloc_quick=1 #define NORMAL_SAFEMALLOC sf_malloc_quick=0 extern uint sf_malloc_prehunc,sf_malloc_endhunc,sf_malloc_quick; -extern ulonglong safemalloc_mem_limit; +extern ulonglong sf_malloc_mem_limit; #define CALLER_INFO_PROTO , const char *sFile, uint uLine #define CALLER_INFO , __FILE__, __LINE__ @@ -239,7 +239,7 @@ extern int NEAR my_umask, /* Default creation mask */ NEAR my_safe_to_handle_signal, /* Set when allowed to SIGTSTP */ NEAR my_dont_interrupt; /* call remember_intr when set */ extern my_bool NEAR mysys_uses_curses, my_use_symdir; -extern long lCurMemory,lMaxMemory; /* from safemalloc */ +extern ulong sf_malloc_cur_memory, sf_malloc_max_memory; extern ulong my_default_record_cache_size; extern my_bool NEAR my_disable_locking,NEAR my_disable_async_io, diff --git a/include/mysql_version.h.in b/include/mysql_version.h.in index 793bf36e9fe..da184665f6e 100644 --- a/include/mysql_version.h.in +++ b/include/mysql_version.h.in @@ -10,6 +10,7 @@ #else #define PROTOCOL_VERSION @PROTOCOL_VERSION@ #define MYSQL_SERVER_VERSION "@VERSION@" +#define MYSQL_BASE_VERSION "mysqld-@MYSQL_BASE_VERSION@" #ifndef MYSQL_SERVER_SUFFIX #define MYSQL_SERVER_SUFFIX "@MYSQL_SERVER_SUFFIX@" #endif diff --git a/include/thr_alarm.h b/include/thr_alarm.h index 439f046252f..8ff4472f700 100644 --- a/include/thr_alarm.h +++ b/include/thr_alarm.h @@ -100,7 +100,7 @@ typedef struct st_alarm { #define thr_alarm_init(A) (*(A))=0 #define thr_alarm_in_use(A) (*(A)!= 0) void init_thr_alarm(uint max_alarm); -bool thr_alarm(thr_alarm_t *alarmed, uint sec, ALARM *buff); +my_bool thr_alarm(thr_alarm_t *alarmed, uint sec, ALARM *buff); void thr_alarm_kill(pthread_t thread_id); void thr_end_alarm(thr_alarm_t *alarmed); void 
end_thr_alarm(my_bool free_structures); diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c index e61dcf4ecee..8402993e971 100644 --- a/innobase/btr/btr0cur.c +++ b/innobase/btr/btr0cur.c @@ -1364,7 +1364,8 @@ btr_cur_update_sec_rec_in_place( } /***************************************************************** -Updates a record when the update causes no size changes in its fields. */ +Updates a record when the update causes no size changes in its fields. +We assume here that the ordering fields of the record do not change. */ ulint btr_cur_update_in_place( @@ -1455,7 +1456,8 @@ btr_cur_update_in_place( Tries to update a record on a page in an index tree. It is assumed that mtr holds an x-latch on the page. The operation does not succeed if there is too little space on the page or if the update would result in too empty a page, -so that tree compression is recommended. */ +so that tree compression is recommended. We assume here that the ordering +fields of the record do not change. 
*/ ulint btr_cur_optimistic_update( @@ -1507,10 +1509,11 @@ btr_cur_optimistic_update( ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); - if (!row_upd_changes_field_size(rec, index, update)) { + if (!row_upd_changes_field_size_or_external(rec, index, update)) { - /* The simplest and most common case: the update does not - change the size of any field */ + /* The simplest and the most common case: the update does not + change the size of any field and none of the updated fields is + externally stored in rec or update */ return(btr_cur_update_in_place(flags, cursor, update, cmpl_info, thr, mtr)); @@ -1539,7 +1542,7 @@ btr_cur_optimistic_update( new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); - row_upd_clust_index_replace_new_col_vals(new_entry, update); + row_upd_index_replace_new_col_vals(new_entry, index, update, NULL); old_rec_size = rec_get_size(rec); new_rec_size = rec_get_converted_size(new_entry); @@ -1669,54 +1672,13 @@ btr_cur_pess_upd_restore_supremum( lock_rec_reset_and_inherit_gap_locks(page_get_supremum_rec(prev_page), rec); } - -/*************************************************************** -Replaces and copies the data in the new column values stored in the -update vector to the clustered index entry given. 
*/ -static -void -btr_cur_copy_new_col_vals( -/*======================*/ - dtuple_t* entry, /* in/out: index entry where replaced */ - upd_t* update, /* in: update vector */ - mem_heap_t* heap) /* in: heap where data is copied */ -{ - upd_field_t* upd_field; - dfield_t* dfield; - dfield_t* new_val; - ulint field_no; - byte* data; - ulint i; - - dtuple_set_info_bits(entry, update->info_bits); - - for (i = 0; i < upd_get_n_fields(update); i++) { - - upd_field = upd_get_nth_field(update, i); - - field_no = upd_field->field_no; - - dfield = dtuple_get_nth_field(entry, field_no); - - new_val = &(upd_field->new_val); - - if (new_val->len == UNIV_SQL_NULL) { - data = NULL; - } else { - data = mem_heap_alloc(heap, new_val->len); - - ut_memcpy(data, new_val->data, new_val->len); - } - - dfield_set_data(dfield, data, new_val->len); - } -} /***************************************************************** Performs an update of a record on a page of a tree. It is assumed that mtr holds an x-latch on the tree and on the cursor page. If the update is made on the leaf level, to avoid deadlocks, mtr must also -own x-latches to brothers of page, if those brothers exist. */ +own x-latches to brothers of page, if those brothers exist. We assume +here that the ordering fields of the record do not change. 
*/ ulint btr_cur_pessimistic_update( @@ -1813,7 +1775,7 @@ btr_cur_pessimistic_update( new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); - btr_cur_copy_new_col_vals(new_entry, update, heap); + row_upd_index_replace_new_col_vals(new_entry, index, update, heap); if (!(flags & BTR_KEEP_SYS_FLAG)) { row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, @@ -3369,8 +3331,8 @@ btr_free_externally_stored_field( page_no = mach_read_from_4(data + local_len + BTR_EXTERN_PAGE_NO); - offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET); - + offset = mach_read_from_4(data + local_len + + BTR_EXTERN_OFFSET); extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4); diff --git a/innobase/btr/btr0pcur.c b/innobase/btr/btr0pcur.c index 7b817d8263d..63e7763ef87 100644 --- a/innobase/btr/btr0pcur.c +++ b/innobase/btr/btr0pcur.c @@ -364,6 +364,8 @@ btr_pcur_move_to_next_page( btr_leaf_page_release(page, cursor->latch_mode, mtr); page_cur_set_before_first(next_page, btr_pcur_get_page_cur(cursor)); + + page_check_dir(next_page); } /************************************************************* diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c index e000d862403..246a60a61cd 100644 --- a/innobase/buf/buf0buf.c +++ b/innobase/buf/buf0buf.c @@ -209,12 +209,12 @@ ibool buf_debug_prints = FALSE; /* If this is set TRUE, /************************************************************************ Calculates a page checksum which is stored to the page when it is written -to a file. Note that we must be careful to calculate the same value -on 32-bit and 64-bit architectures. */ +to a file. Note that we must be careful to calculate the same value on +32-bit and 64-bit architectures. 
*/ ulint -buf_calc_page_checksum( -/*===================*/ +buf_calc_page_new_checksum( +/*=======================*/ /* out: checksum */ byte* page) /* in: buffer page */ { @@ -222,12 +222,39 @@ buf_calc_page_checksum( /* Since the fields FIL_PAGE_FILE_FLUSH_LSN and ..._ARCH_LOG_NO are written outside the buffer pool to the first pages of data - files, we have to skip them in page checksum calculation */ + files, we have to skip them in the page checksum calculation. + We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the + checksum is stored, and also the last 8 bytes of page because + there we store the old formula checksum. */ + + checksum = ut_fold_binary(page + FIL_PAGE_OFFSET, + FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET) + + ut_fold_binary(page + FIL_PAGE_DATA, + UNIV_PAGE_SIZE - FIL_PAGE_DATA + - FIL_PAGE_END_LSN_OLD_CHKSUM); + checksum = checksum & 0xFFFFFFFF; + + return(checksum); +} + +/************************************************************************ +In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only +looked at the first few bytes of the page. This calculates that old +checksum. +NOTE: we must first store the new formula checksum to +FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum +because this takes that field as an input! 
*/ + +ulint +buf_calc_page_old_checksum( +/*=======================*/ + /* out: checksum */ + byte* page) /* in: buffer page */ +{ + ulint checksum; checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN); - + ut_fold_binary(page + FIL_PAGE_DATA, - UNIV_PAGE_SIZE - FIL_PAGE_DATA - - FIL_PAGE_END_LSN); + checksum = checksum & 0xFFFFFFFF; return(checksum); @@ -243,27 +270,47 @@ buf_page_is_corrupted( byte* read_buf) /* in: a database page */ { ulint checksum; + ulint old_checksum; + ulint checksum_field; + ulint old_checksum_field; - checksum = buf_calc_page_checksum(read_buf); + if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4) + != mach_read_from_4(read_buf + UNIV_PAGE_SIZE + - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) { + + /* Stored log sequence numbers at the start and the end + of page do not match */ - /* Note that InnoDB initializes empty pages to zero, and - early versions of InnoDB did not store page checksum to - the 4 most significant bytes of the page lsn field at the - end of a page: */ - - if ((mach_read_from_4(read_buf + FIL_PAGE_LSN + 4) - != mach_read_from_4(read_buf + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN + 4)) - || (checksum != mach_read_from_4(read_buf - + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN) - && mach_read_from_4(read_buf + FIL_PAGE_LSN) - != mach_read_from_4(read_buf - + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN))) { return(TRUE); } + old_checksum = buf_calc_page_old_checksum(read_buf); + + old_checksum_field = mach_read_from_4(read_buf + UNIV_PAGE_SIZE + - FIL_PAGE_END_LSN_OLD_CHKSUM); + + /* There are 2 valid formulas for old_checksum_field: + 1. Very old versions of InnoDB only stored 8 byte lsn to the start + and the end of the page. + 2. Newer InnoDB versions store the old formula checksum there. 
*/ + + if (old_checksum_field != mach_read_from_4(read_buf + FIL_PAGE_LSN) + && old_checksum_field != old_checksum) { + + return(TRUE); + } + + checksum = buf_calc_page_new_checksum(read_buf); + checksum_field = mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM); + + /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id + (always equal to 0), to FIL_PAGE_SPACE_OR_CHKSUM */ + + if (checksum_field != 0 && checksum_field != checksum) { + + return(TRUE); + } + return(FALSE); } @@ -277,6 +324,7 @@ buf_page_print( { dict_index_t* index; ulint checksum; + ulint old_checksum; char* buf; buf = mem_alloc(4 * UNIV_PAGE_SIZE); @@ -291,19 +339,23 @@ buf_page_print( mem_free(buf); - checksum = buf_calc_page_checksum(read_buf); + checksum = buf_calc_page_new_checksum(read_buf); + old_checksum = buf_calc_page_old_checksum(read_buf); ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Page checksum %lu stored checksum %lu\n", - checksum, mach_read_from_4(read_buf - + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN)); + fprintf(stderr, +" InnoDB: Page checksum %lu, prior-to-4.0.14-form checksum %lu\n" +"InnoDB: stored checksum %lu, prior-to-4.0.14-form stored checksum %lu\n", + checksum, old_checksum, + mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM), + mach_read_from_4(read_buf + UNIV_PAGE_SIZE + - FIL_PAGE_END_LSN_OLD_CHKSUM)); fprintf(stderr, "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn at page end %lu\n", mach_read_from_4(read_buf + FIL_PAGE_LSN), mach_read_from_4(read_buf + FIL_PAGE_LSN + 4), mach_read_from_4(read_buf + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN + 4)); + - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)); if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT) { fprintf(stderr, diff --git a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c index 735966c28c5..c0999ee4841 100644 --- a/innobase/buf/buf0flu.c +++ b/innobase/buf/buf0flu.c @@ -361,21 +361,29 @@ buf_flush_init_for_writing( ulint space, /* in: space id */ ulint page_no) /* in: 
page number */ { - /* Write the newest modification lsn to the page */ + UT_NOT_USED(space); + + /* Write the newest modification lsn to the page header and trailer */ mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn); - mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN, newest_lsn); + mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, + newest_lsn); + /* Write the page number */ - /* Write to the page the space id and page number */ - - mach_write_to_4(page + FIL_PAGE_SPACE, space); mach_write_to_4(page + FIL_PAGE_OFFSET, page_no); - /* We overwrite the first 4 bytes of the end lsn field to store - a page checksum */ + /* Store the new formula checksum */ - mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN, - buf_calc_page_checksum(page)); + mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, + buf_calc_page_new_checksum(page)); + + /* We overwrite the first 4 bytes of the end lsn field to store + the old formula checksum. Since it depends also on the field + FIL_PAGE_SPACE_OR_CHKSUM, it has to be calculated after storing the + new formula checksum. 
*/ + + mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, + buf_calc_page_old_checksum(page)); } /************************************************************************ diff --git a/innobase/data/data0data.c b/innobase/data/data0data.c index 8ab5acb4da7..f2f94cc47ce 100644 --- a/innobase/data/data0data.c +++ b/innobase/data/data0data.c @@ -584,8 +584,7 @@ dtuple_convert_big_rec( * sizeof(big_rec_field_t)); /* Decide which fields to shorten: the algorithm is to look for - the longest field which does not occur in the ordering part - of any index on the table */ + the longest field whose type is DATA_BLOB */ n_fields = 0; @@ -610,12 +609,9 @@ dtuple_convert_big_rec( } } - /* Skip over fields which are ordering in some index */ - - if (!is_externally_stored && - dict_field_get_col( - dict_index_get_nth_field(index, i)) - ->ord_part == 0) { + if (!is_externally_stored + && dict_index_get_nth_type(index, i)->mtype + == DATA_BLOB) { dfield = dtuple_get_nth_field(entry, i); @@ -629,9 +625,13 @@ dtuple_convert_big_rec( } } - if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10 - + REC_1BYTE_OFFS_LIMIT) { + /* We do not store externally fields which are smaller than + DICT_MAX_COL_PREFIX_LEN */ + ut_a(DICT_MAX_COL_PREFIX_LEN > REC_1BYTE_OFFS_LIMIT); + + if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10 + + DICT_MAX_COL_PREFIX_LEN) { /* Cannot shorten more */ mem_heap_free(heap); @@ -644,13 +644,19 @@ dtuple_convert_big_rec( drop below 128 which is the limit for the 2-byte offset storage format in a physical record. This we accomplish by storing 128 bytes of data in entry - itself, and only the remaining part to big rec vec. */ + itself, and only the remaining part to big rec vec. + + We store the first bytes locally to the record. Then + we can calculate all ordering fields in all indexes + from locally stored data. 
*/ dfield = dtuple_get_nth_field(entry, longest_i); vector->fields[n_fields].field_no = longest_i; + ut_a(dfield->len > DICT_MAX_COL_PREFIX_LEN); + vector->fields[n_fields].len = dfield->len - - REC_1BYTE_OFFS_LIMIT; + - DICT_MAX_COL_PREFIX_LEN; vector->fields[n_fields].data = mem_heap_alloc(heap, vector->fields[n_fields].len); diff --git a/innobase/data/data0type.c b/innobase/data/data0type.c index 5d0ddf3e887..df430f06bcb 100644 --- a/innobase/data/data0type.c +++ b/innobase/data/data0type.c @@ -85,8 +85,6 @@ dtype_print( printf("DATA_MIX_ID"); } else if (prtype == DATA_ENGLISH) { printf("DATA_ENGLISH"); - } else if (prtype == DATA_FINNISH) { - printf("DATA_FINNISH"); } else { printf("prtype %lu", mtype); } diff --git a/innobase/dict/dict0boot.c b/innobase/dict/dict0boot.c index 374c567c3ca..0bf2ace3324 100644 --- a/innobase/dict/dict0boot.c +++ b/innobase/dict/dict0boot.c @@ -276,7 +276,7 @@ dict_boot(void) DICT_HDR_SPACE, DICT_UNIQUE | DICT_CLUSTERED, 1); - dict_mem_index_add_field(index, (char *) "NAME", 0); + dict_mem_index_add_field(index, (char *) "NAME", 0, 0); index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_TABLES, MLOG_4BYTES, &mtr); @@ -287,7 +287,7 @@ dict_boot(void) index = dict_mem_index_create((char *) "SYS_TABLES", (char *) "ID_IND", DICT_HDR_SPACE, DICT_UNIQUE, 1); - dict_mem_index_add_field(index, (char *) "ID", 0); + dict_mem_index_add_field(index, (char *) "ID", 0, 0); index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_TABLE_IDS, MLOG_4BYTES, &mtr); @@ -313,8 +313,8 @@ dict_boot(void) (char *) "CLUST_IND", DICT_HDR_SPACE, DICT_UNIQUE | DICT_CLUSTERED, 2); - dict_mem_index_add_field(index, (char *) "TABLE_ID", 0); - dict_mem_index_add_field(index, (char *) "POS", 0); + dict_mem_index_add_field(index, (char *) "TABLE_ID", 0, 0); + dict_mem_index_add_field(index, (char *) "POS", 0, 0); index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_COLUMNS, MLOG_4BYTES, &mtr); @@ -343,8 +343,8 @@ dict_boot(void) (char *) "CLUST_IND", DICT_HDR_SPACE, 
DICT_UNIQUE | DICT_CLUSTERED, 2); - dict_mem_index_add_field(index, (char *) "TABLE_ID", 0); - dict_mem_index_add_field(index, (char *) "ID", 0); + dict_mem_index_add_field(index, (char *) "TABLE_ID", 0, 0); + dict_mem_index_add_field(index, (char *) "ID", 0, 0); index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_INDEXES, MLOG_4BYTES, &mtr); @@ -365,8 +365,8 @@ dict_boot(void) (char *) "CLUST_IND", DICT_HDR_SPACE, DICT_UNIQUE | DICT_CLUSTERED, 2); - dict_mem_index_add_field(index, (char *) "INDEX_ID", 0); - dict_mem_index_add_field(index, (char *) "POS", 0); + dict_mem_index_add_field(index, (char *) "INDEX_ID", 0, 0); + dict_mem_index_add_field(index, (char *) "POS", 0, 0); index->page_no = mtr_read_ulint(dict_hdr + DICT_HDR_FIELDS, MLOG_4BYTES, &mtr); diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c index 3619ac02f4d..9139e589a0a 100644 --- a/innobase/dict/dict0crea.c +++ b/innobase/dict/dict0crea.c @@ -337,7 +337,7 @@ dict_create_index_for_cluster_step( for (i = 0; i < table->n_cols; i++) { col = dict_table_get_nth_col(table, i); - dict_mem_index_add_field(index, col->name, 0); + dict_mem_index_add_field(index, col->name, 0, 0); } (node->cluster)->index = index; @@ -450,9 +450,17 @@ dict_create_sys_fields_tuple( dict_field_t* field; dfield_t* dfield; byte* ptr; + ibool index_contains_column_prefix_field = FALSE; + ulint j; ut_ad(index && heap); + for (j = 0; j < index->n_fields; j++) { + if (dict_index_get_nth_field(index, j)->prefix_len > 0) { + index_contains_column_prefix_field = TRUE; + } + } + field = dict_index_get_nth_field(index, i); sys_fields = dict_sys->sys_fields; @@ -466,11 +474,25 @@ dict_create_sys_fields_tuple( mach_write_to_8(ptr, index->id); dfield_set_data(dfield, ptr, 8); - /* 1: POS ----------------------------*/ + /* 1: POS + PREFIX LENGTH ----------------------------*/ + dfield = dtuple_get_nth_field(entry, 1); ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, i); + + if (index_contains_column_prefix_field) { + /* If 
there are column prefix fields in the index, then + we store the number of the field to the 2 HIGH bytes + and the prefix length to the 2 low bytes, */ + + mach_write_to_4(ptr, (i << 16) + field->prefix_len); + } else { + /* Else we store the number of the field to the 2 LOW bytes. + This is to keep the storage format compatible with + InnoDB versions < 4.0.14. */ + + mach_write_to_4(ptr, i); + } dfield_set_data(dfield, ptr, 4); /* 4: COL_NAME -------------------------*/ diff --git a/innobase/dict/dict0dict.c b/innobase/dict/dict0dict.c index c11a5f76d94..2fc05b1923f 100644 --- a/innobase/dict/dict0dict.c +++ b/innobase/dict/dict0dict.c @@ -88,15 +88,6 @@ dict_index_remove_from_cache( dict_table_t* table, /* in: table */ dict_index_t* index); /* in, own: index */ /*********************************************************************** -Adds a column to index. */ -UNIV_INLINE -void -dict_index_add_col( -/*===============*/ - dict_index_t* index, /* in: index */ - dict_col_t* col, /* in: column */ - ulint order); /* in: order criterion */ -/*********************************************************************** Copies fields contained in index2 to index1. */ static void @@ -482,8 +473,9 @@ dict_index_get_nth_col_pos( ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + col = dict_table_get_nth_col(index->table, n); + if (index->type & DICT_CLUSTERED) { - col = dict_table_get_nth_col(index->table, n); return(col->clust_pos); } @@ -492,9 +484,8 @@ dict_index_get_nth_col_pos( for (pos = 0; pos < n_fields; pos++) { field = dict_index_get_nth_field(index, pos); - col = field->col; - if (dict_col_get_no(col) == n) { + if (col == field->col && field->prefix_len == 0) { return(pos); } @@ -502,7 +493,46 @@ dict_index_get_nth_col_pos( return(ULINT_UNDEFINED); } + +/************************************************************************ +Looks for a matching field in an index. The column and the prefix len have +to be the same. 
*/ + +ulint +dict_index_get_nth_field_pos( +/*=========================*/ + /* out: position in internal representation + of the index; if not contained, returns + ULINT_UNDEFINED */ + dict_index_t* index, /* in: index from which to search */ + dict_index_t* index2, /* in: index */ + ulint n) /* in: field number in index2 */ +{ + dict_field_t* field; + dict_field_t* field2; + ulint n_fields; + ulint pos; + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + field2 = dict_index_get_nth_field(index2, n); + + n_fields = dict_index_get_n_fields(index); + + for (pos = 0; pos < n_fields; pos++) { + field = dict_index_get_nth_field(index, pos); + + if (field->col == field2->col + && field->prefix_len == field2->prefix_len) { + + return(pos); + } + } + + return(ULINT_UNDEFINED); +} + /************************************************************************** Returns a table object, based on table id, and memoryfixes it. */ @@ -622,8 +652,7 @@ dict_table_get( } /************************************************************************** -Returns a table object and increments MySQL open handle count on the table. -*/ +Returns a table object and increments MySQL open handle count on the table. 
*/ dict_table_t* dict_table_get_and_increment_handle_count( @@ -732,11 +761,12 @@ dict_table_add_to_cache( } /* Add table to hash table of tables */ - HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table); + HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, + table); /* Add table to hash table of tables based on table id */ HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, id_fold, - table); + table); /* Add table to LRU list of tables */ UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); @@ -828,7 +858,7 @@ dict_table_rename_in_cache( /* Remove table from the hash tables of tables */ HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, - ut_fold_string(table->name), table); + ut_fold_string(table->name), table); name_buf = mem_heap_alloc(table->heap, ut_strlen(new_name) + 1); @@ -837,7 +867,8 @@ dict_table_rename_in_cache( table->name = name_buf; /* Add table to hash table of tables */ - HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, table); + HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, + table); dict_sys->size += (mem_heap_get_size(table->heap) - old_size); @@ -1128,7 +1159,6 @@ dict_index_add_to_cache( ulint n_ord; ibool success; ulint i; - ulint j; ut_ad(index); ut_ad(mutex_own(&(dict_sys->mutex))); @@ -1158,28 +1188,6 @@ dict_index_add_to_cache( return(FALSE); } - - /* Check that the same column does not appear twice in the index. 
- InnoDB assumes this in its algorithms, e.g., update of an index - entry */ - - for (i = 0; i < dict_index_get_n_fields(index); i++) { - - for (j = 0; j < i; j++) { - if (dict_index_get_nth_field(index, j)->col - == dict_index_get_nth_field(index, i)->col) { - - ut_print_timestamp(stderr); - - fprintf(stderr, -" InnoDB: Error: column %s appears twice in index %s of table %s\n" -"InnoDB: This is not allowed in InnoDB.\n" -"InnoDB: UPDATE can cause such an index to become corrupt in InnoDB.\n", - dict_index_get_nth_field(index, i)->col->name, - index->name, table->name); - } - } - } /* Build the cache internal representation of the index, containing also the added system fields */ @@ -1223,8 +1231,8 @@ dict_index_add_to_cache( cluster = dict_table_get_low(table->cluster_name); - tree = dict_index_get_tree(UT_LIST_GET_FIRST(cluster->indexes)); - + tree = dict_index_get_tree( + UT_LIST_GET_FIRST(cluster->indexes)); new_index->tree = tree; new_index->page_no = tree->page; } else { @@ -1352,13 +1360,14 @@ UNIV_INLINE void dict_index_add_col( /*===============*/ - dict_index_t* index, /* in: index */ - dict_col_t* col, /* in: column */ - ulint order) /* in: order criterion */ + dict_index_t* index, /* in: index */ + dict_col_t* col, /* in: column */ + ulint order, /* in: order criterion */ + ulint prefix_len) /* in: column prefix length */ { dict_field_t* field; - dict_mem_index_add_field(index, col->name, order); + dict_mem_index_add_field(index, col->name, order, prefix_len); field = dict_index_get_nth_field(index, index->n_def - 1); @@ -1384,7 +1393,8 @@ dict_index_copy( for (i = start; i < end; i++) { field = dict_index_get_nth_field(index2, i); - dict_index_add_col(index1, field->col, field->order); + dict_index_add_col(index1, field->col, field->order, + field->prefix_len); } } @@ -1487,7 +1497,7 @@ dict_index_build_internal_clust( /* Add the mix id column */ dict_index_add_col(new_index, - dict_table_get_sys_col(table, DATA_MIX_ID), 0); + 
dict_table_get_sys_col(table, DATA_MIX_ID), 0, 0); /* Copy the rest of fields */ dict_index_copy(new_index, index, table->mix_len, @@ -1525,14 +1535,15 @@ dict_index_build_internal_clust( if (!(index->type & DICT_UNIQUE)) { dict_index_add_col(new_index, - dict_table_get_sys_col(table, DATA_ROW_ID), 0); + dict_table_get_sys_col(table, DATA_ROW_ID), 0, 0); trx_id_pos++; } dict_index_add_col(new_index, - dict_table_get_sys_col(table, DATA_TRX_ID), 0); + dict_table_get_sys_col(table, DATA_TRX_ID), 0, 0); + dict_index_add_col(new_index, - dict_table_get_sys_col(table, DATA_ROLL_PTR), 0); + dict_table_get_sys_col(table, DATA_ROLL_PTR), 0, 0); for (i = 0; i < trx_id_pos; i++) { @@ -1561,7 +1572,14 @@ dict_index_build_internal_clust( for (i = 0; i < new_index->n_def; i++) { field = dict_index_get_nth_field(new_index, i); - (field->col)->aux = 0; + + /* If there is only a prefix of the column in the index + field, do not mark the column as contained in the index */ + + if (field->prefix_len == 0) { + + field->col->aux = 0; + } } /* Add to new_index non-system columns of table not yet included @@ -1572,7 +1590,7 @@ dict_index_build_internal_clust( ut_ad(col->type.mtype != DATA_SYS); if (col->aux == ULINT_UNDEFINED) { - dict_index_add_col(new_index, col, 0); + dict_index_add_col(new_index, col, 0, 0); } } @@ -1584,7 +1602,11 @@ dict_index_build_internal_clust( for (i = 0; i < new_index->n_def; i++) { field = dict_index_get_nth_field(new_index, i); - (field->col)->clust_pos = i; + + if (field->prefix_len == 0) { + + field->col->clust_pos = i; + } } new_index->cached = TRUE; @@ -1646,25 +1668,33 @@ dict_index_build_internal_non_clust( for (i = 0; i < clust_index->n_uniq; i++) { field = dict_index_get_nth_field(clust_index, i); - (field->col)->aux = ULINT_UNDEFINED; + field->col->aux = ULINT_UNDEFINED; } /* Mark with 0 table columns already contained in new_index */ for (i = 0; i < new_index->n_def; i++) { field = dict_index_get_nth_field(new_index, i); - (field->col)->aux = 0; 
+ + /* If there is only a prefix of the column in the index + field, do not mark the column as contained in the index */ + + if (field->prefix_len == 0) { + + field->col->aux = 0; + } } - /* Add to new_index columns necessary to determine the clustered + /* Add to new_index the columns necessary to determine the clustered index entry uniquely */ for (i = 0; i < clust_index->n_uniq; i++) { field = dict_index_get_nth_field(clust_index, i); - if ((field->col)->aux == ULINT_UNDEFINED) { - dict_index_add_col(new_index, field->col, 0); + if (field->col->aux == ULINT_UNDEFINED) { + dict_index_add_col(new_index, field->col, 0, + field->prefix_len); } } @@ -1787,6 +1817,14 @@ dict_foreign_find_index( for (i = 0; i < n_cols; i++) { col_name = dict_index_get_nth_field(index, i) ->col->name; + if (dict_index_get_nth_field(index, i) + ->prefix_len != 0) { + /* We do not accept column prefix + indexes here */ + + break; + } + if (ut_strlen(columns[i]) != ut_strlen(col_name) || 0 != ut_cmp_in_lower_case(columns[i], @@ -3776,6 +3814,10 @@ dict_field_print_low( ut_ad(mutex_own(&(dict_sys->mutex))); printf(" %s", field->name); + + if (field->prefix_len != 0) { + printf("(%lu)", field->prefix_len); + } } /************************************************************************** diff --git a/innobase/dict/dict0load.c b/innobase/dict/dict0load.c index 8f39605e493..48c445fa0c9 100644 --- a/innobase/dict/dict0load.c +++ b/innobase/dict/dict0load.c @@ -301,6 +301,8 @@ dict_load_fields( dtuple_t* tuple; dfield_t* dfield; char* col_name; + ulint pos_and_prefix_len; + ulint prefix_len; rec_t* rec; byte* field; ulint len; @@ -345,8 +347,28 @@ dict_load_fields( ut_a(ut_memcmp(buf, field, len) == 0); field = rec_get_nth_field(rec, 1, &len); - ut_ad(len == 4); - ut_a(i == mach_read_from_4(field)); + ut_a(len == 4); + + /* The next field stores the field position in the index + and a possible column prefix length if the index field + does not contain the whole column. 
The storage format is + like this: if there is at least one prefix field in the index, + then the HIGH 2 bytes contain the field number (== i) and the + low 2 bytes the prefix length for the field. Otherwise the + field number (== i) is contained in the 2 LOW bytes. */ + + pos_and_prefix_len = mach_read_from_4(field); + + ut_a((pos_and_prefix_len & 0xFFFF) == i + || (pos_and_prefix_len & 0xFFFF0000) == (i << 16)); + + if ((i == 0 && pos_and_prefix_len > 0) + || (pos_and_prefix_len & 0xFFFF0000) > 0) { + + prefix_len = pos_and_prefix_len & 0xFFFF; + } else { + prefix_len = 0; + } ut_a(0 == ut_strcmp((char*) "COL_NAME", dict_field_get_col( @@ -359,7 +381,7 @@ dict_load_fields( ut_memcpy(col_name, field, len); col_name[len] = '\0'; - dict_mem_index_add_field(index, col_name, 0); + dict_mem_index_add_field(index, col_name, 0, prefix_len); btr_pcur_move_to_next_user_rec(&pcur, &mtr); } diff --git a/innobase/dict/dict0mem.c b/innobase/dict/dict0mem.c index e5918c6aeb6..56efc0a0117 100644 --- a/innobase/dict/dict0mem.c +++ b/innobase/dict/dict0mem.c @@ -266,10 +266,13 @@ by the column name may be released only after publishing the index. 
*/ void dict_mem_index_add_field( /*=====================*/ - dict_index_t* index, /* in: index */ - char* name, /* in: column name */ - ulint order) /* in: order criterion; 0 means an ascending - order */ + dict_index_t* index, /* in: index */ + char* name, /* in: column name */ + ulint order, /* in: order criterion; 0 means an + ascending order */ + ulint prefix_len) /* in: 0 or the column prefix length + in a MySQL index like + INDEX (textcol(25)) */ { dict_field_t* field; @@ -282,6 +285,8 @@ dict_mem_index_add_field( field->name = name; field->order = order; + + field->prefix_len = prefix_len; } /************************************************************************** diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c index 98980f6c337..a8dc357749c 100644 --- a/innobase/fil/fil0fil.c +++ b/innobase/fil/fil0fil.c @@ -632,7 +632,7 @@ fil_space_create( /* Spaces with an odd id number are reserved to replicate spaces used in log debugging */ - ut_a((purpose == FIL_LOG) || (id % 2 == 0)); + ut_anp((purpose == FIL_LOG) || (id % 2 == 0)); #endif mutex_enter(&(system->mutex)); @@ -1202,8 +1202,8 @@ loop: /* Do aio */ - ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0); - ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0); + ut_anp(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_anp((len % OS_FILE_LOG_BLOCK_SIZE) == 0); /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c index ee48288b875..b6941d80e90 100644 --- a/innobase/fsp/fsp0fsp.c +++ b/innobase/fsp/fsp0fsp.c @@ -778,7 +778,7 @@ fsp_init_file_page_low( page[i] = 0xFF; } #endif - mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN, + mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, ut_dulint_zero); mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero); } @@ -2875,7 +2875,7 @@ fseg_free_step( freed yet */ ut_a(descr); - ut_a(xdes_get_bit(descr, XDES_FREE_BIT, 
buf_frame_get_page_no(header) + ut_anp(xdes_get_bit(descr, XDES_FREE_BIT, buf_frame_get_page_no(header) % FSP_EXTENT_SIZE, mtr) == FALSE); inode = fseg_inode_get(header, mtr); diff --git a/innobase/ha/ha0ha.c b/innobase/ha/ha0ha.c index 4489b25ec2b..eb28e15215d 100644 --- a/innobase/ha/ha0ha.c +++ b/innobase/ha/ha0ha.c @@ -293,11 +293,13 @@ ha_print_info( hash_table_t* table) /* in: hash table */ { hash_cell_t* cell; -/* ha_node_t* node; */ - ulint nodes = 0; - ulint cells = 0; +/* + ha_node_t* node; ulint len = 0; ulint max_len = 0; + ulint nodes = 0; +*/ + ulint cells = 0; ulint n_bufs; ulint i; diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c index 187afa17047..c07756ab308 100644 --- a/innobase/ibuf/ibuf0ibuf.c +++ b/innobase/ibuf/ibuf0ibuf.c @@ -170,7 +170,7 @@ dropped! So, there seems to be no problem. */ /********************************************************************** Validates the ibuf data structures when the caller owns ibuf_mutex. */ -static + ibool ibuf_validate_low(void); /*===================*/ @@ -484,8 +484,8 @@ ibuf_data_init_for_space( index = dict_mem_index_create(buf, (char *) "CLUST_IND", space, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF,2); - dict_mem_index_add_field(index, (char *) "PAGE_NO", 0); - dict_mem_index_add_field(index, (char *) "TYPES", 0); + dict_mem_index_add_field(index, (char *) "PAGE_NO", 0, 0); + dict_mem_index_add_field(index, (char *) "TYPES", 0, 0); index->page_no = FSP_IBUF_TREE_ROOT_PAGE_NO; @@ -2727,7 +2727,7 @@ reset_bit: /********************************************************************** Validates the ibuf data structures when the caller owns ibuf_mutex. 
*/ -static + ibool ibuf_validate_low(void) /*===================*/ diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h index 1d17c0e952d..506877333c3 100644 --- a/innobase/include/btr0cur.h +++ b/innobase/include/btr0cur.h @@ -690,7 +690,13 @@ and sleep this many microseconds in between */ #define BTR_CUR_RETRY_DELETE_N_TIMES 100 #define BTR_CUR_RETRY_SLEEP_TIME 50000 -/* The reference in a field of which data is stored on a different page */ +/* The reference in a field for which data is stored on a different page. +The reference is at the end of the 'locally' stored part of the field. +'Locally' means storage in the index record. +We store locally a long enough prefix of each column so that we can determine +the ordering parts of each index record without looking into the externally +stored part. */ + /*--------------------------------------*/ #define BTR_EXTERN_SPACE_ID 0 /* space id where stored */ #define BTR_EXTERN_PAGE_NO 4 /* page no where stored */ diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h index e4d3671586d..2963efd6396 100644 --- a/innobase/include/buf0buf.h +++ b/innobase/include/buf0buf.h @@ -364,11 +364,24 @@ to a file. Note that we must be careful to calculate the same value on 32-bit and 64-bit architectures. */ ulint -buf_calc_page_checksum( -/*===================*/ +buf_calc_page_new_checksum( +/*=======================*/ /* out: checksum */ byte* page); /* in: buffer page */ /************************************************************************ +In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only +looked at the first few bytes of the page. This calculates that old +checksum. +NOTE: we must first store the new formula checksum to +FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum +because this takes that field as an input! 
*/ + +ulint +buf_calc_page_old_checksum( +/*=======================*/ + /* out: checksum */ + byte* page); /* in: buffer page */ +/************************************************************************ Checks if a page is corrupt. */ ibool diff --git a/innobase/include/data0data.h b/innobase/include/data0data.h index e0fb06e5018..889d148d3fe 100644 --- a/innobase/include/data0data.h +++ b/innobase/include/data0data.h @@ -453,8 +453,6 @@ struct dfield_struct{ void* data; /* pointer to data */ ulint len; /* data length; UNIV_SQL_NULL if SQL null; */ dtype_t type; /* type of data */ - ulint col_no; /* when building index entries, the column - number can be stored here */ }; struct dtuple_struct { diff --git a/innobase/include/data0type.h b/innobase/include/data0type.h index b53a70a8909..4da686bf2e1 100644 --- a/innobase/include/data0type.h +++ b/innobase/include/data0type.h @@ -18,14 +18,16 @@ typedef struct dtype_struct dtype_t; data type */ extern dtype_t* dtype_binary; -/* Data main types of SQL data; NOTE! character data types requiring -collation transformation must have the smallest codes! All codes must be -less than 256! 
*/ +/* Data main types of SQL data */ #define DATA_VARCHAR 1 /* character varying */ #define DATA_CHAR 2 /* fixed length character */ #define DATA_FIXBINARY 3 /* binary string of fixed length */ #define DATA_BINARY 4 /* binary string */ -#define DATA_BLOB 5 /* binary large object */ +#define DATA_BLOB 5 /* binary large object, or a TEXT type; if + prtype & DATA_NONLATIN1 != 0 the data must + be compared by MySQL as a whole field; if + prtype & DATA_BINARY_TYPE == 0, then this is + actually a TEXT column */ #define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */ #define DATA_SYS_CHILD 7 /* address of the child page in node pointer */ #define DATA_SYS 8 /* system column */ @@ -34,35 +36,55 @@ binary strings */ #define DATA_FLOAT 9 #define DATA_DOUBLE 10 #define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */ -#define DATA_VARMYSQL 12 /* data types for which comparisons must be */ -#define DATA_MYSQL 13 /* made by MySQL */ -#define DATA_ERROR 111 /* error value */ -#define DATA_MTYPE_MAX 255 +#define DATA_VARMYSQL 12 /* non-latin1 varying length char */ +#define DATA_MYSQL 13 /* non-latin1 fixed length char */ +#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size() + requires the values are <= 63 */ /*-------------------------------------------*/ -/* Precise data types for system columns; NOTE: the values must run -from 0 up in the order given! All codes must be less than 256! */ +/* In the lowest byte in the precise type we store the MySQL type code +(not applicable for system columns). 
*/ + +#define DATA_ENGLISH 4 /* English language character string: this + is a relic from pre-MySQL time and only used + for InnoDB's own system tables */ +#define DATA_ERROR 111 /* another relic from pre-MySQL time */ + +#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL + type from the precise type */ + +/* Precise data types for system columns and the length of those columns; +NOTE: the values must run from 0 up in the order given! All codes must +be less than 256 */ #define DATA_ROW_ID 0 /* row id: a dulint */ #define DATA_ROW_ID_LEN 6 /* stored length for row id */ + #define DATA_TRX_ID 1 /* transaction id: 6 bytes */ #define DATA_TRX_ID_LEN 6 + #define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */ #define DATA_ROLL_PTR_LEN 7 + #define DATA_MIX_ID 3 /* mixed index label: a dulint, stored in a row in a compressed form */ #define DATA_MIX_ID_LEN 9 /* maximum stored length for mix id (in a compressed dulint form) */ #define DATA_N_SYS_COLS 4 /* number of system columns defined above */ +/*-------------------------------------------*/ +/* Flags ORed to the precise data type */ #define DATA_NOT_NULL 256 /* this is ORed to the precise type when the column is declared as NOT NULL */ #define DATA_UNSIGNED 512 /* this id ORed to the precise type when we have an unsigned integer type */ +#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character + string, this is ORed to the precise type: + this only holds for tables created with + >= MySQL-4.0.14 */ +#define DATA_NONLATIN1 2048 /* if the data type is a DATA_BLOB (actually + TEXT) of a non-latin1 type, this is ORed to + the precise type: this only holds for tables + created with >= MySQL-4.0.14 */ /*-------------------------------------------*/ -/* Precise types of a char or varchar data. All codes must be less than 256! 
*/ -#define DATA_ENGLISH 4 /* English language character string */ -#define DATA_FINNISH 5 /* Finnish */ -#define DATA_PRTYPE_MAX 255 - /* This many bytes we need to store the type information affecting the alphabetical order for a single field and decide the storage size of an SQL null*/ @@ -123,7 +145,7 @@ dtype_get_pad_char( /*===============*/ /* out: padding character code, or ULINT_UNDEFINED if no padding specified */ - dtype_t* type); /* in: typeumn */ + dtype_t* type); /* in: type */ /*************************************************************************** Returns the size of a fixed size data type, 0 if not a fixed size type. */ UNIV_INLINE @@ -150,24 +172,24 @@ dtype_is_fixed_size( /* out: TRUE if fixed size */ dtype_t* type); /* in: type */ /************************************************************************** -Stores to a type the information which determines its alphabetical -ordering. */ +Stores for a type the information which determines its alphabetical ordering +and the storage size of an SQL NULL value. */ UNIV_INLINE void dtype_store_for_order_and_null_size( /*================================*/ byte* buf, /* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE - bytes */ + bytes where we store the info */ dtype_t* type); /* in: type struct */ /************************************************************************** -Reads of a type the stored information which determines its alphabetical -ordering. */ +Reads to a type the stored information which determines its alphabetical +ordering and the storage size of an SQL NULL value. */ UNIV_INLINE void dtype_read_for_order_and_null_size( /*===============================*/ dtype_t* type, /* in: type struct */ - byte* buf); /* in: buffer for type order info */ + byte* buf); /* in: buffer for the stored order info */ /************************************************************************* Validates a data type structure. 
*/ diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic index d82d976d076..ddd0b0ae8cc 100644 --- a/innobase/include/data0type.ic +++ b/innobase/include/data0type.ic @@ -110,7 +110,9 @@ dtype_get_pad_char( if (type->mtype == DATA_CHAR || type->mtype == DATA_VARCHAR || type->mtype == DATA_BINARY - || type->mtype == DATA_FIXBINARY) { + || type->mtype == DATA_FIXBINARY + || type->mtype == DATA_MYSQL + || type->mtype == DATA_VARMYSQL) { /* Space is the padding character for all char and binary strings */ @@ -124,39 +126,56 @@ dtype_get_pad_char( } /************************************************************************** -Stores to a type the information which determines its alphabetical -ordering. */ +Stores for a type the information which determines its alphabetical ordering +and the storage size of an SQL NULL value. */ UNIV_INLINE void dtype_store_for_order_and_null_size( /*================================*/ byte* buf, /* in: buffer for DATA_ORDER_NULL_TYPE_BUF_SIZE - bytes */ + bytes where we store the info */ dtype_t* type) /* in: type struct */ { ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE); buf[0] = (byte)(type->mtype & 0xFF); + + if (type->prtype & DATA_BINARY_TYPE) { + buf[0] = buf[0] | 128; + } + + if (type->prtype & DATA_NONLATIN1) { + buf[0] = buf[0] | 64; + } + buf[1] = (byte)(type->prtype & 0xFF); mach_write_to_2(buf + 2, type->len & 0xFFFF); } /************************************************************************** -Reads of a type the stored information which determines its alphabetical -ordering. */ +Reads to a type the stored information which determines its alphabetical +ordering and the storage size of an SQL NULL value. 
*/ UNIV_INLINE void dtype_read_for_order_and_null_size( /*===============================*/ dtype_t* type, /* in: type struct */ - byte* buf) /* in: buffer for type order info */ + byte* buf) /* in: buffer for stored type order info */ { ut_ad(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE); - type->mtype = buf[0]; + type->mtype = buf[0] & 63; type->prtype = buf[1]; + if (buf[0] & 128) { + type->prtype = type->prtype | DATA_BINARY_TYPE; + } + + if (buf[0] & 64) { + type->prtype = type->prtype | DATA_NONLATIN1; + } + type->len = mach_read_from_2(buf + 2); } diff --git a/innobase/include/db0err.h b/innobase/include/db0err.h index ab7d0caa35c..854b9794c00 100644 --- a/innobase/include/db0err.h +++ b/innobase/include/db0err.h @@ -44,8 +44,10 @@ Created 5/24/1996 Heikki Tuuri #define DB_CORRUPTION 39 /* data structure corruption noticed */ #define DB_COL_APPEARS_TWICE_IN_INDEX 40 /* InnoDB cannot handle an index where same column appears twice */ -#define DB_CANNOT_DROP_CONSTRAINT 40 /* dropping a foreign key constraint +#define DB_CANNOT_DROP_CONSTRAINT 41 /* dropping a foreign key constraint from a table failed */ +#define DB_NO_SAVEPOINT 42 /* no savepoint exists with the given + name */ /* The following are partial failure codes */ #define DB_FAIL 1000 diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h index 97486a7c2f6..e88c6a52bcb 100644 --- a/innobase/include/dict0dict.h +++ b/innobase/include/dict0dict.h @@ -569,6 +569,19 @@ dict_index_get_nth_col_pos( dict_index_t* index, /* in: index */ ulint n); /* in: column number */ /************************************************************************ +Looks for a matching field in an index. The column and the prefix len have +to be the same. 
*/ + +ulint +dict_index_get_nth_field_pos( +/*=========================*/ + /* out: position in internal representation + of the index; if not contained, returns + ULINT_UNDEFINED */ + dict_index_t* index, /* in: index from which to search */ + dict_index_t* index2, /* in: index */ + ulint n); /* in: field number in index2 */ +/************************************************************************ Looks for column n position in the clustered index. */ ulint diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic index 71ea67117a7..c5982c162a7 100644 --- a/innobase/include/dict0dict.ic +++ b/innobase/include/dict0dict.ic @@ -203,7 +203,6 @@ dict_index_get_n_fields( { ut_ad(index); ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(index->cached); return(index->n_fields); } diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h index 0798541cfe0..03dc913a7c9 100644 --- a/innobase/include/dict0mem.h +++ b/innobase/include/dict0mem.h @@ -111,10 +111,13 @@ by the column name may be released only after publishing the index. */ void dict_mem_index_add_field( /*=====================*/ - dict_index_t* index, /* in: index */ - char* name, /* in: column name */ - ulint order); /* in: order criterion; 0 means an ascending - order */ + dict_index_t* index, /* in: index */ + char* name, /* in: column name */ + ulint order, /* in: order criterion; 0 means an + ascending order */ + ulint prefix_len); /* in: 0 or the column prefix length + in a MySQL index like + INDEX (textcol(25)) */ /************************************************************************** Frees an index memory object. */ @@ -158,12 +161,18 @@ struct dict_col_struct{ in some of the functions below */ }; +#define DICT_MAX_COL_PREFIX_LEN 512 + /* Data structure for a field in an index */ struct dict_field_struct{ - dict_col_t* col; /* pointer to the table column */ - char* name; /* name of the column */ - ulint order; /* flags for ordering this field: - DICT_DESCEND, ... 
*/ + dict_col_t* col; /* pointer to the table column */ + char* name; /* name of the column */ + ulint order; /* flags for ordering this field: + DICT_DESCEND, ... */ + ulint prefix_len; /* 0 or the length of the column + prefix in a MySQL index of type, e.g., + INDEX (textcol(25)); must be smaller + than DICT_MAX_COL_PREFIX_LEN */ }; /* Data structure for an index tree */ diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h index 23ef0304b2d..4f78fdb2fd7 100644 --- a/innobase/include/fil0fil.h +++ b/innobase/include/fil0fil.h @@ -43,7 +43,10 @@ struct fil_addr_struct{ extern fil_addr_t fil_addr_null; /* The byte offsets on a file page for various variables */ -#define FIL_PAGE_SPACE 0 /* space id the page belongs to */ +#define FIL_PAGE_SPACE_OR_CHKSUM 0 /* in < MySQL-4.0.14 space id the + page belongs to (== 0) but in later + versions the 'new' checksum of the + page */ #define FIL_PAGE_OFFSET 4 /* page offset inside space */ #define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor of the page, its offset */ @@ -64,7 +67,7 @@ extern fil_addr_t fil_addr_null; #define FIL_PAGE_DATA 38 /* start of the data on the page */ /* File page trailer */ -#define FIL_PAGE_END_LSN 8 /* the low 4 bytes of this are used +#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /* the low 4 bytes of this are used to store the page checksum, the last 4 bytes should be identical to the last 4 bytes of FIL_PAGE_LSN */ diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h index d3b3d55d015..5608ba020b7 100644 --- a/innobase/include/lock0lock.h +++ b/innobase/include/lock0lock.h @@ -450,6 +450,18 @@ lock_rec_get_mutex_for_addr( ulint space, /* in: space id */ ulint page_no);/* in: page number */ /************************************************************************* +Checks that a transaction id is sensible, i.e., not in the future. 
*/ + +ibool +lock_check_trx_id_sanity( +/*=====================*/ + /* out: TRUE if ok */ + dulint trx_id, /* in: trx id */ + rec_t* rec, /* in: user record */ + dict_index_t* index, /* in: clustered index */ + ibool has_kernel_mutex);/* in: TRUE if the caller owns the + kernel mutex */ +/************************************************************************* Validates the lock queue on a single record. */ ibool diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h index 86f27a2d3eb..1ec8b71d069 100644 --- a/innobase/include/os0file.h +++ b/innobase/include/os0file.h @@ -146,6 +146,21 @@ os_file_create_simple( ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ ibool* success);/* out: TRUE if succeed, FALSE if error */ /******************************************************************** +A simple function to open or create a file. */ + +os_file_t +os_file_create_simple_no_error_handling( +/*====================================*/ + /* out, own: handle to the file, not defined if error, + error number can be retrieved with os_get_last_error */ + char* name, /* in: name of the file or path as a null-terminated + string */ + ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened + (if does not exist, error), or OS_FILE_CREATE if a new + file is created (if exists, error) */ + ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ + ibool* success);/* out: TRUE if succeed, FALSE if error */ +/******************************************************************** Opens an existing file or creates a new. */ os_file_t @@ -173,6 +188,14 @@ os_file_close( /* out: TRUE if success */ os_file_t file); /* in, own: handle to a file */ /*************************************************************************** +Closes a file handle. 
*/ + +ibool +os_file_close_no_error_handling( +/*============================*/ + /* out: TRUE if success */ + os_file_t file); /* in, own: handle to a file */ +/*************************************************************************** Gets a file size. */ ibool diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h index b5e33af5bc0..04f771c3abd 100644 --- a/innobase/include/page0page.h +++ b/innobase/include/page0page.h @@ -666,6 +666,15 @@ page_rec_validate( /* out: TRUE if ok */ rec_t* rec); /* in: record on the page */ /******************************************************************* +Checks that the first directory slot points to the infimum record and +the last to the supremum. This function is intended to track if the +bug fixed in 4.0.14 has caused corruption to users' databases. */ + +void +page_check_dir( +/*===========*/ + page_t* page); /* in: index page */ +/******************************************************************* This function checks the consistency of an index page when we do not know the index. This is also resilient so that this should never crash even if the page is total garbage. */ diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h index 6f2a99fc8c2..712e263350e 100644 --- a/innobase/include/rem0cmp.h +++ b/innobase/include/rem0cmp.h @@ -42,6 +42,22 @@ cmp_data_data( buffer) */ ulint len2); /* in: data field length or UNIV_SQL_NULL */ /***************************************************************** +This function is used to compare two data fields for which we know the +data type. 
*/ + +int +cmp_data_data_slow( +/*===============*/ + /* out: 1, 0, -1, if data1 is greater, equal, + less than data2, respectively */ + dtype_t* cur_type,/* in: data type of the fields */ + byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2); /* in: data field length or UNIV_SQL_NULL */ +/***************************************************************** This function is used to compare two dfields where at least the first has its data type field set. */ UNIV_INLINE diff --git a/innobase/include/row0mysql.ic b/innobase/include/row0mysql.ic index e9d493da8b5..4ecd66e06ec 100644 --- a/innobase/include/row0mysql.ic +++ b/innobase/include/row0mysql.ic @@ -58,7 +58,8 @@ row_mysql_store_col_in_innobase_format( /*===================================*/ dfield_t* dfield, /* in/out: dfield */ byte* buf, /* in/out: buffer for the converted - value */ + value; this must be at least col_len + long! */ byte* mysql_data, /* in: MySQL column value, not SQL NULL; NOTE that dfield may also get a pointer to mysql_data, @@ -96,7 +97,6 @@ row_mysql_store_col_in_innobase_format( while (col_len > 0 && ptr[col_len - 1] == ' ') { col_len--; } - } else if (type == DATA_BLOB) { ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len); } diff --git a/innobase/include/row0row.h b/innobase/include/row0row.h index 09a79e19fd7..d1befbbbad3 100644 --- a/innobase/include/row0row.h +++ b/innobase/include/row0row.h @@ -86,9 +86,10 @@ dtuple_t* row_build( /*======*/ /* out, own: row built; see the NOTE below! 
*/ - ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS: - the former copies also the data fields to - heap as the latter only places pointers to + ulint type, /* in: ROW_COPY_POINTERS, ROW_COPY_DATA, or + ROW_COPY_ALSO_EXTERNALS, + the last two also copy the data fields to + heap, whereas the first only places pointers to data fields on the index page, and thus is more efficient */ dict_index_t* index, /* in: clustered index */ diff --git a/innobase/include/row0sel.h b/innobase/include/row0sel.h index aa2da6fe5f6..5ef7ff9399a 100644 --- a/innobase/include/row0sel.h +++ b/innobase/include/row0sel.h @@ -87,9 +87,11 @@ row_printf_step( /* out: query thread to run next or NULL */ que_thr_t* thr); /* in: query thread */ /******************************************************************** -Converts a key value stored in MySQL format to an Innobase dtuple. -The last field of the key value may be just a prefix of a fixed length -field: hence the parameter key_len. */ +Converts a key value stored in MySQL format to an Innobase dtuple. The last +field of the key value may be just a prefix of a fixed length field: hence +the parameter key_len. But currently we do not allow search keys where the +last field is only a prefix of the full key field length, and we print a +warning if such a key appears. */ void row_sel_convert_mysql_key_to_innobase( @@ -100,6 +102,7 @@ row_sel_convert_mysql_key_to_innobase( to index! 
*/ byte* buf, /* in: buffer to use in field conversions */ + ulint buf_len, /* in: buffer length */ dict_index_t* index, /* in: index of the key value */ byte* key_ptr, /* in: MySQL key value */ ulint key_len); /* in: MySQL key value length */ diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h index 273ec6074eb..473c55c7ef9 100644 --- a/innobase/include/row0upd.h +++ b/innobase/include/row0upd.h @@ -114,13 +114,15 @@ row_upd_index_write_log( closed within this function */ mtr_t* mtr); /* in: mtr into whose log to write */ /*************************************************************** -Returns TRUE if row update changes size of some field in index. */ +Returns TRUE if row update changes size of some field in index or if some +field to be updated is stored externally in rec or update. */ ibool -row_upd_changes_field_size( -/*=======================*/ +row_upd_changes_field_size_or_external( +/*===================================*/ /* out: TRUE if the update changes the size of - some field in index */ + some field in index or the field is external + in rec or update */ rec_t* rec, /* in: record in clustered index */ dict_index_t* index, /* in: clustered index */ upd_t* update);/* in: update vector */ @@ -175,16 +177,10 @@ row_upd_index_replace_new_col_vals( dtuple_t* entry, /* in/out: index entry where replaced */ dict_index_t* index, /* in: index; NOTE that may also be a non-clustered index */ - upd_t* update); /* in: update vector */ -/*************************************************************** -Replaces the new column values stored in the update vector to the -clustered index entry given. 
*/ - -void -row_upd_clust_index_replace_new_col_vals( -/*=====================================*/ - dtuple_t* entry, /* in/out: index entry where replaced */ - upd_t* update); /* in: update vector */ + upd_t* update, /* in: update vector */ + mem_heap_t* heap); /* in: memory heap to which we allocate and + copy the new values, set this as NULL if you + do not want allocation */ /*************************************************************** Checks if an update vector changes an ordering field of an index record. This function is fast if the update vector is short or the number of ordering @@ -358,9 +354,9 @@ struct upd_node_struct{ externally in the clustered index record of row */ ulint n_ext_vec;/* number of fields in ext_vec */ - mem_heap_t* heap; /* memory heap used as auxiliary storage for - row; this must be emptied after a successful - update if node->row != NULL */ + mem_heap_t* heap; /* memory heap used as auxiliary storage; + this must be emptied after a successful + update */ /*----------------------*/ sym_node_t* table_sym;/* table node in symbol table */ que_node_t* col_assign_list; diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h index 24e692dedab..1e54d7bfc35 100644 --- a/innobase/include/srv0srv.h +++ b/innobase/include/srv0srv.h @@ -153,6 +153,7 @@ extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, /* Array of English strings describing the current state of an i/o handler thread */ extern char* srv_io_thread_op_info[]; +extern char* srv_io_thread_function[]; typedef struct srv_sys_struct srv_sys_t; diff --git a/innobase/include/trx0roll.h b/innobase/include/trx0roll.h index 820af4cd014..0d7126c9c57 100644 --- a/innobase/include/trx0roll.h +++ b/innobase/include/trx0roll.h @@ -177,6 +177,55 @@ trx_general_rollback_for_mysql( ibool partial,/* in: TRUE if partial rollback requested */ trx_savept_t* savept);/* in: pointer to savepoint undo number, if partial rollback requested */ 
+/*********************************************************************** +Rolls back a transaction back to a named savepoint. Modifications after the +savepoint are undone but InnoDB does NOT release the corresponding locks +which are stored in memory. If a lock is 'implicit', that is, a new inserted +row holds a lock where the lock information is carried by the trx id stored in +the row, these locks are naturally released in the rollback. Savepoints which +were set after this savepoint are deleted. */ + +ulint +trx_rollback_to_savepoint_for_mysql( +/*================================*/ + /* out: if no savepoint + of the name found then + DB_NO_SAVEPOINT, + otherwise DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + char* savepoint_name, /* in: savepoint name */ + ib_longlong* mysql_binlog_cache_pos);/* out: the MySQL binlog cache + position corresponding to this + savepoint; MySQL needs this + information to remove the + binlog entries of the queries + executed after the savepoint */ +/*********************************************************************** +Creates a named savepoint. If the transaction is not yet started, starts it. +If there is already a savepoint of the same name, this call erases that old +savepoint and replaces it with a new. Savepoints are deleted in a transaction +commit or rollback. */ + +ulint +trx_savepoint_for_mysql( +/*====================*/ + /* out: always DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + char* savepoint_name, /* in: savepoint name */ + ib_longlong binlog_cache_pos); /* in: MySQL binlog cache + position corresponding to this + connection at the time of the + savepoint */ +/*********************************************************************** +Frees savepoint structs. 
*/ + +void +trx_roll_savepoints_free( +/*=====================*/ + trx_t* trx, /* in: transaction handle */ + trx_named_savept_t* savep); /* in: free all savepoints > this one; + if this is NULL, free all savepoints + of trx */ extern sess_t* trx_dummy_sess; @@ -207,6 +256,21 @@ struct roll_node_struct{ case of a partial rollback */ }; +/* A savepoint set with SQL's "SAVEPOINT savepoint_id" command */ +struct trx_named_savept_struct{ + char* name; /* savepoint name */ + trx_savept_t savept; /* the undo number corresponding to + the savepoint */ + ib_longlong mysql_binlog_cache_pos; + /* the MySQL binlog cache position + corresponding to this savepoint, not + defined if the MySQL binlogging is not + enabled */ + UT_LIST_NODE_T(trx_named_savept_t) + trx_savepoints; /* the list of savepoints of a + transaction */ +}; + /* Rollback node states */ #define ROLL_NODE_SEND 1 #define ROLL_NODE_WAIT 2 diff --git a/innobase/include/trx0sys.ic b/innobase/include/trx0sys.ic index ada2d8cb19c..343e6d7c2fa 100644 --- a/innobase/include/trx0sys.ic +++ b/innobase/include/trx0sys.ic @@ -296,6 +296,16 @@ trx_is_active( return(FALSE); } + if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) { + + /* There must be corruption: we return TRUE because this + function is only called by lock_clust_rec_some_has_impl() + and row_vers_impl_x_locked_off_kernel() and they have + diagnostic prints in this case */ + + return(TRUE); + } + trx = trx_get_on_id(trx_id); if (trx && (trx->conc_state == TRX_ACTIVE)) { diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h index 39229923375..6b08b674db8 100644 --- a/innobase/include/trx0trx.h +++ b/innobase/include/trx0trx.h @@ -381,7 +381,8 @@ struct trx_struct{ replication slave, we have here the master binlog name up to which replication has processed; otherwise - this is a pointer to a null character */ + this is a pointer to a null + character */ ib_longlong mysql_master_log_pos; /* if the database server is a MySQL replication slave, 
this is the @@ -501,6 +502,10 @@ struct trx_struct{ mem_heap_t* read_view_heap; /* memory heap for the read view */ read_view_t* read_view; /* consistent read view or NULL */ /*------------------------------*/ + UT_LIST_BASE_NODE_T(trx_named_savept_t) + trx_savepoints; /* savepoints set with SAVEPOINT ..., + oldest first */ + /*------------------------------*/ mutex_t undo_mutex; /* mutex protecting the fields in this section (down to undo_no_arr), EXCEPT last_sql_stat_start, which can be diff --git a/innobase/include/trx0types.h b/innobase/include/trx0types.h index b8befe7172f..2965eb4451f 100644 --- a/innobase/include/trx0types.h +++ b/innobase/include/trx0types.h @@ -24,6 +24,7 @@ typedef struct trx_undo_inf_struct trx_undo_inf_t; typedef struct trx_purge_struct trx_purge_t; typedef struct roll_node_struct roll_node_t; typedef struct commit_node_struct commit_node_t; +typedef struct trx_named_savept_struct trx_named_savept_t; /* Transaction savepoint */ typedef struct trx_savept_struct trx_savept_t; diff --git a/innobase/include/univ.i b/innobase/include/univ.i index e29f3ec92e1..4854e5a7b78 100644 --- a/innobase/include/univ.i +++ b/innobase/include/univ.i @@ -187,7 +187,11 @@ management to ensure correct alignment for doubles etc. */ /* Another basic type we use is unsigned long integer which is intended to be equal to the word size of the machine. 
*/ +#ifdef _WIN64 +typedef unsigned __int64 ulint; +#else typedef unsigned long int ulint; +#endif typedef long int lint; diff --git a/innobase/include/ut0dbg.h b/innobase/include/ut0dbg.h index e99dc8c09d6..802557099fc 100644 --- a/innobase/include/ut0dbg.h +++ b/innobase/include/ut0dbg.h @@ -50,6 +50,37 @@ extern ulint* ut_dbg_null_ptr; }\ } +/* This can be used if there are % characters in the assertion formula: +if we try to printf the formula gcc would complain of illegal print +format characters */ +#define ut_anp(EXPR)\ +{\ + ulint dbg_i;\ +\ + if (!((ulint)(EXPR) + ut_dbg_zero)) {\ + ut_print_timestamp(stderr);\ + fprintf(stderr,\ + " InnoDB: Assertion failure in thread %lu in file %s line %lu\n",\ + os_thread_pf(os_thread_get_curr_id()), IB__FILE__,\ + (ulint)__LINE__);\ + fprintf(stderr,\ + "\nInnoDB: We intentionally generate a memory trap.\n");\ + fprintf(stderr,\ + "InnoDB: Send a detailed bug report to mysql@lists.mysql.com\n");\ + ut_dbg_stop_threads = TRUE;\ + dbg_i = *(ut_dbg_null_ptr);\ + if (dbg_i) {\ + ut_dbg_null_ptr = NULL;\ + }\ + }\ + if (ut_dbg_stop_threads) {\ + fprintf(stderr,\ + "InnoDB: Thread %lu stopped in file %s line %lu\n",\ + os_thread_pf(os_thread_get_curr_id()), IB__FILE__, (ulint)__LINE__);\ + os_thread_sleep(1000000000);\ + }\ +} + #define ut_error {\ ulint dbg_i;\ ut_print_timestamp(stderr);\ diff --git a/innobase/include/ut0mem.h b/innobase/include/ut0mem.h index 09e0d800685..4e8566eba1b 100644 --- a/innobase/include/ut0mem.h +++ b/innobase/include/ut0mem.h @@ -57,7 +57,7 @@ ut_free( /*====*/ void* ptr); /* in, own: memory block */ /************************************************************************** -Frees all allocated memory not freed yet. */ +Frees in shutdown all allocated memory not freed yet. 
*/ void ut_free_all_mem(void); diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c index 4bb1d243ed4..fecb1f95c68 100644 --- a/innobase/lock/lock0lock.c +++ b/innobase/lock/lock0lock.c @@ -356,7 +356,7 @@ lock_mutex_enter_kernel(void) } /************************************************************************* -Releses the kernel mutex. This function is used in this module to allow +Releases the kernel mutex. This function is used in this module to allow monitoring the contention degree on the kernel mutex caused by the lock operations. */ UNIV_INLINE @@ -514,6 +514,53 @@ lock_rec_mutex_own_all(void) #endif +/************************************************************************* +Checks that a transaction id is sensible, i.e., not in the future. */ + +ibool +lock_check_trx_id_sanity( +/*=====================*/ + /* out: TRUE if ok */ + dulint trx_id, /* in: trx id */ + rec_t* rec, /* in: user record */ + dict_index_t* index, /* in: clustered index */ + ibool has_kernel_mutex)/* in: TRUE if the caller owns the + kernel mutex */ +{ + char err_buf[500]; + ibool is_ok = TRUE; + + if (!has_kernel_mutex) { + mutex_enter(&kernel_mutex); + } + + /* A sanity check: the trx_id in rec must be smaller than the global + trx id counter */ + + if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) { + rec_sprintf(err_buf, 400, rec); + ut_print_timestamp(stderr); + fprintf(stderr, +"InnoDB: Error: transaction id associated with record\n%s\n" +"InnoDB: in table %s index %s\n" +"InnoDB: is %lu %lu which is higher than the global trx id counter %lu %lu!\n" +"InnoDB: The table is corrupt. 
You have to do dump + drop + reimport.\n", + err_buf, index->table_name, index->name, + ut_dulint_get_high(trx_id), + ut_dulint_get_low(trx_id), + ut_dulint_get_high(trx_sys->max_trx_id), + ut_dulint_get_low(trx_sys->max_trx_id)); + + is_ok = FALSE; + } + + if (!has_kernel_mutex) { + mutex_exit(&kernel_mutex); + } + + return(is_ok); +} + /************************************************************************* Checks that a record is seen in a consistent read. */ @@ -539,6 +586,15 @@ lock_clust_rec_cons_read_sees( return(TRUE); } + if (!lock_check_trx_id_sanity(trx_id, rec, index, FALSE)) { + /* Trying to get the 'history' of a corrupt record is bound + to fail: let us try to use the record itself in the query */ + fprintf(stderr, +"InnoDB: We try to access the corrupt record in the query anyway.\n"); + + return(TRUE); + } + return(FALSE); } @@ -562,7 +618,9 @@ lock_sec_rec_cons_read_sees( read_view_t* view) /* in: consistent read view */ { dulint max_trx_id; - + + UT_NOT_USED(index); + ut_ad(!(index->type & DICT_CLUSTERED)); ut_ad(page_rec_is_user_rec(rec)); @@ -575,6 +633,16 @@ lock_sec_rec_cons_read_sees( if (ut_dulint_cmp(max_trx_id, view->up_limit_id) >= 0) { + if (!lock_check_trx_id_sanity(max_trx_id, rec, index, FALSE)) { + /* Trying to get the 'history' of a corrupt record is + bound to fail: let us try to use the record itself in + the query */ + fprintf(stderr, +"InnoDB: We try to access the corrupt record in the query anyway.\n"); + + return(TRUE); + } + return(FALSE); } @@ -1569,6 +1637,15 @@ lock_sec_rec_some_has_impl_off_kernel( /* Ok, in this case it is possible that some transaction has an implicit x-lock. We have to look in the clustered index. 
*/ + if (!lock_check_trx_id_sanity(page_get_max_trx_id(page), rec, index, + TRUE)) { + buf_page_print(page); + + /* The page is corrupt: try to avoid a crash by returning + NULL */ + return(NULL); + } + return(row_vers_impl_x_locked_off_kernel(rec, index)); } @@ -2565,7 +2642,7 @@ lock_move_rec_list_start( ulint heap_no; ulint type_mode; - ut_ad(new_page); + ut_a(new_page); lock_mutex_enter_kernel(); @@ -3028,7 +3105,7 @@ lock_deadlock_recursive( we return LOCK_VICTIM_IS_START */ { lock_t* lock; - ulint bit_no; + ulint bit_no = ULINT_UNDEFINED; trx_t* lock_trx; char* err_buf; ulint ret; @@ -3067,6 +3144,7 @@ lock_deadlock_recursive( lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); } else { ut_ad(lock_get_type(lock) == LOCK_REC); + ut_a(bit_no != ULINT_UNDEFINED); lock = lock_rec_get_prev(lock, bit_no); } diff --git a/innobase/log/log0log.c b/innobase/log/log0log.c index e15812e03af..b5ce1a3d97b 100644 --- a/innobase/log/log0log.c +++ b/innobase/log/log0log.c @@ -375,7 +375,7 @@ log_pad_current_log_block(void) log_close(); log_release(); - ut_ad((ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE) + ut_anp((ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE) == LOG_BLOCK_HDR_SIZE); } @@ -1070,8 +1070,8 @@ log_group_write_buf( ulint i; ut_ad(mutex_own(&(log_sys->mutex))); - ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0); - ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_anp(len % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_anp(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); if (new_data_offset == 0) { write_header = TRUE; @@ -2123,11 +2123,11 @@ log_group_archive( start_lsn = log_sys->archived_lsn; - ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_anp(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); end_lsn = log_sys->next_archived_lsn; - ut_ad(ut_dulint_get_low(end_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_anp(ut_dulint_get_low(end_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); buf = log_sys->archive_buf; @@ -2234,7 
+2234,7 @@ loop: group->next_archived_file_no = group->archived_file_no + n_files; group->next_archived_offset = next_offset % group->file_size; - ut_ad(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0); + ut_anp(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0); } /********************************************************* @@ -2429,8 +2429,8 @@ loop: start_lsn = log_sys->archived_lsn; if (calc_new_limit) { - ut_ad(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0); - + ut_anp(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE + == 0); limit_lsn = ut_dulint_add(start_lsn, log_sys->archive_buf_size); @@ -2916,6 +2916,7 @@ loop: mutex_enter(&kernel_mutex); + /* Check that there are no longer transactions */ if (trx_n_mysql_transactions > 0 || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) { @@ -2924,6 +2925,8 @@ loop: goto loop; } + /* Check that the master thread is suspended */ + if (srv_n_threads_active[SRV_MASTER] != 0) { mutex_exit(&kernel_mutex); @@ -2952,7 +2955,6 @@ loop: } log_archive_all(); - log_make_checkpoint_at(ut_dulint_max, TRUE); mutex_enter(&(log_sys->mutex)); @@ -2961,8 +2963,9 @@ loop: if (ut_dulint_cmp(lsn, log_sys->last_checkpoint_lsn) != 0 || (srv_log_archive_on - && ut_dulint_cmp(lsn, - ut_dulint_add(log_sys->archived_lsn, LOG_BLOCK_HDR_SIZE)) != 0)) { + && ut_dulint_cmp(lsn, + ut_dulint_add(log_sys->archived_lsn, LOG_BLOCK_HDR_SIZE)) + != 0)) { mutex_exit(&(log_sys->mutex)); @@ -2981,10 +2984,22 @@ loop: mutex_exit(&(log_sys->mutex)); + mutex_enter(&kernel_mutex); + /* Check that the master thread has stayed suspended */ + if (srv_n_threads_active[SRV_MASTER] != 0) { + fprintf(stderr, +"InnoDB: Warning: the master thread woke up during shutdown\n"); + + mutex_exit(&kernel_mutex); + + goto loop; + } + mutex_exit(&kernel_mutex); + fil_flush_file_spaces(FIL_TABLESPACE); fil_flush_file_spaces(FIL_LOG); - /* The following fil_write_... will pass the buffer pool: therefore + /* The next fil_write_... 
will pass the buffer pool: therefore it is essential that the buffer pool has been completely flushed to disk! */ @@ -2993,12 +3008,14 @@ loop: goto loop; } + /* The lock timeout thread should now have exited */ + if (srv_lock_timeout_and_monitor_active) { goto loop; } - /* We now suspend also the InnoDB error monitor thread */ + /* We now let also the InnoDB error monitor thread to exit */ srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; @@ -3008,6 +3025,7 @@ loop: } /* Make some checks that the server really is quiet */ + ut_a(srv_n_threads_active[SRV_MASTER] == 0); ut_a(buf_all_freed()); ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn)); @@ -3016,6 +3034,7 @@ loop: fil_flush_file_spaces(FIL_TABLESPACE); /* Make some checks that the server really is quiet */ + ut_a(srv_n_threads_active[SRV_MASTER] == 0); ut_a(buf_all_freed()); ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn)); } diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c index 47833214d15..8e5fe819afb 100644 --- a/innobase/log/log0recv.c +++ b/innobase/log/log0recv.c @@ -973,7 +973,7 @@ recv_recover_page( ulint space, /* in: space id */ ulint page_no) /* in: page number */ { - buf_block_t* block; + buf_block_t* block = NULL; recv_addr_t* recv_addr; recv_t* recv; byte* buf; @@ -1085,7 +1085,7 @@ recv_recover_page( page_lsn = page_newest_lsn; mach_write_to_8(page + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN, ut_dulint_zero); + - FIL_PAGE_END_LSN_OLD_CHKSUM, ut_dulint_zero); mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero); } @@ -1107,7 +1107,7 @@ recv_recover_page( recv_parse_or_apply_log_rec_body(recv->type, buf, buf + recv->len, page, &mtr); mach_write_to_8(page + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN, + - FIL_PAGE_END_LSN_OLD_CHKSUM, ut_dulint_add(recv->start_lsn, recv->len)); mach_write_to_8(page + FIL_PAGE_LSN, @@ -1132,6 +1132,8 @@ recv_recover_page( mutex_exit(&(recv_sys->mutex)); if (!recover_backup && modification_to_page) { + ut_a(block); + buf_flush_recv_note_modification(block, start_lsn, end_lsn); } @@ 
-1339,6 +1341,7 @@ loop: mutex_exit(&(recv_sys->mutex)); } +#ifdef UNIV_HOTBACKUP /*********************************************************************** Applies log records in the hash table to a backup. */ @@ -1520,8 +1523,8 @@ recv_check_identical( for (i = 0; i < len; i++) { if (str1[i] != str2[i]) { - fprintf(stderr, "Strings do not match at offset %lu\n", i); - + fprintf(stderr, + "Strings do not match at offset %lu\n", i); ut_print_buf(str1 + i, 16); fprintf(stderr, "\n"); ut_print_buf(str2 + i, 16); @@ -1654,6 +1657,7 @@ recv_compare_spaces_low( recv_compare_spaces(space1, space2, n_pages); } +#endif /*********************************************************************** Tries to parse a single log record and returns its length. */ diff --git a/innobase/mem/mem0pool.c b/innobase/mem/mem0pool.c index 382e505b63f..b004a8c4df7 100644 --- a/innobase/mem/mem0pool.c +++ b/innobase/mem/mem0pool.c @@ -99,6 +99,12 @@ mem_pool_t* mem_comm_pool = NULL; ulint mem_out_of_mem_err_msg_count = 0; +/* We use this counter to check that the mem pool mutex does not leak; +this is to track a strange assertion failure reported at +mysql@lists.mysql.com */ + +ulint mem_n_threads_inside = 0; + /************************************************************************ Reserves the mem pool mutex. 
*/ @@ -328,6 +334,9 @@ mem_area_alloc( n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE)); mutex_enter(&(pool->mutex)); + mem_n_threads_inside++; + + ut_a(mem_n_threads_inside == 1); area = UT_LIST_GET_FIRST(pool->free_list[n]); @@ -338,6 +347,7 @@ mem_area_alloc( /* Out of memory in memory pool: we try to allocate from the operating system with the regular malloc: */ + mem_n_threads_inside--; mutex_exit(&(pool->mutex)); return(ut_malloc(size)); @@ -353,6 +363,16 @@ mem_area_alloc( n); mem_analyze_corruption((byte*)area); + + /* Try to analyze a strange assertion failure reported at + mysql@lists.mysql.com where the free bit IS 1 in the + hex dump above */ + + if (mem_area_get_free(area)) { + fprintf(stderr, +"InnoDB: Probably a race condition because now the area is marked free!\n"); + } + ut_a(0); } @@ -374,6 +394,7 @@ mem_area_alloc( pool->reserved += mem_area_get_size(area); + mem_n_threads_inside--; mutex_exit(&(pool->mutex)); ut_ad(mem_pool_validate(pool)); @@ -495,6 +516,9 @@ mem_area_free( n = ut_2_log(size); mutex_enter(&(pool->mutex)); + mem_n_threads_inside++; + + ut_a(mem_n_threads_inside == 1); if (buddy && mem_area_get_free(buddy) && (size == mem_area_get_size(buddy))) { @@ -518,6 +542,7 @@ mem_area_free( pool->reserved += ut_2_exp(n); + mem_n_threads_inside--; mutex_exit(&(pool->mutex)); mem_area_free(new_ptr, pool); @@ -533,6 +558,7 @@ mem_area_free( pool->reserved -= size; } + mem_n_threads_inside--; mutex_exit(&(pool->mutex)); ut_ad(mem_pool_validate(pool)); @@ -577,7 +603,7 @@ mem_pool_validate( } } - ut_a(free + pool->reserved == pool->size + ut_anp(free + pool->reserved == pool->size - (pool->size % MEM_AREA_MIN_SIZE)); mutex_exit(&(pool->mutex)); diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c index 2f32b9347dc..612bd534fd1 100644 --- a/innobase/os/os0file.c +++ b/innobase/os/os0file.c @@ -60,6 +60,7 @@ struct os_aio_slot_struct{ ulint pos; /* index of the slot in the aio array */ ibool reserved; /* TRUE if this 
slot is reserved */ + time_t reservation_time;/* time when reserved */ ulint len; /* length of the block to read or write */ byte* buf; /* buffer used in i/o */ @@ -147,6 +148,12 @@ time_t os_last_printout; ibool os_has_said_disk_full = FALSE; +/* The mutex protecting the following counts of pending pread and pwrite +operations */ +os_mutex_t os_file_count_mutex; +ulint os_file_n_pending_preads = 0; +ulint os_file_n_pending_pwrites = 0; + /*************************************************************************** Gets the operating system version. Currently works only on Windows. */ @@ -364,6 +371,8 @@ os_io_init_simple(void) { ulint i; + os_file_count_mutex = os_mutex_create(NULL); + for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) { os_file_seek_mutexes[i] = os_mutex_create(NULL); } @@ -415,9 +424,8 @@ try_again: file = CreateFile(name, access, - FILE_SHARE_READ | FILE_SHARE_WRITE, - /* file can be read and written - also by other processes */ + FILE_SHARE_READ,/* file can be read also by other + processes */ NULL, /* default security attributes */ create_flag, attributes, @@ -481,6 +489,101 @@ try_again: return(file); #endif } + +/******************************************************************** +A simple function to open or create a file. 
*/ + +os_file_t +os_file_create_simple_no_error_handling( +/*====================================*/ + /* out, own: handle to the file, not defined if error, + error number can be retrieved with os_get_last_error */ + char* name, /* in: name of the file or path as a null-terminated + string */ + ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened + (if does not exist, error), or OS_FILE_CREATE if a new + file is created (if exists, error) */ + ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ + ibool* success)/* out: TRUE if succeed, FALSE if error */ +{ +#ifdef __WIN__ + os_file_t file; + DWORD create_flag; + DWORD access; + DWORD attributes = 0; + + ut_a(name); + + if (create_mode == OS_FILE_OPEN) { + create_flag = OPEN_EXISTING; + } else if (create_mode == OS_FILE_CREATE) { + create_flag = CREATE_NEW; + } else { + create_flag = 0; + ut_error; + } + + if (access_type == OS_FILE_READ_ONLY) { + access = GENERIC_READ; + } else if (access_type == OS_FILE_READ_WRITE) { + access = GENERIC_READ | GENERIC_WRITE; + } else { + access = 0; + ut_error; + } + + file = CreateFile(name, + access, + FILE_SHARE_READ,/* file can be read also by other + processes */ + NULL, /* default security attributes */ + create_flag, + attributes, + NULL); /* no template file */ + + if (file == INVALID_HANDLE_VALUE) { + *success = FALSE; + } else { + *success = TRUE; + } + + return(file); +#else + os_file_t file; + int create_flag; + + ut_a(name); + + if (create_mode == OS_FILE_OPEN) { + if (access_type == OS_FILE_READ_ONLY) { + create_flag = O_RDONLY; + } else { + create_flag = O_RDWR; + } + } else if (create_mode == OS_FILE_CREATE) { + create_flag = O_RDWR | O_CREAT | O_EXCL; + } else { + create_flag = 0; + ut_error; + } + + if (create_mode == OS_FILE_CREATE) { + file = open(name, create_flag, S_IRUSR | S_IWUSR + | S_IRGRP | S_IWGRP); + } else { + file = open(name, create_flag); + } + + if (file == -1) { + *success = FALSE; + } else { + *success = TRUE; + } + 
+ return(file); +#endif +} + /******************************************************************** Opens an existing file or creates a new. */ @@ -566,9 +669,14 @@ try_again: file = CreateFile(name, GENERIC_READ | GENERIC_WRITE, /* read and write access */ - FILE_SHARE_READ | FILE_SHARE_WRITE, - /* file can be read and written - also by other processes */ + FILE_SHARE_READ,/* File can be read also by other + processes; we must give the read + permission because of ibbackup. We do + not give the write permission to + others because if one would succeed to + start 2 instances of mysqld on the + SAME files, that could cause severe + database corruption! */ NULL, /* default security attributes */ create_flag, attributes, @@ -676,6 +784,41 @@ os_file_close( #endif } +/*************************************************************************** +Closes a file handle. */ + +ibool +os_file_close_no_error_handling( +/*============================*/ + /* out: TRUE if success */ + os_file_t file) /* in, own: handle to a file */ +{ +#ifdef __WIN__ + BOOL ret; + + ut_a(file); + + ret = CloseHandle(file); + + if (ret) { + return(TRUE); + } + + return(FALSE); +#else + int ret; + + ret = close(file); + + if (ret == -1) { + + return(FALSE); + } + + return(TRUE); +#endif +} + /*************************************************************************** Gets a file size. 
*/ @@ -896,6 +1039,7 @@ os_file_pread( offset */ { off_t offs; + ssize_t n_bytes; ut_a((offset & 0xFFFFFFFF) == offset); @@ -917,7 +1061,17 @@ os_file_pread( os_n_file_reads++; #ifdef HAVE_PREAD - return(pread(file, buf, n, offs)); + os_mutex_enter(os_file_count_mutex); + os_file_n_pending_preads++; + os_mutex_exit(os_file_count_mutex); + + n_bytes = pread(file, buf, n, offs); + + os_mutex_enter(os_file_count_mutex); + os_file_n_pending_preads--; + os_mutex_exit(os_file_count_mutex); + + return(n_bytes); #else { ssize_t ret; @@ -982,8 +1136,16 @@ os_file_pwrite( os_n_file_writes++; #ifdef HAVE_PWRITE + os_mutex_enter(os_file_count_mutex); + os_file_n_pending_pwrites++; + os_mutex_exit(os_file_count_mutex); + ret = pwrite(file, buf, n, offs); + os_mutex_enter(os_file_count_mutex); + os_file_n_pending_pwrites--; + os_mutex_exit(os_file_count_mutex); + if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC && srv_unix_file_flush_method != SRV_UNIX_NOSYNC && !os_do_not_call_flush_at_each_write) { @@ -1372,20 +1534,36 @@ os_aio_init( os_io_init_simple(); + for (i = 0; i < n_segments; i++) { + srv_io_thread_op_info[i] = (char*)"not started yet"; + } + n_per_seg = n / n_segments; n_write_segs = (n_segments - 2) / 2; n_read_segs = n_segments - 2 - n_write_segs; /* printf("Array n per seg %lu\n", n_per_seg); */ - os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg, - n_read_segs); - os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg, - n_write_segs); os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1); + srv_io_thread_function[0] = (char*)"insert buffer thread"; + os_aio_log_array = os_aio_array_create(n_per_seg, 1); + srv_io_thread_function[1] = (char*)"log thread"; + + os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg, + n_read_segs); + for (i = 2; i < 2 + n_read_segs; i++) { + srv_io_thread_function[i] = (char*)"read thread"; + } + + os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg, + n_write_segs); + for (i = 2 
+ n_read_segs; i < n_segments; i++) { + srv_io_thread_function[i] = (char*)"write thread"; + } + os_aio_sync_array = os_aio_array_create(n_slots_sync, 1); os_aio_n_segments = n_segments; @@ -1677,6 +1855,7 @@ loop: } slot->reserved = TRUE; + slot->reservation_time = time(NULL); slot->message1 = message1; slot->message2 = message2; slot->file = file; @@ -2249,6 +2428,8 @@ os_aio_simulated_handle( ulint total_len; ulint offs; ulint lowest_offset; + ulint biggest_age; + ulint age; byte* combined_buf; byte* combined_buf2= 0; /* Remove warning */ ibool ret; @@ -2301,22 +2482,55 @@ restart: n_consecutive = 0; - /* Look for an i/o request at the lowest offset in the array - (we ignore the high 32 bits of the offset in these heuristics) */ + /* If there are at least 2 seconds old requests, then pick the oldest + one to prevent starvation. If several requests have the same age, + then pick the one at the lowest offset. */ + biggest_age = 0; lowest_offset = ULINT_MAX; - + for (i = 0; i < n; i++) { slot = os_aio_array_get_nth_slot(array, i + segment * n); - if (slot->reserved && slot->offset < lowest_offset) { + if (slot->reserved) { + age = (ulint)difftime(time(NULL), + slot->reservation_time); - /* Found an i/o request */ - consecutive_ios[0] = slot; + if ((age >= 2 && age > biggest_age) + || (age >= 2 && age == biggest_age + && slot->offset < lowest_offset)) { - n_consecutive = 1; + /* Found an i/o request */ + consecutive_ios[0] = slot; - lowest_offset = slot->offset; + n_consecutive = 1; + + biggest_age = age; + lowest_offset = slot->offset; + } + } + } + + if (n_consecutive == 0) { + /* There were no old requests. 
Look for an i/o request at the + lowest offset in the array (we ignore the high 32 bits of the + offset in these heuristics) */ + + lowest_offset = ULINT_MAX; + + for (i = 0; i < n; i++) { + slot = os_aio_array_get_nth_slot(array, + i + segment * n); + + if (slot->reserved && slot->offset < lowest_offset) { + + /* Found an i/o request */ + consecutive_ios[0] = slot; + + n_consecutive = 1; + + lowest_offset = slot->offset; + } } } @@ -2422,7 +2636,7 @@ consecutive_loop: + FIL_PAGE_LSN + 4) != mach_read_from_4(combined_buf + len2 + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN + 4)) { + - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: ERROR: The page to be written seems corrupt!\n"); @@ -2583,14 +2797,15 @@ os_aio_print( double avg_bytes_read; ulint i; - if (buf_end - buf < 1000) { + if (buf_end - buf < 1200) { return; } for (i = 0; i < srv_n_file_io_threads; i++) { - buf += sprintf(buf, "I/O thread %lu state: %s\n", i, - srv_io_thread_op_info[i]); + buf += sprintf(buf, "I/O thread %lu state: %s (%s)\n", i, + srv_io_thread_op_info[i], + srv_io_thread_function[i]); } buf += sprintf(buf, "Pending normal aio reads:"); @@ -2665,6 +2880,12 @@ loop: "%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n", os_n_file_reads, os_n_file_writes, os_n_fsyncs); + if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) { + buf += sprintf(buf, + "%lu pending preads, %lu pending pwrites\n", + os_file_n_pending_preads, os_file_n_pending_pwrites); + } + if (os_n_file_reads == os_n_file_reads_old) { avg_bytes_read = 0.0; } else { diff --git a/innobase/os/os0thread.c b/innobase/os/os0thread.c index 9af98760ad1..1252cc5e4b7 100644 --- a/innobase/os/os0thread.c +++ b/innobase/os/os0thread.c @@ -187,8 +187,8 @@ os_thread_exit( is cast as a DWORD */ { #ifdef UNIV_DEBUG_THREAD_CREATION - printf("A thread exits.\n"); - printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id())); + printf("Thread exits, id %lu\n", + 
os_thread_pf(os_thread_get_curr_id())); #endif os_mutex_enter(os_sync_mutex); os_thread_count--; diff --git a/innobase/page/page0cur.c b/innobase/page/page0cur.c index d3a40668c4b..7e2fc19c00f 100644 --- a/innobase/page/page0cur.c +++ b/innobase/page/page0cur.c @@ -14,6 +14,7 @@ Created 10/4/1994 Heikki Tuuri #include "rem0cmp.h" #include "mtr0log.h" #include "log0recv.h" +#include "rem0cmp.h" ulint page_cur_short_succ = 0; @@ -218,6 +219,8 @@ page_cur_search_with_match( || (mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE) || (mode == PAGE_CUR_LE_OR_EXTENDS) || (mode == PAGE_CUR_DBG)); + page_check_dir(page); + #ifdef PAGE_CUR_ADAPT if ((page_header_get_field(page, PAGE_LEVEL) == 0) && (mode == PAGE_CUR_LE) @@ -595,6 +598,7 @@ page_cur_parse_insert_rec( rec_t* cursor_rec; byte buf1[1024]; byte* buf; + byte* ptr2 = ptr; ulint info_bits = 0; /* remove warning */ page_cur_t cursor; @@ -697,7 +701,20 @@ page_cur_parse_insert_rec( /* Build the inserted record to buf */ - ut_a(mismatch_index < UNIV_PAGE_SIZE); + if (mismatch_index >= UNIV_PAGE_SIZE) { + printf("Is short %lu, info_bits %lu, offset %lu, o_offset %lu\n" + "mismatch index %lu, end_seg_len %lu\n" + "parsed len %lu\n", + is_short, info_bits, offset, origin_offset, + mismatch_index, end_seg_len, (ulint)(ptr - ptr2)); + + printf("Dump of 300 bytes of log:\n"); + ut_print_buf(ptr2, 300); + + buf_page_print(page); + + ut_a(0); + } ut_memcpy(buf, rec_get_start(cursor_rec), mismatch_index); ut_memcpy(buf + mismatch_index, ptr, end_seg_len); diff --git a/innobase/page/page0page.c b/innobase/page/page0page.c index 7d240bdd5b0..ef5dad60c08 100644 --- a/innobase/page/page0page.c +++ b/innobase/page/page0page.c @@ -353,7 +353,7 @@ page_create( infimum_rec = rec_convert_dtuple_to_rec(heap_top, tuple); - ut_ad(infimum_rec == page + PAGE_INFIMUM); + ut_a(infimum_rec == page + PAGE_INFIMUM); rec_set_n_owned(infimum_rec, 1); rec_set_heap_no(infimum_rec, 0); @@ -370,7 +370,7 @@ page_create( supremum_rec = 
rec_convert_dtuple_to_rec(heap_top, tuple); - ut_ad(supremum_rec == page + PAGE_SUPREMUM); + ut_a(supremum_rec == page + PAGE_SUPREMUM); rec_set_n_owned(supremum_rec, 1); rec_set_heap_no(supremum_rec, 1); @@ -389,6 +389,8 @@ page_create( page_header_set_ptr(page, PAGE_FREE, NULL); page_header_set_field(page, PAGE_GARBAGE, 0); page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); + page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION); + page_header_set_field(page, PAGE_N_DIRECTION, 0); page_header_set_field(page, PAGE_N_RECS, 0); page_set_max_trx_id(page, ut_dulint_zero); @@ -402,17 +404,22 @@ page_create( slot = page_dir_get_nth_slot(page, 1); page_dir_slot_set_rec(slot, supremum_rec); - /* Set next pointers in infimum and supremum */ + /* Set the next pointers in infimum and supremum */ rec_set_next_offs(infimum_rec, (ulint)(supremum_rec - page)); rec_set_next_offs(supremum_rec, 0); +#ifdef notdefined + /* Disable the use of page_template: there is a race condition here: + while one thread is creating page_template, another one can start + using it before the memcpy completes! 
*/ + if (page_template == NULL) { page_template = mem_alloc(UNIV_PAGE_SIZE); ut_memcpy(page_template, page, UNIV_PAGE_SIZE); } - +#endif return(page); } @@ -439,6 +446,9 @@ page_copy_rec_list_end_no_locks( page_cur_move_to_next(&cur1); } + /* Track a memory corruption bug in Windows */ + ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == PAGE_INFIMUM); + page_cur_set_before_first(new_page, &cur2); /* Copy records from the original page to the new page */ @@ -449,6 +459,8 @@ page_copy_rec_list_end_no_locks( ut_a( page_cur_rec_insert(&cur2, page_cur_get_rec(&cur1), mtr)); + ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) + == PAGE_INFIMUM); page_cur_move_to_next(&cur1); page_cur_move_to_next(&cur2); } @@ -1315,6 +1327,37 @@ page_rec_validate( return(TRUE); } + +/******************************************************************* +Checks that the first directory slot points to the infimum record and +the last to the supremum. This function is intended to track if the +bug fixed in 4.0.14 has caused corruption to users' databases. 
*/ + +void +page_check_dir( +/*===========*/ + page_t* page) /* in: index page */ +{ + ulint n_slots; + + n_slots = page_dir_get_n_slots(page); + + if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, 0)) + != page_get_infimum_rec(page)) { + + fprintf(stderr, +"InnoDB: Page directory corruption: infimum not pointed to\n"); + buf_page_print(page); + } + + if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, n_slots - 1)) + != page_get_supremum_rec(page)) { + + fprintf(stderr, +"InnoDB: Page directory corruption: supremum not pointed to\n"); + buf_page_print(page); + } +} /******************************************************************* This function checks the consistency of an index page when we do not @@ -1598,7 +1641,8 @@ page_validate( "InnoDB: previous record %s\n", err_buf); rec_sprintf(err_buf, 900, rec); - fprintf(stderr, "InnoDB: record %s\n", err_buf); + fprintf(stderr, + "InnoDB: record %s\n", err_buf); goto func_exit; } diff --git a/innobase/pars/pars0opt.c b/innobase/pars/pars0opt.c index 91083e6fa16..4faf83b47a3 100644 --- a/innobase/pars/pars0opt.c +++ b/innobase/pars/pars0opt.c @@ -1058,7 +1058,6 @@ opt_clust_access( dfield_t* dfield; mem_heap_t* heap; ulint n_fields; - ulint col_no; ulint pos; ulint i; @@ -1093,8 +1092,7 @@ opt_clust_access( plan->clust_map = mem_heap_alloc(heap, n_fields * sizeof(ulint)); for (i = 0; i < n_fields; i++) { - col_no = dict_index_get_nth_col_no(clust_index, i); - pos = dict_index_get_nth_col_pos(index, col_no); + pos = dict_index_get_nth_field_pos(index, clust_index, i); *(plan->clust_map + i) = pos; @@ -1109,7 +1107,8 @@ opt_clust_access( dfield = dtuple_get_nth_field(plan->clust_ref, table->mix_len); - dfield_set_data(dfield, mem_heap_alloc(heap, table->mix_id_len), + dfield_set_data(dfield, mem_heap_alloc(heap, + table->mix_id_len), table->mix_id_len); ut_memcpy(dfield_get_data(dfield), table->mix_id_buf, table->mix_id_len); diff --git a/innobase/pars/pars0pars.c b/innobase/pars/pars0pars.c index 
664f498ef3e..3e43b6ae262 100644 --- a/innobase/pars/pars0pars.c +++ b/innobase/pars/pars0pars.c @@ -244,13 +244,11 @@ pars_resolve_func_data_type( /* Inherit the data type from the first argument (which must not be the SQL null literal whose type is DATA_ERROR) */ - ut_a(dtype_get_mtype(que_node_get_data_type(arg)) - != DATA_ERROR); dtype_copy(que_node_get_data_type(node), que_node_get_data_type(arg)); - ut_a(dtype_get_mtype(que_node_get_data_type(node)) == DATA_INT); - + ut_a(dtype_get_mtype(que_node_get_data_type(node)) + == DATA_INT); } else if (func == PARS_COUNT_TOKEN) { ut_a(arg); dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4, 0); @@ -1596,7 +1594,7 @@ pars_create_index( column = column_list; while (column) { - dict_mem_index_add_field(index, column->name, 0); + dict_mem_index_add_field(index, column->name, 0, 0); column->resolved = TRUE; column->token_type = SYM_COLUMN; diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c index e9740d7ea78..2e18e68ec43 100644 --- a/innobase/rem/rem0cmp.c +++ b/innobase/rem/rem0cmp.c @@ -38,7 +38,7 @@ Used in debug checking of cmp_dtuple_... . This function is used to compare a data tuple to a physical record. If dtuple has n fields then rec must have either m >= n fields, or it must differ from dtuple in some of the m fields rec has. 
*/ -static + int cmp_debug_dtuple_rec_with_match( /*============================*/ @@ -50,9 +50,10 @@ cmp_debug_dtuple_rec_with_match( dtuple in some of the common fields, or which has an equal number or more fields than dtuple */ - ulint* matched_fields);/* in/out: number of already completely - matched fields; when function returns, - contains the value for current comparison */ + ulint* matched_fields);/* in/out: number of already + completely matched fields; when function + returns, contains the value for current + comparison */ /***************************************************************** This function is used to compare two data fields for which the data type is such that we must use MySQL code to compare them. The prototype here @@ -79,17 +80,12 @@ UNIV_INLINE ulint cmp_collate( /*========*/ - /* out: collation order position */ - dtype_t* type __attribute__((unused)) , /* in: type */ - ulint code) /* in: code of a character stored in database - record */ -{ - ut_ad((type->mtype == DATA_CHAR) || (type->mtype == DATA_VARCHAR)); - + /* out: collation order position */ + ulint code) /* in: code of a character stored in database record */ +{ return((ulint) srv_latin1_ordering[code]); } - /***************************************************************** Returns TRUE if two types are equal for comparison purposes. */ @@ -118,7 +114,8 @@ cmp_types_are_equal( if (type1->mtype == DATA_INT && (type1->prtype & DATA_UNSIGNED) - != (type2->prtype & DATA_UNSIGNED)) { + != (type2->prtype & DATA_UNSIGNED)) { + /* The storage format of an unsigned integer is different from a signed integer: in a signed integer we OR 0x8000... to the value of positive integers. 
*/ @@ -131,12 +128,17 @@ cmp_types_are_equal( return(FALSE); } + if (type1->mtype == DATA_BLOB && (type1->prtype & DATA_BINARY_TYPE) + != (type2->prtype & DATA_BINARY_TYPE)) { + return(FALSE); + } + return(TRUE); } /***************************************************************** -Innobase uses this function is to compare two data fields for which the -data type is such that we must compare whole fields. */ +Innobase uses this function to compare two data fields for which the data type +is such that we must compare whole fields or call MySQL to do the comparison */ static int cmp_whole_field( @@ -239,8 +241,34 @@ cmp_whole_field( return(0); case DATA_VARMYSQL: case DATA_MYSQL: + case DATA_BLOB: + if (data_type == DATA_BLOB + && 0 != (type->prtype & DATA_BINARY_TYPE)) { + + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: Error: comparing a binary BLOB with a character set sensitive\n" +"InnoDB: comparison!\n"); + } + + /* MySQL does not pad the ends of strings with spaces in a + comparison. That would cause a foreign key check to fail for + non-latin1 character sets if we have different length columns. + To prevent that we remove trailing spaces here before doing + the comparison. NOTE that if we in the future map more MySQL + types to DATA_MYSQL or DATA_VARMYSQL, we have to change this + code. 
*/ + + while (a_length > 0 && a[a_length - 1] == ' ') { + a_length--; + } + + while (b_length > 0 && b[b_length - 1] == ' ') { + b_length--; + } + return(innobase_mysql_cmp( - (int)(type->prtype & ~DATA_NOT_NULL), + (int)(type->prtype & DATA_MYSQL_TYPE_MASK), a, a_length, b, b_length)); default: fprintf(stderr, @@ -291,7 +319,10 @@ cmp_data_data_slow( return(1); } - if (cur_type->mtype >= DATA_FLOAT) { + if (cur_type->mtype >= DATA_FLOAT + || (cur_type->mtype == DATA_BLOB + && (cur_type->prtype & DATA_NONLATIN1))) { + return(cmp_whole_field(cur_type, data1, len1, data2, len2)); } @@ -334,9 +365,12 @@ cmp_data_data_slow( goto next_byte; } - if (cur_type->mtype <= DATA_CHAR) { - data1_byte = cmp_collate(cur_type, data1_byte); - data2_byte = cmp_collate(cur_type, data2_byte); + if (cur_type->mtype <= DATA_CHAR + || (cur_type->mtype == DATA_BLOB + && 0 == (cur_type->prtype & DATA_BINARY_TYPE))) { + + data1_byte = cmp_collate(data1_byte); + data2_byte = cmp_collate(data2_byte); } if (data1_byte > data2_byte) { @@ -487,7 +521,9 @@ cmp_dtuple_rec_with_match( } } - if (cur_type->mtype >= DATA_FLOAT) { + if (cur_type->mtype >= DATA_FLOAT + || (cur_type->mtype == DATA_BLOB + && (cur_type->prtype & DATA_NONLATIN1))) { ret = cmp_whole_field(cur_type, dfield_get_data(dtuple_field), dtuple_f_len, @@ -547,10 +583,13 @@ cmp_dtuple_rec_with_match( goto next_byte; } - if (cur_type->mtype <= DATA_CHAR) { - rec_byte = cmp_collate(cur_type, rec_byte); - dtuple_byte = cmp_collate(cur_type, - dtuple_byte); + if (cur_type->mtype <= DATA_CHAR + || (cur_type->mtype == DATA_BLOB + && 0 == + (cur_type->prtype & DATA_BINARY_TYPE))) { + + rec_byte = cmp_collate(rec_byte); + dtuple_byte = cmp_collate(dtuple_byte); } if (dtuple_byte > rec_byte) { @@ -583,8 +622,8 @@ order_resolved: matched_fields)); ut_ad(*matched_fields == cur_field); /* In the debug version, the above cmp_debug_... 
sets - *matched_fields to a value */ - *matched_fields = cur_field; + *matched_fields to a value */ + *matched_fields = cur_field; *matched_bytes = cur_bytes; return(ret); @@ -804,7 +843,10 @@ cmp_rec_rec_with_match( } } - if (cur_type->mtype >= DATA_FLOAT) { + if (cur_type->mtype >= DATA_FLOAT + || (cur_type->mtype == DATA_BLOB + && (cur_type->prtype & DATA_NONLATIN1))) { + ret = cmp_whole_field(cur_type, rec1_b_ptr, rec1_f_len, rec2_b_ptr, rec2_f_len); @@ -861,9 +903,13 @@ cmp_rec_rec_with_match( goto next_byte; } - if (cur_type->mtype <= DATA_CHAR) { - rec1_byte = cmp_collate(cur_type, rec1_byte); - rec2_byte = cmp_collate(cur_type, rec2_byte); + if (cur_type->mtype <= DATA_CHAR + || (cur_type->mtype == DATA_BLOB + && 0 == + (cur_type->prtype & DATA_BINARY_TYPE))) { + + rec1_byte = cmp_collate(rec1_byte); + rec2_byte = cmp_collate(rec2_byte); } if (rec1_byte < rec2_byte) { @@ -906,7 +952,7 @@ This function is used to compare a data tuple to a physical record. If dtuple has n fields then rec must have either m >= n fields, or it must differ from dtuple in some of the m fields rec has. If encounters an externally stored field, returns 0. 
*/ -static + int cmp_debug_dtuple_rec_with_match( /*============================*/ @@ -918,9 +964,10 @@ cmp_debug_dtuple_rec_with_match( dtuple in some of the common fields, or which has an equal number or more fields than dtuple */ - ulint* matched_fields) /* in/out: number of already completely - matched fields; when function returns, - contains the value for current comparison */ + ulint* matched_fields) /* in/out: number of already + completely matched fields; when function + returns, contains the value for current + comparison */ { dtype_t* cur_type; /* pointer to type of the current field in dtuple */ diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c index e96c08a715b..23da0b9b93c 100644 --- a/innobase/row/row0ins.c +++ b/innobase/row/row0ins.c @@ -217,8 +217,8 @@ ins_node_set_new_row( } /*********************************************************************** -Does an insert operation by updating a delete marked existing record -in the index. This situation can occur if the delete marked record is +Does an insert operation by updating a delete-marked existing record +in the index. This situation can occur if the delete-marked record is kept in the index for consistent reads. */ static ulint @@ -240,9 +240,9 @@ row_ins_sec_index_entry_by_modify( ut_ad((cursor->index->type & DICT_CLUSTERED) == 0); ut_ad(rec_get_deleted_flag(rec)); - /* We know that in the ordering entry and rec are identified. - But in their binary form there may be differences if there - are char fields in them. Therefore we have to calculate the + /* We know that in the alphabetical ordering, entry and rec are + identical. But in their binary form there may be differences if + there are char fields in them. Therefore we have to calculate the difference and do an update-in-place if necessary. 
*/ heap = mem_heap_create(1024); @@ -305,8 +305,8 @@ row_ins_clust_index_entry_by_modify( /* Try optimistic updating of the record, keeping changes within the page */ - err = btr_cur_optimistic_update(0, cursor, update, 0, thr, mtr); - + err = btr_cur_optimistic_update(0, cursor, update, 0, thr, + mtr); if (err == DB_OVERFLOW || err == DB_UNDERFLOW) { err = DB_FAIL; } @@ -364,11 +364,17 @@ row_ins_cascade_calc_update_vec( /* out: number of fields in the calculated update vector; the value can also be 0 if no foreign key - fields changed */ + fields changed; the returned value + is ULINT_UNDEFINED if the column + type in the child table is too short + to fit the new value in the parent + table: that means the update fails */ upd_node_t* node, /* in: update node of the parent table */ - dict_foreign_t* foreign) /* in: foreign key constraint whose + dict_foreign_t* foreign, /* in: foreign key constraint whose type is != 0 */ + mem_heap_t* heap) /* in: memory heap to use as + temporary storage */ { upd_node_t* cascade = node->cascade_node; dict_table_t* table = foreign->foreign_table; @@ -381,14 +387,16 @@ row_ins_cascade_calc_update_vec( upd_field_t* parent_ufield; ulint n_fields_updated; ulint parent_field_no; + dtype_t* type; ulint i; ulint j; ut_a(node && foreign && cascade && table && index); /* Calculate the appropriate update vector which will set the fields - in the child index record to the same value as the referenced index - record will get in the update. */ + in the child index record to the same value (possibly padded with + spaces if the column is a fixed length CHAR or FIXBINARY column) as + the referenced index record will get in the update. 
*/ parent_table = node->table; ut_a(parent_table == foreign->referenced_table); @@ -424,7 +432,56 @@ row_ins_cascade_calc_update_vec( dict_table_get_nth_col_pos(table, dict_index_get_nth_col_no(index, i)); ufield->exp = NULL; + ufield->new_val = parent_ufield->new_val; + + type = dict_index_get_nth_type(index, i); + + /* Do not allow a NOT NULL column to be + updated as NULL */ + + if (ufield->new_val.len == UNIV_SQL_NULL + && (type->prtype & DATA_NOT_NULL)) { + + return(ULINT_UNDEFINED); + } + + /* If the new value would not fit in the + column, do not allow the update */ + + if (ufield->new_val.len != UNIV_SQL_NULL + && ufield->new_val.len + > dtype_get_len(type)) { + + return(ULINT_UNDEFINED); + } + + /* If the parent column type has a different + length than the child column type, we may + need to pad with spaces the new value of the + child column */ + + if (dtype_is_fixed_size(type) + && ufield->new_val.len != UNIV_SQL_NULL + && ufield->new_val.len + < dtype_get_fixed_size(type)) { + + ufield->new_val.data = + mem_heap_alloc(heap, + dtype_get_fixed_size(type)); + ufield->new_val.len = + dtype_get_fixed_size(type); + ut_a(dtype_get_pad_char(type) + != ULINT_UNDEFINED); + + memset(ufield->new_val.data, + (byte)dtype_get_pad_char(type), + dtype_get_fixed_size(type)); + ut_memcpy(ufield->new_val.data, + parent_ufield->new_val.data, + parent_ufield->new_val.len); + } + ufield->extern_storage = FALSE; n_fields_updated++; @@ -570,9 +627,11 @@ row_ins_foreign_check_on_constraint( dict_index_t* clust_index; dtuple_t* ref; mem_heap_t* tmp_heap; + mem_heap_t* upd_vec_heap = NULL; rec_t* rec; rec_t* clust_rec; upd_t* update; + ulint n_to_update; ulint err; ulint i; char* ptr; @@ -597,8 +656,10 @@ row_ins_foreign_check_on_constraint( *ptr = '\0'; /* We call a function in ha_innodb.cc */ +#ifndef UNIV_HOTBACKUP innobase_invalidate_query_cache(thr_get_trx(thr), table_name_buf, ut_strlen(table->name) + 1); +#endif node = thr->run_node; if (node->is_delete && 0 == 
(foreign->type & @@ -828,7 +889,21 @@ row_ins_foreign_check_on_constraint( /* Build the appropriate update vector which sets changing foreign->n_fields first fields in rec to new values */ - row_ins_cascade_calc_update_vec(node, foreign); + upd_vec_heap = mem_heap_create(256); + + n_to_update = row_ins_cascade_calc_update_vec(node, foreign, + upd_vec_heap); + if (n_to_update == ULINT_UNDEFINED) { + err = DB_ROW_IS_REFERENCED; + + row_ins_foreign_report_err( +(char*)"Trying a cascaded update where the updated value in the child\n" +"table would not fit in the length of the column, or the value would\n" +"be NULL and the column is declared as not NULL in the child table,", + thr, foreign, btr_pcur_get_rec(pcur), entry); + + goto nonstandard_exit_func; + } if (cascade->update->n_fields == 0) { @@ -867,10 +942,18 @@ row_ins_foreign_check_on_constraint( btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr); + if (upd_vec_heap) { + mem_heap_free(upd_vec_heap); + } + return(err); nonstandard_exit_func: + if (upd_vec_heap) { + mem_heap_free(upd_vec_heap); + } + btr_pcur_store_position(pcur, mtr); mtr_commit(mtr); @@ -1275,6 +1358,11 @@ row_ins_unique_report_err( dtuple_t* entry, /* in: index entry to insert in the index */ dict_index_t* index) /* in: index */ { + UT_NOT_USED(thr); + UT_NOT_USED(rec); + UT_NOT_USED(entry); + UT_NOT_USED(index); + #ifdef notdefined /* Disable reporting to test if the slowdown of REPLACE in 4.0.13 was caused by this! */ @@ -1816,7 +1904,7 @@ row_ins_index_entry( /* Try first optimistic descent to the B-tree */ err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry, - ext_vec, n_ext_vec, thr); + ext_vec, n_ext_vec, thr); if (err != DB_FAIL) { return(err); @@ -1832,13 +1920,15 @@ row_ins_index_entry( /*************************************************************** Sets the values of the dtuple fields in entry from the values of appropriate columns in row. 
*/ -UNIV_INLINE +static void row_ins_index_entry_set_vals( /*=========================*/ + dict_index_t* index, /* in: index */ dtuple_t* entry, /* in: index entry to make */ dtuple_t* row) /* in: row */ { + dict_field_t* ind_field; dfield_t* field; dfield_t* row_field; ulint n_fields; @@ -1850,11 +1940,21 @@ row_ins_index_entry_set_vals( for (i = 0; i < n_fields; i++) { field = dtuple_get_nth_field(entry, i); + ind_field = dict_index_get_nth_field(index, i); - row_field = dtuple_get_nth_field(row, field->col_no); + row_field = dtuple_get_nth_field(row, ind_field->col->ind); + + /* Check column prefix indexes */ + if (ind_field->prefix_len > 0 + && dfield_get_len(row_field) != UNIV_SQL_NULL + && dfield_get_len(row_field) > ind_field->prefix_len) { + + field->len = ind_field->prefix_len; + } else { + field->len = row_field->len; + } field->data = row_field->data; - field->len = row_field->len; } } @@ -1873,7 +1973,7 @@ row_ins_index_entry_step( ut_ad(dtuple_check_typed(node->row)); - row_ins_index_entry_set_vals(node->entry, node->row); + row_ins_index_entry_set_vals(node->index, node->entry, node->row); ut_ad(dtuple_check_typed(node->entry)); diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c index 428e4d568f3..dc2a50c4f0c 100644 --- a/innobase/row/row0mysql.c +++ b/innobase/row/row0mysql.c @@ -76,9 +76,6 @@ row_mysql_store_blob_ref( also to set the NULL bit in the MySQL record header! 
*/ { - ulint sum = 0; - ulint i; - /* MySQL might assume the field is set to zero except the length and the pointer fields */ @@ -93,22 +90,6 @@ row_mysql_store_blob_ref( ut_a(col_len - 8 > 2 || len < 256 * 256); ut_a(col_len - 8 > 3 || len < 256 * 256 * 256); - /* We try to track an elusive bug which probably was fixed - May 9, 2002, but better be sure: we probe the data buffer - to make sure it is in valid allocated memory */ - - for (i = 0; i < len; i++) { - - sum += (ulint)(data + i); - } - - /* The variable below is identically false, we just fool the - compiler to not optimize away our loop */ - if (row_mysql_identically_false) { - - printf("Sum %lu\n", sum); - } - mach_write_to_n_little_endian(dest, col_len - 8, len); ut_memcpy(dest + col_len - 8, (byte*)&data, sizeof(byte*)); @@ -526,6 +507,7 @@ row_get_prebuilt_insert_row( ins_node_t* node; dtuple_t* row; dict_table_t* table = prebuilt->table; + ulint i; ut_ad(prebuilt && table && prebuilt->trx); @@ -549,6 +531,14 @@ row_get_prebuilt_insert_row( dict_table_copy_types(row, table); + /* We init the value of every field to the SQL NULL to avoid + a debug assertion from failing */ + + for (i = 0; i < dtuple_get_n_fields(row); i++) { + + dtuple_get_nth_field(row, i)->len = UNIV_SQL_NULL; + } + ins_node_set_new_row(node, row); prebuilt->ins_graph = @@ -952,7 +942,8 @@ row_update_for_mysql( if (prebuilt->pcur->btr_cur.index == clust_index) { btr_pcur_copy_stored_position(node->pcur, prebuilt->pcur); } else { - btr_pcur_copy_stored_position(node->pcur, prebuilt->clust_pcur); + btr_pcur_copy_stored_position(node->pcur, + prebuilt->clust_pcur); } ut_a(node->pcur->rel_pos == BTR_PCUR_ON); @@ -1477,8 +1468,7 @@ row_create_index_for_mysql( ulint namelen; ulint keywordlen; ulint err; - ulint i; - ulint j; + ulint i, j; ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); ut_ad(mutex_own(&(dict_sys->mutex))); @@ -1486,23 +1476,9 @@ row_create_index_for_mysql( trx->op_info = (char *) "creating index"; - 
trx_start_if_not_started(trx); - - namelen = ut_strlen(index->table_name); - - keywordlen = ut_strlen("_recover_innodb_tmp_table"); - - if (namelen >= keywordlen - && 0 == ut_memcmp( - index->table_name + namelen - keywordlen, - (char*)"_recover_innodb_tmp_table", keywordlen)) { - - return(DB_SUCCESS); - } - /* Check that the same column does not appear twice in the index. - InnoDB assumes this in its algorithms, e.g., update of an index - entry */ + Starting from 4.0.14 InnoDB should be able to cope with that, but + safer not to allow them. */ for (i = 0; i < dict_index_get_n_fields(index); i++) { for (j = 0; j < i; j++) { @@ -1525,6 +1501,20 @@ row_create_index_for_mysql( } } + trx_start_if_not_started(trx); + + namelen = ut_strlen(index->table_name); + + keywordlen = ut_strlen("_recover_innodb_tmp_table"); + + if (namelen >= keywordlen + && 0 == ut_memcmp( + index->table_name + namelen - keywordlen, + (char*)"_recover_innodb_tmp_table", keywordlen)) { + + return(DB_SUCCESS); + } + heap = mem_heap_create(512); trx->dict_operation = TRUE; @@ -1542,6 +1532,7 @@ row_create_index_for_mysql( que_graph_free((que_t*) que_node_get_parent(thr)); error_handling: + if (err != DB_SUCCESS) { /* We have special error handling here */ @@ -2541,7 +2532,7 @@ loop: prev_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); - ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT); + ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT); goto loop; } diff --git a/innobase/row/row0row.c b/innobase/row/row0row.c index 40a775143f4..6c0c6c04cd5 100644 --- a/innobase/row/row0row.c +++ b/innobase/row/row0row.c @@ -136,7 +136,14 @@ row_build_index_entry( dfield2 = dtuple_get_nth_field(row, dict_col_get_no(col)); dfield_copy(dfield, dfield2); - dfield->col_no = dict_col_get_no(col); + + /* If a column prefix index, take only the prefix */ + if (ind_field->prefix_len > 0 + && dfield_get_len(dfield2) != UNIV_SQL_NULL + && dfield_get_len(dfield2) > 
ind_field->prefix_len) { + + dfield_set_len(dfield, ind_field->prefix_len); + } } ut_ad(dtuple_check_typed(entry)); @@ -146,8 +153,7 @@ row_build_index_entry( /*********************************************************************** An inverse function to dict_row_build_index_entry. Builds a row from a -record in a clustered index. NOTE that externally stored (often big) -fields are always copied to heap. */ +record in a clustered index. */ dtuple_t* row_build( @@ -172,6 +178,7 @@ row_build( { dtuple_t* row; dict_table_t* table; + dict_field_t* ind_field; dict_col_t* col; dfield_t* dfield; ulint n_fields; @@ -204,19 +211,24 @@ row_build( dict_table_copy_types(row, table); for (i = 0; i < n_fields; i++) { + ind_field = dict_index_get_nth_field(index, i); - col = dict_field_get_col(dict_index_get_nth_field(index, i)); - dfield = dtuple_get_nth_field(row, dict_col_get_no(col)); - field = rec_get_nth_field(rec, i, &len); + if (ind_field->prefix_len == 0) { - if (type == ROW_COPY_ALSO_EXTERNALS - && rec_get_nth_field_extern_bit(rec, i)) { + col = dict_field_get_col(ind_field); + dfield = dtuple_get_nth_field(row, + dict_col_get_no(col)); + field = rec_get_nth_field(rec, i, &len); - field = btr_rec_copy_externally_stored_field(rec, - i, &len, heap); + if (type == ROW_COPY_ALSO_EXTERNALS + && rec_get_nth_field_extern_bit(rec, i)) { + + field = btr_rec_copy_externally_stored_field( + rec, i, &len, heap); + } + + dfield_set_data(dfield, field, len); } - - dfield_set_data(dfield, field, len); } ut_ad(dtuple_check_typed(row)); @@ -371,7 +383,6 @@ row_build_row_ref( dict_table_t* table; dict_index_t* clust_index; dfield_t* dfield; - dict_col_t* col; dtuple_t* ref; byte* field; ulint len; @@ -403,24 +414,13 @@ row_build_row_ref( for (i = 0; i < ref_len; i++) { dfield = dtuple_get_nth_field(ref, i); - col = dict_field_get_col( - dict_index_get_nth_field(clust_index, i)); - pos = dict_index_get_nth_col_pos(index, dict_col_get_no(col)); + pos = dict_index_get_nth_field_pos(index, 
clust_index, i); - if (pos != ULINT_UNDEFINED) { - field = rec_get_nth_field(rec, pos, &len); + ut_a(pos != ULINT_UNDEFINED); + + field = rec_get_nth_field(rec, pos, &len); - dfield_set_data(dfield, field, len); - } else { - ut_ad(table->type == DICT_TABLE_CLUSTER_MEMBER); - ut_ad(i == table->mix_len); - - dfield_set_data(dfield, - mem_heap_alloc(heap, table->mix_id_len), - table->mix_id_len); - ut_memcpy(dfield_get_data(dfield), table->mix_id_buf, - table->mix_id_len); - } + dfield_set_data(dfield, field, len); } ut_ad(dtuple_check_typed(ref)); @@ -448,7 +448,6 @@ row_build_row_ref_in_tuple( dict_table_t* table; dict_index_t* clust_index; dfield_t* dfield; - dict_col_t* col; byte* field; ulint len; ulint ref_len; @@ -483,19 +482,13 @@ row_build_row_ref_in_tuple( for (i = 0; i < ref_len; i++) { dfield = dtuple_get_nth_field(ref, i); - col = dict_field_get_col( - dict_index_get_nth_field(clust_index, i)); - pos = dict_index_get_nth_col_pos(index, dict_col_get_no(col)); + pos = dict_index_get_nth_field_pos(index, clust_index, i); - if (pos != ULINT_UNDEFINED) { - field = rec_get_nth_field(rec, pos, &len); + ut_a(pos != ULINT_UNDEFINED); + + field = rec_get_nth_field(rec, pos, &len); - dfield_set_data(dfield, field, len); - } else { - ut_ad(table->type == DICT_TABLE_CLUSTER_MEMBER); - ut_ad(i == table->mix_len); - ut_a(0); - } + dfield_set_data(dfield, field, len); } ut_ad(dtuple_check_typed(ref)); @@ -517,6 +510,7 @@ row_build_row_ref_from_row( directly into data of this row */ { dict_index_t* clust_index; + dict_field_t* field; dfield_t* dfield; dfield_t* dfield2; dict_col_t* col; @@ -533,13 +527,21 @@ row_build_row_ref_from_row( for (i = 0; i < ref_len; i++) { dfield = dtuple_get_nth_field(ref, i); - - col = dict_field_get_col( - dict_index_get_nth_field(clust_index, i)); - + + field = dict_index_get_nth_field(clust_index, i); + + col = dict_field_get_col(field); + dfield2 = dtuple_get_nth_field(row, dict_col_get_no(col)); dfield_copy(dfield, dfield2); + + if 
(field->prefix_len > 0 + && dfield->len != UNIV_SQL_NULL + && dfield->len > field->prefix_len) { + + dfield->len = field->prefix_len; + } } ut_ad(dtuple_check_typed(ref)); diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c index 4732472d805..114ebf870b0 100644 --- a/innobase/row/row0sel.c +++ b/innobase/row/row0sel.c @@ -65,41 +65,50 @@ row_sel_sec_rec_is_for_clust_rec( rec_t* sec_rec, /* in: secondary index record */ dict_index_t* sec_index, /* in: secondary index */ rec_t* clust_rec, /* in: clustered index record */ - dict_index_t* clust_index __attribute__((unused))) - /* in: clustered index */ + dict_index_t* clust_index) /* in: clustered index */ { - dict_col_t* col; - byte* sec_field; - ulint sec_len; - byte* clust_field; - ulint clust_len; - ulint n; - ulint i; + dict_field_t* ifield; + dict_col_t* col; + byte* sec_field; + ulint sec_len; + byte* clust_field; + ulint clust_len; + ulint n; + ulint i; - n = dict_index_get_n_ordering_defined_by_user(sec_index); + UT_NOT_USED(clust_index); - for (i = 0; i < n; i++) { - col = dict_field_get_col( - dict_index_get_nth_field(sec_index, i)); + n = dict_index_get_n_ordering_defined_by_user(sec_index); - clust_field = rec_get_nth_field(clust_rec, - dict_col_get_clust_pos(col), - &clust_len); - sec_field = rec_get_nth_field(sec_rec, i, &sec_len); + for (i = 0; i < n; i++) { + ifield = dict_index_get_nth_field(sec_index, i); + col = dict_field_get_col(ifield); + + clust_field = rec_get_nth_field(clust_rec, + dict_col_get_clust_pos(col), + &clust_len); + sec_field = rec_get_nth_field(sec_rec, i, &sec_len); - if (sec_len != clust_len) { + if (ifield->prefix_len > 0 + && clust_len != UNIV_SQL_NULL + && clust_len > ifield->prefix_len) { - return(FALSE); + clust_len = ifield->prefix_len; } - if (0 != cmp_data_data(dict_col_get_type(col), - clust_field, clust_len, - sec_field, sec_len)) { - return(FALSE); - } - } + if (sec_len != clust_len) { - return(TRUE); + return(FALSE); + } + + if (0 != 
cmp_data_data(dict_col_get_type(col), + clust_field, clust_len, + sec_field, sec_len)) { + return(FALSE); + } + } + + return(TRUE); } /************************************************************************* @@ -606,7 +615,7 @@ row_sel_get_clust_rec( /* Try to place a lock on the index record */ err = lock_clust_rec_read_check_and_lock(0, clust_rec, index, - node->row_lock_mode, LOCK_ORDINARY, thr); + node->row_lock_mode, LOCK_ORDINARY, thr); if (err != DB_SUCCESS) { return(err); @@ -656,7 +665,7 @@ row_sel_get_clust_rec( *out_rec = clust_rec; return(DB_SUCCESS); - } + } } /* Fetch the columns needed in test conditions */ @@ -1850,9 +1859,11 @@ row_printf_step( } /******************************************************************** -Converts a key value stored in MySQL format to an Innobase dtuple. -The last field of the key value may be just a prefix of a fixed length -field: hence the parameter key_len. */ +Converts a key value stored in MySQL format to an Innobase dtuple. The last +field of the key value may be just a prefix of a fixed length field: hence +the parameter key_len. But currently we do not allow search keys where the +last field is only a prefix of the full key field len and print a warning if +such appears. */ void row_sel_convert_mysql_key_to_innobase( @@ -1863,17 +1874,24 @@ row_sel_convert_mysql_key_to_innobase( to index! */ byte* buf, /* in: buffer to use in field conversions */ + ulint buf_len, /* in: buffer length */ dict_index_t* index, /* in: index of the key value */ byte* key_ptr, /* in: MySQL key value */ ulint key_len) /* in: MySQL key value length */ { + byte* original_buf = buf; + dict_field_t* field; dfield_t* dfield; - ulint offset; - ulint len; + ulint data_offset; + ulint data_len; + ulint data_field_len; + ibool is_null; byte* key_end; ulint n_fields = 0; + ulint type; - UT_NOT_USED(index); + /* For documentation of the key value storage format in MySQL, see + ha_innobase::store_key_val_for_row() in ha_innodb.cc. 
*/ key_end = key_ptr + key_len; @@ -1882,11 +1900,14 @@ row_sel_convert_mysql_key_to_innobase( dtuple_set_n_fields(tuple, ULINT_MAX); dfield = dtuple_get_nth_field(tuple, 0); + field = dict_index_get_nth_field(index, 0); if (dfield_get_type(dfield)->mtype == DATA_SYS) { - /* A special case: we are looking for a position in a - generated clustered index: the first and the only - ordering column is ROW_ID */ + /* A special case: we are looking for a position in the + generated clustered index which InnoDB automatically added + to a table with no primary key: the first and the only + ordering column is ROW_ID which InnoDB stored to the key_ptr + buffer. */ ut_a(key_len == DATA_ROW_ID_LEN); @@ -1897,70 +1918,114 @@ row_sel_convert_mysql_key_to_innobase( return; } - while (key_ptr < key_end) { - offset = 0; - len = dfield_get_type(dfield)->len; + while (key_ptr < key_end) { - n_fields++; + ut_a(dict_col_get_type(field->col)->mtype + == dfield_get_type(dfield)->mtype); + + data_offset = 0; + is_null = FALSE; if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) { /* The first byte in the field tells if this is an SQL NULL value */ - offset = 1; + data_offset = 1; - if (*key_ptr != 0) { + if (*key_ptr != 0) { dfield_set_data(dfield, NULL, UNIV_SQL_NULL); - goto next_part; + is_null = TRUE; } } - row_mysql_store_col_in_innobase_format( - dfield, buf, key_ptr + offset, len, - dfield_get_type(dfield)->mtype, + type = dfield_get_type(dfield)->mtype; + + /* Calculate data length and data field total length */ + + if (type == DATA_BLOB) { + /* The key field is a column prefix of a BLOB or + TEXT type column */ + + ut_a(field->prefix_len > 0); + + /* MySQL stores the actual data length to the first 2 + bytes after the optional SQL NULL marker byte. The + storage format is little-endian. 
*/ + + /* There are no key fields > 255 bytes currently in + MySQL */ + if (key_ptr[data_offset + 1] != 0) { + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: Error: BLOB or TEXT prefix > 255 bytes in query to table %s\n", + index->table_name); + } + + data_len = key_ptr[data_offset]; + data_field_len = data_offset + 2 + field->prefix_len; + data_offset += 2; + + type = DATA_CHAR; /* now that we know the length, we + store the column value like it would + be a fixed char field */ + } else if (field->prefix_len > 0) { + data_len = field->prefix_len; + data_field_len = data_offset + data_len; + } else { + data_len = dfield_get_type(dfield)->len; + data_field_len = data_offset + data_len; + } + + /* Storing may use at most data_len bytes of buf */ + + if (!is_null) { + row_mysql_store_col_in_innobase_format( + dfield, buf, key_ptr + data_offset, + data_len, type, dfield_get_type(dfield)->prtype & DATA_UNSIGNED); - next_part: - key_ptr += (offset + len); + buf += data_len; + } + + key_ptr += data_field_len; if (key_ptr > key_end) { - /* The last field in key was not a complete - field but a prefix of it. + /* The last field in key was not a complete key field + but a prefix of it. - Print a warning about this! HA_READ_PREFIX_LAST - does not currently work in InnoDB with partial-field - key value prefixes. Since MySQL currently uses a - padding trick to calculate LIKE 'abc%' type queries - there should never be partial-field prefixes - in searches. */ + Print a warning about this! HA_READ_PREFIX_LAST does + not currently work in InnoDB with partial-field key + value prefixes. Since MySQL currently uses a padding + trick to calculate LIKE 'abc%' type queries there + should never be partial-field prefixes in searches. 
*/ ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Warning: using a partial-field key prefix in search\n"); - ut_ad(dfield_get_len(dfield) != UNIV_SQL_NULL); - - dfield_set_data(dfield, buf, - len - (ulint)(key_ptr - key_end)); + if (!is_null) { + dfield->len -= (ulint)(key_ptr - key_end); + } } - buf += len; - + n_fields++; + field++; dfield++; } - /* We set the length of tuple to n_fields: we assume that - the memory area allocated for it is big enough (usually - bigger than n_fields). */ + ut_a(buf <= original_buf + buf_len); + + /* We set the length of tuple to n_fields: we assume that the memory + area allocated for it is big enough (usually bigger than n_fields). */ dtuple_set_n_fields(tuple, n_fields); } /****************************************************************** Stores the row id to the prebuilt struct. */ -UNIV_INLINE +static void row_sel_store_row_id_to_prebuilt( /*=============================*/ @@ -1970,11 +2035,22 @@ row_sel_store_row_id_to_prebuilt( { byte* data; ulint len; + char err_buf[1000]; data = rec_get_nth_field(index_rec, dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len); - ut_a(len == DATA_ROW_ID_LEN); + if (len != DATA_ROW_ID_LEN) { + rec_sprintf(err_buf, 900, index_rec); + + fprintf(stderr, +"InnoDB: Error: Row id field is wrong length %lu in table %s index %s\n" +"InnoDB: Field number %lu, record:\n%s\n", + len, index->table_name, index->name, + dict_index_get_sys_col_pos(index, DATA_ROW_ID), + err_buf); + ut_a(0); + } ut_memcpy(prebuilt->row_id, data, len); } @@ -3021,7 +3097,7 @@ rec_loop: if (prebuilt->select_lock_type != LOCK_NONE && set_also_gap_locks) { - /* Try to place a lock on the index record */ + /* Try to place a lock on the index record */ err = sel_set_rec_lock(rec, index, prebuilt->select_lock_type, diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c index b84e55ca643..b22e494f891 100644 --- a/innobase/row/row0umod.c +++ b/innobase/row/row0umod.c @@ -428,7 +428,8 @@ row_undo_mod_del_unmark_sec( 
found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, &mtr); if (!found) { - fprintf(stderr, "InnoDB: error in sec index entry del undo in\n" + fprintf(stderr, + "InnoDB: error in sec index entry del undo in\n" "InnoDB: index %s table %s\n", index->name, index->table->name); dtuple_sprintf(err_buf, 900, entry); @@ -570,7 +571,7 @@ row_undo_mod_upd_exist_sec( the row */ row_upd_index_replace_new_col_vals(entry, index, - node->update); + node->update, NULL); row_undo_mod_del_unmark_sec(node, thr, index, entry); } diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c index 5fce1c1861b..db68479509d 100644 --- a/innobase/row/row0upd.c +++ b/innobase/row/row0upd.c @@ -72,8 +72,9 @@ searched delete is obviously to keep the x-latch for several steps of query graph execution. */ /*************************************************************** -Checks if an update vector changes some of the first fields of an index -record. */ +Checks if an update vector changes some of the first ordering fields of an +index record. This is only used in foreign key checks and we can assume +that index does not contain column prefixes. */ static ibool row_upd_changes_first_fields( @@ -234,7 +235,8 @@ row_upd_check_references_constraints( if (err != DB_SUCCESS) { if (got_s_lock) { - row_mysql_unfreeze_data_dictionary(trx); + row_mysql_unfreeze_data_dictionary( + trx); } mem_heap_free(heap); @@ -350,14 +352,15 @@ row_upd_index_entry_sys_field( } /*************************************************************** -Returns TRUE if row update changes size of some field in index -or if some field to be updated is stored externally in rec or update. */ +Returns TRUE if row update changes size of some field in index or if some +field to be updated is stored externally in rec or update. 
*/ ibool -row_upd_changes_field_size( -/*=======================*/ +row_upd_changes_field_size_or_external( +/*===================================*/ /* out: TRUE if the update changes the size of - some field in index */ + some field in index or the field is external + in rec or update */ rec_t* rec, /* in: record in clustered index */ dict_index_t* index, /* in: clustered index */ upd_t* update) /* in: update vector */ @@ -820,72 +823,58 @@ void row_upd_index_replace_new_col_vals( /*===============================*/ dtuple_t* entry, /* in/out: index entry where replaced */ - dict_index_t* index, /* in: index; NOTE that may also be a + dict_index_t* index, /* in: index; NOTE that this may also be a non-clustered index */ - upd_t* update) /* in: update vector */ + upd_t* update, /* in: update vector */ + mem_heap_t* heap) /* in: memory heap to which we allocate and + copy the new values, set this as NULL if you + do not want allocation */ { + dict_field_t* field; upd_field_t* upd_field; dfield_t* dfield; dfield_t* new_val; - ulint field_no; - dict_index_t* clust_index; + ulint j; ulint i; ut_ad(index); - clust_index = dict_table_get_first_index(index->table); - dtuple_set_info_bits(entry, update->info_bits); - for (i = 0; i < upd_get_n_fields(update); i++) { + for (j = 0; j < dict_index_get_n_fields(index); j++) { - upd_field = upd_get_nth_field(update, i); + field = dict_index_get_nth_field(index, j); - field_no = dict_index_get_nth_col_pos(index, - dict_index_get_nth_col_no(clust_index, - upd_field->field_no)); - if (field_no != ULINT_UNDEFINED) { - dfield = dtuple_get_nth_field(entry, field_no); + for (i = 0; i < upd_get_n_fields(update); i++) { - new_val = &(upd_field->new_val); + upd_field = upd_get_nth_field(update, i); - dfield_set_data(dfield, new_val->data, new_val->len); + if (upd_field->field_no == field->col->clust_pos) { + + dfield = dtuple_get_nth_field(entry, j); + + new_val = &(upd_field->new_val); + + dfield_set_data(dfield, new_val->data, + 
new_val->len); + if (heap && new_val->len != UNIV_SQL_NULL) { + dfield->data = mem_heap_alloc(heap, + new_val->len); + ut_memcpy(dfield->data, new_val->data, + new_val->len); + } + + if (field->prefix_len > 0 + && new_val->len != UNIV_SQL_NULL + && new_val->len > field->prefix_len) { + + dfield->len = field->prefix_len; + } + } } } } -/*************************************************************** -Replaces the new column values stored in the update vector to the -clustered index entry given. */ - -void -row_upd_clust_index_replace_new_col_vals( -/*=====================================*/ - dtuple_t* entry, /* in/out: index entry where replaced */ - upd_t* update) /* in: update vector */ -{ - upd_field_t* upd_field; - dfield_t* dfield; - dfield_t* new_val; - ulint field_no; - ulint i; - - dtuple_set_info_bits(entry, update->info_bits); - - for (i = 0; i < upd_get_n_fields(update); i++) { - - upd_field = upd_get_nth_field(update, i); - - field_no = upd_field->field_no; - - dfield = dtuple_get_nth_field(entry, field_no); - - new_val = &(upd_field->new_val); - - dfield_set_data(dfield, new_val->data, new_val->len); - } -} - /*************************************************************** Checks if an update vector changes an ordering field of an index record. 
This function is fast if the update vector is short or the number of ordering @@ -931,9 +920,15 @@ row_upd_changes_ord_field_binary( upd_field = upd_get_nth_field(update, j); + /* Note that if the index field is a column prefix + then it may be that row does not contain an externally + stored part of the column value, and we cannot compare + the datas */ + if (col_pos == upd_field->field_no - && (row == NULL - || !dfield_datas_are_binary_equal( + && (row == NULL + || ind_field->prefix_len > 0 + || !dfield_datas_are_binary_equal( dtuple_get_nth_field(row, col_no), &(upd_field->new_val)))) { return(TRUE); @@ -978,8 +973,9 @@ row_upd_changes_some_index_ord_field_binary( } /*************************************************************** -Checks if an update vector changes some of the first fields of an index -record. */ +Checks if an update vector changes some of the first ordering fields of an +index record. This is only used in foreign key checks and we can assume +that index does not contain column prefixes. 
*/ static ibool row_upd_changes_first_fields( @@ -1013,9 +1009,10 @@ row_upd_changes_first_fields( upd_field = upd_get_nth_field(update, j); if (col_pos == upd_field->field_no - && cmp_dfield_dfield( + && (ind_field->prefix_len > 0 + || 0 != cmp_dfield_dfield( dtuple_get_nth_field(entry, i), - &(upd_field->new_val))) { + &(upd_field->new_val)))) { return(TRUE); } } @@ -1204,7 +1201,7 @@ close_cur: } /* Build a new index entry */ - row_upd_index_replace_new_col_vals(entry, index, node->update); + row_upd_index_replace_new_col_vals(entry, index, node->update, NULL); /* Insert new index entry */ err = row_ins_index_entry(index, entry, NULL, 0, thr); @@ -1317,12 +1314,12 @@ row_upd_clust_rec_by_insert( entry = row_build_index_entry(node->row, index, heap); - row_upd_clust_index_replace_new_col_vals(entry, node->update); + row_upd_index_replace_new_col_vals(entry, index, node->update, NULL); row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id); /* If we return from a lock wait, for example, we may have - extern fields marked as not-owned in entry (marked if the + extern fields marked as not-owned in entry (marked in the if-branch above). We must unmark them. 
*/ btr_cur_unmark_dtuple_extern_fields(entry, node->ext_vec, @@ -1702,9 +1699,9 @@ function_exit: /* Do some cleanup */ if (node->row != NULL) { - mem_heap_empty(node->heap); node->row = NULL; node->n_ext_vec = 0; + mem_heap_empty(node->heap); } node->state = UPD_NODE_UPDATE_CLUSTERED; diff --git a/innobase/row/row0vers.c b/innobase/row/row0vers.c index cd8b18e5e12..91aaba40812 100644 --- a/innobase/row/row0vers.c +++ b/innobase/row/row0vers.c @@ -27,6 +27,7 @@ Created 2/6/1997 Heikki Tuuri #include "row0upd.h" #include "rem0cmp.h" #include "read0read.h" +#include "lock0lock.h" /********************************************************************* Finds out if an active transaction has inserted or modified a secondary @@ -111,6 +112,14 @@ row_vers_impl_x_locked_off_kernel( return(NULL); } + if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index, TRUE)) { + /* Corruption noticed: try to avoid a crash by returning */ + + mtr_commit(&mtr); + + return(NULL); + } + /* We look up if some earlier version of the clustered index record would require rec to be in a different state (delete marked or unmarked, or not existing). If there is such a version, then rec was @@ -177,7 +186,8 @@ row_vers_impl_x_locked_off_kernel( /* If we get here, we know that the trx_id transaction is still active and it has modified prev_version. Let us check - if prev_version would require rec to be in a different state. */ + if prev_version would require rec to be in a different + state. 
*/ vers_del = rec_get_deleted_flag(prev_version); diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c index 2a93ca966eb..d13d499dd17 100644 --- a/innobase/srv/srv0srv.c +++ b/innobase/srv/srv0srv.c @@ -286,6 +286,7 @@ ulint srv_test_n_mutexes = ULINT_MAX; i/o handler thread */ char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS]; +char* srv_io_thread_function[SRV_MAX_N_IO_THREADS]; time_t srv_last_monitor_time; @@ -2399,8 +2400,9 @@ srv_sprintf_innodb_monitor( srv_conc_n_threads, srv_conc_n_waiting_threads); #ifdef UNIV_LINUX buf += sprintf(buf, - "Main thread process no %lu, state: %s\n", + "Main thread process no. %lu, id %lu, state: %s\n", srv_main_thread_process_no, + srv_main_thread_id, srv_main_thread_op_info); #else buf += sprintf(buf, @@ -2464,8 +2466,8 @@ srv_lock_timeout_and_monitor_thread( ulint i; #ifdef UNIV_DEBUG_THREAD_CREATION - printf("Lock timeout thread starts\n"); - printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id())); + printf("Lock timeout thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); #endif UT_NOT_USED(arg); srv_last_monitor_time = time(NULL); @@ -2637,8 +2639,8 @@ srv_error_monitor_thread( UT_NOT_USED(arg); #ifdef UNIV_DEBUG_THREAD_CREATION - printf("Error monitor thread starts\n"); - printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id())); + printf("Error monitor thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); #endif loop: srv_error_monitor_active = TRUE; @@ -2760,8 +2762,8 @@ srv_master_thread( UT_NOT_USED(arg); #ifdef UNIV_DEBUG_THREAD_CREATION - printf("Master thread starts\n"); - printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id())); + printf("Master thread starts, id %lu\n", + os_thread_pf(os_thread_get_curr_id())); #endif srv_main_thread_process_no = os_proc_get_number(); srv_main_thread_id = os_thread_pf(os_thread_get_curr_id()); diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c index 1f278d82bc0..964e728b23c 100644 --- 
a/innobase/srv/srv0start.c +++ b/innobase/srv/srv0start.c @@ -415,8 +415,8 @@ io_handler_thread( segment = *((ulint*)arg); #ifdef UNIV_DEBUG_THREAD_CREATION - printf("Io handler thread %lu starts\n", segment); - printf("Thread id %lu\n", os_thread_pf(os_thread_get_curr_id())); + printf("Io handler thread %lu starts, id %lu\n", segment, + os_thread_pf(os_thread_get_curr_id())); #endif for (i = 0;; i++) { fil_aio_wait(segment); @@ -1492,7 +1492,9 @@ innobase_shutdown_for_mysql(void) } /* 1. Flush buffer pool to disk, write the current lsn to - the tablespace header(s), and copy all log data to archive */ + the tablespace header(s), and copy all log data to archive. + The step 1 is the real InnoDB shutdown. The remaining steps + just free data structures after the shutdown. */ logs_empty_and_mark_files_at_shutdown(); diff --git a/innobase/trx/trx0rec.c b/innobase/trx/trx0rec.c index 05e179e06a5..9453189d598 100644 --- a/innobase/trx/trx0rec.c +++ b/innobase/trx/trx0rec.c @@ -272,8 +272,8 @@ trx_undo_page_report_insert( mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE, ptr - undo_page); - /* Write the log entry to the REDO log of this change in the UNDO log */ - + /* Write the log entry to the REDO log of this change in the UNDO + log */ trx_undof_page_add_undo_rec_log(undo_page, first_free, ptr - undo_page, mtr); return(first_free); @@ -492,7 +492,8 @@ trx_undo_page_report_modify( /* Reserve 2 bytes for the pointer to the next undo log record */ ptr += 2; - /* Store first some general parameters to the undo log */ + /* Store first some general parameters to the undo log */ + if (update) { if (rec_get_deleted_flag(rec)) { type_cmpl = TRX_UNDO_UPD_DEL_REC; @@ -526,8 +527,7 @@ trx_undo_page_report_modify( /* Store the values of the system columns */ trx_id = dict_index_rec_get_sys_col(index, DATA_TRX_ID, rec); - roll_ptr = dict_index_rec_get_sys_col(index, DATA_ROLL_PTR, rec); - + roll_ptr = dict_index_rec_get_sys_col(index, DATA_ROLL_PTR, rec); len = 
mach_dulint_write_compressed(ptr, trx_id); ptr += len; @@ -632,7 +632,11 @@ trx_undo_page_report_modify( columns which occur as ordering fields in any index. This info is used in the purge of old versions where we use it to build and search the delete marked index records, to look if we can remove them from the - index tree. */ + index tree. Note that starting from 4.0.14 also externally stored + fields can be ordering in some index. But we always store at least + 384 first bytes locally to the clustered index record, which means + we can construct the column prefix fields in the index from the + stored data. */ if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { @@ -1408,11 +1412,11 @@ trx_undo_prev_version_build( return(DB_ERROR); } - if (row_upd_changes_field_size(rec, index, update)) { + if (row_upd_changes_field_size_or_external(rec, index, update)) { - entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); - - row_upd_clust_index_replace_new_col_vals(entry, update); + entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, + heap); + row_upd_index_replace_new_col_vals(entry, index, update, heap); buf = mem_heap_alloc(heap, rec_get_converted_size(entry)); diff --git a/innobase/trx/trx0roll.c b/innobase/trx/trx0roll.c index a9f8c5ad22c..7d1b341221c 100644 --- a/innobase/trx/trx0roll.c +++ b/innobase/trx/trx0roll.c @@ -52,6 +52,11 @@ trx_general_rollback_for_mysql( que_thr_t* thr; roll_node_t* roll_node; + /* Tell Innobase server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + trx_start_if_not_started(trx); heap = mem_heap_create(512); @@ -89,6 +94,11 @@ trx_general_rollback_for_mysql( ut_a(trx->error_state == DB_SUCCESS); + /* Tell Innobase server that there might be work for + utility threads: */ + + srv_active_wake_master_thread(); + return((int) trx->error_state); } @@ -110,20 +120,8 @@ trx_rollback_for_mysql( trx->op_info = (char *) "rollback"; - /* Tell Innobase server that there might be work for - 
utility threads: */ - - srv_active_wake_master_thread(); - err = trx_general_rollback_for_mysql(trx, FALSE, NULL); - trx_mark_sql_stat_end(trx); - - /* Tell Innobase server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - trx->op_info = (char *) ""; return(err); @@ -147,25 +145,191 @@ trx_rollback_last_sql_stat_for_mysql( trx->op_info = (char *) "rollback of SQL statement"; - /* Tell Innobase server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - err = trx_general_rollback_for_mysql(trx, TRUE, &(trx->last_sql_stat_start)); + /* The following call should not be needed, but we play safe: */ trx_mark_sql_stat_end(trx); - /* Tell Innobase server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - trx->op_info = (char *) ""; return(err); } +/*********************************************************************** +Frees savepoint structs. */ + +void +trx_roll_savepoints_free( +/*=====================*/ + trx_t* trx, /* in: transaction handle */ + trx_named_savept_t* savep) /* in: free all savepoints > this one; + if this is NULL, free all savepoints + of trx */ +{ + trx_named_savept_t* next_savep; + + if (savep == NULL) { + savep = UT_LIST_GET_FIRST(trx->trx_savepoints); + } else { + savep = UT_LIST_GET_NEXT(trx_savepoints, savep); + } + + while (savep != NULL) { + next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep); + + UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep); + mem_free(savep->name); + mem_free(savep); + + savep = next_savep; + } +} + +/*********************************************************************** +Rolls back a transaction back to a named savepoint. Modifications after the +savepoint are undone but InnoDB does NOT release the corresponding locks +which are stored in memory. 
If a lock is 'implicit', that is, a new inserted +row holds a lock where the lock information is carried by the trx id stored in +the row, these locks are naturally released in the rollback. Savepoints which +were set after this savepoint are deleted. */ + +ulint +trx_rollback_to_savepoint_for_mysql( +/*================================*/ + /* out: if no savepoint + of the name found then + DB_NO_SAVEPOINT, + otherwise DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + char* savepoint_name, /* in: savepoint name */ + ib_longlong* mysql_binlog_cache_pos) /* out: the MySQL binlog cache + position corresponding to this + savepoint; MySQL needs this + information to remove the + binlog entries of the queries + executed after the savepoint */ +{ + trx_named_savept_t* savep; + ulint err; + + savep = UT_LIST_GET_FIRST(trx->trx_savepoints); + + while (savep != NULL) { + if (0 == ut_strcmp(savep->name, savepoint_name)) { + /* Found */ + break; + } + savep = UT_LIST_GET_NEXT(trx_savepoints, savep); + } + + if (savep == NULL) { + + return(DB_NO_SAVEPOINT); + } + + if (trx->conc_state == TRX_NOT_STARTED) { + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: Error: transaction has a savepoint %s though it is not started\n", + savep->name); + return(DB_ERROR); + } + + /* We can now free all savepoints strictly later than this one */ + + trx_roll_savepoints_free(trx, savep); + + *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos; + + trx->op_info = (char *) "rollback to a savepoint"; + + err = trx_general_rollback_for_mysql(trx, TRUE, &(savep->savept)); + + /* Store the current undo_no of the transaction so that we know where + to roll back if we have to roll back the next SQL statement: */ + + trx_mark_sql_stat_end(trx); + + trx->op_info = (char *) ""; + + return(err); +} + +/*********************************************************************** +Creates a named savepoint. If the transaction is not yet started, starts it. 
+If there is already a savepoint of the same name, this call erases that old +savepoint and replaces it with a new. Savepoints are deleted in a transaction +commit or rollback. */ + +ulint +trx_savepoint_for_mysql( +/*====================*/ + /* out: always DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + char* savepoint_name, /* in: savepoint name */ + ib_longlong binlog_cache_pos) /* in: MySQL binlog cache + position corresponding to this + connection at the time of the + savepoint */ +{ + trx_named_savept_t* savep; + + ut_a(trx); + ut_a(savepoint_name); + + trx_start_if_not_started(trx); + + savep = UT_LIST_GET_FIRST(trx->trx_savepoints); + + while (savep != NULL) { + if (0 == ut_strcmp(savep->name, savepoint_name)) { + /* Found */ + break; + } + savep = UT_LIST_GET_NEXT(trx_savepoints, savep); + } + + if (savep) { + /* There is a savepoint with the same name: free that */ + + UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep); + + mem_free(savep->name); + mem_free(savep); + } + + /* Create a new savepoint and add it as the last in the list */ + + savep = mem_alloc(sizeof(trx_named_savept_t)); + + savep->name = mem_alloc(1 + ut_strlen(savepoint_name)); + ut_memcpy(savep->name, savepoint_name, 1 + ut_strlen(savepoint_name)); + + savep->savept = trx_savept_take(trx); + + savep->mysql_binlog_cache_pos = binlog_cache_pos; + + UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep); + + return(DB_SUCCESS); +} + +/*********************************************************************** +Returns a transaction savepoint taken at this point in time. */ + +trx_savept_t +trx_savept_take( +/*============*/ + /* out: savepoint */ + trx_t* trx) /* in: transaction */ +{ + trx_savept_t savept; + + savept.least_undo_no = trx->undo_no; + + return(savept); +} + /*********************************************************************** Rollback or clean up transactions which have no user session. 
If the transaction already was committed, then we clean up a possible insert @@ -325,22 +489,6 @@ loop: goto loop; } - -/*********************************************************************** -Returns a transaction savepoint taken at this point in time. */ - -trx_savept_t -trx_savept_take( -/*============*/ - /* out: savepoint */ - trx_t* trx) /* in: transaction */ -{ - trx_savept_t savept; - - savept.least_undo_no = trx->undo_no; - - return(savept); -} /*********************************************************************** Creates an undo number array. */ diff --git a/innobase/trx/trx0sys.c b/innobase/trx/trx0sys.c index 51aad60d3e2..f1b03fff3bd 100644 --- a/innobase/trx/trx0sys.c +++ b/innobase/trx/trx0sys.c @@ -321,8 +321,8 @@ trx_sys_doublewrite_restore_corrupt_pages(void) for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) { - space_id = mach_read_from_4(page + FIL_PAGE_SPACE); page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); + space_id = 0; if (!fil_check_adress_in_tablespace(space_id, page_no)) { fprintf(stderr, diff --git a/innobase/trx/trx0trx.c b/innobase/trx/trx0trx.c index d73d6327d76..9233e861784 100644 --- a/innobase/trx/trx0trx.c +++ b/innobase/trx/trx0trx.c @@ -135,6 +135,8 @@ trx_create( trx->lock_heap = mem_heap_create_in_buffer(256); UT_LIST_INIT(trx->trx_locks); + UT_LIST_INIT(trx->trx_savepoints); + trx->dict_operation_lock_mode = 0; trx->has_search_latch = FALSE; trx->search_latch_timeout = BTR_SEA_TIMEOUT; @@ -807,6 +809,9 @@ trx_commit_off_kernel( mutex_enter(&kernel_mutex); } + /* Free savepoints */ + trx_roll_savepoints_free(trx, NULL); + trx->conc_state = TRX_NOT_STARTED; trx->rseg = NULL; trx->undo_no = ut_dulint_zero; diff --git a/innobase/ut/ut0mem.c b/innobase/ut/ut0mem.c index 174ae4cc6bb..f5d207d8bba 100644 --- a/innobase/ut/ut0mem.c +++ b/innobase/ut/ut0mem.c @@ -166,7 +166,7 @@ ut_free( } /************************************************************************** -Frees all allocated memory not freed yet. 
*/ +Frees in shutdown all allocated memory not freed yet. */ void ut_free_all_mem(void) @@ -174,7 +174,7 @@ ut_free_all_mem(void) { ut_mem_block_t* block; - os_fast_mutex_lock(&ut_list_mutex); + os_fast_mutex_free(&ut_list_mutex); while ((block = UT_LIST_GET_FIRST(ut_mem_block_list))) { @@ -187,11 +187,11 @@ ut_free_all_mem(void) free(block); } - os_fast_mutex_unlock(&ut_list_mutex); - - ut_a(ut_total_allocated_memory == 0); - - os_fast_mutex_free(&ut_list_mutex); + if (ut_total_allocated_memory != 0) { + fprintf(stderr, +"InnoDB: Warning: after shutdown total allocated memory is %lu\n", + ut_total_allocated_memory); + } } /************************************************************************** diff --git a/innobase/ut/ut0ut.c b/innobase/ut/ut0ut.c index 95037ec3570..06bfb5c45ba 100644 --- a/innobase/ut/ut0ut.c +++ b/innobase/ut/ut0ut.c @@ -53,6 +53,8 @@ ut_get_high32( ulint a) /* in: ulint */ { #if SIZEOF_LONG == 4 + UT_NOT_USED(a); + return 0; #else return(a >> 32); diff --git a/libmysql/libmysql.c b/libmysql/libmysql.c index af74182eb22..c008d625900 100644 --- a/libmysql/libmysql.c +++ b/libmysql/libmysql.c @@ -1646,7 +1646,7 @@ mysql_real_connect(MYSQL *mysql,const char *host, const char *user, net->vio = vio_new(sock, VIO_TYPE_SOCKET, TRUE); bzero((char*) &UNIXaddr,sizeof(UNIXaddr)); UNIXaddr.sun_family = AF_UNIX; - strmov(UNIXaddr.sun_path, unix_socket); + strmake(UNIXaddr.sun_path, unix_socket, sizeof(UNIXaddr.sun_path)-1); if (my_connect(sock,(struct sockaddr *) &UNIXaddr, sizeof(UNIXaddr), mysql->options.connect_timeout) <0) { diff --git a/myisam/mi_open.c b/myisam/mi_open.c index 99b97db3fbd..59fae36ac33 100644 --- a/myisam/mi_open.c +++ b/myisam/mi_open.c @@ -188,11 +188,8 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) share->state_diff_length=len-MI_STATE_INFO_SIZE; if (share->state.header.fulltext_keys) - { - /* Not supported in this version */ - my_errno= HA_ERR_UNSUPPORTED; - goto err; - } + fprintf(stderr, "Warning: table file 
%s was created in MySQL 4.1+, use REPAIR TABLE ... USE_FRM to recreate it as a valid MySQL 4.0 table\n", name_buff); + mi_state_info_read(disk_cache, &share->state); len= mi_uint2korr(share->state.header.base_info_length); if (len != MI_BASE_INFO_SIZE) diff --git a/mysql-test/mysql-test-run.sh b/mysql-test/mysql-test-run.sh index ab4a5354dae..1d25a4d0e3d 100644 --- a/mysql-test/mysql-test-run.sh +++ b/mysql-test/mysql-test-run.sh @@ -319,11 +319,15 @@ while test $# -gt 0; do $ECHO "Note: you will get more meaningful output on a source distribution compiled with debugging option when running tests with --client-gdb option" fi DO_CLIENT_GDB=1 + EXTRA_MASTER_MYSQLD_OPT="$EXTRA_MASTER_MYSQLD_OPT --gdb" + EXTRA_SLAVE_MYSQLD_OPT="$EXTRA_SLAVE_MYSQLD_OPT --gdb" ;; --manual-gdb ) DO_GDB=1 MANUAL_GDB=1 USE_RUNNING_SERVER="" + EXTRA_MASTER_MYSQLD_OPT="$EXTRA_MASTER_MYSQLD_OPT --gdb" + EXTRA_SLAVE_MYSQLD_OPT="$EXTRA_SLAVE_MYSQLD_OPT --gdb" ;; --ddd ) if [ x$BINARY_DIST = x1 ] ; then @@ -331,6 +335,8 @@ while test $# -gt 0; do fi DO_DDD=1 USE_RUNNING_SERVER="" + EXTRA_MASTER_MYSQLD_OPT="$EXTRA_MASTER_MYSQLD_OPT --gdb" + EXTRA_SLAVE_MYSQLD_OPT="$EXTRA_SLAVE_MYSQLD_OPT --gdb" ;; --valgrind) VALGRIND="valgrind --alignment=8 --leak-check=yes --num-callers=16" @@ -1064,6 +1070,16 @@ stop_slave () fi } +stop_slave_threads () +{ + eval "this_slave_running=\$SLAVE$1_RUNNING" + slave_ident="slave$1" + if [ x$this_slave_running = x1 ] + then + $MYSQLADMIN --no-defaults -uroot --socket=$MYSQL_TMP_DIR/$slave_ident.sock stop-slave > /dev/null 2>&1 + fi +} + stop_master () { if [ x$MASTER_RUNNING = x1 ] @@ -1157,6 +1173,12 @@ run_testcase () return fi + # Stop all slave threads, so that we don't have useless reconnection attempts + # and error messages in case the slave and master servers restart. 
+ stop_slave_threads + stop_slave_threads 1 + stop_slave_threads 2 + if [ -z "$USE_RUNNING_SERVER" ] ; then if [ -f $master_opt_file ] ; diff --git a/mysql-test/r/fulltext.result b/mysql-test/r/fulltext.result index eaaaf9c8880..6f15b2eb973 100644 --- a/mysql-test/r/fulltext.result +++ b/mysql-test/r/fulltext.result @@ -5,6 +5,9 @@ INSERT INTO t1 VALUES('MySQL has now support', 'for full-text search'), ('Only MyISAM tables','support collections'), ('Function MATCH ... AGAINST()','is used to do a search'), ('Full-text search in MySQL', 'implements vector space model'); +explain select * from t1 where MATCH(a,b) AGAINST ("collections"); +table type possible_keys key key_len ref rows Extra +t1 fulltext a a 0 1 Using where select * from t1 where MATCH(a,b) AGAINST ("collections"); a b Only MyISAM tables support collections diff --git a/mysql-test/r/fulltext_left_join.result b/mysql-test/r/fulltext_left_join.result index abc63358dbe..6875a517718 100644 --- a/mysql-test/r/fulltext_left_join.result +++ b/mysql-test/r/fulltext_left_join.result @@ -31,3 +31,14 @@ match(t1.texte,t1.sujet,t1.motsclefs) against('droit' IN BOOLEAN MODE) 1 0 drop table t1, t2; +create table t1 (venue_id int(11) default null, venue_text varchar(255) default null, dt datetime default null) type=myisam; +insert into t1 (venue_id, venue_text, dt) values (1, 'a1', '2003-05-23 19:30:00'),(null, 'a2', '2003-05-23 19:30:00'); +create table t2 (name varchar(255) not null default '', entity_id int(11) not null auto_increment, primary key (entity_id), fulltext key name (name)) type=myisam; +insert into t2 (name, entity_id) values ('aberdeen town hall', 1), ('glasgow royal concert hall', 2), ('queen\'s hall, edinburgh', 3); +select * from t1 left join t2 on venue_id = entity_id where match(name) against('aberdeen' in boolean mode) and dt = '2003-05-23 19:30:00'; +venue_id venue_text dt name entity_id +1 a1 2003-05-23 19:30:00 aberdeen town hall 1 +select * from t1 left join t2 on venue_id = entity_id where 
match(name) against('aberdeen') and dt = '2003-05-23 19:30:00'; +venue_id venue_text dt name entity_id +1 a1 2003-05-23 19:30:00 aberdeen town hall 1 +drop table t1,t2; diff --git a/mysql-test/r/func_str.result b/mysql-test/r/func_str.result index a72d32d39f8..1a4cb9217e4 100644 --- a/mysql-test/r/func_str.result +++ b/mysql-test/r/func_str.result @@ -64,7 +64,7 @@ concat_ws(NULL,'a') concat_ws(',',NULL,'') NULL select concat_ws(',','',NULL,'a'); concat_ws(',','',NULL,'a') -a +,a SELECT CONCAT('"',CONCAT_WS('";"',repeat('a',60),repeat('b',60),repeat('c',60),repeat('d',100)), '"'); CONCAT('"',CONCAT_WS('";"',repeat('a',60),repeat('b',60),repeat('c',60),repeat('d',100)), '"') "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";"bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb";"cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc";"dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd" diff --git a/mysql-test/r/func_time.result b/mysql-test/r/func_time.result index 2941352c776..06c0be86667 100644 --- a/mysql-test/r/func_time.result +++ b/mysql-test/r/func_time.result @@ -329,6 +329,9 @@ insert into t1 values ('2001-01-12 12:23:40'); select ctime, hour(ctime) from t1; ctime hour(ctime) 2001-01-12 12:23:40 12 +select ctime from t1 where extract(MONTH FROM ctime) = 1 AND extract(YEAR FROM ctime) = 2001; +ctime +2001-01-12 12:23:40 drop table t1; create table t1 (id int); create table t2 (id int, date date); diff --git a/mysql-test/r/innodb.result b/mysql-test/r/innodb.result index e2dea324ff2..9d1c232e830 100644 --- a/mysql-test/r/innodb.result +++ b/mysql-test/r/innodb.result @@ -787,16 +787,6 @@ id id3 100 2 UNLOCK TABLES; DROP TABLE t1; -create table t1 (a char(20), unique (a(5))) type=innodb; -Incorrect sub part key. 
The used key part isn't a string, the used length is longer than the key part or the table handler doesn't support unique sub keys -create table t1 (a char(20), index (a(5))) type=innodb; -show create table t1; -Table Create Table -t1 CREATE TABLE `t1` ( - `a` char(20) default NULL, - KEY `a` (`a`) -) TYPE=InnoDB -drop table t1; create temporary table t1 (a int not null auto_increment, primary key(a)) type=innodb; insert into t1 values (NULL),(NULL),(NULL); delete from t1 where a=3; diff --git a/mysql-test/r/lock_tables_lost_commit.result b/mysql-test/r/lock_tables_lost_commit.result new file mode 100644 index 00000000000..ccf56793f45 --- /dev/null +++ b/mysql-test/r/lock_tables_lost_commit.result @@ -0,0 +1,8 @@ +drop table if exists t1; +create table t1(a int) type=innodb; +lock tables t1 write; +insert into t1 values(10); +select * from t1; +a +10 +drop table t1; diff --git a/mysql-test/r/rpl000018.result b/mysql-test/r/rpl000018.result index ba51406bba0..282c1e492a1 100644 --- a/mysql-test/r/rpl000018.result +++ b/mysql-test/r/rpl000018.result @@ -1,3 +1,4 @@ +reset master; reset slave; slave start; show master logs; diff --git a/mysql-test/r/rpl_loaddata.result b/mysql-test/r/rpl_loaddata.result index 62071a07d0c..b5154ca95cf 100644 --- a/mysql-test/r/rpl_loaddata.result +++ b/mysql-test/r/rpl_loaddata.result @@ -22,3 +22,6 @@ day id category name drop table t1; drop table t2; drop table t3; +create table t1(a int, b int, unique(b)); +insert into t1 values(1,10); +load data infile '../../std_data/rpl_loaddata.dat' into table t1; diff --git a/mysql-test/r/rpl_master_pos_wait.result b/mysql-test/r/rpl_master_pos_wait.result index 22c7aef621c..cb6ee31a54d 100644 --- a/mysql-test/r/rpl_master_pos_wait.result +++ b/mysql-test/r/rpl_master_pos_wait.result @@ -4,6 +4,10 @@ reset master; reset slave; drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; slave start; -select master_pos_wait('master-bin.999999',0,10); -master_pos_wait('master-bin.999999',0,10) +select 
master_pos_wait('master-bin.999999',0,2); +master_pos_wait('master-bin.999999',0,2) -1 + select master_pos_wait('master-bin.999999',0); +stop slave sql_thread; +master_pos_wait('master-bin.999999',0) +NULL diff --git a/mysql-test/r/rpl_relayspace.result b/mysql-test/r/rpl_relayspace.result index 5e552ef7400..721c6a882bd 100644 --- a/mysql-test/r/rpl_relayspace.result +++ b/mysql-test/r/rpl_relayspace.result @@ -6,8 +6,14 @@ drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; slave start; stop slave; create table t1 (a int); +drop table t1; +create table t1 (a int); +drop table t1; +reset slave; +start slave io_thread; +stop slave io_thread; reset slave; start slave; -select master_pos_wait('master-bin.001',5000,45)=-1; -master_pos_wait('master-bin.001',5000,45)=-1 +select master_pos_wait('master-bin.001',200,6)=-1; +master_pos_wait('master-bin.001',200,6)=-1 0 diff --git a/mysql-test/t/fulltext.test b/mysql-test/t/fulltext.test index 128af680854..1b85f5903df 100644 --- a/mysql-test/t/fulltext.test +++ b/mysql-test/t/fulltext.test @@ -13,6 +13,7 @@ INSERT INTO t1 VALUES('MySQL has now support', 'for full-text search'), # nl search +explain select * from t1 where MATCH(a,b) AGAINST ("collections"); select * from t1 where MATCH(a,b) AGAINST ("collections"); select * from t1 where MATCH(a,b) AGAINST ("indexes"); select * from t1 where MATCH(a,b) AGAINST ("indexes collections"); diff --git a/mysql-test/t/fulltext_left_join.test b/mysql-test/t/fulltext_left_join.test index bcf7cbcc505..da4df13bc0c 100644 --- a/mysql-test/t/fulltext_left_join.test +++ b/mysql-test/t/fulltext_left_join.test @@ -28,3 +28,15 @@ select match(t1.texte,t1.sujet,t1.motsclefs) against('droit' IN BOOLEAN MODE) drop table t1, t2; +# +# Bug #484, reported by Stephen Brandon +# + +create table t1 (venue_id int(11) default null, venue_text varchar(255) default null, dt datetime default null) type=myisam; +insert into t1 (venue_id, venue_text, dt) values (1, 'a1', '2003-05-23 19:30:00'),(null, 'a2', 
'2003-05-23 19:30:00'); +create table t2 (name varchar(255) not null default '', entity_id int(11) not null auto_increment, primary key (entity_id), fulltext key name (name)) type=myisam; +insert into t2 (name, entity_id) values ('aberdeen town hall', 1), ('glasgow royal concert hall', 2), ('queen\'s hall, edinburgh', 3); +select * from t1 left join t2 on venue_id = entity_id where match(name) against('aberdeen' in boolean mode) and dt = '2003-05-23 19:30:00'; +select * from t1 left join t2 on venue_id = entity_id where match(name) against('aberdeen') and dt = '2003-05-23 19:30:00'; +drop table t1,t2; + diff --git a/mysql-test/t/func_time.test b/mysql-test/t/func_time.test index dd589ff2e66..3057729ab96 100644 --- a/mysql-test/t/func_time.test +++ b/mysql-test/t/func_time.test @@ -123,6 +123,8 @@ select extract(MONTH FROM "2001-02-00"); create table t1 (ctime varchar(20)); insert into t1 values ('2001-01-12 12:23:40'); select ctime, hour(ctime) from t1; +# test bug 614 (multiple extracts in where) +select ctime from t1 where extract(MONTH FROM ctime) = 1 AND extract(YEAR FROM ctime) = 2001; drop table t1; # diff --git a/mysql-test/t/innodb.test b/mysql-test/t/innodb.test index dc3c76f1a91..cf203d87c8b 100644 --- a/mysql-test/t/innodb.test +++ b/mysql-test/t/innodb.test @@ -471,15 +471,6 @@ select id,id3 from t1; UNLOCK TABLES; DROP TABLE t1; -# -# Test prefix key -# ---error 1089 -create table t1 (a char(20), unique (a(5))) type=innodb; -create table t1 (a char(20), index (a(5))) type=innodb; -show create table t1; -drop table t1; - # # Test using temporary table and auto_increment # diff --git a/mysql-test/t/lock_tables_lost_commit-master.opt b/mysql-test/t/lock_tables_lost_commit-master.opt new file mode 100644 index 00000000000..d357a51cb27 --- /dev/null +++ b/mysql-test/t/lock_tables_lost_commit-master.opt @@ -0,0 +1 @@ +--binlog-ignore-db=test innodb \ No newline at end of file diff --git a/mysql-test/t/lock_tables_lost_commit.test 
b/mysql-test/t/lock_tables_lost_commit.test new file mode 100644 index 00000000000..a12ee7369cb --- /dev/null +++ b/mysql-test/t/lock_tables_lost_commit.test @@ -0,0 +1,18 @@ +# This is a test for bug 578 + +connect (con1,localhost,root,,); +connect (con2,localhost,root,,); + +connection con1; +drop table if exists t1; +create table t1(a int) type=innodb; +lock tables t1 write; +insert into t1 values(10); +disconnect con1; + +connection con2; +# The bug was that, because of the LOCK TABLES, the handler "forgot" to commit, +# and the other commit when we write to the binlog was not done because of +# binlog-ignore-db +select * from t1; +drop table t1; diff --git a/mysql-test/t/rpl000001.test b/mysql-test/t/rpl000001.test index 4ffd7d1d78e..ebce3d0ac94 100644 --- a/mysql-test/t/rpl000001.test +++ b/mysql-test/t/rpl000001.test @@ -90,8 +90,8 @@ connection master; --error 1053; reap; connection slave; -sync_with_master; -#give the slave a chance to exit +# The SQL slave thread should now have stopped because the query was killed on +# the master (so it has a non-zero error code in the binlog). 
wait_for_slave_to_stop; # The following test can't be done because the result of Pos will differ diff --git a/mysql-test/t/rpl000018.test b/mysql-test/t/rpl000018.test index 291b482b912..3bd5fd0ef09 100644 --- a/mysql-test/t/rpl000018.test +++ b/mysql-test/t/rpl000018.test @@ -6,6 +6,8 @@ require_manager; connect (master,localhost,root,,test,0,master.sock); connect (slave,localhost,root,,test,0,slave.sock); +connection master; +reset master; server_stop master; server_start master; connection slave; diff --git a/mysql-test/t/rpl_loaddata.test b/mysql-test/t/rpl_loaddata.test index 1f34aa9d3f9..96a4eb3fb76 100644 --- a/mysql-test/t/rpl_loaddata.test +++ b/mysql-test/t/rpl_loaddata.test @@ -4,6 +4,9 @@ # # check replication of load data for temporary tables with additional parameters # +# check if duplicate entries trigger an error (they should unless IGNORE or +# REPLACE was used on the master) (bug 571). + source include/master-slave.inc; create table t1(a int not null auto_increment, b int, primary key(a) ); @@ -27,7 +30,17 @@ connection master; drop table t1; drop table t2; drop table t3; +create table t1(a int, b int, unique(b)); save_master_pos; connection slave; sync_with_master; +insert into t1 values(1,10); + +connection master; +load data infile '../../std_data/rpl_loaddata.dat' into table t1; + +save_master_pos; +connection slave; +# The SQL slave thread should be stopped now. 
+wait_for_slave_to_stop; diff --git a/mysql-test/t/rpl_master_pos_wait.test b/mysql-test/t/rpl_master_pos_wait.test index a6aae222a89..24479636c91 100644 --- a/mysql-test/t/rpl_master_pos_wait.test +++ b/mysql-test/t/rpl_master_pos_wait.test @@ -5,5 +5,11 @@ save_master_pos; connection slave; sync_with_master; # Ask for a master log that has certainly not been reached yet -# timeout= 10 seconds -select master_pos_wait('master-bin.999999',0,10); +# timeout= 2 seconds +select master_pos_wait('master-bin.999999',0,2); +# Testcase for bug 651 (master_pos_wait() hangs if slave idle and STOP SLAVE). +send select master_pos_wait('master-bin.999999',0); +connection slave1; +stop slave sql_thread; +connection slave; +reap; diff --git a/mysql-test/t/rpl_relayspace-slave.opt b/mysql-test/t/rpl_relayspace-slave.opt index 9365a2a0a26..05cb01731d2 100644 --- a/mysql-test/t/rpl_relayspace-slave.opt +++ b/mysql-test/t/rpl_relayspace-slave.opt @@ -1 +1 @@ - -O relay_log_space_limit=1024 \ No newline at end of file + -O relay_log_space_limit=10 \ No newline at end of file diff --git a/mysql-test/t/rpl_relayspace.test b/mysql-test/t/rpl_relayspace.test index 8d4f01339c7..bb82781b511 100644 --- a/mysql-test/t/rpl_relayspace.test +++ b/mysql-test/t/rpl_relayspace.test @@ -1,33 +1,32 @@ -# The slave is started with relay_log_space_limit=1024 bytes, -# to force the deadlock +# The slave is started with relay_log_space_limit=10 bytes, +# to force the deadlock after one event. 
source include/master-slave.inc; connection slave; stop slave; connection master; +# This will generate a master's binlog > 10 bytes create table t1 (a int); -let $1=200; -disable_query_log; -while ($1) -{ -# eval means expand $ expressions - eval insert into t1 values( $1 ); - dec $1; -} -# This will generate one 10kB master's binlog -enable_query_log; -save_master_pos; +drop table t1; +create table t1 (a int); +drop table t1; connection slave; reset slave; +start slave io_thread; +# Give the I/O thread time to block. +sleep 2; +# A bug caused the I/O thread to refuse stopping. +stop slave io_thread; +reset slave; start slave; # The I/O thread stops filling the relay log when -# it's 1kB. And the SQL thread cannot purge this relay log +# it's >10b. And the SQL thread cannot purge this relay log # as purge is done only when the SQL thread switches to another # relay log, which does not exist here. # So we should have a deadlock. # if it is not resolved automatically we'll detect -# it with master_pos_wait that waits for farther than 1kB; -# it will timeout after 45 seconds; +# it with master_pos_wait that waits for farther than 1Ob; +# it will timeout after 10 seconds; # also the slave will probably not cooperate to shutdown # (as 2 threads are locked) -select master_pos_wait('master-bin.001',5000,45)=-1; +select master_pos_wait('master-bin.001',200,6)=-1; diff --git a/mysys/default.c b/mysys/default.c index 3ff240da3a1..cdacc8bee2b 100644 --- a/mysys/default.c +++ b/mysys/default.c @@ -33,8 +33,6 @@ ** --print-defaults ; Print the modified command line and exit ****************************************************************************/ -#undef SAFEMALLOC /* safe_malloc is not yet initailized */ - #include "mysys_priv.h" #include "m_string.h" #include "m_ctype.h" diff --git a/mysys/my_getopt.c b/mysys/my_getopt.c index 759c96462f6..e18c5a0b9eb 100644 --- a/mysys/my_getopt.c +++ b/mysys/my_getopt.c @@ -609,9 +609,9 @@ static ulonglong getopt_ull(char *arg, const 
struct my_option *optp, int *err) ulonglong getopt_ull_limit_value(ulonglong num, const struct my_option *optp) { - if ((ulonglong) num > (ulonglong) (ulong) optp->max_value && + if ((ulonglong) num > (ulonglong) optp->max_value && optp->max_value) /* if max value is not set -> no upper limit */ - num= (ulonglong) (ulong) optp->max_value; + num= (ulonglong) optp->max_value; if (optp->block_size > 1) { num/= (ulonglong) optp->block_size; diff --git a/mysys/my_static.c b/mysys/my_static.c index bbf7582a454..b24ef28b7b1 100644 --- a/mysys/my_static.c +++ b/mysys/my_static.c @@ -69,14 +69,13 @@ uint sf_malloc_prehunc=0, /* If you have problem with core- */ sf_malloc_endhunc=0, /* dump when malloc-message.... */ /* set theese to 64 or 128 */ sf_malloc_quick=0; /* set if no calls to sanity */ -long lCurMemory = 0L; /* Current memory usage */ -long lMaxMemory = 0L; /* Maximum memory usage */ -uint cNewCount = 0; /* Number of times NEW() was called */ +ulong sf_malloc_cur_memory= 0L; /* Current memory usage */ +ulong sf_malloc_max_memory= 0L; /* Maximum memory usage */ +uint sf_malloc_count= 0; /* Number of times NEW() was called */ byte *sf_min_adress= (byte*) ~(unsigned long) 0L, *sf_max_adress= (byte*) 0L; - -/* Root of the linked list of remembers */ -struct remember *pRememberRoot = NULL; +/* Root of the linked list of struct st_irem */ +struct st_irem *sf_malloc_root = NULL; /* from my_alarm */ int volatile my_have_got_alarm=0; /* declare variable to reset */ diff --git a/mysys/my_static.h b/mysys/my_static.h index c1893f4074f..1a33bcf21f3 100644 --- a/mysys/my_static.h +++ b/mysys/my_static.h @@ -33,27 +33,23 @@ struct st_remember { }; /* - The size of the following structure MUST be dividable by 8 to not cause - alignment problems on some cpu's + Structure that stores information of a allocated memory block + The data is at &struct_adr+sizeof(ALIGN_SIZE(sizeof(struct irem))) + The lspecialvalue is at the previous 4 bytes from this, which may not + necessarily be in 
the struct if the struct size isn't aligned at a 8 byte + boundary. */ -struct irem +struct st_irem { - struct remember *_pNext; /* Linked list of structures */ - struct remember *_pPrev; /* Other link */ - char *_sFileName; /* File in which memory was new'ed */ - uint32 _uLineNum; /* Line number in above file */ - uint32 _uDataSize; /* Size requested */ -#if SIZEOF_CHARP == 8 - long _filler; /* For alignment */ -#endif - long _lSpecialValue; /* Underrun marker value */ + struct st_irem *next; /* Linked list of structures */ + struct st_irem *prev; /* Other link */ + char *filename; /* File in which memory was new'ed */ + uint32 linenum; /* Line number in above file */ + uint32 datasize; /* Size requested */ + uint32 SpecialValue; /* Underrun marker value */ }; -struct remember { - struct irem tInt; - char aData[1]; -}; extern char NEAR curr_dir[FN_REFLEN],NEAR home_dir_buff[FN_REFLEN]; @@ -70,8 +66,8 @@ extern int _my_tempnam_used; #endif extern byte *sf_min_adress,*sf_max_adress; -extern uint cNewCount; -extern struct remember *pRememberRoot; +extern uint sf_malloc_count; +extern struct st_irem *sf_malloc_root; #if defined(THREAD) && !defined(__WIN__) extern sigset_t my_signals; /* signals blocked by mf_brkhant */ diff --git a/mysys/safemalloc.c b/mysys/safemalloc.c index 9cbb178edb4..bd77b4821ff 100644 --- a/mysys/safemalloc.c +++ b/mysys/safemalloc.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2000 MySQL AB +/* Copyright (C) 2000-2003 MySQL AB This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -69,22 +69,15 @@ #include "my_static.h" #include "mysys_err.h" -ulonglong safemalloc_mem_limit = ~(ulonglong)0; - -#define pNext tInt._pNext -#define pPrev tInt._pPrev -#define sFileName tInt._sFileName -#define uLineNum tInt._uLineNum -#define uDataSize tInt._uDataSize -#define lSpecialValue tInt._lSpecialValue +ulonglong sf_malloc_mem_limit= ~(ulonglong)0; #ifndef PEDANTIC_SAFEMALLOC /* - 
Set to 1 after TERMINATE() if we had to fiddle with cNewCount and + Set to 1 after TERMINATE() if we had to fiddle with sf_malloc_count and the linked list of blocks so that _sanity() will not fuss when it is not supposed to */ -static int sf_malloc_tampered = 0; +static int sf_malloc_tampered= 0; #endif @@ -92,11 +85,11 @@ static int sf_malloc_tampered = 0; static int check_ptr(const char *where, byte *ptr, const char *sFile, uint uLine); -static int _checkchunk(struct remember *pRec, const char *sFile, uint uLine); +static int _checkchunk(struct st_irem *pRec, const char *sFile, uint uLine); /* - Note: both these refer to the NEW'ed data only. They do not include - malloc() roundoff or the extra space required by the remember + Note: We only fill up the allocated block. This do not include + malloc() roundoff or the extra space required by the irem structures. */ @@ -127,218 +120,211 @@ static int _checkchunk(struct remember *pRec, const char *sFile, uint uLine); /* Allocate some memory. 
*/ -gptr _mymalloc (uint uSize, const char *sFile, uint uLine, myf MyFlags) +gptr _mymalloc(uint size, const char *filename, uint lineno, myf MyFlags) { - struct remember *pTmp; - DBUG_ENTER("_mymalloc"); - DBUG_PRINT("enter",("Size: %u",uSize)); + struct st_irem *irem; + char *data; + DBUG_ENTER("_mymalloc"); + DBUG_PRINT("enter",("Size: %u",size)); + if (!sf_malloc_quick) + (void) _sanity (filename, lineno); - if (!sf_malloc_quick) - (void) _sanity (sFile, uLine); - - if (uSize + lCurMemory > safemalloc_mem_limit) - pTmp = 0; - else + if (size + sf_malloc_cur_memory > sf_malloc_mem_limit) + irem= 0; + else + { + /* Allocate the physical memory */ + irem= (struct st_irem *) malloc (ALIGN_SIZE(sizeof(struct st_irem)) + + sf_malloc_prehunc + + size + /* size requested */ + 4 + /* overrun mark */ + sf_malloc_endhunc); + } + /* Check if there isn't anymore memory avaiable */ + if (!irem) + { + if (MyFlags & MY_FAE) + error_handler_hook=fatal_error_handler_hook; + if (MyFlags & (MY_FAE+MY_WME)) { - /* Allocate the physical memory */ - pTmp = (struct remember *) malloc ( - ALIGN_SIZE(sizeof(struct irem)) /* remember data */ - + sf_malloc_prehunc - + uSize /* size requested */ - + 4 /* overrun mark */ - + sf_malloc_endhunc - ); - } - /* Check if there isn't anymore memory avaiable */ - if (pTmp == NULL) - { - if (MyFlags & MY_FAE) - error_handler_hook=fatal_error_handler_hook; - if (MyFlags & (MY_FAE+MY_WME)) - { - char buff[SC_MAXWIDTH]; - my_errno=errno; - sprintf(buff,"Out of memory at line %d, '%s'", uLine, sFile); - my_message(EE_OUTOFMEMORY,buff,MYF(ME_BELL+ME_WAITTANG)); - sprintf(buff,"needed %d byte (%ldk), memory in use: %ld bytes (%ldk)", - uSize, (uSize + 1023L) / 1024L, - lMaxMemory, (lMaxMemory + 1023L) / 1024L); - my_message(EE_OUTOFMEMORY,buff,MYF(ME_BELL+ME_WAITTANG)); - } - DBUG_PRINT("error",("Out of memory, in use: %ld at line %d, '%s'", - lMaxMemory,uLine, sFile)); - if (MyFlags & MY_FAE) - exit(1); - DBUG_RETURN ((gptr) NULL); + char 
buff[SC_MAXWIDTH]; + my_errno=errno; + sprintf(buff,"Out of memory at line %d, '%s'", lineno, filename); + my_message(EE_OUTOFMEMORY,buff,MYF(ME_BELL+ME_WAITTANG)); + sprintf(buff,"needed %d byte (%ldk), memory in use: %ld bytes (%ldk)", + size, (size + 1023L) / 1024L, + sf_malloc_max_memory, (sf_malloc_max_memory + 1023L) / 1024L); + my_message(EE_OUTOFMEMORY,buff,MYF(ME_BELL+ME_WAITTANG)); } + DBUG_PRINT("error",("Out of memory, in use: %ld at line %d, '%s'", + sf_malloc_max_memory,lineno, filename)); + if (MyFlags & MY_FAE) + exit(1); + DBUG_RETURN ((gptr) 0); + } - /* Fill up the structure */ - *((long*) ((char*) &pTmp -> lSpecialValue+sf_malloc_prehunc)) = MAGICKEY; - pTmp -> aData[uSize + sf_malloc_prehunc+0] = MAGICEND0; - pTmp -> aData[uSize + sf_malloc_prehunc+1] = MAGICEND1; - pTmp -> aData[uSize + sf_malloc_prehunc+2] = MAGICEND2; - pTmp -> aData[uSize + sf_malloc_prehunc+3] = MAGICEND3; - pTmp -> sFileName = (my_string) sFile; - pTmp -> uLineNum = uLine; - pTmp -> uDataSize = uSize; - pTmp -> pPrev = NULL; + /* Fill up the structure */ + data= (((char*) irem) + ALIGN_SIZE(sizeof(struct st_irem)) + + sf_malloc_prehunc); + *((uint32*) (data-sizeof(uint32)))= MAGICKEY; + data[size + 0]= MAGICEND0; + data[size + 1]= MAGICEND1; + data[size + 2]= MAGICEND2; + data[size + 3]= MAGICEND3; + irem->filename= (my_string) filename; + irem->linenum= lineno; + irem->datasize= size; + irem->prev= NULL; - /* Add this remember structure to the linked list */ - pthread_mutex_lock(&THR_LOCK_malloc); - if ((pTmp->pNext=pRememberRoot)) - { - pRememberRoot -> pPrev = pTmp; - } - pRememberRoot = pTmp; + /* Add this remember structure to the linked list */ + pthread_mutex_lock(&THR_LOCK_malloc); + if ((irem->next= sf_malloc_root)) + sf_malloc_root->prev= irem; + sf_malloc_root= irem; - /* Keep the statistics */ - lCurMemory += uSize; - if (lCurMemory > lMaxMemory) { - lMaxMemory = lCurMemory; - } - cNewCount++; - pthread_mutex_unlock(&THR_LOCK_malloc); + /* Keep the statistics 
*/ + sf_malloc_cur_memory+= size; + if (sf_malloc_cur_memory > sf_malloc_max_memory) + sf_malloc_max_memory= sf_malloc_cur_memory; + sf_malloc_count++; + pthread_mutex_unlock(&THR_LOCK_malloc); - /* Set the memory to the aribtrary wierd value */ - if ((MyFlags & MY_ZEROFILL) || !sf_malloc_quick) - bfill(&pTmp -> aData[sf_malloc_prehunc],uSize, - (char) (MyFlags & MY_ZEROFILL ? 0 : ALLOC_VAL)); - /* Return a pointer to the real data */ - DBUG_PRINT("exit",("ptr: %lx",&(pTmp -> aData[sf_malloc_prehunc]))); - if (sf_min_adress > &(pTmp -> aData[sf_malloc_prehunc])) - sf_min_adress = &(pTmp -> aData[sf_malloc_prehunc]); - if (sf_max_adress < &(pTmp -> aData[sf_malloc_prehunc])) - sf_max_adress = &(pTmp -> aData[sf_malloc_prehunc]); - DBUG_RETURN ((gptr) &(pTmp -> aData[sf_malloc_prehunc])); + /* Set the memory to the aribtrary wierd value */ + if ((MyFlags & MY_ZEROFILL) || !sf_malloc_quick) + bfill(data, size, (char) (MyFlags & MY_ZEROFILL ? 0 : ALLOC_VAL)); + /* Return a pointer to the real data */ + DBUG_PRINT("exit",("ptr: %lx", data)); + if (sf_min_adress > data) + sf_min_adress= data; + if (sf_max_adress < data) + sf_max_adress= data; + DBUG_RETURN ((gptr) data); } + /* Allocate some new memory and move old memoryblock there. 
Free then old memoryblock */ -gptr _myrealloc (register gptr pPtr, register uint uSize, - const char *sFile, uint uLine, myf MyFlags) +gptr _myrealloc(register gptr ptr, register uint size, + const char *filename, uint lineno, myf MyFlags) { - struct remember *pRec; - gptr ptr; + struct st_irem *irem; + char *data; DBUG_ENTER("_myrealloc"); - if (!pPtr && (MyFlags & MY_ALLOW_ZERO_PTR)) - DBUG_RETURN(_mymalloc(uSize,sFile,uLine,MyFlags)); + if (!ptr && (MyFlags & MY_ALLOW_ZERO_PTR)) + DBUG_RETURN(_mymalloc(size, filename, lineno, MyFlags)); if (!sf_malloc_quick) - (void) _sanity (sFile, uLine); + (void) _sanity (filename, lineno); - if (check_ptr("Reallocating",(byte*) pPtr,sFile,uLine)) + if (check_ptr("Reallocating", (byte*) ptr, filename, lineno)) DBUG_RETURN((gptr) NULL); - pRec = (struct remember *) ((char*) pPtr - ALIGN_SIZE(sizeof(struct irem))- - sf_malloc_prehunc); - if (*((long*) ((char*) &pRec -> lSpecialValue+sf_malloc_prehunc)) - != MAGICKEY) + irem= (struct st_irem *) (((char*) ptr) - ALIGN_SIZE(sizeof(struct st_irem))- + sf_malloc_prehunc); + if (*((uint32*) (((char*) ptr)- sizeof(uint32))) != MAGICKEY) { fprintf(stderr, "Error: Reallocating unallocated data at line %d, '%s'\n", - uLine, sFile); + lineno, filename); DBUG_PRINT("safe",("Reallocating unallocated data at line %d, '%s'", - uLine, sFile)); + lineno, filename)); (void) fflush(stderr); DBUG_RETURN((gptr) NULL); } - if ((ptr=_mymalloc(uSize,sFile,uLine,MyFlags))) /* Allocate new area */ + if ((data= _mymalloc(size,filename,lineno,MyFlags))) /* Allocate new area */ { - uSize=min(uSize,pRec-> uDataSize); /* Move as much as possibly */ - memcpy((byte*) ptr,pPtr,(size_t) uSize); /* Copy old data */ - _myfree(pPtr,sFile,uLine,0); /* Free not needed area */ + size=min(size, irem->datasize); /* Move as much as possibly */ + memcpy((byte*) data, ptr, (size_t) size); /* Copy old data */ + _myfree(ptr, filename, lineno, 0); /* Free not needed area */ } else { if (MyFlags & MY_HOLD_ON_ERROR) - 
DBUG_RETURN(pPtr); + DBUG_RETURN(ptr); if (MyFlags & MY_FREE_ON_ERROR) - _myfree(pPtr,sFile,uLine,0); + _myfree(ptr, filename, lineno, 0); } - DBUG_RETURN(ptr); + DBUG_RETURN(data); } /* _myrealloc */ /* Deallocate some memory. */ -void _myfree (gptr pPtr, const char *sFile, uint uLine, myf myflags) +void _myfree(gptr ptr, const char *filename, uint lineno, myf myflags) { - struct remember *pRec; + struct st_irem *irem; DBUG_ENTER("_myfree"); - DBUG_PRINT("enter",("ptr: %lx",pPtr)); + DBUG_PRINT("enter",("ptr: %lx", ptr)); if (!sf_malloc_quick) - (void) _sanity (sFile, uLine); + (void) _sanity (filename, lineno); - if ((!pPtr && (myflags & MY_ALLOW_ZERO_PTR)) || - check_ptr("Freeing",(byte*) pPtr,sFile,uLine)) + if ((!ptr && (myflags & MY_ALLOW_ZERO_PTR)) || + check_ptr("Freeing",(byte*) ptr,filename,lineno)) DBUG_VOID_RETURN; /* Calculate the address of the remember structure */ - pRec = (struct remember *) ((byte*) pPtr- ALIGN_SIZE(sizeof(struct irem))- - sf_malloc_prehunc); + irem= (struct st_irem *) ((char*) ptr- ALIGN_SIZE(sizeof(struct st_irem))- + sf_malloc_prehunc); /* Check to make sure that we have a real remember structure. 
Note: this test could fail for four reasons: - (1) The memory was already free'ed - (2) The memory was never new'ed - (3) There was an underrun - (4) A stray pointer hit this location + (1) The memory was already free'ed + (2) The memory was never new'ed + (3) There was an underrun + (4) A stray pointer hit this location */ - if (*((long*) ((char*) &pRec -> lSpecialValue+sf_malloc_prehunc)) - != MAGICKEY) + if (*((uint32*) ((char*) ptr- sizeof(uint32))) != MAGICKEY) { fprintf(stderr, "Error: Freeing unallocated data at line %d, '%s'\n", - uLine, sFile); - DBUG_PRINT("safe",("Unallocated data at line %d, '%s'",uLine,sFile)); + lineno, filename); + DBUG_PRINT("safe",("Unallocated data at line %d, '%s'",lineno,filename)); (void) fflush(stderr); DBUG_VOID_RETURN; } /* Remove this structure from the linked list */ pthread_mutex_lock(&THR_LOCK_malloc); - if (pRec -> pPrev) { - pRec -> pPrev -> pNext = pRec -> pNext; - } else { - pRememberRoot = pRec -> pNext; - } - if (pRec -> pNext) { - pRec -> pNext -> pPrev = pRec -> pPrev; - } + if (irem->prev) + irem->prev->next= irem->next; + else + sf_malloc_root= irem->next; + + if (irem->next) + irem->next->prev= irem->prev; /* Handle the statistics */ - lCurMemory -= pRec -> uDataSize; - cNewCount--; + sf_malloc_cur_memory-= irem->datasize; + sf_malloc_count--; pthread_mutex_unlock(&THR_LOCK_malloc); #ifndef HAVE_purify /* Mark this data as free'ed */ if (!sf_malloc_quick) - bfill(&pRec->aData[sf_malloc_prehunc],pRec->uDataSize,(pchar) FREE_VAL); + bfill(ptr, irem->datasize, (pchar) FREE_VAL); #endif - *((long*) ((char*) &pRec -> lSpecialValue+sf_malloc_prehunc)) = ~MAGICKEY; - + *((uint32*) ((char*) ptr- sizeof(uint32)))= ~MAGICKEY; /* Actually free the memory */ - free ((my_string ) pRec); + free((char*) irem); DBUG_VOID_RETURN; } /* Check if we have a wrong pointer */ -static int check_ptr(const char *where, byte *ptr, const char *sFile, - uint uLine) +static int check_ptr(const char *where, byte *ptr, const char *filename, 
+ uint lineno) { if (!ptr) { fprintf(stderr, "Error: %s NULL pointer at line %d, '%s'\n", - where,uLine, sFile); - DBUG_PRINT("safe",("Null pointer at line %d '%s'", uLine, sFile)); + where,lineno, filename); + DBUG_PRINT("safe",("Null pointer at line %d '%s'", lineno, filename)); (void) fflush(stderr); return 1; } @@ -346,9 +332,9 @@ static int check_ptr(const char *where, byte *ptr, const char *sFile, if ((long) ptr & (ALIGN_SIZE(1)-1)) { fprintf(stderr, "Error: %s wrong aligned pointer at line %d, '%s'\n", - where,uLine, sFile); + where,lineno, filename); DBUG_PRINT("safe",("Wrong aligned pointer at line %d, '%s'", - uLine,sFile)); + lineno,filename)); (void) fflush(stderr); return 1; } @@ -356,9 +342,9 @@ static int check_ptr(const char *where, byte *ptr, const char *sFile, if (ptr < sf_min_adress || ptr > sf_max_adress) { fprintf(stderr, "Error: %s pointer out of range at line %d, '%s'\n", - where,uLine, sFile); + where,lineno, filename); DBUG_PRINT("safe",("Pointer out of range at line %d '%s'", - uLine,sFile)); + lineno,filename)); (void) fflush(stderr); return 1; } @@ -372,9 +358,9 @@ static int check_ptr(const char *where, byte *ptr, const char *sFile, free'ed as well as the statistics. */ -void TERMINATE (FILE *file) +void TERMINATE(FILE *file) { - struct remember *pPtr; + struct st_irem *irem; DBUG_ENTER("TERMINATE"); pthread_mutex_lock(&THR_LOCK_malloc); @@ -384,14 +370,15 @@ void TERMINATE (FILE *file) NEWs than FREEs. <0, etc. */ - if (cNewCount) + if (sf_malloc_count) { if (file) { - fprintf(file, "Warning: Not freed memory segments: %d\n", cNewCount); + fprintf(file, "Warning: Not freed memory segments: %d\n", + sf_malloc_count); (void) fflush(file); } - DBUG_PRINT("safe",("cNewCount: %d",cNewCount)); + DBUG_PRINT("safe",("sf_malloc_count: %d", sf_malloc_count)); } /* @@ -399,42 +386,44 @@ void TERMINATE (FILE *file) but not free'ed with FREE. 
*/ - if ((pPtr=pRememberRoot)) + if ((irem= sf_malloc_root)) { if (file) { - fprintf(file, "Warning: Memory that was not free'ed (%ld bytes):\n",lCurMemory); + fprintf(file, "Warning: Memory that was not free'ed (%ld bytes):\n", + sf_malloc_cur_memory); (void) fflush(file); } - DBUG_PRINT("safe",("Memory that was not free'ed (%ld bytes):",lCurMemory)); - while (pPtr) + DBUG_PRINT("safe",("Memory that was not free'ed (%ld bytes):", + sf_malloc_cur_memory)); + while (irem) { + char *data= (((char*) irem) + ALIGN_SIZE(sizeof(struct st_irem)) + + sf_malloc_prehunc); if (file) { fprintf(file, "\t%6u bytes at 0x%09lx, allocated at line %4u in '%s'", - pPtr -> uDataSize, - (ulong) &(pPtr -> aData[sf_malloc_prehunc]), - pPtr -> uLineNum, pPtr -> sFileName); + irem->datasize, (long) data, irem->linenum, irem->filename); fprintf(file, "\n"); (void) fflush(file); } DBUG_PRINT("safe", ("%6u bytes at 0x%09lx, allocated at line %4d in '%s'", - pPtr -> uDataSize, &(pPtr -> aData[sf_malloc_prehunc]), - pPtr -> uLineNum, pPtr -> sFileName)); - pPtr = pPtr -> pNext; + irem->datasize, data, irem->linenum, irem->filename)); + irem= irem->next; } } /* Report the memory usage statistics */ if (file) { fprintf(file, "Maximum memory usage: %ld bytes (%ldk)\n", - lMaxMemory, (lMaxMemory + 1023L) / 1024L); + sf_malloc_max_memory, (sf_malloc_max_memory + 1023L) / 1024L); (void) fflush(file); } DBUG_PRINT("safe",("Maximum memory usage: %ld bytes (%ldk)", - lMaxMemory, (lMaxMemory + 1023L) / 1024L)); + sf_malloc_max_memory, (sf_malloc_max_memory + 1023L) / + 1024L)); pthread_mutex_unlock(&THR_LOCK_malloc); DBUG_VOID_RETURN; } @@ -442,44 +431,41 @@ void TERMINATE (FILE *file) /* Returns 0 if chunk is ok */ -static int _checkchunk (register struct remember *pRec, const char *sFile, - uint uLine) +static int _checkchunk(register struct st_irem *irem, const char *filename, + uint lineno) { - reg1 uint uSize; - reg2 my_string magicp; - reg3 int flag=0; + int flag=0; + char *magicp, *data; + data= 
(((char*) irem) + ALIGN_SIZE(sizeof(struct st_irem)) + + sf_malloc_prehunc); /* Check for a possible underrun */ - if (*((long*) ((char*) &pRec -> lSpecialValue+sf_malloc_prehunc)) - != MAGICKEY) + if (*((uint32*) (data- sizeof(uint32))) != MAGICKEY) { fprintf(stderr, "Error: Memory allocated at %s:%d was underrun,", - pRec -> sFileName, pRec -> uLineNum); - fprintf(stderr, " discovered at %s:%d\n", sFile, uLine); + irem->filename, irem->linenum); + fprintf(stderr, " discovered at %s:%d\n", filename, lineno); (void) fflush(stderr); DBUG_PRINT("safe",("Underrun at %lx, allocated at %s:%d", - &(pRec -> aData[sf_malloc_prehunc]), - pRec -> sFileName, - pRec -> uLineNum)); + data, irem->filename, irem->linenum)); flag=1; } /* Check for a possible overrun */ - uSize = pRec -> uDataSize; - magicp = &(pRec -> aData[uSize+sf_malloc_prehunc]); + magicp= data + irem->datasize; if (*magicp++ != MAGICEND0 || *magicp++ != MAGICEND1 || *magicp++ != MAGICEND2 || *magicp++ != MAGICEND3) { fprintf(stderr, "Error: Memory allocated at %s:%d was overrun,", - pRec -> sFileName, pRec -> uLineNum); - fprintf(stderr, " discovered at '%s:%d'\n", sFile, uLine); + irem->filename, irem->linenum); + fprintf(stderr, " discovered at '%s:%d'\n", filename, lineno); (void) fflush(stderr); DBUG_PRINT("safe",("Overrun at %lx, allocated at %s:%d", - &(pRec -> aData[sf_malloc_prehunc]), - pRec -> sFileName, - pRec -> uLineNum)); + data, + irem->filename, + irem->linenum)); flag=1; } return(flag); @@ -488,28 +474,28 @@ static int _checkchunk (register struct remember *pRec, const char *sFile, /* Returns how many wrong chunks */ -int _sanity (const char *sFile, uint uLine) +int _sanity(const char *filename, uint lineno) { - reg1 struct remember *pTmp; + reg1 struct st_irem *irem; reg2 int flag=0; uint count=0; pthread_mutex_lock(&THR_LOCK_malloc); #ifndef PEDANTIC_SAFEMALLOC - if (sf_malloc_tampered && cNewCount < 0) - cNewCount=0; + if (sf_malloc_tampered && sf_malloc_count < 0) + sf_malloc_count=0; 
#endif - count=cNewCount; - for (pTmp = pRememberRoot; pTmp != NULL && count-- ; pTmp = pTmp -> pNext) - flag+=_checkchunk (pTmp, sFile, uLine); + count=sf_malloc_count; + for (irem= sf_malloc_root; irem != NULL && count-- ; irem= irem->next) + flag+= _checkchunk (irem, filename, lineno); pthread_mutex_unlock(&THR_LOCK_malloc); - if (count || pTmp) + if (count || irem) { const char *format="Error: Safemalloc link list destroyed, discovered at '%s:%d'"; - fprintf(stderr, format, sFile, uLine); fputc('\n',stderr); - fprintf(stderr, "root=%p,count=%d,pTmp=%p\n", pRememberRoot,count,pTmp); + fprintf(stderr, format, filename, lineno); fputc('\n',stderr); + fprintf(stderr, "root=%p,count=%d,irem=%p\n", sf_malloc_root,count,irem); (void) fflush(stderr); - DBUG_PRINT("safe",(format, sFile, uLine)); + DBUG_PRINT("safe",(format, filename, lineno)); flag=1; } return flag; @@ -518,33 +504,33 @@ int _sanity (const char *sFile, uint uLine) /* malloc and copy */ -gptr _my_memdup(const byte *from, uint length, const char *sFile, uint uLine, - myf MyFlags) +gptr _my_memdup(const byte *from, uint length, const char *filename, + uint lineno, myf MyFlags) { gptr ptr; - if ((ptr=_mymalloc(length,sFile,uLine,MyFlags)) != 0) + if ((ptr=_mymalloc(length,filename,lineno,MyFlags)) != 0) memcpy((byte*) ptr, (byte*) from,(size_t) length); return(ptr); } /*_my_memdup */ -char *_my_strdup(const char *from, const char *sFile, uint uLine, +char *_my_strdup(const char *from, const char *filename, uint lineno, myf MyFlags) { gptr ptr; uint length=(uint) strlen(from)+1; - if ((ptr=_mymalloc(length,sFile,uLine,MyFlags)) != 0) + if ((ptr=_mymalloc(length,filename,lineno,MyFlags)) != 0) memcpy((byte*) ptr, (byte*) from,(size_t) length); return((char*) ptr); } /* _my_strdup */ char *_my_strdup_with_length(const byte *from, uint length, - const char *sFile, uint uLine, + const char *filename, uint lineno, myf MyFlags) { gptr ptr; - if ((ptr=_mymalloc(length+1,sFile,uLine,MyFlags)) != 0) + if 
((ptr=_mymalloc(length+1,filename,lineno,MyFlags)) != 0) { memcpy((byte*) ptr, (byte*) from,(size_t) length); ptr[length]=0; diff --git a/mysys/thr_alarm.c b/mysys/thr_alarm.c index a2647ec7399..2a16eeec215 100644 --- a/mysys/thr_alarm.c +++ b/mysys/thr_alarm.c @@ -122,12 +122,24 @@ void init_thr_alarm(uint max_alarms) /* Request alarm after sec seconds. - A pointer is returned with points to a non-zero int when the alarm has been - given. This can't be called from the alarm-handling thread. - Returns 0 if no more alarms are allowed (aborted by process) + + SYNOPSIS + thr_alarm() + alrm Pointer to alarm detection + alarm_data Structure to store in alarm queue + + NOTES + This function can't be called from the alarm-handling thread. + + RETURN VALUES + 0 ok + 1 If no more alarms are allowed (aborted by process) + + Stores in first argument a pointer to a non-zero int which is set to 0 + when the alarm has been given */ -bool thr_alarm(thr_alarm_t *alrm, uint sec, ALARM *alarm_data) +my_bool thr_alarm(thr_alarm_t *alrm, uint sec, ALARM *alarm_data) { ulong now; sigset_t old_mask; @@ -208,8 +220,7 @@ void thr_end_alarm(thr_alarm_t *alarmed) { ALARM *alarm_data; sigset_t old_mask; - uint i; - bool found=0; + uint i, found=0; DBUG_ENTER("thr_end_alarm"); pthread_sigmask(SIG_BLOCK,&full_signal_set,&old_mask); @@ -223,17 +234,18 @@ void thr_end_alarm(thr_alarm_t *alarmed) queue_remove(&alarm_queue,i),MYF(0); if (alarm_data->malloced) my_free((gptr) alarm_data,MYF(0)); - found=1; + found++; +#ifdef DBUG_OFF break; +#endif } } - DBUG_ASSERT(!*alarmed || found); + DBUG_ASSERT(!*alarmed || found == 1); if (!found) { -#ifdef MAIN - printf("Warning: Didn't find alarm %lx in queue of %d alarms\n", - (long) *alarmed, alarm_queue.elements); -#endif + if (*alarmed) + fprintf(stderr,"Warning: Didn't find alarm %lx in queue of %d alarms\n", + (long) *alarmed, alarm_queue.elements); DBUG_PRINT("warning",("Didn't find alarm %lx in queue\n", (long) *alarmed)); } diff --git
a/scripts/mysql_explain_log.sh b/scripts/mysql_explain_log.sh index c4a4ef21568..973d9e8a363 100644 --- a/scripts/mysql_explain_log.sh +++ b/scripts/mysql_explain_log.sh @@ -14,12 +14,14 @@ $Param->{host}=''; $Param->{user}=''; $Param->{password}=''; $Param->{PrintError}=0; +$Param->{socket}=''; if (!GetOptions ('date|d:i' => \$Param->{ViewDate}, 'host|h:s' => \$Param->{host}, 'user|u:s' => \$Param->{user}, 'password|p:s' => \$Param->{password}, 'printerror|e:s' => \$Param->{PrintError}, + 'socket|s:s' => \$Param->{socket}, )) { ShowOptions(); } @@ -50,7 +52,7 @@ else { #print "Date=$Param->{ViewDate}, host=$Param->{host}, user=$Param->{user}, password=$Param->{password}\n"; - $Param->{dbh}=DBI->connect("DBI:mysql:host=$Param->{host}",$Param->{user},$Param->{password},{PrintError=>0}); + $Param->{dbh}=DBI->connect("DBI:mysql:host=$Param->{host}".($Param->{socket}?";mysql_socket=$Param->{socket}":""),$Param->{user},$Param->{password},{PrintError=>0}); if (DBI::err()) { print "Error: " . DBI::errstr() . "\n"; } @@ -313,6 +315,8 @@ Usage: $0 [OPTIONS] < LOGFILE -u=USERNAME --password=PASSWORD password of db-user -p=PASSWORD +--socket=SOCKET mysqld socket file to connect +-s=SOCKET Read logfile from STDIN an try to EXPLAIN all SELECT statements. All UPDATE statements are rewritten to an EXPLAIN SELECT statement. The results of the EXPLAIN statement are collected and counted. All results with type=ALL are collected in an separete list. Results are printed to STDOUT. @@ -344,7 +348,7 @@ Then add indices to avoid table scans and remove those which aren't used. 
=head1 USAGE -explain_log.pl [--date=YYMMDD] --host=dbhost] [--user=dbuser] [--password=dbpw] < logfile +explain_log.pl [--date=YYMMDD] --host=dbhost] [--user=dbuser] [--password=dbpw] [--socket=/path/to/socket] < logfile --date=YYMMDD select only entrys of date @@ -362,14 +366,19 @@ explain_log.pl [--date=YYMMDD] --host=dbhost] [--user=dbuser] [--password=dbpw] -p=PASSWORD +--socket=SOCKET change path to the socket + +-s=SOCKET + =head1 EXAMPLE explain_log.pl --host=localhost --user=foo --password=bar < /var/lib/mysql/mobile.log -=head1 AUTHOR +=head1 AUTHORS Stefan Nitz Jan Willamowius , http://www.mobile.de + Dennis Haney (Added socket support) =head1 RECRUITING diff --git a/scripts/mysqld_safe.sh b/scripts/mysqld_safe.sh index e400c27b84c..fcd8e26c901 100644 --- a/scripts/mysqld_safe.sh +++ b/scripts/mysqld_safe.sh @@ -10,6 +10,8 @@ # mysql.server works by first doing a cd to the base directory and from there # executing mysqld_safe +KILL_MYSQLD=1; + trap '' 1 2 3 15 # we shouldn't let anyone kill us umask 007 @@ -34,6 +36,9 @@ parse_arguments() { for arg do case "$arg" in + --skip-kill-mysqld*) + KILL_MYSQLD=0; + ;; # these get passed explicitly to mysqld --basedir=*) MY_BASEDIR_VERSION=`echo "$arg" | sed -e "s;--basedir=;;"` ;; --datadir=*) DATADIR=`echo "$arg" | sed -e "s;--datadir=;;"` ;; @@ -70,6 +75,7 @@ parse_arguments() { MYSQLD="mysqld" fi ;; + --nice=*) niceness=`echo "$arg" | sed -e "s;--nice=;;"` ;; *) if test -n "$pick_args" then @@ -82,6 +88,7 @@ parse_arguments() { done } + MY_PWD=`pwd` # Check if we are starting this relative (for the binary release) if test -d $MY_PWD/data/mysql -a -f ./share/mysql/english/errmsg.sys -a \ @@ -110,6 +117,7 @@ fi MYSQL_UNIX_PORT=${MYSQL_UNIX_PORT:-@MYSQL_UNIX_ADDR@} MYSQL_TCP_PORT=${MYSQL_TCP_PORT:-@MYSQL_TCP_PORT@} user=@MYSQLD_USER@ +niceness=0 # Use the mysqld-max binary by default if the user doesn't specify a binary if test -x $ledir/mysqld-max @@ -167,7 +175,12 @@ export MYSQL_UNIX_PORT export 
MYSQL_TCP_PORT -NOHUP_NICENESS="nohup" +if test $niceness -eq 0 +then + NOHUP_NICENESS="nohup" +else + NOHUP_NICENESS="nohup nice -$niceness" +fi # Using nice with no args to get the niceness level is GNU-specific. # This check could be extended for other operating systems (e.g., @@ -198,8 +211,10 @@ then nice --$nice_value_diff echo testing > /dev/null 2>&1 then # nohup increases the priority (bad), and we are permitted - # to lower the priority - NOHUP_NICENESS="nice --$nice_value_diff nohup" + # to lower the priority with respect to the value the user + # might have been given + niceness=`expr $niceness - $nice_value_diff` + NOHUP_NICENESS="nice -$niceness nohup" fi fi else @@ -289,7 +304,7 @@ do break fi - if @IS_LINUX@ + if @IS_LINUX@ && test $KILL_MYSQLD -eq 1 then # Test if one process was hanging. # This is only a fix for Linux (running as base 3 mysqld processes) diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc index 50bb4275eaa..795cffc0776 100644 --- a/sql/ha_innodb.cc +++ b/sql/ha_innodb.cc @@ -43,7 +43,9 @@ InnoDB */ pthread_mutex_t innobase_mutex; /* Store MySQL definition of 'byte': in Linux it is char while InnoDB -uses unsigned char */ +uses unsigned char; the header univ.i which we include next defines +'byte' as a macro which expands to 'unsigned char' */ + typedef byte mysql_byte; #define INSIDE_HA_INNOBASE_CC @@ -129,11 +131,46 @@ static void innobase_print_error(const char* db_errpfx, char* buffer); /* General functions */ +/********************************************************************** +Save some CPU by testing the value of srv_thread_concurrency in inline +functions. */ +inline +void +innodb_srv_conc_enter_innodb( +/*=========================*/ + trx_t* trx) /* in: transaction handle */ +{ + if (srv_thread_concurrency >= 500) { + + return; + } + + srv_conc_enter_innodb(trx); +} + +/********************************************************************** +Save some CPU by testing the value of srv_thread_concurrency in inline +functions.
*/ +inline +void +innodb_srv_conc_exit_innodb( +/*========================*/ + trx_t* trx) /* in: transaction handle */ +{ + if (srv_thread_concurrency >= 500) { + + return; + } + + srv_conc_exit_innodb(trx); +} + /********************************************************************** Releases possible search latch and InnoDB thread FIFO ticket. These should -be released at each SQL statement end. It does no harm to release these -also in the middle of an SQL statement. */ -static +be released at each SQL statement end, and also when mysqld passes the +control to the client. It does no harm to release these also in the middle +of an SQL statement. */ +inline void innobase_release_stat_resources( /*============================*/ @@ -181,7 +218,9 @@ innobase_active_small(void) } /************************************************************************ -Converts an InnoDB error code to a MySQL error code. */ +Converts an InnoDB error code to a MySQL error code and also tells to MySQL +about a possible transaction rollback inside InnoDB caused by a lock wait +timeout or a deadlock. 
*/ static int convert_error_code_to_mysql( @@ -204,10 +243,10 @@ convert_error_code_to_mysql( } else if (error == (int) DB_ERROR) { - return(HA_ERR_NO_ACTIVE_RECORD); + return(-1); /* unspecified error */ } else if (error == (int) DB_DEADLOCK) { - /* Since we roll back the whole transaction, we must + /* Since we rolled back the whole transaction, we must tell it also to MySQL so that MySQL knows to empty the cached binlog for this transaction */ @@ -219,11 +258,10 @@ convert_error_code_to_mysql( } else if (error == (int) DB_LOCK_WAIT_TIMEOUT) { - /* Since we roll back the whole transaction, we must + /* Since we rolled back the whole transaction, we must tell it also to MySQL so that MySQL knows to empty the cached binlog for this transaction */ - if (thd) { ha_rollback(thd); } @@ -269,6 +307,9 @@ convert_error_code_to_mysql( } else if (error == (int) DB_CORRUPTION) { return(HA_ERR_CRASHED); + } else if (error == (int) DB_NO_SAVEPOINT) { + + return(HA_ERR_NO_SAVEPOINT); } else { return(-1); // Unknown error } @@ -896,6 +937,11 @@ innobase_commit_low( /*================*/ trx_t* trx) /* in: transaction handle */ { + if (trx->conc_state == TRX_NOT_STARTED) { + + return; + } + /* TODO: Guilhem should check if master_log_name, pending etc. are right if the master log gets rotated! Possible bug here. Comment by Heikki March 4, 2003. */ @@ -910,11 +956,13 @@ innobase_commit_low( active_mi->rli.event_len + active_mi->rli.pending)); } - trx_commit_for_mysql(trx); + + trx_commit_for_mysql(trx); } /********************************************************************* -Commits a transaction in an InnoDB database. */ +Commits a transaction in an InnoDB database or marks an SQL statement +ended. 
*/ int innobase_commit( @@ -934,27 +982,48 @@ innobase_commit( trx = check_trx_exists(thd); - if (trx->auto_inc_lock) { - - /* If we had reserved the auto-inc lock for - some table in this SQL statement, we release it now */ - - srv_conc_enter_innodb(trx); - row_unlock_table_autoinc_for_mysql(trx); - srv_conc_exit_innodb(trx); + /* The flag thd->transaction.all.innodb_active_trans is set to 1 in + ::external_lock, ::start_stmt, and innobase_savepoint, and it is only + set to 0 in a commit or a rollback. If it is 0 we know there cannot be + resources to be freed and we could return immediately. For the time + being we play safe and do the cleanup though there should be nothing + to clean up. */ + + if (thd->transaction.all.innodb_active_trans == 0 + && trx->conc_state != TRX_NOT_STARTED) { + + fprintf(stderr, +"InnoDB: Error: thd->transaction.all.innodb_active_trans == 0\n" +"InnoDB: but trx->conc_state != TRX_NOT_STARTED\n"); } - if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) { + if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle + || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) { + innobase_commit_low(trx); - thd->transaction.all.innodb_active_trans=0; + + thd->transaction.all.innodb_active_trans = 0; + } else { + if (trx->auto_inc_lock) { + /* If we had reserved the auto-inc lock for some + table in this SQL statement we release it now */ + + innodb_srv_conc_enter_innodb(trx); + row_unlock_table_autoinc_for_mysql(trx); + innodb_srv_conc_exit_innodb(trx); + } + /* Store the current undo_no of the transaction so that we + know where to roll back if we have to roll back the next + SQL statement */ + + trx_mark_sql_stat_end(trx); } - /* Release possible statement level resources */ + /* Release a possible FIFO ticket and search latch */ innobase_release_stat_resources(trx); - trx_mark_sql_stat_end(trx); - /* Tell InnoDB server that there might be work for - utility threads: */ + /* Tell the InnoDB server that there might be work for 
utility + threads: */ srv_active_wake_master_thread(); @@ -1025,7 +1094,7 @@ innobase_commit_complete( } /********************************************************************* -Rolls back a transaction in an InnoDB database. */ +Rolls back a transaction or the latest SQL statement. */ int innobase_rollback( @@ -1046,30 +1115,107 @@ innobase_rollback( trx = check_trx_exists(thd); if (trx->auto_inc_lock) { - - /* If we had reserved the auto-inc lock for - some table in this SQL statement, we release it now */ - - srv_conc_enter_innodb(trx); + /* If we had reserved the auto-inc lock for some table (if + we come here to roll back the latest SQL statement) we + release it now before a possibly lengthy rollback */ + + innodb_srv_conc_enter_innodb(trx); row_unlock_table_autoinc_for_mysql(trx); - srv_conc_exit_innodb(trx); + innodb_srv_conc_exit_innodb(trx); } - srv_conc_enter_innodb(trx); + innodb_srv_conc_enter_innodb(trx); + + if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle + || (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))) { - if (trx_handle != (void*)&innodb_dummy_stmt_trx_handle) { error = trx_rollback_for_mysql(trx); - thd->transaction.all.innodb_active_trans=0; + thd->transaction.all.innodb_active_trans = 0; } else { error = trx_rollback_last_sql_stat_for_mysql(trx); } - srv_conc_exit_innodb(trx); + innodb_srv_conc_exit_innodb(trx); - /* Release possible statement level resources */ + /* Release a possible FIFO ticket and search latch */ innobase_release_stat_resources(trx); - trx_mark_sql_stat_end(trx); + DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); +} + +/********************************************************************* +Rolls back a transaction to a savepoint. 
*/ + +int +innobase_rollback_to_savepoint( +/*===========================*/ + /* out: 0 if success, HA_ERR_NO_SAVEPOINT if + no savepoint with the given name */ + THD* thd, /* in: handle to the MySQL thread of the user + whose transaction should be rolled back */ + char* savepoint_name, /* in: savepoint name */ + my_off_t* binlog_cache_pos)/* out: position which corresponds to the + savepoint in the binlog cache of this + transaction, not defined if error */ +{ + ib_longlong mysql_binlog_cache_pos; + int error = 0; + trx_t* trx; + + DBUG_ENTER("innobase_rollback_to_savepoint"); + + trx = check_trx_exists(thd); + + innodb_srv_conc_enter_innodb(trx); + + error = trx_rollback_to_savepoint_for_mysql(trx, savepoint_name, + &mysql_binlog_cache_pos); + innodb_srv_conc_exit_innodb(trx); + + *binlog_cache_pos = (my_off_t)mysql_binlog_cache_pos; + + /* Release a possible FIFO ticket and search latch */ + innobase_release_stat_resources(trx); + + DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); +} + +/********************************************************************* +Sets a transaction savepoint. 
*/ + +int +innobase_savepoint( +/*===============*/ + /* out: always 0, that is, always succeeds */ + THD* thd, /* in: handle to the MySQL thread */ + char* savepoint_name, /* in: savepoint name */ + my_off_t binlog_cache_pos)/* in: offset up to which the current + transaction has cached log entries to its + binlog cache, not defined if no transaction + active, or we are in the autocommit state, or + binlogging is not switched on */ +{ + int error = 0; + trx_t* trx; + + DBUG_ENTER("innobase_savepoint"); + + if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { + /* In the autocommit state there is no sense to set a + savepoint: we return immediate success */ + DBUG_RETURN(0); + } + + trx = check_trx_exists(thd); + + /* Setting a savepoint starts a transaction inside InnoDB since + it allocates resources for it (memory to store the savepoint name, + for example) */ + + thd->transaction.all.innodb_active_trans = 1; + + error = trx_savepoint_for_mysql(trx, savepoint_name, + (ib_longlong)binlog_cache_pos); DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); } @@ -1197,7 +1343,6 @@ ha_innobase::open( { dict_table_t* ib_table; int error = 0; - uint buff_len; char norm_name[1000]; DBUG_ENTER("ha_innobase::open"); @@ -1222,11 +1367,11 @@ ha_innobase::open( fields when packed actually became 1 byte longer, when we also stored the string length as the first byte. 
*/ - buff_len = table->reclength + table->max_key_length + upd_and_key_val_buff_len = table->reclength + table->max_key_length + MAX_REF_PARTS * 3; if (!(mysql_byte*) my_multi_malloc(MYF(MY_WME), - &upd_buff, buff_len, - &key_val_buff, buff_len, + &upd_buff, upd_and_key_val_buff_len, + &key_val_buff, upd_and_key_val_buff_len, NullS)) { free_share(share); DBUG_RETURN(1); @@ -1477,6 +1622,10 @@ innobase_mysql_cmp( case FIELD_TYPE_STRING: case FIELD_TYPE_VAR_STRING: + case FIELD_TYPE_TINY_BLOB: + case FIELD_TYPE_MEDIUM_BLOB: + case FIELD_TYPE_BLOB: + case FIELD_TYPE_LONG_BLOB: ret = my_sortncmp((const char*) a, a_length, (const char*) b, b_length); if (ret < 0) { @@ -1503,7 +1652,7 @@ get_innobase_type_from_mysql_type( /* out: DATA_BINARY, DATA_VARCHAR, ... */ Field* field) /* in: MySQL field */ { - /* The following asserts check that MySQL type code fits in + /* The following asserts check that the MySQL type code fits in 8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to the type */ @@ -1514,6 +1663,8 @@ get_innobase_type_from_mysql_type( DBUG_ASSERT((ulint)FIELD_TYPE_DECIMAL < 256); switch (field->type()) { + /* NOTE that we only allow string types in DATA_MYSQL + and DATA_VARMYSQL */ case FIELD_TYPE_VAR_STRING: if (field->flags & BINARY_FLAG) { return(DATA_BINARY); @@ -1567,8 +1718,7 @@ get_innobase_type_from_mysql_type( } /*********************************************************************** -Stores a key value for a row to a buffer. This must currently only be used -to store a row reference to the 'ref' buffer of this table handle! */ +Stores a key value for a row to a buffer. */ uint ha_innobase::store_key_val_for_row( @@ -1576,41 +1726,108 @@ ha_innobase::store_key_val_for_row( /* out: key value length as stored in buff */ uint keynr, /* in: key number */ char* buff, /* in/out: buffer for the key value (in MySQL - format); currently this MUST be the 'ref' - buffer! 
*/ + format) */ + uint buff_len,/* in: buffer length */ const mysql_byte* record)/* in: row in MySQL format */ { KEY* key_info = table->key_info + keynr; KEY_PART_INFO* key_part = key_info->key_part; KEY_PART_INFO* end = key_part + key_info->key_parts; char* buff_start = buff; + enum_field_types mysql_type; + Field* field; + ulint blob_len; + byte* blob_data; + ibool is_null; DBUG_ENTER("store_key_val_for_row"); + /* The format for storing a key field in MySQL is the following: + + 1. If the column can be NULL, then in the first byte we put 1 if the + field value is NULL, 0 otherwise. + + 2. If the column is of a BLOB type (it must be a column prefix field + in this case), then we put the length of the data in the field to the + next 2 bytes, in the little-endian format. If the field is SQL NULL, + then these 2 bytes are set to 0. Note that the length of data in the + field is <= column prefix length. + + 3. In a column prefix field, prefix_len next bytes are reserved for + data. In a normal field the max field length next bytes are reserved + for data. For a VARCHAR(n) the max field length is n. If the stored + value is the SQL NULL then these data bytes are set to 0. */ + + /* We have to zero-fill the buffer so that MySQL is able to use a + simple memcmp to compare two key values to determine if they are + equal. MySQL does this to compare contents of two 'ref' values. 
*/ + + bzero(buff, buff_len); + for (; key_part != end; key_part++) { + is_null = FALSE; if (key_part->null_bit) { - /* Store 0 if the key part is a NULL part */ - if (record[key_part->null_offset] & key_part->null_bit) { - *buff++ = 1; - continue; - } - - *buff++ = 0; + *buff = 1; + is_null = TRUE; + } else { + *buff = 0; + } + buff++; } - memcpy(buff, record + key_part->offset, key_part->length); - buff += key_part->length; + field = key_part->field; + mysql_type = field->type(); + + if (mysql_type == FIELD_TYPE_TINY_BLOB + || mysql_type == FIELD_TYPE_MEDIUM_BLOB + || mysql_type == FIELD_TYPE_BLOB + || mysql_type == FIELD_TYPE_LONG_BLOB) { + + ut_a(key_part->key_part_flag & HA_PART_KEY); + + if (is_null) { + buff += key_part->length + 2; + + continue; + } + + blob_data = row_mysql_read_blob_ref(&blob_len, + (byte*) (record + + (ulint)get_field_offset(table, field)), + (ulint) field->pack_length()); + + ut_a(get_field_offset(table, field) + == key_part->offset); + if (blob_len > key_part->length) { + blob_len = key_part->length; + } + + /* MySQL reserves 2 bytes for the length and the + storage of the number is little-endian */ + + ut_a(blob_len < 256); + *((byte*)buff) = (byte)blob_len; + buff += 2; + + memcpy(buff, blob_data, blob_len); + + buff += key_part->length; + } else { + if (is_null) { + buff += key_part->length; + + continue; + } + memcpy(buff, record + key_part->offset, + key_part->length); + buff += key_part->length; + } } - /* - We have to zero-fill the 'ref' buffer so that MySQL is able to - use a simple memcmp to compare two key values to determine if they - are equal - */ - bzero(buff, (ref_length- (uint) (buff - buff_start))); + ut_a(buff <= buff_start + buff_len); DBUG_RETURN((uint)(buff - buff_start)); } @@ -1882,9 +2099,9 @@ ha_innobase::write_row( The lock is released at each SQL statement's end. 
*/ - srv_conc_enter_innodb(prebuilt->trx); + innodb_srv_conc_enter_innodb(prebuilt->trx); error = row_lock_table_autoinc_for_mysql(prebuilt); - srv_conc_exit_innodb(prebuilt->trx); + innodb_srv_conc_exit_innodb(prebuilt->trx); if (error != DB_SUCCESS) { @@ -1895,14 +2112,15 @@ ha_innobase::write_row( dict_table_autoinc_update(prebuilt->table, auto_inc); } else { - srv_conc_enter_innodb(prebuilt->trx); + innodb_srv_conc_enter_innodb(prebuilt->trx); if (!prebuilt->trx->auto_inc_lock) { error = row_lock_table_autoinc_for_mysql( prebuilt); if (error != DB_SUCCESS) { - srv_conc_exit_innodb(prebuilt->trx); + innodb_srv_conc_exit_innodb( + prebuilt->trx); error = convert_error_code_to_mysql( error, user_thd); @@ -1916,7 +2134,7 @@ ha_innobase::write_row( auto_inc = dict_table_autoinc_get(prebuilt->table); incremented_auto_inc_counter = TRUE; - srv_conc_exit_innodb(prebuilt->trx); + innodb_srv_conc_exit_innodb(prebuilt->trx); /* We can give the new value for MySQL to place in the field */ @@ -1939,11 +2157,11 @@ ha_innobase::write_row( build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW); } - srv_conc_enter_innodb(prebuilt->trx); + innodb_srv_conc_enter_innodb(prebuilt->trx); error = row_insert_for_mysql((byte*) record, prebuilt); - srv_conc_exit_innodb(prebuilt->trx); + innodb_srv_conc_exit_innodb(prebuilt->trx); if (error != DB_SUCCESS) { /* If the insert did not succeed we restore the value of @@ -2014,7 +2232,6 @@ innobase_convert_and_store_changed_col( while (len > 0 && data[len - 1] == ' ') { len--; } - } else if (col_type == DATA_INT) { /* Store integer data in InnoDB in a big-endian format, sign bit negated, if signed */ @@ -2052,9 +2269,11 @@ calc_row_difference( struct st_table* table, /* in: table in MySQL data dictionary */ mysql_byte* upd_buff, /* in: buffer to use */ + ulint buff_len, /* in: buffer length */ row_prebuilt_t* prebuilt, /* in: InnoDB prebuilt struct */ THD* thd) /* in: user thread */ { + mysql_byte* original_upd_buff = upd_buff; Field* 
field; uint n_fields; ulint o_len; @@ -2136,12 +2355,13 @@ calc_row_difference( (prebuilt->table->cols + i)->clust_pos; n_changed++; } - ; } uvect->n_fields = n_changed; uvect->info_bits = 0; + ut_a(buf <= (byte*)original_upd_buff + buff_len); + return(0); } @@ -2190,17 +2410,19 @@ ha_innobase::update_row( (uses upd_buff of the handle) */ calc_row_difference(uvect, (mysql_byte*) old_row, new_row, table, - upd_buff, prebuilt, user_thd); + upd_buff, (ulint)upd_and_key_val_buff_len, + prebuilt, user_thd); + /* This is not a delete */ prebuilt->upd_node->is_delete = FALSE; assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW); - srv_conc_enter_innodb(prebuilt->trx); + innodb_srv_conc_enter_innodb(prebuilt->trx); error = row_update_for_mysql((byte*) old_row, prebuilt); - srv_conc_exit_innodb(prebuilt->trx); + innodb_srv_conc_exit_innodb(prebuilt->trx); error = convert_error_code_to_mysql(error, user_thd); @@ -2244,11 +2466,11 @@ ha_innobase::delete_row( prebuilt->upd_node->is_delete = TRUE; - srv_conc_enter_innodb(prebuilt->trx); + innodb_srv_conc_enter_innodb(prebuilt->trx); error = row_update_for_mysql((byte*) record, prebuilt); - srv_conc_exit_innodb(prebuilt->trx); + innodb_srv_conc_exit_innodb(prebuilt->trx); error = convert_error_code_to_mysql(error, user_thd); @@ -2436,10 +2658,11 @@ ha_innobase::index_read( prebuilt->search_tuple */ row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple, - (byte*) key_val_buff, - index, - (byte*) key_ptr, - (ulint) key_len); + (byte*) key_val_buff, + (ulint)upd_and_key_val_buff_len, + index, + (byte*) key_ptr, + (ulint) key_len); } else { /* We position the cursor to the last or the first entry in the index */ @@ -2461,11 +2684,11 @@ ha_innobase::index_read( last_match_mode = match_mode; - srv_conc_enter_innodb(prebuilt->trx); + innodb_srv_conc_enter_innodb(prebuilt->trx); ret = row_search_for_mysql((byte*) buf, mode, prebuilt, match_mode, 0); - srv_conc_exit_innodb(prebuilt->trx); + 
innodb_srv_conc_exit_innodb(prebuilt->trx); if (ret == DB_SUCCESS) { error = 0; @@ -2609,11 +2832,11 @@ ha_innobase::general_fetch( ut_a(prebuilt->trx == (trx_t*) current_thd->transaction.all.innobase_tid); - srv_conc_enter_innodb(prebuilt->trx); + innodb_srv_conc_enter_innodb(prebuilt->trx); ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode, direction); - srv_conc_exit_innodb(prebuilt->trx); + innodb_srv_conc_exit_innodb(prebuilt->trx); if (ret == DB_SUCCESS) { error = 0; @@ -2902,7 +3125,8 @@ ha_innobase::position( memcpy(ref, prebuilt->row_id, len); } else { - len = store_key_val_for_row(primary_key, (char*) ref, record); + len = store_key_val_for_row(primary_key, (char*)ref, + ref_length, record); } /* Since we do not store len to the buffer 'ref', we must assume @@ -2916,7 +3140,6 @@ ha_innobase::position( } } - /********************************************************************* Creates a table definition to an InnoDB database. */ static @@ -2935,6 +3158,8 @@ create_table_def( ulint col_type; ulint nulls_allowed; ulint unsigned_type; + ulint binary_type; + ulint nonlatin1_type; ulint i; DBUG_ENTER("create_table_def"); @@ -2963,9 +3188,24 @@ create_table_def( unsigned_type = 0; } + if (col_type == DATA_BLOB + && strcmp(default_charset_info->name, "latin1") != 0) { + nonlatin1_type = DATA_NONLATIN1; + } else { + nonlatin1_type = 0; + } + + if (field->flags & BINARY_FLAG) { + binary_type = DATA_BINARY_TYPE; + nonlatin1_type = 0; + } else { + binary_type = 0; + } + dict_mem_table_add_col(table, (char*) field->field_name, col_type, (ulint)field->type() - | nulls_allowed | unsigned_type, + | nulls_allowed | unsigned_type + | nonlatin1_type | binary_type, field->pack_length(), 0); } @@ -2988,13 +3228,17 @@ create_index( const char* table_name, /* in: table name */ uint key_num) /* in: index number */ { + Field* field; dict_index_t* index; int error; ulint n_fields; KEY* key; KEY_PART_INFO* key_part; ulint ind_type; + ulint col_type; + ulint 
prefix_len; ulint i; + ulint j; DBUG_ENTER("create_index"); @@ -3021,10 +3265,63 @@ create_index( for (i = 0; i < n_fields; i++) { key_part = key->key_part + i; + /* (The flag HA_PART_KEY denotes in MySQL a column prefix + field in an index: we only store a specified number of first + bytes of the column to the index field.) The flag does not + seem to be properly set by MySQL. Let us fall back on testing + the length of the key part versus the column. */ + + field = NULL; + for (j = 0; j < form->fields; j++) { + + field = form->field[j]; + + if (strlen(field->field_name) + == strlen(key_part->field->field_name) + && 0 == ut_cmp_in_lower_case( + (char*)field->field_name, + (char*)key_part->field->field_name, + strlen(field->field_name))) { + /* Found the corresponding column */ + + break; + } + } + + ut_a(j < form->fields); + + col_type = get_innobase_type_from_mysql_type(key_part->field); + + if (DATA_BLOB == col_type + || key_part->length < field->pack_length()) { + + prefix_len = key_part->length; + + if (col_type == DATA_INT + || col_type == DATA_FLOAT + || col_type == DATA_DOUBLE + || col_type == DATA_DECIMAL) { + fprintf(stderr, +"InnoDB: error: MySQL is trying to create a column prefix index field\n" +"InnoDB: on an inappropriate data type. 
Table name %s, column name %s.\n", + table_name, key_part->field->field_name); + + prefix_len = 0; + } + } else { + prefix_len = 0; + } + + if (prefix_len >= DICT_MAX_COL_PREFIX_LEN) { + DBUG_RETURN(-1); + } + /* We assume all fields should be sorted in ascending order, hence the '0': */ + dict_mem_index_add_field(index, - (char*) key_part->field->field_name, 0); + (char*) key_part->field->field_name, + 0, prefix_len); } error = row_create_index_for_mysql(index, trx); @@ -3490,6 +3787,8 @@ ha_innobase::records_in_range( table->reclength + table->max_key_length + 100, MYF(MY_WME)); + ulint buff2_len = table->reclength + + table->max_key_length + 100; dtuple_t* range_start; dtuple_t* range_end; ib_longlong n_rows; @@ -3526,12 +3825,15 @@ ha_innobase::records_in_range( dict_index_copy_types(range_end, index, key->key_parts); row_sel_convert_mysql_key_to_innobase( - range_start, (byte*) key_val_buff, index, + range_start, (byte*) key_val_buff, + (ulint)upd_and_key_val_buff_len, + index, (byte*) start_key, (ulint) start_key_len); row_sel_convert_mysql_key_to_innobase( - range_end, (byte*) key_val_buff2, index, + range_end, (byte*) key_val_buff2, + buff2_len, index, (byte*) end_key, (ulint) end_key_len); @@ -3562,8 +3864,7 @@ ha_innobase::records_in_range( /************************************************************************* Gives an UPPER BOUND to the number of rows in a table. This is used in -filesort.cc and its better if the upper bound hold. -*/ +filesort.cc. */ ha_rows ha_innobase::estimate_number_of_rows(void) @@ -3598,11 +3899,11 @@ ha_innobase::estimate_number_of_rows(void) /* Calculate a minimum length for a clustered index record and from that an upper bound for the number of rows. Since we only calculate - new statistics in row0mysql.c when a tablehas grown - by a threshold factor, we must add a safety factor 2 in front - of the formula below. 
*/ + new statistics in row0mysql.c when a table has grown by a threshold + factor, we must add a safety factor 2 in front of the formula below. */ - estimate = 2 * local_data_file_length / dict_index_calc_min_rec_len(index); + estimate = 2 * local_data_file_length / + dict_index_calc_min_rec_len(index); prebuilt->trx->op_info = (char*)""; @@ -3629,27 +3930,36 @@ ha_innobase::scan_time() return((double) (prebuilt->table->stat_clustered_index_size)); } -/* - Calculate the time it takes to read a set of ranges through and index - This enables us to optimise reads for clustered indexes. -*/ +/********************************************************************** +Calculate the time it takes to read a set of ranges through an index +This enables us to optimise reads for clustered indexes. */ -double ha_innobase::read_time(uint index, uint ranges, ha_rows rows) +double +ha_innobase::read_time( +/*===================*/ + /* out: estimated time measured in disk seeks */ + uint index, /* in: key number */ + uint ranges, /* in: how many ranges */ + ha_rows rows) /* in: estimated number of rows in the ranges */ { - ha_rows total_rows; - double time_for_scan; - if (index != table->primary_key) - return handler::read_time(index, ranges, rows); // Not clustered - if (rows <= 2) - return (double) rows; - /* - Assume that the read is proportional to scan time for all rows + one - seek per range. - */ - time_for_scan= scan_time(); - if ((total_rows= estimate_number_of_rows()) < rows) - return time_for_scan; - return (ranges + (double) rows / (double) total_rows * time_for_scan); + ha_rows total_rows; + double time_for_scan; + + if (index != table->primary_key) + return handler::read_time(index, ranges, rows); // Not clustered + + if (rows <= 2) + return (double) rows; + + /* Assume that the read time is proportional to the scan time for all + rows + at most one seek per range. 
*/ + + time_for_scan= scan_time(); + + if ((total_rows= estimate_number_of_rows()) < rows) + return time_for_scan; + + return (ranges + (double) rows / (double) total_rows * time_for_scan); } /************************************************************************* @@ -3733,8 +4043,32 @@ ha_innobase::info( } for (i = 0; i < table->keys; i++) { + if (index == NULL) { + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: Error: table %s contains less indexes inside InnoDB\n" +"InnoDB: than are defined in the MySQL .frm file. Have you mixed up\n" +"InnoDB: .frm files from different installations? See section\n" +"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n", + ib_table->name); + break; + } + for (j = 0; j < table->key_info[i].key_parts; j++) { + if (j + 1 > index->n_uniq) { + ut_print_timestamp(stderr); + fprintf(stderr, +" InnoDB: Error: index %s of %s has %lu columns unique inside InnoDB\n" +"InnoDB: but MySQL is asking statistics for %lu columns. Have you mixed up\n" +"InnoDB: .frm files from different installations? See section\n" +"InnoDB: 15.1 at http://www.innodb.com/ibman.html\n", + index->name, + ib_table->name, index->n_uniq, + j + 1); + break; + } + if (index->stat_n_diff_key_vals[j + 1] == 0) { rec_per_key = records; @@ -3992,10 +4326,11 @@ ha_innobase::reset(void) } /********************************************************************** -Inside LOCK TABLES MySQL will not call external_lock() between SQL -statements. It will call this function at the start of each SQL statement. -Note also a spacial case: if a temporary table is created inside LOCK -TABLES, MySQL has not called external_lock() at all on that table. */ +MySQL calls this function at the start of each SQL statement inside LOCK +TABLES. Inside LOCK TABLES the ::external_lock method does not work to +mark SQL statement borders. Note also a special case: if a temporary table +is created inside LOCK TABLES, MySQL has not called external_lock() at all +on that table. 
*/ int ha_innobase::start_stmt( @@ -4010,8 +4345,14 @@ ha_innobase::start_stmt( trx = prebuilt->trx; + /* Here we release the search latch and the InnoDB thread FIFO ticket + if they were reserved. They should have been released already at the + end of the previous statement, but because inside LOCK TABLES the + lock count method does not work to mark the end of a SELECT statement, + that may not be the case. We MUST release the search latch before an + INSERT, for example. */ + innobase_release_stat_resources(trx); - trx_mark_sql_stat_end(trx); if (trx->isolation_level <= TRX_ISO_READ_COMMITTED && trx->read_view) { @@ -4034,7 +4375,8 @@ ha_innobase::start_stmt( prebuilt->select_lock_type = LOCK_X; } - + + /* Set the MySQL flag to mark that there is an active transaction */ thd->transaction.all.innodb_active_trans = 1; return(0); @@ -4098,17 +4440,20 @@ ha_innobase::external_lock( } if (lock_type != F_UNLCK) { - if (trx->n_mysql_tables_in_use == 0) { - trx_mark_sql_stat_end(trx); - } + /* MySQL is setting a new table lock */ + /* Set the MySQL flag to mark that there is an active + transaction */ thd->transaction.all.innodb_active_trans = 1; + trx->n_mysql_tables_in_use++; prebuilt->mysql_has_locked = TRUE; - trx->isolation_level = innobase_map_isolation_level( + if (trx->n_mysql_tables_in_use == 1) { + trx->isolation_level = innobase_map_isolation_level( (enum_tx_isolation) thd->variables.tx_isolation); + } if (trx->isolation_level == TRX_ISO_SERIALIZABLE && prebuilt->select_lock_type == LOCK_NONE) { @@ -4124,37 +4469,44 @@ ha_innobase::external_lock( trx->mysql_n_tables_locked++; } - } else { - trx->n_mysql_tables_in_use--; - prebuilt->mysql_has_locked = FALSE; - auto_inc_counter_for_this_stat = 0; - if (trx->n_mysql_tables_in_use == 0) { + DBUG_RETURN(error); + } - trx->mysql_n_tables_locked = 0; + /* MySQL is releasing a table lock */ - prebuilt->used_in_HANDLER = FALSE; + trx->n_mysql_tables_in_use--; + prebuilt->mysql_has_locked = FALSE; + 
auto_inc_counter_for_this_stat = 0; - /* Here we release the search latch and InnoDB - thread FIFO ticket if they were reserved. */ + /* If the MySQL lock count drops to zero we know that the current SQL + statement has ended */ - innobase_release_stat_resources(trx); + if (trx->n_mysql_tables_in_use == 0) { + trx->mysql_n_tables_locked = 0; + prebuilt->used_in_HANDLER = FALSE; + + if (!(thd->options + & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { + if (thd->transaction.all.innodb_active_trans != 0) { + innobase_commit(thd, trx); + } + } else { if (trx->isolation_level <= TRX_ISO_READ_COMMITTED && trx->read_view) { - /* At low transaction isolation levels we let + /* At low transaction isolation levels we let each consistent read set its own snapshot */ - read_view_close_for_mysql(trx); + read_view_close_for_mysql(trx); } - - if (!(thd->options - & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { - - innobase_commit(thd, trx); - } } + + /* Here we release the search latch and the InnoDB thread FIFO + ticket if they were reserved. 
*/ + + innobase_release_stat_resources(trx); } DBUG_RETURN(error); @@ -4377,9 +4729,9 @@ ha_innobase::innobase_read_and_init_auto_inc( return(0); } - srv_conc_enter_innodb(prebuilt->trx); + innodb_srv_conc_enter_innodb(prebuilt->trx); error = row_lock_table_autoinc_for_mysql(prebuilt); - srv_conc_exit_innodb(prebuilt->trx); + innodb_srv_conc_exit_innodb(prebuilt->trx); if (error != DB_SUCCESS) { error = convert_error_code_to_mysql(error, user_thd); @@ -4473,4 +4825,3 @@ ha_innobase::get_auto_increment() } #endif /* HAVE_INNOBASE_DB */ - diff --git a/sql/ha_innodb.h b/sql/ha_innodb.h index 1a9b1b16c64..a3fe56f6bcd 100644 --- a/sql/ha_innodb.h +++ b/sql/ha_innodb.h @@ -52,6 +52,9 @@ class ha_innobase: public handler byte* key_val_buff; /* buffer used in converting search key values from MySQL format to Innodb format */ + ulong upd_and_key_val_buff_len; + /* the length of each of the previous + two buffers */ ulong int_table_flags; uint primary_key; uint last_dup_key; @@ -73,7 +76,8 @@ class ha_innobase: public handler longlong auto_inc_counter_for_this_stat; ulong max_row_length(const byte *buf); - uint store_key_val_for_row(uint keynr, char* buff, const byte* record); + uint store_key_val_for_row(uint keynr, char* buff, uint buff_len, + const byte* record); int update_thd(THD* thd); int change_active_index(uint keynr); int general_fetch(byte* buf, uint direction, uint match_mode); @@ -83,13 +87,15 @@ class ha_innobase: public handler public: ha_innobase(TABLE *table): handler(table), int_table_flags(HA_REC_NOT_IN_SEQ | - HA_KEYPOS_TO_RNDPOS | HA_LASTKEY_ORDER | - HA_NULL_KEY | HA_CAN_SQL_HANDLER | + HA_KEYPOS_TO_RNDPOS | + HA_LASTKEY_ORDER | + HA_NULL_KEY | + HA_BLOB_KEY | + HA_CAN_SQL_HANDLER | HA_NOT_EXACT_COUNT | HA_NO_WRITE_DELAYED | HA_PRIMARY_KEY_IN_READ_INDEX | HA_DROP_BEFORE_CREATE | - HA_NO_PREFIX_CHAR_KEYS | HA_TABLE_SCAN_ON_INDEX), last_dup_key((uint) -1), start_of_scan(0) @@ -217,6 +223,14 @@ int innobase_report_binlog_offset_and_commit( int 
innobase_commit_complete( void* trx_handle); int innobase_rollback(THD *thd, void* trx_handle); +int innobase_rollback_to_savepoint( + THD* thd, + char* savepoint_name, + my_off_t* binlog_cache_pos); +int innobase_savepoint( + THD* thd, + char* savepoint_name, + my_off_t binlog_cache_pos); int innobase_close_connection(THD *thd); int innobase_drop_database(char *path); int innodb_show_status(THD* thd); diff --git a/sql/handler.cc b/sql/handler.cc index 45c83355c94..4ea5bc0e9f5 100644 --- a/sql/handler.cc +++ b/sql/handler.cc @@ -208,8 +208,12 @@ void ha_close_connection(THD* thd) } /* - This is used to commit or rollback a single statement depending - on the value of error + This is used to commit or rollback a single statement depending on the value + of error. Note that if the autocommit is on, then the following call inside + InnoDB will commit or rollback the whole transaction (= the statement). The + autocommit mechanism built into InnoDB is based on counting locks, but if + the user has used LOCK TABLES then that mechanism does not know to do the + commit. */ int ha_autocommit_or_rollback(THD *thd, int error) @@ -375,7 +379,6 @@ int ha_commit_trans(THD *thd, THD_TRANS* trans) trans->innodb_active_trans=0; if (trans == &thd->transaction.all) operation_done= transaction_commited= 1; - } #endif #ifdef HAVE_QUERY_CACHE @@ -443,6 +446,70 @@ int ha_rollback_trans(THD *thd, THD_TRANS *trans) DBUG_RETURN(error); } + +/* +Rolls the current transaction back to a savepoint. +Return value: 0 if success, 1 if there was not a savepoint of the given +name. 
+*/ + +int ha_rollback_to_savepoint(THD *thd, char *savepoint_name) +{ + my_off_t binlog_cache_pos=0; + bool operation_done=0; + int error=0; + DBUG_ENTER("ha_rollback_to_savepoint"); +#ifdef USING_TRANSACTIONS + if (opt_using_transactions) + { +#ifdef HAVE_INNOBASE_DB + /* + Retrieve the trans_log binlog cache position corresponding to the + savepoint, and if the rollback is successful inside InnoDB reset the write + position in the binlog cache to what it was at the savepoint. + */ + if ((error=innobase_rollback_to_savepoint(thd, savepoint_name, + &binlog_cache_pos))) + { + my_error(ER_ERROR_DURING_ROLLBACK, MYF(0), error); + error=1; + } + else + reinit_io_cache(&thd->transaction.trans_log, WRITE_CACHE, + binlog_cache_pos, 0, 0); + operation_done=1; +#endif + if (operation_done) + statistic_increment(ha_rollback_count,&LOCK_status); + } +#endif /* USING_TRANSACTIONS */ + + DBUG_RETURN(error); +} + + +/* +Sets a transaction savepoint. +Return value: always 0, that is, succeeds always +*/ + +int ha_savepoint(THD *thd, char *savepoint_name) +{ + my_off_t binlog_cache_pos=0; + int error=0; + DBUG_ENTER("ha_savepoint"); +#ifdef USING_TRANSACTIONS + if (opt_using_transactions) + { + binlog_cache_pos=my_b_tell(&thd->transaction.trans_log); +#ifdef HAVE_INNOBASE_DB + innobase_savepoint(thd,savepoint_name, binlog_cache_pos); +#endif + } +#endif /* USING_TRANSACTIONS */ + DBUG_RETURN(error); +} + bool ha_flush_logs() { bool result=0; diff --git a/sql/handler.h b/sql/handler.h index fbad36bffdd..56f63d1d521 100644 --- a/sql/handler.h +++ b/sql/handler.h @@ -189,7 +189,8 @@ class handler :public Sql_alloc public: byte *ref; /* Pointer to current row */ byte *dupp_ref; /* Pointer to dupp row */ - uint ref_length; /* Length of ref (1-8) */ + uint ref_length; /* Length of ref (1-8 or the clustered + key length) */ uint block_size; /* index block size */ ha_rows records; /* Records i datafilen */ ha_rows deleted; /* Deleted records */ @@ -376,6 +377,8 @@ int 
ha_commit_complete(THD *thd); int ha_release_temporary_latches(THD *thd); int ha_commit_trans(THD *thd, THD_TRANS *trans); int ha_rollback_trans(THD *thd, THD_TRANS *trans); +int ha_rollback_to_savepoint(THD *thd, char *savepoint_name); +int ha_savepoint(THD *thd, char *savepoint_name); int ha_autocommit_or_rollback(THD *thd, int error); void ha_set_spin_retries(uint retries); bool ha_flush_logs(void); diff --git a/sql/item_func.cc b/sql/item_func.cc index 532a7cedec0..e847b203006 100644 --- a/sql/item_func.cc +++ b/sql/item_func.cc @@ -2299,6 +2299,9 @@ double Item_func_match::val() if (ft_handler == NULL) DBUG_RETURN(-1.0); + if (table->null_row) /* NULL row from an outer join */ + return 0.0; + if (join_key) { if (table->file->ft_handler) diff --git a/sql/item_strfunc.cc b/sql/item_strfunc.cc index ae8bf1dfecb..208be1ecd7f 100644 --- a/sql/item_strfunc.cc +++ b/sql/item_strfunc.cc @@ -495,18 +495,18 @@ String *Item_func_concat_ws::val_str(String *str) str->length(0); // QQ; Should be removed res=str; - // Skip until non-null and non-empty argument is found. + // Skip until non-null argument is found. 
// If not, return the empty string for (i=0; i < arg_count; i++) - if ((res= args[i]->val_str(str)) && res->length()) + if ((res= args[i]->val_str(str))) break; if (i == arg_count) return &empty_string; for (i++; i < arg_count ; i++) { - if (!(res2= args[i]->val_str(use_as_buff)) || !res2->length()) - continue; // Skip NULL and empty string + if (!(res2= args[i]->val_str(use_as_buff))) + continue; // Skip NULL if (res->length() + sep_str->length() + res2->length() > current_thd->variables.max_allowed_packet) diff --git a/sql/item_timefunc.cc b/sql/item_timefunc.cc index 6a95c15a226..84e7a44ac61 100644 --- a/sql/item_timefunc.cc +++ b/sql/item_timefunc.cc @@ -1137,6 +1137,22 @@ longlong Item_extract::val_int() return 0; // Impossible } +bool Item_extract::eq(const Item *item, bool binary_cmp) const +{ + if (this == item) + return 1; + if (item->type() != FUNC_ITEM || + func_name() != ((Item_func*)item)->func_name()) + return 0; + + Item_extract* ie= (Item_extract*)item; + if (ie->int_type != int_type) + return 0; + + if (!args[0]->eq(ie->args[0], binary_cmp)) + return 0; + return 1; +} void Item_typecast::print(String *str) { diff --git a/sql/item_timefunc.h b/sql/item_timefunc.h index 0ca2a36609d..e04e24627d9 100644 --- a/sql/item_timefunc.h +++ b/sql/item_timefunc.h @@ -422,6 +422,7 @@ class Item_extract :public Item_int_func longlong val_int(); const char *func_name() const { return "extract"; } void fix_length_and_dec(); + bool eq(const Item *item, bool binary_cmp) const; unsigned int size_of() { return sizeof(*this);} }; diff --git a/sql/lex.h b/sql/lex.h index d9a84dd25b4..3bbe1da185e 100644 --- a/sql/lex.h +++ b/sql/lex.h @@ -309,6 +309,7 @@ static SYMBOL symbols[] = { { "ROLLUP", SYM(ROLLUP_SYM),0,0}, { "ROW", SYM(ROW_SYM),0,0}, { "ROWS", SYM(ROWS_SYM),0,0}, + { "SAVEPOINT", SYM(SAVEPOINT_SYM),0,0}, { "SECOND", SYM(SECOND_SYM),0,0}, { "SELECT", SYM(SELECT_SYM),0,0}, { "SERIALIZABLE", SYM(SERIALIZABLE_SYM),0,0}, diff --git a/sql/log_event.cc 
b/sql/log_event.cc index cda2e50c53d..ff968babcf0 100644 --- a/sql/log_event.cc +++ b/sql/log_event.cc @@ -1886,9 +1886,27 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, else { char llbuff[22]; - enum enum_duplicates handle_dup = DUP_IGNORE; + enum enum_duplicates handle_dup; if (sql_ex.opt_flags & REPLACE_FLAG) handle_dup= DUP_REPLACE; + else if (sql_ex.opt_flags & IGNORE_FLAG) + handle_dup= DUP_IGNORE; + else + /* + Note that when replication is running fine, if it was DUP_ERROR on the + master then we could choose DUP_IGNORE here, because if DUP_ERROR + succeeded on master, and data is identical on the master and slave, + then there should be no uniqueness errors on slave, so DUP_IGNORE is + the same as DUP_ERROR. But in the unlikely case of uniqueness errors + (because the data on the master and slave happen to be different (user + error or bug)), we want LOAD DATA to print an error message on the + slave to discover the problem. + + If reading from net (a 3.23 master), mysql_load() will change this + to DUP_IGNORE. 
+ */ + handle_dup= DUP_ERROR; + sql_exchange ex((char*)fname, sql_ex.opt_flags & DUMPFILE_FLAG); String field_term(sql_ex.field_term,sql_ex.field_term_len); String enclosed(sql_ex.enclosed,sql_ex.enclosed_len); @@ -1949,12 +1967,19 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, close_thread_tables(thd); if (thd->query_error) { - int sql_error= thd->net.last_errno; - if (!sql_error) - sql_error= ER_UNKNOWN_ERROR; - slave_print_error(rli,sql_error, + /* this err/sql_errno code is copy-paste from send_error() */ + const char *err; + int sql_errno; + if ((err=thd->net.last_error)[0]) + sql_errno=thd->net.last_errno; + else + { + sql_errno=ER_UNKNOWN_ERROR; + err=ER(sql_errno); + } + slave_print_error(rli,sql_errno, "Error '%s' running load data infile", - ER_SAFE(sql_error)); + err); free_root(&thd->mem_root,0); return 1; } @@ -1975,11 +2000,8 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, IMPLEMENTATION - To handle the case where the master died without a stop event, - we clean up all temporary tables + locks that we got. - However, we don't clean temporary tables if the master was 3.23 - (this is because a 3.23 master writes a Start_log_event at every - binlog rotation; if we were not careful we would remove temp tables - on the slave when FLUSH LOGS is issued on the master). + we clean up all temporary tables that we got, if we are sure we + can (see below). TODO - Remove all active user locks @@ -1990,18 +2012,37 @@ int Load_log_event::exec_event(NET* net, struct st_relay_log_info* rli, int Start_log_event::exec_event(struct st_relay_log_info* rli) { - if (!rli->mi->old_format) - { + + switch (rli->mi->old_format) { + case BINLOG_FORMAT_CURRENT : /* - If 4.0 master, all temporary tables have been deleted on the master; - if 3.23 master, this is far from sure. + This is 4.x, so a Start_log_event is only at master startup, + so we are sure the master has restarted and cleared his temp tables. 
*/ close_temporary_tables(thd); - /* - If we have old format, load_tmpdir is cleaned up by the I/O thread - */ cleanup_load_tmpdir(); + break; + /* + Now the older formats; in that case load_tmpdir is cleaned up by the I/O + thread. + */ + case BINLOG_FORMAT_323_LESS_57 : + /* + Cannot distinguish a Start_log_event generated at master startup and + one generated by master FLUSH LOGS, so cannot be sure temp tables + have to be dropped. So do nothing. + */ + break; + case BINLOG_FORMAT_323_GEQ_57 : + /* Can distinguish, based on the value of 'created' */ + if (created) /* this was generated at master startup*/ + close_temporary_tables(thd); + break; + default : + /* this case is impossible */ + return 1; } + return Log_event::exec_event(rli); } diff --git a/sql/mysql_priv.h b/sql/mysql_priv.h index 9cf18b53669..702db98748a 100644 --- a/sql/mysql_priv.h +++ b/sql/mysql_priv.h @@ -140,6 +140,7 @@ char* query_table_status(THD *thd,const char *db,const char *table_name); #define TEST_NO_EXTRA 128 #define TEST_CORE_ON_SIGNAL 256 /* Give core if signal */ #define TEST_NO_STACKTRACE 512 +#define TEST_SIGINT 1024 /* Allow sigint on threads */ /* options for select set by the yacc parser (stored in lex->options) */ #define SELECT_DISTINCT 1 @@ -820,6 +821,10 @@ Item *get_system_var(enum_var_type var_type, LEX_STRING name); /* log.cc */ bool flush_error_log(void); +/* sql_list.cc */ +void free_list(I_List *list); +void free_list(I_List *list); + /* Some inline functions for more speed */ inline bool add_item_to_list(Item *item) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index 1492d6ddb68..0f3500248c0 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -285,7 +285,7 @@ char log_error_file[FN_REFLEN]; bool opt_log, opt_update_log, opt_bin_log, opt_slow_log; bool opt_error_log= IF_WIN(1,0); bool opt_disable_networking=0, opt_skip_show_db=0; -my_bool opt_enable_named_pipe= 0; +my_bool opt_enable_named_pipe= 0, opt_debugging= 0; my_bool opt_local_infile, opt_external_locking, 
opt_slave_compressed_protocol; uint delay_key_write_options= (uint) DELAY_KEY_WRITE_ON; @@ -380,14 +380,14 @@ uint report_port = MYSQL_PORT; my_bool master_ssl = 0; ulong master_retry_count=0; -ulong bytes_sent = 0L, bytes_received = 0L; +ulong bytes_sent= 0L, bytes_received= 0L, net_big_packet_count= 0L; bool opt_endinfo,using_udf_functions, locked_in_memory; bool opt_using_transactions, using_update_log; bool volatile abort_loop, select_thread_in_use, signal_thread_in_use; bool volatile ready_to_exit, shutdown_in_progress, grant_option; ulong refresh_version=1L,flush_version=1L; /* Increments on each reload */ -ulong query_id=1L,long_query_count,aborted_threads, +ulong query_id=1L,long_query_count,aborted_threads, killed_threads, aborted_connects,delayed_insert_timeout,delayed_insert_limit, delayed_queue_size,delayed_insert_threads,delayed_insert_writes, delayed_rows_in_use,delayed_insert_errors,flush_time, thread_created; @@ -919,6 +919,12 @@ void clean_up(bool print_message) bitmap_free(&temp_pool); free_max_user_conn(); end_slave_list(); + free_list(&replicate_do_db); + free_list(&replicate_ignore_db); + free_list(&binlog_do_db); + free_list(&binlog_ignore_db); + free_list(&replicate_rewrite_db); + #ifdef HAVE_OPENSSL if (ssl_acceptor_fd) my_free((gptr) ssl_acceptor_fd, MYF(MY_ALLOW_ZERO_PTR)); @@ -1013,14 +1019,21 @@ static void set_ports() static void set_user(const char *user) { #if !defined(__WIN__) && !defined(OS2) && !defined(__NETWARE__) - struct passwd *ent; + struct passwd *ent; + uid_t user_id= geteuid(); // don't bother if we aren't superuser - if (geteuid()) + if (user_id) { if (user) - fprintf(stderr, - "Warning: One can only use the --user switch if running as root\n"); + { + /* Don't give a warning, if real user is same as given with --user */ + struct passwd *user_info= getpwnam(user); + + if (!user_info || user_id != user_info->pw_uid) + fprintf(stderr, + "Warning: One can only use the --user switch if running as root\n"); + } return; } else 
if (!user) @@ -1266,7 +1279,10 @@ extern "C" sig_handler end_thread_signal(int sig __attribute__((unused))) THD *thd=current_thd; DBUG_ENTER("end_thread_signal"); if (thd && ! thd->bootstrap) + { + statistic_increment(killed_threads, &LOCK_status); end_thread(thd,0); + } DBUG_VOID_RETURN; /* purecov: deadcode */ } @@ -1585,7 +1601,8 @@ static void init_signals(void) struct sigaction sa; DBUG_ENTER("init_signals"); - sigset(THR_KILL_SIGNAL,end_thread_signal); + if (test_flags & TEST_SIGINT) + sigset(THR_KILL_SIGNAL,end_thread_signal); sigset(THR_SERVER_ALARM,print_signal_warning); // Should never be called! if (!(test_flags & TEST_NO_STACKTRACE) || (test_flags & TEST_CORE_ON_SIGNAL)) @@ -1644,7 +1661,8 @@ static void init_signals(void) sigaddset(&set,SIGTSTP); #endif sigaddset(&set,THR_SERVER_ALARM); - sigdelset(&set,THR_KILL_SIGNAL); // May be SIGINT + if (test_flags & TEST_SIGINT) + sigdelset(&set,THR_KILL_SIGNAL); // May be SIGINT sigdelset(&set,THR_CLIENT_ALARM); // For alarms sigprocmask(SIG_SETMASK,&set,NULL); pthread_sigmask(SIG_SETMASK,&set,NULL); @@ -1700,9 +1718,12 @@ extern "C" void *signal_hand(void *arg __attribute__((unused))) */ init_thr_alarm(max_connections+max_insert_delayed_threads+10); #if SIGINT != THR_KILL_SIGNAL - (void) sigemptyset(&set); // Setup up SIGINT for debug - (void) sigaddset(&set,SIGINT); // For debugging - (void) pthread_sigmask(SIG_UNBLOCK,&set,NULL); + if (test_flags & TEST_SIGINT) + { + (void) sigemptyset(&set); // Setup up SIGINT for debug + (void) sigaddset(&set,SIGINT); // For debugging + (void) pthread_sigmask(SIG_UNBLOCK,&set,NULL); + } #endif (void) sigemptyset(&set); // Setup up SIGINT for debug #ifdef USE_ONE_SIGNAL_HAND @@ -1923,7 +1944,7 @@ extern "C" pthread_handler_decl(handle_shutdown,arg) #endif -const char *load_default_groups[]= { "mysqld","server",0 }; +const char *load_default_groups[]= { "mysqld","server",MYSQL_BASE_VERSION,0 }; bool open_log(MYSQL_LOG *log, const char *hostname, const char *opt_name, const 
char *extension, @@ -2708,6 +2729,7 @@ static void create_new_thread(THD *thd) thread_count--; thd->killed=1; // Safety (void) pthread_mutex_unlock(&LOCK_thread_count); + statistic_increment(aborted_connects,&LOCK_status); net_printf(net,ER_CANT_CREATE_THREAD,error); (void) pthread_mutex_lock(&LOCK_thread_count); close_connection(net,0,0); @@ -3138,7 +3160,7 @@ enum options { OPT_QUERY_CACHE_TYPE, OPT_RECORD_BUFFER, OPT_RECORD_RND_BUFFER, OPT_RELAY_LOG_SPACE_LIMIT, OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME, - OPT_READONLY, + OPT_READONLY, OPT_DEBUGGING, OPT_SORT_BUFFER, OPT_TABLE_CACHE, OPT_THREAD_CONCURRENCY, OPT_THREAD_CACHE_SIZE, OPT_TMP_TABLE_SIZE, OPT_THREAD_STACK, @@ -3270,6 +3292,10 @@ struct my_option my_long_options[] = GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, /* We must always support the next option to make scripts like mysqltest easier to do */ + {"gdb", OPT_DEBUGGING, + "Set up signals usable for debugging", + (gptr*) &opt_debugging, (gptr*) &opt_debugging, + 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, {"init-rpl-role", OPT_INIT_RPL_ROLE, "Set the replication role", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, {"innodb_data_file_path", OPT_INNODB_DATA_FILE_PATH, @@ -3462,8 +3488,6 @@ Does nothing yet.", OPT_ARG, 0, 0, 0, 0, 0, 0}, {"port", 'P', "Port number to use for connection.", (gptr*) &mysql_port, (gptr*) &mysql_port, 0, GET_UINT, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, - {"reckless-slave", OPT_RECKLESS_SLAVE, "For debugging", 0, 0, 0, GET_NO_ARG, - NO_ARG, 0, 0, 0, 0, 0, 0}, {"replicate-do-db", OPT_REPLICATE_DO_DB, "Tells the slave thread to restrict replication to the specified database. To specify more than one database, use the directive multiple times, once for each database. Note that this will only work if you do not use cross-database queries such as UPDATE some_db.some_table SET foo='bar' while having selected a different or no database. 
If you need cross database updates to work, make sure you have 3.23.28 or later, and use replicate-wild-do-table=db_name.%.", 0, 0, 0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0}, @@ -4014,6 +4038,7 @@ struct show_var_st status_vars[]= { {"Com_restore_table", (char*) (com_stat+(uint) SQLCOM_RESTORE_TABLE),SHOW_LONG}, {"Com_revoke", (char*) (com_stat+(uint) SQLCOM_REVOKE),SHOW_LONG}, {"Com_rollback", (char*) (com_stat+(uint) SQLCOM_ROLLBACK),SHOW_LONG}, + {"Com_savepoint", (char*) (com_stat+(uint) SQLCOM_SAVEPOINT),SHOW_LONG}, {"Com_select", (char*) (com_stat+(uint) SQLCOM_SELECT),SHOW_LONG}, {"Com_set_option", (char*) (com_stat+(uint) SQLCOM_SET_OPTION),SHOW_LONG}, {"Com_show_binlog_events", (char*) (com_stat+(uint) SQLCOM_SHOW_BINLOG_EVENTS),SHOW_LONG}, @@ -4267,7 +4292,7 @@ get_one_option(int optid, const struct my_option *opt __attribute__((unused)), break; case OPT_SAFEMALLOC_MEM_LIMIT: #if !defined(DBUG_OFF) && defined(SAFEMALLOC) - safemalloc_mem_limit = atoi(argument); + sf_malloc_mem_limit = atoi(argument); #endif break; #ifdef EMBEDDED_LIBRARY @@ -4709,6 +4734,12 @@ static void get_options(int argc,char **argv) have_symlink=SHOW_OPTION_DISABLED; } #endif + if (opt_debugging) + { + /* Allow break with SIGINT, no core or stack trace */ + test_flags|= TEST_SIGINT | TEST_NO_STACKTRACE; + test_flags&= ~TEST_CORE_ON_SIGNAL; + } /* Set global MyISAM variables from delay_key_write_options */ fix_delay_key_write((THD*) 0, OPT_GLOBAL); diff --git a/sql/net_serv.cc b/sql/net_serv.cc index 79d9041bb6d..a8bc559e3a0 100644 --- a/sql/net_serv.cc +++ b/sql/net_serv.cc @@ -65,11 +65,13 @@ void sql_print_error(const char *format,...); #define USE_QUERY_CACHE extern uint test_flags; extern void query_cache_insert(NET *net, const char *packet, ulong length); -extern ulong bytes_sent, bytes_received; +extern ulong bytes_sent, bytes_received, net_big_packet_count; extern pthread_mutex_t LOCK_bytes_sent , LOCK_bytes_received; #else #undef statistic_add +#undef statistic_increment 
#define statistic_add(A,B,C) +#define statistic_increment(A,B) #endif #define TEST_BLOCKING 8 @@ -523,7 +525,7 @@ static my_bool net_safe_read(NET *net, char *buff, uint32 length, if ((tmp=vio_read(net->vio,(char*) net->buff, length)) <= 0) { my_bool interrupted = vio_should_retry(net->vio); - if (!thr_got_alarm(&alarmed) && interrupted) + if (!thr_got_alarm(alarmed) && interrupted) { /* Probably in MIT threads */ if (retry_count++ < net->retry_count) continue; @@ -557,10 +559,13 @@ static my_bool my_net_skip_rest(NET *net, uint32 remain, thr_alarm_t *alarmed, DBUG_ENTER("my_net_skip_rest"); DBUG_PRINT("enter",("bytes_to_skip: %u", (uint) remain)); - if (!thr_alarm_in_use(&alarmed)) + /* The following is good for debugging */ + statistic_increment(net_big_packet_count,&LOCK_bytes_received); + + if (!thr_alarm_in_use(alarmed)) { my_bool old_mode; - if (!thr_alarm(alarmed,net->read_timeout, alarm_buff) || + if (thr_alarm(alarmed,net->read_timeout, alarm_buff) || vio_blocking(net->vio, TRUE, &old_mode) < 0) DBUG_RETURN(1); /* Can't setup, abort */ } diff --git a/sql/repl_failsafe.cc b/sql/repl_failsafe.cc index 8ed002ca649..1552b3994e9 100644 --- a/sql/repl_failsafe.cc +++ b/sql/repl_failsafe.cc @@ -71,6 +71,7 @@ static int init_failsafe_rpl_thread(THD* thd) if (init_thr_lock() || thd->store_globals()) { close_connection(&thd->net,ER_OUT_OF_RESOURCES); // is this needed? + statistic_increment(aborted_connects,&LOCK_status); end_thread(thd,0); DBUG_RETURN(-1); } diff --git a/sql/slave.cc b/sql/slave.cc index ec1041894bd..dc9ce9715d8 100644 --- a/sql/slave.cc +++ b/sql/slave.cc @@ -115,6 +115,8 @@ int init_slave() { DBUG_ENTER("init_slave"); + /* This is called when mysqld starts */ + /* TODO: re-write this to interate through the list of files for multi-master @@ -126,11 +128,16 @@ int init_slave() If master_host is specified, create the master_info file if it doesn't exists. 
*/ - if (!active_mi || - init_master_info(active_mi,master_info_file,relay_log_info_file, + if (!active_mi) + { + sql_print_error("Failed to allocate memory for the master info structure"); + goto err; + } + + if(init_master_info(active_mi,master_info_file,relay_log_info_file, !master_host)) { - sql_print_error("Note: Failed to initialized master info"); + sql_print_error("Failed to initialize the master info structure"); goto err; } @@ -150,7 +157,7 @@ int init_slave() relay_log_info_file, SLAVE_IO | SLAVE_SQL)) { - sql_print_error("Warning: Can't create threads to handle slave"); + sql_print_error("Failed to create slave threads"); goto err; } } @@ -269,7 +276,7 @@ int init_relay_log_pos(RELAY_LOG_INFO* rli,const char* log, goto err; rli->cur_log = &rli->cache_buf; } - if (pos > BIN_LOG_HEADER_SIZE) + if (pos >= BIN_LOG_HEADER_SIZE) my_b_seek(rli->cur_log,(off_t)pos); err: @@ -282,6 +289,9 @@ err: pthread_cond_broadcast(&rli->data_cond); if (need_data_lock) pthread_mutex_unlock(&rli->data_lock); + + /* Isn't this strange: if !need_data_lock, we broadcast with no lock ?? */ + pthread_mutex_unlock(log_lock); DBUG_RETURN ((*errmsg) ? 1 : 0); } @@ -962,13 +972,19 @@ static int check_master_version(MYSQL* mysql, MASTER_INFO* mi) { const char* errmsg= 0; + /* + Note the following switch will bug when we have MySQL branch 30 ;) + */ switch (*mysql->server_version) { case '3': - mi->old_format = 1; + mi->old_format = + (strncmp(mysql->server_version, "3.23.57", 7) < 0) /* < .57 */ ? + BINLOG_FORMAT_323_LESS_57 : + BINLOG_FORMAT_323_GEQ_57 ; break; case '4': case '5': - mi->old_format = 0; + mi->old_format = BINLOG_FORMAT_CURRENT; break; default: errmsg = "Master reported unrecognized MySQL version"; @@ -1186,11 +1202,44 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname) strmov(strcend(tmp,'.'),"-relay-bin"); opt_relay_logname=my_strdup(tmp,MYF(MY_WME)); } + + /* + The relay log will now be opened, as a SEQ_READ_APPEND IO_CACHE. 
It is + notable that the last kilobytes of it (8 kB for example) may live in memory, + not on disk (depending on what the thread using it does). While this is + efficient, it has a side-effect one must know: + the size of the relay log on disk (displayed by 'ls -l' on Unix) can be a + few kilobytes less than one would expect by doing SHOW SLAVE STATUS; this + happens when only the IO thread is started (not the SQL thread). The + "missing" kilobytes are in memory, are preserved during 'STOP SLAVE; START + SLAVE IO_THREAD', and are flushed to disk when the slave's mysqld stops. So + this does not cause any bug. Example of how disk size grows by leaps: + + Read_Master_Log_Pos: 7811 -rw-rw---- 1 guilhem qq 4 Jun 5 16:19 gbichot2-relay-bin.002 + ...later... + Read_Master_Log_Pos: 9744 -rw-rw---- 1 guilhem qq 8192 Jun 5 16:27 gbichot2-relay-bin.002 + + See how 4 is less than 7811 and 8192 is less than 9744. + + WARNING: this is risky because the slave can stay like this for a long time; + then if it has a power failure, master.info says the I/O thread has read + until 9744 while the relay-log contains only until 8192 (the in-memory part + from 8192 to 9744 has been lost), so the SQL slave thread will miss some + events, silently breaking replication. + Ideally we would like to flush master.info only when we know that the relay + log has no in-memory tail. + Note that the above problem may arise only when only the IO thread is + started, which is unlikely. 
+ */ + if (open_log(&rli->relay_log, glob_hostname, opt_relay_logname, "-relay-bin", opt_relaylog_index_name, LOG_BIN, 1 /* read_append cache */, 1 /* no auto events */)) + { + sql_print_error("Failed in open_log() called from init_relay_log_info()"); DBUG_RETURN(1); + } /* if file does not exist */ if (access(fname,F_OK)) @@ -1201,10 +1250,18 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname) */ if (info_fd >= 0) my_close(info_fd, MYF(MY_WME)); - if ((info_fd = my_open(fname, O_CREAT|O_RDWR|O_BINARY, MYF(MY_WME))) < 0 || - init_io_cache(&rli->info_file, info_fd, IO_SIZE*2, READ_CACHE, 0L,0, - MYF(MY_WME))) + if ((info_fd = my_open(fname, O_CREAT|O_RDWR|O_BINARY, MYF(MY_WME))) < 0) { + sql_print_error("Failed to create a new relay log info file (\ +file '%s', errno %d)", fname, my_errno); + msg= current_thd->net.last_error; + goto err; + } + if (init_io_cache(&rli->info_file, info_fd, IO_SIZE*2, READ_CACHE, 0L,0, + MYF(MY_WME))) + { + sql_print_error("Failed to create a cache on relay log info file (\ +file '%s')", fname); msg= current_thd->net.last_error; goto err; } @@ -1212,7 +1269,11 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname) /* Init relay log with first entry in the relay index file */ if (init_relay_log_pos(rli,NullS,BIN_LOG_HEADER_SIZE,0 /* no data lock */, &msg)) + { + sql_print_error("Failed to open the relay log (relay_log_name='FIRST', \ +relay_log_pos=4"); goto err; + } rli->master_log_name[0]= 0; rli->master_log_pos= 0; rli->info_fd= info_fd; @@ -1221,18 +1282,33 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname) { if (info_fd >= 0) reinit_io_cache(&rli->info_file, READ_CACHE, 0L,0,0); - else if ((info_fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0 || - init_io_cache(&rli->info_file, info_fd, - IO_SIZE*2, READ_CACHE, 0L, 0, MYF(MY_WME))) + else { - if (info_fd >= 0) - my_close(info_fd, MYF(0)); - rli->info_fd= -1; - rli->relay_log.close(1); - 
pthread_mutex_unlock(&rli->data_lock); - DBUG_RETURN(1); + int error=0; + if ((info_fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0) + { + sql_print_error("Failed to open the existing relay log info file (\ +file '%s', errno %d)", fname, my_errno); + error= 1; + } + else if (init_io_cache(&rli->info_file, info_fd, + IO_SIZE*2, READ_CACHE, 0L, 0, MYF(MY_WME))) + { + sql_print_error("Failed to create a cache on relay log info file (\ +file '%s')", fname); + error= 1; + } + if (error) + { + if (info_fd >= 0) + my_close(info_fd, MYF(0)); + rli->info_fd= -1; + rli->relay_log.close(1); + pthread_mutex_unlock(&rli->data_lock); + DBUG_RETURN(1); + } } - + rli->info_fd = info_fd; int relay_log_pos, master_log_pos; if (init_strvar_from_file(rli->relay_log_name, @@ -1256,7 +1332,12 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname) rli->relay_log_pos, 0 /* no data lock*/, &msg)) + { + char llbuf[22]; + sql_print_error("Failed to open the relay log (relay_log_name='%s', \ +relay_log_pos=%s", rli->relay_log_name, llstr(rli->relay_log_pos, llbuf)); goto err; + } } DBUG_ASSERT(rli->relay_log_pos >= BIN_LOG_HEADER_SIZE); DBUG_ASSERT(my_b_tell(rli->cur_log) == rli->relay_log_pos); @@ -1265,7 +1346,8 @@ int init_relay_log_info(RELAY_LOG_INFO* rli, const char* info_fname) before flush_relay_log_info */ reinit_io_cache(&rli->info_file, WRITE_CACHE,0L,0,1); - error= flush_relay_log_info(rli); + if ((error= flush_relay_log_info(rli))) + sql_print_error("Failed to flush relay log info file"); if (count_relay_log_space(rli)) { msg="Error counting relay log space"; @@ -1310,18 +1392,18 @@ static bool wait_for_relay_log_space(RELAY_LOG_INFO* rli) { bool slave_killed=0; MASTER_INFO* mi = rli->mi; - const char* save_proc_info; THD* thd = mi->io_thd; DBUG_ENTER("wait_for_relay_log_space"); pthread_mutex_lock(&rli->log_space_lock); - save_proc_info = thd->proc_info; - thd->proc_info = "Waiting for relay log space to free"; + const char* save_proc_info= 
thd->enter_cond(&rli->log_space_cond, + &rli->log_space_lock, + "Waiting for relay log space to free"); while (rli->log_space_limit < rli->log_space_total && !(slave_killed=io_slave_killed(thd,mi)) && !rli->ignore_log_space_limit) pthread_cond_wait(&rli->log_space_cond, &rli->log_space_lock); - thd->proc_info = save_proc_info; + thd->exit_cond(save_proc_info); pthread_mutex_unlock(&rli->log_space_lock); DBUG_RETURN(slave_killed); } @@ -1368,6 +1450,8 @@ int init_master_info(MASTER_INFO* mi, const char* master_info_fname, pthread_mutex_lock(&mi->data_lock); fd = mi->fd; + + /* does master.info exist ? */ if (access(fname,F_OK)) { @@ -1382,10 +1466,19 @@ int init_master_info(MASTER_INFO* mi, const char* master_info_fname, */ if (fd >= 0) my_close(fd, MYF(MY_WME)); - if ((fd = my_open(fname, O_CREAT|O_RDWR|O_BINARY, MYF(MY_WME))) < 0 || - init_io_cache(&mi->file, fd, IO_SIZE*2, READ_CACHE, 0L,0, - MYF(MY_WME))) + if ((fd = my_open(fname, O_CREAT|O_RDWR|O_BINARY, MYF(MY_WME))) < 0 ) + { + sql_print_error("Failed to create a new master info file (\ +file '%s', errno %d)", fname, my_errno); goto err; + } + if (init_io_cache(&mi->file, fd, IO_SIZE*2, READ_CACHE, 0L,0, + MYF(MY_WME))) + { + sql_print_error("Failed to create a cache on master info file (\ +file '%s')", fname); + goto err; + } mi->master_log_name[0] = 0; mi->master_log_pos = BIN_LOG_HEADER_SIZE; // skip magic number @@ -1404,10 +1497,22 @@ int init_master_info(MASTER_INFO* mi, const char* master_info_fname, { if (fd >= 0) reinit_io_cache(&mi->file, READ_CACHE, 0L,0,0); - else if ((fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0 || - init_io_cache(&mi->file, fd, IO_SIZE*2, READ_CACHE, 0L, - 0, MYF(MY_WME))) - goto err; + else + { + if ((fd = my_open(fname, O_RDWR|O_BINARY, MYF(MY_WME))) < 0 ) + { + sql_print_error("Failed to open the existing master info file (\ +file '%s', errno %d)", fname, my_errno); + goto err; + } + if (init_io_cache(&mi->file, fd, IO_SIZE*2, READ_CACHE, 0L, + 0, MYF(MY_WME))) + { 
+ sql_print_error("Failed to create a cache on master info file (\ +file '%s')", fname); + goto err; + } + } mi->fd = fd; int port, connect_retry, master_log_pos; @@ -1448,7 +1553,8 @@ int init_master_info(MASTER_INFO* mi, const char* master_info_fname, mi->inited = 1; // now change cache READ -> WRITE - must do this before flush_master_info reinit_io_cache(&mi->file, WRITE_CACHE,0L,0,1); - error=test(flush_master_info(mi)); + if ((error=test(flush_master_info(mi)))) + sql_print_error("Failed to flush master info file"); pthread_mutex_unlock(&mi->data_lock); DBUG_RETURN(error); @@ -1675,6 +1781,13 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, the master info. To catch this, these commands modify abort_pos_wait ; we just monitor abort_pos_wait and see if it has changed. + Why do we have this mechanism instead of simply monitoring slave_running in + the loop (we do this too), as CHANGE MASTER/RESET SLAVE require that the + SQL thread be stopped? This is in case + STOP SLAVE;CHANGE MASTER/RESET SLAVE; START SLAVE; + happens very quickly between the moment pthread_cond_wait() wakes up and + the while() is evaluated: in that case slave_running is again 1 when the + while() is evaluated. */ init_abort_pos_wait= abort_pos_wait; @@ -1711,7 +1824,12 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, //"compare and wait" main loop while (!thd->killed && init_abort_pos_wait == abort_pos_wait && - mi->slave_running) + /* + formerly we tested mi->slave_running, but what we care about is + rli->slave_running (because this concerns the SQL thread, while + mi->slave_running concerns the I/O thread). 
+ */ + slave_running) { bool pos_reached; int cmp_result= 0; @@ -1749,6 +1867,10 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, DBUG_PRINT("info",("Waiting for master update")); const char* msg = thd->enter_cond(&data_cond, &data_lock, "Waiting for master update"); + /* + We are going to pthread_cond_(timed)wait(); if the SQL thread stops it + will wake us up. + */ if (timeout > 0) { /* @@ -1766,6 +1888,7 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, } else pthread_cond_wait(&data_cond, &data_lock); + DBUG_PRINT("info",("Got signal of master update")); thd->exit_cond(msg); if (error == ETIMEDOUT || error == ETIME) { @@ -1774,6 +1897,7 @@ int st_relay_log_info::wait_for_pos(THD* thd, String* log_name, } error=0; event_count++; + DBUG_PRINT("info",("Testing if killed or SQL thread not running")); } err: @@ -1782,11 +1906,11 @@ err: improper_arguments: %d timed_out: %d", (int) thd->killed, (int) (init_abort_pos_wait != abort_pos_wait), - (int) mi->slave_running, + (int) slave_running, (int) (error == -2), (int) (error == -1))); if (thd->killed || init_abort_pos_wait != abort_pos_wait || - !mi->slave_running) + !slave_running) { error= -2; } @@ -2321,6 +2445,17 @@ reconnect done to recover from failed read"); for no reason, but this function will do a clean read, notice the clean value and exit immediately. */ +#ifndef DBUG_OFF + { + char llbuf1[22], llbuf2[22]; + DBUG_PRINT("info", ("log_space_limit=%s log_space_total=%s \ +ignore_log_space_limit=%d", + llstr(mi->rli.log_space_limit,llbuf1), + llstr(mi->rli.log_space_total,llbuf2), + (int) mi->rli.ignore_log_space_limit)); + } +#endif + if (mi->rli.log_space_limit && mi->rli.log_space_limit < mi->rli.log_space_total && !mi->rli.ignore_log_space_limit) @@ -2493,8 +2628,16 @@ the slave SQL thread with \"SLAVE START\". 
We stopped at log \ VOID(pthread_mutex_unlock(&LOCK_thread_count)); thd->proc_info = "Waiting for slave mutex on exit"; pthread_mutex_lock(&rli->run_lock); + /* We need data_lock, at least to wake up any waiting master_pos_wait() */ + pthread_mutex_lock(&rli->data_lock); DBUG_ASSERT(rli->slave_running == 1); // tracking buffer overrun - rli->slave_running = 0; + /* When master_pos_wait() wakes up it will check this and terminate */ + rli->slave_running= 0; + /* Wake up master_pos_wait() */ + pthread_mutex_unlock(&rli->data_lock); + DBUG_PRINT("info",("Signaling possibly waiting master_pos_wait() functions")); + pthread_cond_broadcast(&rli->data_cond); + rli->ignore_log_space_limit= 0; /* don't need any lock */ rli->save_temporary_tables = thd->temporary_tables; /* @@ -3137,8 +3280,8 @@ Log_event* next_event(RELAY_LOG_INFO* rli) log), and also when the SQL thread starts. We should also reset ignore_log_space_limit to 0 when the user does RESET SLAVE, but in fact, no need as RESET SLAVE requires that the slave - be stopped, and when the SQL thread is later restarted - ignore_log_space_limit will be reset to 0. + be stopped, and the SQL thread sets ignore_log_space_limit to 0 when + it stops. */ pthread_mutex_lock(&rli->log_space_lock); // prevent the I/O thread from blocking next times diff --git a/sql/slave.h b/sql/slave.h index 8832302056d..66000f45e69 100644 --- a/sql/slave.h +++ b/sql/slave.h @@ -35,6 +35,11 @@ extern my_bool opt_log_slave_updates; extern ulonglong relay_log_space_limit; struct st_master_info; +enum enum_binlog_formats { + BINLOG_FORMAT_CURRENT=0, /* 0 is important for easy 'if (mi->old_format)' */ + BINLOG_FORMAT_323_LESS_57, + BINLOG_FORMAT_323_GEQ_57 }; + /* TODO: this needs to be redone, but for now it does not matter since we do not have multi-master yet. 
@@ -266,15 +271,15 @@ typedef struct st_master_info int events_till_abort; #endif bool inited; - bool old_format; /* master binlog is in 3.23 format */ + enum enum_binlog_formats old_format; /* master binlog is in 3.23 format */ volatile bool abort_slave, slave_running; volatile ulong slave_run_id; bool ignore_stop_event; st_master_info() - :fd(-1), io_thd(0), inited(0), old_format(0),abort_slave(0), - slave_running(0), slave_run_id(0) + :fd(-1), io_thd(0), inited(0), old_format(BINLOG_FORMAT_CURRENT), + abort_slave(0),slave_running(0), slave_run_id(0) { host[0] = 0; user[0] = 0; password[0] = 0; bzero(&file, sizeof(file)); diff --git a/sql/sql_lex.h b/sql/sql_lex.h index a905871e629..0618f04a79b 100644 --- a/sql/sql_lex.h +++ b/sql/sql_lex.h @@ -53,8 +53,10 @@ enum enum_sql_command { SQLCOM_REPAIR, SQLCOM_REPLACE, SQLCOM_REPLACE_SELECT, SQLCOM_CREATE_FUNCTION, SQLCOM_DROP_FUNCTION, SQLCOM_REVOKE,SQLCOM_OPTIMIZE, SQLCOM_CHECK, - SQLCOM_FLUSH, SQLCOM_KILL, SQLCOM_ANALYZE, - SQLCOM_ROLLBACK, SQLCOM_COMMIT, SQLCOM_SLAVE_START, SQLCOM_SLAVE_STOP, + SQLCOM_FLUSH, SQLCOM_KILL, SQLCOM_ANALYZE, + SQLCOM_ROLLBACK, SQLCOM_ROLLBACK_TO_SAVEPOINT, + SQLCOM_COMMIT, SQLCOM_SAVEPOINT, + SQLCOM_SLAVE_START, SQLCOM_SLAVE_STOP, SQLCOM_BEGIN, SQLCOM_LOAD_MASTER_TABLE, SQLCOM_CHANGE_MASTER, SQLCOM_RENAME_TABLE, SQLCOM_BACKUP_TABLE, SQLCOM_RESTORE_TABLE, SQLCOM_RESET, SQLCOM_PURGE, SQLCOM_SHOW_BINLOGS, @@ -154,6 +156,7 @@ typedef struct st_lex SQL_LIST proc_list, auxilliary_table_list; TYPELIB *interval; create_field *last_field; + char* savepoint_name; // Transaction savepoint id Item *default_value; CONVERT *convert_set; CONVERT *thd_convert_set; // Set with SET CHAR SET diff --git a/sql/sql_list.cc b/sql/sql_list.cc index 1124605ca24..c99cfb8c918 100644 --- a/sql/sql_list.cc +++ b/sql/sql_list.cc @@ -22,3 +22,18 @@ #include "mysql_priv.h" list_node end_of_list; + +void free_list(I_List *list) +{ + i_string_pair *tmp; + while ((tmp= list->get())) + delete tmp; +} + + +void 
free_list(I_List *list) +{ + i_string *tmp; + while ((tmp= list->get())) + delete tmp; +} diff --git a/sql/sql_parse.cc b/sql/sql_parse.cc index 392b4ae4235..4e389b6d3f6 100644 --- a/sql/sql_parse.cc +++ b/sql/sql_parse.cc @@ -730,6 +730,10 @@ pthread_handler_decl(handle_one_connection,arg) send_error(net,net->last_errno,NullS); statistic_increment(aborted_threads,&LOCK_status); } + else if (thd->killed) + { + statistic_increment(aborted_threads,&LOCK_status); + } end_thread: close_connection(net); @@ -906,7 +910,10 @@ bool do_command(THD *thd) vio_description(net->vio))); /* Check if we can continue without closing the connection */ if (net->error != 3) + { + statistic_increment(aborted_threads,&LOCK_status); DBUG_RETURN(TRUE); // We have to close it. + } send_error(net,net->last_errno,NullS); net->error= 0; DBUG_RETURN(FALSE); @@ -1202,9 +1209,10 @@ bool dispatch_command(enum enum_server_command command, THD *thd, opened_tables,refresh_version, cached_tables(), uptime ? (float)thd->query_id/(float)uptime : 0); #ifdef SAFEMALLOC - if (lCurMemory) // Using SAFEMALLOC + if (sf_malloc_cur_memory) // Using SAFEMALLOC sprintf(strend(buff), " Memory in use: %ldK Max memory used: %ldK", - (lCurMemory+1023L)/1024L,(lMaxMemory+1023L)/1024L); + (sf_malloc_cur_memory+1023L)/1024L, + (sf_malloc_max_memory+1023L)/1024L); #endif VOID(my_net_write(net, buff,(uint) strlen(buff))); VOID(net_flush(net)); @@ -2465,8 +2473,10 @@ mysql_execute_command(void) res = mysql_ha_close(thd, tables); break; case SQLCOM_HA_READ: - if (check_db_used(thd,tables) || - check_table_access(thd,SELECT_ACL, tables)) + /* there is no need to check for table permissions here, because + if a user has no permissions to read a table, he won't be + able to open it (with SQLCOM_HA_OPEN) in the first place. 
*/ + if (check_db_used(thd,tables)) goto error; res = mysql_ha_read(thd, tables, lex->ha_read_mode, lex->backup_dir, lex->insert_list, lex->ha_rkey_mode, select_lex->where, @@ -2522,6 +2532,23 @@ mysql_execute_command(void) res= -1; thd->options&= ~(ulong) (OPTION_BEGIN | OPTION_STATUS_NO_TRANS_UPDATE); break; + case SQLCOM_ROLLBACK_TO_SAVEPOINT: + if (!ha_rollback_to_savepoint(thd, lex->savepoint_name)) + { + if (thd->options & OPTION_STATUS_NO_TRANS_UPDATE) + send_warning(&thd->net,ER_WARNING_NOT_COMPLETE_ROLLBACK,0); + else + send_ok(&thd->net); + } + else + res= -1; + break; + case SQLCOM_SAVEPOINT: + if (!ha_savepoint(thd, lex->savepoint_name)) + send_ok(&thd->net); + else + res= -1; + break; default: /* Impossible */ send_ok(&thd->net); break; diff --git a/sql/sql_repl.cc b/sql/sql_repl.cc index 283dd20a56c..a651d8002fd 100644 --- a/sql/sql_repl.cc +++ b/sql/sql_repl.cc @@ -159,10 +159,18 @@ File open_binlog(IO_CACHE *log, const char *log_file_name, File file; DBUG_ENTER("open_binlog"); - if ((file = my_open(log_file_name, O_RDONLY | O_BINARY, MYF(MY_WME))) < 0 || - init_io_cache(log, file, IO_SIZE*2, READ_CACHE, 0, 0, + if ((file = my_open(log_file_name, O_RDONLY | O_BINARY, MYF(MY_WME))) < 0) + { + sql_print_error("Failed to open log (\ +file '%s', errno %d)", log_file_name, my_errno); + *errmsg = "Could not open log file"; // This will not be sent + goto err; + } + if (init_io_cache(log, file, IO_SIZE*2, READ_CACHE, 0, 0, MYF(MY_WME | MY_DONT_CHECK_FILESIZE))) { + sql_print_error("Failed to create a cache on log (\ +file '%s')", log_file_name); *errmsg = "Could not open log file"; // This will not be sent goto err; } @@ -743,6 +751,9 @@ int reset_slave(THD *thd, MASTER_INFO* mi) //Clear master's log coordinates (only for good display of SHOW SLAVE STATUS) mi->master_log_name[0]= 0; mi->master_log_pos= BIN_LOG_HEADER_SIZE; + //Clear the errors displayed by SHOW SLAVE STATUS + mi->rli.last_slave_error[0]=0; + mi->rli.last_slave_errno=0; //close 
master_info_file, relay_log_info_file, set mi->inited=rli->inited=0 end_master_info(mi); //and delete these two files @@ -961,7 +972,7 @@ int show_binlog_events(THD* thd) { LEX_MASTER_INFO *lex_mi = &thd->lex.mi; ha_rows event_count, limit_start, limit_end; - my_off_t pos = lex_mi->pos; + my_off_t pos = max(BIN_LOG_HEADER_SIZE, lex_mi->pos); // user-friendly char search_file_name[FN_REFLEN], *name; const char *log_file_name = lex_mi->log_file_name; pthread_mutex_t *log_lock = mysql_bin_log.get_log_lock(); @@ -989,12 +1000,6 @@ int show_binlog_events(THD* thd) if ((file=open_binlog(&log, linfo.log_file_name, &errmsg)) < 0) goto err; - if (pos < 4) - { - errmsg = "Invalid log position"; - goto err; - } - pthread_mutex_lock(log_lock); my_b_seek(&log, pos); diff --git a/sql/sql_select.cc b/sql/sql_select.cc index ff6fde1ca0c..0e8b191e4ef 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -1688,6 +1688,9 @@ add_ft_keys(DYNAMIC_ARRAY *keyuse_array, if (!cond_func || cond_func->key == NO_SUCH_KEY) return; + if (!(usable_tables & cond_func->table->map)) + return; + KEYUSE keyuse; keyuse.table= cond_func->table; diff --git a/sql/sql_yacc.yy b/sql/sql_yacc.yy index c011c058dc8..af0921825fc 100644 --- a/sql/sql_yacc.yy +++ b/sql/sql_yacc.yy @@ -135,6 +135,7 @@ bool my_yyoverflow(short **a, YYSTYPE **b,int *yystacksize); %token RESET_SYM %token ROLLBACK_SYM %token ROLLUP_SYM +%token SAVEPOINT_SYM %token SELECT_SYM %token SHOW %token SLAVE @@ -573,7 +574,8 @@ bool my_yyoverflow(short **a, YYSTYPE **b,int *yystacksize); query verb_clause create change select do drop insert replace insert2 insert_values update delete truncate rename show describe load alter optimize flush - reset purge begin commit rollback slave master_def master_defs + reset purge begin commit rollback savepoint + slave master_def master_defs repair restore backup analyze check start field_list field_list_item field_spec kill column_def key_def select_item_list select_item values_list no_braces @@ -649,6 
+651,7 @@ verb_clause: | restore | revoke | rollback + | savepoint | select | set | slave @@ -706,6 +709,18 @@ master_def: MASTER_LOG_POS_SYM EQ ulonglong_num { Lex->mi.pos = $3; + /* + If the user specified a value < BIN_LOG_HEADER_SIZE, adjust it + instead of causing subsequent errors. + We need to do it in this file, because only there we know that + MASTER_LOG_POS has been explicitly specified. On the contrary + in change_master() (sql_repl.cc) we cannot distinguish between 0 + (MASTER_LOG_POS explicitly specified as 0) and 0 (unspecified), + whereas we want to distinguish (specified 0 means "read the binlog + from 0" (4 in fact), unspecified means "don't change the position + (keep the preceding value)"). + */ + Lex->mi.pos = max(BIN_LOG_HEADER_SIZE, Lex->mi.pos); } | MASTER_CONNECT_RETRY_SYM EQ ULONG_NUM @@ -721,6 +736,8 @@ master_def: RELAY_LOG_POS_SYM EQ ULONG_NUM { Lex->mi.relay_log_pos = $3; + /* Adjust if < BIN_LOG_HEADER_SIZE (same comment as Lex->mi.pos) */ + Lex->mi.relay_log_pos = max(BIN_LOG_HEADER_SIZE, Lex->mi.relay_log_pos); }; @@ -3398,6 +3415,7 @@ keyword: | ROWS_SYM {} | ROW_FORMAT_SYM {} | ROW_SYM {} + | SAVEPOINT_SYM {} | SECOND_SYM {} | SERIALIZABLE_SYM {} | SESSION_SYM {} @@ -3931,8 +3949,21 @@ commit: COMMIT_SYM { Lex->sql_command = SQLCOM_COMMIT;}; rollback: - ROLLBACK_SYM { Lex->sql_command = SQLCOM_ROLLBACK;}; - + ROLLBACK_SYM + { + Lex->sql_command = SQLCOM_ROLLBACK; + } + | ROLLBACK_SYM TO_SYM SAVEPOINT_SYM ident + { + Lex->sql_command = SQLCOM_ROLLBACK_TO_SAVEPOINT; + Lex->savepoint_name = $4.str; + }; +savepoint: + SAVEPOINT_SYM ident + { + Lex->sql_command = SQLCOM_SAVEPOINT; + Lex->savepoint_name = $2.str; + }; /* ** UNIONS : glue selects together diff --git a/sql/unireg.h b/sql/unireg.h index f69d67455dd..f2cace51fa7 100644 --- a/sql/unireg.h +++ b/sql/unireg.h @@ -130,9 +130,13 @@ bfill((A)->null_flags,(A)->null_bytes,255);\ */ #define MIN_TURBOBM_PATTERN_LEN 3 -/* Defines for binary logging */ +/* + Defines for binary 
logging. + Do not decrease the value of BIN_LOG_HEADER_SIZE. + Do not even increase it before checking code. +*/ -#define BIN_LOG_HEADER_SIZE 4 +#define BIN_LOG_HEADER_SIZE 4 /* Include prototypes for unireg */ diff --git a/support-files/mysql.server.sh b/support-files/mysql.server.sh index de01142beac..694e6fa8ebb 100644 --- a/support-files/mysql.server.sh +++ b/support-files/mysql.server.sh @@ -19,7 +19,7 @@ # Required-Start: $local_fs $network $remote_fs # Required-Stop: $local_fs $network $remote_fs # Default-Start: 2 3 4 5 -# Default-Stop: 2 3 4 5 +# Default-Stop: 0 1 6 # Short-Description: start and stop MySQL # Description: MySQL is a very fast and reliable SQL database engine. ### END INIT INFO diff --git a/support-files/mysql.spec.sh b/support-files/mysql.spec.sh index aab3e298e14..6cc33866efd 100644 --- a/support-files/mysql.spec.sh +++ b/support-files/mysql.spec.sh @@ -254,6 +254,13 @@ export PATH # Build the 4.0 Max binary (includes BDB and UDFs and therefore # cannot be linked statically against the patched glibc) +# If we want to compile with RAID using gcc 3, we need to use +# gcc instead of g++ to avoid linking problems (RAID code is written in C++) +if gcc -v 2>&1 | grep 'version 3' > /dev/null 2>&1 +then + export CXX="gcc" +fi + BuildMySQL "--enable-shared \ --with-berkeley-db \ --with-innodb \ @@ -318,6 +325,10 @@ install -m644 $MBD/sql/mysqld.sym $RBR/usr/lib/mysql/mysqld.sym install -m644 $MBD/support-files/mysql-log-rotate $RBR/etc/logrotate.d/mysql install -m755 $MBD/support-files/mysql.server $RBR/etc/init.d/mysql +# Create a symlink "rcmysql", pointing to the init.script. SuSE users +# will appreciate that, as all services usually offer this. 
+ln -s ../../etc/init.d/mysql $RPM_BUILD_ROOT/usr/sbin/rcmysql + # Create symbolic compatibility link safe_mysqld -> mysqld_safe # (safe_mysqld will be gone in MySQL 4.1) ln -sf ./mysqld_safe $RBR/usr/bin/safe_mysqld @@ -462,6 +473,7 @@ fi %attr(755, root, root) /usr/bin/safe_mysqld %attr(755, root, root) /usr/sbin/mysqld +%attr(755, root, root) /usr/sbin/rcmysql %attr(644, root, root) /usr/lib/mysql/mysqld.sym %attr(644, root, root) /etc/logrotate.d/mysql