Mirror of https://github.com/MariaDB/server.git
Many files:
  Merge InnoDB-3.23.50

innobase/btr/btr0btr.c: Merge InnoDB-3.23.50
innobase/btr/btr0cur.c: Merge InnoDB-3.23.50
innobase/btr/btr0sea.c: Merge InnoDB-3.23.50
innobase/buf/buf0buf.c: Merge InnoDB-3.23.50
innobase/buf/buf0flu.c: Merge InnoDB-3.23.50
innobase/dict/dict0dict.c: Merge InnoDB-3.23.50
innobase/dict/dict0load.c: Merge InnoDB-3.23.50
innobase/fil/fil0fil.c: Merge InnoDB-3.23.50
innobase/fsp/fsp0fsp.c: Merge InnoDB-3.23.50
innobase/include/buf0flu.h: Merge InnoDB-3.23.50
innobase/include/dict0dict.h: Merge InnoDB-3.23.50
innobase/include/fil0fil.h: Merge InnoDB-3.23.50
innobase/include/fsp0fsp.h: Merge InnoDB-3.23.50
innobase/include/log0log.h: Merge InnoDB-3.23.50
innobase/include/log0recv.h: Merge InnoDB-3.23.50
innobase/include/mem0mem.h: Merge InnoDB-3.23.50
innobase/include/os0file.h: Merge InnoDB-3.23.50
innobase/include/row0mysql.h: Merge InnoDB-3.23.50
innobase/include/srv0srv.h: Merge InnoDB-3.23.50
innobase/include/srv0start.h: Merge InnoDB-3.23.50
innobase/include/trx0sys.h: Merge InnoDB-3.23.50
innobase/include/ut0byte.h: Merge InnoDB-3.23.50
innobase/include/ut0rnd.h: Merge InnoDB-3.23.50
innobase/include/ut0ut.h: Merge InnoDB-3.23.50
innobase/log/log0log.c: Merge InnoDB-3.23.50
innobase/log/log0recv.c: Merge InnoDB-3.23.50
innobase/mem/mem0mem.c: Merge InnoDB-3.23.50
innobase/os/os0file.c: Merge InnoDB-3.23.50
innobase/rem/rem0cmp.c: Merge InnoDB-3.23.50
innobase/row/row0ins.c: Merge InnoDB-3.23.50
innobase/row/row0mysql.c: Merge InnoDB-3.23.50
innobase/row/row0sel.c: Merge InnoDB-3.23.50
innobase/row/row0upd.c: Merge InnoDB-3.23.50
innobase/srv/srv0srv.c: Merge InnoDB-3.23.50
innobase/srv/srv0start.c: Merge InnoDB-3.23.50
innobase/trx/trx0sys.c: Merge InnoDB-3.23.50
innobase/ut/ut0mem.c: Merge InnoDB-3.23.50
innobase/ut/ut0ut.c: Merge InnoDB-3.23.50
sql/ha_innobase.cc: Merge InnoDB-3.23.50
sql/ha_innobase.h: Merge InnoDB-3.23.50
@@ -570,6 +570,19 @@ btr_page_get_father_for_rec(

	node_ptr = btr_cur_get_rec(&cursor);

	if (btr_node_ptr_get_child_page_no(node_ptr) !=
					buf_frame_get_page_no(page)) {
		fprintf(stderr,
	"InnoDB: Corruption of an index tree: table %s, index %s,\n"
	"InnoDB: father ptr page no %lu, child page no %lu\n",
			(UT_LIST_GET_FIRST(tree->tree_indexes))->table_name,
			(UT_LIST_GET_FIRST(tree->tree_indexes))->name,
			btr_node_ptr_get_child_page_no(node_ptr),
			buf_frame_get_page_no(page));

		page_rec_print(page_rec_get_next(page_get_infimum_rec(page)));
		page_rec_print(node_ptr);
	}

	ut_a(btr_node_ptr_get_child_page_no(node_ptr) ==
					buf_frame_get_page_no(page));
	mem_heap_free(heap);

@@ -204,7 +204,7 @@ btr_cur_search_to_nth_level(
				the caller uses his search latch
				to protect the record! */
	btr_cur_t*	cursor, /* in/out: tree cursor; the cursor page is
				s- or x-latched, but see also above! */
				s- or x-latched, but see also above! */
	ulint		has_search_latch,/* in: info on the latch mode the
				caller currently has on btr_search_latch:
				RW_S_LATCH, or 0 */

@@ -743,7 +743,7 @@ btr_search_guess_on_hash(

#ifdef notdefined
	/* These lines of code can be used in a debug version to check
	correctness of the searched cursor position: */
	the correctness of the searched cursor position: */

	info->last_hash_succ = FALSE;

@@ -220,6 +220,10 @@ buf_calc_page_checksum(
{
	ulint	checksum;

	/* Since the fields FIL_PAGE_FILE_FLUSH_LSN and ..._ARCH_LOG_NO
	are written outside the buffer pool to the first pages of data
	files, we have to skip them in page checksum calculation */

	checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
		+ ut_fold_binary(page + FIL_PAGE_DATA,
				UNIV_PAGE_SIZE - FIL_PAGE_DATA
@@ -279,8 +283,9 @@ buf_page_print(

	ut_sprintf_buf(buf, read_buf, UNIV_PAGE_SIZE);

	ut_print_timestamp(stderr);
	fprintf(stderr,
		"InnoDB: Page dump in ascii and hex (%lu bytes):\n%s",
		" InnoDB: Page dump in ascii and hex (%lu bytes):\n%s",
		UNIV_PAGE_SIZE, buf);
	fprintf(stderr, "InnoDB: End of page dump\n");

@@ -288,7 +293,8 @@ buf_page_print(

	checksum = buf_calc_page_checksum(read_buf);

	fprintf(stderr, "InnoDB: Page checksum %lu stored checksum %lu\n",
	ut_print_timestamp(stderr);
	fprintf(stderr, " InnoDB: Page checksum %lu stored checksum %lu\n",
		checksum, mach_read_from_4(read_buf
			+ UNIV_PAGE_SIZE
			- FIL_PAGE_END_LSN));

@@ -1358,47 +1364,87 @@ buf_page_io_complete(
|
||||
/*=================*/
|
||||
buf_block_t* block) /* in: pointer to the block in question */
|
||||
{
|
||||
dulint id;
|
||||
dict_index_t* index;
|
||||
dulint id;
|
||||
ulint io_type;
|
||||
ulint read_page_no;
|
||||
|
||||
ut_ad(block);
|
||||
|
||||
io_type = block->io_fix;
|
||||
|
||||
if (io_type == BUF_IO_READ) {
|
||||
/* If this page is not uninitialized and not in the
|
||||
doublewrite buffer, then the page number should be the
|
||||
same as in block */
|
||||
|
||||
read_page_no = mach_read_from_4((block->frame)
|
||||
+ FIL_PAGE_OFFSET);
|
||||
if (read_page_no != 0
|
||||
&& !trx_doublewrite_page_inside(read_page_no)
|
||||
&& read_page_no != block->offset) {
|
||||
|
||||
fprintf(stderr,
|
||||
"InnoDB: Error: page n:o stored in the page read in is %lu, should be %lu!\n",
|
||||
read_page_no, block->offset);
|
||||
}
|
||||
#ifdef notdefined
|
||||
if (block->offset != 0 && read_page_no == 0) {
|
||||
/* Check that the page is really uninited */
|
||||
|
||||
for (i = 0; i < UNIV_PAGE_SIZE; i++) {
|
||||
|
||||
if (*((block->frame) + i) != '\0') {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Error: page n:o in the page read in is 0, but page %lu is inited!\n",
|
||||
block->offset);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
/* From version 3.23.38 up we store the page checksum
|
||||
to the 4 upper bytes of the page end lsn field */
|
||||
to the 4 first bytes of the page end lsn field */
|
||||
|
||||
if (buf_page_is_corrupted(block->frame)) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Database page corruption or a failed\n"
|
||||
"InnoDB: file read of page %lu.\n", block->offset);
|
||||
"InnoDB: Database page corruption on disk or a failed\n"
|
||||
"InnoDB: file read of page %lu.\n", block->offset);
|
||||
|
||||
fprintf(stderr,
|
||||
"InnoDB: You may have to recover from a backup.\n");
|
||||
"InnoDB: You may have to recover from a backup.\n");
|
||||
|
||||
buf_page_print(block->frame);
|
||||
|
||||
fprintf(stderr,
|
||||
"InnoDB: Database page corruption or a failed\n"
|
||||
"InnoDB: file read of page %lu.\n", block->offset);
|
||||
"InnoDB: Database page corruption on disk or a failed\n"
|
||||
"InnoDB: file read of page %lu.\n", block->offset);
|
||||
fprintf(stderr,
|
||||
"InnoDB: You may have to recover from a backup.\n");
|
||||
"InnoDB: You may have to recover from a backup.\n");
|
||||
fprintf(stderr,
|
||||
"InnoDB: It is also possible that your operating\n"
|
||||
"InnoDB: system has corrupted its own file cache\n"
|
||||
"InnoDB: and rebooting your computer removes the\n"
|
||||
"InnoDB: error.\n");
|
||||
"InnoDB: It is also possible that your operating\n"
|
||||
"InnoDB: system has corrupted its own file cache\n"
|
||||
"InnoDB: and rebooting your computer removes the\n"
|
||||
"InnoDB: error.\n"
|
||||
"InnoDB: If the corrupt page is an index page\n"
|
||||
"InnoDB: you can also try to fix the corruption\n"
|
||||
"InnoDB: by dumping, dropping, and reimporting\n"
|
||||
"InnoDB: the corrupt table. You can use CHECK\n"
|
||||
"InnoDB: TABLE to scan your table for corruption.\n"
|
||||
"InnoDB: Look also at section 6.1 of\n"
|
||||
"InnoDB: http://www.innodb.com/ibman.html about\n"
|
||||
"InnoDB: forcing recovery.\n");
|
||||
|
||||
if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Ending processing because of a corrupt database page.\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if (recv_recovery_is_on()) {
|
||||
recv_recover_page(TRUE, block->frame, block->space,
|
||||
block->offset);
|
||||
recv_recover_page(FALSE, TRUE, block->frame,
|
||||
block->space, block->offset);
|
||||
}
|
||||
|
||||
if (!recv_no_ibuf_operations) {
|
||||
|
@@ -327,6 +327,34 @@ try_again:
|
||||
mutex_exit(&(trx_doublewrite->mutex));
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Initializes a page for writing to the tablespace. */
|
||||
|
||||
void
|
||||
buf_flush_init_for_writing(
|
||||
/*=======================*/
|
||||
byte* page, /* in: page */
|
||||
dulint newest_lsn, /* in: newest modification lsn to the page */
|
||||
ulint space, /* in: space id */
|
||||
ulint page_no) /* in: page number */
|
||||
{
|
||||
/* Write the newest modification lsn to the page */
|
||||
mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn);
|
||||
|
||||
mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN, newest_lsn);
|
||||
|
||||
/* Write to the page the space id and page number */
|
||||
|
||||
mach_write_to_4(page + FIL_PAGE_SPACE, space);
|
||||
mach_write_to_4(page + FIL_PAGE_OFFSET, page_no);
|
||||
|
||||
/* We overwrite the first 4 bytes of the end lsn field to store
|
||||
a page checksum */
|
||||
|
||||
mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
|
||||
buf_calc_page_checksum(page));
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Does an asynchronous write of a buffer page. NOTE: in simulated aio and
|
||||
also when the doublewrite buffer is used, we must call
|
||||
@@ -349,23 +377,8 @@ buf_flush_write_block_low(
|
||||
/* Force the log to the disk before writing the modified block */
|
||||
log_flush_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS);
|
||||
#endif
|
||||
/* Write the newest modification lsn to the page */
|
||||
mach_write_to_8(block->frame + FIL_PAGE_LSN,
|
||||
block->newest_modification);
|
||||
mach_write_to_8(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
|
||||
block->newest_modification);
|
||||
|
||||
/* Write to the page the space id and page number */
|
||||
|
||||
mach_write_to_4(block->frame + FIL_PAGE_SPACE, block->space);
|
||||
mach_write_to_4(block->frame + FIL_PAGE_OFFSET, block->offset);
|
||||
|
||||
/* We overwrite the first 4 bytes of the end lsn field to store
|
||||
a page checksum */
|
||||
|
||||
mach_write_to_4(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
|
||||
buf_calc_page_checksum(block->frame));
|
||||
|
||||
buf_flush_init_for_writing(block->frame, block->newest_modification,
|
||||
block->space, block->offset);
|
||||
if (!trx_doublewrite) {
|
||||
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
|
||||
FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
|
||||
|
@@ -281,7 +281,8 @@ dict_table_autoinc_initialize(
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Gets the next autoinc value, 0 if not yet initialized. */
|
||||
Gets the next autoinc value, 0 if not yet initialized. If initialized,
|
||||
increments the counter by 1. */
|
||||
|
||||
ib_longlong
|
||||
dict_table_autoinc_get(
|
||||
@@ -306,6 +307,32 @@ dict_table_autoinc_get(
|
||||
return(value);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Reads the autoinc counter value, 0 if not yet initialized. Does not
|
||||
increment the counter. */
|
||||
|
||||
ib_longlong
|
||||
dict_table_autoinc_read(
|
||||
/*====================*/
|
||||
/* out: value of the counter */
|
||||
dict_table_t* table) /* in: table */
|
||||
{
|
||||
ib_longlong value;
|
||||
|
||||
mutex_enter(&(table->autoinc_mutex));
|
||||
|
||||
if (!table->autoinc_inited) {
|
||||
|
||||
value = 0;
|
||||
} else {
|
||||
value = table->autoinc;
|
||||
}
|
||||
|
||||
mutex_exit(&(table->autoinc_mutex));
|
||||
|
||||
return(value);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Updates the autoinc counter if the value supplied is bigger than the
|
||||
current value. If not inited, does nothing. */
|
||||
@@ -644,7 +671,10 @@ dict_table_rename_in_cache(
|
||||
/*=======================*/
|
||||
/* out: TRUE if success */
|
||||
dict_table_t* table, /* in: table */
|
||||
char* new_name) /* in: new name */
|
||||
char* new_name, /* in: new name */
|
||||
ibool rename_also_foreigns)/* in: in ALTER TABLE we want
|
||||
to preserve the original table name
|
||||
in constraints which reference it */
|
||||
{
|
||||
dict_foreign_t* foreign;
|
||||
dict_index_t* index;
|
||||
@@ -702,6 +732,41 @@ dict_table_rename_in_cache(
|
||||
index = dict_table_get_next_index(index);
|
||||
}
|
||||
|
||||
if (!rename_also_foreigns) {
|
||||
/* In ALTER TABLE we think of the rename table operation
|
||||
in the direction table -> temporary table (#sql...)
|
||||
as dropping the table with the old name and creating
|
||||
a new with the new name. Thus we kind of drop the
|
||||
constraints from the dictionary cache here. The foreign key
|
||||
constraints will be inherited to the new table from the
|
||||
system tables through a call of dict_load_foreigns. */
|
||||
|
||||
/* Remove the foreign constraints from the cache */
|
||||
foreign = UT_LIST_GET_LAST(table->foreign_list);
|
||||
|
||||
while (foreign != NULL) {
|
||||
dict_foreign_remove_from_cache(foreign);
|
||||
foreign = UT_LIST_GET_LAST(table->foreign_list);
|
||||
}
|
||||
|
||||
/* Reset table field in referencing constraints */
|
||||
|
||||
foreign = UT_LIST_GET_FIRST(table->referenced_list);
|
||||
|
||||
while (foreign != NULL) {
|
||||
foreign->referenced_table = NULL;
|
||||
foreign->referenced_index = NULL;
|
||||
|
||||
foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
|
||||
}
|
||||
|
||||
/* Make the list of referencing constraints empty */
|
||||
|
||||
UT_LIST_INIT(table->referenced_list);
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
/* Update the table name fields in foreign constraints */
|
||||
|
||||
foreign = UT_LIST_GET_FIRST(table->foreign_list);
|
||||
@@ -768,8 +833,6 @@ dict_table_remove_from_cache(
|
||||
foreign = UT_LIST_GET_LAST(table->foreign_list);
|
||||
|
||||
while (foreign != NULL) {
|
||||
ut_a(0 == ut_strcmp(foreign->foreign_table_name, table->name));
|
||||
|
||||
dict_foreign_remove_from_cache(foreign);
|
||||
foreign = UT_LIST_GET_LAST(table->foreign_list);
|
||||
}
|
||||
@@ -779,8 +842,6 @@ dict_table_remove_from_cache(
|
||||
foreign = UT_LIST_GET_FIRST(table->referenced_list);
|
||||
|
||||
while (foreign != NULL) {
|
||||
ut_a(0 == ut_strcmp(foreign->referenced_table_name,
|
||||
table->name));
|
||||
foreign->referenced_table = NULL;
|
||||
foreign->referenced_index = NULL;
|
||||
|
||||
@@ -1628,8 +1689,9 @@ dict_foreign_add_to_cache(
|
||||
{
|
||||
dict_table_t* for_table;
|
||||
dict_table_t* ref_table;
|
||||
dict_foreign_t* for_in_cache = NULL;
|
||||
dict_foreign_t* for_in_cache = NULL;
|
||||
dict_index_t* index;
|
||||
ibool added_to_referenced_list = FALSE;
|
||||
|
||||
ut_ad(mutex_own(&(dict_sys->mutex)));
|
||||
|
||||
@@ -1673,6 +1735,7 @@ dict_foreign_add_to_cache(
|
||||
UT_LIST_ADD_LAST(referenced_list,
|
||||
ref_table->referenced_list,
|
||||
for_in_cache);
|
||||
added_to_referenced_list = TRUE;
|
||||
}
|
||||
|
||||
if (for_in_cache->foreign_table == NULL && for_table) {
|
||||
@@ -1683,6 +1746,12 @@ dict_foreign_add_to_cache(
|
||||
|
||||
if (index == NULL) {
|
||||
if (for_in_cache == foreign) {
|
||||
if (added_to_referenced_list) {
|
||||
UT_LIST_REMOVE(referenced_list,
|
||||
ref_table->referenced_list,
|
||||
for_in_cache);
|
||||
}
|
||||
|
||||
mem_heap_free(foreign->heap);
|
||||
}
|
||||
|
||||
@@ -1802,9 +1871,14 @@ dict_scan_col(
|
||||
return(ptr);
|
||||
}
|
||||
|
||||
if (*ptr == '`') {
|
||||
ptr++;
|
||||
}
|
||||
|
||||
old_ptr = ptr;
|
||||
|
||||
while (!isspace(*ptr) && *ptr != ',' && *ptr != ')') {
|
||||
while (!isspace(*ptr) && *ptr != ',' && *ptr != ')' && *ptr != '`') {
|
||||
|
||||
ptr++;
|
||||
}
|
||||
|
||||
@@ -1825,6 +1899,10 @@ dict_scan_col(
|
||||
}
|
||||
}
|
||||
|
||||
if (*ptr == '`') {
|
||||
ptr++;
|
||||
}
|
||||
|
||||
return(ptr);
|
||||
}
|
||||
|
||||
@@ -1855,9 +1933,13 @@ dict_scan_table_name(
|
||||
return(ptr);
|
||||
}
|
||||
|
||||
if (*ptr == '`') {
|
||||
ptr++;
|
||||
}
|
||||
|
||||
old_ptr = ptr;
|
||||
|
||||
while (!isspace(*ptr) && *ptr != '(') {
|
||||
while (!isspace(*ptr) && *ptr != '(' && *ptr != '`') {
|
||||
if (*ptr == '.') {
|
||||
dot_ptr = ptr;
|
||||
}
|
||||
@@ -1898,6 +1980,10 @@ dict_scan_table_name(
|
||||
|
||||
*table = dict_table_get_low(second_table_name);
|
||||
|
||||
if (*ptr == '`') {
|
||||
ptr++;
|
||||
}
|
||||
|
||||
return(ptr);
|
||||
}
|
||||
|
||||
@@ -1940,8 +2026,8 @@ dict_create_foreign_constraints(
|
||||
/*============================*/
|
||||
/* out: error code or DB_SUCCESS */
|
||||
trx_t* trx, /* in: transaction */
|
||||
char* sql_string, /* in: table create statement where
|
||||
foreign keys are declared like:
|
||||
char* sql_string, /* in: table create or ALTER TABLE
|
||||
statement where foreign keys are declared like:
|
||||
FOREIGN KEY (a, b) REFERENCES table2(c, d),
|
||||
table2 can be written also with the database
|
||||
name before it: test.table2; the default
|
||||
@@ -1967,10 +2053,11 @@ dict_create_foreign_constraints(
|
||||
if (table == NULL) {
|
||||
return(DB_ERROR);
|
||||
}
|
||||
|
||||
loop:
|
||||
ptr = dict_scan_to(ptr, "FOREIGN");
|
||||
|
||||
if (*ptr == '\0' || dict_bracket_count(sql_string, ptr) != 1) {
|
||||
if (*ptr == '\0') {
|
||||
|
||||
/* The following call adds the foreign key constraints
|
||||
to the data dictionary system tables on disk */
|
||||
@@ -2883,12 +2970,89 @@ dict_field_print_low(
|
||||
printf(" %s", field->name);
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
Sprintfs to a string info on foreign keys of a table in a format suitable
|
||||
for CREATE TABLE. */
|
||||
static
|
||||
void
|
||||
dict_print_info_on_foreign_keys_in_create_format(
|
||||
/*=============================================*/
|
||||
char* buf, /* in: auxiliary buffer of 10000 chars */
|
||||
char* str, /* in/out: pointer to a string */
|
||||
ulint len, /* in: space in str available for info */
|
||||
dict_table_t* table) /* in: table */
|
||||
{
|
||||
|
||||
dict_foreign_t* foreign;
|
||||
ulint i;
|
||||
char* buf2;
|
||||
|
||||
buf2 = buf;
|
||||
|
||||
mutex_enter(&(dict_sys->mutex));
|
||||
|
||||
foreign = UT_LIST_GET_FIRST(table->foreign_list);
|
||||
|
||||
if (foreign == NULL) {
|
||||
mutex_exit(&(dict_sys->mutex));
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
while (foreign != NULL) {
|
||||
buf2 += sprintf(buf2, ",\n FOREIGN KEY (");
|
||||
|
||||
for (i = 0; i < foreign->n_fields; i++) {
|
||||
buf2 += sprintf(buf2, "`%s`",
|
||||
foreign->foreign_col_names[i]);
|
||||
|
||||
if (i + 1 < foreign->n_fields) {
|
||||
buf2 += sprintf(buf2, ", ");
|
||||
}
|
||||
}
|
||||
|
||||
buf2 += sprintf(buf2, ") REFERENCES `%s` (",
|
||||
foreign->referenced_table_name);
|
||||
/* Change the '/' in the table name to '.' */
|
||||
|
||||
for (i = ut_strlen(buf); i > 0; i--) {
|
||||
if (buf[i] == '/') {
|
||||
|
||||
buf[i] = '.';
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < foreign->n_fields; i++) {
|
||||
buf2 += sprintf(buf2, "`%s`",
|
||||
foreign->referenced_col_names[i]);
|
||||
if (i + 1 < foreign->n_fields) {
|
||||
buf2 += sprintf(buf2, ", ");
|
||||
}
|
||||
}
|
||||
|
||||
buf2 += sprintf(buf2, ")");
|
||||
|
||||
foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
|
||||
}
|
||||
|
||||
mutex_exit(&(dict_sys->mutex));
|
||||
|
||||
buf[len - 1] = '\0';
|
||||
ut_memcpy(str, buf, len);
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
Sprintfs to a string info on foreign keys of a table. */
|
||||
|
||||
void
|
||||
dict_print_info_on_foreign_keys(
|
||||
/*============================*/
|
||||
ibool create_table_format, /* in: if TRUE then print in
|
||||
a format suitable to be inserted into
|
||||
a CREATE TABLE, otherwise in the format
|
||||
of SHOW TABLE STATUS */
|
||||
char* str, /* in/out: pointer to a string */
|
||||
ulint len, /* in: space in str available for info */
|
||||
dict_table_t* table) /* in: table */
|
||||
@@ -2898,6 +3062,12 @@ dict_print_info_on_foreign_keys(
|
||||
char* buf2;
|
||||
char buf[10000];
|
||||
|
||||
if (create_table_format) {
|
||||
dict_print_info_on_foreign_keys_in_create_format(
|
||||
buf, str, len, table);
|
||||
return;
|
||||
}
|
||||
|
||||
buf2 = buf;
|
||||
|
||||
mutex_enter(&(dict_sys->mutex));
|
||||
@@ -2916,6 +3086,7 @@ dict_print_info_on_foreign_keys(
|
||||
for (i = 0; i < foreign->n_fields; i++) {
|
||||
buf2 += sprintf(buf2, "%s",
|
||||
foreign->foreign_col_names[i]);
|
||||
|
||||
if (i + 1 < foreign->n_fields) {
|
||||
buf2 += sprintf(buf2, " ");
|
||||
}
|
||||
|
@@ -688,7 +688,16 @@ dict_load_indexes(
|
||||
|
||||
dict_load_fields(table, index, heap);
|
||||
|
||||
dict_index_add_to_cache(table, index);
|
||||
if (index->type & DICT_CLUSTERED == 0
|
||||
&& NULL == dict_table_get_first_index(table)) {
|
||||
|
||||
fprintf(stderr,
|
||||
"InnoDB: Error: trying to load index %s for table %s\n"
|
||||
"InnoDB: but the first index was not clustered\n",
|
||||
index->name, table->name);
|
||||
} else {
|
||||
dict_index_add_to_cache(table, index);
|
||||
}
|
||||
}
|
||||
|
||||
btr_pcur_move_to_next_user_rec(&pcur, &mtr);
|
||||
|
@@ -89,8 +89,8 @@ struct fil_node_struct {
|
||||
char* name; /* the file name or path */
|
||||
ibool open; /* TRUE if file open */
|
||||
os_file_t handle; /* OS handle to the file, if file open */
|
||||
ulint size; /* size of the file in database blocks
|
||||
(where the possible last incomplete block
|
||||
ulint size; /* size of the file in database pages
|
||||
(where the possible last incomplete megabyte
|
||||
is ignored) */
|
||||
ulint n_pending;
|
||||
/* count of pending i/o-ops on this file */
|
||||
@@ -945,6 +945,76 @@ fil_node_complete_io(
|
||||
}
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
Tries to extend a data file by the number of pages given. Any fractions of a
|
||||
megabyte are ignored. */
|
||||
|
||||
ibool
|
||||
fil_extend_last_data_file(
|
||||
/*======================*/
|
||||
/* out: TRUE if success, also if we run
|
||||
out of disk space we may return TRUE */
|
||||
ulint* actual_increase,/* out: number of pages we were able to
|
||||
extend, here the orginal size of the file and
|
||||
the resulting size of the file are rounded
|
||||
downwards to a full megabyte, and the
|
||||
difference expressed in pages is returned */
|
||||
ulint size_increase) /* in: try to extend this many pages */
|
||||
{
|
||||
fil_node_t* node;
|
||||
fil_space_t* space;
|
||||
fil_system_t* system = fil_system;
|
||||
byte* buf;
|
||||
ibool success;
|
||||
ulint i;
|
||||
|
||||
mutex_enter(&(system->mutex));
|
||||
|
||||
HASH_SEARCH(hash, system->spaces, 0, space, space->id == 0);
|
||||
|
||||
ut_a(space);
|
||||
|
||||
node = UT_LIST_GET_LAST(space->chain);
|
||||
|
||||
fil_node_prepare_for_io(node, system, space);
|
||||
|
||||
buf = mem_alloc(1024 * 1024);
|
||||
|
||||
memset(buf, '\0', 1024 * 1024);
|
||||
|
||||
for (i = 0; i < size_increase / ((1024 * 1024) / UNIV_PAGE_SIZE); i++) {
|
||||
|
||||
success = os_file_write(node->name, node->handle, buf,
|
||||
(node->size << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFF,
|
||||
node->size >> (32 - UNIV_PAGE_SIZE_SHIFT),
|
||||
1024 * 1024);
|
||||
|
||||
if (!success) {
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
node->size += ((1024 * 1024) / UNIV_PAGE_SIZE);
|
||||
space->size += ((1024 * 1024) / UNIV_PAGE_SIZE);
|
||||
|
||||
os_has_said_disk_full = FALSE;
|
||||
}
|
||||
|
||||
mem_free(buf);
|
||||
|
||||
fil_node_complete_io(node, system, OS_FILE_WRITE);
|
||||
|
||||
mutex_exit(&(system->mutex));
|
||||
|
||||
*actual_increase = i * ((1024 * 1024) / UNIV_PAGE_SIZE);
|
||||
|
||||
fil_flush(0);
|
||||
|
||||
srv_data_file_sizes[srv_n_data_files - 1] += *actual_increase;
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
Reads or writes data. This operation is asynchronous (aio). */
|
||||
|
||||
@@ -966,9 +1036,9 @@ fil_io(
|
||||
ulint byte_offset, /* in: remainder of offset in bytes; in
|
||||
aio this must be divisible by the OS block
|
||||
size */
|
||||
ulint len, /* in: how many bytes to read; this must
|
||||
not cross a file boundary; in aio this must
|
||||
be a block size multiple */
|
||||
ulint len, /* in: how many bytes to read or write; this
|
||||
must not cross a file boundary; in aio this
|
||||
must be a block size multiple */
|
||||
void* buf, /* in/out: buffer where to store read data
|
||||
or from where to write; in aio this must be
|
||||
appropriately aligned */
|
||||
|
@@ -50,7 +50,7 @@ descriptor page, but used only in the first. */
|
||||
#define FSP_FREE_LIMIT 12 /* Minimum page number for which the
|
||||
free list has not been initialized:
|
||||
the pages >= this limit are, by
|
||||
definition free */
|
||||
definition, free */
|
||||
#define FSP_LOWEST_NO_WRITE 16 /* The lowest page offset for which
|
||||
the page has not been written to disk
|
||||
(if it has been written, we know that
|
||||
@@ -898,6 +898,106 @@ fsp_header_inc_size(
|
||||
mlog_write_ulint(header + FSP_SIZE, size + size_inc, MLOG_4BYTES, mtr);
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
Gets the current free limit of a tablespace. The free limit means the
|
||||
place of the first page which has never been put to the the free list
|
||||
for allocation. The space above that address is initialized to zero.
|
||||
Sets also the global variable log_fsp_current_free_limit. */
|
||||
|
||||
ulint
|
||||
fsp_header_get_free_limit(
|
||||
/*======================*/
|
||||
/* out: free limit in megabytes */
|
||||
ulint space) /* in: space id */
|
||||
{
|
||||
fsp_header_t* header;
|
||||
ulint limit;
|
||||
mtr_t mtr;
|
||||
|
||||
ut_a(space == 0); /* We have only one log_fsp_current_... variable */
|
||||
|
||||
mtr_start(&mtr);
|
||||
|
||||
mtr_x_lock(fil_space_get_latch(space), &mtr);
|
||||
|
||||
header = fsp_get_space_header(space, &mtr);
|
||||
|
||||
limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, &mtr);
|
||||
|
||||
limit = limit / ((1024 * 1024) / UNIV_PAGE_SIZE);
|
||||
|
||||
log_fsp_current_free_limit_set_and_checkpoint(limit);
|
||||
|
||||
mtr_commit(&mtr);
|
||||
|
||||
return(limit);
|
||||
}
|
||||
|
||||
/***************************************************************************
|
||||
Tries to extend the last data file file if it is defined as auto-extending. */
|
||||
static
|
||||
ibool
|
||||
fsp_try_extend_last_file(
|
||||
/*=====================*/
|
||||
/* out: FALSE if not auto-extending */
|
||||
ulint* actual_increase,/* out: actual increase in pages */
|
||||
ulint space, /* in: space */
|
||||
fsp_header_t* header, /* in: space header */
|
||||
mtr_t* mtr) /* in: mtr */
|
||||
{
|
||||
ulint size;
|
||||
ulint size_increase;
|
||||
ibool success;
|
||||
|
||||
ut_a(space == 0);
|
||||
|
||||
*actual_increase = 0;
|
||||
|
||||
if (!srv_auto_extend_last_data_file) {
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
|
||||
|
||||
if (srv_last_file_size_max != 0) {
|
||||
if (srv_last_file_size_max
|
||||
< srv_data_file_sizes[srv_n_data_files - 1]) {
|
||||
|
||||
fprintf(stderr,
|
||||
"InnoDB: Error: Last data file size is %lu, max size allowed %lu\n",
|
||||
srv_data_file_sizes[srv_n_data_files - 1],
|
||||
srv_last_file_size_max);
|
||||
}
|
||||
|
||||
size_increase = srv_last_file_size_max
|
||||
- srv_data_file_sizes[srv_n_data_files - 1];
|
||||
if (size_increase > SRV_AUTO_EXTEND_INCREMENT) {
|
||||
size_increase = SRV_AUTO_EXTEND_INCREMENT;
|
||||
}
|
||||
} else {
|
||||
size_increase = SRV_AUTO_EXTEND_INCREMENT;
|
||||
}
|
||||
|
||||
if (size_increase == 0) {
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
/* Extend the data file. If we are not able to extend
|
||||
the full requested length, the function tells us
|
||||
the number of full megabytes (but the unit is pages!)
|
||||
we were able to extend. */
|
||||
|
||||
success = fil_extend_last_data_file(actual_increase, size_increase);
|
||||
|
||||
if (success) {
|
||||
mlog_write_ulint(header + FSP_SIZE, size + *actual_increase,
|
||||
MLOG_4BYTES, mtr);
|
||||
}
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
Puts new extents to the free list if there are free extents above the free
|
||||
limit. If an extent happens to contain an extent descriptor page, the extent
|
||||
@@ -917,8 +1017,9 @@ fsp_fill_free_list(
|
||||
ulint frag_n_used;
|
||||
page_t* descr_page;
|
||||
page_t* ibuf_page;
|
||||
mtr_t ibuf_mtr;
|
||||
ulint actual_increase;
|
||||
ulint i;
|
||||
mtr_t ibuf_mtr;
|
||||
|
||||
ut_ad(header && mtr);
|
||||
|
||||
@@ -926,12 +1027,28 @@ fsp_fill_free_list(
|
||||
size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
|
||||
limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr);
|
||||
|
||||
if (srv_auto_extend_last_data_file
|
||||
&& size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) {
|
||||
|
||||
/* Try to increase the last data file size */
|
||||
fsp_try_extend_last_file(&actual_increase, space, header,
|
||||
mtr);
|
||||
size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr);
|
||||
}
|
||||
|
||||
i = limit;
|
||||
|
||||
while ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD)) {
|
||||
|
||||
mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE,
|
||||
MLOG_4BYTES, mtr);
|
||||
|
||||
/* Update the free limit info in the log system and make
|
||||
a checkpoint */
|
||||
log_fsp_current_free_limit_set_and_checkpoint(
|
||||
(i + FSP_EXTENT_SIZE)
|
||||
/ ((1024 * 1024) / UNIV_PAGE_SIZE));
|
||||
|
||||
if (0 == i % XDES_DESCRIBED_PER_PAGE) {
|
||||
|
||||
/* We are going to initialize a new descriptor page
|
||||
@@ -1172,6 +1289,7 @@ fsp_free_page(
|
||||
xdes_t* descr;
|
||||
ulint state;
|
||||
ulint frag_n_used;
|
||||
char buf[1000];
|
||||
|
||||
ut_ad(mtr);
|
||||
|
||||
@@ -1183,10 +1301,38 @@ fsp_free_page(
|
||||
|
||||
state = xdes_get_state(descr, mtr);
|
||||
|
||||
ut_a((state == XDES_FREE_FRAG) || (state == XDES_FULL_FRAG));
|
||||
if (state != XDES_FREE_FRAG && state != XDES_FULL_FRAG) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Error: File space extent descriptor of page %lu has state %lu\n",
|
||||
page, state);
|
||||
ut_sprintf_buf(buf, ((byte*)descr) - 50, 200);
|
||||
|
||||
ut_a(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
|
||||
== FALSE);
|
||||
fprintf(stderr, "InnoDB: Dump of descriptor: %s\n", buf);
|
||||
|
||||
if (state == XDES_FREE) {
|
||||
/* We put here some fault tolerance: if the page
|
||||
is already free, return without doing anything! */
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
ut_a(0);
|
||||
}
|
||||
|
||||
if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)
|
||||
== TRUE) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Error: File space extent descriptor of page %lu says it is free\n",
|
||||
page);
|
||||
ut_sprintf_buf(buf, ((byte*)descr) - 50, 200);
|
||||
|
||||
fprintf(stderr, "InnoDB: Dump of descriptor: %s\n", buf);
|
||||
|
||||
/* We put here some fault tolerance: if the page
|
||||
is already free, return without doing anything! */
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
|
||||
xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr);
|
||||
@@ -2243,13 +2389,15 @@ fsp_reserve_free_extents(
|
||||
mtr_t* mtr) /* in: mtr */
|
||||
{
|
||||
fsp_header_t* space_header;
|
||||
rw_lock_t* latch;
|
||||
ulint n_free_list_ext;
|
||||
ulint free_limit;
|
||||
ulint size;
|
||||
ulint n_free;
|
||||
ulint n_free_up;
|
||||
ulint reserve;
|
||||
rw_lock_t* latch;
|
||||
ibool success;
|
||||
ulint n_pages_added;
|
||||
|
||||
ut_ad(mtr);
|
||||
ut_ad(!mutex_own(&kernel_mutex)
|
||||
@@ -2260,7 +2408,7 @@ fsp_reserve_free_extents(
|
||||
mtr_x_lock(latch, mtr);
|
||||
|
||||
space_header = fsp_get_space_header(space, mtr);
|
||||
|
||||
try_again:
|
||||
size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr);
|
||||
|
||||
n_free_list_ext = flst_get_len(space_header + FSP_FREE, mtr);
|
||||
@@ -2291,7 +2439,7 @@ fsp_reserve_free_extents(
|
||||
|
||||
if (n_free <= reserve + n_ext) {
|
||||
|
||||
return(FALSE);
|
||||
goto try_to_extend;
|
||||
}
|
||||
} else if (alloc_type == FSP_UNDO) {
|
||||
/* We reserve 1 % of the space size to cleaning operations */
|
||||
@@ -2300,13 +2448,26 @@ fsp_reserve_free_extents(
|
||||
|
||||
if (n_free <= reserve + n_ext) {
|
||||
|
||||
return(FALSE);
|
||||
goto try_to_extend;
|
||||
}
|
||||
} else {
|
||||
ut_a(alloc_type == FSP_CLEANING);
|
||||
}
|
||||
|
||||
return(fil_space_reserve_free_extents(space, n_free, n_ext));
|
||||
success = fil_space_reserve_free_extents(space, n_free, n_ext);
|
||||
|
||||
if (success) {
|
||||
return(TRUE);
|
||||
}
|
||||
try_to_extend:
|
||||
success = fsp_try_extend_last_file(&n_pages_added, space,
|
||||
space_header, mtr);
|
||||
if (success && n_pages_added > 0) {
|
||||
|
||||
goto try_again;
|
||||
}
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
|
@@ -28,6 +28,16 @@ a margin of replaceable pages there. */
|
||||
void
|
||||
buf_flush_free_margin(void);
|
||||
/*=======================*/
|
||||
/************************************************************************
|
||||
Initializes a page for writing to the tablespace. */
|
||||
|
||||
void
|
||||
buf_flush_init_for_writing(
|
||||
/*=======================*/
|
||||
byte* page, /* in: page */
|
||||
dulint newest_lsn, /* in: newest modification lsn to the page */
|
||||
ulint space, /* in: space id */
|
||||
ulint page_no); /* in: page number */
|
||||
/***********************************************************************
|
||||
This utility flushes dirty blocks from the end of the LRU list or flush_list.
|
||||
NOTE 1: in the case of an LRU flush the calling thread may own latches to
|
||||
|
@@ -105,7 +105,8 @@ dict_table_autoinc_initialize(
|
||||
dict_table_t* table, /* in: table */
|
||||
ib_longlong value); /* in: value which was assigned to a row */
|
||||
/************************************************************************
|
||||
Gets the next autoinc value, 0 if not yet initialized. */
|
||||
Gets the next autoinc value, 0 if not yet initialized. If initialized,
|
||||
increments the counter by 1. */
|
||||
|
||||
ib_longlong
|
||||
dict_table_autoinc_get(
|
||||
@@ -113,6 +114,15 @@ dict_table_autoinc_get(
|
||||
/* out: value for a new row, or 0 */
|
||||
dict_table_t* table); /* in: table */
|
||||
/************************************************************************
|
||||
Reads the autoinc counter value, 0 if not yet initialized. Does not
|
||||
increment the counter. */
|
||||
|
||||
ib_longlong
|
||||
dict_table_autoinc_read(
|
||||
/*====================*/
|
||||
/* out: value of the counter */
|
||||
dict_table_t* table); /* in: table */
|
||||
/************************************************************************
|
||||
Updates the autoinc counter if the value supplied is bigger than the
|
||||
current value. If not inited, does nothing. */
|
||||
|
||||
@@ -143,7 +153,10 @@ dict_table_rename_in_cache(
|
||||
/*=======================*/
|
||||
/* out: TRUE if success */
|
||||
dict_table_t* table, /* in: table */
|
||||
char* new_name); /* in: new name */
|
||||
char* new_name, /* in: new name */
|
||||
ibool rename_also_foreigns);/* in: in ALTER TABLE we want
|
||||
to preserve the original table name
|
||||
in constraints which reference it */
|
||||
/**************************************************************************
|
||||
Adds a foreign key constraint object to the dictionary cache. May free
|
||||
the object if there already is an object with the same identifier in.
|
||||
@@ -284,6 +297,10 @@ Sprintfs to a string info on foreign keys of a table. */
|
||||
void
|
||||
dict_print_info_on_foreign_keys(
|
||||
/*============================*/
|
||||
ibool create_table_format, /* in: if TRUE then print in
|
||||
a format suitable to be inserted into
|
||||
a CREATE TABLE, otherwise in the format
|
||||
of SHOW TABLE STATUS */
|
||||
char* str, /* in/out: pointer to a string */
|
||||
ulint len, /* in: space in str available for info */
|
||||
dict_table_t* table); /* in: table */
|
||||
|
@@ -64,8 +64,10 @@ extern fil_addr_t fil_addr_null;
|
||||
#define FIL_PAGE_DATA 38 /* start of the data on the page */
|
||||
|
||||
/* File page trailer */
|
||||
#define FIL_PAGE_END_LSN 8 /* this should be same as
|
||||
FIL_PAGE_LSN */
|
||||
#define FIL_PAGE_END_LSN 8 /* the low 4 bytes of this are used
|
||||
to store the page checksum, the
|
||||
last 4 bytes should be identical
|
||||
to the last 4 bytes of FIL_PAGE_LSN */
|
||||
#define FIL_PAGE_DATA_END 8
|
||||
|
||||
/* File page types */
|
||||
@@ -134,6 +136,21 @@ fil_space_truncate_start(
|
||||
ulint trunc_len); /* in: truncate by this much; it is an error
|
||||
if this does not equal to the combined size of
|
||||
some initial files in the space */
|
||||
/**************************************************************************
|
||||
Tries to extend a data file by the number of pages given. Any fractions of a
|
||||
megabyte are ignored. */
|
||||
|
||||
ibool
|
||||
fil_extend_last_data_file(
|
||||
/*======================*/
|
||||
/* out: TRUE if success, also if we run
|
||||
out of disk space we may return TRUE */
|
||||
ulint* actual_increase,/* out: number of pages we were able to
|
||||
extend, here the orginal size of the file and
|
||||
the resulting size of the file are rounded
|
||||
downwards to a full megabyte, and the
|
||||
difference expressed in pages is returned */
|
||||
ulint size_increase); /* in: try to extend this many pages */
|
||||
/***********************************************************************
|
||||
Frees a space object from a file system. Closes the files in the chain
|
||||
but does not delete them. */
|
||||
|
@@ -46,6 +46,17 @@ void
|
||||
fsp_init(void);
|
||||
/*==========*/
|
||||
/**************************************************************************
|
||||
Gets the current free limit of a tablespace. The free limit means the
|
||||
place of the first page which has never been put to the the free list
|
||||
for allocation. The space above that address is initialized to zero.
|
||||
Sets also the global variable log_fsp_current_free_limit. */
|
||||
|
||||
ulint
|
||||
fsp_header_get_free_limit(
|
||||
/*======================*/
|
||||
/* out: free limit in megabytes */
|
||||
ulint space); /* in: space id */
|
||||
/**************************************************************************
|
||||
Initializes the space header of a new created space. */
|
||||
|
||||
void
|
||||
|
@@ -26,6 +26,32 @@ extern ibool log_debug_writes;
|
||||
#define LOG_WAIT_ALL_GROUPS 93
|
||||
#define LOG_MAX_N_GROUPS 32
|
||||
|
||||
/********************************************************************
|
||||
Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
|
||||
so that we know that the limit has been written to a log checkpoint field
|
||||
on disk. */
|
||||
|
||||
void
|
||||
log_fsp_current_free_limit_set_and_checkpoint(
|
||||
/*==========================================*/
|
||||
ulint limit); /* in: limit to set */
|
||||
/***********************************************************************
|
||||
Calculates where in log files we find a specified lsn. */
|
||||
|
||||
ulint
|
||||
log_calc_where_lsn_is(
|
||||
/*==================*/
|
||||
/* out: log file number */
|
||||
ib_longlong* log_file_offset, /* out: offset in that file
|
||||
(including the header) */
|
||||
dulint first_header_lsn, /* in: first log file start
|
||||
lsn */
|
||||
dulint lsn, /* in: lsn whose position to
|
||||
determine */
|
||||
ulint n_log_files, /* in: total number of log
|
||||
files */
|
||||
ib_longlong log_file_size); /* in: log file size
|
||||
(including the header) */
|
||||
/****************************************************************
|
||||
Writes to the log the string given. The log must be released with
|
||||
log_release. */
|
||||
@@ -225,6 +251,16 @@ Writes checkpoint info to groups. */
|
||||
void
|
||||
log_groups_write_checkpoint_info(void);
|
||||
/*==================================*/
|
||||
/**********************************************************
|
||||
Writes info to a buffer of a log group when log files are created in
|
||||
backup restoration. */
|
||||
|
||||
void
|
||||
log_reset_first_header_and_checkpoint(
|
||||
/*==================================*/
|
||||
byte* hdr_buf,/* in: buffer which will be written to the start
|
||||
of the first log file */
|
||||
dulint lsn); /* in: lsn of the start of the first log file */
|
||||
/************************************************************************
|
||||
Starts an archiving operation. */
|
||||
|
||||
@@ -507,7 +543,16 @@ extern log_t* log_sys;
|
||||
+ LOG_MAX_N_GROUPS * 8)
|
||||
#define LOG_CHECKPOINT_CHECKSUM_1 LOG_CHECKPOINT_ARRAY_END
|
||||
#define LOG_CHECKPOINT_CHECKSUM_2 (4 + LOG_CHECKPOINT_ARRAY_END)
|
||||
#define LOG_CHECKPOINT_SIZE (8 + LOG_CHECKPOINT_ARRAY_END)
|
||||
#define LOG_CHECKPOINT_FSP_FREE_LIMIT (8 + LOG_CHECKPOINT_ARRAY_END)
|
||||
/* current fsp free limit in the
|
||||
tablespace, in units of one megabyte */
|
||||
#define LOG_CHECKPOINT_FSP_MAGIC_N (12 + LOG_CHECKPOINT_ARRAY_END)
|
||||
/* this magic number tells if the
|
||||
checkpoint contains the above field:
|
||||
the field was added to InnoDB-3.23.50 */
|
||||
#define LOG_CHECKPOINT_SIZE (16 + LOG_CHECKPOINT_ARRAY_END)
|
||||
|
||||
#define LOG_CHECKPOINT_FSP_MAGIC_N_VAL 1441231243
|
||||
|
||||
/* Offsets of a log file header */
|
||||
#define LOG_GROUP_ID 0 /* log group number */
|
||||
|
@@ -15,6 +15,39 @@ Created 9/20/1997 Heikki Tuuri
|
||||
#include "hash0hash.h"
|
||||
#include "log0log.h"
|
||||
|
||||
/***********************************************************************
|
||||
Reads the checkpoint info needed in hot backup. */
|
||||
|
||||
ibool
|
||||
recv_read_cp_info_for_backup(
|
||||
/*=========================*/
|
||||
/* out: TRUE if success */
|
||||
byte* hdr, /* in: buffer containing the log group header */
|
||||
dulint* lsn, /* out: checkpoint lsn */
|
||||
ulint* offset, /* out: checkpoint offset in the log group */
|
||||
ulint* fsp_limit,/* out: fsp limit, 1000000000 if the database
|
||||
is running with < version 3.23.50 of InnoDB */
|
||||
dulint* cp_no, /* out: checkpoint number */
|
||||
dulint* first_header_lsn);
|
||||
/* out: lsn of of the start of the first log file */
|
||||
/***********************************************************************
|
||||
Scans the log segment and n_bytes_scanned is set to the length of valid
|
||||
log scanned. */
|
||||
|
||||
void
|
||||
recv_scan_log_seg_for_backup(
|
||||
/*=========================*/
|
||||
byte* buf, /* in: buffer containing log data */
|
||||
ulint buf_len, /* in: data length in that buffer */
|
||||
dulint* scanned_lsn, /* in/out: lsn of buffer start,
|
||||
we return scanned lsn */
|
||||
ulint* scanned_checkpoint_no,
|
||||
/* in/out: 4 lowest bytes of the
|
||||
highest scanned checkpoint number so
|
||||
far */
|
||||
ulint* n_bytes_scanned);/* out: how much we were able to
|
||||
scan, smaller than buf_len if log
|
||||
data ended here */
|
||||
/***********************************************************************
|
||||
Returns TRUE if recovery is currently running. */
|
||||
UNIV_INLINE
|
||||
@@ -35,6 +68,10 @@ read in, or also for a page already in the buffer pool. */
|
||||
void
|
||||
recv_recover_page(
|
||||
/*==============*/
|
||||
ibool recover_backup, /* in: TRUE if we are recovering a backup
|
||||
page: then we do not acquire any latches
|
||||
since the page was read in outside the
|
||||
buffer pool */
|
||||
ibool just_read_in, /* in: TRUE if the i/o-handler calls this for
|
||||
a freshly read page */
|
||||
page_t* page, /* in: buffer page */
|
||||
@@ -69,8 +106,15 @@ recv_scan_log_recs(
|
||||
/*===============*/
|
||||
/* out: TRUE if limit_lsn has been reached, or
|
||||
not able to scan any more in this log group */
|
||||
ibool apply_automatically,/* in: TRUE if we want this function to
|
||||
apply log records automatically when the
|
||||
hash table becomes full; in the hot backup tool
|
||||
the tool does the applying, not this
|
||||
function */
|
||||
ulint available_memory,/* in: we let the hash table of recs to grow
|
||||
to this size, at the maximum */
|
||||
ibool store_to_hash, /* in: TRUE if the records should be stored
|
||||
to the hash table; this is set FALSE if just
|
||||
to the hash table; this is set to FALSE if just
|
||||
debug checking is needed */
|
||||
byte* buf, /* in: buffer containing a log segment or
|
||||
garbage */
|
||||
@@ -92,6 +136,16 @@ recv_reset_logs(
|
||||
ibool new_logs_created);/* in: TRUE if resetting logs is done
|
||||
at the log creation; FALSE if it is done
|
||||
after archive recovery */
|
||||
/**********************************************************
|
||||
Creates new log files after a backup has been restored. */
|
||||
|
||||
void
|
||||
recv_reset_log_files_for_backup(
|
||||
/*============================*/
|
||||
char* log_dir, /* in: log file directory path */
|
||||
ulint n_log_files, /* in: number of log files */
|
||||
ulint log_file_size, /* in: log file size */
|
||||
dulint lsn); /* in: new start lsn */
|
||||
/************************************************************
|
||||
Creates the recovery system. */
|
||||
|
||||
@@ -102,8 +156,11 @@ recv_sys_create(void);
|
||||
Inits the recovery system for a recovery operation. */
|
||||
|
||||
void
|
||||
recv_sys_init(void);
|
||||
/*===============*/
|
||||
recv_sys_init(
|
||||
/*==========*/
|
||||
ibool recover_from_backup, /* in: TRUE if this is called
|
||||
to recover from a hot backup */
|
||||
ulint available_memory); /* in: available memory in bytes */
|
||||
/***********************************************************************
|
||||
Empties the hash table of stored log records, applying them to appropriate
|
||||
pages. */
|
||||
@@ -118,6 +175,17 @@ recv_apply_hashed_log_recs(
|
||||
disk and invalidated in buffer pool: this
|
||||
alternative means that no new log records
|
||||
can be generated during the application */
|
||||
/***********************************************************************
|
||||
Applies log records in the hash table to a backup. */
|
||||
|
||||
void
|
||||
recv_apply_log_recs_for_backup(
|
||||
/*===========================*/
|
||||
ulint n_data_files, /* in: number of data files */
|
||||
char** data_files, /* in: array containing the paths to the
|
||||
data files */
|
||||
ulint* file_sizes); /* in: sizes of the data files in database
|
||||
pages */
|
||||
/************************************************************
|
||||
Recovers from archived log files, and also from log files, if they exist. */
|
||||
|
||||
@@ -260,6 +328,14 @@ extern ibool recv_recovery_on;
|
||||
extern ibool recv_no_ibuf_operations;
|
||||
extern ibool recv_needed_recovery;
|
||||
|
||||
/* Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
|
||||
times! */
|
||||
#define RECV_PARSING_BUF_SIZE (2 * 1024 * 1024)
|
||||
|
||||
/* Size of block reads when the log groups are scanned forward to do a
|
||||
roll-forward */
|
||||
#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
|
||||
|
||||
/* States of recv_addr_struct */
|
||||
#define RECV_NOT_PROCESSED 71
|
||||
#define RECV_BEING_READ 72
|
||||
|
@@ -41,11 +41,11 @@ page buffer pool; the latter method is used for very big heaps */
|
||||
|
||||
/* The following start size is used for the first block in the memory heap if
|
||||
the size is not specified, i.e., 0 is given as the parameter in the call of
|
||||
create. The standard size is the maximum size of the blocks used for
|
||||
create. The standard size is the maximum (payload) size of the blocks used for
|
||||
allocations of small buffers. */
|
||||
|
||||
#define MEM_BLOCK_START_SIZE 64
|
||||
#define MEM_BLOCK_STANDARD_SIZE 8192
|
||||
#define MEM_BLOCK_STANDARD_SIZE 8000
|
||||
|
||||
/* If a memory heap is allowed to grow into the buffer pool, the following
|
||||
is the maximum size for a single allocated buffer: */
|
||||
|
@@ -11,6 +11,12 @@ Created 10/21/1995 Heikki Tuuri
|
||||
|
||||
#include "univ.i"
|
||||
|
||||
|
||||
/* If the following is set to TRUE, we do not call os_file_flush in every
|
||||
os_file_write */
|
||||
extern ibool os_do_not_call_flush_at_each_write;
|
||||
extern ibool os_has_said_disk_full;
|
||||
|
||||
#ifdef __WIN__
|
||||
|
||||
/* We define always WIN_ASYNC_IO, and check at run-time whether
|
||||
@@ -55,6 +61,9 @@ log. */
|
||||
#define OS_FILE_CREATE 52
|
||||
#define OS_FILE_OVERWRITE 53
|
||||
|
||||
#define OS_FILE_READ_ONLY 333
|
||||
#define OS_FILE_READ_WRITE 444
|
||||
|
||||
/* Options for file_create */
|
||||
#define OS_FILE_AIO 61
|
||||
#define OS_FILE_NORMAL 62
|
||||
@@ -118,6 +127,27 @@ os_get_os_version(void);
|
||||
/*===================*/
|
||||
/* out: OS_WIN95, OS_WIN31, OS_WINNT (2000 == NT) */
|
||||
/********************************************************************
|
||||
Creates the seek mutexes used in positioned reads and writes. */
|
||||
|
||||
void
|
||||
os_io_init_simple(void);
|
||||
/*===================*/
|
||||
/********************************************************************
|
||||
A simple function to open or create a file. */
|
||||
|
||||
os_file_t
|
||||
os_file_create_simple(
|
||||
/*==================*/
|
||||
/* out, own: handle to the file, not defined if error,
|
||||
error number can be retrieved with os_get_last_error */
|
||||
char* name, /* in: name of the file or path as a null-terminated
|
||||
string */
|
||||
ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened
|
||||
(if does not exist, error), or OS_FILE_CREATE if a new
|
||||
file is created (if exists, error) */
|
||||
ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */
|
||||
ibool* success);/* out: TRUE if succeed, FALSE if error */
|
||||
/********************************************************************
|
||||
Opens an existing file or creates a new. */
|
||||
|
||||
os_file_t
|
||||
|
@@ -402,13 +402,13 @@ struct row_prebuilt_struct {
|
||||
byte* ins_upd_rec_buff;/* buffer for storing data converted
|
||||
to the Innobase format from the MySQL
|
||||
format */
|
||||
ibool in_update_remember_pos;
|
||||
/* if an update is processed, then if
|
||||
this flag is set to TRUE, it means
|
||||
that the stored cursor position in
|
||||
SELECT is the right position also
|
||||
for the update: we can just restore
|
||||
the cursor and save CPU time */
|
||||
ibool hint_no_need_to_fetch_extra_cols;
|
||||
/* normally this is TRUE, but
|
||||
MySQL will set this to FALSE
|
||||
if we might be required to fetch also
|
||||
other columns than mentioned in the
|
||||
query: the clustered index column(s),
|
||||
or an auto-increment column*/
|
||||
upd_node_t* upd_node; /* Innobase SQL update node used
|
||||
to perform updates and deletes */
|
||||
que_fork_t* ins_graph; /* Innobase SQL query graph used
|
||||
|
@@ -24,10 +24,13 @@ extern char srv_fatal_errbuf[];
|
||||
thread starts running */
|
||||
extern os_event_t srv_lock_timeout_thread_event;
|
||||
|
||||
/* If the last data file is auto-extended, we add this many pages to it
|
||||
at a time */
|
||||
#define SRV_AUTO_EXTEND_INCREMENT (8 * ((1024 * 1024) / UNIV_PAGE_SIZE))
|
||||
|
||||
/* Server parameters which are read from the initfile */
|
||||
|
||||
extern char* srv_data_home;
|
||||
extern char* srv_logs_home;
|
||||
extern char* srv_arch_dir;
|
||||
|
||||
extern ulint srv_n_data_files;
|
||||
@@ -35,6 +38,9 @@ extern char** srv_data_file_names;
|
||||
extern ulint* srv_data_file_sizes;
|
||||
extern ulint* srv_data_file_is_raw_partition;
|
||||
|
||||
extern ibool srv_auto_extend_last_data_file;
|
||||
extern ulint srv_last_file_size_max;
|
||||
|
||||
extern ibool srv_created_new_raw;
|
||||
|
||||
#define SRV_NEW_RAW 1
|
||||
@@ -185,6 +191,19 @@ srv_boot(void);
|
||||
/*==========*/
|
||||
/* out: DB_SUCCESS or error code */
|
||||
/*************************************************************************
|
||||
Initializes the server. */
|
||||
|
||||
void
|
||||
srv_init(void);
|
||||
/*==========*/
|
||||
/*************************************************************************
|
||||
Initializes the synchronization primitives, memory system, and the thread
|
||||
local storage. */
|
||||
|
||||
void
|
||||
srv_general_init(void);
|
||||
/*==================*/
|
||||
/*************************************************************************
|
||||
Gets the number of threads in the system. */
|
||||
|
||||
ulint
|
||||
|
@@ -12,6 +12,56 @@ Created 10/10/1995 Heikki Tuuri
|
||||
|
||||
#include "univ.i"
|
||||
|
||||
/*************************************************************************
|
||||
Normalizes a directory path for Windows: converts slashes to backslashes. */
|
||||
|
||||
void
|
||||
srv_normalize_path_for_win(
|
||||
/*=======================*/
|
||||
char* str); /* in/out: null-terminated character string */
|
||||
/*************************************************************************
|
||||
Adds a slash or a backslash to the end of a string if it is missing
|
||||
and the string is not empty. */
|
||||
|
||||
char*
|
||||
srv_add_path_separator_if_needed(
|
||||
/*=============================*/
|
||||
/* out, own: string which has the separator if the
|
||||
string is not empty */
|
||||
char* str); /* in: null-terminated character string */
|
||||
/*************************************************************************
|
||||
Reads the data files and their sizes from a character string given in
|
||||
the .cnf file. */
|
||||
|
||||
ibool
|
||||
srv_parse_data_file_paths_and_sizes(
|
||||
/*================================*/
|
||||
/* out: TRUE if ok, FALSE if parsing
|
||||
error */
|
||||
char* str, /* in: the data file path string */
|
||||
char*** data_file_names, /* out, own: array of data file
|
||||
names */
|
||||
ulint** data_file_sizes, /* out, own: array of data file sizes
|
||||
in megabytes */
|
||||
ulint** data_file_is_raw_partition,/* out, own: array of flags
|
||||
showing which data files are raw
|
||||
partitions */
|
||||
ulint* n_data_files, /* out: number of data files */
|
||||
ibool* is_auto_extending, /* out: TRUE if the last data file is
|
||||
auto-extending */
|
||||
ulint* max_auto_extend_size); /* out: max auto extend size for the
|
||||
last file if specified, 0 if not */
|
||||
/*************************************************************************
|
||||
Reads log group home directories from a character string given in
|
||||
the .cnf file. */
|
||||
|
||||
ibool
|
||||
srv_parse_log_group_home_dirs(
|
||||
/*==========================*/
|
||||
/* out: TRUE if ok, FALSE if parsing
|
||||
error */
|
||||
char* str, /* in: character string */
|
||||
char*** log_group_home_dirs); /* out, own: log group home dirs */
|
||||
/********************************************************************
|
||||
Starts Innobase and creates a new database if database files
|
||||
are not found and the user wants. Server parameters are
|
||||
|
@@ -44,6 +44,15 @@ half-written pages in the data files. */
|
||||
void
|
||||
trx_sys_doublewrite_restore_corrupt_pages(void);
|
||||
/*===========================================*/
|
||||
/********************************************************************
|
||||
Determines if a page number is located inside the doublewrite buffer. */
|
||||
|
||||
ibool
|
||||
trx_doublewrite_page_inside(
|
||||
/*========================*/
|
||||
/* out: TRUE if the location is inside
|
||||
the two blocks of the doublewrite buffer */
|
||||
ulint page_no); /* in: page number */
|
||||
/*******************************************************************
|
||||
Checks if a page address is the trx sys header page. */
|
||||
UNIV_INLINE
|
||||
|
@ -55,6 +55,15 @@ ut_dulint_get_low(
|
||||
/* out: 32 bits in ulint */
|
||||
dulint d); /* in: dulint */
|
||||
/***********************************************************
|
||||
Converts a dulint (a struct of 2 ulints) to ib_longlong, which is a 64-bit
|
||||
integer type. */
|
||||
UNIV_INLINE
|
||||
ib_longlong
|
||||
ut_conv_dulint_to_longlong(
|
||||
/*=======================*/
|
||||
/* out: value in ib_longlong type */
|
||||
dulint d); /* in: dulint */
|
||||
/***********************************************************
|
||||
Tests if a dulint is zero. */
|
||||
UNIV_INLINE
|
||||
ibool
|
||||
|
@ -35,7 +35,7 @@ ut_rnd_gen_next_ulint(
|
||||
/*************************************************************
|
||||
The following function generates 'random' ulint integers which
|
||||
enumerate the value space (let there be N of them) of ulint integers
|
||||
in a pseudo random fashion. Note that the same integer is repeated
|
||||
in a pseudo-random fashion. Note that the same integer is repeated
|
||||
always after N calls to the generator. */
|
||||
UNIV_INLINE
|
||||
ulint
|
||||
|
@ -17,6 +17,16 @@ Created 1/20/1994 Heikki Tuuri
|
||||
|
||||
typedef time_t ib_time_t;
|
||||
|
||||
/************************************************************
|
||||
Gets the high 32 bits in a ulint. That is makes a shift >> 32,
|
||||
but since there seem to be compiler bugs in both gcc and Visual C++,
|
||||
we do this by a special conversion. */
|
||||
|
||||
ulint
|
||||
ut_get_high32(
|
||||
/*==========*/
|
||||
/* out: a >> 32 */
|
||||
ulint a); /* in: ulint */
|
||||
/**********************************************************
|
||||
Calculates the minimum of two ulints. */
|
||||
UNIV_INLINE
|
||||
@ -144,6 +154,15 @@ void
|
||||
ut_print_timestamp(
|
||||
/*===============*/
|
||||
FILE* file); /* in: file where to print */
|
||||
/**************************************************************
|
||||
Returns current year, month, day. */
|
||||
|
||||
void
|
||||
ut_get_year_month_day(
|
||||
/*==================*/
|
||||
ulint* year, /* out: current year */
|
||||
ulint* month, /* out: month */
|
||||
ulint* day); /* out: day */
|
||||
/*****************************************************************
|
||||
Runs an idle loop on CPU. The argument gives the desired delay
|
||||
in microseconds on 100 MHz Pentium + Visual C++. */
|
||||
|
@ -1,7 +1,7 @@
/******************************************************
Database log

(c) 1995-1997 InnoDB Oy
(c) 1995-1997 Innobase Oy

Created 12/9/1995 Heikki Tuuri
*******************************************************/
@ -24,6 +24,9 @@ Created 12/9/1995 Heikki Tuuri
#include "trx0sys.h"
#include "trx0trx.h"

/* Current free limit; protected by the log sys mutex; 0 means uninitialized */
ulint	log_fsp_current_free_limit = 0;

/* Global log system variable */
log_t*	log_sys	= NULL;

@ -95,6 +98,32 @@ void
log_archive_margin(void);
/*====================*/

/********************************************************************
Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint,
so that we know that the limit has been written to a log checkpoint field
on disk. */

void
log_fsp_current_free_limit_set_and_checkpoint(
/*==========================================*/
ulint	limit)	/* in: limit to set */
{
ibool	success;

mutex_enter(&(log_sys->mutex));

log_fsp_current_free_limit = limit;

mutex_exit(&(log_sys->mutex));

/* Try to make a synchronous checkpoint */

success = FALSE;

while (!success) {
success = log_checkpoint(TRUE, TRUE);
}
}

/********************************************************************
Returns the oldest modified block lsn in the pool, or log_sys->lsn if none
@ -436,6 +465,51 @@ log_group_calc_lsn_offset(
return(log_group_calc_real_offset(offset, group));
}

/***********************************************************************
Calculates where in log files we find a specified lsn. */

ulint
log_calc_where_lsn_is(
/*==================*/
/* out: log file number */
ib_longlong*	log_file_offset,	/* out: offset in that file
(including the header) */
dulint	first_header_lsn,	/* in: first log file start
lsn */
dulint	lsn,	/* in: lsn whose position to
determine */
ulint	n_log_files,	/* in: total number of log
files */
ib_longlong	log_file_size)	/* in: log file size
(including the header) */
{
ib_longlong	ib_lsn;
ib_longlong	ib_first_header_lsn;
ib_longlong	capacity	= log_file_size - LOG_FILE_HDR_SIZE;
ulint		file_no;
ib_longlong	add_this_many;

ib_lsn = ut_conv_dulint_to_longlong(lsn);
ib_first_header_lsn = ut_conv_dulint_to_longlong(first_header_lsn);

if (ib_lsn < ib_first_header_lsn) {
add_this_many = 1 + (ib_first_header_lsn - ib_lsn)
/ (capacity * (ib_longlong)n_log_files);
ib_lsn += add_this_many
* capacity * (ib_longlong)n_log_files;
}

ut_a(ib_lsn >= ib_first_header_lsn);

file_no = ((ulint)((ib_lsn - ib_first_header_lsn) / capacity))
% n_log_files;
*log_file_offset = (ib_lsn - ib_first_header_lsn) % capacity;

*log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE;

return(file_no);
}

/************************************************************
Sets the field values in group to correspond to a given lsn. For this function
to work, the values must already be correctly initialized to correspond to
@ -653,7 +727,7 @@ log_init(void)

#ifdef UNIV_LOG_DEBUG
recv_sys_create();
recv_sys_init();
recv_sys_init(FALSE, buf_pool_get_curr_size());

recv_sys->parse_start_lsn = log_sys->lsn;
recv_sys->scanned_lsn = log_sys->lsn;
@ -1002,9 +1076,28 @@ loop:
}

if (log_debug_writes) {
ulint	i;

printf(
"Writing log file segment to group %lu offset %lu len %lu\n",
group->id, next_offset, write_len);
"Writing log file segment to group %lu offset %lu len %lu\n"
"start lsn %lu %lu\n",
group->id, next_offset, write_len,
ut_dulint_get_high(start_lsn),
ut_dulint_get_low(start_lsn));
printf(
"First block n:o %lu last block n:o %lu\n",
log_block_get_hdr_no(buf),
log_block_get_hdr_no(
buf + write_len - OS_FILE_LOG_BLOCK_SIZE));
ut_a(log_block_get_hdr_no(buf)
== log_block_convert_lsn_to_no(start_lsn));

for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) {

ut_a(log_block_get_hdr_no(buf) + i
== log_block_get_hdr_no(buf
+ i * OS_FILE_LOG_BLOCK_SIZE));
}
}

if (log_do_write) {
@ -1346,7 +1439,7 @@ log_group_checkpoint(
ulint	i;

ut_ad(mutex_own(&(log_sys->mutex)));
ut_ad(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE);
ut_a(LOG_CHECKPOINT_SIZE <= OS_FILE_LOG_BLOCK_SIZE);

buf = group->checkpoint_buf;

@ -1394,6 +1487,15 @@ log_group_checkpoint(
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);

/* Starting from InnoDB-3.23.50, we also write info on allocated
size in the tablespace */

mach_write_to_4(buf + LOG_CHECKPOINT_FSP_FREE_LIMIT,
log_fsp_current_free_limit);

mach_write_to_4(buf + LOG_CHECKPOINT_FSP_MAGIC_N,
LOG_CHECKPOINT_FSP_MAGIC_N_VAL);

/* We alternate the physical place of the checkpoint info in the first
log file */

@ -1428,6 +1530,48 @@ log_group_checkpoint(
}
}

/**********************************************************
Writes info to a buffer of a log group when log files are created in
backup restoration. */

void
log_reset_first_header_and_checkpoint(
/*==================================*/
byte*	hdr_buf,/* in: buffer which will be written to the start
of the first log file */
dulint	lsn)	/* in: lsn of the start of the first log file
+ LOG_BLOCK_HDR_SIZE */
{
ulint	fold;
byte*	buf;

mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, lsn);

buf = hdr_buf + LOG_CHECKPOINT_1;

mach_write_to_8(buf + LOG_CHECKPOINT_NO, ut_dulint_zero);
mach_write_to_8(buf + LOG_CHECKPOINT_LSN, lsn);

mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET,
LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);

mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);

mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max);

fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold);

fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold);

/* Starting from InnoDB-3.23.50, we should also write info on
allocated size in the tablespace, but unfortunately we do not
know it here */
}

/**********************************************************
Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */

@ -2795,7 +2939,10 @@ log_check_log_recs(

ut_memcpy(scan_buf, start, end - start);

recv_scan_log_recs(FALSE, scan_buf, end - start,
recv_scan_log_recs(TRUE,
buf_pool_get_curr_size() -
RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE,
FALSE, scan_buf, end - start,
ut_dulint_align_down(buf_start_lsn,
OS_FILE_LOG_BLOCK_SIZE),
&contiguous_lsn, &scanned_lsn);
@ -1,7 +1,7 @@
|
||||
/******************************************************
|
||||
Recovery
|
||||
|
||||
(c) 1997 InnoDB Oy
|
||||
(c) 1997 Innobase Oy
|
||||
|
||||
Created 9/20/1997 Heikki Tuuri
|
||||
*******************************************************/
|
||||
@ -33,13 +33,6 @@ Created 9/20/1997 Heikki Tuuri
|
||||
#include "dict0boot.h"
|
||||
#include "fil0fil.h"
|
||||
|
||||
/* Size of block reads when the log groups are scanned forward to do a
|
||||
roll-forward */
|
||||
#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
|
||||
|
||||
/* Size of the parsing buffer */
|
||||
#define RECV_PARSING_BUF_SIZE LOG_BUFFER_SIZE
|
||||
|
||||
/* Log records are stored in the hash table in chunks at most of this size;
|
||||
this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */
|
||||
#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t))
|
||||
@ -69,6 +62,9 @@ ibool recv_no_ibuf_operations = FALSE;
|
||||
log scan */
|
||||
ulint recv_scan_print_counter = 0;
|
||||
|
||||
ibool recv_is_from_backup = FALSE;
|
||||
|
||||
|
||||
/************************************************************
|
||||
Creates the recovery system. */
|
||||
|
||||
@ -94,8 +90,11 @@ recv_sys_create(void)
|
||||
Inits the recovery system for a recovery operation. */
|
||||
|
||||
void
|
||||
recv_sys_init(void)
|
||||
/*===============*/
|
||||
recv_sys_init(
|
||||
/*==========*/
|
||||
ibool recover_from_backup, /* in: TRUE if this is called
|
||||
to recover from a hot backup */
|
||||
ulint available_memory) /* in: available memory in bytes */
|
||||
{
|
||||
if (recv_sys->heap != NULL) {
|
||||
|
||||
@ -104,13 +103,18 @@ recv_sys_init(void)
|
||||
|
||||
mutex_enter(&(recv_sys->mutex));
|
||||
|
||||
recv_sys->heap = mem_heap_create_in_buffer(256);
|
||||
if (!recover_from_backup) {
|
||||
recv_sys->heap = mem_heap_create_in_buffer(256);
|
||||
} else {
|
||||
recv_sys->heap = mem_heap_create(256);
|
||||
recv_is_from_backup = TRUE;
|
||||
}
|
||||
|
||||
recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
|
||||
recv_sys->len = 0;
|
||||
recv_sys->recovered_offset = 0;
|
||||
|
||||
recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 64);
|
||||
recv_sys->addr_hash = hash_create(available_memory / 64);
|
||||
recv_sys->n_addrs = 0;
|
||||
|
||||
recv_sys->apply_log_recs = FALSE;
|
||||
@ -337,7 +341,7 @@ recv_synchronize_groups(
|
||||
start_lsn = ut_dulint_align_down(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
|
||||
end_lsn = ut_dulint_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE);
|
||||
|
||||
ut_ad(ut_dulint_cmp(start_lsn, end_lsn) != 0);
|
||||
ut_a(ut_dulint_cmp(start_lsn, end_lsn) != 0);
|
||||
|
||||
log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block,
|
||||
up_to_date_group, start_lsn, end_lsn);
|
||||
@ -377,6 +381,35 @@ recv_synchronize_groups(
|
||||
mutex_enter(&(log_sys->mutex));
|
||||
}
|
||||
|
||||
/***************************************************************************
|
||||
Checks the consistency of the checkpoint info */
|
||||
static
|
||||
ibool
|
||||
recv_check_cp_is_consistent(
|
||||
/*========================*/
|
||||
/* out: TRUE if ok */
|
||||
byte* buf) /* in: buffer containing checkpoint info */
|
||||
{
|
||||
ulint fold;
|
||||
|
||||
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
|
||||
|
||||
if ((fold & 0xFFFFFFFF) != mach_read_from_4(buf
|
||||
+ LOG_CHECKPOINT_CHECKSUM_1)) {
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
|
||||
LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN);
|
||||
|
||||
if ((fold & 0xFFFFFFFF) != mach_read_from_4(buf
|
||||
+ LOG_CHECKPOINT_CHECKSUM_2)) {
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
/************************************************************
|
||||
Looks for the maximum consistent checkpoint from the log groups. */
|
||||
static
|
||||
@ -392,7 +425,6 @@ recv_find_max_checkpoint(
|
||||
dulint max_no;
|
||||
dulint checkpoint_no;
|
||||
ulint field;
|
||||
ulint fold;
|
||||
byte* buf;
|
||||
|
||||
group = UT_LIST_GET_FIRST(log_sys->log_groups);
|
||||
@ -410,17 +442,11 @@ recv_find_max_checkpoint(
|
||||
|
||||
log_group_read_checkpoint_info(group, field);
|
||||
|
||||
/* Check the consistency of the checkpoint info */
|
||||
fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1);
|
||||
|
||||
if ((fold & 0xFFFFFFFF)
|
||||
!= mach_read_from_4(buf
|
||||
+ LOG_CHECKPOINT_CHECKSUM_1)) {
|
||||
if (!recv_check_cp_is_consistent(buf)) {
|
||||
if (log_debug_writes) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Checkpoint in group %lu at %lu invalid, %lu, %lu\n",
|
||||
"InnoDB: Checkpoint in group %lu at %lu invalid, %lu\n",
|
||||
group->id, field,
|
||||
fold & 0xFFFFFFFF,
|
||||
mach_read_from_4(buf
|
||||
+ LOG_CHECKPOINT_CHECKSUM_1));
|
||||
|
||||
@ -429,23 +455,6 @@ recv_find_max_checkpoint(
|
||||
goto not_consistent;
|
||||
}
|
||||
|
||||
fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN,
|
||||
LOG_CHECKPOINT_CHECKSUM_2
|
||||
- LOG_CHECKPOINT_LSN);
|
||||
if ((fold & 0xFFFFFFFF)
|
||||
!= mach_read_from_4(buf
|
||||
+ LOG_CHECKPOINT_CHECKSUM_2)) {
|
||||
if (log_debug_writes) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Checkpoint in group %lu at %lu invalid, %lu, %lu\n",
|
||||
group->id, field,
|
||||
fold & 0xFFFFFFFF,
|
||||
mach_read_from_4(buf
|
||||
+ LOG_CHECKPOINT_CHECKSUM_2));
|
||||
}
|
||||
goto not_consistent;
|
||||
}
|
||||
|
||||
group->state = LOG_GROUP_OK;
|
||||
|
||||
group->lsn = mach_read_from_8(buf
|
||||
@ -476,7 +485,13 @@ recv_find_max_checkpoint(
|
||||
|
||||
if (*max_group == NULL) {
|
||||
|
||||
fprintf(stderr, "InnoDB: No valid checkpoint found\n");
|
||||
fprintf(stderr,
|
||||
"InnoDB: No valid checkpoint found.\n"
|
||||
"InnoDB: If this error appears when you are creating an InnoDB database,\n"
|
||||
"InnoDB: the problem may be that during an earlier attempt you managed\n"
|
||||
"InnoDB: to create the InnoDB data files, but log file creation failed.\n"
|
||||
"InnoDB: If that is the case, please refer to section 3.1 of\n"
|
||||
"InnoDB: http://www.innodb.com/ibman.html\n");
|
||||
|
||||
return(DB_ERROR);
|
||||
}
|
||||
@ -484,6 +499,162 @@ recv_find_max_checkpoint(
|
||||
return(DB_SUCCESS);
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
Reads the checkpoint info needed in hot backup. */
|
||||
|
||||
ibool
|
||||
recv_read_cp_info_for_backup(
|
||||
/*=========================*/
|
||||
/* out: TRUE if success */
|
||||
byte* hdr, /* in: buffer containing the log group header */
|
||||
dulint* lsn, /* out: checkpoint lsn */
|
||||
ulint* offset, /* out: checkpoint offset in the log group */
|
||||
ulint* fsp_limit,/* out: fsp limit, 1000000000 if the database
|
||||
is running with < version 3.23.50 of InnoDB */
|
||||
dulint* cp_no, /* out: checkpoint number */
|
||||
dulint* first_header_lsn)
|
||||
/* out: lsn of of the start of the first log file */
|
||||
{
|
||||
ulint max_cp = 0;
|
||||
dulint max_cp_no = ut_dulint_zero;
|
||||
byte* cp_buf;
|
||||
|
||||
cp_buf = hdr + LOG_CHECKPOINT_1;
|
||||
|
||||
if (recv_check_cp_is_consistent(cp_buf)) {
|
||||
max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
|
||||
max_cp = LOG_CHECKPOINT_1;
|
||||
}
|
||||
|
||||
cp_buf = hdr + LOG_CHECKPOINT_2;
|
||||
|
||||
if (recv_check_cp_is_consistent(cp_buf)) {
|
||||
if (ut_dulint_cmp(mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO),
|
||||
max_cp_no) > 0) {
|
||||
max_cp = LOG_CHECKPOINT_2;
|
||||
}
|
||||
}
|
||||
|
||||
if (max_cp == 0) {
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
cp_buf = hdr + max_cp;
|
||||
|
||||
*lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN);
|
||||
*offset = mach_read_from_4(cp_buf + LOG_CHECKPOINT_OFFSET);
|
||||
|
||||
/* If the user is running a pre-3.23.50 version of InnoDB, its
|
||||
checkpoint data does not contain the fsp limit info */
|
||||
if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N)
|
||||
== LOG_CHECKPOINT_FSP_MAGIC_N_VAL) {
|
||||
|
||||
*fsp_limit = mach_read_from_4(
|
||||
cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT);
|
||||
|
||||
if (*fsp_limit == 0) {
|
||||
*fsp_limit = 1000000000;
|
||||
}
|
||||
} else {
|
||||
*fsp_limit = 1000000000;
|
||||
}
|
||||
|
||||
/* printf("fsp limit %lu MB\n", *fsp_limit); */
|
||||
|
||||
*cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO);
|
||||
|
||||
*first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN);
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
Scans the log segment and n_bytes_scanned is set to the length of valid
|
||||
log scanned. */
|
||||
|
||||
void
|
||||
recv_scan_log_seg_for_backup(
|
||||
/*=========================*/
|
||||
byte* buf, /* in: buffer containing log data */
|
||||
ulint buf_len, /* in: data length in that buffer */
|
||||
dulint* scanned_lsn, /* in/out: lsn of buffer start,
|
||||
we return scanned lsn */
|
||||
ulint* scanned_checkpoint_no,
|
||||
/* in/out: 4 lowest bytes of the
|
||||
highest scanned checkpoint number so
|
||||
far */
|
||||
ulint* n_bytes_scanned)/* out: how much we were able to
|
||||
scan, smaller than buf_len if log
|
||||
data ended here */
|
||||
{
|
||||
ulint data_len;
|
||||
byte* log_block;
|
||||
ulint no;
|
||||
|
||||
*n_bytes_scanned = 0;
|
||||
|
||||
for (log_block = buf; log_block < buf + buf_len;
|
||||
log_block += OS_FILE_LOG_BLOCK_SIZE) {
|
||||
|
||||
no = log_block_get_hdr_no(log_block);
|
||||
|
||||
/* fprintf(stderr, "Log block header no %lu\n", no); */
|
||||
|
||||
if (no != log_block_get_trl_no(log_block)
|
||||
|| no != log_block_convert_lsn_to_no(*scanned_lsn)) {
|
||||
|
||||
/* printf(
|
||||
"Log block n:o %lu, trailer n:o %lu, scanned lsn n:o %lu\n",
|
||||
no, log_block_get_trl_no(log_block),
|
||||
log_block_convert_lsn_to_no(*scanned_lsn));
|
||||
*/
|
||||
/* Garbage or an incompletely written log block */
|
||||
|
||||
log_block += OS_FILE_LOG_BLOCK_SIZE;
|
||||
|
||||
/* printf(
|
||||
"Next log block n:o %lu, trailer n:o %lu\n",
|
||||
log_block_get_hdr_no(log_block),
|
||||
log_block_get_trl_no(log_block));
|
||||
*/
|
||||
break;
|
||||
}
|
||||
|
||||
if (*scanned_checkpoint_no > 0
|
||||
&& log_block_get_checkpoint_no(log_block)
|
||||
< *scanned_checkpoint_no
|
||||
&& *scanned_checkpoint_no
|
||||
- log_block_get_checkpoint_no(log_block)
|
||||
> 0x80000000) {
|
||||
|
||||
/* Garbage from a log buffer flush which was made
|
||||
before the most recent database recovery */
|
||||
|
||||
printf("Scanned cp n:o %lu, block cp n:o %lu\n",
|
||||
*scanned_checkpoint_no,
|
||||
log_block_get_checkpoint_no(log_block));
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
data_len = log_block_get_data_len(log_block);
|
||||
|
||||
*scanned_checkpoint_no
|
||||
= log_block_get_checkpoint_no(log_block);
|
||||
*scanned_lsn = ut_dulint_add(*scanned_lsn, data_len);
|
||||
|
||||
*n_bytes_scanned += data_len;
|
||||
|
||||
if (data_len < OS_FILE_LOG_BLOCK_SIZE) {
|
||||
/* Log data ends here */
|
||||
|
||||
/* printf("Log block data len %lu\n", data_len); */
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
Tries to parse a single log record body and also applies it to a page if
|
||||
specified. */
|
||||
@ -625,7 +796,6 @@ recv_get_fil_addr_struct(
|
||||
|
||||
recv_addr = HASH_GET_FIRST(recv_sys->addr_hash,
|
||||
recv_hash(space, page_no));
|
||||
|
||||
while (recv_addr) {
|
||||
if ((recv_addr->space == space)
|
||||
&& (recv_addr->page_no == page_no)) {
|
||||
@ -755,6 +925,10 @@ read in, or also for a page already in the buffer pool. */
|
||||
void
|
||||
recv_recover_page(
|
||||
/*==============*/
|
||||
ibool recover_backup, /* in: TRUE if we are recovering a backup
|
||||
page: then we do not acquire any latches
|
||||
since the page was read in outside the
|
||||
buffer pool */
|
||||
ibool just_read_in, /* in: TRUE if the i/o-handler calls this for
|
||||
a freshly read page */
|
||||
page_t* page, /* in: buffer page */
|
||||
@ -799,39 +973,48 @@ recv_recover_page(
|
||||
|
||||
mutex_exit(&(recv_sys->mutex));
|
||||
|
||||
block = buf_block_align(page);
|
||||
|
||||
if (just_read_in) {
|
||||
/* Move the ownership of the x-latch on the page to this OS
|
||||
thread, so that we can acquire a second x-latch on it. This
|
||||
is needed for the operations to the page to pass the debug
|
||||
checks. */
|
||||
|
||||
rw_lock_x_lock_move_ownership(&(block->lock));
|
||||
}
|
||||
|
||||
mtr_start(&mtr);
|
||||
|
||||
mtr_set_log_mode(&mtr, MTR_LOG_NONE);
|
||||
|
||||
success = buf_page_get_known_nowait(RW_X_LATCH, page, BUF_KEEP_OLD,
|
||||
if (!recover_backup) {
|
||||
block = buf_block_align(page);
|
||||
|
||||
if (just_read_in) {
|
||||
/* Move the ownership of the x-latch on the page to this OS
|
||||
thread, so that we can acquire a second x-latch on it. This
|
||||
is needed for the operations to the page to pass the debug
|
||||
checks. */
|
||||
|
||||
rw_lock_x_lock_move_ownership(&(block->lock));
|
||||
}
|
||||
|
||||
success = buf_page_get_known_nowait(RW_X_LATCH, page,
|
||||
BUF_KEEP_OLD,
|
||||
IB__FILE__, __LINE__,
|
||||
&mtr);
|
||||
ut_a(success);
|
||||
ut_a(success);
|
||||
|
||||
buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
|
||||
buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);
|
||||
}
|
||||
|
||||
/* Read the newest modification lsn from the page */
|
||||
page_lsn = mach_read_from_8(page + FIL_PAGE_LSN);
|
||||
|
||||
/* It may be that the page has been modified in the buffer pool: read
|
||||
the newest modification lsn there */
|
||||
if (!recover_backup) {
|
||||
/* It may be that the page has been modified in the buffer
|
||||
pool: read the newest modification lsn there */
|
||||
|
||||
page_newest_lsn = buf_frame_get_newest_modification(page);
|
||||
page_newest_lsn = buf_frame_get_newest_modification(page);
|
||||
|
||||
if (!ut_dulint_is_zero(page_newest_lsn)) {
|
||||
if (!ut_dulint_is_zero(page_newest_lsn)) {
|
||||
|
||||
page_lsn = page_newest_lsn;
|
||||
page_lsn = page_newest_lsn;
|
||||
}
|
||||
} else {
|
||||
/* In recovery from a backup we do not use the buffer
|
||||
pool */
|
||||
|
||||
page_newest_lsn = ut_dulint_zero;
|
||||
}
|
||||
|
||||
modification_to_page = FALSE;
|
||||
@ -852,13 +1035,13 @@ recv_recover_page(
|
||||
buf = ((byte*)(recv->data)) + sizeof(recv_data_t);
|
||||
}
|
||||
|
||||
if ((recv->type == MLOG_INIT_FILE_PAGE)
|
||||
|| (recv->type == MLOG_FULL_PAGE)) {
|
||||
/* A new file page may has been taken into use,
|
||||
if (recv->type == MLOG_INIT_FILE_PAGE
|
||||
|| recv->type == MLOG_FULL_PAGE) {
|
||||
/* A new file page may have been taken into use,
|
||||
or we have stored the full contents of the page:
|
||||
in this case it may be that the original log record
|
||||
type was MLOG_INIT_FILE_PAGE, and we replaced it
|
||||
with MLOG_FULL_PAGE, thus to we have to apply
|
||||
with MLOG_FULL_PAGE, thus we have to apply
|
||||
any record of type MLOG_FULL_PAGE */
|
||||
|
||||
page_lsn = page_newest_lsn;
|
||||
@ -885,6 +1068,13 @@ recv_recover_page(
|
||||
|
||||
recv_parse_or_apply_log_rec_body(recv->type, buf,
|
||||
buf + recv->len, page, &mtr);
|
||||
mach_write_to_8(page + UNIV_PAGE_SIZE
|
||||
- FIL_PAGE_END_LSN,
|
||||
ut_dulint_add(recv->start_lsn,
|
||||
recv->len));
|
||||
mach_write_to_8(page + FIL_PAGE_LSN,
|
||||
ut_dulint_add(recv->start_lsn,
|
||||
recv->len));
|
||||
}
|
||||
|
||||
if (recv->len > RECV_DATA_BLOCK_SIZE) {
|
||||
@ -903,7 +1093,7 @@ recv_recover_page(
|
||||
|
||||
mutex_exit(&(recv_sys->mutex));
|
||||
|
||||
if (modification_to_page) {
|
||||
if (!recover_backup && modification_to_page) {
|
||||
buf_flush_recv_note_modification(block, start_lsn, end_lsn);
|
||||
}
|
||||
|
||||
@ -1038,8 +1228,8 @@ loop:
|
||||
|
||||
buf_page_dbg_add_level(page,
|
||||
SYNC_NO_ORDER_CHECK);
|
||||
recv_recover_page(FALSE, page, space,
|
||||
page_no);
|
||||
recv_recover_page(FALSE, FALSE, page,
|
||||
space, page_no);
|
||||
mtr_commit(&mtr);
|
||||
} else {
|
||||
recv_read_in_area(space, page_no);
|
||||
@ -1111,6 +1301,95 @@ loop:
|
||||
mutex_exit(&(recv_sys->mutex));
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
Applies log records in the hash table to a backup. */
|
||||
|
||||
void
|
||||
recv_apply_log_recs_for_backup(
|
||||
/*===========================*/
|
||||
ulint n_data_files, /* in: number of data files */
|
||||
char** data_files, /* in: array containing the paths to the
|
||||
data files */
|
||||
ulint* file_sizes) /* in: sizes of the data files in database
|
||||
pages */
|
||||
{
|
||||
recv_addr_t* recv_addr;
|
||||
os_file_t data_file;
|
||||
ulint n_pages_total = 0;
|
||||
ulint nth_file = 0;
|
||||
ulint nth_page_in_file= 0;
|
||||
byte* page;
|
||||
ibool success;
|
||||
ulint i;
|
||||
|
||||
recv_sys->apply_log_recs = TRUE;
|
||||
recv_sys->apply_batch_on = TRUE;
|
||||
|
||||
page = buf_pool->frame_zero;
|
||||
|
||||
for (i = 0; i < n_data_files; i++) {
|
||||
n_pages_total += file_sizes[i];
|
||||
}
|
||||
|
||||
printf(
|
||||
"InnoDB: Starting an apply batch of log records to the database...\n"
|
||||
"InnoDB: Progress in percents: ");
|
||||
|
||||
for (i = 0; i < n_pages_total; i++) {
|
||||
|
||||
if (i == 0 || nth_page_in_file == file_sizes[nth_file]) {
|
||||
if (i != 0) {
|
||||
nth_file++;
|
||||
nth_page_in_file = 0;
|
||||
os_file_flush(data_file);
|
||||
os_file_close(data_file);
|
||||
}
|
||||
|
||||
data_file = os_file_create_simple(data_files[nth_file],
|
||||
OS_FILE_OPEN,
|
||||
OS_FILE_READ_WRITE,
|
||||
&success);
|
||||
ut_a(success);
|
||||
}
|
||||
|
||||
recv_addr = recv_get_fil_addr_struct(0, i);
|
||||
|
||||
if (recv_addr != NULL) {
|
||||
os_file_read(data_file, page,
|
||||
(nth_page_in_file << UNIV_PAGE_SIZE_SHIFT)
|
||||
& 0xFFFFFFFF,
|
||||
nth_page_in_file >> (32 - UNIV_PAGE_SIZE_SHIFT),
|
||||
UNIV_PAGE_SIZE);
|
||||
|
||||
recv_recover_page(TRUE, FALSE, page, 0, i);
|
||||
|
||||
buf_flush_init_for_writing(page,
|
||||
mach_read_from_8(page + FIL_PAGE_LSN),
|
||||
0, i);
|
||||
|
||||
os_file_write(data_files[nth_file],
|
||||
data_file, page,
|
||||
(nth_page_in_file << UNIV_PAGE_SIZE_SHIFT)
|
||||
& 0xFFFFFFFF,
|
||||
nth_page_in_file >> (32 - UNIV_PAGE_SIZE_SHIFT),
|
||||
UNIV_PAGE_SIZE);
|
||||
}
|
||||
|
||||
if ((100 * i) / n_pages_total
|
||||
!= (100 * (i + 1)) / n_pages_total) {
|
||||
printf("%lu ", (100 * i) / n_pages_total);
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
nth_page_in_file++;
|
||||
}
|
||||
|
||||
os_file_flush(data_file);
|
||||
os_file_close(data_file);
|
||||
|
||||
recv_sys_empty_hash();
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
In the debug version, updates the replica of a file page, based on a log
|
||||
record. */
|
||||
@ -1430,12 +1709,13 @@ recv_check_incomplete_log_recs(
|
||||
|
||||
/***********************************************************
|
||||
Parses log records from a buffer and stores them to a hash table to wait
|
||||
merging to file pages. If the hash table becomes too full, applies it
|
||||
automatically to file pages. */
|
||||
|
||||
void
|
||||
merging to file pages. */
|
||||
static
|
||||
ibool
|
||||
recv_parse_log_recs(
|
||||
/*================*/
|
||||
/* out: TRUE if the hash table of parsed log
|
||||
records became full */
|
||||
ibool store_to_hash) /* in: TRUE if the records should be stored
|
||||
to the hash table; this is set to FALSE if just
|
||||
debug checking is needed */
|
||||
@ -1462,7 +1742,7 @@ loop:
|
||||
|
||||
if (ptr == end_ptr) {
|
||||
|
||||
return;
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG;
|
||||
@ -1476,7 +1756,7 @@ loop:
|
||||
&page_no, &body);
|
||||
if (len == 0) {
|
||||
|
||||
return;
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len);
|
||||
@ -1487,7 +1767,7 @@ loop:
|
||||
that also the next log block should have been scanned
|
||||
in */
|
||||
|
||||
return;
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
recv_sys->recovered_offset += len;
|
||||
@ -1529,7 +1809,7 @@ loop:
|
||||
&page_no, &body);
|
||||
if (len == 0) {
|
||||
|
||||
return;
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) {
|
||||
@ -1570,27 +1850,9 @@ loop:
|
||||
that also the next log block should have been scanned
|
||||
in */
|
||||
|
||||
return;
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
if (2 * n_recs * (sizeof(recv_t) + sizeof(recv_addr_t))
|
||||
+ total_len
|
||||
+ mem_heap_get_size(recv_sys->heap)
|
||||
+ RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE
|
||||
> buf_pool_get_curr_size()) {
|
||||
|
||||
/* Hash table of log records will grow too big:
|
||||
empty it */
|
||||
|
||||
recv_apply_hashed_log_recs(FALSE);
|
||||
}
|
||||
|
||||
ut_ad(2 * n_recs * (sizeof(recv_t) + sizeof(recv_addr_t))
|
||||
+ total_len
|
||||
+ mem_heap_get_size(recv_sys->heap)
|
||||
+ RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE
|
||||
< buf_pool_get_curr_size());
|
||||
|
||||
/* Add all the records to the hash table */
|
||||
|
||||
ptr = recv_sys->buf + recv_sys->recovered_offset;
|
||||
@ -1628,17 +1890,6 @@ loop:
|
||||
}
|
||||
}
|
||||
|
||||
if (store_to_hash && buf_get_free_list_len()
|
||||
< RECV_POOL_N_FREE_BLOCKS) {
|
||||
|
||||
/* Hash table of log records has grown too big: empty it;
|
||||
FALSE means no ibuf operations allowed, as we cannot add
|
||||
new records to the log yet: they would be produced by ibuf
|
||||
operations */
|
||||
|
||||
recv_apply_hashed_log_recs(FALSE);
|
||||
}
|
||||
|
||||
goto loop;
|
||||
}
|
||||
|
||||
@ -1713,7 +1964,7 @@ recv_sys_add_to_parsing_buf(
|
||||
|
||||
recv_sys->len += end_offset - start_offset;
|
||||
|
||||
ut_ad(recv_sys->len <= RECV_PARSING_BUF_SIZE);
|
||||
ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE);
|
||||
}
|
||||
|
||||
return(TRUE);
|
||||
@ -1743,6 +1994,13 @@ recv_scan_log_recs(
|
||||
/*===============*/
|
||||
/* out: TRUE if limit_lsn has been reached, or
|
||||
not able to scan any more in this log group */
|
||||
ibool apply_automatically,/* in: TRUE if we want this function to
|
||||
apply log records automatically when the
|
||||
hash table becomes full; in the hot backup tool
|
||||
the tool does the applying, not this
|
||||
function */
|
||||
ulint available_memory,/* in: we let the hash table of recs to grow
|
||||
to this size, at the maximum */
|
||||
ibool store_to_hash, /* in: TRUE if the records should be stored
|
||||
to the hash table; this is set to FALSE if just
|
||||
debug checking is needed */
|
||||
@ -1764,6 +2022,8 @@ recv_scan_log_recs(
|
||||
ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0);
|
||||
ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
|
||||
ut_ad(len > 0);
|
||||
ut_a(apply_automatically <= TRUE);
|
||||
ut_a(store_to_hash <= TRUE);
|
||||
|
||||
finished = FALSE;
|
||||
|
||||
@ -1845,6 +2105,13 @@ recv_scan_log_recs(
|
||||
/* We were able to find more log data: add it to the
|
||||
parsing buffer if parse_start_lsn is already non-zero */
|
||||
|
||||
if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE
|
||||
>= RECV_PARSING_BUF_SIZE) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Error: log parsing buffer overflow. Recovery may have failed!\n");
|
||||
finished = TRUE;
|
||||
}
|
||||
|
||||
more_data = recv_sys_add_to_parsing_buf(log_block,
|
||||
scanned_lsn);
|
||||
recv_sys->scanned_lsn = scanned_lsn;
|
||||
@ -1863,25 +2130,36 @@ recv_scan_log_recs(
|
||||
|
||||
*group_scanned_lsn = scanned_lsn;
|
||||
|
||||
if (more_data) {
|
||||
if (recv_needed_recovery || recv_is_from_backup) {
|
||||
recv_scan_print_counter++;
|
||||
|
||||
if (recv_scan_print_counter < 10
|
||||
|| (recv_scan_print_counter % 10 == 0)) {
|
||||
if (finished || (recv_scan_print_counter % 80 == 0)) {
|
||||
|
||||
fprintf(stderr,
|
||||
"InnoDB: Doing recovery: scanned up to log sequence number %lu %lu\n",
|
||||
ut_dulint_get_high(*group_scanned_lsn),
|
||||
ut_dulint_get_low(*group_scanned_lsn));
|
||||
if (recv_scan_print_counter == 10) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: After this prints a line for every 10th scan sweep:\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (more_data) {
|
||||
/* Try to parse more log records */
|
||||
|
||||
recv_parse_log_recs(store_to_hash);
|
||||
|
||||
if (store_to_hash && mem_heap_get_size(recv_sys->heap)
|
||||
> available_memory
|
||||
&& apply_automatically) {
|
||||
|
||||
/* Hash table of log records has grown too big:
|
||||
empty it; FALSE means no ibuf operations
|
||||
allowed, as we cannot add new records to the
|
||||
log yet: they would be produced by ibuf
|
||||
operations */
|
||||
|
||||
recv_apply_hashed_log_recs(FALSE);
|
||||
}
|
||||
|
||||
if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) {
|
||||
/* Move parsing buffer data to the buffer start */
|
||||
|
||||
@ -1918,10 +2196,12 @@ recv_group_scan_log_recs(
|
||||
log_group_read_log_seg(LOG_RECOVER, log_sys->buf,
|
||||
group, start_lsn, end_lsn);
|
||||
|
||||
finished = recv_scan_log_recs(TRUE, log_sys->buf,
|
||||
RECV_SCAN_SIZE, start_lsn,
|
||||
contiguous_lsn,
|
||||
group_scanned_lsn);
|
||||
finished = recv_scan_log_recs(TRUE,
|
||||
buf_pool_get_curr_size()
|
||||
- RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE,
|
||||
TRUE, log_sys->buf,
|
||||
RECV_SCAN_SIZE, start_lsn,
|
||||
contiguous_lsn, group_scanned_lsn);
|
||||
start_lsn = end_lsn;
|
||||
}
|
||||
|
||||
@ -1969,7 +2249,7 @@ recv_recovery_from_checkpoint_start(
|
||||
if (type == LOG_CHECKPOINT) {
|
||||
|
||||
recv_sys_create();
|
||||
recv_sys_init();
|
||||
recv_sys_init(FALSE, buf_pool_get_curr_size());
|
||||
}
|
||||
|
||||
if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) {
|
||||
@ -2280,6 +2560,84 @@ recv_reset_logs(
|
||||
mutex_enter(&(log_sys->mutex));
|
||||
}
|
||||
|
||||
/**********************************************************
|
||||
Creates new log files after a backup has been restored. */
|
||||
|
||||
void
|
||||
recv_reset_log_files_for_backup(
|
||||
/*============================*/
|
||||
char* log_dir, /* in: log file directory path */
|
||||
ulint n_log_files, /* in: number of log files */
|
||||
ulint log_file_size, /* in: log file size */
|
||||
dulint lsn) /* in: new start lsn, must be divisible by
|
||||
OS_FILE_LOG_BLOCK_SIZE */
|
||||
{
|
||||
os_file_t log_file;
|
||||
ibool success;
|
||||
byte* buf;
|
||||
ulint i;
|
||||
char name[5000];
|
||||
|
||||
buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
|
||||
|
||||
for (i = 0; i < n_log_files; i++) {
|
||||
|
||||
sprintf(name, "%sib_logfile%lu", log_dir, i);
|
||||
|
||||
log_file = os_file_create_simple(name, OS_FILE_CREATE,
|
||||
OS_FILE_READ_WRITE, &success);
|
||||
if (!success) {
|
||||
printf(
|
||||
"InnoDB: Cannot create %s. Check that the file does not exist yet.\n", name);
|
||||
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf(
|
||||
"Setting log file size to %lu %lu\n", ut_get_high32(log_file_size),
|
||||
log_file_size & 0xFFFFFFFF);
|
||||
|
||||
success = os_file_set_size(name, log_file,
|
||||
log_file_size & 0xFFFFFFFF,
|
||||
ut_get_high32(log_file_size));
|
||||
|
||||
if (!success) {
|
||||
printf(
|
||||
"InnoDB: Cannot set %s size to %lu %lu\n", name, ut_get_high32(log_file_size),
|
||||
log_file_size & 0xFFFFFFFF);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
os_file_flush(log_file);
|
||||
os_file_close(log_file);
|
||||
}
|
||||
|
||||
/* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */
|
||||
|
||||
log_reset_first_header_and_checkpoint(buf,
|
||||
ut_dulint_add(lsn, LOG_BLOCK_HDR_SIZE));
|
||||
|
||||
log_block_init(buf + LOG_FILE_HDR_SIZE, lsn);
|
||||
log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE,
|
||||
LOG_BLOCK_HDR_SIZE);
|
||||
sprintf(name, "%sib_logfile%lu", log_dir, 0);
|
||||
|
||||
log_file = os_file_create_simple(name, OS_FILE_OPEN,
|
||||
OS_FILE_READ_WRITE, &success);
|
||||
if (!success) {
|
||||
printf("InnoDB: Cannot open %s.\n", name);
|
||||
|
||||
exit(1);
|
||||
}
|
||||
|
||||
os_file_write(name, log_file, buf, 0, 0,
|
||||
LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE);
|
||||
os_file_flush(log_file);
|
||||
os_file_close(log_file);
|
||||
|
||||
ut_free(buf);
|
||||
}
|
||||
|
||||
/**********************************************************
|
||||
Reads from the archive of a log group and performs recovery. */
|
||||
static
|
||||
@ -2296,13 +2654,13 @@ log_group_recover_from_archive_file(
|
||||
dulint dummy_lsn;
|
||||
dulint scanned_lsn;
|
||||
ulint len;
|
||||
char name[10000];
|
||||
ibool ret;
|
||||
byte* buf;
|
||||
ulint read_offset;
|
||||
ulint file_size;
|
||||
ulint file_size_high;
|
||||
int input_char;
|
||||
char name[10000];
|
||||
|
||||
try_open_again:
|
||||
buf = log_sys->buf;
|
||||
@ -2438,9 +2796,11 @@ ask_again:
|
||||
group->archive_space_id, read_offset / UNIV_PAGE_SIZE,
|
||||
read_offset % UNIV_PAGE_SIZE, len, buf, NULL);
|
||||
|
||||
|
||||
ret = recv_scan_log_recs(TRUE, buf, len, start_lsn,
|
||||
&dummy_lsn, &scanned_lsn);
|
||||
ret = recv_scan_log_recs(TRUE,
|
||||
buf_pool_get_curr_size() -
|
||||
RECV_POOL_N_FREE_BLOCKS * UNIV_PAGE_SIZE,
|
||||
TRUE, buf, len, start_lsn,
|
||||
&dummy_lsn, &scanned_lsn);
|
||||
|
||||
if (ut_dulint_cmp(scanned_lsn, file_end_lsn) == 0) {
|
||||
|
||||
@ -2485,7 +2845,7 @@ recv_recovery_from_archive_start(
|
||||
ulint err;
|
||||
|
||||
recv_sys_create();
|
||||
recv_sys_init();
|
||||
recv_sys_init(FALSE, buf_pool_get_curr_size());
|
||||
|
||||
sync_order_checks_on = TRUE;
|
||||
|
||||
|
@ -234,7 +234,8 @@ mem_heap_add_block(
|
||||
new_size = 2 * mem_block_get_len(block);
|
||||
|
||||
if (heap->type != MEM_HEAP_DYNAMIC) {
|
||||
ut_ad(n <= MEM_MAX_ALLOC_IN_BUF);
|
||||
/* From the buffer pool we allocate buffer frames */
|
||||
ut_a(n <= MEM_MAX_ALLOC_IN_BUF);
|
||||
|
||||
if (new_size > MEM_MAX_ALLOC_IN_BUF) {
|
||||
new_size = MEM_MAX_ALLOC_IN_BUF;
|
||||
@ -249,7 +250,7 @@ mem_heap_add_block(
|
||||
}
|
||||
|
||||
new_block = mem_heap_create_block(heap, new_size, NULL, heap->type,
|
||||
heap->file_name, heap->line);
|
||||
heap->file_name, heap->line);
|
||||
if (new_block == NULL) {
|
||||
|
||||
return(NULL);
|
||||
|
@ -10,17 +10,22 @@ Created 10/21/1995 Heikki Tuuri
|
||||
#include "os0sync.h"
|
||||
#include "ut0mem.h"
|
||||
#include "srv0srv.h"
|
||||
#include "trx0sys.h"
|
||||
#include "fil0fil.h"
|
||||
|
||||
#undef HAVE_FDATASYNC
|
||||
|
||||
#undef UNIV_NON_BUFFERED_IO
|
||||
|
||||
#ifdef POSIX_ASYNC_IO
|
||||
/* We assume in this case that the OS has standard Posix aio (at least SunOS
|
||||
2.6, HP-UX 11i and AIX 4.3 have) */
|
||||
|
||||
#endif
|
||||
|
||||
/* If the following is set to TRUE, we do not call os_file_flush in every
|
||||
os_file_write. We can set this TRUE if the doublewrite buffer is used. */
|
||||
ibool os_do_not_call_flush_at_each_write = FALSE;
|
||||
|
||||
/* We use these mutexes to protect lseek + file i/o operation, if the
|
||||
OS does not provide an atomic pread or pwrite, or similar */
|
||||
#define OS_FILE_N_SEEK_MUTEXES 16
|
||||
@ -118,6 +123,9 @@ ulint os_n_file_writes_old = 0;
|
||||
ulint os_n_fsyncs_old = 0;
|
||||
time_t os_last_printout;
|
||||
|
||||
ibool os_has_said_disk_full = FALSE;
|
||||
|
||||
|
||||
/***************************************************************************
|
||||
Gets the operating system version. Currently works only on Windows. */
|
||||
|
||||
@ -167,27 +175,28 @@ os_file_get_last_error(void)
|
||||
|
||||
err = (ulint) GetLastError();
|
||||
|
||||
if (err != ERROR_FILE_EXISTS) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Operating system error number %li in a file operation.\n"
|
||||
if (err != ERROR_FILE_EXISTS && err != ERROR_DISK_FULL) {
|
||||
ut_print_timestamp(stderr);
|
||||
fprintf(stderr,
|
||||
" InnoDB: Operating system error number %li in a file operation.\n"
|
||||
"InnoDB: See http://www.innodb.com/ibman.html for installation help.\n",
|
||||
(long) err);
|
||||
|
||||
if (err == ERROR_PATH_NOT_FOUND) {
|
||||
if (err == ERROR_PATH_NOT_FOUND) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: The error means the system cannot find the path specified.\n"
|
||||
"InnoDB: In installation you must create directories yourself, InnoDB\n"
|
||||
"InnoDB: does not create them.\n");
|
||||
} else if (err == ERROR_ACCESS_DENIED) {
|
||||
} else if (err == ERROR_ACCESS_DENIED) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: The error means mysqld does not have the access rights to\n"
|
||||
"InnoDB: the directory. It may also be you have created a subdirectory\n"
|
||||
"InnoDB: of the same name as a data file.\n");
|
||||
} else {
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Look from section 13.2 at http://www.innodb.com/ibman.html\n"
|
||||
"InnoDB: what the error number means.\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (err == ERROR_FILE_NOT_FOUND) {
|
||||
@ -202,26 +211,28 @@ os_file_get_last_error(void)
|
||||
#else
|
||||
err = (ulint) errno;
|
||||
|
||||
if (err != EEXIST) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Operating system error number %li in a file operation.\n"
|
||||
if (err != EEXIST && err != ENOSPC ) {
|
||||
ut_print_timestamp(stderr);
|
||||
|
||||
fprintf(stderr,
|
||||
" InnoDB: Operating system error number %li in a file operation.\n"
|
||||
"InnoDB: See http://www.innodb.com/ibman.html for installation help.\n",
|
||||
(long) err);
|
||||
|
||||
if (err == ENOENT) {
|
||||
if (err == ENOENT) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: The error means the system cannot find the path specified.\n"
|
||||
"InnoDB: In installation you must create directories yourself, InnoDB\n"
|
||||
"InnoDB: does not create them.\n");
|
||||
} else if (err == EACCES) {
|
||||
} else if (err == EACCES) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: The error means mysqld does not have the access rights to\n"
|
||||
"InnoDB: the directory.\n");
|
||||
} else {
|
||||
} else {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Look from section 13.2 at http://www.innodb.com/ibman.html\n"
|
||||
"InnoDB: what the error number means or use the perror program of MySQL.\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (err == ENOSPC ) {
|
||||
@ -259,18 +270,26 @@ os_file_handle_error(
|
||||
err = os_file_get_last_error();
|
||||
|
||||
if (err == OS_FILE_DISK_FULL) {
|
||||
fprintf(stderr, "\n");
|
||||
if (name) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Encountered a problem with file %s.\n",
|
||||
name);
|
||||
}
|
||||
fprintf(stderr,
|
||||
"InnoDB: Cannot continue operation.\n"
|
||||
"InnoDB: Disk is full. Try to clean the disk to free space.\n"
|
||||
"InnoDB: Delete a possible created file and restart.\n");
|
||||
/* We only print a warning about disk full once */
|
||||
|
||||
exit(1);
|
||||
if (os_has_said_disk_full) {
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
if (name) {
|
||||
ut_print_timestamp(stderr);
|
||||
fprintf(stderr,
|
||||
" InnoDB: Encountered a problem with file %s\n", name);
|
||||
}
|
||||
|
||||
ut_print_timestamp(stderr);
|
||||
fprintf(stderr,
|
||||
" InnoDB: Disk is full. Try to clean the disk to free space.\n");
|
||||
|
||||
os_has_said_disk_full = TRUE;
|
||||
|
||||
return(FALSE);
|
||||
|
||||
} else if (err == OS_FILE_AIO_RESOURCES_RESERVED) {
|
||||
return(TRUE);
|
||||
@ -290,6 +309,130 @@ os_file_handle_error(
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
Creates the seek mutexes used in positioned reads and writes. */
|
||||
|
||||
void
|
||||
os_io_init_simple(void)
|
||||
/*===================*/
|
||||
{
|
||||
ulint i;
|
||||
|
||||
for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
|
||||
os_file_seek_mutexes[i] = os_mutex_create(NULL);
|
||||
}
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
A simple function to open or create a file. */
|
||||
|
||||
os_file_t
|
||||
os_file_create_simple(
|
||||
/*==================*/
|
||||
/* out, own: handle to the file, not defined if error,
|
||||
error number can be retrieved with os_get_last_error */
|
||||
char* name, /* in: name of the file or path as a null-terminated
|
||||
string */
|
||||
ulint create_mode,/* in: OS_FILE_OPEN if an existing file is opened
|
||||
(if does not exist, error), or OS_FILE_CREATE if a new
|
||||
file is created (if exists, error) */
|
||||
ulint access_type,/* in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */
|
||||
ibool* success)/* out: TRUE if succeed, FALSE if error */
|
||||
{
|
||||
#ifdef __WIN__
|
||||
os_file_t file;
|
||||
DWORD create_flag;
|
||||
DWORD access;
|
||||
DWORD attributes = 0;
|
||||
ibool retry;
|
||||
|
||||
try_again:
|
||||
ut_a(name);
|
||||
|
||||
if (create_mode == OS_FILE_OPEN) {
|
||||
create_flag = OPEN_EXISTING;
|
||||
} else if (create_mode == OS_FILE_CREATE) {
|
||||
create_flag = CREATE_NEW;
|
||||
} else {
|
||||
create_flag = 0;
|
||||
ut_error;
|
||||
}
|
||||
|
||||
if (access_type == OS_FILE_READ_ONLY) {
|
||||
access = GENERIC_READ;
|
||||
} else if (access_type == OS_FILE_READ_WRITE) {
|
||||
access = GENERIC_READ | GENERIC_WRITE;
|
||||
} else {
|
||||
access = 0;
|
||||
ut_error;
|
||||
}
|
||||
|
||||
file = CreateFile(name,
|
||||
access,
|
||||
FILE_SHARE_READ | FILE_SHARE_WRITE,
|
||||
/* file can be read and written
|
||||
also by other processes */
|
||||
NULL, /* default security attributes */
|
||||
create_flag,
|
||||
attributes,
|
||||
NULL); /* no template file */
|
||||
|
||||
if (file == INVALID_HANDLE_VALUE) {
|
||||
*success = FALSE;
|
||||
|
||||
retry = os_file_handle_error(file, name);
|
||||
|
||||
if (retry) {
|
||||
goto try_again;
|
||||
}
|
||||
} else {
|
||||
*success = TRUE;
|
||||
}
|
||||
|
||||
return(file);
|
||||
#else
|
||||
os_file_t file;
|
||||
int create_flag;
|
||||
ibool retry;
|
||||
|
||||
try_again:
|
||||
ut_a(name);
|
||||
|
||||
if (create_mode == OS_FILE_OPEN) {
|
||||
if (access_type == OS_FILE_READ_ONLY) {
|
||||
create_flag = O_RDONLY;
|
||||
} else {
|
||||
create_flag = O_RDWR;
|
||||
}
|
||||
} else if (create_mode == OS_FILE_CREATE) {
|
||||
create_flag = O_RDWR | O_CREAT | O_EXCL;
|
||||
} else {
|
||||
create_flag = 0;
|
||||
ut_error;
|
||||
}
|
||||
|
||||
if (create_mode == OS_FILE_CREATE) {
|
||||
file = open(name, create_flag, S_IRUSR | S_IWUSR | S_IRGRP
|
||||
| S_IWGRP | S_IROTH | S_IWOTH);
|
||||
} else {
|
||||
file = open(name, create_flag);
|
||||
}
|
||||
|
||||
if (file == -1) {
|
||||
*success = FALSE;
|
||||
|
||||
retry = os_file_handle_error(file, name);
|
||||
|
||||
if (retry) {
|
||||
goto try_again;
|
||||
}
|
||||
} else {
|
||||
*success = TRUE;
|
||||
}
|
||||
|
||||
return(file);
|
||||
#endif
|
||||
}
|
||||
/********************************************************************
|
||||
Opens an existing file or creates a new. */
|
||||
|
||||
@ -355,8 +498,9 @@ try_again:
|
||||
file = CreateFile(name,
|
||||
GENERIC_READ | GENERIC_WRITE, /* read and write
|
||||
access */
|
||||
FILE_SHARE_READ,/* file can be read by other
|
||||
processes */
|
||||
FILE_SHARE_READ | FILE_SHARE_WRITE,
|
||||
/* file can be read and written
|
||||
also by other processes */
|
||||
NULL, /* default security attributes */
|
||||
create_flag,
|
||||
attributes,
|
||||
@ -494,6 +638,11 @@ os_file_get_size(
|
||||
|
||||
offs = lseek(file, 0, SEEK_END);
|
||||
|
||||
if (offs == ((off_t)-1)) {
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
if (sizeof(off_t) > 4) {
|
||||
*size = (ulint)(offs & 0xFFFFFFFF);
|
||||
*size_high = (ulint)(offs >> 32);
|
||||
@ -524,13 +673,11 @@ os_file_set_size(
|
||||
ib_longlong low;
|
||||
ulint n_bytes;
|
||||
ibool ret;
|
||||
ibool retry;
|
||||
byte* buf;
|
||||
ulint i;
|
||||
|
||||
ut_a(size == (size & 0xFFFFFFFF));
|
||||
|
||||
try_again:
|
||||
/* We use a very big 8 MB buffer in writing because Linux may be
|
||||
extremely slow in fsync on 1 MB writes */
|
||||
|
||||
@ -571,14 +718,6 @@ try_again:
|
||||
}
|
||||
|
||||
error_handling:
|
||||
retry = os_file_handle_error(file, name);
|
||||
|
||||
if (retry) {
|
||||
goto try_again;
|
||||
}
|
||||
|
||||
ut_error;
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
@ -725,8 +864,7 @@ os_file_pwrite(
|
||||
64-bit address */
|
||||
|
||||
if (sizeof(off_t) > 4) {
|
||||
offs = (off_t)offset + (((off_t)offset_high) << 32);
|
||||
|
||||
offs = (off_t)offset + (((off_t)offset_high) << 32);
|
||||
} else {
|
||||
offs = (off_t)offset;
|
||||
|
||||
@ -743,7 +881,7 @@ os_file_pwrite(
|
||||
|
||||
if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
|
||||
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
|
||||
&& !trx_doublewrite) {
|
||||
&& !os_do_not_call_flush_at_each_write) {
|
||||
|
||||
/* Always do fsync to reduce the probability that when
|
||||
the OS crashes, a database page is only partially
|
||||
@ -774,7 +912,7 @@ os_file_pwrite(
|
||||
|
||||
if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
|
||||
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
|
||||
&& !trx_doublewrite) {
|
||||
&& !os_do_not_call_flush_at_each_write) {
|
||||
|
||||
/* Always do fsync to reduce the probability that when
|
||||
the OS crashes, a database page is only partially
|
||||
@ -899,13 +1037,12 @@ os_file_write(
|
||||
DWORD ret2;
|
||||
DWORD low;
|
||||
DWORD high;
|
||||
ibool retry;
|
||||
ulint i;
|
||||
|
||||
ut_a((offset & 0xFFFFFFFF) == offset);
|
||||
|
||||
os_n_file_writes++;
|
||||
try_again:
|
||||
|
||||
ut_ad(file);
|
||||
ut_ad(buf);
|
||||
ut_ad(n > 0);
|
||||
@ -924,7 +1061,15 @@ try_again:
|
||||
|
||||
os_mutex_exit(os_file_seek_mutexes[i]);
|
||||
|
||||
goto error_handling;
|
||||
ut_print_timestamp(stderr);
|
||||
|
||||
fprintf(stderr,
|
||||
" InnoDB: Error: File pointer positioning to file %s failed at\n"
|
||||
"InnoDB: offset %lu %lu. Operating system error number %lu.\n",
|
||||
name, offset_high, offset,
|
||||
(ulint)GetLastError());
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
ret = WriteFile(file, buf, n, &len, NULL);
|
||||
@ -932,38 +1077,61 @@ try_again:
|
||||
/* Always do fsync to reduce the probability that when the OS crashes,
|
||||
a database page is only partially physically written to disk. */
|
||||
|
||||
if (!trx_doublewrite) {
|
||||
if (!os_do_not_call_flush_at_each_write) {
|
||||
ut_a(TRUE == os_file_flush(file));
|
||||
}
|
||||
|
||||
os_mutex_exit(os_file_seek_mutexes[i]);
|
||||
|
||||
if (ret && len == n) {
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
if (!os_has_said_disk_full) {
|
||||
|
||||
ut_print_timestamp(stderr);
|
||||
|
||||
fprintf(stderr,
|
||||
" InnoDB: Error: Write to file %s failed at offset %lu %lu.\n"
|
||||
"InnoDB: %lu bytes should have been written, only %lu were written.\n"
|
||||
"InnoDB: Operating system error number %lu.\n"
|
||||
"InnoDB: Check that your OS and file system support files of this size.\n"
|
||||
"InnoDB: Check also the disk is not full or a disk quota exceeded.\n",
|
||||
name, offset_high, offset, n, len,
|
||||
(ulint)GetLastError());
|
||||
|
||||
os_has_said_disk_full = TRUE;
|
||||
}
|
||||
|
||||
return(FALSE);
|
||||
#else
|
||||
ibool retry;
|
||||
ssize_t ret;
|
||||
|
||||
try_again:
|
||||
ret = os_file_pwrite(file, buf, n, offset, offset_high);
|
||||
|
||||
if ((ulint)ret == n) {
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
#endif
|
||||
#ifdef __WIN__
|
||||
error_handling:
|
||||
#endif
|
||||
retry = os_file_handle_error(file, name);
|
||||
|
||||
if (retry) {
|
||||
goto try_again;
|
||||
if (!os_has_said_disk_full) {
|
||||
|
||||
ut_print_timestamp(stderr);
|
||||
|
||||
fprintf(stderr,
|
||||
" InnoDB: Error: Write to file %s failed at offset %lu %lu.\n"
|
||||
"InnoDB: %lu bytes should have been written, only %lu were written.\n"
|
||||
"InnoDB: Operating system error number %lu.\n"
|
||||
"InnoDB: Check that your OS and file system support files of this size.\n"
|
||||
"InnoDB: Check also the disk is not full or a disk quota exceeded.\n",
|
||||
name, offset_high, offset, n, ret, (ulint)errno);
|
||||
|
||||
os_has_said_disk_full = TRUE;
|
||||
}
|
||||
|
||||
ut_error;
|
||||
|
||||
return(FALSE);
|
||||
#endif
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
@ -1034,7 +1202,8 @@ os_aio_array_create(
|
||||
}
|
||||
|
||||
/****************************************************************************
|
||||
Initializes the asynchronous io system. Creates separate aio array for
|
||||
Initializes the asynchronous io system. Calls also os_io_init_simple.
|
||||
Creates a separate aio array for
|
||||
non-ibuf read and write, a third aio array for the ibuf i/o, with just one
|
||||
segment, two aio arrays for log reads and writes with one segment, and a
|
||||
synchronous aio array of the specified size. The combined number of segments
|
||||
@ -1061,6 +1230,8 @@ os_aio_init(
|
||||
ut_ad(n % n_segments == 0);
|
||||
ut_ad(n_segments >= 4);
|
||||
|
||||
os_io_init_simple();
|
||||
|
||||
n_per_seg = n / n_segments;
|
||||
n_write_segs = (n_segments - 2) / 2;
|
||||
n_read_segs = n_segments - 2 - n_write_segs;
|
||||
@ -1081,10 +1252,6 @@ os_aio_init(
|
||||
|
||||
os_aio_validate();
|
||||
|
||||
for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) {
|
||||
os_file_seek_mutexes[i] = os_mutex_create(NULL);
|
||||
}
|
||||
|
||||
os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*));
|
||||
|
||||
for (i = 0; i < n_segments; i++) {
|
||||
@ -1742,7 +1909,8 @@ os_aio_windows_handle(
|
||||
if (ret && len == slot->len) {
|
||||
ret_val = TRUE;
|
||||
|
||||
if (slot->type == OS_FILE_WRITE && !trx_doublewrite) {
|
||||
if (slot->type == OS_FILE_WRITE
|
||||
&& !os_do_not_call_flush_at_each_write) {
|
||||
ut_a(TRUE == os_file_flush(slot->file));
|
||||
}
|
||||
} else {
|
||||
@ -1827,7 +1995,8 @@ os_aio_posix_handle(
|
||||
*message1 = slot->message1;
|
||||
*message2 = slot->message2;
|
||||
|
||||
if (slot->type == OS_FILE_WRITE && !trx_doublewrite) {
|
||||
if (slot->type == OS_FILE_WRITE
|
||||
&& !os_do_not_call_flush_at_each_write) {
|
||||
ut_a(TRUE == os_file_flush(slot->file));
|
||||
}
|
||||
|
||||
|
@ -55,7 +55,8 @@ cmp_debug_dtuple_rec_with_match(
|
||||
contains the value for current comparison */
|
||||
/*****************************************************************
|
||||
This function is used to compare two data fields for which the data type
|
||||
is such that we must use MySQL code to compare them. */
|
||||
is such that we must use MySQL code to compare them. The prototype here
|
||||
must be a copy of the one in ha_innobase.cc! */
|
||||
|
||||
int
|
||||
innobase_mysql_cmp(
|
||||
|
@ -391,7 +391,7 @@ row_ins_check_foreign_constraint(
|
||||
/* out: DB_SUCCESS, DB_LOCK_WAIT,
|
||||
DB_NO_REFERENCED_ROW,
|
||||
or DB_ROW_IS_REFERENCED */
|
||||
ibool check_ref,/* in: TRUE If we want to check that
|
||||
ibool check_ref,/* in: TRUE if we want to check that
|
||||
the referenced table is ok, FALSE if we
|
||||
want to check the foreign key table */
|
||||
dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the
|
||||
@ -411,10 +411,23 @@ row_ins_check_foreign_constraint(
|
||||
ibool moved;
|
||||
int cmp;
|
||||
ulint err;
|
||||
ulint i;
|
||||
mtr_t mtr;
|
||||
|
||||
ut_ad(rw_lock_own(&dict_foreign_key_check_lock, RW_LOCK_SHARED));
|
||||
|
||||
/* If any of the foreign key fields in entry is SQL NULL, we
|
||||
suppress the foreign key check: this is compatible with Oracle,
|
||||
for example */
|
||||
|
||||
for (i = 0; i < foreign->n_fields; i++) {
|
||||
if (UNIV_SQL_NULL == dfield_get_len(
|
||||
dtuple_get_nth_field(entry, i))) {
|
||||
|
||||
return(DB_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
if (check_ref) {
|
||||
check_table = foreign->referenced_table;
|
||||
check_index = foreign->referenced_index;
|
||||
@ -591,6 +604,8 @@ row_ins_scan_sec_index_for_duplicate(
|
||||
dtuple_t* entry, /* in: index entry */
|
||||
que_thr_t* thr) /* in: query thread */
|
||||
{
|
||||
ulint n_unique;
|
||||
ulint i;
|
||||
int cmp;
|
||||
ulint n_fields_cmp;
|
||||
rec_t* rec;
|
||||
@ -599,6 +614,20 @@ row_ins_scan_sec_index_for_duplicate(
|
||||
ibool moved;
|
||||
mtr_t mtr;
|
||||
|
||||
n_unique = dict_index_get_n_unique(index);
|
||||
|
||||
/* If the secondary index is unique, but one of the fields in the
|
||||
n_unique first fields is NULL, a unique key violation cannot occur,
|
||||
since we define NULL != NULL in this case */
|
||||
|
||||
for (i = 0; i < n_unique; i++) {
|
||||
if (UNIV_SQL_NULL == dfield_get_len(
|
||||
dtuple_get_nth_field(entry, i))) {
|
||||
|
||||
return(DB_SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
mtr_start(&mtr);
|
||||
|
||||
/* Store old value on n_fields_cmp */
|
||||
|
@ -1881,6 +1881,28 @@ loop:
|
||||
return(err);
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
Checks if a table name contains the string "/#sql" which denotes temporary
|
||||
tables in MySQL. */
|
||||
static
|
||||
ibool
|
||||
row_is_mysql_tmp_table_name(
|
||||
/*========================*/
|
||||
/* out: TRUE if temporary table */
|
||||
char* name) /* in: table name in the form 'database/tablename' */
|
||||
{
|
||||
ulint i;
|
||||
|
||||
for (i = 0; i <= ut_strlen(name) - 5; i++) {
|
||||
if (ut_memcmp(name + i, "/#sql", 5) == 0) {
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
return(FALSE);
|
||||
}
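A minimal self-contained sketch of the same test in plain C; the table names are invented examples of the 'database/tablename' form the function expects, and strstr stands in for the ut_memcmp loop above:

#include <assert.h>
#include <string.h>

/* Same predicate as row_is_mysql_tmp_table_name, written with standard C
strstr purely for illustration. */
static int
is_mysql_tmp_table_name(const char* name)
{
	return(strstr(name, "/#sql") != NULL);
}

int
main(void)
{
	assert(is_mysql_tmp_table_name("test/#sql-1234_1"));	/* ALTER TABLE temp name */
	assert(!is_mysql_tmp_table_name("test/t1"));		/* ordinary table */

	return(0);
}

The rename code further below uses this check to leave SYS_FOREIGN untouched during the intermediate rename that ALTER TABLE performs, so the original foreign key definitions survive the table copy.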
|
||||
|
||||
/*************************************************************************
|
||||
Renames a table for MySQL. */
|
||||
|
||||
@ -1944,16 +1966,27 @@ row_rename_table_for_mysql(
|
||||
str2 =
|
||||
"';\nold_table_name := '";
|
||||
|
||||
str3 =
|
||||
"';\n"
|
||||
"UPDATE SYS_TABLES SET NAME = new_table_name\n"
|
||||
"WHERE NAME = old_table_name;\n"
|
||||
"UPDATE SYS_FOREIGN SET FOR_NAME = new_table_name\n"
|
||||
"WHERE FOR_NAME = old_table_name;\n"
|
||||
"UPDATE SYS_FOREIGN SET REF_NAME = new_table_name\n"
|
||||
"WHERE REF_NAME = old_table_name;\n"
|
||||
"COMMIT WORK;\n"
|
||||
"END;\n";
|
||||
if (row_is_mysql_tmp_table_name(new_name)) {
|
||||
|
||||
/* We want to preserve the original foreign key
|
||||
constraint definitions despite the name change */
|
||||
|
||||
str3 =
|
||||
"';\n"
|
||||
"UPDATE SYS_TABLES SET NAME = new_table_name\n"
|
||||
"WHERE NAME = old_table_name;\n"
|
||||
"END;\n";
|
||||
} else {
|
||||
str3 =
|
||||
"';\n"
|
||||
"UPDATE SYS_TABLES SET NAME = new_table_name\n"
|
||||
"WHERE NAME = old_table_name;\n"
|
||||
"UPDATE SYS_FOREIGN SET FOR_NAME = new_table_name\n"
|
||||
"WHERE FOR_NAME = old_table_name;\n"
|
||||
"UPDATE SYS_FOREIGN SET REF_NAME = new_table_name\n"
|
||||
"WHERE REF_NAME = old_table_name;\n"
|
||||
"END;\n";
|
||||
}
|
||||
|
||||
len = ut_strlen(str1);
|
||||
|
||||
@ -2028,7 +2061,32 @@ row_rename_table_for_mysql(
|
||||
trx_general_rollback_for_mysql(trx, FALSE, NULL);
|
||||
trx->error_state = DB_SUCCESS;
|
||||
} else {
|
||||
ut_a(dict_table_rename_in_cache(table, new_name));
|
||||
ut_a(dict_table_rename_in_cache(table, new_name,
|
||||
!row_is_mysql_tmp_table_name(new_name)));
|
||||
|
||||
if (row_is_mysql_tmp_table_name(old_name)) {
|
||||
|
||||
err = dict_load_foreigns(new_name);
|
||||
|
||||
if (err != DB_SUCCESS) {
|
||||
|
||||
ut_print_timestamp(stderr);
|
||||
|
||||
fprintf(stderr,
|
||||
" InnoDB: Error: in ALTER TABLE table %s\n"
|
||||
"InnoDB: has or is referenced in foreign key constraints\n"
|
||||
"InnoDB: which are not compatible with the new table definition.\n",
|
||||
new_name);
|
||||
|
||||
ut_a(dict_table_rename_in_cache(table,
|
||||
old_name, FALSE));
|
||||
|
||||
trx->error_state = DB_SUCCESS;
|
||||
trx_general_rollback_for_mysql(trx, FALSE,
|
||||
NULL);
|
||||
trx->error_state = DB_SUCCESS;
|
||||
}
|
||||
}
|
||||
}
|
||||
funct_exit:
|
||||
mutex_exit(&(dict_sys->mutex));
|
||||
|
@ -2233,7 +2233,7 @@ row_sel_get_clust_rec_for_mysql(
|
||||
(or old_vers) is not rec; in that case we must ignore
|
||||
such row because in our snapshot rec would not have existed.
|
||||
Remember that from rec we cannot see directly which transaction
|
||||
id corrsponds to it: we have to go to the clustered index
|
||||
id corresponds to it: we have to go to the clustered index
|
||||
record. A query where we want to fetch all rows where
|
||||
the secondary index value is in some interval would return
|
||||
a wrong result if we would not drop rows which we come to
|
||||
@ -2244,6 +2244,12 @@ row_sel_get_clust_rec_for_mysql(
|
||||
&& !row_sel_sec_rec_is_for_clust_rec(rec, sec_index,
|
||||
clust_rec, clust_index)) {
|
||||
clust_rec = NULL;
|
||||
} else {
|
||||
#ifdef UNIV_SEARCH_DEBUG
|
||||
ut_a(clust_rec == NULL ||
|
||||
row_sel_sec_rec_is_for_clust_rec(rec, sec_index,
|
||||
clust_rec, clust_index));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
@ -2399,7 +2405,12 @@ row_sel_try_search_shortcut_for_mysql(
|
||||
|
||||
btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
|
||||
BTR_SEARCH_LEAF, pcur,
|
||||
RW_S_LATCH, mtr);
|
||||
#ifndef UNIV_SEARCH_DEBUG
|
||||
RW_S_LATCH,
|
||||
#else
|
||||
0,
|
||||
#endif
|
||||
mtr);
|
||||
rec = btr_pcur_get_rec(pcur);
|
||||
|
||||
if (!page_rec_is_user_rec(rec)) {
|
||||
@ -2623,15 +2634,18 @@ row_search_for_mysql(
|
||||
|
||||
goto no_shortcut;
|
||||
}
|
||||
|
||||
#ifndef UNIV_SEARCH_DEBUG
|
||||
if (!trx->has_search_latch) {
|
||||
rw_lock_s_lock(&btr_search_latch);
|
||||
trx->has_search_latch = TRUE;
|
||||
}
|
||||
|
||||
#endif
|
||||
shortcut = row_sel_try_search_shortcut_for_mysql(&rec,
|
||||
prebuilt, &mtr);
|
||||
if (shortcut == SEL_FOUND) {
|
||||
#ifdef UNIV_SEARCH_DEBUG
|
||||
ut_a(0 == cmp_dtuple_rec(search_tuple, rec));
|
||||
#endif
|
||||
row_sel_store_mysql_rec(buf, prebuilt, rec);
|
||||
|
||||
mtr_commit(&mtr);
|
||||
@ -2793,7 +2807,9 @@ rec_loop:
|
||||
/* The record matches enough */
|
||||
|
||||
ut_ad(mode == PAGE_CUR_GE);
|
||||
|
||||
#ifdef UNIV_SEARCH_DEBUG
|
||||
ut_a(0 == cmp_dtuple_rec(search_tuple, rec));
|
||||
#endif
|
||||
} else if (match_mode == ROW_SEL_EXACT) {
|
||||
/* Test if the index record matches completely to search_tuple
|
||||
in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
|
||||
|
@ -142,7 +142,7 @@ try_again:
|
||||
|
||||
/*************************************************************************
|
||||
Checks if possible foreign key constraints hold after a delete of the record
|
||||
under pcur. NOTE that this function will temporarily commit mtr and lose
|
||||
under pcur. NOTE that this function will temporarily commit mtr and lose the
|
||||
pcur position! */
|
||||
static
|
||||
ulint
|
||||
|
@ -69,13 +69,19 @@ char* srv_main_thread_op_info = "";
|
||||
names, where the file name itself may also contain a path */
|
||||
|
||||
char* srv_data_home = NULL;
|
||||
char* srv_logs_home = NULL;
|
||||
char* srv_arch_dir = NULL;
|
||||
|
||||
ulint srv_n_data_files = 0;
|
||||
char** srv_data_file_names = NULL;
|
||||
ulint* srv_data_file_sizes = NULL; /* size in database pages */
|
||||
|
||||
ibool srv_auto_extend_last_data_file = FALSE; /* if TRUE, then we
|
||||
auto-extend the last data
|
||||
file */
|
||||
ulint srv_last_file_size_max = 0; /* if != 0, this tells
|
||||
the max size auto-extending
|
||||
may increase the last data
|
||||
file size */
|
||||
ulint* srv_data_file_is_raw_partition = NULL;
|
||||
|
||||
/* If the following is TRUE we do not allow inserts etc. This protects
|
||||
@ -1596,7 +1602,7 @@ srv_read_initfile(
|
||||
|
||||
/*************************************************************************
|
||||
Initializes the server. */
|
||||
static
|
||||
|
||||
void
|
||||
srv_init(void)
|
||||
/*==========*/
|
||||
@ -1664,7 +1670,7 @@ srv_init(void)
|
||||
/*************************************************************************
|
||||
Initializes the synchronization primitives, memory system, and the thread
|
||||
local storage. */
|
||||
static
|
||||
|
||||
void
|
||||
srv_general_init(void)
|
||||
/*==================*/
|
||||
@ -1686,6 +1692,7 @@ srv_conc_enter_innodb(
|
||||
trx_t* trx) /* in: transaction object associated with the
|
||||
thread */
|
||||
{
|
||||
ibool has_slept = FALSE;
|
||||
srv_conc_slot_t* slot;
|
||||
ulint i;
|
||||
|
||||
@ -1703,7 +1710,7 @@ srv_conc_enter_innodb(
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
retry:
|
||||
os_fast_mutex_lock(&srv_conc_mutex);
|
||||
|
||||
if (srv_conc_n_threads < (lint)srv_thread_concurrency) {
|
||||
@ -1717,6 +1724,22 @@ srv_conc_enter_innodb(
|
||||
return;
|
||||
}
|
||||
|
||||
/* If the transaction is not holding resources, let it sleep
|
||||
for 100 milliseconds, and try again then */
|
||||
|
||||
if (!has_slept && !trx->has_search_latch
|
||||
&& NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
|
||||
|
||||
has_slept = TRUE; /* We let it sleep only once to avoid
|
||||
starvation */
|
||||
|
||||
os_fast_mutex_unlock(&srv_conc_mutex);
|
||||
|
||||
os_thread_sleep(100000);
|
||||
|
||||
goto retry;
|
||||
}
|
||||
|
||||
/* Too many threads inside: put the current thread to a queue */
|
||||
|
||||
for (i = 0; i < OS_THREAD_MAX_N; i++) {
|
||||
@ -1908,6 +1931,9 @@ srv_normalize_init_values(void)
|
||||
* ((1024 * 1024) / UNIV_PAGE_SIZE);
|
||||
}
|
||||
|
||||
srv_last_file_size_max = srv_last_file_size_max
|
||||
* ((1024 * 1024) / UNIV_PAGE_SIZE);
|
||||
|
||||
srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE;
|
||||
|
||||
srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE;
|
||||
|
@ -84,6 +84,308 @@ we may get an assertion failure in os0file.c */
|
||||
|
||||
#define SRV_LOG_SPACE_FIRST_ID 1000000000
|
||||
|
||||
/*************************************************************************
|
||||
Reads the data files and their sizes from a character string given in
|
||||
the .cnf file. */
|
||||
|
||||
ibool
|
||||
srv_parse_data_file_paths_and_sizes(
|
||||
/*================================*/
|
||||
/* out: TRUE if ok, FALSE if parsing
|
||||
error */
|
||||
char* str, /* in: the data file path string */
|
||||
char*** data_file_names, /* out, own: array of data file
|
||||
names */
|
||||
ulint** data_file_sizes, /* out, own: array of data file sizes
|
||||
in megabytes */
|
||||
ulint** data_file_is_raw_partition,/* out, own: array of flags
|
||||
showing which data files are raw
|
||||
partitions */
|
||||
ulint* n_data_files, /* out: number of data files */
|
||||
ibool* is_auto_extending, /* out: TRUE if the last data file is
|
||||
auto-extending */
|
||||
ulint* max_auto_extend_size) /* out: max auto extend size for the
|
||||
last file if specified, 0 if not */
|
||||
{
|
||||
char* input_str;
|
||||
char* endp;
|
||||
char* path;
|
||||
ulint size;
|
||||
ulint i = 0;
|
||||
|
||||
*is_auto_extending = FALSE;
|
||||
*max_auto_extend_size = 0;
|
||||
|
||||
input_str = str;
|
||||
|
||||
/* First calculate the number of data files and check syntax:
|
||||
path:size[M | G];path:size[M | G]... . Note that a Windows path may
|
||||
contain a drive name and a ':'. */
|
||||
|
||||
while (*str != '\0') {
|
||||
path = str;
|
||||
|
||||
while ((*str != ':' && *str != '\0')
|
||||
|| (*str == ':'
|
||||
&& (*(str + 1) == '\\' || *(str + 1) == '/'))) {
|
||||
str++;
|
||||
}
|
||||
|
||||
if (*str == '\0') {
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
str++;
|
||||
|
||||
size = strtoul(str, &endp, 10);
|
||||
|
||||
str = endp;
|
||||
|
||||
if (*str != 'M' && *str != 'G') {
|
||||
size = size / (1024 * 1024);
|
||||
} else if (*str == 'G') {
|
||||
size = size * 1024;
|
||||
str++;
|
||||
} else {
|
||||
str++;
|
||||
}
|
||||
|
||||
if (strlen(str) >= ut_strlen(":autoextend")
|
||||
&& 0 == ut_memcmp(str, ":autoextend",
|
||||
ut_strlen(":autoextend"))) {
|
||||
|
||||
str += ut_strlen(":autoextend");
|
||||
|
||||
if (strlen(str) >= ut_strlen(":max:")
|
||||
&& 0 == ut_memcmp(str, ":max:",
|
||||
ut_strlen(":max:"))) {
|
||||
|
||||
str += ut_strlen(":max:");
|
||||
|
||||
size = strtoul(str, &endp, 10);
|
||||
|
||||
str = endp;
|
||||
|
||||
if (*str != 'M' && *str != 'G') {
|
||||
size = size / (1024 * 1024);
|
||||
} else if (*str == 'G') {
|
||||
size = size * 1024;
|
||||
str++;
|
||||
} else {
|
||||
str++;
|
||||
}
|
||||
}
|
||||
|
||||
if (*str != '\0') {
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
if (strlen(str) >= 6
|
||||
&& *str == 'n'
|
||||
&& *(str + 1) == 'e'
|
||||
&& *(str + 2) == 'w') {
|
||||
str += 3;
|
||||
}
|
||||
|
||||
if (strlen(str) >= 3
|
||||
&& *str == 'r'
|
||||
&& *(str + 1) == 'a'
|
||||
&& *(str + 2) == 'w') {
|
||||
str += 3;
|
||||
}
|
||||
|
||||
if (size == 0) {
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
i++;
|
||||
|
||||
if (*str == ';') {
|
||||
str++;
|
||||
} else if (*str != '\0') {
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
*data_file_names = (char**)ut_malloc(i * sizeof(void*));
|
||||
*data_file_sizes = (ulint*)ut_malloc(i * sizeof(ulint));
|
||||
*data_file_is_raw_partition = (ulint*)ut_malloc(i * sizeof(ulint));
|
||||
|
||||
*n_data_files = i;
|
||||
|
||||
/* Then store the actual values to our arrays */
|
||||
|
||||
str = input_str;
|
||||
i = 0;
|
||||
|
||||
while (*str != '\0') {
|
||||
path = str;
|
||||
|
||||
/* Note that we must ignore the ':' in a Windows path */
|
||||
|
||||
while ((*str != ':' && *str != '\0')
|
||||
|| (*str == ':'
|
||||
&& (*(str + 1) == '\\' || *(str + 1) == '/'))) {
|
||||
str++;
|
||||
}
|
||||
|
||||
if (*str == ':') {
|
||||
/* Make path a null-terminated string */
|
||||
*str = '\0';
|
||||
str++;
|
||||
}
|
||||
|
||||
size = strtoul(str, &endp, 10);
|
||||
|
||||
str = endp;
|
||||
|
||||
if ((*str != 'M') && (*str != 'G')) {
|
||||
size = size / (1024 * 1024);
|
||||
} else if (*str == 'G') {
|
||||
size = size * 1024;
|
||||
str++;
|
||||
} else {
|
||||
str++;
|
||||
}
|
||||
|
||||
(*data_file_names)[i] = path;
|
||||
(*data_file_sizes)[i] = size;
|
||||
|
||||
if (strlen(str) >= ut_strlen(":autoextend")
|
||||
&& 0 == ut_memcmp(str, ":autoextend",
|
||||
ut_strlen(":autoextend"))) {
|
||||
|
||||
*is_auto_extending = TRUE;
|
||||
|
||||
str += ut_strlen(":autoextend");
|
||||
|
||||
if (strlen(str) >= ut_strlen(":max:")
|
||||
&& 0 == ut_memcmp(str, ":max:",
|
||||
ut_strlen(":max:"))) {
|
||||
|
||||
str += ut_strlen(":max:");
|
||||
|
||||
size = strtoul(str, &endp, 10);
|
||||
|
||||
str = endp;
|
||||
|
||||
if (*str != 'M' && *str != 'G') {
|
||||
size = size / (1024 * 1024);
|
||||
} else if (*str == 'G') {
|
||||
size = size * 1024;
|
||||
str++;
|
||||
} else {
|
||||
str++;
|
||||
}
|
||||
|
||||
*max_auto_extend_size = size;
|
||||
}
|
||||
|
||||
if (*str != '\0') {
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
(*data_file_is_raw_partition)[i] = 0;
|
||||
|
||||
if (strlen(str) >= 6
|
||||
&& *str == 'n'
|
||||
&& *(str + 1) == 'e'
|
||||
&& *(str + 2) == 'w') {
|
||||
str += 3;
|
||||
(*data_file_is_raw_partition)[i] = SRV_NEW_RAW;
|
||||
}
|
||||
|
||||
if (strlen(str) >= 3
|
||||
&& *str == 'r'
|
||||
&& *(str + 1) == 'a'
|
||||
&& *(str + 2) == 'w') {
|
||||
str += 3;
|
||||
|
||||
if ((*data_file_is_raw_partition)[i] == 0) {
|
||||
(*data_file_is_raw_partition)[i] = SRV_OLD_RAW;
|
||||
}
|
||||
}
|
||||
|
||||
i++;
|
||||
|
||||
if (*str == ';') {
|
||||
str++;
|
||||
}
|
||||
}
|
||||
|
||||
return(TRUE);
|
||||
}
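A usage sketch, assuming the declarations above are in scope; the path string and sizes are invented examples rather than recommended settings:

/* Invented example value of innodb_data_file_path; the parser ut_mallocs
the three output arrays and leaves the name pointers pointing into str. */
char	str[] = "ibdata1:100M;ibdata2:50M:autoextend:max:500M";

char**	data_file_names;		/* -> { "ibdata1", "ibdata2" }      */
ulint*	data_file_sizes;		/* -> { 100, 50 }, in megabytes     */
ulint*	data_file_is_raw_partition;	/* -> { 0, 0 }, no raw partitions   */
ulint	n_data_files;			/* -> 2                             */
ibool	is_auto_extending;		/* -> TRUE, last file autoextends   */
ulint	max_auto_extend_size;		/* -> 500 megabytes                 */

srv_parse_data_file_paths_and_sizes(str, &data_file_names,
		&data_file_sizes, &data_file_is_raw_partition,
		&n_data_files, &is_auto_extending, &max_auto_extend_size);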
|
||||
|
||||
/*************************************************************************
|
||||
Reads log group home directories from a character string given in
|
||||
the .cnf file. */
|
||||
|
||||
ibool
|
||||
srv_parse_log_group_home_dirs(
|
||||
/*==========================*/
|
||||
/* out: TRUE if ok, FALSE if parsing
|
||||
error */
|
||||
char* str, /* in: character string */
|
||||
char*** log_group_home_dirs) /* out, own: log group home dirs */
|
||||
{
|
||||
char* input_str;
|
||||
char* path;
|
||||
ulint i = 0;
|
||||
|
||||
input_str = str;
|
||||
|
||||
/* First calculate the number of directories and check syntax:
|
||||
path;path;... */
|
||||
|
||||
while (*str != '\0') {
|
||||
path = str;
|
||||
|
||||
while (*str != ';' && *str != '\0') {
|
||||
str++;
|
||||
}
|
||||
|
||||
i++;
|
||||
|
||||
if (*str == ';') {
|
||||
str++;
|
||||
} else if (*str != '\0') {
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
*log_group_home_dirs = (char**) ut_malloc(i * sizeof(void*));
|
||||
|
||||
/* Then store the actual values to our array */
|
||||
|
||||
str = input_str;
|
||||
i = 0;
|
||||
|
||||
while (*str != '\0') {
|
||||
path = str;
|
||||
|
||||
while (*str != ';' && *str != '\0') {
|
||||
str++;
|
||||
}
|
||||
|
||||
if (*str == ';') {
|
||||
*str = '\0';
|
||||
str++;
|
||||
}
|
||||
|
||||
(*log_group_home_dirs)[i] = path;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
return(TRUE);
|
||||
}
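And the corresponding sketch for the log directories (the path below is an invented example); with innodb_mirrored_log_groups = 1 the string holds a single directory and contains no ';':

char	str[] = "/usr/local/mysql/data";	/* invented example value of
						innodb_log_group_home_dir */
char**	log_group_home_dirs;

srv_parse_log_group_home_dirs(str, &log_group_home_dirs);
/* log_group_home_dirs[0] now points at "/usr/local/mysql/data" */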
|
||||
|
||||
/************************************************************************
|
||||
I/o-handler thread function. */
|
||||
static
|
||||
@ -127,7 +429,7 @@ io_handler_thread(
|
||||
|
||||
/*************************************************************************
|
||||
Normalizes a directory path for Windows: converts slashes to backslashes. */
|
||||
static
|
||||
|
||||
void
|
||||
srv_normalize_path_for_win(
|
||||
/*=======================*/
|
||||
@ -148,7 +450,7 @@ srv_normalize_path_for_win(
|
||||
/*************************************************************************
|
||||
Adds a slash or a backslash to the end of a string if it is missing
|
||||
and the string is not empty. */
|
||||
static
|
||||
|
||||
char*
|
||||
srv_add_path_separator_if_needed(
|
||||
/*=============================*/
|
||||
@ -354,6 +656,7 @@ open_or_create_data_files(
|
||||
ibool one_created = FALSE;
|
||||
ulint size;
|
||||
ulint size_high;
|
||||
ulint rounded_size_pages;
|
||||
char name[10000];
|
||||
|
||||
if (srv_n_data_files >= 1000) {
|
||||
@ -433,17 +736,35 @@ open_or_create_data_files(
|
||||
ret = os_file_get_size(files[i], &size,
|
||||
&size_high);
|
||||
ut_a(ret);
|
||||
/* Round size downward to megabytes */
|
||||
|
||||
/* File sizes in srv_... are given in
|
||||
database pages */
|
||||
rounded_size_pages = (size / (1024 * 1024)
|
||||
+ 4096 * size_high)
|
||||
<< (20 - UNIV_PAGE_SIZE_SHIFT);
|
||||
|
||||
if (size != srv_calc_low32(
|
||||
srv_data_file_sizes[i])
|
||||
|| size_high != srv_calc_high32(
|
||||
srv_data_file_sizes[i])) {
|
||||
if (i == srv_n_data_files - 1
|
||||
&& srv_auto_extend_last_data_file) {
|
||||
|
||||
if (srv_data_file_sizes[i] >
|
||||
rounded_size_pages
|
||||
|| (srv_last_file_size_max > 0
|
||||
&& srv_last_file_size_max <
|
||||
rounded_size_pages)) {
|
||||
|
||||
fprintf(stderr,
|
||||
"InnoDB: Error: data file %s is of a different size\n"
|
||||
"InnoDB: than specified in the .cnf file!\n", name);
|
||||
}
|
||||
|
||||
srv_data_file_sizes[i] =
|
||||
rounded_size_pages;
|
||||
}
|
||||
|
||||
if (rounded_size_pages
|
||||
!= srv_data_file_sizes[i]) {
|
||||
|
||||
fprintf(stderr,
|
||||
"InnoDB: Error: data file %s is of different size\n"
|
||||
"InnoDB: Error: data file %s is of a different size\n"
|
||||
"InnoDB: than specified in the .cnf file!\n", name);
|
||||
|
||||
return(DB_ERROR);
|
||||
@ -477,7 +798,7 @@ open_or_create_data_files(
|
||||
>> (20 - UNIV_PAGE_SIZE_SHIFT)));
|
||||
|
||||
fprintf(stderr,
|
||||
"InnoDB: Database physically writes the file full: wait...\n");
|
||||
"InnoDB: Database physically writes the file full: wait...\n");
|
||||
|
||||
ret = os_file_set_size(name, files[i],
|
||||
srv_calc_low32(srv_data_file_sizes[i]),
|
||||
@ -675,6 +996,8 @@ innobase_start_or_create_for_mysql(void)
|
||||
os_aio_use_native_aio = TRUE;
|
||||
}
|
||||
#endif
|
||||
os_aio_use_native_aio = FALSE;
|
||||
|
||||
if (!os_aio_use_native_aio) {
|
||||
os_aio_init(4 * SRV_N_PENDING_IOS_PER_THREAD
|
||||
* srv_n_file_io_threads,
|
||||
@ -721,12 +1044,10 @@ innobase_start_or_create_for_mysql(void)
|
||||
return(DB_ERROR);
|
||||
}
|
||||
|
||||
if (sizeof(ulint) == 4
|
||||
&& srv_n_log_files * srv_log_file_size >= 262144) {
|
||||
if (srv_n_log_files * srv_log_file_size >= 262144) {
|
||||
|
||||
fprintf(stderr,
|
||||
"InnoDB: Error: combined size of log files must be < 4 GB\n"
|
||||
"InnoDB: on 32-bit computers\n");
|
||||
"InnoDB: Error: combined size of log files must be < 4 GB\n");
|
||||
|
||||
return(DB_ERROR);
|
||||
}
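For reference, 262144 is the 4 GB ceiling expressed in database pages, assuming the usual 16 kB InnoDB page: 262144 pages x 16 kB/page = 4194304 kB = 4 GB. srv_log_file_size is already a page count at this point, since srv_normalize_init_values divided it by UNIV_PAGE_SIZE above.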
|
||||
@ -758,7 +1079,6 @@ innobase_start_or_create_for_mysql(void)
|
||||
&max_flushed_lsn, &max_arch_log_no,
|
||||
&sum_of_new_sizes);
|
||||
if (err != DB_SUCCESS) {
|
||||
|
||||
fprintf(stderr, "InnoDB: Could not open data files\n");
|
||||
|
||||
return((int) err);
|
||||
@ -797,9 +1117,9 @@ innobase_start_or_create_for_mysql(void)
|
||||
|| (log_opened && log_created)) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Error: all log files must be created at the same time.\n"
|
||||
"InnoDB: If you want bigger or smaller log files,\n"
|
||||
"InnoDB: shut down the database and make sure there\n"
|
||||
"InnoDB: were no errors in shutdown.\n"
|
||||
"InnoDB: All log files must be created also in database creation.\n"
|
||||
"InnoDB: If you want bigger or smaller log files, shut down the\n"
|
||||
"InnoDB: database and make sure there were no errors in shutdown.\n"
|
||||
"InnoDB: Then delete the existing log files. Edit the .cnf file\n"
|
||||
"InnoDB: and start the database again.\n");
|
||||
|
||||
@ -835,9 +1155,7 @@ innobase_start_or_create_for_mysql(void)
|
||||
|
||||
mutex_enter(&(log_sys->mutex));
|
||||
|
||||
recv_reset_logs(ut_dulint_align_down(max_flushed_lsn,
|
||||
OS_FILE_LOG_BLOCK_SIZE),
|
||||
max_arch_log_no + 1, TRUE);
|
||||
recv_reset_logs(max_flushed_lsn, max_arch_log_no + 1, TRUE);
|
||||
|
||||
mutex_exit(&(log_sys->mutex));
|
||||
}
|
||||
@ -877,6 +1195,10 @@ innobase_start_or_create_for_mysql(void)
|
||||
|
||||
srv_startup_is_before_trx_rollback_phase = FALSE;
|
||||
|
||||
/* Initialize the fsp free limit global variable in the log
|
||||
system */
|
||||
fsp_header_get_free_limit(0);
|
||||
|
||||
recv_recovery_from_archive_finish();
|
||||
} else {
|
||||
/* We always try to do a recovery, even if the database had
|
||||
@ -893,6 +1215,7 @@ innobase_start_or_create_for_mysql(void)
|
||||
|
||||
/* Since ibuf init is in dict_boot, and ibuf is needed
|
||||
in any disk i/o, first call dict_boot */
|
||||
|
||||
dict_boot();
|
||||
trx_sys_init_at_db_start();
|
||||
|
||||
@ -900,6 +1223,11 @@ innobase_start_or_create_for_mysql(void)
|
||||
trx_sys_init_at_db_start */
|
||||
|
||||
srv_startup_is_before_trx_rollback_phase = FALSE;
|
||||
|
||||
/* Initialize the fsp free limit global variable in the log
|
||||
system */
|
||||
fsp_header_get_free_limit(0);
|
||||
|
||||
recv_recovery_from_checkpoint_finish();
|
||||
}
|
||||
|
||||
|
@ -20,11 +20,42 @@ Created 3/26/1996 Heikki Tuuri
|
||||
#include "srv0srv.h"
|
||||
#include "trx0purge.h"
|
||||
#include "log0log.h"
|
||||
#include "os0file.h"
|
||||
|
||||
/* The transaction system */
|
||||
trx_sys_t* trx_sys = NULL;
|
||||
trx_doublewrite_t* trx_doublewrite = NULL;
|
||||
|
||||
/********************************************************************
|
||||
Determines if a page number is located inside the doublewrite buffer. */
|
||||
|
||||
ibool
|
||||
trx_doublewrite_page_inside(
|
||||
/*========================*/
|
||||
/* out: TRUE if the location is inside
|
||||
the two blocks of the doublewrite buffer */
|
||||
ulint page_no) /* in: page number */
|
||||
{
|
||||
if (trx_doublewrite == NULL) {
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
if (page_no >= trx_doublewrite->block1
|
||||
&& page_no < trx_doublewrite->block1
|
||||
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
if (page_no >= trx_doublewrite->block2
|
||||
&& page_no < trx_doublewrite->block2
|
||||
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
/********************************************************************
|
||||
Creates or initializes the doublewrite buffer at a database start. */
|
||||
static
|
||||
@ -36,6 +67,11 @@ trx_doublewrite_init(
|
||||
{
|
||||
trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));
|
||||
|
||||
/* When we have the doublewrite buffer in use, we do not need to
|
||||
call os_file_flush (Unix fsync) after every write. */
|
||||
|
||||
os_do_not_call_flush_at_each_write = TRUE;
|
||||
|
||||
mutex_create(&(trx_doublewrite->mutex));
|
||||
mutex_set_level(&(trx_doublewrite->mutex), SYNC_DOUBLEWRITE);
|
||||
|
||||
|
@ -121,6 +121,7 @@ ut_malloc(
|
||||
{
|
||||
return(ut_malloc_low(n, TRUE));
|
||||
}
|
||||
|
||||
/**************************************************************************
|
||||
Frees a memory block allocated with ut_malloc. */
|
||||
|
||||
|
@ -16,6 +16,24 @@ Created 5/11/1994 Heikki Tuuri
|
||||
|
||||
ibool ut_always_false = FALSE;
|
||||
|
||||
/************************************************************
|
||||
Gets the high 32 bits in a ulint. That is, it makes a shift >> 32,
|
||||
but since there seem to be compiler bugs in both gcc and Visual C++,
|
||||
we do this by a special conversion. */
|
||||
|
||||
ulint
|
||||
ut_get_high32(
|
||||
/*==========*/
|
||||
/* out: a >> 32 */
|
||||
ulint a) /* in: ulint */
|
||||
{
|
||||
if (sizeof(ulint) == 4) {
|
||||
return(0);
|
||||
}
|
||||
|
||||
return(a >> 32);
|
||||
}
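A small illustration, assuming a build where ulint is 64 bits wide (on a 32-bit build the function simply returns 0, as coded above):

ulint	size = (ulint)5 * 1024 * 1024 * 1024;	/* 5 GB as a byte count     */

ulint	high = ut_get_high32(size);		/* 1: the multiples of 4 GB */
ulint	low  = size & 0xFFFFFFFFUL;		/* 0x40000000: low 32 bits  */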
|
||||
|
||||
/************************************************************
|
||||
The following function returns a clock time in milliseconds. */
|
||||
|
||||
@ -58,11 +76,11 @@ ut_print_timestamp(
|
||||
FILE* file) /* in: file where to print */
|
||||
{
|
||||
#ifdef __WIN__
|
||||
SYSTEMTIME cal_tm;
|
||||
SYSTEMTIME cal_tm;
|
||||
|
||||
GetLocalTime(&cal_tm);
|
||||
GetLocalTime(&cal_tm);
|
||||
|
||||
fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
|
||||
fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
|
||||
(int)cal_tm.wYear % 100,
|
||||
(int)cal_tm.wMonth,
|
||||
(int)cal_tm.wDay,
|
||||
@ -70,23 +88,21 @@ ut_print_timestamp(
|
||||
(int)cal_tm.wMinute,
|
||||
(int)cal_tm.wSecond);
|
||||
#else
|
||||
struct tm cal_tm;
|
||||
struct tm* cal_tm_ptr;
|
||||
time_t tm;
|
||||
|
||||
struct tm cal_tm;
|
||||
struct tm* cal_tm_ptr;
|
||||
time_t tm;
|
||||
|
||||
time(&tm);
|
||||
time(&tm);
|
||||
|
||||
#ifdef HAVE_LOCALTIME_R
|
||||
localtime_r(&tm, &cal_tm);
|
||||
cal_tm_ptr = &cal_tm;
|
||||
localtime_r(&tm, &cal_tm);
|
||||
cal_tm_ptr = &cal_tm;
|
||||
#else
|
||||
cal_tm_ptr = localtime(&tm);
|
||||
cal_tm_ptr = localtime(&tm);
|
||||
#endif
|
||||
|
||||
fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
|
||||
fprintf(file,"%02d%02d%02d %2d:%02d:%02d",
|
||||
cal_tm_ptr->tm_year % 100,
|
||||
cal_tm_ptr->tm_mon+1,
|
||||
cal_tm_ptr->tm_mon + 1,
|
||||
cal_tm_ptr->tm_mday,
|
||||
cal_tm_ptr->tm_hour,
|
||||
cal_tm_ptr->tm_min,
|
||||
@ -94,6 +110,39 @@ ut_print_timestamp(
|
||||
#endif
|
||||
}
|
||||
|
||||
/**************************************************************
|
||||
Returns current year, month, day. */
|
||||
|
||||
void
|
||||
ut_get_year_month_day(
|
||||
/*==================*/
|
||||
ulint* year, /* out: current year */
|
||||
ulint* month, /* out: month */
|
||||
ulint* day) /* out: day */
|
||||
{
|
||||
#ifdef __WIN__
|
||||
SYSTEMTIME cal_tm;
|
||||
|
||||
GetLocalTime(&cal_tm);
|
||||
|
||||
*year = (ulint)cal_tm.wYear;
|
||||
*month = (ulint)cal_tm.wMonth;
|
||||
*day = (ulint)cal_tm.wDay;
|
||||
#else
|
||||
struct tm cal_tm;
|
||||
struct tm* cal_tm_ptr;
|
||||
time_t tm;
|
||||
|
||||
time(&tm);
|
||||
|
||||
cal_tm_ptr = localtime(&tm);
|
||||
|
||||
*year = (ulint)cal_tm_ptr->tm_year;
|
||||
*month = (ulint)cal_tm_ptr->tm_mon + 1;
|
||||
*day = (ulint)cal_tm_ptr->tm_mday;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*****************************************************************
|
||||
Runs an idle loop on CPU. The argument gives the desired delay
|
||||
in microseconds on 100 MHz Pentium + Visual C++. */
|
||||
|
@ -76,20 +76,34 @@ bool innodb_skip = 0;
|
||||
uint innobase_init_flags = 0;
|
||||
ulong innobase_cache_size = 0;
|
||||
|
||||
/* The default values for the following, type long, start-up parameters
|
||||
are declared in mysqld.cc: */
|
||||
|
||||
long innobase_mirrored_log_groups, innobase_log_files_in_group,
|
||||
innobase_log_file_size, innobase_log_buffer_size,
|
||||
innobase_buffer_pool_size, innobase_additional_mem_pool_size,
|
||||
innobase_file_io_threads, innobase_lock_wait_timeout,
|
||||
innobase_thread_concurrency, innobase_force_recovery;
|
||||
innobase_thread_concurrency, innobase_force_recovery;
|
||||
|
||||
char *innobase_data_home_dir, *innobase_data_file_path;
|
||||
char *innobase_log_group_home_dir, *innobase_log_arch_dir;
|
||||
char *innobase_unix_file_flush_method;
|
||||
my_bool innobase_flush_log_at_trx_commit, innobase_log_archive,
|
||||
innobase_use_native_aio, innobase_fast_shutdown;
|
||||
/* The default values for the following char* start-up parameters
|
||||
are determined in innobase_init below: */
|
||||
|
||||
/* innobase_data_file_path=ibdata:15,idata2:1,... */
|
||||
|
||||
char* innobase_data_home_dir = NULL;
|
||||
char* innobase_data_file_path = NULL;
|
||||
char* innobase_log_group_home_dir = NULL;
|
||||
char* innobase_log_arch_dir = NULL;
|
||||
char* innobase_unix_file_flush_method = NULL;
|
||||
|
||||
/* Below we have boolean-valued start-up parameters, and their default
|
||||
values */
|
||||
|
||||
my_bool innobase_flush_log_at_trx_commit = FALSE;
|
||||
my_bool innobase_log_archive = FALSE;
|
||||
my_bool innobase_use_native_aio = FALSE;
|
||||
my_bool innobase_fast_shutdown = TRUE;
|
||||
|
||||
/* The following counter is used to convey information to InnoDB
|
||||
about server activity: in selects it is not sensible to call
|
||||
srv_active_wake_master_thread after each fetch or search, we only do
|
||||
@ -331,227 +345,6 @@ ha_innobase::update_thd(
|
||||
return(0);
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
Reads the data files and their sizes from a character string given in
|
||||
the .cnf file. */
|
||||
static
|
||||
bool
|
||||
innobase_parse_data_file_paths_and_sizes(void)
|
||||
/*==========================================*/
|
||||
/* out: TRUE if ok, FALSE if parsing
|
||||
error */
|
||||
{
|
||||
char* str;
|
||||
char* endp;
|
||||
char* path;
|
||||
ulint size;
|
||||
ulint i = 0;
|
||||
|
||||
str = innobase_data_file_path;
|
||||
|
||||
/* First calculate the number of data files and check syntax:
|
||||
path:size[M];path:size[M]... . Note that a Windows path may
|
||||
contain a drive name and a ':'. */
|
||||
|
||||
while (*str != '\0') {
|
||||
path = str;
|
||||
|
||||
while ((*str != ':' && *str != '\0')
|
||||
|| (*str == ':'
|
||||
&& (*(str + 1) == '\\' || *(str + 1) == '/'))) {
|
||||
str++;
|
||||
}
|
||||
|
||||
if (*str == '\0') {
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
str++;
|
||||
|
||||
size = strtoul(str, &endp, 10);
|
||||
|
||||
str = endp;
|
||||
|
||||
if ((*str != 'M') && (*str != 'G')) {
|
||||
size = size / (1024 * 1024);
|
||||
} else if (*str == 'G') {
|
||||
size = size * 1024;
|
||||
str++;
|
||||
} else {
|
||||
str++;
|
||||
}
|
||||
|
||||
if (strlen(str) >= 6
|
||||
&& *str == 'n'
|
||||
&& *(str + 1) == 'e'
|
||||
&& *(str + 2) == 'w') {
|
||||
str += 3;
|
||||
}
|
||||
|
||||
if (strlen(str) >= 3
|
||||
&& *str == 'r'
|
||||
&& *(str + 1) == 'a'
|
||||
&& *(str + 2) == 'w') {
|
||||
str += 3;
|
||||
}
|
||||
|
||||
if (size == 0) {
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
i++;
|
||||
|
||||
if (*str == ';') {
|
||||
str++;
|
||||
} else if (*str != '\0') {
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
srv_data_file_names = (char**)ut_malloc(i * sizeof(void*));
|
||||
srv_data_file_sizes = (ulint*)ut_malloc(i * sizeof(ulint));
|
||||
srv_data_file_is_raw_partition = (ulint*)ut_malloc(i * sizeof(ulint));
|
||||
|
||||
srv_n_data_files = i;
|
||||
|
||||
/* Then store the actual values to our arrays */
|
||||
|
||||
str = innobase_data_file_path;
|
||||
i = 0;
|
||||
|
||||
while (*str != '\0') {
|
||||
path = str;
|
||||
|
||||
/* Note that we must ignore the ':' in a Windows path */
|
||||
|
||||
while ((*str != ':' && *str != '\0')
|
||||
|| (*str == ':'
|
||||
&& (*(str + 1) == '\\' || *(str + 1) == '/'))) {
|
||||
str++;
|
||||
}
|
||||
|
||||
if (*str == ':') {
|
||||
/* Make path a null-terminated string */
|
||||
*str = '\0';
|
||||
str++;
|
||||
}
|
||||
|
||||
size = strtoul(str, &endp, 10);
|
||||
|
||||
str = endp;
|
||||
|
||||
if ((*str != 'M') && (*str != 'G')) {
|
||||
size = size / (1024 * 1024);
|
||||
} else if (*str == 'G') {
|
||||
size = size * 1024;
|
||||
str++;
|
||||
} else {
|
||||
str++;
|
||||
}
|
||||
|
||||
srv_data_file_is_raw_partition[i] = 0;
|
||||
|
||||
if (strlen(str) >= 6
|
||||
&& *str == 'n'
|
||||
&& *(str + 1) == 'e'
|
||||
&& *(str + 2) == 'w') {
|
||||
str += 3;
|
||||
srv_data_file_is_raw_partition[i] = SRV_NEW_RAW;
|
||||
}
|
||||
|
||||
if (strlen(str) >= 3
|
||||
&& *str == 'r'
|
||||
&& *(str + 1) == 'a'
|
||||
&& *(str + 2) == 'w') {
|
||||
str += 3;
|
||||
|
||||
if (srv_data_file_is_raw_partition[i] == 0) {
|
||||
srv_data_file_is_raw_partition[i] = SRV_OLD_RAW;
|
||||
}
|
||||
}
|
||||
|
||||
srv_data_file_names[i] = path;
|
||||
srv_data_file_sizes[i] = size;
|
||||
|
||||
i++;
|
||||
|
||||
if (*str == ';') {
|
||||
str++;
|
||||
}
|
||||
}
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
Reads log group home directories from a character string given in
|
||||
the .cnf file. */
|
||||
static
|
||||
bool
|
||||
innobase_parse_log_group_home_dirs(void)
|
||||
/*====================================*/
|
||||
/* out: TRUE if ok, FALSE if parsing
|
||||
error */
|
||||
{
|
||||
char* str;
|
||||
char* path;
|
||||
ulint i = 0;
|
||||
|
||||
str = innobase_log_group_home_dir;
|
||||
|
||||
/* First calculate the number of directories and check syntax:
|
||||
path;path;... */
|
||||
|
||||
while (*str != '\0') {
|
||||
path = str;
|
||||
|
||||
while (*str != ';' && *str != '\0') {
|
||||
str++;
|
||||
}
|
||||
|
||||
i++;
|
||||
|
||||
if (*str == ';') {
|
||||
str++;
|
||||
} else if (*str != '\0') {
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
}
|
||||
|
||||
if (i != (ulint) innobase_mirrored_log_groups) {
|
||||
|
||||
return(FALSE);
|
||||
}
|
||||
|
||||
srv_log_group_home_dirs = (char**) ut_malloc(i * sizeof(void*));
|
||||
|
||||
/* Then store the actual values to our array */
|
||||
|
||||
str = innobase_log_group_home_dir;
|
||||
i = 0;
|
||||
|
||||
while (*str != '\0') {
|
||||
path = str;
|
||||
|
||||
while (*str != ';' && *str != '\0') {
|
||||
str++;
|
||||
}
|
||||
|
||||
if (*str == ';') {
|
||||
*str = '\0';
|
||||
str++;
|
||||
}
|
||||
|
||||
srv_log_group_home_dirs[i] = path;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
return(TRUE);
|
||||
}
|
||||
|
||||
/*************************************************************************
|
||||
Opens an InnoDB database. */
|
||||
|
||||
@ -574,49 +367,62 @@ innobase_init(void)
|
||||
}
|
||||
|
||||
/* Use current_dir if no paths are set */
|
||||
current_dir[0]=FN_CURLIB;
|
||||
current_dir[1]=FN_LIBCHAR;
|
||||
current_dir[2]=0;
|
||||
current_dir[0] = FN_CURLIB;
|
||||
current_dir[1] = FN_LIBCHAR;
|
||||
current_dir[2] = 0;
|
||||
|
||||
/* Set InnoDB initialization parameters according to the values
|
||||
read from MySQL .cnf file */
|
||||
|
||||
if (!innobase_data_file_path)
|
||||
{
|
||||
fprintf(stderr,
|
||||
if (!innobase_data_file_path) {
|
||||
fprintf(stderr,
|
||||
"Cannot initialize InnoDB as 'innodb_data_file_path' is not set.\n"
|
||||
"If you do not want to use transactional InnoDB tables, add a line\n"
|
||||
"skip-innodb\n"
|
||||
"to the [mysqld] section of init parameters in your my.cnf\n"
|
||||
"or my.ini. If you want to use InnoDB tables, add for example,\n"
|
||||
"innodb_data_file_path = ibdata1:30M\n"
|
||||
"innodb_data_file_path = ibdata1:30M:autoextend\n"
|
||||
"But to get good performance you should adjust for your hardware\n"
|
||||
"the InnoDB startup options listed in section 2 at\n"
|
||||
"http://www.innodb.com/ibman.html\n");
|
||||
|
||||
innodb_skip=1;
|
||||
DBUG_RETURN(FALSE); // Continue without innobase
|
||||
innodb_skip=1;
|
||||
DBUG_RETURN(FALSE); /* Continue without InnoDB */
|
||||
}
|
||||
|
||||
srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
|
||||
current_dir);
|
||||
srv_logs_home = (char*) "";
|
||||
srv_arch_dir = (innobase_log_arch_dir ? innobase_log_arch_dir :
|
||||
current_dir);
|
||||
|
||||
ret = innobase_parse_data_file_paths_and_sizes();
|
||||
|
||||
ret = (bool)
|
||||
srv_parse_data_file_paths_and_sizes(innobase_data_file_path,
|
||||
&srv_data_file_names,
|
||||
&srv_data_file_sizes,
|
||||
&srv_data_file_is_raw_partition,
|
||||
&srv_n_data_files,
|
||||
&srv_auto_extend_last_data_file,
|
||||
&srv_last_file_size_max);
|
||||
if (ret == FALSE) {
|
||||
fprintf(stderr, "InnoDB: syntax error in innodb_data_file_path\n");
|
||||
DBUG_RETURN(TRUE);
|
||||
fprintf(stderr,
|
||||
"InnoDB: syntax error in innodb_data_file_path\n");
|
||||
DBUG_RETURN(TRUE);
|
||||
}
|
||||
|
||||
if (!innobase_log_group_home_dir)
|
||||
innobase_log_group_home_dir= current_dir;
|
||||
ret = innobase_parse_log_group_home_dirs();
|
||||
if (!innobase_log_group_home_dir) {
|
||||
innobase_log_group_home_dir = current_dir;
|
||||
}
|
||||
|
||||
if (ret == FALSE) {
|
||||
DBUG_RETURN(TRUE);
|
||||
ret = (bool)
|
||||
srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
|
||||
&srv_log_group_home_dirs);
|
||||
|
||||
if (ret == FALSE || innobase_mirrored_log_groups != 1) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: syntax error in innodb_log_group_home_dir\n"
|
||||
"InnoDB: or a wrong number of mirrored log groups\n");
|
||||
|
||||
DBUG_RETURN(TRUE);
|
||||
}
|
||||
|
||||
srv_unix_file_flush_method_str = (innobase_unix_file_flush_method ?
|
||||
@ -658,10 +464,11 @@ innobase_init(void)
|
||||
|
||||
if (err != DB_SUCCESS) {
|
||||
|
||||
DBUG_RETURN(1);
|
||||
DBUG_RETURN(1);
|
||||
}
|
||||
|
||||
(void) hash_init(&innobase_open_tables,32,0,0,
|
||||
(hash_get_key) innobase_get_key,0,0);
|
||||
(hash_get_key) innobase_get_key,0,0);
|
||||
pthread_mutex_init(&innobase_mutex,MY_MUTEX_INIT_FAST);
|
||||
DBUG_RETURN(0);
|
||||
}
|
||||
@ -1340,33 +1147,43 @@ build_template(
|
||||
|
||||
clust_index = dict_table_get_first_index_noninline(prebuilt->table);
|
||||
|
||||
if (!prebuilt->in_update_remember_pos) {
|
||||
if (!prebuilt->hint_no_need_to_fetch_extra_cols) {
|
||||
/* We have a hint that we should at least fetch all
|
||||
columns in the key, or all columns in the table */
|
||||
|
||||
if (prebuilt->read_just_key) {
|
||||
/* MySQL has instructed us that it is enough to
|
||||
fetch the columns in the key */
|
||||
|
||||
fetch_all_in_key = TRUE;
|
||||
} else {
|
||||
/* We are building a temporary table: fetch all
|
||||
columns */
|
||||
columns; the reason is that MySQL may use the
|
||||
clustered index key to store rows, but the mechanism
|
||||
we use below to detect required columns does not
|
||||
reveal that. Actually, it might be enough to
|
||||
fetch only all in the key also in this case! */
|
||||
|
||||
templ_type = ROW_MYSQL_WHOLE_ROW;
|
||||
}
|
||||
}
|
||||
|
||||
if (prebuilt->select_lock_type == LOCK_X) {
|
||||
/* TODO: should fix the code in sql_update so that we could do
|
||||
with fetching only the needed columns */
|
||||
/* We always retrieve the whole clustered index record if we
|
||||
use exclusive row level locks, for example, if the read is
|
||||
done in an UPDATE statement. */
|
||||
|
||||
templ_type = ROW_MYSQL_WHOLE_ROW;
|
||||
}
|
||||
|
||||
if (templ_type == ROW_MYSQL_REC_FIELDS) {
|
||||
/* In versions < 3.23.50 we always retrieved the clustered
|
||||
index record if prebuilt->select_lock_type == LOCK_S,
|
||||
but there is really not need for that, and in some cases
|
||||
performance could be seriously degraded because the MySQL
|
||||
optimizer did not know about our convention! */
|
||||
|
||||
if (prebuilt->select_lock_type != LOCK_NONE) {
|
||||
/* Let index be the clustered index */
|
||||
|
||||
index = clust_index;
|
||||
} else {
|
||||
index = prebuilt->index;
|
||||
}
|
||||
index = prebuilt->index;
|
||||
} else {
|
||||
index = clust_index;
|
||||
}
|
||||
@ -1462,12 +1279,6 @@ skip_field:
|
||||
(index->table->cols + templ->col_no)->clust_pos;
|
||||
}
|
||||
}
|
||||
|
||||
if (templ_type == ROW_MYSQL_REC_FIELDS
|
||||
&& prebuilt->select_lock_type != LOCK_NONE) {
|
||||
|
||||
prebuilt->need_to_access_clustered = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/************************************************************************
|
||||
@ -1500,6 +1311,8 @@ ha_innobase::write_row(
|
||||
}
|
||||
|
||||
if (table->next_number_field && record == table->record[0]) {
|
||||
/* This is the case where the table has an
|
||||
auto-increment column */
|
||||
|
||||
/* Fetch the value the user possibly has set in the
|
||||
autoincrement field */
|
||||
@ -1584,12 +1397,6 @@ ha_innobase::write_row(
|
||||
}
|
||||
}
|
||||
|
||||
/* Set the 'in_update_remember_pos' flag to FALSE to
|
||||
make sure all columns are fetched in the select done by
|
||||
update_auto_increment */
|
||||
|
||||
prebuilt->in_update_remember_pos = FALSE;
|
||||
|
||||
update_auto_increment();
|
||||
|
||||
if (auto_inc == 0) {
|
||||
@ -1613,7 +1420,7 @@ ha_innobase::write_row(
|
||||
}
|
||||
|
||||
/* We have to set sql_stat_start to TRUE because
|
||||
update_auto_increment has called a select, and
|
||||
update_auto_increment may have called a select, and
|
||||
has reset that flag; row_insert_for_mysql has to
|
||||
know to set the IX intention lock on the table, something
|
||||
it only does at the start of each statement */
|
||||
@ -1853,9 +1660,7 @@ ha_innobase::update_row(
|
||||
/* This is not a delete */
|
||||
prebuilt->upd_node->is_delete = FALSE;
|
||||
|
||||
if (!prebuilt->in_update_remember_pos) {
|
||||
assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
|
||||
}
|
||||
assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW);
|
||||
|
||||
srv_conc_enter_innodb(prebuilt->trx);
|
||||
|
||||
@ -1901,7 +1706,6 @@ ha_innobase::delete_row(
|
||||
/* This is a delete */
|
||||
|
||||
prebuilt->upd_node->is_delete = TRUE;
|
||||
prebuilt->in_update_remember_pos = TRUE;
|
||||
|
||||
srv_conc_enter_innodb(prebuilt->trx);
|
||||
|
||||
@ -2616,7 +2420,9 @@ ha_innobase::create(
|
||||
|
||||
/* Create the table definition in InnoDB */
|
||||
|
||||
if (error = create_table_def(trx, form, norm_name)) {
|
||||
error = create_table_def(trx, form, norm_name);
|
||||
|
||||
if (error) {
|
||||
|
||||
trx_commit_for_mysql(trx);
|
||||
|
||||
@ -3203,13 +3009,59 @@ ha_innobase::update_table_comment(
|
||||
pos += sprintf(pos, "InnoDB free: %lu kB",
|
||||
(ulong) innobase_get_free_space());
|
||||
|
||||
/* We assume 150 bytes of space to print info */
|
||||
/* We assume 450 - length bytes of space to print info */
|
||||
|
||||
dict_print_info_on_foreign_keys(pos, 500, prebuilt->table);
|
||||
if (length < 450) {
|
||||
dict_print_info_on_foreign_keys(FALSE, pos, 450 - length,
|
||||
prebuilt->table);
|
||||
}
|
||||
|
||||
return(str);
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
Gets the foreign key create info for a table stored in InnoDB. */
|
||||
|
||||
char*
|
||||
ha_innobase::get_foreign_key_create_info(void)
|
||||
/*==========================================*/
|
||||
/* out, own: character string in the form which
|
||||
can be inserted to the CREATE TABLE statement,
|
||||
MUST be freed with ::free_foreign_key_create_info */
|
||||
{
|
||||
row_prebuilt_t* prebuilt = (row_prebuilt_t*)innobase_prebuilt;
|
||||
char* str;
|
||||
|
||||
if (prebuilt == NULL) {
|
||||
fprintf(stderr,
|
||||
"InnoDB: Error: cannot get create info for foreign keys\n");
|
||||
|
||||
return(NULL);
|
||||
}
|
||||
|
||||
str = (char*)ut_malloc(10000);
|
||||
|
||||
str[0] = '\0';
|
||||
|
||||
dict_print_info_on_foreign_keys(TRUE, str, 9000, prebuilt->table);
|
||||
|
||||
return(str);
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
Frees the foreign key create info for a table stored in InnoDB, if it is
|
||||
non-NULL. */
|
||||
|
||||
void
|
||||
ha_innobase::free_foreign_key_create_info(
|
||||
/*======================================*/
|
||||
char* str) /* in, own: create info string to free */
|
||||
{
|
||||
if (str) {
|
||||
ut_free(str);
|
||||
}
|
||||
}
|
||||
|
||||
/***********************************************************************
|
||||
Tells something additional to the handler about how to do things. */
|
||||
|
||||
@ -3235,7 +3087,7 @@ ha_innobase::extra(
|
||||
prebuilt->read_just_key = 0;
|
||||
break;
|
||||
case HA_EXTRA_DONT_USE_CURSOR_TO_UPDATE:
|
||||
prebuilt->in_update_remember_pos = FALSE;
|
||||
prebuilt->hint_no_need_to_fetch_extra_cols = FALSE;
|
||||
break;
|
||||
case HA_EXTRA_KEYREAD:
|
||||
prebuilt->read_just_key = 1;
|
||||
@ -3282,7 +3134,7 @@ ha_innobase::external_lock(
|
||||
trx = prebuilt->trx;
|
||||
|
||||
prebuilt->sql_stat_start = TRUE;
|
||||
prebuilt->in_update_remember_pos = TRUE;
|
||||
prebuilt->hint_no_need_to_fetch_extra_cols = TRUE;
|
||||
|
||||
prebuilt->read_just_key = 0;
|
||||
|
||||
@ -3301,6 +3153,16 @@ ha_innobase::external_lock(
|
||||
thd->transaction.all.innodb_active_trans = 1;
|
||||
trx->n_mysql_tables_in_use++;
|
||||
|
||||
if (thd->tx_isolation == ISO_SERIALIZABLE
|
||||
&& prebuilt->select_lock_type == LOCK_NONE) {
|
||||
|
||||
/* To get serializable execution we let InnoDB
|
||||
conceptually add 'LOCK IN SHARE MODE' to all SELECTs
|
||||
which otherwise would have been consistent reads */
|
||||
|
||||
prebuilt->select_lock_type = LOCK_S;
|
||||
}
|
||||
|
||||
if (prebuilt->select_lock_type != LOCK_NONE) {
|
||||
|
||||
trx->mysql_n_tables_locked++;
|
||||
@ -3407,8 +3269,8 @@ ha_innobase::store_lock(
|
||||
lock_type == TL_READ_NO_INSERT) {
|
||||
/* This is a SELECT ... IN SHARE MODE, or
|
||||
we are doing a complex SQL statement like
|
||||
INSERT INTO ... SELECT ... and the logical logging
|
||||
requires the use of a locking read */
|
||||
INSERT INTO ... SELECT ... and the logical logging (MySQL
|
||||
binlog) requires the use of a locking read */
|
||||
|
||||
prebuilt->select_lock_type = LOCK_S;
|
||||
} else {
|
||||
@ -3448,37 +3310,59 @@ ha_innobase::get_auto_increment()
|
||||
/*=============================*/
|
||||
/* out: the next auto-increment column value */
|
||||
{
|
||||
row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
|
||||
longlong nr;
|
||||
int error;
|
||||
row_prebuilt_t* prebuilt = (row_prebuilt_t*) innobase_prebuilt;
|
||||
longlong nr;
|
||||
int error;
|
||||
|
||||
(void) extra(HA_EXTRA_KEYREAD);
|
||||
index_init(table->next_number_index);
|
||||
/* Also SHOW TABLE STATUS calls this function. Previously, when we did
|
||||
always read the max autoinc key value, setting x-locks, users were
|
||||
surprised that SHOW TABLE STATUS could end up in a deadlock with
|
||||
ordinary SQL queries. We avoid these deadlocks if the auto-inc
|
||||
counter for the table has been initialized by fetching the value
|
||||
from the table struct in dictionary cache. */
|
||||
|
||||
/* We use an exclusive lock when we read the max key value from the
|
||||
auto-increment column index. This is because then build_template will
|
||||
advise InnoDB to fetch all columns. In SHOW TABLE STATUS the query
|
||||
id of the auto-increment column is not changed, and previously InnoDB
|
||||
did not fetch it, causing SHOW TABLE STATUS to show wrong values
|
||||
for the autoinc column. */
|
||||
assert(prebuilt->table);
|
||||
|
||||
prebuilt->select_lock_type = LOCK_X;
|
||||
prebuilt->trx->mysql_n_tables_locked += 1;
|
||||
nr = dict_table_autoinc_read(prebuilt->table);
|
||||
|
||||
error=index_last(table->record[1]);
|
||||
if (nr != 0) {
|
||||
|
||||
if (error) {
|
||||
nr = 1;
|
||||
} else {
|
||||
nr = (longlong) table->next_number_field->
|
||||
val_int_offset(table->rec_buff_length) + 1;
|
||||
}
|
||||
return(nr + 1);
|
||||
}
|
||||
|
||||
(void) extra(HA_EXTRA_NO_KEYREAD);
|
||||
(void) extra(HA_EXTRA_KEYREAD);
|
||||
index_init(table->next_number_index);
|
||||
|
||||
index_end();
|
||||
/* We use an exclusive lock when we read the max key value from the
|
||||
auto-increment column index. This is because then build_template will
|
||||
advise InnoDB to fetch all columns. In SHOW TABLE STATUS the query
|
||||
id of the auto-increment column is not changed, and previously InnoDB
|
||||
did not fetch it, causing SHOW TABLE STATUS to show wrong values
|
||||
for the autoinc column. */
|
||||
|
||||
return(nr);
|
||||
prebuilt->select_lock_type = LOCK_X;
|
||||
|
||||
/* Play safe and also give in another way the hint to fetch
|
||||
all columns in the key: */
|
||||
|
||||
prebuilt->hint_no_need_to_fetch_extra_cols = FALSE;
|
||||
|
||||
prebuilt->trx->mysql_n_tables_locked += 1;
|
||||
|
||||
error = index_last(table->record[1]);
|
||||
|
||||
if (error) {
|
||||
nr = 1;
|
||||
} else {
|
||||
nr = (longlong) table->next_number_field->
|
||||
val_int_offset(table->rec_buff_length) + 1;
|
||||
}
|
||||
|
||||
(void) extra(HA_EXTRA_NO_KEYREAD);
|
||||
|
||||
index_end();
|
||||
|
||||
return(nr);
|
||||
}
|
||||
|
||||
#endif /* HAVE_INNOBASE_DB */
|
||||
|
@ -154,7 +154,8 @@ class ha_innobase: public handler
|
||||
int rename_table(const char* from, const char* to);
|
||||
int check(THD* thd, HA_CHECK_OPT* check_opt);
|
||||
char* update_table_comment(const char* comment);
|
||||
|
||||
char* get_foreign_key_create_info();
|
||||
void free_foreign_key_create_info(char* str);
|
||||
THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to,
|
||||
enum thr_lock_type lock_type);
|
||||
longlong get_auto_increment();
|
||||
|