From 5e55d1ced52c52fb2f0508e1346059901a85960f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Thu, 19 Dec 2013 14:36:38 +0200 Subject: [PATCH 01/56] Changes for Fusion-io multi-threaded flush, page compressed tables and tables using atomic write/table. This is work in progress and some parts are at most POC quality. --- storage/innobase/CMakeLists.txt | 3 + storage/innobase/buf/buf0buf.cc | 2 + storage/innobase/buf/buf0dblwr.cc | 31 +- storage/innobase/buf/buf0flu.cc | 324 ++++- storage/innobase/buf/buf0mtflu.cc | 1103 +++++++++++++++++ storage/innobase/buf/buf0rea.cc | 5 +- storage/innobase/dict/dict0dict.cc | 7 +- storage/innobase/fil/fil0fil.cc | 175 ++- storage/innobase/fil/fil0pagecompress.cc | 369 ++++++ storage/innobase/handler/ha_innodb.cc | 242 +++- storage/innobase/handler/ha_innodb.h | 15 + storage/innobase/handler/handler0alter.cc | 28 + storage/innobase/include/buf0buf.h | 6 + storage/innobase/include/dict0dict.h | 14 +- storage/innobase/include/dict0dict.ic | 151 ++- storage/innobase/include/dict0mem.h | 56 +- storage/innobase/include/dict0pagecompress.h | 94 ++ storage/innobase/include/dict0pagecompress.ic | 191 +++ storage/innobase/include/fil0fil.h | 43 +- storage/innobase/include/fil0pagecompress.h | 117 ++ storage/innobase/include/fsp0fsp.h | 66 +- storage/innobase/include/fsp0fsp.ic | 17 + storage/innobase/include/fsp0pagecompress.h | 64 + storage/innobase/include/fsp0pagecompress.ic | 61 + storage/innobase/include/fsp0types.h | 1 + storage/innobase/include/os0file.h | 57 +- storage/innobase/include/os0file.ic | 13 +- storage/innobase/include/srv0mon.h | 10 + storage/innobase/include/srv0srv.h | 64 +- storage/innobase/log/log0log.cc | 17 +- storage/innobase/log/log0recv.cc | 19 +- storage/innobase/os/os0file.cc | 561 ++++++++- storage/innobase/srv/srv0mon.cc | 68 + storage/innobase/srv/srv0srv.cc | 41 +- storage/innobase/srv/srv0start.cc | 720 ++++++++++- 35 files changed, 4559 insertions(+), 196 deletions(-) create mode 100644 storage/innobase/buf/buf0mtflu.cc create mode 100644 storage/innobase/fil/fil0pagecompress.cc create mode 100644 storage/innobase/include/dict0pagecompress.h create mode 100644 storage/innobase/include/dict0pagecompress.ic create mode 100644 storage/innobase/include/fil0pagecompress.h create mode 100644 storage/innobase/include/fsp0pagecompress.h create mode 100644 storage/innobase/include/fsp0pagecompress.ic diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index ee8758a08d2..e41d2406bd2 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -278,6 +278,8 @@ SET(INNOBASE_SOURCES buf/buf0flu.cc buf/buf0lru.cc buf/buf0rea.cc +# TODO: JAN uncomment +# buf/buf0mtflu.cc data/data0data.cc data/data0type.cc dict/dict0boot.cc @@ -291,6 +293,7 @@ SET(INNOBASE_SOURCES eval/eval0eval.cc eval/eval0proc.cc fil/fil0fil.cc + fil/fil0pagecompress.cc fsp/fsp0fsp.cc fut/fut0fut.cc fut/fut0lst.cc diff --git a/storage/innobase/buf/buf0buf.cc b/storage/innobase/buf/buf0buf.cc index 6efa14e6791..328d5a6f3bf 100644 --- a/storage/innobase/buf/buf0buf.cc +++ b/storage/innobase/buf/buf0buf.cc @@ -2,6 +2,7 @@ Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. 
Those modifications are gratefully acknowledged and are described @@ -3254,6 +3255,7 @@ buf_page_init_low( bpage->access_time = 0; bpage->newest_modification = 0; bpage->oldest_modification = 0; + bpage->write_size = 0; HASH_INVALIDATE(bpage, hash); #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG bpage->file_page_was_freed = FALSE; diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index fb853fe1543..933b56eaf88 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -365,8 +366,8 @@ buf_dblwr_init_or_restore_pages( /* Read the trx sys header to check if we are using the doublewrite buffer */ - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0, - UNIV_PAGE_SIZE, read_buf, NULL); + fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0, + UNIV_PAGE_SIZE, read_buf, NULL, 0); doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) @@ -402,11 +403,11 @@ buf_dblwr_init_or_restore_pages( fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, - buf, NULL); - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0, + buf, NULL, 0); + fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, block2, 0, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, - NULL); + NULL, 0); /* Check if any of these pages is half-written in data files, in the intended position */ @@ -433,8 +434,8 @@ buf_dblwr_init_or_restore_pages( + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; } - fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0, - UNIV_PAGE_SIZE, page, NULL); + fil_io(OS_FILE_WRITE, true, 0, 0, source_page_no, 0, + UNIV_PAGE_SIZE, page, NULL, 0); } else { space_id = mach_read_from_4( @@ -476,7 +477,7 @@ buf_dblwr_init_or_restore_pages( fil_io(OS_FILE_READ, TRUE, space_id, zip_size, page_no, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, - read_buf, NULL); + read_buf, NULL, 0); /* Check if the page is corrupt */ @@ -528,7 +529,7 @@ buf_dblwr_init_or_restore_pages( fil_io(OS_FILE_WRITE, TRUE, space_id, zip_size, page_no, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, - page, NULL); + page, NULL, 0); ib_logf(IB_LOG_LEVEL_INFO, "Recovered the page from" @@ -714,7 +715,7 @@ buf_dblwr_write_block_to_datafile( buf_page_get_page_no(bpage), 0, buf_page_get_zip_size(bpage), (void*) bpage->zip.data, - (void*) bpage); + (void*) bpage, 0); return; } @@ -727,7 +728,7 @@ buf_dblwr_write_block_to_datafile( fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, FALSE, buf_block_get_space(block), 0, buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE, - (void*) block->frame, (void*) block); + (void*) block->frame, (void*) block, 0); } /********************************************************************//** @@ -820,7 +821,7 @@ try_again: fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0, buf_dblwr->block1, 0, len, - (void*) write_buf, NULL); + (void*) write_buf, NULL, 0); if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { /* No unwritten pages in the second block. 
*/ @@ -836,7 +837,7 @@ try_again: fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0, buf_dblwr->block2, 0, len, - (void*) write_buf, NULL); + (void*) write_buf, NULL, 0); flush: /* increment the doublewrite flushed pages counter */ @@ -1056,14 +1057,14 @@ retry: fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0, offset, 0, UNIV_PAGE_SIZE, (void*) (buf_dblwr->write_buf - + UNIV_PAGE_SIZE * i), NULL); + + UNIV_PAGE_SIZE * i), NULL, 0); } else { /* It is a regular page. Write it directly to the doublewrite buffer */ fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0, offset, 0, UNIV_PAGE_SIZE, (void*) ((buf_block_t*) bpage)->frame, - NULL); + NULL, 0); } /* Now flush the doublewrite buffer data to disk */ diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 542c1669667..06ae7b5375c 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1,6 +1,8 @@ /***************************************************************************** Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, Fusion-io. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -673,8 +675,10 @@ buf_flush_write_complete( flush_type = buf_page_get_flush_type(bpage); buf_pool->n_flush[flush_type]--; +#ifdef UNIV_DEBUG /* fprintf(stderr, "n pending flush %lu\n", buf_pool->n_flush[flush_type]); */ +#endif if (buf_pool->n_flush[flush_type] == 0 && buf_pool->init_flush[flush_type] == FALSE) { @@ -938,7 +942,7 @@ buf_flush_write_block_low( FALSE, buf_page_get_space(bpage), zip_size, buf_page_get_page_no(bpage), 0, zip_size ? zip_size : UNIV_PAGE_SIZE, - frame, bpage); + frame, bpage, 0); } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) { buf_dblwr_write_single_page(bpage); } else { @@ -1213,7 +1217,9 @@ buf_flush_try_neighbors( } } +#ifdef UNIV_DEBUG /* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */ +#endif if (high > fil_space_get_size(space)) { high = fil_space_get_size(space); @@ -1655,7 +1661,7 @@ pages: to avoid deadlocks, this function must be written so that it cannot end up waiting for these latches! NOTE 2: in the case of a flush list flush, the calling thread is not allowed to own any latches on pages! 
@return number of blocks for which the write request was queued */ -static +//static ulint buf_flush_batch( /*============*/ @@ -1712,7 +1718,7 @@ buf_flush_batch( /******************************************************************//** Gather the aggregated stats for both flush list and LRU list flushing */ -static +//static void buf_flush_common( /*=============*/ @@ -1737,7 +1743,7 @@ buf_flush_common( /******************************************************************//** Start a buffer flush batch for LRU or flush list */ -static +//static ibool buf_flush_start( /*============*/ @@ -1766,7 +1772,7 @@ buf_flush_start( /******************************************************************//** End a buffer flush batch for LRU or flush list */ -static +//static void buf_flush_end( /*==========*/ @@ -1816,11 +1822,55 @@ buf_flush_wait_batch_end( } } else { thd_wait_begin(NULL, THD_WAIT_DISKIO); - os_event_wait(buf_pool->no_flush[type]); + os_event_wait(buf_pool->no_flush[type]); thd_wait_end(NULL); } } +/* JAN: TODO: */ +/*******************************************************************//** +This utility flushes dirty blocks from the end of the LRU list and also +puts replaceable clean pages from the end of the LRU list to the free +list. +NOTE: The calling thread is not allowed to own any latches on pages! +@return true if a batch was queued successfully. false if another batch +of same type was already running. */ +static +bool +pgcomp_buf_flush_LRU( +/*==========*/ + buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ + ulint min_n, /*!< in: wished minimum mumber of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + ulint* n_processed) /*!< out: the number of pages + which were processed is passed + back to caller. 
Ignored if NULL */ +{ + ulint page_count; + + if (n_processed) { + *n_processed = 0; + } + + if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) { + return(false); + } + + page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0); + + buf_flush_end(buf_pool, BUF_FLUSH_LRU); + + buf_flush_common(BUF_FLUSH_LRU, page_count); + + if (n_processed) { + *n_processed = page_count; + } + + return(true); +} +/* JAN: TODO: END: */ + /*******************************************************************//** This utility flushes dirty blocks from the end of the LRU list and also puts replaceable clean pages from the end of the LRU list to the free @@ -1863,6 +1913,168 @@ buf_flush_LRU( return(true); } +/* JAN: TODO: */ +/*******************************************************************//**/ +extern int is_pgcomp_wrk_init_done(void); +extern int pgcomp_flush_work_items(int buf_pool_inst, int *pages_flushed, + int flush_type, int min_n, unsigned long long lsn_limit); + +#define MT_COMP_WATER_MARK 50 + +#include +int timediff(struct timeval *g_time, struct timeval *s_time, struct timeval *d_time) +{ + if (g_time->tv_usec < s_time->tv_usec) + { + int nsec = (s_time->tv_usec - g_time->tv_usec) / 1000000 + 1; + s_time->tv_usec -= 1000000 * nsec; + s_time->tv_sec += nsec; + } + if (g_time->tv_usec - s_time->tv_usec > 1000000) + { + int nsec = (s_time->tv_usec - g_time->tv_usec) / 1000000; + s_time->tv_usec += 1000000 * nsec; + s_time->tv_sec -= nsec; + } + d_time->tv_sec = g_time->tv_sec - s_time->tv_sec; + d_time->tv_usec = g_time->tv_usec - s_time->tv_usec; + + return 0; +} + +static pthread_mutex_t pgcomp_mtx = PTHREAD_MUTEX_INITIALIZER; +/*******************************************************************//** +Multi-threaded version of buf_flush_list +*/ +UNIV_INTERN +bool +pgcomp_buf_flush_list( +/*==================*/ + ulint min_n, /*!< in: wished minimum mumber of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all + blocks whose oldest_modification is + smaller than this should be flushed + (if their number does not exceed + min_n), otherwise ignored */ + ulint* n_processed) /*!< out: the number of pages + which were processed is passed + back to caller. Ignored if NULL */ + +{ + ulint i; + bool success = true; + struct timeval p_start_time, p_end_time, d_time; + + if (n_processed) { + *n_processed = 0; + } + + if (min_n != ULINT_MAX) { + /* Ensure that flushing is spread evenly amongst the + buffer pool instances. When min_n is ULINT_MAX + we need to flush everything up to the lsn limit + so no limit here. 
*/ + min_n = (min_n + srv_buf_pool_instances - 1) + / srv_buf_pool_instances; + } + +#ifdef UNIV_DEBUG + gettimeofday(&p_start_time, 0x0); +#endif + if(is_pgcomp_wrk_init_done() && (min_n > MT_COMP_WATER_MARK)) { + int cnt_flush[32]; + + //stack_trace(); + pthread_mutex_lock(&pgcomp_mtx); + //gettimeofday(&p_start_time, 0x0); + //fprintf(stderr, "Calling into wrk-pgcomp [min:%lu]", min_n); + pgcomp_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LIST, + min_n, lsn_limit); + + for (i = 0; i < srv_buf_pool_instances; i++) { + if (n_processed) { + *n_processed += cnt_flush[i]; + } + if (cnt_flush[i]) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_COUNT, + MONITOR_FLUSH_BATCH_PAGES, + cnt_flush[i]); + + } + } + + pthread_mutex_unlock(&pgcomp_mtx); + +#ifdef UNIV_DEBUG + gettimeofday(&p_end_time, 0x0); + timediff(&p_end_time, &p_start_time, &d_time); + fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu %llu usec]\n", ( + min_n * srv_buf_pool_instances), *n_processed, + (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); +#endif + return(success); + } + /* Flush to lsn_limit in all buffer pool instances */ + for (i = 0; i < srv_buf_pool_instances; i++) { + buf_pool_t* buf_pool; + ulint page_count = 0; + + buf_pool = buf_pool_from_array(i); + + if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) { + /* We have two choices here. If lsn_limit was + specified then skipping an instance of buffer + pool means we cannot guarantee that all pages + up to lsn_limit has been flushed. We can + return right now with failure or we can try + to flush remaining buffer pools up to the + lsn_limit. We attempt to flush other buffer + pools based on the assumption that it will + help in the retry which will follow the + failure. */ + success = false; + + continue; + } + + page_count = buf_flush_batch( + buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit); + + buf_flush_end(buf_pool, BUF_FLUSH_LIST); + + buf_flush_common(BUF_FLUSH_LIST, page_count); + + if (n_processed) { + *n_processed += page_count; + } + + if (page_count) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_COUNT, + MONITOR_FLUSH_BATCH_PAGES, + page_count); + } + } + +#if UNIV_DEBUG + gettimeofday(&p_end_time, 0x0); + timediff(&p_end_time, &p_start_time, &d_time); + + fprintf(stderr, "[2] [*n_processed: (min:%lu)%lu %llu usec]\n", ( + min_n * srv_buf_pool_instances), *n_processed, + (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); +#endif + return(success); +} +#endif +/* JAN: TODO: END: */ + /*******************************************************************//** This utility flushes dirty blocks from the end of the flush list of all buffer pool instances. @@ -1890,6 +2102,12 @@ buf_flush_list( ulint i; bool success = true; + /* JAN: TODO: */ + if (is_pgcomp_wrk_init_done()) { + return(pgcomp_buf_flush_list(min_n, lsn_limit, n_processed)); + } + /* JAN: TODO: END: */ + if (n_processed) { *n_processed = 0; } @@ -2043,6 +2261,59 @@ buf_flush_single_page_from_LRU( return(freed); } +/* JAN: TODO: */ +/*********************************************************************//** +pgcomp_Clears up tail of the LRU lists: +* Put replaceable pages at the tail of LRU to the free list +* Flush dirty pages at the tail of LRU to the disk +The depth to which we scan each buffer pool is controlled by dynamic +config parameter innodb_LRU_scan_depth. 
+@return total pages flushed */ +UNIV_INTERN +ulint +pgcomp_buf_flush_LRU_tail(void) +/*====================*/ +{ + struct timeval p_start_time, p_end_time, d_time; + ulint total_flushed=0, i=0; + int cnt_flush[32]; + +#if UNIV_DEBUG + gettimeofday(&p_start_time, 0x0); +#endif + assert(is_pgcomp_wrk_init_done()); + + pthread_mutex_lock(&pgcomp_mtx); + pgcomp_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); + + for (i = 0; i < srv_buf_pool_instances; i++) { + if (cnt_flush[i]) { + total_flushed += cnt_flush[i]; + + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_LRU_BATCH_TOTAL_PAGE, + MONITOR_LRU_BATCH_COUNT, + MONITOR_LRU_BATCH_PAGES, + cnt_flush[i]); + } + } + + pthread_mutex_unlock(&pgcomp_mtx); + +#if UNIV_DEBUG + gettimeofday(&p_end_time, 0x0); + timediff(&p_end_time, &p_start_time, &d_time); + + fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu %llu usec]\n", ( + srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed, + (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); +#endif + + return(total_flushed); +} +/* JAN: TODO: END: */ + /*********************************************************************//** Clears up tail of the LRU lists: * Put replaceable pages at the tail of LRU to the free list @@ -2056,6 +2327,12 @@ buf_flush_LRU_tail(void) /*====================*/ { ulint total_flushed = 0; + /* JAN: TODO: */ + if(is_pgcomp_wrk_init_done()) + { + return(pgcomp_buf_flush_LRU_tail()); + } + /* JAN: TODO: END */ for (ulint i = 0; i < srv_buf_pool_instances; i++) { @@ -2342,6 +2619,8 @@ page_cleaner_sleep_if_needed( } } + + /******************************************************************//** page_cleaner thread tasked with flushing dirty pages from the buffer pools. As of now we'll have only one instance of this thread. 
@@ -2357,6 +2636,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( ulint next_loop_time = ut_time_ms() + 1000; ulint n_flushed = 0; ulint last_activity = srv_get_activity_count(); + ulint n_lru=0, n_pgc_flush=0, n_pgc_batch=0; ut_ad(!srv_read_only_mode); @@ -2368,7 +2648,6 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( fprintf(stderr, "InnoDB: page_cleaner thread running, id %lu\n", os_thread_pf(os_thread_get_curr_id())); #endif /* UNIV_DEBUG_THREAD_CREATION */ - buf_page_cleaner_is_active = TRUE; while (srv_shutdown_state == SRV_SHUTDOWN_NONE) { @@ -2388,12 +2667,23 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( last_activity = srv_get_activity_count(); /* Flush pages from end of LRU if required */ - n_flushed = buf_flush_LRU_tail(); + n_lru = n_flushed = buf_flush_LRU_tail(); +#ifdef UNIV_DEBUG + if (n_lru) { + fprintf(stderr,"n_lru:%lu ",n_lru); + } +#endif /* Flush pages from flush_list if required */ - n_flushed += page_cleaner_flush_pages_if_needed(); + n_flushed += n_pgc_flush = page_cleaner_flush_pages_if_needed(); + +#ifdef UNIV_DEBUG + if (n_pgc_flush) { + fprintf(stderr,"n_pgc_flush:%lu ",n_pgc_flush); + } +#endif } else { - n_flushed = page_cleaner_do_flush_batch( + n_pgc_batch = n_flushed = page_cleaner_do_flush_batch( PCT_IO(100), LSN_MAX); @@ -2404,7 +2694,18 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( MONITOR_FLUSH_BACKGROUND_PAGES, n_flushed); } +#ifdef UNIV_DEBUG + if (n_pgc_batch) { + fprintf(stderr,"n_pgc_batch:%lu ",n_pgc_batch); + } +#endif } +#ifdef UNIV_DEBUG + if (n_lru || n_pgc_flush || n_pgc_batch) { + fprintf(stderr,"\n"); + n_lru = n_pgc_flush = n_pgc_batch = 0; + } +#endif } ut_ad(srv_shutdown_state > 0); @@ -2573,8 +2874,9 @@ buf_flush_validate( return(ret); } + #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ + #ifdef UNIV_DEBUG /******************************************************************//** diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc new file mode 100644 index 00000000000..7abe0547877 --- /dev/null +++ b/storage/innobase/buf/buf0mtflu.cc @@ -0,0 +1,1103 @@ +/***************************************************************************** + +Copyright (C) 2013 Fusion-io. All Rights Reserved. +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file buf/buf0mtflu.cc +Multi-threaded flush method implementation + +Created 06/11/2013 Dhananjoy Das DDas@fusionio.com +Modified 12/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#include + +#ifdef UNIV_PFS_MUTEX +/* Key to register fil_system_mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t mtflush_mutex_key; +#endif /* UNIV_PFS_MUTEX */ + +/* Mutex to protect critical sections during multi-threaded flush */ +ib_mutex_t mt_flush_mutex; + +#define MT_COMP_WATER_MARK 50 + +/* Work item status */ +typedef enum { + WORK_ITEM_SET=0, /* Work item information set */ + WORK_ITEM_START=1, /* Work item assigned to thread and + execution started */ + WORK_ITEM_DONE=2, /* Work item execution done */ +} mtflu_witem_status_t; + +/* Work thread status */ +typedef enum { + WORK_THREAD_NOT_INIT=0, /* Work thread not initialized */ + WORK_THREAD_INITIALIZED=1, /* Work thread initialized */ + WORK_THREAD_SIG_WAITING=2, /* Work thred signaled */ + WORK_THREAD_RUNNING=3, /* Work thread running */ + WORK_THREAD_NO_WORK=4, /* Work thread has no work to do */ +} mtflu_wthr_status_t; + +/* Structure containing multi-treaded flush thread information */ +typedef struct { + os_thread_t wthread_id; /* Thread id */ + opq_t *wq; /* Write queue ? */ + opq_t *cq; /* Commit queue ?*/ + ib_mutex_t thread_mutex; /* Mutex proecting below + structures */ + mtflu_wthr_status_t thread_status; /* Thread status */ + ib_uint64_t total_num_processed; /* Total number of + pages processed */ + ib_uint64_t cycle_num_processed; /* Numper of pages + processed on last + cycle */ + ulint check_wrk_done_count; /* Number of pages + to process in this + work item ? */ + ulint done_cnt_flag; /* Number of pages + processed in this + work item ?*/ +} mtflu_thread_t; + +struct work_item_t { + /****************************/ + /* Need to group into struct*/ + buf_pool_t* buf_pool; //buffer-pool instance + int flush_type; //flush-type for buffer-pool flush operation + ulint min; //minimum number of pages requested to be flushed + lsn_t lsn_limit; //lsn limit for the buffer-pool flush operation + /****************************/ + + unsigned long result; //flush pages count + unsigned long t_usec; //time-taken in usec + os_thread_t id_usr; /* thread-id + currently working , why ? */ + mtflu_witem_status_t wi_status; /* work item status */ + + UT_LIST_NODE_T(work_node_t) next; +}; + +/* Multi-threaded flush system structure */ +typedef struct { + int pgc_n_threads = 8;// ??? why what this is + + mtflu_thread_t pc_sync[PGCOMP_MAX_WORKER]; + wrk_t work_items[PGCOMP_MAX_WORKER]; + int pgcomp_wrk_initialized = -1; /* ???? */ + opq_t wq; /* write queue ? */ + opq_t cq; /* commit queue ? */ +} mtflu_system_t; + +typedef enum op_q_status { + Q_NOT_INIT=0, + Q_EMPTY=1, + Q_INITIALIZED=2, + Q_PROCESS=3, + Q_DONE=4, + Q_ERROR=5, + Q_STATUS_UNDEFINED +} q_status_t; + +// NOTE: jan: could we use ut/ut0wqueue.(h|cc) +// NOTE: jan: here ????, it would handle waiting, signaling +// and contains simple interface + +typedef struct op_queue +{ + ib_mutex_t mtx; /* Mutex protecting below variables + */ + os_cond_t cv; /* ? 
is waiting here ? */ + q_status_t flag; /* Operation queue status */ + UT_LIST_BASE_NODE_T(work_item_t) work_list; +} opq_t; + + +/*******************************************************************//** +Initialize multi-threaded flush. +*/ +void +buf_mtflu_init(void) +/*================*/ +{ + mutex_create(mtflush_mutex_key, + &mt_flush_mutex, SYNC_ANY_LATCH); +} + +/*******************************************************************//** +This utility flushes dirty blocks from the end of the LRU list and also +puts replaceable clean pages from the end of the LRU list to the free +list. +NOTE: The calling thread is not allowed to own any latches on pages! +@return true if a batch was queued successfully. false if another batch +of same type was already running. */ +bool +buf_mtflu_flush_LRU( +/*================*/ + buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ + ulint min_n, /*!< in: wished minimum mumber of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + ulint* n_processed) /*!< out: the number of pages + which were processed is passed + back to caller. Ignored if NULL */ +{ + ulint page_count; + + if (n_processed) { + *n_processed = 0; + } + + if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) { + return(false); + } + + page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0); + + buf_flush_end(buf_pool, BUF_FLUSH_LRU); + + buf_flush_common(BUF_FLUSH_LRU, page_count); + + if (n_processed) { + *n_processed = page_count; + } + + return(true); +} + +#ifdef UNIV_DEBUG +/*******************************************************************//** +Utility function to calculate time difference between start time +and end time. +@return Time difference. +*/ +UNIV_INTERN +void +mtflu_timediff( +/*===========*/ + struct timeval *g_time, /*!< in/out: Start time*/ + struct timeval *s_time, /*!< in/out: End time */ + struct timeval *d_time) /*!< out: Time difference */ +{ + if (g_time->tv_usec < s_time->tv_usec) + { + int nsec = (s_time->tv_usec - g_time->tv_usec) / 1000000 + 1; + s_time->tv_usec -= 1000000 * nsec; + s_time->tv_sec += nsec; + } + if (g_time->tv_usec - s_time->tv_usec > 1000000) + { + int nsec = (s_time->tv_usec - g_time->tv_usec) / 1000000; + s_time->tv_usec += 1000000 * nsec; + s_time->tv_sec -= nsec; + } + d_time->tv_sec = g_time->tv_sec - s_time->tv_sec; + d_time->tv_usec = g_time->tv_usec - s_time->tv_usec; +} +#endif + +/*******************************************************************//** +This utility flushes dirty blocks from the end of the flush list of +all buffer pool instances. This is multi-threaded version of buf_flush_list. +NOTE: The calling thread is not allowed to own any latches on pages! +@return true if a batch was queued successfully for each buffer pool +instance. false if another batch of same type was already running in +at least one of the buffer pool instance */ +bool +buf_mtflu_flush_list( +/*=================*/ + ulint min_n, /*!< in: wished minimum mumber of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all + blocks whose oldest_modification is + smaller than this should be flushed + (if their number does not exceed + min_n), otherwise ignored */ + ulint* n_processed) /*!< out: the number of pages + which were processed is passed + back to caller. 
Ignored if NULL */ + +{ + ulint i; + bool success = true; + struct timeval p_start_time, p_end_time, d_time; + + if (n_processed) { + *n_processed = 0; + } + + if (min_n != ULINT_MAX) { + /* Ensure that flushing is spread evenly amongst the + buffer pool instances. When min_n is ULINT_MAX + we need to flush everything up to the lsn limit + so no limit here. */ + min_n = (min_n + srv_buf_pool_instances - 1) + / srv_buf_pool_instances; + } + +#ifdef UNIV_DEBUG + gettimeofday(&p_start_time, 0x0); +#endif + if(is_pgcomp_wrk_init_done() && (min_n > MT_COMP_WATER_MARK)) { + int cnt_flush[32]; + + mutex_enter(&mt_flush_mutex); + +#ifdef UNIV_DEBUG + fprintf(stderr, "Calling into wrk-pgcomp [min:%lu]", min_n); +#endif + pgcomp_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LIST, + min_n, lsn_limit); + + for (i = 0; i < srv_buf_pool_instances; i++) { + if (n_processed) { + *n_processed += cnt_flush[i]; + } + if (cnt_flush[i]) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_COUNT, + MONITOR_FLUSH_BATCH_PAGES, + cnt_flush[i]); + + } + } + + mutex_exit(&pgcomp_mtx); + +#ifdef UNIV_DEBUG + gettimeofday(&p_end_time, 0x0); + timediff(&p_end_time, &p_start_time, &d_time); + fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu %llu usec]\n", ( + min_n * srv_buf_pool_instances), *n_processed, + (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); +#endif + return(success); + } + + /* Flush to lsn_limit in all buffer pool instances */ + for (i = 0; i < srv_buf_pool_instances; i++) { + buf_pool_t* buf_pool; + ulint page_count = 0; + + buf_pool = buf_pool_from_array(i); + + if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) { + /* We have two choices here. If lsn_limit was + specified then skipping an instance of buffer + pool means we cannot guarantee that all pages + up to lsn_limit has been flushed. We can + return right now with failure or we can try + to flush remaining buffer pools up to the + lsn_limit. We attempt to flush other buffer + pools based on the assumption that it will + help in the retry which will follow the + failure. */ + success = false; + + continue; + } + + page_count = buf_flush_batch( + buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit); + + buf_flush_end(buf_pool, BUF_FLUSH_LIST); + + buf_flush_common(BUF_FLUSH_LIST, page_count); + + if (n_processed) { + *n_processed += page_count; + } + + if (page_count) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_COUNT, + MONITOR_FLUSH_BATCH_PAGES, + page_count); + } + } + +#ifdef UNIV_DEBUG + gettimeofday(&p_end_time, 0x0); + timediff(&p_end_time, &p_start_time, &d_time); + + fprintf(stderr, "[2] [*n_processed: (min:%lu)%lu %llu usec]\n", ( + min_n * srv_buf_pool_instances), *n_processed, + (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); +#endif + return(success); +} + +/*********************************************************************//** +Clear up tail of the LRU lists: +* Put replaceable pages at the tail of LRU to the free list +* Flush dirty pages at the tail of LRU to the disk +The depth to which we scan each buffer pool is controlled by dynamic +config parameter innodb_LRU_scan_depth. 
+@return total pages flushed */ +ulint +buf_mtflu_flush_LRU_tail(void) +/*==========================*/ +{ + ulint total_flushed=0, i=0; + int cnt_flush[32]; + +#ifdef UNIV_DEBUG + struct timeval p_start_time, p_end_time, d_time; + gettimeofday(&p_start_time, 0x0); +#endif + assert(is_pgcomp_wrk_init_done()); + + mutex_enter(&pgcomp_mtx); + pgcomp_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); + + for (i = 0; i < srv_buf_pool_instances; i++) { + if (cnt_flush[i]) { + total_flushed += cnt_flush[i]; + + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_LRU_BATCH_TOTAL_PAGE, + MONITOR_LRU_BATCH_COUNT, + MONITOR_LRU_BATCH_PAGES, + cnt_flush[i]); + } + } + + mutex_exit(&pgcomp_mtx); + +#if UNIV_DEBUG + gettimeofday(&p_end_time, 0x0); + timediff(&p_end_time, &p_start_time, &d_time); + + fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu %llu usec]\n", ( + srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed, + (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); +#endif + + return(total_flushed); +} + +/*******************************************************************//** +Set work done count to given count. +@return 1 if still work to do, 0 if no work left */ +int +set_check_done_flag_count(int cnt) +/*================*/ +{ + return(check_wrk_done_count = cnt); +} + +/*******************************************************************//** +? +@return why ? */ +int +set_pgcomp_wrk_init_done(void) +/*================*/ +{ + pgcomp_wrk_initialized = 1; + return 0; +} + +/*******************************************************************//** +? +@return true if work is initialized */ +bool +is_pgcomp_wrk_init_done(void) +/*================*/ +{ + return(pgcomp_wrk_initialized == 1); +} + +/*******************************************************************//** +Set current done pages count to the given value +@return number of pages flushed */ +int +set_done_cnt_flag(int val) +/*================*/ +{ + /* + * Assumption: The thread calling into set_done_cnt_flag + * needs to have "cq.mtx" acquired, else not safe. + */ + done_cnt_flag = val; + return done_cnt_flag; +} + +/*******************************************************************//** +? +@return number of pages flushed */ +int +cv_done_inc_flag_sig(thread_sync_t * ppc) +/*================*/ +{ + mutex_enter(&ppc->cq->mtx); + ppc->stat_universal_num_processed++; + ppc->stat_cycle_num_processed++; + done_cnt_flag++; + if(!(done_cnt_flag <= check_wrk_done_count)) { + fprintf(stderr, "ERROR: done_cnt:%d check_wrk_done_count:%d\n", + done_cnt_flag, check_wrk_done_count); + } + assert(done_cnt_flag <= check_wrk_done_count); + mutex_exit(&ppc->cq->mtx); + if(done_cnt_flag == check_wrk_done_count) { + // why below does not need mutex protection ? 
+ ppc->wq->flag = Q_DONE; + mutex_enter(&ppc->cq->mtx); + ppc->cq->flag = Q_DONE; + os_cond_signal(&ppc->cq->cv); + mutex_exit(&ppc->cq->mtx); + } + return(done_cnt_flag); +} + +/*******************************************************************//** +Remove work item from queue, in my opinion not needed after we use +UT_LIST +@return number of pages flushed */ +int +q_remove_wrk(opq_t *q, wrk_t **wi) +/*================*/ +{ + int ret = 0; + + if(!wi || !q) { + return -1; + } + + mutex_enter(&q->mtx); + assert(!((q->tail == NULL) && (q->head != NULL))); + assert(!((q->tail != NULL) && (q->head == NULL))); + + /* get the first in the list*/ + *wi = q->head; + if(q->head) { + ret = 0; + q->head = q->head->next; + (*wi)->next = NULL; + if(!q->head) { + q->tail = NULL; + } + } else { + q->tail = NULL; + ret = 1; /* indicating remove from queue failed */ + } + mutex_exit(&q->mtx); + return (ret); +} + +/*******************************************************************//** +Return true if work item has being assigned to a thread or false +if work item is not assigned. +@return true if work is assigned, false if not */ +bool +is_busy_wrk_itm(wrk_t *wi) +/*================*/ +{ + if(!wi) { + return -1; + } + return(!(wi->id_usr == -1)); +} + +/*******************************************************************//** +Initialize work items. +@return why ? */ +int +setup_wrk_itm(int items) +/*================*/ +{ + int i; + for(i=0; imtx = os_mutex_create(); + os_cond_init(&q->cv); + q->flag = Q_INITIALIZED; + q->head = q->tail = NULL; + + return 0; +} + +/// NEEDED ? +#if 0 +int drain_cq(opq_t *cq, int items) +{ + int i=0; + + if(!cq) { + return -1; + } + mutex_enter(&cq->mtx); + for(i=0; ihead = cq->tail = NULL; + mutex_unlock(&cq->mtx); + return 0; +} +#endif + +/*******************************************************************//** +Insert work item list to queue, not needed with UT_LIST +@return why ? */ +int +q_insert_wrk_list(opq_t *q, wrk_t *w_list) +/*================*/ +{ + if((!q) || (!w_list)) { + fprintf(stderr, "insert failed q:%p w:%p\n", q, w_list); + return -1; + } + + mutex_enter(&q->mtx); + + assert(!((q->tail == NULL) && (q->head != NULL))); + assert(!((q->tail != NULL) && (q->head == NULL))); + + /* list is empty */ + if(!q->tail) { + q->head = q->tail = w_list; + } else { + /* added the first of the node to list */ + assert(q->head != NULL); + q->tail->next = w_list; + } + + /* move tail to the last node */ + while(q->tail->next) { + q->tail = q->tail->next; + } + mutex_exit(&q->mtx); + + return 0; +} + +/*******************************************************************//** +Flush ? +@return why ? */ +int +flush_pool_instance(wrk_t *wi) +/*================*/ +{ + struct timeval p_start_time, p_end_time, d_time; + + if(!wi) { + fprintf(stderr, "work item invalid wi:%p\n", wi); + return -1; + } + + wi->t_usec = 0; + if (!buf_flush_start(wi->buf_pool, (buf_flush_t)wi->flush_type)) { + /* We have two choices here. If lsn_limit was + specified then skipping an instance of buffer + pool means we cannot guarantee that all pages + up to lsn_limit has been flushed. We can + return right now with failure or we can try + to flush remaining buffer pools up to the + lsn_limit. We attempt to flush other buffer + pools based on the assumption that it will + help in the retry which will follow the + failure. 
*/ + fprintf(stderr, "flush_start Failed, flush_type:%d\n", + (buf_flush_t)wi->flush_type); + return -1; + } + +#ifdef UNIV_DEBUG + /* Record time taken for the OP in usec */ + gettimeofday(&p_start_time, 0x0); +#endif + + if((buf_flush_t)wi->flush_type == BUF_FLUSH_LRU) { + /* srv_LRU_scan_depth can be arbitrarily large value. + * We cap it with current LRU size. + */ + buf_pool_mutex_enter(wi->buf_pool); + wi->min = UT_LIST_GET_LEN(wi->buf_pool->LRU); + buf_pool_mutex_exit(wi->buf_pool); + wi->min = ut_min(srv_LRU_scan_depth,wi->min); + } + + wi->result = buf_flush_batch(wi->buf_pool, + (buf_flush_t)wi->flush_type, + wi->min, wi->lsn_limit); + + buf_flush_end(wi->buf_pool, (buf_flush_t)wi->flush_type); + buf_flush_common((buf_flush_t)wi->flush_type, wi->result); + +#ifdef UNIV_DEBUG + gettimeofday(&p_end_time, 0x0); + timediff(&p_end_time, &p_start_time, &d_time); + + wi->t_usec = (unsigned long)(d_time.tv_usec+(d_time.tv_sec*1000000)); +#endif + return 0; +} + +/*******************************************************************//** +? +@return why ? */ +int +service_page_comp_io(thread_sync_t * ppc) +/*================*/ +{ + wrk_t *wi = NULL; + int ret=0; + struct timespec ts; + + mutex_enter(&ppc->wq->mtx); + do{ + ppc->wt_status = WTHR_SIG_WAITING; + ret = os_cond_wait(&ppc->wq->cv, &ppc->wq->mtx); + ppc->wt_status = WTHR_RUNNING; + if(ret == ETIMEDOUT) { + fprintf(stderr, "ERROR ETIMEDOUT cnt_flag:[%d] ret:%d\n", + done_cnt_flag, ret); + } else if(ret == EINVAL || ret == EPERM) { + fprintf(stderr, "ERROR EINVAL/EPERM cnt_flag:[%d] ret:%d\n", + done_cnt_flag, ret); + } + if(ppc->wq->flag == Q_PROCESS) { + break; + } else { + mutex_exit(&ppc->wq->mtx); + return -1; + } + } while (ppc->wq->flag == Q_PROCESS && ret == 0); + + mutex_exit(&ppc->wq->mtx); + + while (ppc->cq->flag == Q_PROCESS) { + wi = NULL; + /* Get the work item */ + if (0 != (ret = q_remove_wrk(ppc->wq, &wi))) { + ppc->wt_status = WTHR_NO_WORK; + return -1; + } + + assert(ret==0); + assert(wi != NULL); + assert(0 == is_busy_wrk_itm(wi)); + assert(wi->id_usr == -1); + + wi->id_usr = ppc->wthread; + wi->wi_status = WRK_ITEM_START; + + /* Process work item */ + if(0 != (ret = flush_pool_instance(wi))) { + fprintf(stderr, "FLUSH op failed ret:%d\n", ret); + wi->wi_status = WRK_ITEM_FAILED; + } + ret = q_insert_wrk_list(ppc->cq, wi); + + assert(0==ret); + assert(check_wrk_done_count >= done_cnt_flag); + wi->wi_status = WRK_ITEM_SUCCESS; + if(check_wrk_done_count == cv_done_inc_flag_sig(ppc)) { + break; + } + } + return(0); +} + +/******************************************************************//** +Thread main function for multi-threaded flush +@return a dummy parameter*/ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(page_comp_io_thread)( +/*==========================================*/ + void * arg) +{ + thread_sync_t *ppc_io = ((thread_sync_t *)arg); + + while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { + service_page_comp_io(ppc_io); + ppc_io->stat_cycle_num_processed = 0; + } + os_thread_exit(NULL); + OS_THREAD_DUMMY_RETURN; +} + +/*******************************************************************//** +Print queue work item +@return why ? 
*/ +int +print_queue_wrk_itm(opq_t *q) +/*================*/ +{ +#if UNIV_DEBUG + wrk_t *wi = NULL; + + if(!q) { + fprintf(stderr, "queue NULL\n"); + return -1; + } + + if(!q->head || !q->tail) { + assert(!(((q->tail==NULL) && (q->head!=NULL)) && ((q->tail != NULL) && (q->head == NULL)))); + fprintf(stderr, "queue empty (h:%p t:%p)\n", q->head, q->tail); + return 0; + } + + mutex_enter(&q->mtx); + for(wi = q->head; (wi != NULL) ; wi = wi->next) { + //fprintf(stderr, "- [%p] %p %lu %luus [%ld] >%p\n", + // wi, wi->buf_pool, wi->result, wi->t_usec, wi->id_usr, wi->next); + fprintf(stderr, "- [%p] [%s] >%p\n", + wi, (wi->id_usr == -1)?"free":"Busy", wi->next); + } + mutex_exit(&q->mtx); +#endif + return(0); +} + +/*******************************************************************//** +Print work list +@return why ? */ +int +print_wrk_list(wrk_t *wi_list) +/*================*/ +{ + wrk_t *wi = wi_list; + int i=0; + + if(!wi_list) { + fprintf(stderr, "list NULL\n"); + } + + while(wi) { + fprintf(stderr, "-\t[%p]\t[%s]\t[%lu]\t[%luus] > %p\n", + wi, (wi->id_usr == -1)?"free":"Busy", wi->result, wi->t_usec, wi->next); + wi = wi->next; + i++; + } + fprintf(stderr, "list len: %d\n", i); + return 0; +} + +/*******************************************************************//** +? +@return why ? */ +int +pgcomp_handler(wrk_t *w_list) +/*================*/ +{ + struct timespec ts; + int ret=0, t_flag=0; + opq_t *wrk_q=NULL, *comp_q=NULL; + wrk_t *tw_list=NULL; + + wrk_q=&wq; + comp_q=&cq; + + mutex_enter(&wrk_q->mtx); + /* setup work queue here.. */ + wrk_q->flag = Q_EMPTY; + mutex_exit(&wrk_q->mtx); + + ret = q_insert_wrk_list(wrk_q, w_list); + if(ret != 0) { + fprintf(stderr, "%s():work-queue setup FAILED wq:%p w_list:%p \n", + __FUNCTION__, &wq, w_list); + return -1; + } + +retry_submit: + mutex_enter(&wrk_q->mtx); + /* setup work queue here.. 
*/ + wrk_q->flag = Q_INITIALIZED; + mutex_exit(&wrk_q->mtx); + + + mutex_enter(&comp_q->mtx); + if(0 != set_done_cnt_flag(0)) { + fprintf(stderr, "FAILED %s:%d\n", __FILE__, __LINE__); + mutex_exit(&comp_q->mtx); + return -1; + } + comp_q->flag = Q_PROCESS; + mutex_enter(&comp_q->mtx); + + /* if threads are waiting request them to start */ + mutex_enter(&wrk_q->mtx); + wrk_q->flag = Q_PROCESS; + os_cond_broadcast(&wrk_q->cv); + mutex_exit(&wrk_q->mtx); + + /* Wait on all worker-threads to complete */ + mutex_enter(&comp_q->mtx); + if (comp_q->flag != Q_DONE) { + do { + os_cond_wait(&comp_q->cv, &comp_q->mtx); + if(comp_q->flag != Q_DONE) { + fprintf(stderr, "[1] cv wait on CQ failed flag:%d cnt:%d\n", + comp_q->flag, done_cnt_flag); + if (done_cnt_flag != srv_buf_pool_instances) { + fprintf(stderr, "[2] cv wait on CQ failed flag:%d cnt:%d\n", + comp_q->flag, done_cnt_flag); + fprintf(stderr, "============\n"); + print_wrk_list(w_list); + fprintf(stderr, "============\n"); + } + continue; + } else if (done_cnt_flag != srv_buf_pool_instances) { + fprintf(stderr, "[3]cv wait on CQ failed flag:%d cnt:%d\n", + comp_q->flag, done_cnt_flag); + fprintf(stderr, "============\n"); + print_wrk_list(w_list); + fprintf(stderr, "============\n"); + comp_q->flag = Q_INITIALIZED; + mutex_exit(&comp_q->mtx); + goto retry_submit; + + ut_ad(!done_cnt_flag); + continue; + } + ut_ad(done_cnt_flag == srv_buf_pool_instances); + + if ((comp_q->flag == Q_DONE) && + (done_cnt_flag == srv_buf_pool_instances)) { + break; + } + } while((comp_q->flag == Q_INITIALIZED) && + (done_cnt_flag != srv_buf_pool_instances)); + } else { + fprintf(stderr, "[4] cv wait on CQ failed flag:%d cnt:%d\n", + comp_q->flag, done_cnt_flag); + if (!done_cnt_flag) { + fprintf(stderr, "============\n"); + print_wrk_list(w_list); + fprintf(stderr, "============\n"); + comp_q->flag = Q_INITIALIZED; + mutex_enter(&comp_q->mtx); + goto retry_submit; + ut_ad(!done_cnt_flag); + } + ut_ad(done_cnt_flag == srv_buf_pool_instances); + } + + mutex_exit(&comp_q->mtx); + mutex_enter(&wrk_q->mtx); + wrk_q->flag = Q_DONE; + mutex_exit(&wrk_q->mtx); + + return 0; +} + +/******************************************************************//** +@return a dummy parameter*/ +int +pgcomp_handler_init( + int num_threads, + int wrk_cnt, + opq_t *wq, + opq_t *cq) +/*================*/ +{ + int i=0; + + if(is_pgcomp_wrk_init_done()) { + fprintf(stderr, "pgcomp_handler_init(): ERROR already initialized\n"); + return -1; + } + + if(!wq || !cq) { + fprintf(stderr, "%s() FAILED wq:%p cq:%p\n", __FUNCTION__, wq, cq); + return -1; + } + + /* work-item setup */ + setup_wrk_itm(wrk_cnt); + + /* wq & cq setup */ + init_queue(wq); + init_queue(cq); + + /* Mark each of the thread sync entires */ + for(i=0; i < PGCOMP_MAX_WORKER; i++) { + pc_sync[i].wthread_id = i; + } + + /* Create threads for page-compression-flush */ + for(i=0; i < num_threads; i++) { + pc_sync[i].wthread_id = i; + pc_sync[i].wq = wq; + pc_sync[i].cq = cq; + os_thread_create(page_comp_io_thread, ((void *)(pc_sync + i)), + thread_ids + START_PGCOMP_CNT + i); + //pc_sync[i].wthread = thread_ids[START_PGCOMP_CNT + i]; + pc_sync[i].wthread = (START_PGCOMP_CNT + i); + pc_sync[i].wt_status = WTHR_INITIALIZED; + } + + set_check_done_flag_count(wrk_cnt); + set_pgcomp_wrk_init_done(); + + return 0; +} + + +/*******************************************************************//** +Print work thread status information +@return why ? 
*/ +int +wrk_thread_stat( + thread_sync_t *wthr, + unsigned int num_threads) +/*================*/ +{ + long stat_tot=0; + int i=0; + for(i=0; izip.data, bpage); + bpage->zip.data, bpage, 0); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); *err = fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, 0, offset, 0, UNIV_PAGE_SIZE, - ((buf_block_t*) bpage)->frame, bpage); + ((buf_block_t*) bpage)->frame, bpage, 0); } thd_wait_end(NULL); diff --git a/storage/innobase/dict/dict0dict.cc b/storage/innobase/dict/dict0dict.cc index a560dc54eac..a382b211275 100644 --- a/storage/innobase/dict/dict0dict.cc +++ b/storage/innobase/dict/dict0dict.cc @@ -2,6 +2,7 @@ Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1446,8 +1447,8 @@ dict_table_rename_in_cache( ibool exists; char* filepath; - ut_ad(table->space != TRX_SYS_SPACE); - + ut_ad(table->space != TRX_SYS_SPACE); + if (DICT_TF_HAS_DATA_DIR(table->flags)) { dict_get_and_save_data_dir_path(table, true); @@ -1459,7 +1460,7 @@ dict_table_rename_in_cache( filepath = fil_make_ibd_name(table->name, false); } - fil_delete_tablespace(table->space, BUF_REMOVE_FLUSH_NO_WRITE); + fil_delete_tablespace(table->space, BUF_REMOVE_FLUSH_NO_WRITE); /* Delete any temp file hanging around. */ if (os_file_status(filepath, &exists, &type) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 1779ae86c46..2bf5922e07d 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1,6 +1,8 @@ /***************************************************************************** Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013 SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -24,6 +26,8 @@ Created 10/25/1995 Heikki Tuuri *******************************************************/ #include "fil0fil.h" +#include "fil0pagecompress.h" +#include "fsp0pagecompress.h" #include #include @@ -54,6 +58,14 @@ Created 10/25/1995 Heikki Tuuri # include "srv0srv.h" static ulint srv_data_read, srv_data_written; #endif /* !UNIV_HOTBACKUP */ +#include "zlib.h" +#ifdef __linux__ +#include +#include +#include +#include +#endif +#include "row0mysql.h" /* IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE @@ -428,11 +440,16 @@ fil_read( block size multiple */ void* buf, /*!< in/out: buffer where to store data read; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync + void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ + ulint write_size) /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. 
*/ { return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset, - byte_offset, len, buf, message)); + byte_offset, len, buf, message, write_size)); } /********************************************************************//** @@ -457,18 +474,22 @@ fil_write( be a block size multiple */ void* buf, /*!< in: buffer from which to write; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync + void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ + ulint write_size) /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ { ut_ad(!srv_read_only_mode); return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset, - byte_offset, len, buf, message)); + byte_offset, len, buf, message, write_size)); } /*******************************************************************//** Returns the table space by a given id, NULL if not found. */ -UNIV_INLINE fil_space_t* fil_space_get_by_id( /*================*/ @@ -486,6 +507,19 @@ fil_space_get_by_id( return(space); } +/****************************************************************//** +Get space id from fil node */ +ulint +fil_node_get_space_id( +/*==================*/ + fil_node_t* node) /*!< in: Compressed node*/ +{ + ut_ad(node); + ut_ad(node->space); + + return (node->space->id); +} + /*******************************************************************//** Returns the table space by a given name, NULL if not found. */ UNIV_INLINE @@ -704,8 +738,9 @@ fil_node_open_file( byte* buf2; byte* page; ulint space_id; - ulint flags; + ulint flags=0; ulint page_size; + ibool atomic_writes=FALSE; ut_ad(mutex_own(&(system->mutex))); ut_a(node->n_pending == 0); @@ -722,7 +757,7 @@ fil_node_open_file( node->handle = os_file_create_simple_no_error_handling( innodb_file_data_key, node->name, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &success); + OS_FILE_READ_ONLY, &success, FALSE); if (!success) { /* The following call prints an error message */ os_file_get_last_error(true); @@ -774,6 +809,8 @@ fil_node_open_file( space_id = fsp_header_get_space_id(page); flags = fsp_header_get_flags(page); page_size = fsp_flags_get_page_size(flags); + atomic_writes = fsp_flags_get_atomic_writes(flags); + ut_free(buf2); @@ -824,6 +861,17 @@ fil_node_open_file( ut_error; } + if (UNIV_UNLIKELY(space->flags != flags)) { + if (!dict_tf_verify_flags(space->flags, flags)) { + fprintf(stderr, + "InnoDB: Error: table flags are 0x%lx" + " in the data dictionary\n" + "InnoDB: but the flags in file %s are 0x%lx!\n", + space->flags, node->name, flags); + ut_error; + } + } + if (size_bytes >= 1024 * 1024) { /* Truncate the size to whole megabytes. 
*/ size_bytes = ut_2pow_round(size_bytes, 1024 * 1024); @@ -843,6 +891,8 @@ add_size: space->size += node->size; } + atomic_writes = fsp_flags_get_atomic_writes(space->flags); + /* printf("Opening file %s\n", node->name); */ /* Open the file for reading and writing, in Windows normally in the @@ -853,18 +903,18 @@ add_size: node->handle = os_file_create(innodb_file_log_key, node->name, OS_FILE_OPEN, OS_FILE_AIO, OS_LOG_FILE, - &ret); + &ret, atomic_writes); } else if (node->is_raw_disk) { node->handle = os_file_create(innodb_file_data_key, node->name, OS_FILE_OPEN_RAW, OS_FILE_AIO, OS_DATA_FILE, - &ret); + &ret, atomic_writes); } else { node->handle = os_file_create(innodb_file_data_key, node->name, OS_FILE_OPEN, OS_FILE_AIO, OS_DATA_FILE, - &ret); + &ret, atomic_writes); } ut_a(ret); @@ -1481,6 +1531,21 @@ fil_space_get_space( if (space->size == 0 && space->purpose == FIL_TABLESPACE) { ut_a(id != 0); + mutex_exit(&fil_system->mutex); + + /* It is possible that the space gets evicted at this point + before the fil_mutex_enter_and_prepare_for_io() acquires + the fil_system->mutex. Check for this after completing the + call to fil_mutex_enter_and_prepare_for_io(). */ + fil_mutex_enter_and_prepare_for_io(id); + + /* We are still holding the fil_system->mutex. Check if + the space is still in memory cache. */ + space = fil_space_get_by_id(id); + if (space == NULL) { + return(NULL); + } + /* The following code must change when InnoDB supports multiple datafiles per tablespace. */ ut_a(1 == UT_LIST_GET_LEN(space->chain)); @@ -1858,12 +1923,12 @@ fil_write_lsn_and_arch_no_to_file( buf = static_cast(ut_align(buf1, UNIV_PAGE_SIZE)); err = fil_read(TRUE, space, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL); + UNIV_PAGE_SIZE, buf, NULL, 0); if (err == DB_SUCCESS) { mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); err = fil_write(TRUE, space, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL); + UNIV_PAGE_SIZE, buf, NULL, 0); } mem_free(buf1); @@ -3095,7 +3160,7 @@ fil_create_link_file( file = os_file_create_simple_no_error_handling( innodb_file_data_key, link_filepath, - OS_FILE_CREATE, OS_FILE_READ_WRITE, &success); + OS_FILE_CREATE, OS_FILE_READ_WRITE, &success, FALSE); if (!success) { /* The following call will print an error message */ @@ -3111,10 +3176,10 @@ fil_create_link_file( ut_print_filename(stderr, filepath); fputs(" already exists.\n", stderr); err = DB_TABLESPACE_EXISTS; - } else if (error == OS_FILE_DISK_FULL) { err = DB_OUT_OF_FILE_SPACE; - + } else if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { + err = DB_UNSUPPORTED; } else { err = DB_ERROR; } @@ -3204,8 +3269,9 @@ fil_open_linked_file( /*===============*/ const char* tablename, /*!< in: database/tablename */ char** remote_filepath,/*!< out: remote filepath */ - os_file_t* remote_file) /*!< out: remote file handle */ - + os_file_t* remote_file, /*!< out: remote file handle */ + ibool atomic_writes) /*!< in: should atomic writes be + used */ { ibool success; @@ -3219,7 +3285,7 @@ fil_open_linked_file( *remote_file = os_file_create_simple_no_error_handling( innodb_file_data_key, *remote_filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, - &success); + &success, atomic_writes); if (!success) { char* link_filepath = fil_make_isl_name(tablename); @@ -3274,6 +3340,7 @@ fil_create_new_single_table_tablespace( /* TRUE if a table is created with CREATE TEMPORARY TABLE */ bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY); bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags); + bool atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); ut_a(space_id > 
0); ut_ad(!srv_read_only_mode); @@ -3306,7 +3373,8 @@ fil_create_new_single_table_tablespace( OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL, OS_DATA_FILE, - &ret); + &ret, + atomic_writes); if (ret == FALSE) { /* The following call will print an error message */ @@ -3333,6 +3401,11 @@ fil_create_new_single_table_tablespace( goto error_exit_3; } + if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { + err = DB_UNSUPPORTED; + goto error_exit_3; + } + if (error == OS_FILE_DISK_FULL) { err = DB_OUT_OF_FILE_SPACE; goto error_exit_3; @@ -3371,6 +3444,7 @@ fil_create_new_single_table_tablespace( flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE); fsp_header_init_fields(page, space_id, flags); mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); + ut_ad(fsp_flags_is_valid(flags)); if (!(fsp_flags_is_compressed(flags))) { buf_flush_init_for_writing(page, NULL, 0); @@ -3547,6 +3621,7 @@ fil_open_single_table_tablespace( fsp_open_info remote; ulint tablespaces_found = 0; ulint valid_tablespaces_found = 0; + ibool atomic_writes = FALSE; #ifdef UNIV_SYNC_DEBUG ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); @@ -3557,6 +3632,8 @@ fil_open_single_table_tablespace( return(DB_CORRUPTION); } + atomic_writes = fsp_flags_get_atomic_writes(flags); + /* If the tablespace was relocated, we do not compare the DATA_DIR flag */ ulint mod_flags = flags & ~FSP_FLAGS_MASK_DATA_DIR; @@ -3581,7 +3658,7 @@ fil_open_single_table_tablespace( } link_file_found = fil_open_linked_file( - tablename, &remote.filepath, &remote.file); + tablename, &remote.filepath, &remote.file, atomic_writes); remote.success = link_file_found; if (remote.success) { /* possibility of multiple files. */ @@ -3609,7 +3686,7 @@ fil_open_single_table_tablespace( if (dict.filepath) { dict.file = os_file_create_simple_no_error_handling( innodb_file_data_key, dict.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &dict.success); + OS_FILE_READ_ONLY, &dict.success, atomic_writes); if (dict.success) { /* possibility of multiple files. */ validate = true; @@ -3621,7 +3698,7 @@ fil_open_single_table_tablespace( ut_a(def.filepath); def.file = os_file_create_simple_no_error_handling( innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &def.success); + OS_FILE_READ_ONLY, &def.success, atomic_writes); if (def.success) { tablespaces_found++; } @@ -4020,7 +4097,7 @@ fil_load_single_table_tablespace( /* Check for a link file which locates a remote tablespace. */ remote.success = fil_open_linked_file( - tablename, &remote.filepath, &remote.file); + tablename, &remote.filepath, &remote.file, FALSE); /* Read the first page of the remote tablespace */ if (remote.success) { @@ -4035,7 +4112,7 @@ fil_load_single_table_tablespace( /* Try to open the tablespace in the datadir. 
*/ def.file = os_file_create_simple_no_error_handling( innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &def.success); + OS_FILE_READ_ONLY, &def.success, FALSE); /* Read the first page of the remote tablespace */ if (def.success) { @@ -4167,7 +4244,7 @@ will_not_choose: new_path = fil_make_ibbackup_old_name(fsp->filepath); bool success = os_file_rename( - innodb_file_data_key, fsp->filepath, new_path)); + innodb_file_data_key, fsp->filepath, new_path); ut_a(success); @@ -4821,7 +4898,7 @@ retry: success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, node->name, node->handle, buf, offset, page_size * n_pages, - NULL, NULL); + NULL, NULL, 0); #endif /* UNIV_HOTBACKUP */ if (success) { os_has_said_disk_full = FALSE; @@ -4852,6 +4929,7 @@ retry: space->size += pages_added; node->size += pages_added; + node->being_extended = FALSE; #ifdef HAVE_POSIX_FALLOCATE complete_io: @@ -4917,7 +4995,7 @@ fil_extend_tablespaces_to_stored_len(void) single-threaded operation */ error = fil_read(TRUE, space->id, fsp_flags_get_zip_size(space->flags), - 0, 0, UNIV_PAGE_SIZE, buf, NULL); + 0, 0, UNIV_PAGE_SIZE, buf, NULL, 0); ut_a(error == DB_SUCCESS); size_in_header = fsp_get_size_low(buf); @@ -5191,8 +5269,13 @@ fil_io( void* buf, /*!< in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync + void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ + ulint write_size) /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ { ulint mode; fil_space_t* space; @@ -5255,6 +5338,9 @@ fil_io( } else if (type == OS_FILE_WRITE) { ut_ad(!srv_read_only_mode); srv_stats.data_written.add(len); + if (fil_page_is_index_page((byte *)buf)) { + srv_stats.index_pages_written.inc(); + } } /* Reserve the fil_system mutex and make sure that we can open at @@ -5371,7 +5457,7 @@ fil_io( #else /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset, len, node, message); + offset, len, node, message, write_size); #endif /* UNIV_HOTBACKUP */ ut_a(ret); @@ -5994,7 +6080,7 @@ fil_tablespace_iterate( file = os_file_create_simple_no_error_handling( innodb_file_data_key, filepath, - OS_FILE_OPEN, OS_FILE_READ_WRITE, &success); + OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE); DBUG_EXECUTE_IF("fil_tablespace_iterate_failure", { @@ -6210,3 +6296,32 @@ fil_mtr_rename_log( mtr_commit(&mtr); } +/****************************************************************//** +Acquire fil_system mutex */ +void +fil_system_enter(void) +/*==================*/ +{ + ut_ad(!mutex_own(&fil_system->mutex)); + mutex_enter(&fil_system->mutex); +} + +/****************************************************************//** +Release fil_system mutex */ +void +fil_system_exit(void) +/*=================*/ +{ + ut_ad(mutex_own(&fil_system->mutex)); + mutex_exit(&fil_system->mutex); +} + +/*******************************************************************//** +Return space name */ +char* +fil_space_name( +/*===========*/ + fil_space_t* space) /*!< in: space */ +{ + return (space->name); +} diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc new file mode 100644 index 00000000000..3926b23c677 --- /dev/null +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -0,0 +1,369 @@ 
+/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file fil/fil0pagecompress.cc +Implementation for page compressed file spaces. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#include "fil0fil.h" +#include "fil0pagecompress.h" + +#include +#include + +#include "mem0mem.h" +#include "hash0hash.h" +#include "os0file.h" +#include "mach0data.h" +#include "buf0buf.h" +#include "buf0flu.h" +#include "log0recv.h" +#include "fsp0fsp.h" +#include "srv0srv.h" +#include "srv0start.h" +#include "mtr0mtr.h" +#include "mtr0log.h" +#include "dict0dict.h" +#include "page0page.h" +#include "page0zip.h" +#include "trx0sys.h" +#include "row0mysql.h" +#ifndef UNIV_HOTBACKUP +# include "buf0lru.h" +# include "ibuf0ibuf.h" +# include "sync0sync.h" +# include "os0sync.h" +#else /* !UNIV_HOTBACKUP */ +# include "srv0srv.h" +static ulint srv_data_read, srv_data_written; +#endif /* !UNIV_HOTBACKUP */ +#include "zlib.h" +#ifdef __linux__ +#include +#include +#include +#include +#endif +#include "row0mysql.h" + +/****************************************************************//** +For page compressed pages compress the page before actual write +operation. +@return compressed page to be written*/ +byte* +fil_compress_page( +/*==============*/ + ulint space_id, /*!< in: tablespace id of the + table. 
*/ + byte* buf, /*!< in: buffer from which to write; in aio + this must be appropriately aligned */ + byte* out_buf, /*!< out: compressed buffer */ + ulint len, /*!< in: length of input buffer.*/ + ulint* out_len) /*!< out: actual length of compressed page */ +{ + int err = Z_OK; + int level = 0; + ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; + ulint write_size=0; + + ut_a(buf); + ut_a(out_buf); + ut_a(len); + ut_a(out_len); + + level = fil_space_get_page_compression_level(space_id); + ut_a(fil_space_is_page_compressed(space_id)); + + fil_system_enter(); + fil_space_t* space = fil_space_get_by_id(space_id); + fil_system_exit(); + + /* If no compression level was provided to this table, use system + default level */ + if (level == 0) { + level = srv_compress_zlib_level; + } + +#ifdef UNIV_DEBUG + fprintf(stderr, + "InnoDB: Note: Preparing for compress for space %lu name %s len %lu\n", + space_id, fil_space_name(space), len); +#endif + + write_size = UNIV_PAGE_SIZE - header_len; + err = compress2(out_buf+header_len, &write_size, buf, len, level); + + if (err != Z_OK) { + /* If error we leave the actual page as it was */ + + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", + space_id, fil_space_name(space), len, err, write_size); + + *out_len = len; + return (buf); + } else { + /* Set up the page header */ + memcpy(out_buf, buf, FIL_PAGE_DATA); + /* Set up the checksum */ + mach_write_to_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC); + /* Set up the correct page type */ + mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); + /* Set up the flush lsn to be compression algorithm */ + mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_ZLIB); + /* Set up the actual payload lenght */ + mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); + +#ifdef UNIV_DEBUG + /* Verify */ + ut_ad(fil_page_is_compressed(out_buf)); + ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); + ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); + ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_ZLIB); +#endif + + write_size+=header_len; + /* Actual write needs to be alligned on block size */ + if (write_size % OS_FILE_LOG_BLOCK_SIZE) { + write_size = (write_size + (OS_FILE_LOG_BLOCK_SIZE - (write_size % OS_FILE_LOG_BLOCK_SIZE))); + } + +#ifdef UNIV_DEBUG + fprintf(stderr, + "InnoDB: Note: Compression succeeded for space %lu name %s len %lu out_len %lu\n", + space_id, fil_space_name(space), len, write_size); +#endif +#define SECT_SIZE 512 + srv_stats.page_compression_saved.add((len - write_size)); + if ((len - write_size) > 0) { + srv_stats.page_compression_trim_sect512.add(((len - write_size) / SECT_SIZE)); + srv_stats.page_compression_trim_sect4096.add(((len - write_size) / (SECT_SIZE*8))); + } + //srv_stats.page_compressed_trim_op.inc(); + srv_stats.pages_page_compressed.inc(); + *out_len = write_size; + + return(out_buf); + } +} + +/****************************************************************//** +For page compressed pages decompress the page after actual read +operation. 
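fil_compress_page() above puts a 40-byte header (FIL_PAGE_DATA plus the 2-byte payload length) in front of the zlib output and then rounds the write up to OS_FILE_LOG_BLOCK_SIZE before updating the trim statistics. A standalone sketch of that size arithmetic, assuming a 16K page and 512-byte blocks as the statistics code above does:

#include <cstddef>

static const size_t kPageSize  = 16384;   /* assumes UNIV_PAGE_SIZE = 16K             */
static const size_t kBlockSize = 512;     /* OS_FILE_LOG_BLOCK_SIZE                   */
static const size_t kHeaderLen = 38 + 2;  /* FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE */

/* Bytes that actually have to be written for one page-compressed page. */
size_t aligned_write_size(size_t compressed_payload)
{
	size_t n = kHeaderLen + compressed_payload;
	if (n % kBlockSize) {
		n += kBlockSize - (n % kBlockSize);  /* round up to the I/O block */
	}
	return n;
}

/* Bytes the write path can hand back to the device with a trim. */
size_t trimmable_bytes(size_t compressed_payload)
{
	size_t used = aligned_write_size(compressed_payload);
	return (used < kPageSize) ? kPageSize - used : 0;
}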
*/ +void +fil_decompress_page( +/*================*/ + byte* page_buf, /*!< in: preallocated buffer or NULL */ + byte* buf, /*!< out: buffer from which to read; in aio + this must be appropriately aligned */ + ulint len) /*!< in: length of output buffer.*/ +{ + int err = 0; + ulint actual_size = 0; + ulint compression_alg = 0; + byte *in_buf; + + ut_a(buf); + ut_a(len); + + /* Before actual decompress, make sure that page type is correct */ + + if (mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) != BUF_NO_CHECKSUM_MAGIC || + mach_read_from_2(buf+FIL_PAGE_TYPE) != FIL_PAGE_PAGE_COMPRESSED) { + fprintf(stderr, + "InnoDB: Corruption: We try to uncompress corrupted page\n" + "InnoDB: CRC %lu type %lu.\n" + "InnoDB: len %lu\n", + mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM), + mach_read_from_2(buf+FIL_PAGE_TYPE), len); + + fflush(stderr); + ut_error; + } + + /* Get compression algorithm */ + compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN); + + if (compression_alg == FIL_PAGE_COMPRESSION_ZLIB) { + // If no buffer was given, we need to allocate temporal buffer + if (page_buf == NULL) { + in_buf = static_cast(ut_malloc(UNIV_PAGE_SIZE)); + } else { + in_buf = page_buf; + } + + /* Get the actual size of compressed page */ + actual_size = mach_read_from_2(buf+FIL_PAGE_DATA); + +#ifdef UNIV_DEBUG + fprintf(stderr, + "InnoDB: Note: Preparing for decompress for len %lu\n", + actual_size); +#endif + + err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size); + + + /* If uncompress fails it means that page is corrupted */ + if (err != Z_OK) { + + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but uncompress failed with error %d.\n" + "InnoDB: size %lu len %lu\n", + err, actual_size, len); + + fflush(stderr); + + ut_error; + } + +#ifdef UNIV_DEBUG + fprintf(stderr, + "InnoDB: Note: Decompression succeeded for len %lu \n", + len); +#endif + + /* Copy the uncompressed page to the buffer pool, not + really any other options. */ + memcpy(buf, in_buf, len); + + // Need to free temporal buffer if no buffer was given + if (page_buf == NULL) { + ut_free(in_buf); + } + + srv_stats.pages_page_decompressed.inc(); + } else { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but compression algorithm %s\n" + "InnoDB: is not known.\n" + ,fil_get_compression_alg_name(compression_alg)); + + fflush(stderr); + ut_error; + } +} + +/*******************************************************************//** +Find out wheather the page is index page or not +@return true if page type index page, false if not */ +ibool +fil_page_is_index_page( +/*===================*/ + byte *buf) /*!< in: page */ +{ + return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_INDEX); +} + +/*******************************************************************//** +Find out wheather the page is page compressed +@return true if page is page compressed, false if not */ +ibool +fil_page_is_compressed( +/*===================*/ + byte *buf) /*!< in: page */ +{ + return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED); +} + +/*******************************************************************//** +Returns the page compression level of the space, or 0 if the space +is not compressed. The tablespace must be cached in the memory cache. 
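fil_decompress_page() above only hands a page to zlib when its type field says FIL_PAGE_PAGE_COMPRESSED and its checksum slot holds BUF_NO_CHECKSUM_MAGIC. A self-contained sketch of that read-side decision; the offsets (type at byte 24, payload length at 38, payload at 40) are assumptions taken from this patch, and the code below is illustrative rather than the patch's own:

#include <zlib.h>
#include <cstddef>
#include <cstring>
#include <stdint.h>

static const size_t   kPageTypeOffset   = 24;     /* FIL_PAGE_TYPE (assumed)       */
static const size_t   kPayloadLenOffset = 38;     /* FIL_PAGE_DATA                 */
static const size_t   kPayloadOffset    = 40;     /* FIL_PAGE_DATA + 2-byte length */
static const uint16_t kPageCompressed   = 34354;  /* FIL_PAGE_PAGE_COMPRESSED      */

static uint16_t read_u16(const unsigned char* p)
{
	return (uint16_t)((p[0] << 8) | p[1]);    /* InnoDB stores fields big-endian */
}

/* Replace a page-compressed image with its plain image; scratch must hold at
least page_size bytes.  Returns false for regular or corrupted pages. */
bool maybe_decompress(unsigned char* page, unsigned char* scratch, size_t page_size)
{
	if (read_u16(page + kPageTypeOffset) != kPageCompressed) {
		return false;                         /* not a page-compressed page */
	}
	uLongf out_len = page_size;
	uLong  payload = read_u16(page + kPayloadLenOffset);
	if (uncompress(scratch, &out_len, page + kPayloadOffset, payload) != Z_OK) {
		return false;                         /* corruption; caller raises the error */
	}
	memcpy(page, scratch, out_len);
	return true;
}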
+@return page compression level, ULINT_UNDEFINED if space not found */ +ulint +fil_space_get_page_compression_level( +/*=================================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return(fsp_flags_get_page_compression_level(flags)); + } + + return(flags); +} + +/*******************************************************************//** +Extract the page compression from space. +@return true if space is page compressed, false if space is not found +or space is not page compressed. */ +ibool +fil_space_is_page_compressed( +/*=========================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return(fsp_flags_is_page_compressed(flags)); + } + + return(flags); +} + +/****************************************************************//** +Get the name of the compression algorithm used for page +compression. +@return compression algorithm name or "UNKNOWN" if not known*/ +const char* +fil_get_compression_alg_name( +/*=========================*/ + ulint comp_alg) /*!tablefile_extensions = ha_innobase_exts; + innobase_hton->table_options = innodb_table_option_list; + ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR); #ifndef DBUG_OFF @@ -3118,8 +3161,6 @@ innobase_change_buffering_inited_ok: srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite; - page_compression_level = (ulint) innobase_compression_level; - if (!innobase_use_checksums) { ut_print_timestamp(stderr); fprintf(stderr, @@ -9465,11 +9506,16 @@ innobase_table_flags( enum row_type row_format; rec_format_t innodb_row_format = REC_FORMAT_COMPACT; bool use_data_dir; + ha_table_option_struct *options= form->s->option_struct; /* Cache the value of innodb_file_format, in case it is modified by another thread while the table is being created. */ const ulint file_format_allowed = srv_file_format; + /* Cache the value of innobase_compression_level, in case it is + modified by another thread while the table is being created. */ + const ulint default_compression_level = innobase_compression_level; + *flags = 0; *flags2 = 0; @@ -9513,6 +9559,8 @@ index_bad: } } + row_format = form->s->row_type; + if (create_info->key_block_size) { /* The requested compressed page size (key_block_size) is given in kilobytes. If it is a valid number, store @@ -9522,7 +9570,7 @@ index_bad: ulint kbsize; /* Key Block Size */ for (zssize = kbsize = 1; zssize <= ut_min(UNIV_PAGE_SSIZE_MAX, - PAGE_ZIP_SSIZE_MAX); + PAGE_ZIP_SSIZE_MAX); zssize++, kbsize <<= 1) { if (kbsize == create_info->key_block_size) { zip_ssize = zssize; @@ -9550,8 +9598,8 @@ index_bad: } if (!zip_allowed - || zssize > ut_min(UNIV_PAGE_SSIZE_MAX, - PAGE_ZIP_SSIZE_MAX)) { + || zssize > ut_min(UNIV_PAGE_SSIZE_MAX, + PAGE_ZIP_SSIZE_MAX)) { push_warning_printf( thd, Sql_condition::WARN_LEVEL_WARN, ER_ILLEGAL_HA_CREATE_OPTION, @@ -9560,8 +9608,6 @@ index_bad: } } - row_format = form->s->row_type; - if (zip_ssize && zip_allowed) { /* if ROW_FORMAT is set to default, automatically change it to COMPRESSED.*/ @@ -9598,7 +9644,6 @@ index_bad: case ROW_TYPE_REDUNDANT: innodb_row_format = REC_FORMAT_REDUNDANT; break; - case ROW_TYPE_COMPRESSED: case ROW_TYPE_DYNAMIC: if (!use_tablespace) { @@ -9616,10 +9661,18 @@ index_bad: " innodb_file_format > Antelope.", get_row_format_name(row_format)); } else { - innodb_row_format = (row_format == ROW_TYPE_DYNAMIC - ? 
REC_FORMAT_DYNAMIC - : REC_FORMAT_COMPRESSED); - break; + switch(row_format) { + case ROW_TYPE_COMPRESSED: + innodb_row_format = REC_FORMAT_COMPRESSED; + break; + case ROW_TYPE_DYNAMIC: + innodb_row_format = REC_FORMAT_DYNAMIC; + break; + default: + /* Not possible, avoid compiler warning */ + break; + } + break; /* Correct row_format */ } zip_allowed = FALSE; /* fall through to set row_format = COMPACT */ @@ -9646,7 +9699,15 @@ index_bad: && ((create_info->data_file_name != NULL) && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)); - dict_tf_set(flags, innodb_row_format, zip_ssize, use_data_dir); + /* Set up table dictionary flags */ + dict_tf_set(flags, + innodb_row_format, + zip_ssize, + use_data_dir, + options->page_compressed, + (ulint)options->page_compression_level == ULINT_UNDEFINED ? + default_compression_level : options->page_compression_level, + options->atomic_writes); if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { *flags2 |= DICT_TF2_TEMPORARY; @@ -9659,6 +9720,111 @@ index_bad: DBUG_RETURN(true); } + +/*****************************************************************//** +Check engine specific table options not handled by SQL-parser. +@return NULL if valid, string if not */ +UNIV_INTERN +const char* +ha_innobase::check_table_options( + THD *thd, /*!< in: thread handle */ + TABLE* table, /*!< in: information on table + columns and indexes */ + HA_CREATE_INFO* create_info, /*!< in: more information of the + created table, contains also the + create statement string */ + const bool use_tablespace, /*!< in: use file par table */ + const ulint file_format) +{ + enum row_type row_format = table->s->row_type;; + ha_table_option_struct *options= table->s->option_struct; + + /* Check page compression requirements */ + if (options->page_compressed) { + if (!srv_compress_pages) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED requires" + "innodb_compress_pages not enabled"); + return "PAGE_COMPRESSED"; + } + + if (row_format == ROW_TYPE_COMPRESSED) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED table can't have" + " ROW_TYPE=COMPRESSED"); + return "PAGE_COMPRESSED"; + } + + if (!use_tablespace) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED requires" + " innodb_file_per_table."); + return "PAGE_COMPRESSED"; + } + + if (file_format < UNIV_FORMAT_B) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED requires" + " innodb_file_format > Antelope."); + return "PAGE_COMPRESSED"; + } + + if (create_info->key_block_size) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED table can't have" + " key_block_size"); + return "PAGE_COMPRESSED"; + } + } + + /* Check page compression level requirements, some of them are + already checked above */ + if ((ulint)options->page_compression_level != ULINT_UNDEFINED) { + if (options->page_compressed == false) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSION_LEVEL requires" + " PAGE_COMPRESSED"); + return "PAGE_COMPRESSION_LEVEL"; + } + + if (options->page_compression_level < 0 || options->page_compression_level > 9) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu." 
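The KEY_BLOCK_SIZE handling above turns the kilobyte value into a zip shift size by walking powers of two. In isolation the idea is the following; the upper limit of 16K is an assumption standing in for ut_min(UNIV_PAGE_SSIZE_MAX, PAGE_ZIP_SSIZE_MAX):

/* Map a KEY_BLOCK_SIZE given in kilobytes to a shift size. */
unsigned key_block_size_to_ssize(unsigned long kb)
{
	unsigned long kbsize = 1;
	for (unsigned zssize = 1; kbsize <= 16; zssize++, kbsize <<= 1) {
		if (kbsize == kb) {
			return zssize;      /* 1K -> 1, 2K -> 2, ... 16K -> 5 */
		}
	}
	return 0;                           /* invalid value; caller warns */
}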
+ " Valid values are [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", + create_info->key_block_size); + return "PAGE_COMPRESSION_LEVEL"; + } + } + + /* Check atomic writes requirements */ + if (options->atomic_writes) { + if (!srv_use_atomic_writes && !use_tablespace) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: ATOMIC_WRITES requires" + " innodb_file_per_table."); + return "ATOMIC_WRITES"; + } + } + + return 0; +} + /*****************************************************************//** Creates a new table to an InnoDB database. @return error number */ @@ -9690,6 +9856,7 @@ ha_innobase::create( while creating the table. So we read the current value here and make all further decisions based on this. */ bool use_tablespace = srv_file_per_table; + const ulint file_format = srv_file_format; /* Zip Shift Size - log2 - 9 of compressed page size, zero for uncompressed */ @@ -9713,6 +9880,12 @@ ha_innobase::create( /* Create the table definition in InnoDB */ + /* Validate table options not handled by the SQL-parser */ + if(check_table_options(thd, form, create_info, use_tablespace, + file_format)) { + DBUG_RETURN(HA_WRONG_CREATE_OPTION); + } + /* Validate create options if innodb_strict_mode is set. */ if (create_options_are_invalid( thd, form, create_info, use_tablespace)) { @@ -13952,6 +14125,12 @@ ha_innobase::check_if_incompatible_data( HA_CREATE_INFO* info, uint table_changes) { + ha_table_option_struct *param_old, *param_new; + + /* Cache engine specific options */ + param_new = info->option_struct; + param_old = table->s->option_struct; + innobase_copy_frm_flags_from_create_info(prebuilt->table, info); if (table_changes != IS_EQUAL_YES) { @@ -13978,6 +14157,13 @@ ha_innobase::check_if_incompatible_data( return(COMPATIBLE_DATA_NO); } + /* Changes on engine specific table options requests a rebuild of the table. 
*/ + if (param_new->page_compressed != param_old->page_compressed || + param_new->page_compression_level != param_old->page_compression_level || + param_new->atomic_writes != param_old->atomic_writes) { + return(COMPATIBLE_DATA_NO); + } + return(COMPATIBLE_DATA_YES); } @@ -16447,6 +16633,31 @@ static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug, NULL, NULL, FALSE); #endif /* UNIV_DEBUG */ +static MYSQL_SYSVAR_BOOL(compress_pages, srv_compress_pages, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Use page compression.", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_LONG(trim_pct, srv_trim_pct, + PLUGIN_VAR_OPCMDARG , + "How many percent of compressed pages should be trimmed", + NULL, NULL, 100, 0, 100, 0); + +static MYSQL_SYSVAR_LONG(compress_zlib_level, srv_compress_zlib_level, + PLUGIN_VAR_OPCMDARG , + "Default zlib compression level", + NULL, NULL, 6, 0, 9, 0); + +static MYSQL_SYSVAR_BOOL(compress_index_pages, srv_page_compress_index_pages, + PLUGIN_VAR_OPCMDARG, + "Use page compression for only index pages.", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, + PLUGIN_VAR_OPCMDARG, + "Use trim.", + NULL, NULL, TRUE); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(api_trx_level), @@ -16592,6 +16803,11 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(limit_optimistic_insert_debug), MYSQL_SYSVAR(trx_purge_view_update_only_debug), #endif /* UNIV_DEBUG */ + MYSQL_SYSVAR(compress_pages), + MYSQL_SYSVAR(trim_pct), + MYSQL_SYSVAR(compress_zlib_level), + MYSQL_SYSVAR(compress_index_pages), + MYSQL_SYSVAR(use_trim), NULL }; diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index ece9f7cf58a..5eb460072bb 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -56,6 +57,18 @@ typedef struct st_innobase_share { /** Prebuilt structures in an InnoDB table handle used within MySQL */ struct row_prebuilt_t; +/** Engine specific table options are definined using this struct */ +struct ha_table_option_struct +{ + bool page_compressed; /*!< Table is using page compression + if this option is true. */ + int page_compression_level; /*!< Table page compression level + or UNIV_UNSPECIFIED. */ + bool atomic_writes; /*!< Use atomic writes for this + table if this options is true. 
*/ +}; + + /** The class defining a handle to an Innodb table */ class ha_innobase: public handler { @@ -182,6 +195,8 @@ class ha_innobase: public handler char* norm_name, char* temp_path, char* remote_path); + const char* check_table_options(THD *thd, TABLE* table, + HA_CREATE_INFO* create_info, const bool use_tablespace, const ulint file_format); int create(const char *name, register TABLE *form, HA_CREATE_INFO *create_info); int truncate(); diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index a120534b36d..49f8a05d11a 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -1,6 +1,7 @@ /***************************************************************************** 
 Copyright (c) 2005, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -248,6 +249,22 @@ ha_innobase::check_if_supported_inplace_alter( update_thd(); trx_search_latch_release_if_reserved(prebuilt->trx); + /* Changes to engine specific table options require a rebuild of the + table */ + if (ha_alter_info->handler_flags + == Alter_inplace_info::CHANGE_CREATE_OPTION) { + ha_table_option_struct *new_options= ha_alter_info->create_info->option_struct; + ha_table_option_struct *old_options= table->s->option_struct; + + if (new_options->page_compressed != old_options->page_compressed || + new_options->page_compression_level != old_options->page_compression_level || + new_options->atomic_writes != old_options->atomic_writes) { + ha_alter_info->unsupported_reason = innobase_get_err_msg( + ER_ALTER_OPERATION_NOT_SUPPORTED_REASON); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + } + if (ha_alter_info->handler_flags & ~(INNOBASE_ONLINE_OPERATIONS | INNOBASE_INPLACE_REBUILD)) { if (ha_alter_info->handler_flags @@ -3331,6 +3348,17 @@ ha_innobase::prepare_inplace_alter_table( if (ha_alter_info->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) { + /* Check engine specific table options */ + if (const char* invalid_tbopt = check_table_options( + user_thd, altered_table, + ha_alter_info->create_info, + prebuilt->table->space != 0, + srv_file_format)) { + my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), + table_type(), invalid_tbopt); + goto err_exit_no_heap; + } + if (const char* invalid_opt = create_options_are_invalid( user_thd, altered_table, ha_alter_info->create_info, diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 74a6e203808..5e301a27e32 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -1,6 +1,7 @@ /***************************************************************************** 
 Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1470,6 +1471,11 @@ struct buf_page_t{ state == BUF_BLOCK_ZIP_PAGE and zip.data == NULL means an active buf_pool->watch */ + + ulint write_size; /* Write size is set when this + page is written for the first time and, + on later writes, used to check whether + a TRIM operation is needed. 
*/ #ifndef UNIV_HOTBACKUP buf_page_t* hash; /*!< node used in chaining to buf_pool->page_hash or diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index af0a5b31cc4..0ca64956a2e 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -2,6 +2,7 @@ Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -42,6 +43,8 @@ Created 1/8/1996 Heikki Tuuri #include "ut0byte.h" #include "trx0types.h" #include "row0types.h" +#include "fsp0fsp.h" +#include "dict0pagecompress.h" #ifndef UNIV_HOTBACKUP # include "sync0sync.h" @@ -878,7 +881,14 @@ dict_tf_set( ulint* flags, /*!< in/out: table */ rec_format_t format, /*!< in: file format */ ulint zip_ssize, /*!< in: zip shift size */ - bool remote_path) /*!< in: table uses DATA DIRECTORY */ + bool remote_path, /*!< in: table uses DATA DIRECTORY + */ + bool page_compressed,/*!< in: table uses page compressed + pages */ + ulint page_compression_level, /*!< in: table page compression + level */ + bool atomic_writes) /*!< in: table uses atomic + writes */ __attribute__((nonnull)); /********************************************************************//** Convert a 32 bit integer table flags to the 32 bit integer that is @@ -906,6 +916,7 @@ dict_tf_get_zip_size( /*=================*/ ulint flags) /*!< in: flags */ __attribute__((const)); + /********************************************************************//** Check whether the table uses the compressed compact page format. @return compressed page size, or 0 if not compressed */ @@ -1779,6 +1790,7 @@ dict_tf_to_row_format_string( #endif /* !UNIV_HOTBACKUP */ + #ifndef UNIV_NONINL #include "dict0dict.ic" #endif diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 83953c9325a..65967552b87 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -537,9 +538,25 @@ dict_tf_is_valid( ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags); ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags); ulint unused = DICT_TF_GET_UNUSED(flags); + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(flags); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags); + ulint data_dir = DICT_TF_HAS_DATA_DIR(flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags); /* Make sure there are no bits that we do not know about. 
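The new buf_page_t::write_size field introduced just above records how many bytes the previous write of a page-compressed page actually occupied, so the write path can skip trims that would not free anything. A sketch of the decision this enables, inferred from the field's comment rather than taken from code in this patch:

/* Stand-in for the page descriptor; write_size plays the role of the new
buf_page_t::write_size. */
struct my_page_slot {
	unsigned long write_size;   /* 0 until the first successful write */
};

/* Trim only when the on-disk footprint of the page shrank. */
bool trim_needed(my_page_slot& slot, unsigned long new_write_size)
{
	if (slot.write_size == 0 || new_write_size < slot.write_size) {
		slot.write_size = new_write_size;
		return true;        /* punch out the now unused tail of the slot */
	}
	return false;               /* same size or larger, nothing to give back */
}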
*/ if (unused != 0) { + fprintf(stderr, + "InnoDB: Error: table unused flags are %ld" + " in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + unused, + compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); return(false); @@ -550,12 +567,34 @@ dict_tf_is_valid( data stored off-page in the clustered index. */ if (!compact) { + fprintf(stderr, + "InnoDB: Error: table compact flags are %ld" + " in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + compact, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); return(false); } } else if (zip_ssize) { /* Antelope does not support COMPRESSED row format. */ + fprintf(stderr, + "InnoDB: Error: table flags are %ld" + " in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); return(false); } @@ -568,6 +607,40 @@ dict_tf_is_valid( || !atomic_blobs || zip_ssize > PAGE_ZIP_SSIZE_MAX) { + fprintf(stderr, + "InnoDB: Error: table compact flags are %ld in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, + compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + + ); + return(false); + } + } + + if (page_compression || page_compression_level) { + /* Page compression format must have compact and + atomic_blobs and page_compression_level requires + page_compression */ + if (!compact + || !page_compression + || !atomic_blobs) { + + fprintf(stderr, + "InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); return(false); } } @@ -594,6 +667,9 @@ dict_sys_tables_type_validate( ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(type); ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type); ulint unused = DICT_TF_GET_UNUSED(type); + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(type); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type); /* The low order bit of SYS_TABLES.TYPE is always set to 1. 
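dict_tf_is_valid() above now enforces dependencies between the new bits. Restated as a condensed standalone predicate (the real code also validates zip_ssize and prints the diagnostics shown above):

bool new_flag_bits_consistent(bool compact, bool atomic_blobs,
			      bool page_compression, unsigned page_compression_level,
			      bool atomic_writes)
{
	if (page_compression || page_compression_level) {
		if (!compact || !page_compression || !atomic_blobs) {
			return false;   /* level without the compression bit, or pre-Barracuda format */
		}
	}
	if (atomic_writes && !atomic_blobs) {
		return false;           /* also checked in dict_sys_tables_type_validate() */
	}
	return true;
}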
If the format is UNIV_FORMAT_B or higher, this field is the same @@ -647,6 +723,23 @@ dict_sys_tables_type_validate( format, so the DATA_DIR flag is compatible with any other table flags. However, it is not used with TEMPORARY tables.*/ + if (page_compression || page_compression_level) { + /* page compressed row format must have low_order_bit and + atomic_blobs bits set and the DICT_N_COLS_COMPACT flag + should be in N_COLS, but we already know about the + low_order_bit and DICT_N_COLS_COMPACT flags. */ + + if (!atomic_blobs || !page_compression) { + return(ULINT_UNDEFINED); + } + } + + if (atomic_writes) { + if (!atomic_blobs) { + return(ULINT_UNDEFINED); + } + } + /* Return the validated SYS_TABLES.TYPE. */ return(type); } @@ -719,7 +812,14 @@ dict_tf_set( ulint* flags, /*!< in/out: table flags */ rec_format_t format, /*!< in: file format */ ulint zip_ssize, /*!< in: zip shift size */ - bool use_data_dir) /*!< in: table uses DATA DIRECTORY */ + bool use_data_dir, /*!< in: table uses DATA DIRECTORY + */ + bool page_compressed,/*!< in: table uses page compressed + pages */ + ulint page_compression_level, /*!< in: table page compression + level */ + bool atomic_writes) /*!< in: table uses atomic + writes */ { switch (format) { case REC_FORMAT_REDUNDANT: @@ -742,6 +842,22 @@ dict_tf_set( break; } + if (page_compressed) { + *flags = DICT_TF_COMPACT + | (1 << DICT_TF_POS_ATOMIC_BLOBS) + | (1 << DICT_TF_POS_PAGE_COMPRESSION) + | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL); + + ut_ad(zip_ssize == 0); + ut_ad(dict_tf_get_page_compression(*flags) == TRUE); + ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level); + } + + if (atomic_writes) { + *flags |= (1 << DICT_TF_POS_ATOMIC_WRITES); + ut_ad(dict_tf_get_atomic_writes(*flags) == TRUE); + } + if (use_data_dir) { *flags |= (1 << DICT_TF_POS_DATA_DIR); } @@ -765,6 +881,9 @@ dict_tf_to_fsp_flags( ulint table_flags) /*!< in: dict_table_t::flags */ { ulint fsp_flags; + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure", return(ULINT_UNDEFINED);); @@ -783,7 +902,20 @@ dict_tf_to_fsp_flags( fsp_flags |= DICT_TF_HAS_DATA_DIR(table_flags) ? FSP_FLAGS_MASK_DATA_DIR : 0; + /* In addition, tablespace flags also contain if the page + compression is used for this table. */ + fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION(fsp_flags, page_compression); + + /* In addition, tablespace flags also contain page compression level + if page compression is used for this table. */ + fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, page_compression_level); + + /* In addition, tablespace flags also contain flag if atomic writes + is used for this table */ + fsp_flags |= FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, atomic_writes); + ut_a(fsp_flags_is_valid(fsp_flags)); + ut_a(dict_tf_verify_flags(table_flags, fsp_flags)); return(fsp_flags); } @@ -811,10 +943,15 @@ dict_sys_tables_type_to_tf( /* Adjust bit zero. */ flags = redundant ? 0 : 1; - /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */ + /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION, + PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. 
*/ flags |= type & (DICT_TF_MASK_ZIP_SSIZE | DICT_TF_MASK_ATOMIC_BLOBS - | DICT_TF_MASK_DATA_DIR); + | DICT_TF_MASK_DATA_DIR + | DICT_TF_MASK_PAGE_COMPRESSION + | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL + | DICT_TF_MASK_ATOMIC_WRITES + ); return(flags); } @@ -842,10 +979,14 @@ dict_tf_to_sys_tables_type( /* Adjust bit zero. It is always 1 in SYS_TABLES.TYPE */ type = 1; - /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */ + /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION, + PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */ type |= flags & (DICT_TF_MASK_ZIP_SSIZE | DICT_TF_MASK_ATOMIC_BLOBS - | DICT_TF_MASK_DATA_DIR); + | DICT_TF_MASK_DATA_DIR + | DICT_TF_MASK_PAGE_COMPRESSION + | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL + | DICT_TF_MASK_ATOMIC_WRITES); return(type); } diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 671f67eb1f8..6cfcb81bcd5 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -2,6 +2,7 @@ Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -120,11 +121,25 @@ This flag prevents older engines from attempting to open the table and allows InnoDB to update_create_info() accordingly. */ #define DICT_TF_WIDTH_DATA_DIR 1 +/** +Width of the page compression flag +*/ +#define DICT_TF_WIDTH_PAGE_COMPRESSION 1 +#define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4 + +/** +Width of atomic writes flag +*/ +#define DICT_TF_WIDTH_ATOMIC_WRITES 1 + /** Width of all the currently known table flags */ #define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \ + DICT_TF_WIDTH_ZIP_SSIZE \ + DICT_TF_WIDTH_ATOMIC_BLOBS \ - + DICT_TF_WIDTH_DATA_DIR) + + DICT_TF_WIDTH_DATA_DIR \ + + DICT_TF_WIDTH_PAGE_COMPRESSION \ + + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \ + + DICT_TF_WIDTH_ATOMIC_WRITES) /** A mask of all the known/used bits in table flags */ #define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS)) @@ -140,9 +155,19 @@ allows InnoDB to update_create_info() accordingly. */ /** Zero relative shift position of the DATA_DIR field */ #define DICT_TF_POS_DATA_DIR (DICT_TF_POS_ATOMIC_BLOBS \ + DICT_TF_WIDTH_ATOMIC_BLOBS) +/** Zero relative shift position of the PAGE_COMPRESSION field */ +#define DICT_TF_POS_PAGE_COMPRESSION (DICT_TF_POS_DATA_DIR \ + + DICT_TF_WIDTH_DATA_DIR) +/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */ +#define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \ + + DICT_TF_WIDTH_PAGE_COMPRESSION) +/** Zero relative shift position of the ATOMIC_WRITES field */ +#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \ + + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL) + /** Zero relative shift position of the start of the UNUSED bits */ -#define DICT_TF_POS_UNUSED (DICT_TF_POS_DATA_DIR \ - + DICT_TF_WIDTH_DATA_DIR) +#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_WRITES \ + + DICT_TF_WIDTH_ATOMIC_WRITES) /** Bit mask of the COMPACT field */ #define DICT_TF_MASK_COMPACT \ @@ -160,6 +185,18 @@ allows InnoDB to update_create_info() accordingly. 
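The two conversions above share every known field between SYS_TABLES.TYPE and dict_table_t::flags, including the new page compression and atomic writes bits, and only adjust bit 0 (always 1 in TYPE, COMPACT in the table flags). Sketch with an illustrative carried-bits mask, not a server constant:

/* Bits 1..12: zip_ssize, atomic_blobs, data_dir, page_compression,
page_compression_level and atomic_writes, per the widths in this patch. */
static const unsigned long kCarriedBits = 0x1FFEUL;

unsigned long sys_tables_type_to_flags(unsigned long type, bool redundant)
{
	unsigned long flags = redundant ? 0 : 1;   /* bit 0 = COMPACT           */
	flags |= type & kCarriedBits;              /* everything else is shared */
	return flags;
}

unsigned long flags_to_sys_tables_type(unsigned long flags)
{
	unsigned long type = 1;                    /* bit 0 is always 1 in TYPE */
	type |= flags & kCarriedBits;
	return type;
}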
*/ #define DICT_TF_MASK_DATA_DIR \ ((~(~0 << DICT_TF_WIDTH_DATA_DIR)) \ << DICT_TF_POS_DATA_DIR) +/** Bit mask of the PAGE_COMPRESSION field */ +#define DICT_TF_MASK_PAGE_COMPRESSION \ + ((~(~0 << DICT_TF_WIDTH_PAGE_COMPRESSION)) \ + << DICT_TF_POS_PAGE_COMPRESSION) +/** Bit mask of the PAGE_COMPRESSION_LEVEL field */ +#define DICT_TF_MASK_PAGE_COMPRESSION_LEVEL \ + ((~(~0 << DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)) \ + << DICT_TF_POS_PAGE_COMPRESSION_LEVEL) +/** Bit mask of the ATOMIC_WRITES field */ +#define DICT_TF_MASK_ATOMIC_WRITES \ + ((~(~0 << DICT_TF_WIDTH_ATOMIC_WRITES)) \ + << DICT_TF_POS_ATOMIC_WRITES) /** Return the value of the COMPACT field */ #define DICT_TF_GET_COMPACT(flags) \ @@ -177,6 +214,19 @@ allows InnoDB to update_create_info() accordingly. */ #define DICT_TF_HAS_DATA_DIR(flags) \ ((flags & DICT_TF_MASK_DATA_DIR) \ >> DICT_TF_POS_DATA_DIR) +/** Return the value of the PAGE_COMPRESSION field */ +#define DICT_TF_GET_PAGE_COMPRESSION(flags) \ + ((flags & DICT_TF_MASK_PAGE_COMPRESSION) \ + >> DICT_TF_POS_PAGE_COMPRESSION) +/** Return the value of the PAGE_COMPRESSION_LEVEL field */ +#define DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags) \ + ((flags & DICT_TF_MASK_PAGE_COMPRESSION_LEVEL) \ + >> DICT_TF_POS_PAGE_COMPRESSION_LEVEL) +/** Return the value of the ATOMIC_WRITES field */ +#define DICT_TF_GET_ATOMIC_WRITES(flags) \ + ((flags & DICT_TF_MASK_ATOMIC_WRITES) \ + >> DICT_TF_POS_ATOMIC_WRITES) + /** Return the contents of the UNUSED bits */ #define DICT_TF_GET_UNUSED(flags) \ (flags >> DICT_TF_POS_UNUSED) diff --git a/storage/innobase/include/dict0pagecompress.h b/storage/innobase/include/dict0pagecompress.h new file mode 100644 index 00000000000..236924758f1 --- /dev/null +++ b/storage/innobase/include/dict0pagecompress.h @@ -0,0 +1,94 @@ +/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/dict0pagecompress.h +Helper functions for extracting/storing page compression information +to dictionary. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#ifndef dict0pagecompress_h +#define dict0pagecompress_h + +/********************************************************************//** +Extract the page compression level from table flags. 
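The DICT_TF_POS_* and DICT_TF_MASK_* macros above all follow the same recipe: positions are running sums of the field widths, and a mask is a field-width worth of ones shifted to the field position. Written out with plain constants (widths taken from this patch):

static const unsigned kWidthCompact       = 1;
static const unsigned kWidthZipSsize      = 4;
static const unsigned kWidthAtomicBlobs   = 1;
static const unsigned kWidthDataDir       = 1;
static const unsigned kWidthPageComp      = 1;
static const unsigned kWidthPageCompLevel = 4;

static const unsigned kPosPageComp      = kWidthCompact + kWidthZipSsize
					+ kWidthAtomicBlobs + kWidthDataDir;        /* 7  */
static const unsigned kPosPageCompLevel = kPosPageComp + kWidthPageComp;            /* 8  */
static const unsigned kPosAtomicWrites  = kPosPageCompLevel + kWidthPageCompLevel;  /* 12 */

/* mask = field-width worth of ones, shifted to the field position */
static const unsigned long kMaskPageCompLevel =
	((1UL << kWidthPageCompLevel) - 1) << kPosPageCompLevel;

unsigned long get_page_compression_level(unsigned long flags)
{
	return (flags & kMaskPageCompLevel) >> kPosPageCompLevel;
}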
+@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_tf_get_page_compression_level( +/*===============================*/ + ulint flags) /*!< in: flags */ + __attribute__((const)); +/********************************************************************//** +Extract the page compression flag from table flags +@return page compression flag, or false if not compressed */ +UNIV_INLINE +ibool +dict_tf_get_page_compression( +/*==========================*/ + ulint flags) /*!< in: flags */ + __attribute__((const)); + +/********************************************************************//** +Check whether the table uses the page compressed page format. +@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_table_page_compression_level( +/*==============================*/ + const dict_table_t* table) /*!< in: table */ + __attribute__((const)); + +/********************************************************************//** +Verify that dictionary flags match tablespace flags +@return true if flags match, false if not */ +UNIV_INLINE +ibool +dict_tf_verify_flags( +/*=================*/ + ulint table_flags, /*!< in: dict_table_t::flags */ + ulint fsp_flags) /*!< in: fil_space_t::flags */ + __attribute__((const)); + +/********************************************************************//** +Extract the atomic writes flag from table flags. +@return true if atomic writes are used, false if not used */ +UNIV_INLINE +ibool +dict_tf_get_atomic_writes( +/*======================*/ + ulint flags) /*!< in: flags */ + __attribute__((const)); + +/********************************************************************//** +Check whether the table uses the atomic writes. +@return true if atomic writes is used, false if not */ +UNIV_INLINE +ibool +dict_table_get_atomic_writes( +/*=========================*/ + const dict_table_t* table); /*!< in: table */ + + +#ifndef UNIV_NONINL +#include "dict0pagecompress.ic" +#endif + +#endif diff --git a/storage/innobase/include/dict0pagecompress.ic b/storage/innobase/include/dict0pagecompress.ic new file mode 100644 index 00000000000..98b64723542 --- /dev/null +++ b/storage/innobase/include/dict0pagecompress.ic @@ -0,0 +1,191 @@ +/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/dict0pagecompress.ic +Inline implementation for helper functions for extracting/storing +page compression and atomic writes information to dictionary. 
+ +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +/********************************************************************//** +Verify that dictionary flags match tablespace flags +@return true if flags match, false if not */ +UNIV_INLINE +ibool +dict_tf_verify_flags( +/*=================*/ + ulint table_flags, /*!< in: dict_table_t::flags */ + ulint fsp_flags) /*!< in: fil_space_t::flags */ +{ + ulint table_unused = DICT_TF_GET_UNUSED(table_flags); + ulint compact = DICT_TF_GET_COMPACT(table_flags); + ulint ssize = DICT_TF_GET_ZIP_SSIZE(table_flags); + ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(table_flags); + ulint data_dir = DICT_TF_HAS_DATA_DIR(table_flags); + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); + ulint post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(fsp_flags); + ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags); + ulint fsp_atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(fsp_flags); + ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(fsp_flags); + ulint fsp_unused = FSP_FLAGS_GET_UNUSED(fsp_flags); + ulint fsp_page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(fsp_flags); + ulint fsp_page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(fsp_flags); + ulint fsp_atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(fsp_flags); + + DBUG_EXECUTE_IF("dict_tf_verify_flags_failure", + return(ULINT_UNDEFINED);); + + ut_ad(!table_unused); + ut_ad(!fsp_unused); + ut_ad(page_ssize == 0 || page_ssize != 0); /* silence compiler */ + ut_ad(compact == 0 || compact == 1); /* silence compiler */ + ut_ad(data_dir == 0 || data_dir == 1); /* silence compiler */ + ut_ad(post_antelope == 0 || post_antelope == 1); /* silence compiler */ + + if (ssize != zip_ssize) { + fprintf(stderr, + "InnoDB: Error: table flags has zip_ssize %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has zip_ssize %ld\n", + ssize, zip_ssize); + return (FALSE); + } + if (atomic_blobs != fsp_atomic_blobs) { + fprintf(stderr, + "InnoDB: Error: table flags has atomic_blobs %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has atomic_blobs %ld\n", + atomic_blobs, fsp_atomic_blobs); + + return (FALSE); + } + if (page_compression != fsp_page_compression) { + fprintf(stderr, + "InnoDB: Error: table flags has page_compression %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file ahas page_compression %ld\n", + page_compression, fsp_page_compression); + + return (FALSE); + } + if (page_compression_level != fsp_page_compression_level) { + fprintf(stderr, + "InnoDB: Error: table flags has page_compression_level %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has page_compression_level %ld\n", + page_compression_level, fsp_page_compression_level); + + return (FALSE); + } + + if (atomic_writes != fsp_atomic_writes) { + fprintf(stderr, + "InnoDB: Error: table flags has atomic writes %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has atomic_writes %ld\n", + atomic_writes, fsp_atomic_writes); + + return (FALSE); + } + + return(TRUE); +} + +/********************************************************************//** +Extract the page compression level from dict_table_t::flags. +These flags are in memory, so assert that they are valid. 
+@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_tf_get_page_compression_level( +/*===============================*/ + ulint flags) /*!< in: flags */ +{ + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags); + + ut_ad(page_compression_level >= 0 && page_compression_level <= 9); + + return(page_compression_level); +} + +/********************************************************************//** +Check whether the table uses the page compression page format. +@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_table_page_compression_level( +/*==============================*/ + const dict_table_t* table) /*!< in: table */ +{ + ut_ad(table); + ut_ad(dict_tf_get_page_compression(table->flags)); + + return(dict_tf_get_page_compression_level(table->flags)); +} + +/********************************************************************//** +Check whether the table uses the page compression page format. +@return true if page compressed, false if not */ +UNIV_INLINE +ibool +dict_tf_get_page_compression( +/*=========================*/ + ulint flags) /*!< in: flags */ +{ + return(DICT_TF_GET_PAGE_COMPRESSION(flags)); +} + +/********************************************************************//** +Check whether the table uses the page compression page format. +@return true if page compressed, false if not */ +UNIV_INLINE +ibool +dict_table_is_page_compressed( +/*==========================*/ + const dict_table_t* table) /*!< in: table */ +{ + return (dict_tf_get_page_compression(table->flags)); +} + +/********************************************************************//** +Extract the atomic writes flag from table flags. +@return true if atomic writes are used, false if not used */ +UNIV_INLINE +ibool +dict_tf_get_atomic_writes( +/*======================*/ + ulint flags) /*!< in: flags */ +{ + return(DICT_TF_GET_ATOMIC_WRITES(flags)); +} + +/********************************************************************//** +Check whether the table uses the atomic writes. +@return true if atomic writes is used, false if not */ +UNIV_INLINE +ibool +dict_table_get_atomic_writes( +/*=========================*/ + const dict_table_t* table) /*!< in: table */ +{ + return (dict_tf_get_atomic_writes(table->flags)); +} diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 56fda8b39b1..c5edd33f46b 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -128,6 +129,12 @@ extern fil_addr_t fil_addr_null; #define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this contains the space id of the page */ #define FIL_PAGE_DATA 38 /*!< start of the data on the page */ +/* Following are used when page compression is used */ +#define FIL_PAGE_COMPRESSED_SIZE 2 /*!< Number of bytes used to store + actual payload data size on + compressed pages. */ +#define FIL_PAGE_COMPRESSION_ZLIB 1 /*!< Compressin algorithm ZLIB. 
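The accessors above let the write path decide, per table, whether to page-compress and at what level; fil_compress_page() earlier in this patch falls back to srv_compress_zlib_level when the table did not specify a level. An illustrative stand-in for that decision:

static unsigned long my_default_zlib_level = 6;     /* srv_compress_zlib_level stand-in */

/* 0 disables compression entirely; otherwise use the table's level or the
global default. */
unsigned long effective_compression_level(bool page_compressed, unsigned long table_level)
{
	if (!page_compressed) {
		return 0;
	}
	return table_level ? table_level : my_default_zlib_level;
}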
*/ + /* @} */ /** File page trailer @{ */ #define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used @@ -140,6 +147,7 @@ extern fil_addr_t fil_addr_null; #ifndef UNIV_INNOCHECKSUM /** File page types (values of FIL_PAGE_TYPE) @{ */ +#define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< page compressed page */ #define FIL_PAGE_INDEX 17855 /*!< B-tree node */ #define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */ #define FIL_PAGE_INODE 3 /*!< Index node */ @@ -202,6 +210,7 @@ ulint fil_space_get_type( /*===============*/ ulint id); /*!< in: space id */ + #endif /* !UNIV_HOTBACKUP */ /*******************************************************************//** Appends a new file to the chain of files of a space. File must be closed. @@ -742,8 +751,13 @@ fil_io( void* buf, /*!< in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync + void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ + ulint write_size) /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ __attribute__((nonnull(8))); /**********************************************************************//** Waits for an aio operation to complete. This function is used to write the @@ -977,8 +991,33 @@ fil_mtr_rename_log( ulint new_space_id, /*!< in: tablespace id of the new table */ const char* new_name, /*!< in: new table name */ - const char* tmp_name); /*!< in: temp table name used while + const char* tmp_name) /*!< in: temp table name used while swapping */ + __attribute__((nonnull)); #endif /* !UNIV_INNOCHECKSUM */ + +/****************************************************************//** +Acquire fil_system mutex */ +void +fil_system_enter(void); +/*==================*/ +/****************************************************************//** +Release fil_system mutex */ +void +fil_system_exit(void); +/*==================*/ +/*******************************************************************//** +Returns the table space by a given id, NULL if not found. */ +fil_space_t* +fil_space_get_by_id( +/*================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Return space name */ +char* +fil_space_name( +/*===========*/ + fil_space_t* space); /*!< in: space */ + #endif /* fil0fil_h */ diff --git a/storage/innobase/include/fil0pagecompress.h b/storage/innobase/include/fil0pagecompress.h new file mode 100644 index 00000000000..e21eae7a5ee --- /dev/null +++ b/storage/innobase/include/fil0pagecompress.h @@ -0,0 +1,117 @@ +/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +#ifndef fil0pagecompress_h +#define fil0pagecompress_h + +#include "fsp0fsp.h" +#include "fsp0pagecompress.h" + +/******************************************************************//** +@file include/fil0pagecompress.h +Helper functions for extracting/storing page compression and +atomic writes information to table space. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +/*******************************************************************//** +Returns the page compression level flag of the space, or 0 if the space +is not compressed. The tablespace must be cached in the memory cache. +@return page compression level if page compressed, ULINT_UNDEFINED if space not found */ +ulint +fil_space_get_page_compression_level( +/*=================================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Returns the page compression flag of the space, or false if the space +is not compressed. The tablespace must be cached in the memory cache. +@return true if page compressed, false if not or space not found */ +ibool +fil_space_is_page_compressed( +/*=========================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Returns the atomic writes flag of the space, or false if the space +is not using atomic writes. The tablespace must be cached in the memory cache. +@return true if space using atomic writes, false if not */ +ibool +fil_space_get_atomic_writes( +/*=========================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Find out wheather the page is index page or not +@return true if page type index page, false if not */ +ibool +fil_page_is_index_page( +/*===================*/ + byte *buf); /*!< in: page */ + +/****************************************************************//** +Get the name of the compression algorithm used for page +compression. +@return compression algorithm name or "UNKNOWN" if not known*/ +const char* +fil_get_compression_alg_name( +/*=========================*/ + ulint comp_alg); /*!> FSP_FLAGS_POS_UNUSED) +/** Return the value of the PAGE_COMPRESSION field */ +#define FSP_FLAGS_GET_PAGE_COMPRESSION(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION) \ + >> FSP_FLAGS_POS_PAGE_COMPRESSION) +/** Return the value of the PAGE_COMPRESSION_LEVEL field */ +#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL) \ + >> FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL) +/** Return the value of the ATOMIC_WRITES field */ +#define FSP_FLAGS_GET_ATOMIC_WRITES(flags) \ + ((flags & FSP_FLAGS_MASK_ATOMIC_WRITES) \ + >> FSP_FLAGS_POS_ATOMIC_WRITES) + /** Set a PAGE_SSIZE into the correct bits in a given tablespace flags. */ #define FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize) \ (flags | (ssize << FSP_FLAGS_POS_PAGE_SSIZE)) +/** Set a PAGE_COMPRESSION into the correct bits in a given +tablespace flags. 
*/ +#define FSP_FLAGS_SET_PAGE_COMPRESSION(flags, compression) \ + (flags | (compression << FSP_FLAGS_POS_PAGE_COMPRESSION)) + +/** Set a PAGE_COMPRESSION_LEVEL into the correct bits in a given +tablespace flags. */ +#define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level) \ + (flags | (level << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)) +/** Set a ATOMIC_WRITES into the correct bits in a given +tablespace flags. */ +#define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics) \ + (flags | (atomics << FSP_FLAGS_POS_ATOMIC_WRITES)) + /* @} */ /* @defgroup Tablespace Header Constants (moved from fsp0fsp.c) @{ */ diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic index 0d81e817cc9..0ca02a5652d 100644 --- a/storage/innobase/include/fsp0fsp.ic +++ b/storage/innobase/include/fsp0fsp.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -63,6 +64,9 @@ fsp_flags_is_valid( ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags); ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags); ulint unused = FSP_FLAGS_GET_UNUSED(flags); + ulint page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(flags); + ulint page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags); + ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false);); @@ -104,6 +108,18 @@ fsp_flags_is_valid( return(false); } + /* Page compression level requires page compression and atomic blobs + to be set */ + if (page_compression_level || page_compression) { + if (!page_compression || !atomic_blobs) { + return(false); + } + } + + if (atomic_writes && !atomic_blobs) { + return (false); + } + #if UNIV_FORMAT_MAX != UNIV_FORMAT_B # error "UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations." #endif @@ -312,3 +328,4 @@ xdes_calc_descriptor_page( } #endif /* !UNIV_INNOCHECKSUM */ + diff --git a/storage/innobase/include/fsp0pagecompress.h b/storage/innobase/include/fsp0pagecompress.h new file mode 100644 index 00000000000..417d4a6879e --- /dev/null +++ b/storage/innobase/include/fsp0pagecompress.h @@ -0,0 +1,64 @@ +/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fsp0pagecompress.h +Helper functions for extracting/storing page compression and +atomic writes information to file space. 
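To make the new flag helpers concrete, here is a minimal sketch (not part of the patch) of how a tablespace flags word would be composed and read back with the FSP_FLAGS_SET_* / FSP_FLAGS_GET_* macros added to fsp0fsp.h above; it assumes the surrounding InnoDB headers are included and that the PAGE_COMPRESSION, PAGE_COMPRESSION_LEVEL and ATOMIC_WRITES bit fields do not overlap:

	ulint flags = 0;

	/* Pack the options: page compression on, zlib level 6, atomic writes on. */
	flags = FSP_FLAGS_SET_PAGE_COMPRESSION(flags, 1);
	flags = FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, 6);
	flags = FSP_FLAGS_SET_ATOMIC_WRITES(flags, 1);

	/* The GET macros mask and shift the same bit positions back out. */
	ut_a(FSP_FLAGS_GET_PAGE_COMPRESSION(flags) == 1);
	ut_a(FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags) == 6);
	ut_a(FSP_FLAGS_GET_ATOMIC_WRITES(flags) == 1);

Note that fsp_flags_is_valid() above additionally rejects a compression level without the page compression bit, and rejects page compression or atomic writes when atomic blobs are not set.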
+ +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#ifndef fsp0pagecompress_h +#define fsp0pagecompress_h + +/**********************************************************************//** +Reads the page compression level from the first page of a tablespace. +@return page compression level, or 0 if uncompressed */ +UNIV_INTERN +ulint +fsp_header_get_compression_level( +/*=============================*/ + const page_t* page); /*!< in: first page of a tablespace */ + +/********************************************************************//** +Determine if the tablespace is page compressed from dict_table_t::flags. +@return TRUE if page compressed, FALSE if not compressed */ +UNIV_INLINE +ibool +fsp_flags_is_page_compressed( +/*=========================*/ + ulint flags); /*!< in: tablespace flags */ + +/********************************************************************//** +Extract the page compression level from tablespace flags. +A tablespace has only one physical page compression level +whether that page is compressed or not. +@return page compression level of the file-per-table tablespace, +or zero if the table is not compressed. */ +UNIV_INLINE +ulint +fsp_flags_get_page_compression_level( +/*=================================*/ + ulint flags); /*!< in: tablespace flags */ + +#ifndef UNIV_NONINL +#include "fsp0pagecompress.ic" +#endif + +#endif diff --git a/storage/innobase/include/fsp0pagecompress.ic b/storage/innobase/include/fsp0pagecompress.ic new file mode 100644 index 00000000000..1dffd1bedf1 --- /dev/null +++ b/storage/innobase/include/fsp0pagecompress.ic @@ -0,0 +1,61 @@ +/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fsp0pagecompress.ic +Implementation for helper functions for extracting/storing page +compression and atomic writes information to file space. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +/********************************************************************//** +Determine if the tablespace is page compressed from dict_table_t::flags. +@return TRUE if page compressed, FALSE if not page compressed */ +UNIV_INLINE +ibool +fsp_flags_is_page_compressed( +/*=========================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_PAGE_COMPRESSION(flags)); +} + +/********************************************************************//** +Determine the tablespace is page compression level from dict_table_t::flags. 
+@return page compression level or 0 if not compressed*/ +UNIV_INLINE +ulint +fsp_flags_get_page_compression_level( +/*=================================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags)); +} + +/********************************************************************//** +Determine the tablespace is using atomic writes from dict_table_t::flags. +@return true if atomic writes is used, false if not */ +UNIV_INLINE +ibool +fsp_flags_get_atomic_writes( +/*========================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_ATOMIC_WRITES(flags)); +} diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h index 94fd908ab0c..e5c1734b842 100644 --- a/storage/innobase/include/fsp0types.h +++ b/storage/innobase/include/fsp0types.h @@ -29,6 +29,7 @@ Created May 26, 2009 Vasil Dimov #include "univ.i" #include "fil0fil.h" /* for FIL_PAGE_DATA */ +#include "ut0byte.h" /** @name Flags for inserting records in order If records are inserted in order, there are the following diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 4a744c1b268..3c70f9925fe 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -2,6 +2,7 @@ Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. Those modifications are @@ -150,6 +151,7 @@ enum os_file_create_t { #define OS_FILE_INSUFFICIENT_RESOURCE 78 #define OS_FILE_AIO_INTERRUPTED 79 #define OS_FILE_OPERATION_ABORTED 80 +#define OS_FILE_OPERATION_NOT_SUPPORTED 125 /* @} */ /** Types for aio operations @{ */ @@ -269,26 +271,26 @@ os_file_write The wrapper functions have the prefix of "innodb_". */ #ifdef UNIV_PFS_IO -# define os_file_create(key, name, create, purpose, type, success) \ +# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \ pfs_os_file_create_func(key, name, create, purpose, type, \ - success, __FILE__, __LINE__) + success, atomic_writes, __FILE__, __LINE__) -# define os_file_create_simple(key, name, create, access, success) \ +# define os_file_create_simple(key, name, create, access, success, atomic_writes) \ pfs_os_file_create_simple_func(key, name, create, access, \ - success, __FILE__, __LINE__) + success, atomic_writes, __FILE__, __LINE__) # define os_file_create_simple_no_error_handling( \ - key, name, create_mode, access, success) \ + key, name, create_mode, access, success, atomic_writes) \ pfs_os_file_create_simple_no_error_handling_func( \ - key, name, create_mode, access, success, __FILE__, __LINE__) + key, name, create_mode, access, success, atomic_writes, __FILE__, __LINE__) # define os_file_close(file) \ pfs_os_file_close_func(file, __FILE__, __LINE__) # define os_aio(type, mode, name, file, buf, offset, \ - n, message1, message2) \ + n, message1, message2, write_size) \ pfs_os_aio_func(type, mode, name, file, buf, offset, \ - n, message1, message2, __FILE__, __LINE__) + n, message1, message2, write_size, __FILE__, __LINE__) # define os_file_read(file, buf, offset, n) \ pfs_os_file_read_func(file, buf, offset, n, __FILE__, __LINE__) @@ -310,22 +312,22 @@ The wrapper functions have the prefix of "innodb_". 
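As a rough sketch of how the extra trailing arguments are meant to be supplied by callers (a hypothetical call site, not code from this patch; table, filepath and fh are placeholders), the atomic-writes table option read with dict_table_get_atomic_writes() earlier in this change is simply forwarded to os_file_create() through the updated wrapper:

	ibool ret;
	ibool awrites = dict_table_get_atomic_writes(table);

	os_file_t fh = os_file_create(
		innodb_file_data_key, filepath,
		OS_FILE_CREATE, OS_FILE_NORMAL, OS_DATA_FILE,
		&ret, awrites);

Plain synchronous reads and writes keep passing 0 for the new write_size argument of os_aio()/fil_io(), as the updated callers later in this patch do.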
*/ /* If UNIV_PFS_IO is not defined, these I/O APIs point to original un-instrumented file I/O APIs */ -# define os_file_create(key, name, create, purpose, type, success) \ - os_file_create_func(name, create, purpose, type, success) +# define os_file_create(key, name, create, purpose, type, success, atomic_writes) \ + os_file_create_func(name, create, purpose, type, success, atomic_writes) -# define os_file_create_simple(key, name, create_mode, access, success) \ - os_file_create_simple_func(name, create_mode, access, success) +# define os_file_create_simple(key, name, create_mode, access, success, atomic_writes) \ + os_file_create_simple_func(name, create_mode, access, success, atomic_writes) # define os_file_create_simple_no_error_handling( \ - key, name, create_mode, access, success) \ - os_file_create_simple_no_error_handling_func( \ - name, create_mode, access, success) + key, name, create_mode, access, success, atomic_writes) \ + os_file_create_simple_no_error_handling_func( \ + name, create_mode, access, success, atomic_writes) # define os_file_close(file) os_file_close_func(file) -# define os_aio(type, mode, name, file, buf, offset, n, message1, message2) \ +# define os_aio(type, mode, name, file, buf, offset, n, message1, message2, write_size) \ os_aio_func(type, mode, name, file, buf, offset, n, \ - message1, message2) + message1, message2, write_size) # define os_file_read(file, buf, offset, n) \ os_file_read_func(file, buf, offset, n) @@ -468,7 +470,8 @@ os_file_create_simple_func( ulint create_mode,/*!< in: create mode */ ulint access_type,/*!< in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ - ibool* success);/*!< out: TRUE if succeed, FALSE if error */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ibool atomic_writes); /*!space_id, 0, (ulint) (next_offset / UNIV_PAGE_SIZE), (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf, - group); + group, 0); srv_stats.os_log_pending_writes.dec(); @@ -1859,7 +1860,7 @@ log_group_checkpoint( write_offset / UNIV_PAGE_SIZE, write_offset % UNIV_PAGE_SIZE, OS_FILE_LOG_BLOCK_SIZE, - buf, ((byte*) group + 1)); + buf, ((byte*) group + 1), 0); ut_ad(((ulint) group & 0x1UL) == 0); } @@ -1939,7 +1940,7 @@ log_group_read_checkpoint_info( fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id, 0, field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE, - OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL); + OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, 0); } /******************************************************//** @@ -2233,7 +2234,7 @@ loop: fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0, (ulint) (source_offset / UNIV_PAGE_SIZE), (ulint) (source_offset % UNIV_PAGE_SIZE), - len, buf, NULL); + len, buf, NULL, 0); start_lsn += len; buf += len; @@ -2298,7 +2299,7 @@ log_group_archive_file_header_write( dest_offset / UNIV_PAGE_SIZE, dest_offset % UNIV_PAGE_SIZE, 2 * OS_FILE_LOG_BLOCK_SIZE, - buf, &log_archive_io); + buf, &log_archive_io, 0); } /******************************************************//** @@ -2334,7 +2335,7 @@ log_group_archive_completed_header_write( dest_offset % UNIV_PAGE_SIZE, OS_FILE_LOG_BLOCK_SIZE, buf + LOG_FILE_ARCH_COMPLETED, - &log_archive_io); + &log_archive_io, 0); } /******************************************************//** @@ -2462,7 +2463,7 @@ loop: (ulint) (next_offset / UNIV_PAGE_SIZE), (ulint) (next_offset % UNIV_PAGE_SIZE), ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf, - &log_archive_io); + &log_archive_io, 0); start_lsn += len; next_offset += len; diff --git 
a/storage/innobase/log/log0recv.cc b/storage/innobase/log/log0recv.cc index 8cefa9e4b70..a3df6a8d5bd 100644 --- a/storage/innobase/log/log0recv.cc +++ b/storage/innobase/log/log0recv.cc @@ -2,6 +2,7 @@ Copyright (c) 1997, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2063,7 +2064,7 @@ recv_apply_log_recs_for_backup(void) error = fil_io(OS_FILE_READ, TRUE, recv_addr->space, zip_size, recv_addr->page_no, 0, zip_size, - block->page.zip.data, NULL); + block->page.zip.data, NULL, 0); if (error == DB_SUCCESS && !buf_zip_decompress(block, TRUE)) { exit(1); @@ -2073,7 +2074,7 @@ recv_apply_log_recs_for_backup(void) recv_addr->space, 0, recv_addr->page_no, 0, UNIV_PAGE_SIZE, - block->frame, NULL); + block->frame, NULL, 0); } if (error != DB_SUCCESS) { @@ -2102,13 +2103,13 @@ recv_apply_log_recs_for_backup(void) recv_addr->space, zip_size, recv_addr->page_no, 0, zip_size, - block->page.zip.data, NULL); + block->page.zip.data, NULL, 0); } else { error = fil_io(OS_FILE_WRITE, TRUE, recv_addr->space, 0, recv_addr->page_no, 0, UNIV_PAGE_SIZE, - block->frame, NULL); + block->frame, NULL, 0); } skip_this_recv_addr: recv_addr = HASH_GET_NEXT(addr_hash, recv_addr); @@ -3074,7 +3075,7 @@ recv_recovery_from_checkpoint_start_func( fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id, 0, 0, 0, LOG_FILE_HDR_SIZE, - log_hdr_buf, max_cp_group); + log_hdr_buf, max_cp_group, 0); if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, (byte*)"ibbackup", (sizeof "ibbackup") - 1)) { @@ -3105,7 +3106,7 @@ recv_recovery_from_checkpoint_start_func( fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, max_cp_group->space_id, 0, 0, 0, OS_FILE_LOG_BLOCK_SIZE, - log_hdr_buf, max_cp_group); + log_hdr_buf, max_cp_group, 0); } #ifdef UNIV_LOG_ARCHIVE @@ -3753,8 +3754,8 @@ ask_again: #endif /* Read the archive file header */ - fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0, - LOG_FILE_HDR_SIZE, buf, NULL); + fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, 0, 0, + LOG_FILE_HDR_SIZE, buf, NULL, 0); /* Check if the archive file header is consistent */ @@ -3827,7 +3828,7 @@ ask_again: fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, read_offset / UNIV_PAGE_SIZE, - read_offset % UNIV_PAGE_SIZE, len, buf, NULL); + read_offset % UNIV_PAGE_SIZE, len, buf, NULL, 0); ret = recv_scan_log_recs( (buf_pool_get_n_pages() diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index d1b2b12bf59..60331f9c483 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -2,6 +2,7 @@ Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. 
Those modifications are @@ -42,8 +43,14 @@ Created 10/21/1995 Heikki Tuuri #include "srv0srv.h" #include "srv0start.h" #include "fil0fil.h" +#include "fil0pagecompress.h" #include "buf0buf.h" #include "srv0mon.h" +#include "srv0srv.h" +#ifdef HAVE_POSIX_FALLOCATE +#include "fcntl.h" +#include "linux/falloc.h" +#endif #ifndef UNIV_HOTBACKUP # include "os0sync.h" # include "os0thread.h" @@ -60,6 +67,13 @@ Created 10/21/1995 Heikki Tuuri #include #endif +#if defined(UNIV_LINUX) && defined(HAVE_SYS_IOCTL_H) +# include +# ifndef DFS_IOCTL_ATOMIC_WRITE_SET +# define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint) +# endif +#endif + /** Insert buffer segment id */ static const ulint IO_IBUF_SEGMENT = 0; @@ -175,6 +189,25 @@ struct os_aio_slot_t{ and which can be used to identify which pending aio operation was completed */ + ulint bitmap; + + byte* page_compression_page; /*!< Memory allocated for + page compressed page and + freed after the write + has been completed */ + + ulint write_size; /*!< Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ + + byte* page_buf; /*!< Actual page buffer for + page compressed pages, do not + free this */ + + ibool page_compress_success; + #ifdef WIN_ASYNC_IO HANDLE handle; /*!< handle object we need in the OVERLAPPED struct */ @@ -294,6 +327,79 @@ UNIV_INTERN ulint os_n_pending_writes = 0; /** Number of pending read operations */ UNIV_INTERN ulint os_n_pending_reads = 0; +/** After first fallocate failure we will disable os_file_trim */ +UNIV_INTERN ibool os_fallocate_failed = FALSE; + +/**********************************************************************//** +Directly manipulate the allocated disk space by deallocating for the file referred to +by fd for the byte range starting at offset and continuing for len bytes. +Within the specified range, partial file system blocks are zeroed, and whole +file system blocks are removed from the file. After a successful call, +subsequent reads from this range will return zeroes. +@return true if success, false if error */ +UNIV_INTERN +ibool +os_file_trim( +/*=========*/ + os_file_t file, /*!< in: file to be trimmed */ + os_aio_slot_t* slot, /*!< in: slot structure */ + ulint len); /*!< in: length of area */ + +/**********************************************************************//** +Allocate memory for temporal buffer used for page compression. This +buffer is freed later. */ +UNIV_INTERN +void +os_slot_alloc_page_buf( +/*===================*/ + os_aio_slot_t* slot); /*!< in: slot structure */ + +/****************************************************************//** +Does error handling when a file operation fails. +@return TRUE if we should retry the operation */ +static +ibool +os_file_handle_error_no_exit( +/*=========================*/ + const char* name, /*!< in: name of a file or NULL */ + const char* operation, /*!< in: operation */ + ibool on_error_silent,/*!< in: if TRUE then don't print + any message to the log. */ + const char* file, /*!< in: file name */ + const ulint line); /*!< in: line */ + +/****************************************************************//** +Tries to enable the atomic write feature, if available, for the specified file +handle. 
+@return TRUE if success */
+static __attribute__((warn_unused_result))
+ibool
+os_file_set_atomic_writes(
+/*======================*/
+ const char* name /*!< in: name of the file */
+ __attribute__((unused)),
+ os_file_t file /*!< in: handle to the file */
+ __attribute__((unused)))
+{
+#ifdef DFS_IOCTL_ATOMIC_WRITE_SET
+ int atomic_option = 1;
+
+ if (ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option)) {
+
+ os_file_handle_error_no_exit(name, "ioctl", FALSE, __FILE__, __LINE__);
+ return(FALSE);
+ }
+
+ return(TRUE);
+#else
+ fprintf(stderr, "InnoDB: Error: trying to enable atomic writes on "
+ "non-supported platform! Please restart with "
+ "innodb_use_atomic_writes disabled.\n");
+ return(FALSE);
+#endif
+}
+
+
 #ifdef UNIV_DEBUG
 # ifndef UNIV_HOTBACKUP
 /**********************************************************************//**
@@ -498,7 +604,17 @@ os_file_get_last_error_low(
 fprintf(stderr,
 "InnoDB: The error means mysqld does not have"
 " the access rights to\n"
- "InnoDB: the directory.\n");
+ "InnoDB: the directory.\n");
+ } else if (err == ECANCELED) {
+ fprintf(stderr,
+ "InnoDB: Operation canceled (%d):%s\n",
+ err, strerror(err));
+
+ if(srv_use_atomic_writes) {
+ fprintf(stderr,
+ "InnoDB: Error trying to enable atomic writes on "
+ "non-supported destination!\n");
+ }
 } else {
 if (strerror(err) != NULL) {
 fprintf(stderr,
@@ -530,6 +646,8 @@ os_file_get_last_error_low(
 case ENOTDIR:
 case EISDIR:
 return(OS_FILE_PATH_ERROR);
+ case ECANCELED:
+ return(OS_FILE_OPERATION_NOT_SUPPORTED);
 case EAGAIN:
 if (srv_use_native_aio) {
 return(OS_FILE_AIO_RESOURCES_RESERVED);
@@ -574,9 +692,11 @@ os_file_handle_error_cond_exit(
 const char* operation, /*!< in: operation */
 ibool should_exit, /*!< in: call exit(3) if unknown error
 and this parameter is TRUE */
- ibool on_error_silent)/*!< in: if TRUE then don't print
+ ibool on_error_silent,/*!< in: if TRUE then don't print
 any message to the log iff it is
 an unknown non-fatal error */
+ const char* file, /*!< in: file name */
+ const ulint line) /*!< in: line */
 {
 ulint err;
@@ -606,6 +726,9 @@ os_file_handle_error_cond_exit(
 " InnoDB: Disk is full. Try to clean the disk"
 " to free space.\n");
 
+ fprintf(stderr,
+ " InnoDB: at file %s and at line %lu\n", file, line);
+
 os_has_said_disk_full = TRUE;
 
 fflush(stderr);
@@ -652,6 +775,9 @@ os_file_handle_error_cond_exit(
 operation, err);
 }
 
+ fprintf(stderr,
+ " InnoDB: at file %s and at line %lu\n", file, line);
+
 if (should_exit) {
 ut_print_timestamp(stderr);
 fprintf(stderr, " InnoDB: Cannot continue "
@@ -675,10 +801,12 @@
 ibool
 os_file_handle_error(
 /*=================*/
 const char* name, /*!< in: name of a file or NULL */
- const char* operation) /*!< in: operation */
+ const char* operation, /*!< in: operation */
+ const char* file, /*!< in: file name */
+ const ulint line) /*!< in: line */
 {
 /* exit in case of unknown error */
- return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE));
+ return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE, file, line));
 }
 
 /****************************************************************//**
@@ -690,12 +818,14 @@ os_file_handle_error_no_exit(
 /*=========================*/
 const char* name, /*!< in: name of a file or NULL */
 const char* operation, /*!< in: operation */
- ibool on_error_silent)/*!< in: if TRUE then don't print
+ ibool on_error_silent,/*!< in: if TRUE then don't print
 any message to the log.
*/ + const char* file, /*!< in: file name */ + const ulint line) /*!< in: line */ { /* don't exit in case of unknown error */ return(os_file_handle_error_cond_exit( - name, operation, FALSE, on_error_silent)); + name, operation, FALSE, on_error_silent, file, line)); } #undef USE_FILE_LOCK @@ -835,7 +965,7 @@ os_file_opendir( if (dir == INVALID_HANDLE_VALUE) { if (error_is_fatal) { - os_file_handle_error(dirname, "opendir"); + os_file_handle_error(dirname, "opendir", __FILE__, __LINE__); } return(NULL); @@ -846,7 +976,7 @@ os_file_opendir( dir = opendir(dirname); if (dir == NULL && error_is_fatal) { - os_file_handle_error(dirname, "opendir"); + os_file_handle_error(dirname, "opendir", __FILE__, __LINE__); } return(dir); @@ -868,7 +998,7 @@ os_file_closedir( ret = FindClose(dir); if (!ret) { - os_file_handle_error_no_exit(NULL, "closedir", FALSE); + os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__); return(-1); } @@ -880,7 +1010,7 @@ os_file_closedir( ret = closedir(dir); if (ret) { - os_file_handle_error_no_exit(NULL, "closedir", FALSE); + os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__); } return(ret); @@ -952,7 +1082,7 @@ next_file: return(1); } else { - os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE); + os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE, __FILE__, __LINE__); return(-1); } #else @@ -1038,7 +1168,7 @@ next_file: goto next_file; } - os_file_handle_error_no_exit(full_path, "stat", FALSE); + os_file_handle_error_no_exit(full_path, "stat", FALSE, __FILE__, __LINE__); ut_free(full_path); @@ -1089,7 +1219,7 @@ os_file_create_directory( && !fail_if_exists))) { os_file_handle_error_no_exit( - pathname, "CreateDirectory", FALSE); + pathname, "CreateDirectory", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -1102,7 +1232,7 @@ os_file_create_directory( if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) { /* failure */ - os_file_handle_error_no_exit(pathname, "mkdir", FALSE); + os_file_handle_error_no_exit(pathname, "mkdir", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -1126,7 +1256,8 @@ os_file_create_simple_func( ulint create_mode,/*!< in: create mode */ ulint access_type,/*!< in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ - ibool* success)/*!< out: TRUE if succeed, FALSE if error */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ibool atomic_writes) /*!slots = static_cast( ut_malloc(n * sizeof(*array->slots))); - memset(array->slots, 0x0, sizeof(n * sizeof(*array->slots))); + memset(array->slots, 0x0, n * sizeof(*array->slots)); + #ifdef __WIN__ array->handles = static_cast(ut_malloc(n * sizeof(HANDLE))); #endif /* __WIN__ */ @@ -3803,8 +3996,8 @@ os_aio_array_free( /*==============*/ os_aio_array_t*& array) /*!< in, own: array to free */ { -#ifdef WIN_ASYNC_IO ulint i; +#ifdef WIN_ASYNC_IO for (i = 0; i < array->n_slots; i++) { os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); @@ -3826,6 +4019,14 @@ os_aio_array_free( } #endif /* LINUX_NATIVE_AIO */ + for (i = 0; i < array->n_slots; i++) { + os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); + if (slot->page_compression_page) { + ut_free(slot->page_compression_page); + slot->page_compression_page = NULL; + } + } + ut_free(array->slots); ut_free(array); @@ -4159,7 +4360,12 @@ os_aio_array_reserve_slot( void* buf, /*!< in: buffer where to read or from which to write */ os_offset_t offset, /*!< in: file offset */ - ulint len) /*!< in: length of the block to read or write */ + ulint len, /*!< in: length of the block 
to read or write */ + ulint write_size) /*!< in: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ { os_aio_slot_t* slot = NULL; #ifdef WIN_ASYNC_IO @@ -4249,6 +4455,54 @@ found: slot->buf = static_cast(buf); slot->offset = offset; slot->io_already_done = FALSE; + slot->page_compress_success = FALSE; + slot->write_size = write_size; + + /* If the space is page compressed and this is write operation + and if either only index pages compression is disabled or + page is index page and only index pages compression is enabled then + we compress the page */ + if (message1 && + type == OS_FILE_WRITE && + fil_space_is_page_compressed(fil_node_get_space_id(slot->message1)) && + (srv_page_compress_index_pages == false || + (srv_page_compress_index_pages == true && fil_page_is_index_page(slot->buf)))) { + ulint real_len = len; + byte* tmp = NULL; + + /* Release the array mutex while compressing */ + os_mutex_exit(array->mutex); + + // We allocate memory for page compressed buffer if and only + // if it is not yet allocated. + if (slot->page_buf == NULL) { + os_slot_alloc_page_buf(slot); + } + + ut_ad(slot->page_buf); + + /* Write buffer full of zeros, this is needed for trim, + can't really avoid this now. */ + memset(slot->page_buf, 0, len); + + tmp = fil_compress_page(fil_node_get_space_id(slot->message1), (byte *)buf, slot->page_buf, len, &real_len); + + /* If compression succeeded, set up the length and buffer */ + if (tmp != buf) { + len = real_len; + buf = slot->page_buf; + slot->len = real_len; + slot->page_compress_success = TRUE; + } else { + slot->page_compress_success = FALSE; + } + + /* Take array mutex back, not sure if this is really needed + below */ + os_mutex_enter(array->mutex); + + } + #ifdef WIN_ASYNC_IO control = &slot->control; @@ -4523,10 +4777,15 @@ os_aio_func( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ - void* message2)/*!< in: message for the aio handler + void* message2,/*!< in: message for the aio handler (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ + ulint write_size)/*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ { os_aio_array_t* array; os_aio_slot_t* slot; @@ -4624,7 +4883,8 @@ try_again: } slot = os_aio_array_reserve_slot(type, array, message1, message2, file, - name, buf, offset, n); + name, buf, offset, n, write_size); + if (type == OS_FILE_READ) { if (srv_use_native_aio) { os_n_file_reads++; @@ -4704,7 +4964,7 @@ err_exit: os_aio_array_free_slot(array, slot); if (os_file_handle_error( - name,type == OS_FILE_READ ? "aio read" : "aio write")) { + name,type == OS_FILE_READ ? 
"aio read" : "aio write", __FILE__, __LINE__)) { goto try_again; } @@ -4817,7 +5077,7 @@ os_aio_windows_handle( if (ret && len == slot->len) { ret_val = TRUE; - } else if (os_file_handle_error(slot->name, "Windows aio")) { + } else if (os_file_handle_error(slot->name, "Windows aio", __FILE__, __LINE__)) { retry = TRUE; } else { @@ -4847,9 +5107,17 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: - ret = WriteFile(slot->file, slot->buf, + if (slot->message1 && + fil_space_is_page_compressed(fil_node_get_space_id(slot->message1)) && + slot->page_buf) { + ret = WriteFile(slot->file, slot->page_buf, (DWORD) slot->len, &len, &(slot->control)); + } else { + ret = WriteFile(slot->file, slot->buf, + (DWORD) slot->len, &len, + &(slot->control)); + } break; case OS_FILE_READ: @@ -4881,6 +5149,29 @@ os_aio_windows_handle( ret_val = ret && len == slot->len; } + if (slot->message1 && + fil_space_is_page_compressed(fil_node_get_space_id(slot->message1))) { + // We allocate memory for page compressed buffer if and only + // if it is not yet allocated. + if (slot->page_buf == NULL) { + os_slot_alloc_page_buf(slot); + } + ut_ad(slot->page_buf); + + if (slot->type == OS_FILE_READ) { + if (fil_page_is_compressed(slot->buf)) { + fil_decompress_page(slot->page_buf, slot->buf, slot->len); + } + } else { + if (slot->page_compress_success && fil_page_is_compressed(slot->page_buf)) { + if (srv_use_trim && os_fallocate_failed == FALSE) { + // Deallocate unused blocks from file system + os_file_trim(slot->file, slot, slot->len); + } + } + } + } + os_aio_array_free_slot(array, slot); return(ret_val); @@ -4970,6 +5261,34 @@ retry: /* We have not overstepped to next segment. */ ut_a(slot->pos < end_pos); + /* If the table is page compressed and this is read, + we decompress before we annouce the read is + complete. For writes, we free the compressed page. */ + if (slot->message1 && + fil_space_is_page_compressed(fil_node_get_space_id(slot->message1))) { + // We allocate memory for page compressed buffer if and only + // if it is not yet allocated. + if (slot->page_buf == NULL) { + os_slot_alloc_page_buf(slot); + } + ut_ad(slot->page_buf); + + if (slot->type == OS_FILE_READ) { + if (fil_page_is_compressed(slot->buf)) { + fil_decompress_page(slot->page_buf, slot->buf, slot->len); + } + } else { + if (slot->page_compress_success && + fil_page_is_compressed(slot->page_buf)) { + ut_ad(slot->page_compression_page); + if (srv_use_trim && os_fallocate_failed == FALSE) { + // Deallocate unused blocks from file system + os_file_trim(slot->file, slot, slot->len); + } + } + } + } + /* Mark this request as completed. The error handling will be done in the calling function. */ os_mutex_enter(array->mutex); @@ -5113,6 +5432,13 @@ found: } else { errno = -slot->ret; + if (slot->ret == 0) { + fprintf(stderr, + "InnoDB: Number of bytes after aio %d requested %lu\n" + "InnoDB: from file %s\n", + slot->n_bytes, slot->len, slot->name); + } + /* os_file_handle_error does tell us if we should retry this IO. As it stands now, we don't do this retry when reaping requests from a different context than @@ -5120,7 +5446,7 @@ found: windows and linux native AIO. We should probably look into this to transparently re-submit the IO. 
*/ - os_file_handle_error(slot->name, "Linux aio"); + os_file_handle_error(slot->name, "Linux aio", __FILE__, __LINE__); ret = FALSE; } @@ -5323,7 +5649,7 @@ consecutive_loop: if (slot->reserved && slot != aio_slot - && slot->offset == slot->offset + aio_slot->len + && slot->offset == aio_slot->offset + aio_slot->len && slot->type == aio_slot->type && slot->file == aio_slot->file) { @@ -5791,4 +6117,147 @@ os_aio_all_slots_free(void) } #endif /* UNIV_DEBUG */ +#ifdef _WIN32 +#include +#ifndef FSCTL_FILE_LEVEL_TRIM +#define FSCTL_FILE_LEVEL_TRIM CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 130, METHOD_BUFFERED, FILE_WRITE_DATA) +typedef struct _FILE_LEVEL_TRIM_RANGE { + DWORDLONG Offset; + DWORDLONG Length; +} FILE_LEVEL_TRIM_RANGE, *PFILE_LEVEL_TRIM_RANGE; + +typedef struct _FILE_LEVEL_TRIM { + DWORD Key; + DWORD NumRanges; + FILE_LEVEL_TRIM_RANGE Ranges[1]; +} FILE_LEVEL_TRIM, *PFILE_LEVEL_TRIM; +#endif +#endif + +/**********************************************************************//** +Directly manipulate the allocated disk space by deallocating for the file referred to +by fd for the byte range starting at offset and continuing for len bytes. +Within the specified range, partial file system blocks are zeroed, and whole +file system blocks are removed from the file. After a successful call, +subsequent reads from this range will return zeroes. +@return true if success, false if error */ +UNIV_INTERN +ibool +os_file_trim( +/*=========*/ + os_file_t file, /*!< in: file to be trimmed */ + os_aio_slot_t* slot, /*!< in: slot structure */ + ulint len) /*!< in: length of area */ +{ + + size_t trim_len = UNIV_PAGE_SIZE - len; + os_offset_t off = slot->offset + len; + + // Nothing to do if trim length is zero or if actual write + // size is initialized and it is smaller than current write size. + // In first write if we trim we set write_size to actual bytes + // written and rest of the page is trimmed. In following writes + // there is no need to trim again if write_size only increases + // because rest of the page is already trimmed. If actual write + // size decreases we need to trim again. + if (trim_len == 0 || + (slot->write_size > 0 && len >= slot->write_size)) { + + if (slot->write_size > 0 && len >= slot->write_size) { + srv_stats.page_compressed_trim_op_saved.inc(); + } + + slot->write_size = len; + + return (TRUE); + } + +#ifdef __linux__ +#if defined(FALLOC_FL_PUNCH_HOLE) && defined (FALLOC_FL_KEEP_SIZE) + int ret = fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len); + + if (ret) { + /* After first failure do not try to trim again */ + os_fallocate_failed = TRUE; + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Warning] fallocate call failed with error code %d.\n" + " InnoDB: start: %lx len: %lu payload: %lu\n" + " InnoDB: Disabling fallocate for now.\n", ret, (slot->offset+len), trim_len, len); + + os_file_handle_error_no_exit(slot->name, + " fallocate(FALLOC_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) ", + FALSE, __FILE__, __LINE__); + + slot->write_size = 0; + + return (FALSE); + } else { + slot->write_size = len; + } +#else + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Warning] fallocate not supported on this installation." + " InnoDB: Disabling fallocate for now."); + os_fallocate_failed = TRUE; + slot->write_size = 0; + +#endif /* HAVE_FALLOCATE ... 
*/
+
+#elif defined(_WIN32)
+ FILE_LEVEL_TRIM flt;
+ flt.Key = 0;
+ flt.NumRanges = 1;
+ flt.Ranges[0].Offset = off;
+ flt.Ranges[0].Length = trim_len;
+
+ BOOL ret = DeviceIoControl(file,FSCTL_FILE_LEVEL_TRIM,&flt, sizeof(flt), NULL, NULL, NULL, NULL);
+
+ if (!ret) {
+ /* After first failure do not try to trim again */
+ os_fallocate_failed = TRUE;
+ ut_print_timestamp(stderr);
+ fprintf(stderr,
+ " InnoDB: [Warning] fallocate call failed with error.\n"
+ " InnoDB: start: %lx len: %lu payload: %lu\n"
+ " InnoDB: Disabling fallocate for now.\n", (slot->offset+len), trim_len, len);
+
+ os_file_handle_error_no_exit(slot->name,
+ " DeviceIOControl(FSCTL_FILE_LEVEL_TRIM) ",
+ FALSE, __FILE__, __LINE__);
+
+ slot->write_size = 0;
+ return (FALSE);
+ } else {
+ slot->write_size = len;
+ }
+#endif
+
+#define SECT_SIZE 512
+ srv_stats.page_compression_trim_sect512.add((trim_len / SECT_SIZE));
+ srv_stats.page_compression_trim_sect4096.add((trim_len / (SECT_SIZE*8)));
+ srv_stats.page_compressed_trim_op.inc();
+
+ return (TRUE);
+
+}
 #endif /* !UNIV_HOTBACKUP */
+
+/**********************************************************************//**
+Allocate memory for a temporary buffer used for page compression. This
+buffer is freed later. */
+UNIV_INTERN
+void
+os_slot_alloc_page_buf(
+/*===================*/
+ os_aio_slot_t* slot) /*!< in: slot structure */
+{
+ byte* cbuf2;
+ byte* cbuf;
+
+ cbuf2 = static_cast<byte*>(ut_malloc(UNIV_PAGE_SIZE*2));
+ cbuf = static_cast<byte*>(ut_align(cbuf2, UNIV_PAGE_SIZE));
+ slot->page_compression_page = static_cast<byte*>(cbuf2);
+ slot->page_buf = static_cast<byte*>(cbuf);
+}
diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc
index 3b3da2f070f..44a60961110 100644
--- a/storage/innobase/srv/srv0mon.cc
+++ b/storage/innobase/srv/srv0mon.cc
@@ -290,6 +290,12 @@ static monitor_info_t innodb_counter_info[] =
 MONITOR_EXISTING | MONITOR_DEFAULT_ON),
 MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_WRITTEN},
 
+ {"buffer_index_pages_written", "buffer",
+ "Number of index pages written (innodb_index_pages_written)",
+ static_cast<monitor_type_t>(
+ MONITOR_EXISTING | MONITOR_DEFAULT_ON),
+ MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_PAGES_WRITTEN},
+
 {"buffer_pages_read", "buffer",
 "Number of pages read (innodb_pages_read)",
 static_cast<monitor_type_t>(
 MONITOR_EXISTING | MONITOR_DEFAULT_ON),
 MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_READ},
@@ -875,6 +881,41 @@ static monitor_info_t innodb_counter_info[] =
 MONITOR_NONE,
 MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS},
 
+ {"compress_saved", "compression",
+ "Number of bytes saved by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED},
+
+ {"compress_trim_sect512", "compression",
+ "Number of sect-512 TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512},
+
+ {"compress_trim_sect4096", "compression",
+ "Number of sect-4K TRIMed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096},
+
+ {"compress_pages_page_compressed", "compression",
+ "Number of pages compressed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSED},
+
+ {"compress_page_compressed_trim_op", "compression",
+ "Number of TRIM operations performed by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP},
+
+ {"compress_page_compressed_trim_op_saved", "compression",
+ "Number of TRIM operations saved by page compression",
+ MONITOR_NONE,
+ MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED},
+
+ {"compress_pages_page_decompressed", "compression",
"Number of pages decompressed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED}, + /* ========== Counters for Index ========== */ {"module_index", "index", "Index Manager", MONITOR_MODULE, @@ -1528,6 +1569,11 @@ srv_mon_process_existing_counter( value = stat.n_pages_written; break; + /* innodb_index_pages_written, the number of page written */ + case MONITOR_OVLD_INDEX_PAGES_WRITTEN: + value = srv_stats.index_pages_written; + break; + /* innodb_pages_read */ case MONITOR_OVLD_PAGES_READ: buf_get_total_stat(&stat); @@ -1769,6 +1815,28 @@ srv_mon_process_existing_counter( value = btr_cur_n_non_sea; break; + case MONITOR_OVLD_PAGE_COMPRESS_SAVED: + value = srv_stats.page_compression_saved; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512: + value = srv_stats.page_compression_trim_sect512; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096: + value = srv_stats.page_compression_trim_sect4096; + break; + case MONITOR_OVLD_PAGES_PAGE_COMPRESSED: + value = srv_stats.pages_page_compressed; + break; + case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP: + value = srv_stats.page_compressed_trim_op; + break; + case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED: + value = srv_stats.page_compressed_trim_op_saved; + break; + case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED: + value = srv_stats.pages_page_decompressed; + break; + default: ut_error; } diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 4c5753ac40e..90864cee9ef 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -145,6 +145,24 @@ use simulated aio we build below with threads. Currently we support native aio on windows and linux */ UNIV_INTERN my_bool srv_use_native_aio = TRUE; +/* If this flag is TRUE, then we will use page compression +to the pages */ +UNIV_INTERN my_bool srv_compress_pages = FALSE; +/* If this flag is TRUE, then we will use page compression +only for index pages */ +UNIV_INTERN my_bool srv_page_compress_index_pages = FALSE; +UNIV_INTERN long srv_trim_pct = 100; +/* Default compression level if page compression is used and no compression +level is set for the table*/ +UNIV_INTERN long srv_compress_zlib_level = 6; +/* If this flag is TRUE, then we will use fallocate(PUCH_HOLE) +to the pages */ +UNIV_INTERN my_bool srv_use_trim = TRUE; +/* If this flag is TRUE, then we will use posix fallocate for file extentsion */ +UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE; +/* If this flag is TRUE, then we disable doublewrite buffer */ +UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; + #ifdef __WIN__ /* Windows native condition variables. We use runtime loading / function pointers, because they are not available on Windows Server 2003 and @@ -347,11 +365,6 @@ batch flushing i.e.: LRU flushing and flush_list flushing. The rest of the pages are used for single page flushing. 
*/ UNIV_INTERN ulong srv_doublewrite_batch_size = 120; -UNIV_INTERN ibool srv_use_atomic_writes = FALSE; -#ifdef HAVE_POSIX_FALLOCATE -UNIV_INTERN ibool srv_use_posix_fallocate = TRUE; -#endif - UNIV_INTERN ulong srv_replication_delay = 0; /*-------------------------------------------*/ @@ -375,6 +388,16 @@ static ulint srv_n_rows_read_old = 0; UNIV_INTERN ulint srv_truncated_status_writes = 0; UNIV_INTERN ulint srv_available_undo_logs = 0; +UNIV_INTERN ib_uint64_t srv_page_compression_saved = 0; +UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect512 = 0; +UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect4096 = 0; +UNIV_INTERN ib_uint64_t srv_index_pages_written = 0; +UNIV_INTERN ib_uint64_t srv_pages_page_compressed = 0; +UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op = 0; +UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op_saved = 0; +UNIV_INTERN ib_uint64_t srv_index_page_decompressed = 0; + + /* Set the following to 0 if you want InnoDB to write messages on stderr on startup/shutdown. */ UNIV_INTERN ibool srv_print_verbose_log = TRUE; @@ -1457,6 +1480,14 @@ srv_export_innodb_status(void) srv_truncated_status_writes; export_vars.innodb_available_undo_logs = srv_available_undo_logs; + export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved; + export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512; + export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096; + export_vars.innodb_index_pages_written = srv_stats.index_pages_written; + export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed; + export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op; + export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved; + export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed; #ifdef UNIV_DEBUG if (purge_sys->done.trx_no == 0 diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index efe9f094c0d..0517f4b1468 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -3,6 +3,7 @@ Copyright (c) 1996, 2012, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -126,7 +127,10 @@ static os_file_t files[1000]; /** io_handler_thread parameters for thread identification */ static ulint n[SRV_MAX_N_IO_THREADS + 6]; /** io_handler_thread identifiers, 32 is the maximum number of purge threads */ -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32]; +/** pgcomp_thread are 16 total */ +#define START_PGCOMP_CNT (SRV_MAX_N_IO_THREADS + 6 + 32) +#define PGCOMP_MAX_WORKER 16 +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32 + PGCOMP_MAX_WORKER]; /** We use this mutex to test the return value of pthread_mutex_trylock on successful locking. HP-UX does NOT return 0, though Linux et al do. 
*/ @@ -522,7 +526,7 @@ create_log_file( *file = os_file_create( innodb_file_log_key, name, - OS_FILE_CREATE, OS_FILE_NORMAL, OS_LOG_FILE, &ret); + OS_FILE_CREATE, OS_FILE_NORMAL, OS_LOG_FILE, &ret, FALSE); ib_logf(IB_LOG_LEVEL_INFO, "Setting log file %s size to %lu MB", @@ -715,7 +719,7 @@ open_log_file( *file = os_file_create(innodb_file_log_key, name, OS_FILE_OPEN, OS_FILE_AIO, - OS_LOG_FILE, &ret); + OS_LOG_FILE, &ret, FALSE); if (!ret) { ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name); return(DB_ERROR); @@ -806,7 +810,7 @@ open_or_create_data_files( files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_CREATE, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); if (srv_read_only_mode) { @@ -849,7 +853,7 @@ open_or_create_data_files( files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN_RAW, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); if (!ret) { ib_logf(IB_LOG_LEVEL_ERROR, @@ -881,17 +885,17 @@ open_or_create_data_files( files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN_RAW, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); } else if (i == 0) { files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN_RETRY, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); } else { files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN, OS_FILE_NORMAL, - OS_DATA_FILE, &ret); + OS_DATA_FILE, &ret, FALSE); } if (!ret) { @@ -1078,7 +1082,7 @@ srv_undo_tablespace_create( innodb_file_data_key, name, srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); if (srv_read_only_mode && ret) { ib_logf(IB_LOG_LEVEL_INFO, @@ -1159,7 +1163,8 @@ srv_undo_tablespace_open( | OS_FILE_ON_ERROR_SILENT, OS_FILE_NORMAL, OS_DATA_FILE, - &ret); + &ret, + FALSE); /* If the file open was successful then load the tablespace. 
*/ @@ -1430,6 +1435,691 @@ srv_start_wait_for_purge_to_start() } } +/* JAN: TODO: */ +/**********************************************************************************/ +extern int timediff(struct timeval *g_time, struct timeval *s_time, struct timeval *d_time); +extern ibool buf_flush_start(buf_pool_t* buf_pool, enum buf_flush flush_type); +extern void buf_flush_end(buf_pool_t* buf_pool, enum buf_flush flush_type); +extern void buf_flush_common(enum buf_flush flush_type, ulint page_count); +extern ulint buf_flush_batch(buf_pool_t* buf_pool, enum buf_flush flush_type, ulint min_n, lsn_t lsn_limit); + +typedef enum wrk_status { + WRK_ITEM_SET=0, + WRK_ITEM_START=1, + WRK_ITEM_DONE=2, + WRK_ITEM_SUCCESS=2, + WRK_ITEM_FAILED=3, + WRK_ITEM_STATUS_UNDEFINED +} wrk_status_t; + +typedef enum wthr_status { + WTHR_NOT_INIT=0, + WTHR_INITIALIZED=1, + WTHR_SIG_WAITING=2, + WTHR_RUNNING=3, + WTHR_NO_WORK=4, + WTHR_KILL_IT=5, + WTHR_STATUS_UNDEFINED +} wthr_status_t; + +typedef struct wrk_itm +{ + /****************************/ + /* Need to group into struct*/ + buf_pool_t* buf_pool; //buffer-pool instance + int flush_type; //flush-type for buffer-pool flush operation + int min; //minimum number of pages requested to be flushed + unsigned long long lsn_limit; //lsn limit for the buffer-pool flush operation + /****************************/ + + unsigned long result; //flush pages count + unsigned long t_usec; //time-taken in usec + long id_usr; //thread-id currently working + wrk_status_t wi_status; //flag + struct wrk_itm *next; +} wrk_t; + +typedef enum op_q_status { + Q_NOT_INIT=0, + Q_EMPTY=1, + Q_INITIALIZED=2, + Q_PROCESS=3, + Q_DONE=4, + Q_ERROR=5, + Q_STATUS_UNDEFINED +} q_status_t; + +typedef struct op_queue +{ + pthread_mutex_t mtx; + pthread_cond_t cv; + q_status_t flag; + wrk_t *head; + wrk_t *tail; +} opq_t; + +opq_t wq, cq; + +typedef struct thread_sync +{ + int wthread_id; + pthread_t wthread; + opq_t *wq; + opq_t *cq; + wthr_status_t wt_status; + unsigned long stat_universal_num_processed; + unsigned long stat_cycle_num_processed; +} thread_sync_t; + +/* Global XXX:DD needs to be cleaned */ +int exit_flag; +ulint check_wrk_done_count; +static ulint done_cnt_flag; +static int pgc_n_threads = 8; + +thread_sync_t pc_sync[PGCOMP_MAX_WORKER]; +static wrk_t work_items[PGCOMP_MAX_WORKER]; +static int pgcomp_wrk_initialized = -1; + +int set_check_done_flag_count(int cnt) +{ + return(check_wrk_done_count = cnt); +} + +int set_pgcomp_wrk_init_done(void) +{ + pgcomp_wrk_initialized = 1; + return 0; +} + +int is_pgcomp_wrk_init_done(void) +{ + return(pgcomp_wrk_initialized == 1); +} + +ulint set_done_cnt_flag(ulint val) +{ + /* + * Assumption: The thread calling into set_done_cnt_flag + * needs to have "cq.mtx" acquired, else not safe. 
+ */ + done_cnt_flag = val; + return done_cnt_flag; +} + + +ulint cv_done_inc_flag_sig(thread_sync_t * ppc) +{ + pthread_mutex_lock(&ppc->cq->mtx); + ppc->stat_universal_num_processed++; + ppc->stat_cycle_num_processed++; + done_cnt_flag++; + if(!(done_cnt_flag <= check_wrk_done_count)) { + fprintf(stderr, "ERROR: done_cnt:%lu check_wrk_done_count:%lu\n", + done_cnt_flag, check_wrk_done_count); + } + assert(done_cnt_flag <= check_wrk_done_count); + pthread_mutex_unlock(&ppc->cq->mtx); + if(done_cnt_flag == check_wrk_done_count) { + ppc->wq->flag = Q_DONE; + pthread_mutex_lock(&ppc->cq->mtx); + ppc->cq->flag = Q_DONE; + pthread_cond_signal(&ppc->cq->cv); + pthread_mutex_unlock(&ppc->cq->mtx); + } + return(done_cnt_flag); +} + +int q_remove_wrk(opq_t *q, wrk_t **wi) +{ + int ret = 0; + + if(!wi || !q) { + return -1; + } + + pthread_mutex_lock(&q->mtx); + assert(!((q->tail == NULL) && (q->head != NULL))); + assert(!((q->tail != NULL) && (q->head == NULL))); + + /* get the first in the list*/ + *wi = q->head; + if(q->head) { + ret = 0; + q->head = q->head->next; + (*wi)->next = NULL; + if(!q->head) { + q->tail = NULL; + } + } else { + q->tail = NULL; + ret = 1; /* indicating remove from queue failed */ + } + pthread_mutex_unlock(&q->mtx); + return (ret); +} + +int is_busy_wrk_itm(wrk_t *wi) +{ + if(!wi) { + return -1; + } + return(!(wi->id_usr == -1)); +} + +int setup_wrk_itm(int items) +{ + int i; + for(i=0; imtx, NULL); + pthread_cond_init(&q->cv, NULL); + q->flag = Q_INITIALIZED; + q->head = q->tail = NULL; + + return 0; +} + +#if 0 +int drain_cq(opq_t *cq, int items) +{ + int i=0; + + if(!cq) { + return -1; + } + pthread_mutex_lock(&cq->mtx); + for(i=0; ihead = cq->tail = NULL; + pthread_mutex_unlock(&cq->mtx); + return 0; +} +#endif + +int q_insert_wrk_list(opq_t *q, wrk_t *w_list) +{ + if((!q) || (!w_list)) { + fprintf(stderr, "insert failed q:%p w:%p\n", q, w_list); + return -1; + } + + pthread_mutex_lock(&q->mtx); + + assert(!((q->tail == NULL) && (q->head != NULL))); + assert(!((q->tail != NULL) && (q->head == NULL))); + + /* list is empty */ + if(!q->tail) { + q->head = q->tail = w_list; + } else { + /* added the first of the node to list */ + assert(q->head != NULL); + q->tail->next = w_list; + } + + /* move tail to the last node */ + while(q->tail->next) { + q->tail = q->tail->next; + } + pthread_mutex_unlock(&q->mtx); + + return 0; +} + +int flush_pool_instance(wrk_t *wi) +{ + struct timeval p_start_time, p_end_time, d_time; + + if(!wi) { + fprintf(stderr, "work item invalid wi:%p\n", wi); + return -1; + } + + wi->t_usec = 0; + if (!buf_flush_start(wi->buf_pool, (buf_flush)wi->flush_type)) { + /* We have two choices here. If lsn_limit was + specified then skipping an instance of buffer + pool means we cannot guarantee that all pages + up to lsn_limit has been flushed. We can + return right now with failure or we can try + to flush remaining buffer pools up to the + lsn_limit. We attempt to flush other buffer + pools based on the assumption that it will + help in the retry which will follow the + failure. */ + fprintf(stderr, "flush_start Failed, flush_type:%d\n", + (buf_flush)wi->flush_type); + return -1; + } + +#ifdef UNIV_DEBUG + /* Record time taken for the OP in usec */ + gettimeofday(&p_start_time, 0x0); +#endif + + if((buf_flush)wi->flush_type == BUF_FLUSH_LRU) { + /* srv_LRU_scan_depth can be arbitrarily large value. + * We cap it with current LRU size. 
+ */ + buf_pool_mutex_enter(wi->buf_pool); + wi->min = UT_LIST_GET_LEN(wi->buf_pool->LRU); + buf_pool_mutex_exit(wi->buf_pool); + wi->min = ut_min(srv_LRU_scan_depth,wi->min); + } + + wi->result = buf_flush_batch(wi->buf_pool, + (buf_flush)wi->flush_type, + wi->min, wi->lsn_limit); + + buf_flush_end(wi->buf_pool, (buf_flush)wi->flush_type); + buf_flush_common((buf_flush)wi->flush_type, wi->result); + +#ifdef UNIV_DEBUG + gettimeofday(&p_end_time, 0x0); + timediff(&p_end_time, &p_start_time, &d_time); + + wi->t_usec = (unsigned long)(d_time.tv_usec+(d_time.tv_sec*1000000)); +#endif + + return 0; +} + +int service_page_comp_io(thread_sync_t * ppc) +{ + wrk_t *wi = NULL; + int ret=0; + + pthread_mutex_lock(&ppc->wq->mtx); + do{ + ppc->wt_status = WTHR_SIG_WAITING; + ret = pthread_cond_wait(&ppc->wq->cv, &ppc->wq->mtx); + ppc->wt_status = WTHR_RUNNING; + if(ret == ETIMEDOUT) { + fprintf(stderr, "ERROR ETIMEDOUT cnt_flag:[%lu] ret:%d\n", + done_cnt_flag, ret); + } else if(ret == EINVAL || ret == EPERM) { + fprintf(stderr, "ERROR EINVAL/EPERM cnt_flag:[%lu] ret:%d\n", + done_cnt_flag, ret); + } + if(ppc->wq->flag == Q_PROCESS) { + break; + } else { + pthread_mutex_unlock(&ppc->wq->mtx); + return -1; + } + } while (ppc->wq->flag == Q_PROCESS && ret == 0); + + pthread_mutex_unlock(&ppc->wq->mtx); + + while (ppc->cq->flag == Q_PROCESS) { + wi = NULL; + /* Get the work item */ + if (0 != (ret = q_remove_wrk(ppc->wq, &wi))) { + ppc->wt_status = WTHR_NO_WORK; + return -1; + } + + assert(ret==0); + assert(wi != NULL); + assert(0 == is_busy_wrk_itm(wi)); + assert(wi->id_usr == -1); + + wi->id_usr = ppc->wthread; + wi->wi_status = WRK_ITEM_START; + + /* Process work item */ + if(0 != (ret = flush_pool_instance(wi))) { + fprintf(stderr, "FLUSH op failed ret:%d\n", ret); + wi->wi_status = WRK_ITEM_FAILED; + } + + ret = q_insert_wrk_list(ppc->cq, wi); + + assert(0==ret); + assert(check_wrk_done_count >= done_cnt_flag); + wi->wi_status = WRK_ITEM_SUCCESS; + if(check_wrk_done_count == cv_done_inc_flag_sig(ppc)) { + break; + } + } + return(0); +} + +/******************************************************************//** +@return a dummy parameter*/ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(page_comp_io_thread)( +/*==========================================*/ + void * arg) +{ + thread_sync_t *ppc_io = ((thread_sync_t *)arg); + + while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { + service_page_comp_io(ppc_io); + ppc_io->stat_cycle_num_processed = 0; + } + os_thread_exit(NULL); + OS_THREAD_DUMMY_RETURN; +} + +int print_queue_wrk_itm(opq_t *q) +{ +#if UNIV_DEBUG + wrk_t *wi = NULL; + + if(!q) { + fprintf(stderr, "queue NULL\n"); + return -1; + } + + if(!q->head || !q->tail) { + assert(!(((q->tail==NULL) && (q->head!=NULL)) && ((q->tail != NULL) && (q->head == NULL)))); + fprintf(stderr, "queue empty (h:%p t:%p)\n", q->head, q->tail); + return 0; + } + + pthread_mutex_lock(&q->mtx); + for(wi = q->head; (wi != NULL) ; wi = wi->next) { + //fprintf(stderr, "- [%p] %p %lu %luus [%ld] >%p\n", + // wi, wi->buf_pool, wi->result, wi->t_usec, wi->id_usr, wi->next); + fprintf(stderr, "- [%p] [%s] >%p\n", + wi, (wi->id_usr == -1)?"free":"Busy", wi->next); + } + pthread_mutex_unlock(&q->mtx); +#endif + return(0); +} + +int print_wrk_list(wrk_t *wi_list) +{ + wrk_t *wi = wi_list; + int i=0; + + if(!wi_list) { + fprintf(stderr, "list NULL\n"); + } + + while(wi) { + fprintf(stderr, "-\t[%p]\t[%s]\t[%lu]\t[%luus] > %p\n", + wi, (wi->id_usr == -1)?"free":"Busy", wi->result, wi->t_usec, wi->next); + wi = 
wi->next; + i++; + } + fprintf(stderr, "list len: %d\n", i); + return 0; +} + +int pgcomp_handler(wrk_t *w_list) +{ + int ret=0; + opq_t *wrk_q=NULL, *comp_q=NULL; + + wrk_q=&wq; + comp_q=&cq; + + pthread_mutex_lock(&wrk_q->mtx); + /* setup work queue here.. */ + wrk_q->flag = Q_EMPTY; + pthread_mutex_unlock(&wrk_q->mtx); + + ret = q_insert_wrk_list(wrk_q, w_list); + if(ret != 0) { + fprintf(stderr, "%s():work-queue setup FAILED wq:%p w_list:%p \n", + __FUNCTION__, &wq, w_list); + return -1; + } + +retry_submit: + pthread_mutex_lock(&wrk_q->mtx); + /* setup work queue here.. */ + wrk_q->flag = Q_INITIALIZED; + pthread_mutex_unlock(&wrk_q->mtx); + + + pthread_mutex_lock(&comp_q->mtx); + if(0 != set_done_cnt_flag(0)) { + fprintf(stderr, "FAILED %s:%d\n", __FILE__, __LINE__); + pthread_mutex_unlock(&comp_q->mtx); + return -1; + } + comp_q->flag = Q_PROCESS; + pthread_mutex_unlock(&comp_q->mtx); + + /* if threads are waiting request them to start */ + pthread_mutex_lock(&wrk_q->mtx); + wrk_q->flag = Q_PROCESS; + pthread_cond_broadcast(&wrk_q->cv); + pthread_mutex_unlock(&wrk_q->mtx); + + /* Wait on all worker-threads to complete */ + pthread_mutex_lock(&comp_q->mtx); + if (comp_q->flag != Q_DONE) { + do { + pthread_cond_wait(&comp_q->cv, &comp_q->mtx); + if(comp_q->flag != Q_DONE) { + fprintf(stderr, "[1] cv wait on CQ failed flag:%d cnt:%lu\n", + comp_q->flag, done_cnt_flag); + if (done_cnt_flag != srv_buf_pool_instances) { + fprintf(stderr, "[2] cv wait on CQ failed flag:%d cnt:%lu\n", + comp_q->flag, done_cnt_flag); + fprintf(stderr, "============\n"); + print_wrk_list(w_list); + fprintf(stderr, "============\n"); + } + continue; + } else if (done_cnt_flag != srv_buf_pool_instances) { + fprintf(stderr, "[3]cv wait on CQ failed flag:%d cnt:%lu\n", + comp_q->flag, done_cnt_flag); + fprintf(stderr, "============\n"); + print_wrk_list(w_list); + fprintf(stderr, "============\n"); + comp_q->flag = Q_INITIALIZED; + pthread_mutex_unlock(&comp_q->mtx); + goto retry_submit; + + assert(!done_cnt_flag); + continue; + } + assert(done_cnt_flag == srv_buf_pool_instances); + + if ((comp_q->flag == Q_DONE) && + (done_cnt_flag == srv_buf_pool_instances)) { + break; + } + } while((comp_q->flag == Q_INITIALIZED) && + (done_cnt_flag != srv_buf_pool_instances)); + } else { + fprintf(stderr, "[4] cv wait on CQ failed flag:%d cnt:%lu\n", + comp_q->flag, done_cnt_flag); + if (!done_cnt_flag) { + fprintf(stderr, "============\n"); + print_wrk_list(w_list); + fprintf(stderr, "============\n"); + comp_q->flag = Q_INITIALIZED; + pthread_mutex_unlock(&comp_q->mtx); + goto retry_submit; + assert(!done_cnt_flag); + } + assert(done_cnt_flag == srv_buf_pool_instances); + } + + pthread_mutex_unlock(&comp_q->mtx); + pthread_mutex_lock(&wrk_q->mtx); + wrk_q->flag = Q_DONE; + pthread_mutex_unlock(&wrk_q->mtx); + + return 0; +} + +/******************************************************************//** +@return a dummy parameter*/ +int pgcomp_handler_init(int num_threads, int wrk_cnt, opq_t *wq, opq_t *cq) +{ + int i=0; + + if(is_pgcomp_wrk_init_done()) { + fprintf(stderr, "pgcomp_handler_init(): ERROR already initialized\n"); + return -1; + } + + if(!wq || !cq) { + fprintf(stderr, "%s() FAILED wq:%p cq:%p\n", __FUNCTION__, wq, cq); + return -1; + } + + /* work-item setup */ + setup_wrk_itm(wrk_cnt); + + /* wq & cq setup */ + init_queue(wq); + init_queue(cq); + + /* Mark each of the thread sync entires */ + for(i=0; i < PGCOMP_MAX_WORKER; i++) { + pc_sync[i].wthread_id = i; + } + + /* Create threads for page-compression-flush */ 
+ for(i=0; i < num_threads; i++) { + pc_sync[i].wthread_id = i; + pc_sync[i].wq = wq; + pc_sync[i].cq = cq; + os_thread_create(page_comp_io_thread, ((void *)(pc_sync + i)), + thread_ids + START_PGCOMP_CNT + i); + //pc_sync[i].wthread = thread_ids[START_PGCOMP_CNT + i]; + pc_sync[i].wthread = (START_PGCOMP_CNT + i); + pc_sync[i].wt_status = WTHR_INITIALIZED; + } + + set_check_done_flag_count(wrk_cnt); + set_pgcomp_wrk_init_done(); + + return 0; +} + + +int wrk_thread_stat(thread_sync_t *wthr, unsigned int num_threads) +{ + long stat_tot=0; + unsigned int i=0; + for(i=0; i< num_threads;i++) { + stat_tot+=wthr[i].stat_universal_num_processed; + fprintf(stderr, "[%d] stat [%lu]\n", wthr[i].wthread_id, + wthr[i].stat_universal_num_processed); + } + fprintf(stderr, "Stat-Total:%lu\n", stat_tot); + return (0); +} + +int reset_wrk_itm(int items) +{ + int i; + + pthread_mutex_lock(&wq.mtx); + wq.head = wq.tail = NULL; + pthread_mutex_unlock(&wq.mtx); + + pthread_mutex_lock(&cq.mtx); + for(i=0;i Date: Thu, 19 Dec 2013 18:04:26 +0200 Subject: [PATCH 02/56] Atomic writes require also atomic_blobs. Add that missing flag to dictionary setting and from there it will be stored to table space. --- storage/innobase/include/dict0dict.ic | 1 + 1 file changed, 1 insertion(+) diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 65967552b87..65c1bfca24f 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -856,6 +856,7 @@ dict_tf_set( if (atomic_writes) { *flags |= (1 << DICT_TF_POS_ATOMIC_WRITES); ut_ad(dict_tf_get_atomic_writes(*flags) == TRUE); + *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS); } if (use_data_dir) { From f023715fe8c3bc7c60f65cfd58e4980b4cc89560 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Fri, 20 Dec 2013 06:50:58 +0200 Subject: [PATCH 03/56] Need to disable fast file extension with posix_fallocate for Fusion-io currently. --- storage/innobase/fil/fil0fil.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 2bf5922e07d..0939598d90d 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -4859,6 +4859,8 @@ retry: start_page_no = space->size; file_start_page_no = space->size - node->size; + /* JAN: TODO: Need to disable fast file extension for Fusion-io + currently. #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { ulint n_pages = size_after_extend - start_page_no; @@ -4875,6 +4877,7 @@ retry: goto complete_io; } #endif + */ /* Extend at most 64 pages at a time */ buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; From f6a196555e639489a7e1987eb88c67827f468a9d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Fri, 20 Dec 2013 08:59:34 +0200 Subject: [PATCH 04/56] Temporally disable posix_fallocate on os_file_set_size because currently Fusion-io SSD drive does not support setting file size without fysically writing pages with zeroes when fallocate with PUCH_HOLE is used. Added additional error message if atomic write setup does not succeed. 
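To make the first point concrete: posix_fallocate() only reserves blocks, so on the Fusion-io setup described above the newly added pages still have to be written out as zeroes before they can be used, which is what the non-fallocate path in os_file_set_size() keeps doing. A minimal stand-alone sketch of that zero-fill fallback follows; the page size constant, the function name and the error handling are illustrative only and are not part of this patch.

#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

#define FAKE_PAGE_SIZE	16384		/* stands in for UNIV_PAGE_SIZE */

/* Extend an open file from cur_size to new_size bytes by physically
writing zero-filled pages, i.e. the path taken when posix_fallocate()
is not used (or, as in this patch, is temporarily disabled). */
static int
extend_with_zeroes(int fd, off_t cur_size, off_t new_size)
{
	char*	zeroes = calloc(1, FAKE_PAGE_SIZE);
	off_t	offset = cur_size;

	if (zeroes == NULL) {
		return(-1);
	}

	while (offset < new_size) {
		if (pwrite(fd, zeroes, FAKE_PAGE_SIZE, offset)
		    != FAKE_PAGE_SIZE) {
			free(zeroes);
			return(-1);
		}
		offset += FAKE_PAGE_SIZE;
	}

	free(zeroes);
	return(fsync(fd));
}

The real os_file_set_size() batches these writes (up to one megabyte at a time, as the comment in the hunk below notes) rather than going page by page, but the observable effect is the same: every page of the extension exists on disk as zeroes.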
--- storage/innobase/os/os0file.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 60331f9c483..4ce5646b379 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -386,6 +386,9 @@ os_file_set_atomic_writes( if (ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option)) { + fprintf(stderr, "InnoDB: Error: trying to enable atomic writes on " + "file %s on non-supported platform! Please restart with " + "innodb_use_atomic_writes disabled.\n", name); os_file_handle_error_no_exit(name, "ioctl", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -2285,6 +2288,8 @@ os_file_set_size( current_size = 0; + /* JAN: TODO: Disable posix_fallocate file extension for Fusion-io + because currently it assumes that pages are initialized by zeroes #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { @@ -2300,6 +2305,7 @@ os_file_set_size( return(TRUE); } #endif + */ /* Write up to 1 megabyte at a time. */ From e80f2468b468540c27e9b7174769262297bffc13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Thu, 9 Jan 2014 08:30:09 +0200 Subject: [PATCH 05/56] Fixed issues with atomic writes and compressed pages. Temporal solution: In directFS using atomic writes we must use posix_fallocate to extend the file because pwrite past end of file fails but when compression is used the file pages must be physically initialized with zeroes, thus after file extend with posix_fallocate we still write empty pages to file. --- storage/innobase/fil/fil0fil.cc | 36 +++++++++++++++++++++++---------- storage/innobase/os/os0file.cc | 4 ---- 2 files changed, 25 insertions(+), 15 deletions(-) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 0939598d90d..8a416d09c94 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -2,7 +2,6 @@ Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2013 SkySQL Ab. All Rights Reserved. -Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -4828,6 +4827,7 @@ retry: } page_size = fsp_flags_get_zip_size(space->flags); + if (!page_size) { page_size = UNIV_PAGE_SIZE; } @@ -4859,8 +4859,6 @@ retry: start_page_no = space->size; file_start_page_no = space->size - node->size; - /* JAN: TODO: Need to disable fast file extension for Fusion-io - currently. #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { ulint n_pages = size_after_extend - start_page_no; @@ -4868,16 +4866,37 @@ retry: success = os_file_set_size(node->name, node->handle, n_pages * page_size); + /* Temporal solution: In directFS using atomic writes + we must use posix_fallocate to extend the file because + pwrite past end of file fails but when compression is + used the file pages must be physically initialized with + zeroes, thus after file extend with posix_fallocate + we still write empty pages to file. */ + if (success && + srv_use_atomic_writes && + srv_compress_pages) { + goto extend_file; + } + mutex_enter(&fil_system->mutex); + if (success) { node->size += n_pages; space->size += n_pages; os_has_said_disk_full = FALSE; } + + /* If posix_fallocate was used to extent the file space + we need to complete the io. Because no actual writes were + dispatched read operation is enough here. 
Without this + there will be assertion at shutdown indicating that + all IO is not completed. */ + fil_node_complete_io(node, fil_system, OS_FILE_READ); goto complete_io; } #endif - */ + +extend_file: /* Extend at most 64 pages at a time */ buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; @@ -4932,15 +4951,10 @@ retry: space->size += pages_added; node->size += pages_added; - node->being_extended = FALSE; -#ifdef HAVE_POSIX_FALLOCATE + fil_node_complete_io(node, fil_system, OS_FILE_WRITE); + complete_io: - fil_node_complete_io(node, fil_system, OS_FILE_READ); -#else - fil_node_complete_io(node, fil_system, OS_FILE_WRITE); -#endif - node->being_extended = FALSE; *actual_size = space->size; diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 4ce5646b379..9f12ca86601 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -2288,8 +2288,6 @@ os_file_set_size( current_size = 0; - /* JAN: TODO: Disable posix_fallocate file extension for Fusion-io - because currently it assumes that pages are initialized by zeroes #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { @@ -2305,8 +2303,6 @@ os_file_set_size( return(TRUE); } #endif - */ - /* Write up to 1 megabyte at a time. */ buf_size = ut_min(64, (ulint) (size / UNIV_PAGE_SIZE)) From 2b5a0a22802a0069f318f7d23a1071a703930c90 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Thu, 9 Jan 2014 12:33:29 +0200 Subject: [PATCH 06/56] Feature: In first write if we trim we set write_size to actual bytes written and rest of the page is trimmed. In following writes there is no need to trim again if write_size only increases because rest of the page is already trimmed. If actual write size decreases we need to trim again. Need to research if this can happen frequently enough to make any effect. --- storage/innobase/buf/buf0dblwr.cc | 2 +- storage/innobase/buf/buf0flu.cc | 2 +- storage/innobase/fil/fil0fil.cc | 6 ++--- storage/innobase/include/fil0fil.h | 2 +- storage/innobase/include/os0file.h | 8 +++++-- storage/innobase/include/os0file.ic | 8 +++++-- storage/innobase/os/os0file.cc | 37 ++++++++++++++++++++--------- 7 files changed, 44 insertions(+), 21 deletions(-) diff --git a/storage/innobase/buf/buf0dblwr.cc b/storage/innobase/buf/buf0dblwr.cc index 933b56eaf88..2ae67d8a41e 100644 --- a/storage/innobase/buf/buf0dblwr.cc +++ b/storage/innobase/buf/buf0dblwr.cc @@ -728,7 +728,7 @@ buf_dblwr_write_block_to_datafile( fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, FALSE, buf_block_get_space(block), 0, buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE, - (void*) block->frame, (void*) block, 0); + (void*) block->frame, (void*) block, (ulint *)&bpage->write_size); } /********************************************************************//** diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 06ae7b5375c..b5f1aeef597 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -942,7 +942,7 @@ buf_flush_write_block_low( FALSE, buf_page_get_space(bpage), zip_size, buf_page_get_page_no(bpage), 0, zip_size ? 
zip_size : UNIV_PAGE_SIZE, - frame, bpage, 0); + frame, bpage, &bpage->write_size); } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) { buf_dblwr_write_single_page(bpage); } else { diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 8a416d09c94..0bec85c699a 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -441,7 +441,7 @@ fil_read( in aio this must be appropriately aligned */ void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ - ulint write_size) /*!< in/out: Actual write size initialized + ulint* write_size) /*!< in/out: Actual write size initialized after fist successfull trim operation for this page and if initialized we do not trim again if @@ -475,7 +475,7 @@ fil_write( this must be appropriately aligned */ void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ - ulint write_size) /*!< in/out: Actual write size initialized + ulint* write_size) /*!< in/out: Actual write size initialized after fist successfull trim operation for this page and if initialized we do not trim again if @@ -5288,7 +5288,7 @@ fil_io( appropriately aligned */ void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ - ulint write_size) /*!< in/out: Actual write size initialized + ulint* write_size) /*!< in/out: Actual write size initialized after fist successfull trim operation for this page and if initialized we do not trim again if diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index c5edd33f46b..01084d52365 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -753,7 +753,7 @@ fil_io( appropriately aligned */ void* message, /*!< in: message for aio handler if non-sync aio used, else ignored */ - ulint write_size) /*!< in/out: Actual write size initialized + ulint* write_size) /*!< in/out: Actual write size initialized after fist successfull trim operation for this page and if initialized we do not trim again if diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 3c70f9925fe..eb5e1dddaf5 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -724,7 +724,11 @@ pfs_os_aio_func( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ - ibool atomic_writes, /*!write_size > 0 && len >= slot->write_size)) { + (slot->write_size && + *slot->write_size > 0 && + len >= *slot->write_size)) { - if (slot->write_size > 0 && len >= slot->write_size) { +#ifdef UNIV_DEBUG + fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n", + *slot->write_size, trim_len, len); +#endif + + if (*slot->write_size > 0 && len >= *slot->write_size) { srv_stats.page_compressed_trim_op_saved.inc(); } - slot->write_size = len; + *slot->write_size = len; return (TRUE); } @@ -6191,11 +6198,15 @@ os_file_trim( " fallocate(FALLOC_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) ", FALSE, __FILE__, __LINE__); - slot->write_size = 0; + if (slot->write_size) { + *slot->write_size = 0; + } return (FALSE); } else { - slot->write_size = len; + if (slot->write_size) { + *slot->write_size = len; + } } #else ut_print_timestamp(stderr); @@ -6203,7 +6214,7 @@ os_file_trim( " InnoDB: [Warning] fallocate not supported on this installation." " InnoDB: Disabling fallocate for now."); os_fallocate_failed = TRUE; - slot->write_size = 0; + slot->write_size = NULL; #endif /* HAVE_FALLOCATE ... 
*/ @@ -6229,10 +6240,14 @@ os_file_trim( " DeviceIOControl(FSCTL_FILE_LEVEL_TRIM) ", FALSE, __FILE__, __LINE__); - slot->write_size = 0; + if (slot->write_size) { + slot->write_size = 0; + } return (FALSE); } else { - slot->write_size = len; + if (slot->write_size) { + slot->write_size = len; + } } #endif From ec8257216e5b25ed82d63f074254b9454e0a0df3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Fri, 10 Jan 2014 12:11:36 +0200 Subject: [PATCH 07/56] Enhancement: Change atomic_writes table option to enum type. Now every file can either use atomic writes, not use it or use default. SYNTAX: ATOMIC_WRITES=['DEFAULT','ON','OFF'] Idea here is to be able to define innodb_doublewrite = 1 but with following rules: ATOMIC_WRITES='DEFAULT' - if innodb_use_atomic_writes = 1, we do not write to doublewrite buffer the changes if innodb_use_atomic_writes = 0, we write to doublewrite buffer ATOMIC_WRITES='ON' - do not write to doublewrite buffer ATOMIC_WRITES='OFF' - write to doublewrite buffer Note that doublewrite buffer can't be used if innodb_doublewrite = 0. --- storage/innobase/buf/buf0flu.cc | 24 +++++++- storage/innobase/fil/fil0fil.cc | 10 ++-- storage/innobase/fil/fil0pagecompress.cc | 8 +-- storage/innobase/handler/ha_innodb.cc | 8 ++- storage/innobase/handler/ha_innodb.h | 8 ++- storage/innobase/handler/handler0alter.cc | 2 +- storage/innobase/include/dict0dict.h | 4 +- storage/innobase/include/dict0dict.ic | 15 +++-- storage/innobase/include/dict0mem.h | 3 +- storage/innobase/include/dict0pagecompress.h | 4 +- storage/innobase/include/dict0pagecompress.ic | 12 ++-- storage/innobase/include/dict0types.h | 8 +++ storage/innobase/include/fil0pagecompress.h | 4 +- storage/innobase/include/fsp0fsp.h | 2 +- storage/innobase/include/fsp0fsp.ic | 5 +- storage/innobase/include/fsp0pagecompress.h | 9 +++ storage/innobase/include/fsp0pagecompress.ic | 4 +- storage/innobase/include/os0file.h | 34 ++++++----- storage/innobase/include/os0file.ic | 9 +-- storage/innobase/os/os0file.cc | 58 ++++++++----------- 20 files changed, 137 insertions(+), 94 deletions(-) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index b5f1aeef597..d159ddbe23f 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -50,6 +50,7 @@ Created 11/11/1995 Heikki Tuuri #include "srv0mon.h" #include "mysql/plugin.h" #include "mysql/service_thd_wait.h" +#include "fil0pagecompress.h" /** Number of pages flushed through non flush_list flushes. */ static ulint buf_lru_flush_page_count = 0; @@ -866,6 +867,8 @@ buf_flush_write_block_low( { ulint zip_size = buf_page_get_zip_size(bpage); page_t* frame = NULL; + ulint space_id = buf_page_get_space(bpage); + atomic_writes_t awrites = fil_space_get_atomic_writes(space_id); #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); @@ -943,10 +946,25 @@ buf_flush_write_block_low( buf_page_get_page_no(bpage), 0, zip_size ? zip_size : UNIV_PAGE_SIZE, frame, bpage, &bpage->write_size); - } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) { - buf_dblwr_write_single_page(bpage); } else { - buf_dblwr_add_to_batch(bpage); + /* InnoDB uses doublewrite buffer and doublewrite buffer + is initialized. User can define do we use atomic writes + on a file space (table) or not. If atomic writes are + not used we should use doublewrite buffer and if + atomic writes should be used, no doublewrite buffer + is used. 
*/ + + if (awrites == ATOMIC_WRITES_ON) { + fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, + FALSE, buf_page_get_space(bpage), zip_size, + buf_page_get_page_no(bpage), 0, + zip_size ? zip_size : UNIV_PAGE_SIZE, + frame, bpage, &bpage->write_size); + } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) { + buf_dblwr_write_single_page(bpage); + } else { + buf_dblwr_add_to_batch(bpage); + } } } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 0bec85c699a..2f56936ae04 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -756,7 +756,7 @@ fil_node_open_file( node->handle = os_file_create_simple_no_error_handling( innodb_file_data_key, node->name, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &success, FALSE); + OS_FILE_READ_ONLY, &success, 0); if (!success) { /* The following call prints an error message */ os_file_get_last_error(true); @@ -3159,7 +3159,7 @@ fil_create_link_file( file = os_file_create_simple_no_error_handling( innodb_file_data_key, link_filepath, - OS_FILE_CREATE, OS_FILE_READ_WRITE, &success, FALSE); + OS_FILE_CREATE, OS_FILE_READ_WRITE, &success, 0); if (!success) { /* The following call will print an error message */ @@ -3269,8 +3269,8 @@ fil_open_linked_file( const char* tablename, /*!< in: database/tablename */ char** remote_filepath,/*!< out: remote filepath */ os_file_t* remote_file, /*!< out: remote file handle */ - ibool atomic_writes) /*!< in: should atomic writes be - used */ + ulint atomic_writes) /*!< in: atomic writes table option + value */ { ibool success; @@ -4861,7 +4861,7 @@ retry: #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { - ulint n_pages = size_after_extend - start_page_no; + ulint n_pages = size_after_extend; success = os_file_set_size(node->name, node->handle, n_pages * page_size); diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index 3926b23c677..b67f583b53b 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -350,8 +350,8 @@ fil_get_compression_alg_name( /*******************************************************************//** Returns the atomic writes flag of the space, or false if the space is not using atomic writes. The tablespace must be cached in the memory cache. 
-@return true if space using atomic writes, false if not */ -ibool +@return atomic writes table option value */ +atomic_writes_t fil_space_get_atomic_writes( /*========================*/ ulint id) /*!< in: space id */ @@ -362,8 +362,8 @@ fil_space_get_atomic_writes( if (flags && flags != ULINT_UNDEFINED) { - return(fsp_flags_get_atomic_writes(flags)); + return((atomic_writes_t)fsp_flags_get_atomic_writes(flags)); } - return(flags); + return((atomic_writes_t)0); } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 085521ac7e5..074f8c3fc2c 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -503,7 +503,7 @@ ha_create_table_option innodb_table_option_list[]= compression for this table*/ HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, ULINT_UNDEFINED, 0, 9, 1), /* With this option user can enable atomic writes feature for this table */ - HA_TOPTION_BOOL("ATOMIC_WRITES", atomic_writes, 0), + HA_TOPTION_ENUM("ATOMIC_WRITES", atomic_writes, "DEFAULT,ON,OFF", 0), HA_TOPTION_END }; @@ -9738,6 +9738,7 @@ ha_innobase::check_table_options( { enum row_type row_format = table->s->row_type;; ha_table_option_struct *options= table->s->option_struct; + atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes; /* Check page compression requirements */ if (options->page_compressed) { @@ -9811,8 +9812,9 @@ ha_innobase::check_table_options( } /* Check atomic writes requirements */ - if (options->atomic_writes) { - if (!srv_use_atomic_writes && !use_tablespace) { + if (awrites == ATOMIC_WRITES_ON || + (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { + if (!use_tablespace) { push_warning( thd, Sql_condition::WARN_LEVEL_WARN, HA_WRONG_CREATE_OPTION, diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h index 5eb460072bb..9e133ea1023 100644 --- a/storage/innobase/handler/ha_innodb.h +++ b/storage/innobase/handler/ha_innodb.h @@ -64,8 +64,12 @@ struct ha_table_option_struct if this option is true. */ int page_compression_level; /*!< Table page compression level or UNIV_UNSPECIFIED. */ - bool atomic_writes; /*!< Use atomic writes for this - table if this options is true. */ + uint atomic_writes; /*!< Use atomic writes for this + table if this options is ON or + in DEFAULT if + srv_use_atomic_writes=1. 
+ Atomic writes are not used if + value OFF.*/ }; diff --git a/storage/innobase/handler/handler0alter.cc b/storage/innobase/handler/handler0alter.cc index 49f8a05d11a..244e7d19586 100644 --- a/storage/innobase/handler/handler0alter.cc +++ b/storage/innobase/handler/handler0alter.cc @@ -258,7 +258,7 @@ ha_innobase::check_if_supported_inplace_alter( if (new_options->page_compressed != old_options->page_compressed || new_options->page_compression_level != old_options->page_compression_level || - new_options->atomic_writes != old_options->page_compression_level) { + new_options->atomic_writes != old_options->atomic_writes) { ha_alter_info->unsupported_reason = innobase_get_err_msg( ER_ALTER_OPERATION_NOT_SUPPORTED_REASON); DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h index 0ca64956a2e..3208a764fe1 100644 --- a/storage/innobase/include/dict0dict.h +++ b/storage/innobase/include/dict0dict.h @@ -887,8 +887,8 @@ dict_tf_set( pages */ ulint page_compression_level, /*!< in: table page compression level */ - bool atomic_writes) /*!< in: table uses atomic - writes */ + ulint atomic_writes) /*!< in: table atomic + writes option value*/ __attribute__((nonnull)); /********************************************************************//** Convert a 32 bit integer table flags to the 32 bit integer that is diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 65c1bfca24f..f9d548681a8 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -670,6 +670,7 @@ dict_sys_tables_type_validate( ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(type); ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type); ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type); + atomic_writes_t awrites = (atomic_writes_t)atomic_writes; /* The low order bit of SYS_TABLES.TYPE is always set to 1. 
If the format is UNIV_FORMAT_B or higher, this field is the same @@ -734,7 +735,8 @@ dict_sys_tables_type_validate( } } - if (atomic_writes) { + if (awrites == ATOMIC_WRITES_ON || + (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { if (!atomic_blobs) { return(ULINT_UNDEFINED); } @@ -818,9 +820,10 @@ dict_tf_set( pages */ ulint page_compression_level, /*!< in: table page compression level */ - bool atomic_writes) /*!< in: table uses atomic - writes */ + ulint atomic_writes) /*!< in: table atomic writes setup */ { + atomic_writes_t awrites = (atomic_writes_t)atomic_writes; + switch (format) { case REC_FORMAT_REDUNDANT: *flags = 0; @@ -853,9 +856,9 @@ dict_tf_set( ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level); } - if (atomic_writes) { - *flags |= (1 << DICT_TF_POS_ATOMIC_WRITES); - ut_ad(dict_tf_get_atomic_writes(*flags) == TRUE); + if (awrites != ATOMIC_WRITES_DEFAULT) { + *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); + ut_ad(dict_tf_get_atomic_writes(*flags) == awrites); *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS); } diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h index 6cfcb81bcd5..f4e5e558488 100644 --- a/storage/innobase/include/dict0mem.h +++ b/storage/innobase/include/dict0mem.h @@ -129,8 +129,9 @@ Width of the page compression flag /** Width of atomic writes flag +DEFAULT=0, ON = 1, OFF = 2 */ -#define DICT_TF_WIDTH_ATOMIC_WRITES 1 +#define DICT_TF_WIDTH_ATOMIC_WRITES 2 /** Width of all the currently known table flags */ #define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \ diff --git a/storage/innobase/include/dict0pagecompress.h b/storage/innobase/include/dict0pagecompress.h index 236924758f1..19a2a6c52f3 100644 --- a/storage/innobase/include/dict0pagecompress.h +++ b/storage/innobase/include/dict0pagecompress.h @@ -71,7 +71,7 @@ dict_tf_verify_flags( Extract the atomic writes flag from table flags. @return true if atomic writes are used, false if not used */ UNIV_INLINE -ibool +atomic_writes_t dict_tf_get_atomic_writes( /*======================*/ ulint flags) /*!< in: flags */ @@ -81,7 +81,7 @@ dict_tf_get_atomic_writes( Check whether the table uses the atomic writes. @return true if atomic writes is used, false if not */ UNIV_INLINE -ibool +atomic_writes_t dict_table_get_atomic_writes( /*=========================*/ const dict_table_t* table); /*!< in: table */ diff --git a/storage/innobase/include/dict0pagecompress.ic b/storage/innobase/include/dict0pagecompress.ic index 98b64723542..fb9581fc657 100644 --- a/storage/innobase/include/dict0pagecompress.ic +++ b/storage/innobase/include/dict0pagecompress.ic @@ -168,24 +168,24 @@ dict_table_is_page_compressed( /********************************************************************//** Extract the atomic writes flag from table flags. -@return true if atomic writes are used, false if not used */ +@return enumerated value of atomic writes */ UNIV_INLINE -ibool +atomic_writes_t dict_tf_get_atomic_writes( /*======================*/ ulint flags) /*!< in: flags */ { - return(DICT_TF_GET_ATOMIC_WRITES(flags)); + return((atomic_writes_t)DICT_TF_GET_ATOMIC_WRITES(flags)); } /********************************************************************//** Check whether the table uses the atomic writes. 
-@return true if atomic writes is used, false if not */ +@return enumerated value of atomic writes */ UNIV_INLINE -ibool +atomic_writes_t dict_table_get_atomic_writes( /*=========================*/ const dict_table_t* table) /*!< in: table */ { - return (dict_tf_get_atomic_writes(table->flags)); + return ((atomic_writes_t)dict_tf_get_atomic_writes(table->flags)); } diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h index b7f7c2d9df9..a398ccfe7ea 100644 --- a/storage/innobase/include/dict0types.h +++ b/storage/innobase/include/dict0types.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -67,4 +68,11 @@ enum ib_quiesce_t { QUIESCE_COMPLETE /*!< All done */ }; +/** Enum values for atomic_writes table option */ +typedef enum { + ATOMIC_WRITES_DEFAULT = 0, + ATOMIC_WRITES_ON = 1, + ATOMIC_WRITES_OFF = 2 +} atomic_writes_t; + #endif diff --git a/storage/innobase/include/fil0pagecompress.h b/storage/innobase/include/fil0pagecompress.h index e21eae7a5ee..bf5caf98a75 100644 --- a/storage/innobase/include/fil0pagecompress.h +++ b/storage/innobase/include/fil0pagecompress.h @@ -49,8 +49,8 @@ fil_space_is_page_compressed( /*******************************************************************//** Returns the atomic writes flag of the space, or false if the space is not using atomic writes. The tablespace must be cached in the memory cache. -@return true if space using atomic writes, false if not */ -ibool +@return atomic write table option value */ +atomic_writes_t fil_space_get_atomic_writes( /*=========================*/ ulint id); /*!< in: space id */ diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h index 31c34cdafca..87f1f5a636d 100644 --- a/storage/innobase/include/fsp0fsp.h +++ b/storage/innobase/include/fsp0fsp.h @@ -58,7 +58,7 @@ is found in a remote location, not the default data directory. 
*/ #define FSP_FLAGS_WIDTH_PAGE_COMPRESSION 1 #define FSP_FLAGS_WIDTH_PAGE_COMPRESSION_LEVEL 4 /** Number of flag bits used to indicate atomic writes for this tablespace */ -#define FSP_FLAGS_WIDTH_ATOMIC_WRITES 1 +#define FSP_FLAGS_WIDTH_ATOMIC_WRITES 2 /** Width of all the currently known tablespace flags */ #define FSP_FLAGS_WIDTH (FSP_FLAGS_WIDTH_POST_ANTELOPE \ diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic index 0ca02a5652d..cb12d556ec4 100644 --- a/storage/innobase/include/fsp0fsp.ic +++ b/storage/innobase/include/fsp0fsp.ic @@ -67,6 +67,7 @@ fsp_flags_is_valid( ulint page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(flags); ulint page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags); ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); + atomic_writes_t awrites = (atomic_writes_t)atomic_writes; DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false);); @@ -116,7 +117,9 @@ fsp_flags_is_valid( } } - if (atomic_writes && !atomic_blobs) { + if ((awrites == ATOMIC_WRITES_ON || + (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) + && !atomic_blobs) { return (false); } diff --git a/storage/innobase/include/fsp0pagecompress.h b/storage/innobase/include/fsp0pagecompress.h index 417d4a6879e..4913f1d6b29 100644 --- a/storage/innobase/include/fsp0pagecompress.h +++ b/storage/innobase/include/fsp0pagecompress.h @@ -57,6 +57,15 @@ fsp_flags_get_page_compression_level( /*=================================*/ ulint flags); /*!< in: tablespace flags */ +/********************************************************************//** +Determine the tablespace is using atomic writes from dict_table_t::flags. +@return true if atomic writes is used, false if not */ +UNIV_INLINE +atomic_writes_t +fsp_flags_get_atomic_writes( +/*========================*/ + ulint flags); /*!< in: tablespace flags */ + #ifndef UNIV_NONINL #include "fsp0pagecompress.ic" #endif diff --git a/storage/innobase/include/fsp0pagecompress.ic b/storage/innobase/include/fsp0pagecompress.ic index 1dffd1bedf1..4859012428a 100644 --- a/storage/innobase/include/fsp0pagecompress.ic +++ b/storage/innobase/include/fsp0pagecompress.ic @@ -52,10 +52,10 @@ fsp_flags_get_page_compression_level( Determine the tablespace is using atomic writes from dict_table_t::flags. @return true if atomic writes is used, false if not */ UNIV_INLINE -ibool +atomic_writes_t fsp_flags_get_atomic_writes( /*========================*/ ulint flags) /*!< in: tablespace flags */ { - return(FSP_FLAGS_GET_ATOMIC_WRITES(flags)); + return((atomic_writes_t)FSP_FLAGS_GET_ATOMIC_WRITES(flags)); } diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index eb5e1dddaf5..8b798b6d34f 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -275,12 +275,12 @@ The wrapper functions have the prefix of "innodb_". 
*/ pfs_os_file_create_func(key, name, create, purpose, type, \ success, atomic_writes, __FILE__, __LINE__) -# define os_file_create_simple(key, name, create, access, success, atomic_writes) \ +# define os_file_create_simple(key, name, create, access, success) \ pfs_os_file_create_simple_func(key, name, create, access, \ - success, atomic_writes, __FILE__, __LINE__) + success, __FILE__, __LINE__) # define os_file_create_simple_no_error_handling( \ - key, name, create_mode, access, success, atomic_writes) \ + key, name, create_mode, access, success, atomic_writes) \ pfs_os_file_create_simple_no_error_handling_func( \ key, name, create_mode, access, success, atomic_writes, __FILE__, __LINE__) @@ -315,13 +315,13 @@ to original un-instrumented file I/O APIs */ # define os_file_create(key, name, create, purpose, type, success, atomic_writes) \ os_file_create_func(name, create, purpose, type, success, atomic_writes) -# define os_file_create_simple(key, name, create_mode, access, success, atomic_writes) \ - os_file_create_simple_func(name, create_mode, access, success, atomic_writes) +# define os_file_create_simple(key, name, create_mode, access, success) \ + os_file_create_simple_func(name, create_mode, access, success) # define os_file_create_simple_no_error_handling( \ - key, name, create_mode, access, success, atomic_writes) \ - os_file_create_simple_no_error_handling_func( \ - name, create_mode, access, success, atomic_writes) + key, name, create_mode, access, success, atomic_writes) \ + os_file_create_simple_no_error_handling_func( \ + name, create_mode, access, success, atomic_writes) # define os_file_close(file) os_file_close_func(file) @@ -470,8 +470,7 @@ os_file_create_simple_func( ulint create_mode,/*!< in: create mode */ ulint access_type,/*!< in: OS_FILE_READ_ONLY or OS_FILE_READ_WRITE */ - ibool* success,/*!< out: TRUE if succeed, FALSE if error */ - ibool atomic_writes); /*! Date: Mon, 13 Jan 2014 15:02:31 +0200 Subject: [PATCH 08/56] Removed some unnecessary assertions to debug build and enhanced the page_compression and page_compression_level fetch. 
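A note on the first half of that sentence: the assertions are not removed outright, they are demoted from ut_a() to ut_ad(), and ut_ad() is only compiled in under UNIV_DEBUG. A simplified illustration of the difference follows; these are not the literal InnoDB definitions, which live in ut0dbg.h and also report file and line.

/* Always-on assertion: checked in both release and debug builds. */
#define ut_a(EXPR)	do { if (!(EXPR)) abort(); } while (0)

#ifdef UNIV_DEBUG
# define ut_ad(EXPR)	ut_a(EXPR)	/* active only in debug builds */
#else
# define ut_ad(EXPR)	((void) 0)	/* compiled out in release builds */
#endif

So turning the argument checks at the top of fil_compress_page() and fil_decompress_page() into ut_ad() makes them free in production builds while keeping them for debug testing.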
--- storage/innobase/fil/fil0fil.cc | 10 +- storage/innobase/fil/fil0pagecompress.cc | 119 ++----------------- storage/innobase/include/fil0pagecompress.h | 1 + storage/innobase/include/fsp0pagecompress.ic | 112 +++++++++++++++++ storage/innobase/include/os0file.h | 18 ++- storage/innobase/include/os0file.ic | 7 +- storage/innobase/os/os0file.cc | 31 +++-- 7 files changed, 171 insertions(+), 127 deletions(-) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 2f56936ae04..1718e68d667 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -4920,7 +4920,7 @@ extend_file: success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, node->name, node->handle, buf, offset, page_size * n_pages, - NULL, NULL, 0); + NULL, NULL, 0, FALSE, 0); #endif /* UNIV_HOTBACKUP */ if (success) { os_has_said_disk_full = FALSE; @@ -5302,6 +5302,8 @@ fil_io( ulint wake_later; os_offset_t offset; ibool ignore_nonexistent_pages; + ibool page_compressed = FALSE; + ibool page_compression_level = 0; is_log = type & OS_FILE_LOG; type = type & ~OS_FILE_LOG; @@ -5462,6 +5464,9 @@ fil_io( ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0); ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0); + page_compressed = fsp_flags_is_page_compressed(space->flags); + page_compression_level = fsp_flags_get_page_compression_level(space->flags); + #ifdef UNIV_HOTBACKUP /* In ibbackup do normal i/o, not aio */ if (type == OS_FILE_READ) { @@ -5474,7 +5479,8 @@ fil_io( #else /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset, len, node, message, write_size); + offset, len, node, message, write_size, + page_compressed, page_compression_level); #endif /* UNIV_HOTBACKUP */ ut_a(ret); diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index b67f583b53b..2da9d70e197 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -77,6 +77,7 @@ fil_compress_page( this must be appropriately aligned */ byte* out_buf, /*!< out: compressed buffer */ ulint len, /*!< in: length of input buffer.*/ + ulint compression_level, /* in: compression level */ ulint* out_len) /*!< out: actual length of compressed page */ { int err = Z_OK; @@ -84,13 +85,13 @@ fil_compress_page( ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; ulint write_size=0; - ut_a(buf); - ut_a(out_buf); - ut_a(len); - ut_a(out_len); + ut_ad(buf); + ut_ad(out_buf); + ut_ad(len); + ut_ad(out_len); - level = fil_space_get_page_compression_level(space_id); - ut_a(fil_space_is_page_compressed(space_id)); + level = compression_level; + ut_ad(fil_space_is_page_compressed(space_id)); fil_system_enter(); fil_space_t* space = fil_space_get_by_id(space_id); @@ -181,8 +182,8 @@ fil_decompress_page( ulint compression_alg = 0; byte *in_buf; - ut_a(buf); - ut_a(len); + ut_ad(buf); + ut_ad(len); /* Before actual decompress, make sure that page type is correct */ @@ -264,106 +265,4 @@ fil_decompress_page( } } -/*******************************************************************//** -Find out wheather the page is index page or not -@return true if page type index page, false if not */ -ibool -fil_page_is_index_page( -/*===================*/ - byte *buf) /*!< in: page */ -{ - return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_INDEX); -} -/*******************************************************************//** -Find out wheather the page is page compressed -@return true if page is page compressed, false if not */ -ibool 
-fil_page_is_compressed( -/*===================*/ - byte *buf) /*!< in: page */ -{ - return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED); -} - -/*******************************************************************//** -Returns the page compression level of the space, or 0 if the space -is not compressed. The tablespace must be cached in the memory cache. -@return page compression level, ULINT_UNDEFINED if space not found */ -ulint -fil_space_get_page_compression_level( -/*=================================*/ - ulint id) /*!< in: space id */ -{ - ulint flags; - - flags = fil_space_get_flags(id); - - if (flags && flags != ULINT_UNDEFINED) { - - return(fsp_flags_get_page_compression_level(flags)); - } - - return(flags); -} - -/*******************************************************************//** -Extract the page compression from space. -@return true if space is page compressed, false if space is not found -or space is not page compressed. */ -ibool -fil_space_is_page_compressed( -/*=========================*/ - ulint id) /*!< in: space id */ -{ - ulint flags; - - flags = fil_space_get_flags(id); - - if (flags && flags != ULINT_UNDEFINED) { - - return(fsp_flags_is_page_compressed(flags)); - } - - return(flags); -} - -/****************************************************************//** -Get the name of the compression algorithm used for page -compression. -@return compression algorithm name or "UNKNOWN" if not known*/ -const char* -fil_get_compression_alg_name( -/*=========================*/ - ulint comp_alg) /*!io_already_done = FALSE; slot->page_compress_success = FALSE; slot->write_size = write_size; + slot->page_compression_level = page_compression_level; + slot->page_compression = page_compression; /* If the space is page compressed and this is write operation and if either only index pages compression is disabled or @@ -4456,7 +4465,7 @@ found: we compress the page */ if (message1 && type == OS_FILE_WRITE && - fil_space_is_page_compressed(fil_node_get_space_id(slot->message1)) && + page_compression && (srv_page_compress_index_pages == false || (srv_page_compress_index_pages == true && fil_page_is_index_page(slot->buf)))) { ulint real_len = len; @@ -4477,7 +4486,7 @@ found: can't really avoid this now. */ memset(slot->page_buf, 0, len); - tmp = fil_compress_page(fil_node_get_space_id(slot->message1), (byte *)buf, slot->page_buf, len, &real_len); + tmp = fil_compress_page(fil_node_get_space_id(slot->message1), (byte *)buf, slot->page_buf, len, page_compression_level, &real_len); /* If compression succeeded, set up the length and buffer */ if (tmp != buf) { @@ -4773,11 +4782,15 @@ os_aio_func( (can be used to identify a completed aio operation); ignored if mode is OS_AIO_SYNC */ - ulint* write_size)/*!< in/out: Actual write size initialized + ulint* write_size,/*!< in/out: Actual write size initialized after fist successfull trim operation for this page and if initialized we do not trim again if actual page size does not decrease. 
*/ + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level) /*!< page compression + level to be used */ { os_aio_array_t* array; os_aio_slot_t* slot; @@ -4875,7 +4888,7 @@ try_again: } slot = os_aio_array_reserve_slot(type, array, message1, message2, file, - name, buf, offset, n, write_size); + name, buf, offset, n, write_size, page_compression, page_compression_level); if (type == OS_FILE_READ) { if (srv_use_native_aio) { @@ -5100,7 +5113,7 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: if (slot->message1 && - fil_space_is_page_compressed(fil_node_get_space_id(slot->message1)) && + page_compression && slot->page_buf) { ret = WriteFile(slot->file, slot->page_buf, (DWORD) slot->len, &len, @@ -5141,8 +5154,7 @@ os_aio_windows_handle( ret_val = ret && len == slot->len; } - if (slot->message1 && - fil_space_is_page_compressed(fil_node_get_space_id(slot->message1))) { + if (slot->message1 && page_compression) { // We allocate memory for page compressed buffer if and only // if it is not yet allocated. if (slot->page_buf == NULL) { @@ -5256,8 +5268,7 @@ retry: /* If the table is page compressed and this is read, we decompress before we annouce the read is complete. For writes, we free the compressed page. */ - if (slot->message1 && - fil_space_is_page_compressed(fil_node_get_space_id(slot->message1))) { + if (slot->message1 && slot->page_compression) { // We allocate memory for page compressed buffer if and only // if it is not yet allocated. if (slot->page_buf == NULL) { From 8c5d5bc5de135ed143bfe91c99fd53a8c9b4487c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Mon, 3 Feb 2014 10:08:15 +0200 Subject: [PATCH 09/56] Fixed merge error on InnoDB page compression level handling. Merged page compression feature to XtraDB storage engine. Added feature where page compression can use lz4 compression method (innodb_use_lz4, default OFF). 
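The lz4 path added below relies on two calls from the bundled library, LZ4_compress_limitedOutput() when writing and LZ4_decompress_fast() when reading, alongside the existing zlib compress2() path. A small self-contained round-trip sketch of those two calls; the buffer sizes and the all-'x' payload are illustrative only, and the header is the copy added here as storage/innobase/fil/lz4.h.

#include <assert.h>
#include <string.h>
#include "lz4.h"

int main(void)
{
	char	page[16384];	/* stands in for one UNIV_PAGE_SIZE page */
	char	comp[16384];
	char	back[16384];
	int	comp_len;
	int	consumed;

	memset(page, 'x', sizeof(page));

	/* Returns the compressed length, or 0 if the output does not fit;
	the patch treats 0 like a zlib error and keeps the page
	uncompressed. */
	comp_len = LZ4_compress_limitedOutput(page, comp,
					      (int) sizeof(page),
					      (int) sizeof(comp));
	assert(comp_len > 0);

	/* Decompression is driven by the known original size; the return
	value is the number of compressed bytes consumed, which the patch
	compares against the payload length stored in the page header. */
	consumed = LZ4_decompress_fast(comp, back, (int) sizeof(page));
	assert(consumed == comp_len);
	assert(memcmp(page, back, sizeof(page)) == 0);

	return(0);
}

LZ4_decompress_fast() is the variant that trusts a caller-supplied original size (always UNIV_PAGE_SIZE here), so the payload length stored at FIL_PAGE_DATA is only needed afterwards to verify how many compressed bytes were actually consumed.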
--- storage/innobase/CMakeLists.txt | 1 + storage/innobase/btr/btr0btr.cc | 4 +- storage/innobase/btr/btr0cur.cc | 4 +- storage/innobase/fil/fil0fil.cc | 2 +- storage/innobase/fil/fil0pagecompress.cc | 186 +++-- storage/innobase/fil/lz4.c | 822 +++++++++++++++++++ storage/innobase/fil/lz4.h | 205 +++++ storage/innobase/handler/ha_innodb.cc | 44 +- storage/innobase/include/fil0fil.h | 1 + storage/innobase/include/fsp0pagecompress.ic | 5 +- storage/innobase/include/page0zip.h | 2 +- storage/innobase/include/srv0srv.h | 7 +- storage/innobase/page/page0cur.cc | 2 +- storage/innobase/page/page0page.cc | 6 +- storage/innobase/page/page0zip.cc | 4 +- storage/innobase/srv/srv0srv.cc | 18 +- storage/xtradb/CMakeLists.txt | 4 + storage/xtradb/buf/buf0buf.cc | 23 + storage/xtradb/buf/buf0dblwr.cc | 26 +- storage/xtradb/buf/buf0flu.cc | 349 +++++++- storage/xtradb/buf/buf0rea.cc | 5 +- storage/xtradb/dict/dict0dict.cc | 1 + storage/xtradb/fil/fil0fil.cc | 152 +++- storage/xtradb/fil/fil0pagecompress.cc | 324 ++++++++ storage/xtradb/fil/lz4.c | 822 +++++++++++++++++++ storage/xtradb/fil/lz4.h | 205 +++++ storage/xtradb/handler/ha_innodb.cc | 246 +++++- storage/xtradb/handler/ha_innodb.h | 18 + storage/xtradb/handler/handler0alter.cc | 28 + storage/xtradb/include/buf0buf.h | 21 + storage/xtradb/include/buf0flu.h | 7 + storage/xtradb/include/dict0dict.h | 12 +- storage/xtradb/include/dict0dict.ic | 164 +++- storage/xtradb/include/dict0mem.h | 56 +- storage/xtradb/include/dict0pagecompress.h | 94 +++ storage/xtradb/include/dict0pagecompress.ic | 191 +++++ storage/xtradb/include/dict0types.h | 9 + storage/xtradb/include/fil0fil.h | 43 +- storage/xtradb/include/fil0pagecompress.h | 118 +++ storage/xtradb/include/fsp0fsp.h | 68 +- storage/xtradb/include/fsp0fsp.ic | 19 + storage/xtradb/include/fsp0pagecompress.h | 73 ++ storage/xtradb/include/fsp0pagecompress.ic | 177 ++++ storage/xtradb/include/os0file.h | 69 +- storage/xtradb/include/os0file.ic | 26 +- storage/xtradb/include/srv0mon.h | 11 + storage/xtradb/include/srv0srv.h | 62 +- storage/xtradb/log/log0log.cc | 20 +- storage/xtradb/log/log0online.cc | 6 +- storage/xtradb/log/log0recv.cc | 19 +- storage/xtradb/os/os0file.cc | 553 +++++++++++-- storage/xtradb/srv/srv0mon.cc | 68 ++ storage/xtradb/srv/srv0srv.cc | 43 +- storage/xtradb/srv/srv0start.cc | 730 +++++++++++++++- 54 files changed, 5847 insertions(+), 328 deletions(-) create mode 100644 storage/innobase/fil/lz4.c create mode 100644 storage/innobase/fil/lz4.h create mode 100644 storage/xtradb/fil/fil0pagecompress.cc create mode 100644 storage/xtradb/fil/lz4.c create mode 100644 storage/xtradb/fil/lz4.h create mode 100644 storage/xtradb/include/dict0pagecompress.h create mode 100644 storage/xtradb/include/dict0pagecompress.ic create mode 100644 storage/xtradb/include/fil0pagecompress.h create mode 100644 storage/xtradb/include/fsp0pagecompress.h create mode 100644 storage/xtradb/include/fsp0pagecompress.ic diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index e41d2406bd2..0b1043bc421 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -294,6 +294,7 @@ SET(INNOBASE_SOURCES eval/eval0proc.cc fil/fil0fil.cc fil/fil0pagecompress.cc + fil/lz4.c fsp/fsp0fsp.cc fut/fut0fut.cc fut/fut0lst.cc diff --git a/storage/innobase/btr/btr0btr.cc b/storage/innobase/btr/btr0btr.cc index e3e127c3ace..3d7dc993146 100644 --- a/storage/innobase/btr/btr0btr.cc +++ b/storage/innobase/btr/btr0btr.cc @@ -1923,7 +1923,7 @@ btr_page_reorganize( dict_index_t* index, 
/*!< in: record descriptor */ mtr_t* mtr) /*!< in: mtr */ { - return(btr_page_reorganize_low(FALSE, page_compression_level, + return(btr_page_reorganize_low(FALSE, page_zip_level, block, index, mtr)); } #endif /* !UNIV_HOTBACKUP */ @@ -1942,7 +1942,7 @@ btr_parse_page_reorganize( buf_block_t* block, /*!< in: page to be reorganized, or NULL */ mtr_t* mtr) /*!< in: mtr or NULL */ { - ulint level = page_compression_level; + ulint level = page_zip_level; ut_ad(ptr && end_ptr); diff --git a/storage/innobase/btr/btr0cur.cc b/storage/innobase/btr/btr0cur.cc index ecc17188770..5feb1363867 100644 --- a/storage/innobase/btr/btr0cur.cc +++ b/storage/innobase/btr/btr0cur.cc @@ -1844,7 +1844,7 @@ btr_cur_update_alloc_zip( /* Have a local copy of the variables as these can change dynamically. */ bool log_compressed = page_log_compressed_pages; - ulint compression_level = page_compression_level; + ulint compression_level = page_zip_level; page_t* page = buf_block_get_frame(block); ut_a(page_zip == buf_block_get_page_zip(block)); @@ -4334,7 +4334,7 @@ btr_store_big_rec_extern_fields( heap = mem_heap_create(250000); page_zip_set_alloc(&c_stream, heap); - err = deflateInit2(&c_stream, page_compression_level, + err = deflateInit2(&c_stream, page_zip_level, Z_DEFLATED, 15, 7, Z_DEFAULT_STRATEGY); ut_a(err == Z_OK); } diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 1718e68d667..3803d0a93aa 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -5303,7 +5303,7 @@ fil_io( os_offset_t offset; ibool ignore_nonexistent_pages; ibool page_compressed = FALSE; - ibool page_compression_level = 0; + ulint page_compression_level = 0; is_log = type & OS_FILE_LOG; type = type & ~OS_FILE_LOG; diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index 2da9d70e197..10ac273955f 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (C) 2013 SkySQL Ab. All Rights Reserved. +Copyright (C) 2013, 2014, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -63,6 +63,7 @@ static ulint srv_data_read, srv_data_written; #include #endif #include "row0mysql.h" +#include "lz4.h" /****************************************************************//** For page compressed pages compress the page before actual write @@ -100,7 +101,7 @@ fil_compress_page( /* If no compression level was provided to this table, use system default level */ if (level == 0) { - level = srv_compress_zlib_level; + level = page_zip_level; } #ifdef UNIV_DEBUG @@ -110,60 +111,88 @@ fil_compress_page( #endif write_size = UNIV_PAGE_SIZE - header_len; - err = compress2(out_buf+header_len, &write_size, buf, len, level); - if (err != Z_OK) { - /* If error we leave the actual page as it was */ + if (srv_use_lz4) { + err = LZ4_compress_limitedOutput((const char *)buf, (char *)out_buf+header_len, len, write_size); + write_size = err; - fprintf(stderr, - "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", - space_id, fil_space_name(space), len, err, write_size); + if (err == 0) { + /* If error we leave the actual page as it was */ - *out_len = len; - return (buf); + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", + space_id, fil_space_name(space), len, err, write_size); + + *out_len = len; + return (buf); + } } else { - /* Set up the page header */ - memcpy(out_buf, buf, FIL_PAGE_DATA); - /* Set up the checksum */ - mach_write_to_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC); - /* Set up the correct page type */ - mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); - /* Set up the flush lsn to be compression algorithm */ - mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_ZLIB); - /* Set up the actual payload lenght */ - mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); + err = compress2(out_buf+header_len, &write_size, buf, len, level); -#ifdef UNIV_DEBUG - /* Verify */ - ut_ad(fil_page_is_compressed(out_buf)); - ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); - ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); - ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_ZLIB); -#endif + if (err != Z_OK) { + /* If error we leave the actual page as it was */ - write_size+=header_len; - /* Actual write needs to be alligned on block size */ - if (write_size % OS_FILE_LOG_BLOCK_SIZE) { - write_size = (write_size + (OS_FILE_LOG_BLOCK_SIZE - (write_size % OS_FILE_LOG_BLOCK_SIZE))); + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", + space_id, fil_space_name(space), len, err, write_size); + + *out_len = len; + return (buf); } - -#ifdef UNIV_DEBUG - fprintf(stderr, - "InnoDB: Note: Compression succeeded for space %lu name %s len %lu out_len %lu\n", - space_id, fil_space_name(space), len, write_size); -#endif -#define SECT_SIZE 512 - srv_stats.page_compression_saved.add((len - write_size)); - if ((len - write_size) > 0) { - srv_stats.page_compression_trim_sect512.add(((len - write_size) / SECT_SIZE)); - srv_stats.page_compression_trim_sect4096.add(((len - write_size) / (SECT_SIZE*8))); - } - //srv_stats.page_compressed_trim_op.inc(); - srv_stats.pages_page_compressed.inc(); - *out_len = write_size; - - return(out_buf); } + + /* Set up the page header */ + memcpy(out_buf, buf, 
FIL_PAGE_DATA); + /* Set up the checksum */ + mach_write_to_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC); + /* Set up the correct page type */ + mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); + /* Set up the flush lsn to be compression algorithm */ + if (srv_use_lz4) { + mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_LZ4); + } else { + mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_ZLIB); + } + /* Set up the actual payload lenght */ + mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); + +#ifdef UNIV_DEBUG + /* Verify */ + ut_ad(fil_page_is_compressed(out_buf)); + ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); + ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); + if (srv_use_lz4) { + ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_LZ4); + } else { + ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_ZLIB); + } +#endif + + write_size+=header_len; + /* Actual write needs to be alligned on block size */ + if (write_size % OS_FILE_LOG_BLOCK_SIZE) { + write_size = (write_size + (OS_FILE_LOG_BLOCK_SIZE - (write_size % OS_FILE_LOG_BLOCK_SIZE))); + } + +#ifdef UNIV_DEBUG + fprintf(stderr, + "InnoDB: Note: Compression succeeded for space %lu name %s len %lu out_len %lu\n", + space_id, fil_space_name(space), len, write_size); +#endif + +#define SECT_SIZE 512 + + srv_stats.page_compression_saved.add((len - write_size)); + if ((len - write_size) > 0) { + srv_stats.page_compression_trim_sect512.add(((len - write_size) / SECT_SIZE)); + srv_stats.page_compression_trim_sect4096.add(((len - write_size) / (SECT_SIZE*8))); + } + //srv_stats.page_compressed_trim_op.inc(); + srv_stats.pages_page_compressed.inc(); + *out_len = write_size; + + return(out_buf); + } /****************************************************************//** @@ -203,16 +232,30 @@ fil_decompress_page( /* Get compression algorithm */ compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN); - if (compression_alg == FIL_PAGE_COMPRESSION_ZLIB) { - // If no buffer was given, we need to allocate temporal buffer - if (page_buf == NULL) { - in_buf = static_cast(ut_malloc(UNIV_PAGE_SIZE)); - } else { - in_buf = page_buf; - } + // If no buffer was given, we need to allocate temporal buffer + if (page_buf == NULL) { +#ifdef UNIV_DEBUG + fprintf(stderr, + "InnoDB: Note: Compression buffer not given, allocating...\n"); +#endif + in_buf = static_cast(ut_malloc(UNIV_PAGE_SIZE)); + } else { + in_buf = page_buf; + } - /* Get the actual size of compressed page */ - actual_size = mach_read_from_2(buf+FIL_PAGE_DATA); + /* Get the actual size of compressed page */ + actual_size = mach_read_from_2(buf+FIL_PAGE_DATA); + /* Check if payload size is corrupted */ + if (actual_size == 0 || actual_size > UNIV_PAGE_SIZE) { + fprintf(stderr, + "InnoDB: Corruption: We try to uncompress corrupted page\n" + "InnoDB: actual size %lu compression %s\n", + actual_size, fil_get_compression_alg_name(compression_alg)); + fflush(stderr); + ut_error; + } + + if (compression_alg == FIL_PAGE_COMPRESSION_ZLIB) { #ifdef UNIV_DEBUG fprintf(stderr, @@ -242,17 +285,19 @@ fil_decompress_page( "InnoDB: Note: Decompression succeeded for len %lu \n", len); #endif + } else if (compression_alg == FIL_PAGE_COMPRESSION_LZ4) { + err = LZ4_decompress_fast((const char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (char *)in_buf, UNIV_PAGE_SIZE); - /* Copy the uncompressed page to the buffer pool, not - 
really any other options. */ - memcpy(buf, in_buf, len); + if (err != actual_size) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %d bytes.\n" + "InnoDB: size %lu len %lu\n", + err, actual_size, len); + fflush(stderr); - // Need to free temporal buffer if no buffer was given - if (page_buf == NULL) { - ut_free(in_buf); + ut_error; } - - srv_stats.pages_page_decompressed.inc(); } else { fprintf(stderr, "InnoDB: Corruption: Page is marked as compressed\n" @@ -263,6 +308,17 @@ fil_decompress_page( fflush(stderr); ut_error; } + + srv_stats.pages_page_decompressed.inc(); + + /* Copy the uncompressed page to the buffer pool, not + really any other options. */ + memcpy(buf, in_buf, len); + + // Need to free temporal buffer if no buffer was given + if (page_buf == NULL) { + ut_free(in_buf); + } } diff --git a/storage/innobase/fil/lz4.c b/storage/innobase/fil/lz4.c new file mode 100644 index 00000000000..4e864de67d3 --- /dev/null +++ b/storage/innobase/fil/lz4.c @@ -0,0 +1,822 @@ +/* + LZ4 - Fast LZ compression algorithm + Copyright (C) 2011-2013, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +//************************************** +// Tuning parameters +//************************************** +// MEMORY_USAGE : +// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) +// Increasing memory usage improves compression ratio +// Reduced memory usage can improve speed, due to cache effect +// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache +#define MEMORY_USAGE 14 + +// HEAPMODE : +// Select how default compression functions will allocate memory for their hash table, +// in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)). +#define HEAPMODE 0 + + +//************************************** +// CPU Feature Detection +//************************************** +// 32 or 64 bits ? 
+#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \ + || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \ + || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \ + || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) // Detects 64 bits mode +# define LZ4_ARCH64 1 +#else +# define LZ4_ARCH64 0 +#endif + +// Little Endian or Big Endian ? +// Overwrite the #define below if you know your architecture endianess +#if defined (__GLIBC__) +# include +# if (__BYTE_ORDER == __BIG_ENDIAN) +# define LZ4_BIG_ENDIAN 1 +# endif +#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) +# define LZ4_BIG_ENDIAN 1 +#elif defined(__sparc) || defined(__sparc__) \ + || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ + || defined(__hpux) || defined(__hppa) \ + || defined(_MIPSEB) || defined(__s390__) +# define LZ4_BIG_ENDIAN 1 +#else +// Little Endian assumed. PDP Endian and other very rare endian format are unsupported. +#endif + +// Unaligned memory access is automatically enabled for "common" CPU, such as x86. +// For others CPU, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to ensure aligned access property +// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance +#if defined(__ARM_FEATURE_UNALIGNED) +# define LZ4_FORCE_UNALIGNED_ACCESS 1 +#endif + +// Define this parameter if your target system or compiler does not support hardware bit count +#if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count +# define LZ4_FORCE_SW_BITCOUNT +#endif + +// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : +// This option may provide a small boost to performance for some big endian cpu, although probably modest. +// You may set this option to 1 if data will remain within closed environment. 
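A quick worked example of the MEMORY_USAGE formula quoted above may help; the numbers below simply instantiate the defaults defined later in this file (LZ4_HASHLOG, HASHNBCELLS4) and are an illustration, not part of the imported LZ4 sources:

    /* MEMORY_USAGE = 14 sizes the match-finder hash table at 2^14 bytes. */
    enum {
        memory_usage = 14,                 /* default above                 */
        hash_log     = memory_usage - 2,   /* LZ4_HASHLOG                   */
        hash_cells   = 1 << hash_log,      /* HASHNBCELLS4 = 4096 cells     */
        table_bytes  = hash_cells * 4      /* 4-byte (U32) cells -> 16 KB,
                                              the L1-cache-friendly default */
    };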
+// This option is useless on Little_Endian CPU (such as x86) +//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 + + +//************************************** +// Compiler Options +//************************************** +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) // C99 +/* "restrict" is a known keyword */ +#else +# define restrict // Disable restrict +#endif + +#ifdef _MSC_VER // Visual Studio +# define FORCE_INLINE static __forceinline +# include // For Visual 2005 +# if LZ4_ARCH64 // 64-bits +# pragma intrinsic(_BitScanForward64) // For Visual 2005 +# pragma intrinsic(_BitScanReverse64) // For Visual 2005 +# else // 32-bits +# pragma intrinsic(_BitScanForward) // For Visual 2005 +# pragma intrinsic(_BitScanReverse) // For Visual 2005 +# endif +# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + +#ifdef _MSC_VER +# define lz4_bswap16(x) _byteswap_ushort(x) +#else +# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) +#endif + +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#define likely(expr) expect((expr) != 0, 1) +#define unlikely(expr) expect((expr) != 0, 0) + + +//************************************** +// Memory routines +//************************************** +#include // malloc, calloc, free +#define ALLOCATOR(n,s) calloc(n,s) +#define FREEMEM free +#include // memset, memcpy +#define MEM_INIT memset + + +//************************************** +// Includes +//************************************** +#include "lz4.h" + + +//************************************** +// Basic Types +//************************************** +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + +#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) +# define _PACKED __attribute__ ((packed)) +#else +# define _PACKED +#endif + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif +#endif + +typedef struct { U16 v; } _PACKED U16_S; +typedef struct { U32 v; } _PACKED U32_S; +typedef struct { U64 v; } _PACKED U64_S; +typedef struct {size_t v;} _PACKED size_t_S; + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# if defined(__SUNPRO_C) || defined(__SUNPRO_CC) +# pragma pack(0) +# else +# pragma pack(pop) +# endif +#endif + +#define A16(x) (((U16_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) +#define AARCH(x) (((size_t_S *)(x))->v) + + +//************************************** +// Constants +//************************************** +#define LZ4_HASHLOG (MEMORY_USAGE-2) +#define HASHTABLESIZE (1 << MEMORY_USAGE) +#define HASHNBCELLS4 (1 << LZ4_HASHLOG) + +#define MINMATCH 4 + +#define COPYLENGTH 8 +#define LASTLITERALS 5 +#define MFLIMIT 
(COPYLENGTH+MINMATCH) +const int LZ4_minLength = (MFLIMIT+1); + +#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1)) +#define SKIPSTRENGTH 6 // Increasing this value will make the compression run slower on incompressible data + +#define MAXD_LOG 16 +#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) + +#define ML_BITS 4 +#define ML_MASK ((1U<=e; + + +//**************************** +// Private functions +//**************************** +#if LZ4_ARCH64 + +FORCE_INLINE int LZ4_NbCommonBytes (register U64 val) +{ +# if defined(LZ4_BIG_ENDIAN) +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clzll(val) >> 3); +# else + int r; + if (!(val>>32)) { r=4; } else { r=0; val>>=32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif +# else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctzll(val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif +# endif +} + +#else + +FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) +{ +# if defined(LZ4_BIG_ENDIAN) +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanReverse( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clz(val) >> 3); +# else + int r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif +# else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctz(val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif +# endif +} + +#endif + + +//**************************** +// Compression functions +//**************************** +FORCE_INLINE int LZ4_hashSequence(U32 sequence, tableType_t tableType) +{ + if (tableType == byU16) + return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} + +FORCE_INLINE int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); } + +FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + switch (tableType) + { + case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; } + } +} + +FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, 
const BYTE* srcBase) +{ + U32 h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} + +FORCE_INLINE const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; } + if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; } + { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } // default, to ensure a return +} + +FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); +} + + +FORCE_INLINE int LZ4_compress_generic( + void* ctx, + const char* source, + char* dest, + int inputSize, + int maxOutputSize, + + limitedOutput_directive limitedOutput, + tableType_t tableType, + prefix64k_directive prefix) +{ + const BYTE* ip = (const BYTE*) source; + const BYTE* const base = (prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->base : (const BYTE*) source; + const BYTE* const lowLimit = ((prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->bufferStart : (const BYTE*)source); + const BYTE* anchor = (const BYTE*) source; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = iend - LASTLITERALS; + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; + + int length; + const int skipStrength = SKIPSTRENGTH; + U32 forwardH; + + // Init conditions + if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; // Unsupported input size, too large (or negative) + if ((prefix==withPrefix) && (ip != ((LZ4_Data_Structure*)ctx)->nextBlock)) return 0; // must continue from end of previous block + if (prefix==withPrefix) ((LZ4_Data_Structure*)ctx)->nextBlock=iend; // do it now, due to potential early exit + if ((tableType == byU16) && (inputSize>=LZ4_64KLIMIT)) return 0; // Size too large (not within 64K limit) + if (inputSize> skipStrength; + ip = forwardIp; + forwardIp = ip + step; + + if unlikely(forwardIp > mflimit) { goto _last_literals; } + + forwardH = LZ4_hashPosition(forwardIp, tableType); + ref = LZ4_getPositionOnHash(h, ctx, tableType, base); + LZ4_putPositionOnHash(ip, h, ctx, tableType, base); + + } while ((ref + MAX_DISTANCE < ip) || (A32(ref) != A32(ip))); + + // Catch up + while ((ip>anchor) && (ref > lowLimit) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; } + + // Encode Literal length + length = (int)(ip - anchor); + token = op++; + if ((limitedOutput) && unlikely(op + length + (2 + 1 + LASTLITERALS) + (length/255) > oend)) return 0; // Check output limit + if (length>=(int)RUN_MASK) + { + int len = length-RUN_MASK; + *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(length<>8) > oend)) return 0; // Check output limit + if (length>=(int)ML_MASK) + { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } + if (length >= 255) { length-=255; *op++ = 255; } + *op++ = (BYTE)length; + } + else *token += (BYTE)(length); + + // Test end of chunk + if (ip > mflimit) { anchor = ip; break; } + + // Fill table + LZ4_putPosition(ip-2, ctx, tableType, base); + + // Test next position + ref = LZ4_getPosition(ip, ctx, tableType, base); + LZ4_putPosition(ip, ctx, tableType, base); + if ((ref + MAX_DISTANCE >= ip) && (A32(ref) == 
A32(ip))) { token = op++; *token=0; goto _next_match; } + + // Prepare next loop + anchor = ip++; + forwardH = LZ4_hashPosition(ip, tableType); + } + +_last_literals: + // Encode Last Literals + { + int lastRun = (int)(iend - anchor); + if ((limitedOutput) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; // Check output limit + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (BYTE)(lastRun<hashTable, 0, sizeof(lz4ds->hashTable)); + lz4ds->bufferStart = base; + lz4ds->base = base; + lz4ds->nextBlock = base; +} + + +void* LZ4_create (const char* inputBuffer) +{ + void* lz4ds = ALLOCATOR(1, sizeof(LZ4_Data_Structure)); + LZ4_init ((LZ4_Data_Structure*)lz4ds, (const BYTE*)inputBuffer); + return lz4ds; +} + + +int LZ4_free (void* LZ4_Data) +{ + FREEMEM(LZ4_Data); + return (0); +} + + +char* LZ4_slideInputBuffer (void* LZ4_Data) +{ + LZ4_Data_Structure* lz4ds = (LZ4_Data_Structure*)LZ4_Data; + size_t delta = lz4ds->nextBlock - (lz4ds->bufferStart + 64 KB); + + if ( (lz4ds->base - delta > lz4ds->base) // underflow control + || ((size_t)(lz4ds->nextBlock - lz4ds->base) > 0xE0000000) ) // close to 32-bits limit + { + size_t deltaLimit = (lz4ds->nextBlock - 64 KB) - lz4ds->base; + int nH; + + for (nH=0; nH < HASHNBCELLS4; nH++) + { + if ((size_t)(lz4ds->hashTable[nH]) < deltaLimit) lz4ds->hashTable[nH] = 0; + else lz4ds->hashTable[nH] -= (U32)deltaLimit; + } + memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); + lz4ds->base = lz4ds->bufferStart; + lz4ds->nextBlock = lz4ds->base + 64 KB; + } + else + { + memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); + lz4ds->nextBlock -= delta; + lz4ds->base -= delta; + } + + return (char*)(lz4ds->nextBlock); +} + + +//**************************** +// Decompression functions +//**************************** + +// This generic decompression function cover all use cases. +// It shall be instanciated several times, using different sets of directives +// Note that it is essential this generic function is really inlined, +// in order to remove useless branches during compilation optimisation. +FORCE_INLINE int LZ4_decompress_generic( + const char* source, + char* dest, + int inputSize, // + int outputSize, // If endOnInput==endOnInputSize, this value is the max size of Output Buffer. + + int endOnInput, // endOnOutputSize, endOnInputSize + int prefix64k, // noPrefix, withPrefix + int partialDecoding, // full, partial + int targetOutputSize // only used if partialDecoding==partial + ) +{ + // Local Variables + const BYTE* restrict ip = (const BYTE*) source; + const BYTE* ref; + const BYTE* const iend = ip + inputSize; + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + outputSize; + BYTE* cpy; + BYTE* oexit = op + targetOutputSize; + + const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; // static reduces speed for LZ4_decompress_safe() on GCC64 + static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; + + + // Special cases + if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; // targetOutputSize too high => decode everything + if ((endOnInput) && unlikely(outputSize==0)) return ((inputSize==1) && (*ip==0)) ? 
0 : -1; // Empty output buffer + if ((!endOnInput) && unlikely(outputSize==0)) return (*ip==0?1:-1); + + + // Main Loop + while (1) + { + unsigned token; + size_t length; + + // get runlength + token = *ip++; + if ((length=(token>>ML_BITS)) == RUN_MASK) + { + unsigned s=255; + while (((endOnInput)?ip(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-COPYLENGTH))) + { + if (partialDecoding) + { + if (cpy > oend) goto _output_error; // Error : write attempt beyond end of output buffer + if ((endOnInput) && (ip+length > iend)) goto _output_error; // Error : read attempt beyond end of input buffer + } + else + { + if ((!endOnInput) && (cpy != oend)) goto _output_error; // Error : block decoding must stop exactly there + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; // Error : input must be consumed + } + memcpy(op, ip, length); + ip += length; + op += length; + break; // Necessarily EOF, due to parsing restrictions + } + LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy; + + // get offset + LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; + if ((prefix64k==noPrefix) && unlikely(ref < (BYTE* const)dest)) goto _output_error; // Error : offset outside destination buffer + + // get matchlength + if ((length=(token&ML_MASK)) == ML_MASK) + { + while ((!endOnInput) || (ipoend-COPYLENGTH-(STEPSIZE-4)) + { + if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals + LZ4_SECURECOPY(op, ref, (oend-COPYLENGTH)); + while(op (unsigned int)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) +static inline int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } + +/* +LZ4_compressBound() : + Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible) + primarily useful for memory allocation of output buffer. + inline function is recommended for the general case, + macro is also provided when result needs to be evaluated at compilation (such as stack memory allocation). + + isize : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE + return : maximum output size in a "worst case" scenario + or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE) +*/ + + +int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); + +/* +LZ4_compress_limitedOutput() : + Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. + If it cannot achieve it, compression will stop, and result of the function will be zero. + This function never writes outside of provided output buffer. + + inputSize : Max supported value is LZ4_MAX_INPUT_VALUE + maxOutputSize : is the size of the destination buffer (which must be already allocated) + return : the number of bytes written in buffer 'dest' + or 0 if the compression fails +*/ + + +int LZ4_decompress_fast (const char* source, char* dest, int outputSize); + +/* +LZ4_decompress_fast() : + outputSize : is the original (uncompressed) size + return : the number of bytes read from the source buffer (in other words, the compressed size) + If the source stream is malformed, the function will stop decoding and return a negative result. + note : This function is a bit faster than LZ4_decompress_safe() + This function never writes outside of output buffers, but may read beyond input buffer in case of malicious data packet. + Use this function preferably into a trusted environment (data to decode comes from a trusted source). 
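The two one-shot entry points documented here are exactly the pair the new fil0pagecompress.cc code calls, so a short round-trip sketch may be useful. It is an illustration only: the 16 KB buffers stand in for UNIV_PAGE_SIZE pages and error handling is reduced to early returns.

    #include <string.h>
    #include "lz4.h"

    static int lz4_roundtrip_example(const char* page, int page_size)
    {
        char    compressed[16384];
        char    restored[16384];

        /* Returns 0 when the result does not fit in maxOutputSize; the
        caller then keeps the page uncompressed, as fil_compress_page()
        does. */
        int     c_len = LZ4_compress_limitedOutput(
                page, compressed, page_size, (int) sizeof(compressed));

        if (c_len == 0) {
                return(-1);
        }

        /* outputSize is the original length; the return value is the number
        of compressed bytes consumed, negative for a malformed stream. The
        mismatch check mirrors the one added to fil_decompress_page(). */
        if (LZ4_decompress_fast(compressed, restored, page_size) != c_len) {
                return(-1);
        }

        return(memcmp(page, restored, page_size) == 0 ? 0 : -1);
    }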
+ Destination buffer must be already allocated. Its size must be a minimum of 'outputSize' bytes. +*/ + +int LZ4_decompress_safe_partial (const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize); + +/* +LZ4_decompress_safe_partial() : + This function decompress a compressed block of size 'inputSize' at position 'source' + into output buffer 'dest' of size 'maxOutputSize'. + The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached, + reducing decompression time. + return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) + Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller. + Always control how many bytes were decoded. + If the source stream is detected malformed, the function will stop decoding and return a negative result. + This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets +*/ + + +//**************************** +// Stream Functions +//**************************** + +void* LZ4_create (const char* inputBuffer); +int LZ4_compress_continue (void* LZ4_Data, const char* source, char* dest, int inputSize); +int LZ4_compress_limitedOutput_continue (void* LZ4_Data, const char* source, char* dest, int inputSize, int maxOutputSize); +char* LZ4_slideInputBuffer (void* LZ4_Data); +int LZ4_free (void* LZ4_Data); + +/* +These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks. +In order to achieve this, it is necessary to start creating the LZ4 Data Structure, thanks to the function : + +void* LZ4_create (const char* inputBuffer); +The result of the function is the (void*) pointer on the LZ4 Data Structure. +This pointer will be needed in all other functions. +If the pointer returned is NULL, then the allocation has failed, and compression must be aborted. +The only parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. +The input buffer must be already allocated, and size at least 192KB. +'inputBuffer' will also be the 'const char* source' of the first block. + +All blocks are expected to lay next to each other within the input buffer, starting from 'inputBuffer'. +To compress each block, use either LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(). +Their behavior are identical to LZ4_compress() or LZ4_compress_limitedOutput(), +but require the LZ4 Data Structure as their first argument, and check that each block starts right after the previous one. +If next block does not begin immediately after the previous one, the compression will fail (return 0). + +When it's no longer possible to lay the next block after the previous one (not enough space left into input buffer), a call to : +char* LZ4_slideInputBuffer(void* LZ4_Data); +must be performed. It will typically copy the latest 64KB of input at the beginning of input buffer. +Note that, for this function to work properly, minimum size of an input buffer must be 192KB. +==> The memory position where the next input data block must start is provided as the result of the function. + +Compression can then resume, using LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(), as usual. + +When compression is completed, a call to LZ4_free() will release the memory used by the LZ4 Data Structure. 
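Since the block above is a how-to for the streaming interface, a compact usage sketch follows. It is illustrative only: BLOCK_SIZE and the input-filling step are placeholders, and the destination buffer is assumed large enough (LZ4_compressBound() per block).

    #include <stdlib.h>
    #include "lz4.h"

    #define RING_SIZE   (192 * 1024)        /* documented minimum buffer size */
    #define BLOCK_SIZE  (8 * 1024)

    static void lz4_stream_example(char* dst)
    {
        char*   ring = (char*) malloc(RING_SIZE);
        void*   ctx  = ring ? LZ4_create(ring) : NULL;
        char*   src  = ring;                /* first block starts at 'ring'   */

        if (ctx == NULL) {
                free(ring);
                return;
        }

        for (int i = 0; i < 4; i++) {
                /* ... fill BLOCK_SIZE bytes at 'src' with the next input ... */

                /* A return value of 0 means the block did not start right
                after the previous one, which the API treats as an error. */
                dst += LZ4_compress_continue(ctx, src, dst, BLOCK_SIZE);

                src += BLOCK_SIZE;
                if (src + BLOCK_SIZE > ring + RING_SIZE) {
                        /* Recycle the buffer, keeping the last 64 KB of
                        history; continue from the returned position. */
                        src = LZ4_slideInputBuffer(ctx);
                }
        }

        LZ4_free(ctx);
        free(ring);
    }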
+*/ + + +int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int inputSize, int maxOutputSize); +int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int outputSize); + +/* +*_withPrefix64k() : + These decoding functions work the same as their "normal name" versions, + but can use up to 64KB of data in front of 'char* dest'. + These functions are necessary to decode inter-dependant blocks. +*/ + + +//**************************** +// Obsolete Functions +//**************************** + +static inline int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); } +static inline int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } + +/* +These functions are deprecated and should no longer be used. +They are provided here for compatibility with existing user programs. +*/ + + + +#if defined (__cplusplus) +} +#endif diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index eda7da81d5c..d4ce4eb9c4f 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -4,7 +4,7 @@ Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2012, Facebook Inc. -Copyright (c) 2013, SkySQL Ab. +Copyright (c) 2013, 2014, SkySQL Ab. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -15429,29 +15429,6 @@ innodb_reset_all_monitor_update( TRUE); } -/****************************************************************//** -Update the system variable innodb_compression_level using the "saved" -value. This function is registered as a callback with MySQL. */ -static -void -innodb_compression_level_update( -/*============================*/ - THD* thd, /*!< in: thread handle */ - struct st_mysql_sys_var* var, /*!< in: pointer to - system variable */ - void* var_ptr,/*!< out: where the - formal string goes */ - const void* save) /*!< in: immediate result - from check function */ -{ - /* We have this call back just to avoid confusion between - ulong and ulint datatypes. */ - innobase_compression_level = - (*static_cast(save)); - page_compression_level = - (static_cast(innobase_compression_level)); -} - /****************************************************************//** Parse and enable InnoDB monitor counters during server startup. User can list the monitor counters/groups to be enable by specifying @@ -16140,11 +16117,11 @@ static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay, "innodb_thread_concurrency is reached (0 by default)", NULL, NULL, 0, 0, ~0UL, 0); -static MYSQL_SYSVAR_ULONG(compression_level, innobase_compression_level, +static MYSQL_SYSVAR_UINT(compression_level, page_zip_level, PLUGIN_VAR_RQCMDARG, - "Compression level used for compressed row format. 0 is no compression" + "Compression level used for zlib compression. 
0 is no compression" ", 1 is fastest, 9 is best compression and default is 6.", - NULL, innodb_compression_level_update, + NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0); static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size, @@ -16620,11 +16597,6 @@ static MYSQL_SYSVAR_LONG(trim_pct, srv_trim_pct, "How many percent of compressed pages should be trimmed", NULL, NULL, 100, 0, 100, 0); -static MYSQL_SYSVAR_LONG(compress_zlib_level, srv_compress_zlib_level, - PLUGIN_VAR_OPCMDARG , - "Default zlib compression level", - NULL, NULL, 6, 0, 9, 0); - static MYSQL_SYSVAR_BOOL(compress_index_pages, srv_page_compress_index_pages, PLUGIN_VAR_OPCMDARG, "Use page compression for only index pages.", @@ -16635,6 +16607,12 @@ static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, "Use trim.", NULL, NULL, TRUE); +static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, + PLUGIN_VAR_OPCMDARG , + "Use LZ4 for page compression", + NULL, NULL, FALSE); + + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(api_trx_level), @@ -16782,9 +16760,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { #endif /* UNIV_DEBUG */ MYSQL_SYSVAR(compress_pages), MYSQL_SYSVAR(trim_pct), - MYSQL_SYSVAR(compress_zlib_level), MYSQL_SYSVAR(compress_index_pages), MYSQL_SYSVAR(use_trim), + MYSQL_SYSVAR(use_lz4), NULL }; diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 01084d52365..918a92fa811 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -134,6 +134,7 @@ extern fil_addr_t fil_addr_null; actual payload data size on compressed pages. */ #define FIL_PAGE_COMPRESSION_ZLIB 1 /*!< Compressin algorithm ZLIB. */ +#define FIL_PAGE_COMPRESSION_LZ4 2 /*!< Compressin algorithm LZ4. */ /* @} */ /** File page trailer @{ */ diff --git a/storage/innobase/include/fsp0pagecompress.ic b/storage/innobase/include/fsp0pagecompress.ic index 755d91b3cd9..10f9d30d1f8 100644 --- a/storage/innobase/include/fsp0pagecompress.ic +++ b/storage/innobase/include/fsp0pagecompress.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (C) 2013 SkySQL Ab. All Rights Reserved. +Copyright (C) 2013,2014 SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -144,6 +144,9 @@ fil_get_compression_alg_name( case FIL_PAGE_COMPRESSION_ZLIB: return ("ZLIB"); break; + case FIL_PAGE_COMPRESSION_LZ4: + return ("LZ4"); + break; default: return("UNKNOWN"); break; diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h index 12781bd61b8..89260d0984e 100644 --- a/storage/innobase/include/page0zip.h +++ b/storage/innobase/include/page0zip.h @@ -41,7 +41,7 @@ Created June 2005 by Marko Makela #include "mem0mem.h" /* Compression level to be used by zlib. Settable by user. */ -extern ulint page_compression_level; +extern uint page_zip_level; /* Default compression level. */ #define DEFAULT_COMPRESSION_LEVEL 6 diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index f4fa8b434fe..a11c213d534 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -3,7 +3,7 @@ Copyright (c) 1995, 2012, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009, Google Inc. Copyright (c) 2009, Percona Inc. 
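As a small tie-in between the new FIL_PAGE_COMPRESSION_LZ4 constant and the compression code earlier in this patch: fil_compress_page() records the algorithm id in the FIL_PAGE_FILE_FLUSH_LSN field, and fil_get_compression_alg_name() in fsp0pagecompress.ic maps it back to a name. A hypothetical helper restating that mapping, purely for illustration:

    static const char*
    page_compression_alg_of(const byte* page)
    {
        /* The algorithm id written by fil_compress_page(). */
        switch (mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN)) {
        case FIL_PAGE_COMPRESSION_ZLIB:
                return("ZLIB");
        case FIL_PAGE_COMPRESSION_LZ4:
                return("LZ4");
        default:
                return("UNKNOWN");
        }
    }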
-Copyright (c) 2013, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -254,9 +254,8 @@ extern my_bool srv_use_posix_fallocate; /* Use atomic writes i.e disable doublewrite buffer */ extern my_bool srv_use_atomic_writes; -/* Default zlib compression level */ -extern long srv_compress_zlib_level; - +/* If this flag IS TRUE, then we use lz4 to compress/decompress pages */ +extern my_bool srv_use_lz4; #ifdef __WIN__ extern ibool srv_use_native_conditions; diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index f416d38cc35..9d6a62cae8f 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -1180,7 +1180,7 @@ page_cur_insert_rec_zip_reorg( /* Make a local copy as the values can change dynamically. */ bool log_compressed = page_log_compressed_pages; - ulint level = page_compression_level; + ulint level = page_zip_level; /* Recompress or reorganize and recompress the page. */ if (page_zip_compress(page_zip, page, index, level, diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index 6b7b8424856..bf73a249f95 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -514,7 +514,7 @@ page_create_zip( mach_write_to_2(page + PAGE_HEADER + PAGE_LEVEL, level); if (!page_zip_compress(page_zip, page, index, - page_compression_level, mtr)) { + page_zip_level, mtr)) { /* The compression of a newly created page should always succeed. */ ut_error; @@ -663,7 +663,7 @@ page_copy_rec_list_end( if (!page_zip_compress(new_page_zip, new_page, index, - page_compression_level, + page_zip_level, mtr)) { /* Before trying to reorganize the page, store the number of preceding records on the page. */ @@ -788,7 +788,7 @@ page_copy_rec_list_start( goto zip_reorganize;); if (!page_zip_compress(new_page_zip, new_page, index, - page_compression_level, mtr)) { + page_zip_level, mtr)) { ulint ret_pos; #ifndef DBUG_OFF diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index dee37580002..3fba6216430 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -69,7 +69,7 @@ UNIV_INTERN mysql_pfs_key_t page_zip_stat_per_index_mutex_key; #endif /* !UNIV_HOTBACKUP */ /* Compression level to be used by zlib. Settable by user. */ -UNIV_INTERN ulint page_compression_level = 6; +UNIV_INTERN uint page_zip_level = DEFAULT_COMPRESSION_LEVEL; /* Whether or not to log compressed page images to avoid possible compression algorithm changes in zlib. */ @@ -4631,7 +4631,7 @@ page_zip_reorganize( mtr_set_log_mode(mtr, log_mode); if (!page_zip_compress(page_zip, page, index, - page_compression_level, mtr)) { + page_zip_level, mtr)) { #ifndef UNIV_HOTBACKUP buf_block_free(temp_block); diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 90864cee9ef..cffd3f928c3 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -3,6 +3,7 @@ Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, 2014, SkySQL Ab. Portions of this file contain modifications contributed and copyrighted by Google, Inc. 
Those modifications are gratefully acknowledged and are described @@ -147,21 +148,20 @@ UNIV_INTERN my_bool srv_use_native_aio = TRUE; /* If this flag is TRUE, then we will use page compression to the pages */ -UNIV_INTERN my_bool srv_compress_pages = FALSE; +UNIV_INTERN my_bool srv_compress_pages = FALSE; /* If this flag is TRUE, then we will use page compression only for index pages */ -UNIV_INTERN my_bool srv_page_compress_index_pages = FALSE; -UNIV_INTERN long srv_trim_pct = 100; -/* Default compression level if page compression is used and no compression -level is set for the table*/ -UNIV_INTERN long srv_compress_zlib_level = 6; +UNIV_INTERN my_bool srv_page_compress_index_pages = FALSE; +UNIV_INTERN long srv_trim_pct = 100; /* If this flag is TRUE, then we will use fallocate(PUCH_HOLE) to the pages */ -UNIV_INTERN my_bool srv_use_trim = TRUE; +UNIV_INTERN my_bool srv_use_trim = TRUE; /* If this flag is TRUE, then we will use posix fallocate for file extentsion */ -UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE; +UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE; /* If this flag is TRUE, then we disable doublewrite buffer */ -UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; +UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; +/* If this flag IS TRUE, then we use lz4 to compress/decompress pages */ +UNIV_INTERN my_bool srv_use_lz4 = FALSE; #ifdef __WIN__ /* Windows native condition variables. We use runtime loading / function diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt index 282db2ddf31..5050ca34da9 100644 --- a/storage/xtradb/CMakeLists.txt +++ b/storage/xtradb/CMakeLists.txt @@ -284,6 +284,8 @@ SET(INNOBASE_SOURCES buf/buf0flu.cc buf/buf0lru.cc buf/buf0rea.cc +# TODO: JAN uncomment +# buf/buf0mtflu.cc data/data0data.cc data/data0type.cc dict/dict0boot.cc @@ -297,6 +299,8 @@ SET(INNOBASE_SOURCES eval/eval0eval.cc eval/eval0proc.cc fil/fil0fil.cc + fil/fil0pagecompress.cc + fil/lz4.c fsp/fsp0fsp.cc fut/fut0fut.cc fut/fut0lst.cc diff --git a/storage/xtradb/buf/buf0buf.cc b/storage/xtradb/buf/buf0buf.cc index d4b170028d9..b995e3ee737 100644 --- a/storage/xtradb/buf/buf0buf.cc +++ b/storage/xtradb/buf/buf0buf.cc @@ -2,6 +2,7 @@ Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, Google Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. 
Those modifications are gratefully acknowledged and are described @@ -3371,6 +3372,7 @@ buf_page_init_low( bpage->access_time = 0; bpage->newest_modification = 0; bpage->oldest_modification = 0; + bpage->write_size = 0; HASH_INVALIDATE(bpage, hash); bpage->is_corrupt = FALSE; #if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG @@ -5501,3 +5503,24 @@ buf_page_init_for_backup_restore( } } #endif /* !UNIV_HOTBACKUP */ + +/*********************************************************************//** +Aquire LRU list mutex */ +void +buf_pool_mutex_enter( +/*=================*/ + buf_pool_t* buf_pool) /*!< in: buffer pool */ +{ + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); + mutex_enter(&buf_pool->LRU_list_mutex); +} +/*********************************************************************//** +Exit LRU list mutex */ +void +buf_pool_mutex_exit( +/*================*/ + buf_pool_t* buf_pool) /*!< in: buffer pool */ +{ + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); + mutex_exit(&buf_pool->LRU_list_mutex); +} diff --git a/storage/xtradb/buf/buf0dblwr.cc b/storage/xtradb/buf/buf0dblwr.cc index 506a5b177ba..30b41dc754e 100644 --- a/storage/xtradb/buf/buf0dblwr.cc +++ b/storage/xtradb/buf/buf0dblwr.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -382,7 +383,7 @@ buf_dblwr_init_or_restore_pages( buffer */ fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0, - UNIV_PAGE_SIZE, read_buf, NULL); + UNIV_PAGE_SIZE, read_buf, NULL, 0); doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) @@ -418,11 +419,11 @@ buf_dblwr_init_or_restore_pages( fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, block1, 0, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, - buf, NULL); + buf, NULL, 0); fil_io(OS_FILE_READ, true, TRX_SYS_SPACE, 0, block2, 0, TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, - NULL); + NULL, 0); /* Check if any of these pages is half-written in data files, in the intended position */ @@ -450,7 +451,7 @@ buf_dblwr_init_or_restore_pages( } fil_io(OS_FILE_WRITE, true, 0, 0, source_page_no, 0, - UNIV_PAGE_SIZE, page, NULL); + UNIV_PAGE_SIZE, page, NULL, 0); } else { space_id = mach_read_from_4( @@ -492,7 +493,7 @@ buf_dblwr_init_or_restore_pages( fil_io(OS_FILE_READ, true, space_id, zip_size, page_no, 0, zip_size ? zip_size : UNIV_PAGE_SIZE, - read_buf, NULL); + read_buf, NULL, 0); /* Check if the page is corrupt */ @@ -544,7 +545,7 @@ buf_dblwr_init_or_restore_pages( fil_io(OS_FILE_WRITE, true, space_id, zip_size, page_no, 0, zip_size ? 
zip_size : UNIV_PAGE_SIZE, - page, NULL); + page, NULL, 0); ib_logf(IB_LOG_LEVEL_INFO, "Recovered the page from" @@ -763,7 +764,7 @@ buf_dblwr_write_block_to_datafile( buf_page_get_page_no(bpage), 0, buf_page_get_zip_size(bpage), (void*) bpage->zip.data, - (void*) bpage); + (void*) bpage, 0); return; } @@ -775,7 +776,8 @@ buf_dblwr_write_block_to_datafile( fil_io(flags, sync, buf_block_get_space(block), 0, buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE, - (void*) block->frame, (void*) block); + (void*) block->frame, (void*) block, + (ulint *)&bpage->write_size); } /********************************************************************//** @@ -869,7 +871,7 @@ try_again: fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, buf_dblwr->block1, 0, len, - (void*) write_buf, NULL); + (void*) write_buf, NULL, 0); if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { /* No unwritten pages in the second block. */ @@ -885,7 +887,7 @@ try_again: fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, buf_dblwr->block2, 0, len, - (void*) write_buf, NULL); + (void*) write_buf, NULL, 0); flush: /* increment the doublewrite flushed pages counter */ @@ -1115,14 +1117,14 @@ retry: fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, offset, 0, UNIV_PAGE_SIZE, (void*) (buf_dblwr->write_buf - + UNIV_PAGE_SIZE * i), NULL); + + UNIV_PAGE_SIZE * i), NULL, 0); } else { /* It is a regular page. Write it directly to the doublewrite buffer */ fil_io(OS_FILE_WRITE, true, TRX_SYS_SPACE, 0, offset, 0, UNIV_PAGE_SIZE, (void*) ((buf_block_t*) bpage)->frame, - NULL); + NULL, 0); } /* Now flush the doublewrite buffer data to disk */ diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index abcee504d2e..3c030eb60ee 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -1,6 +1,8 @@ /***************************************************************************** Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2014, Fusion-io. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -48,6 +50,7 @@ Created 11/11/1995 Heikki Tuuri #include "srv0mon.h" #include "mysql/plugin.h" #include "mysql/service_thd_wait.h" +#include "fil0pagecompress.h" /** Number of pages flushed through non flush_list flushes. */ // static ulint buf_lru_flush_page_count = 0; @@ -71,11 +74,6 @@ in thrashing. 
*/ /* @} */ -/** Handled page counters for a single flush */ -struct flush_counters_t { - ulint flushed; /*!< number of dirty pages flushed */ - ulint evicted; /*!< number of clean pages evicted */ -}; /******************************************************************//** Increases flush_list size in bytes with zip_size for compressed page, @@ -721,8 +719,10 @@ buf_flush_write_complete( buf_pool->n_flush[flush_type]--; - /* fprintf(stderr, "n pending flush %lu\n", - buf_pool->n_flush[flush_type]); */ +#ifdef UNIV_DEBUG + fprintf(stderr, "n pending flush %lu\n", + buf_pool->n_flush[flush_type]); +#endif if (buf_pool->n_flush[flush_type] == 0 && buf_pool->init_flush[flush_type] == FALSE) { @@ -880,6 +880,8 @@ buf_flush_write_block_low( { ulint zip_size = buf_page_get_zip_size(bpage); page_t* frame = NULL; + ulint space_id = buf_page_get_space(bpage); + atomic_writes_t awrites = fil_space_get_atomic_writes(space_id); #ifdef UNIV_DEBUG buf_pool_t* buf_pool = buf_pool_from_bpage(bpage); @@ -955,12 +957,26 @@ buf_flush_write_block_low( sync, buf_page_get_space(bpage), zip_size, buf_page_get_page_no(bpage), 0, zip_size ? zip_size : UNIV_PAGE_SIZE, - frame, bpage); - } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) { - buf_dblwr_write_single_page(bpage, sync); + frame, bpage, &bpage->write_size); } else { - ut_ad(!sync); - buf_dblwr_add_to_batch(bpage); + /* InnoDB uses doublewrite buffer and doublewrite buffer + is initialized. User can define do we use atomic writes + on a file space (table) or not. If atomic writes are + not used we should use doublewrite buffer and if + atomic writes should be used, no doublewrite buffer + is used. */ + + if (awrites == ATOMIC_WRITES_ON) { + fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, + FALSE, buf_page_get_space(bpage), zip_size, + buf_page_get_page_no(bpage), 0, + zip_size ? zip_size : UNIV_PAGE_SIZE, + frame, bpage, &bpage->write_size); + } else if (flush_type == BUF_FLUSH_SINGLE_PAGE) { + buf_dblwr_write_single_page(bpage, sync); + } else { + buf_dblwr_add_to_batch(bpage); + } } /* When doing single page flushing the IO is done synchronously @@ -1747,7 +1763,6 @@ end up waiting for these latches! NOTE 2: in the case of a flush list flush, the calling thread is not allowed to own any latches on pages! @return number of blocks for which the write request was queued */ __attribute__((nonnull)) -static void buf_flush_batch( /*============*/ @@ -1806,7 +1821,6 @@ buf_flush_batch( /******************************************************************//** Gather the aggregated stats for both flush list and LRU list flushing */ -static void buf_flush_common( /*=============*/ @@ -1833,7 +1847,6 @@ buf_flush_common( /******************************************************************//** Start a buffer flush batch for LRU or flush list */ -static ibool buf_flush_start( /*============*/ @@ -1862,7 +1875,6 @@ buf_flush_start( /******************************************************************//** End a buffer flush batch for LRU or flush list */ -static void buf_flush_end( /*==========*/ @@ -1912,11 +1924,55 @@ buf_flush_wait_batch_end( } } else { thd_wait_begin(NULL, THD_WAIT_DISKIO); - os_event_wait(buf_pool->no_flush[type]); + os_event_wait(buf_pool->no_flush[type]); thd_wait_end(NULL); } } +/* JAN: TODO: */ +/*******************************************************************//** +This utility flushes dirty blocks from the end of the LRU list and also +puts replaceable clean pages from the end of the LRU list to the free +list. 
+NOTE: The calling thread is not allowed to own any latches on pages! +@return true if a batch was queued successfully. false if another batch +of same type was already running. */ +static +bool +pgcomp_buf_flush_LRU( +/*==========*/ + buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ + ulint min_n, /*!< in: wished minimum mumber of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + ulint* n_processed) /*!< out: the number of pages + which were processed is passed + back to caller. Ignored if NULL */ +{ + flush_counters_t n; + + if (n_processed) { + *n_processed = 0; + } + + if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) { + return(false); + } + + buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0, false, &n); + + buf_flush_end(buf_pool, BUF_FLUSH_LRU); + + buf_flush_common(BUF_FLUSH_LRU, n.flushed); + + if (n_processed) { + *n_processed = n.flushed; + } + + return(true); +} +/* JAN: TODO: END: */ + /*******************************************************************//** This utility flushes dirty blocks from the end of the LRU list and also puts replaceable clean pages from the end of the LRU list to the free @@ -1954,6 +2010,168 @@ buf_flush_LRU( return(true); } +/* JAN: TODO: */ +/*******************************************************************//**/ +extern int is_pgcomp_wrk_init_done(void); +extern int pgcomp_flush_work_items(int buf_pool_inst, int *pages_flushed, + int flush_type, int min_n, unsigned long long lsn_limit); + +#define MT_COMP_WATER_MARK 50 + +#include +int timediff(struct timeval *g_time, struct timeval *s_time, struct timeval *d_time) +{ + if (g_time->tv_usec < s_time->tv_usec) + { + int nsec = (s_time->tv_usec - g_time->tv_usec) / 1000000 + 1; + s_time->tv_usec -= 1000000 * nsec; + s_time->tv_sec += nsec; + } + if (g_time->tv_usec - s_time->tv_usec > 1000000) + { + int nsec = (s_time->tv_usec - g_time->tv_usec) / 1000000; + s_time->tv_usec += 1000000 * nsec; + s_time->tv_sec -= nsec; + } + d_time->tv_sec = g_time->tv_sec - s_time->tv_sec; + d_time->tv_usec = g_time->tv_usec - s_time->tv_usec; + + return 0; +} + +static pthread_mutex_t pgcomp_mtx = PTHREAD_MUTEX_INITIALIZER; +/*******************************************************************//** +Multi-threaded version of buf_flush_list +*/ +UNIV_INTERN +bool +pgcomp_buf_flush_list( +/*==================*/ + ulint min_n, /*!< in: wished minimum mumber of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all + blocks whose oldest_modification is + smaller than this should be flushed + (if their number does not exceed + min_n), otherwise ignored */ + ulint* n_processed) /*!< out: the number of pages + which were processed is passed + back to caller. Ignored if NULL */ + +{ + ulint i; + bool success = true; + struct timeval p_start_time, p_end_time, d_time; + flush_counters_t n; + + if (n_processed) { + *n_processed = 0; + } + + if (min_n != ULINT_MAX) { + /* Ensure that flushing is spread evenly amongst the + buffer pool instances. When min_n is ULINT_MAX + we need to flush everything up to the lsn limit + so no limit here. 
*/ + min_n = (min_n + srv_buf_pool_instances - 1) + / srv_buf_pool_instances; + } + +#ifdef UNIV_DEBUG + gettimeofday(&p_start_time, 0x0); +#endif + if(is_pgcomp_wrk_init_done() && (min_n > MT_COMP_WATER_MARK)) { + int cnt_flush[32]; + + //stack_trace(); + pthread_mutex_lock(&pgcomp_mtx); + //gettimeofday(&p_start_time, 0x0); + //fprintf(stderr, "Calling into wrk-pgcomp [min:%lu]", min_n); + pgcomp_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LIST, + min_n, lsn_limit); + + for (i = 0; i < srv_buf_pool_instances; i++) { + if (n_processed) { + *n_processed += cnt_flush[i]; + } + if (cnt_flush[i]) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_COUNT, + MONITOR_FLUSH_BATCH_PAGES, + cnt_flush[i]); + + } + } + + pthread_mutex_unlock(&pgcomp_mtx); + +#ifdef UNIV_DEBUG + gettimeofday(&p_end_time, 0x0); + timediff(&p_end_time, &p_start_time, &d_time); + fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu %llu usec]\n", ( + min_n * srv_buf_pool_instances), *n_processed, + (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); +#endif + return(success); + } + /* Flush to lsn_limit in all buffer pool instances */ + for (i = 0; i < srv_buf_pool_instances; i++) { + buf_pool_t* buf_pool; + + buf_pool = buf_pool_from_array(i); + + if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) { + /* We have two choices here. If lsn_limit was + specified then skipping an instance of buffer + pool means we cannot guarantee that all pages + up to lsn_limit has been flushed. We can + return right now with failure or we can try + to flush remaining buffer pools up to the + lsn_limit. We attempt to flush other buffer + pools based on the assumption that it will + help in the retry which will follow the + failure. */ + success = false; + + continue; + } + + buf_flush_batch( + buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit, false, &n); + + buf_flush_end(buf_pool, BUF_FLUSH_LIST); + + buf_flush_common(BUF_FLUSH_LIST, n.flushed); + + if (n_processed) { + *n_processed += n.flushed; + } + + if (n.flushed) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_COUNT, + MONITOR_FLUSH_BATCH_PAGES, + n.flushed); + } + } + +#ifdef UNIV_DEBUG + gettimeofday(&p_end_time, 0x0); + timediff(&p_end_time, &p_start_time, &d_time); + + fprintf(stderr, "[2] [*n_processed: (min:%lu)%lu %llu usec]\n", ( + min_n * srv_buf_pool_instances), *n_processed, + (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); +#endif + return(success); +} + +/* JAN: TODO: END: */ + /*******************************************************************//** This utility flushes dirty blocks from the end of the flush list of all buffer pool instances. @@ -1986,6 +2204,12 @@ buf_flush_list( bool timeout = false; ulint flush_start_time = 0; + /* JAN: TODO: */ + if (is_pgcomp_wrk_init_done()) { + return(pgcomp_buf_flush_list(min_n, lsn_limit, n_processed)); + } + /* JAN: TODO: END: */ + for (i = 0; i < srv_buf_pool_instances; i++) { requested_pages[i] = 0; active_instance[i] = true; @@ -2179,6 +2403,60 @@ buf_flush_single_page_from_LRU( return(freed); } +/* JAN: TODO: */ +/*********************************************************************//** +pgcomp_Clears up tail of the LRU lists: +* Put replaceable pages at the tail of LRU to the free list +* Flush dirty pages at the tail of LRU to the disk +The depth to which we scan each buffer pool is controlled by dynamic +config parameter innodb_LRU_scan_depth. 
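The division at the top of pgcomp_buf_flush_list() above spreads the request over the buffer pool instances; a worked example with assumed figures (they are not measurements) makes the rounding visible:

    /* The multi-threaded path is only taken when min_n exceeds
    MT_COMP_WATER_MARK (50). With srv_buf_pool_instances = 8 and a request
    of min_n = 100 pages, (100 + 8 - 1) / 8 = 13 pages are asked from each
    instance, so 8 * 13 = 104 >= 100 and no part of the request is lost to
    rounding; the per-instance counts returned in cnt_flush[] are then
    summed into *n_processed. */
    ulint   min_n        = 100;
    ulint   instances    = 8;
    ulint   per_instance = (min_n + instances - 1) / instances;    /* 13 */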
+@return total pages flushed */ +UNIV_INTERN +ulint +pgcomp_buf_flush_LRU_tail(void) +/*====================*/ +{ + struct timeval p_start_time, p_end_time, d_time; + ulint total_flushed=0, i=0; + int cnt_flush[32]; + +#ifdef UNIV_DEBUG + gettimeofday(&p_start_time, 0x0); +#endif + assert(is_pgcomp_wrk_init_done()); + + pthread_mutex_lock(&pgcomp_mtx); + pgcomp_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); + + for (i = 0; i < srv_buf_pool_instances; i++) { + if (cnt_flush[i]) { + total_flushed += cnt_flush[i]; + + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_LRU_BATCH_TOTAL_PAGE, + MONITOR_LRU_BATCH_COUNT, + MONITOR_LRU_BATCH_PAGES, + cnt_flush[i]); + } + } + + pthread_mutex_unlock(&pgcomp_mtx); + +#ifdef UNIV_DEBUG + gettimeofday(&p_end_time, 0x0); + timediff(&p_end_time, &p_start_time, &d_time); + + fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu %llu usec]\n", ( + srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed, + (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); +#endif + + return(total_flushed); +} +/* JAN: TODO: END: */ + + /*********************************************************************//** Clears up tail of the LRU lists: * Put replaceable pages at the tail of LRU to the free list @@ -2203,6 +2481,13 @@ buf_flush_LRU_tail(void) ulint free_list_lwm = srv_LRU_scan_depth / 100 * srv_cleaner_free_list_lwm; + /* JAN: TODO: */ + if(is_pgcomp_wrk_init_done()) + { + return(pgcomp_buf_flush_LRU_tail()); + } + /* JAN: TODO: END */ + for (ulint i = 0; i < srv_buf_pool_instances; i++) { const buf_pool_t* buf_pool = buf_pool_from_array(i); @@ -2640,6 +2925,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( ulint n_flushed = 0; ulint last_activity = srv_get_activity_count(); ulint lru_sleep_time = srv_cleaner_max_lru_time; + ulint n_lru=0, n_pgc_flush=0, n_pgc_batch=0; ut_ad(!srv_read_only_mode); @@ -2684,15 +2970,25 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( next_loop_time = ut_time_ms() + page_cleaner_sleep_time; /* Flush pages from end of LRU if required */ - n_flushed = buf_flush_LRU_tail(); + n_lru = n_flushed = buf_flush_LRU_tail(); +#ifdef UNIV_DEBUG + if (n_lru) { + fprintf(stderr,"n_lru:%lu ",n_lru); + } +#endif if (srv_check_activity(last_activity)) { last_activity = srv_get_activity_count(); /* Flush pages from flush_list if required */ - n_flushed += page_cleaner_flush_pages_if_needed(); + n_flushed += n_pgc_flush = page_cleaner_flush_pages_if_needed(); +#ifdef UNIV_DEBUG + if (n_pgc_flush) { + fprintf(stderr,"n_pgc_flush:%lu ",n_pgc_flush); + } +#endif } else { - n_flushed = page_cleaner_do_flush_batch( + n_pgc_batch = n_flushed = page_cleaner_do_flush_batch( PCT_IO(100), LSN_MAX); @@ -2703,7 +2999,20 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( MONITOR_FLUSH_BACKGROUND_PAGES, n_flushed); } +#ifdef UNIV_DEBUG + if (n_pgc_batch) { + fprintf(stderr,"n_pgc_batch:%lu ",n_pgc_batch); + } +#endif } + +#ifdef UNIV_DEBUG + if (n_lru || n_pgc_flush || n_pgc_batch) { + fprintf(stderr,"\n"); + n_lru = n_pgc_flush = n_pgc_batch = 0; + } +#endif + } ut_ad(srv_shutdown_state > 0); diff --git a/storage/xtradb/buf/buf0rea.cc b/storage/xtradb/buf/buf0rea.cc index 6e348bbf004..3dec3df6f2b 100644 --- a/storage/xtradb/buf/buf0rea.cc +++ b/storage/xtradb/buf/buf0rea.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -229,14 +230,14 @@ not_to_recover: *err = _fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, zip_size, offset, 0, zip_size, - bpage->zip.data, bpage, trx); + bpage->zip.data, bpage, 0, trx); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); *err = _fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, 0, offset, 0, UNIV_PAGE_SIZE, - ((buf_block_t*) bpage)->frame, bpage, trx); + ((buf_block_t*) bpage)->frame, bpage, 0, trx); } if (sync) { diff --git a/storage/xtradb/dict/dict0dict.cc b/storage/xtradb/dict/dict0dict.cc index a20456fe3cf..d6a05d2b214 100644 --- a/storage/xtradb/dict/dict0dict.cc +++ b/storage/xtradb/dict/dict0dict.cc @@ -2,6 +2,7 @@ Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index 9861f85b814..f3e952299ff 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013 SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -54,6 +55,15 @@ Created 10/25/1995 Heikki Tuuri # include "srv0srv.h" static ulint srv_data_read, srv_data_written; #endif /* !UNIV_HOTBACKUP */ +#include "fil0pagecompress.h" +#include "zlib.h" +#ifdef __linux__ +#include +#include +#include +#include +#endif +#include "row0mysql.h" /* IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE @@ -434,11 +444,16 @@ fil_read( block size multiple */ void* buf, /*!< in/out: buffer where to store data read; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync - aio used, else ignored */ + void* message, /*!< in: message for aio handler if non-sync + aio used, else ignored */ + ulint* write_size) /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ { return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset, - byte_offset, len, buf, message)); + byte_offset, len, buf, message, write_size)); } /********************************************************************//** @@ -463,18 +478,22 @@ fil_write( be a block size multiple */ void* buf, /*!< in: buffer from which to write; in aio this must be appropriately aligned */ - void* message) /*!< in: message for aio handler if non-sync - aio used, else ignored */ + void* message, /*!< in: message for aio handler if non-sync + aio used, else ignored */ + ulint* write_size) /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. 
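The write_size in/out parameter documented just above carries one piece of per-page state: it is set after the first successful trim, and later writes only trim again when the compressed size actually shrank. A hypothetical restatement of that contract (not the engine code):

    /* Returns nonzero when a trim should be issued for this write and
       updates the remembered size; *write_size == 0 means nothing has been
       trimmed for this page yet. */
    static int
    trim_needed(unsigned long new_size, unsigned long* write_size)
    {
            if (*write_size == 0 || new_size < *write_size) {
                    *write_size = new_size;
                    return(1);
            }
            return(0);      /* size did not decrease: skip the trim */
    }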
*/ { ut_ad(!srv_read_only_mode); return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset, - byte_offset, len, buf, message)); + byte_offset, len, buf, message, write_size)); } /*******************************************************************//** Returns the table space by a given id, NULL if not found. */ -UNIV_INLINE fil_space_t* fil_space_get_by_id( /*================*/ @@ -492,6 +511,19 @@ fil_space_get_by_id( return(space); } +/****************************************************************//** +Get space id from fil node */ +ulint +fil_node_get_space_id( +/*==================*/ + fil_node_t* node) /*!< in: Compressed node*/ +{ + ut_ad(node); + ut_ad(node->space); + + return (node->space->id); +} + /*******************************************************************//** Returns the table space by a given name, NULL if not found. */ UNIV_INLINE @@ -712,8 +744,9 @@ fil_node_open_file( byte* buf2; byte* page; ulint space_id; - ulint flags; + ulint flags=0; ulint page_size; + ibool atomic_writes=FALSE; ut_ad(mutex_own(&(system->mutex))); ut_a(node->n_pending == 0); @@ -730,7 +763,7 @@ fil_node_open_file( node->handle = os_file_create_simple_no_error_handling( innodb_file_data_key, node->name, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &success); + OS_FILE_READ_ONLY, &success, 0); if (!success) { /* The following call prints an error message */ os_file_get_last_error(true); @@ -782,6 +815,7 @@ fil_node_open_file( space_id = fsp_header_get_space_id(page); flags = fsp_header_get_flags(page); page_size = fsp_flags_get_page_size(flags); + atomic_writes = fsp_flags_get_atomic_writes(flags); ut_free(buf2); @@ -832,6 +866,17 @@ fil_node_open_file( ut_error; } + if (UNIV_UNLIKELY(space->flags != flags)) { + if (!dict_tf_verify_flags(space->flags, flags)) { + fprintf(stderr, + "InnoDB: Error: table flags are 0x%lx" + " in the data dictionary\n" + "InnoDB: but the flags in file %s are 0x%lx!\n", + space->flags, node->name, flags); + ut_error; + } + } + if (size_bytes >= 1024 * 1024) { /* Truncate the size to whole megabytes. 
*/ size_bytes = ut_2pow_round(size_bytes, 1024 * 1024); @@ -851,6 +896,8 @@ add_size: space->size += node->size; } + atomic_writes = fsp_flags_get_atomic_writes(space->flags); + /* printf("Opening file %s\n", node->name); */ /* Open the file for reading and writing, in Windows normally in the @@ -861,18 +908,18 @@ add_size: node->handle = os_file_create(innodb_file_log_key, node->name, OS_FILE_OPEN, OS_FILE_AIO, OS_LOG_FILE, - &ret); + &ret, atomic_writes); } else if (node->is_raw_disk) { node->handle = os_file_create(innodb_file_data_key, node->name, OS_FILE_OPEN_RAW, OS_FILE_AIO, OS_DATA_FILE, - &ret); + &ret, atomic_writes); } else { node->handle = os_file_create(innodb_file_data_key, node->name, OS_FILE_OPEN, OS_FILE_AIO, OS_DATA_FILE, - &ret); + &ret, atomic_writes); } ut_a(ret); @@ -1932,12 +1979,12 @@ fil_write_lsn_and_arch_no_to_file( buf = static_cast(ut_align(buf1, UNIV_PAGE_SIZE)); err = fil_read(TRUE, space, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL); + UNIV_PAGE_SIZE, buf, NULL, 0); if (err == DB_SUCCESS) { mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); err = fil_write(TRUE, space, 0, sum_of_sizes, 0, - UNIV_PAGE_SIZE, buf, NULL); + UNIV_PAGE_SIZE, buf, NULL, 0); } mem_free(buf1); @@ -3222,7 +3269,7 @@ fil_create_link_file( file = os_file_create_simple_no_error_handling( innodb_file_data_key, link_filepath, - OS_FILE_CREATE, OS_FILE_READ_WRITE, &success); + OS_FILE_CREATE, OS_FILE_READ_WRITE, &success, 0); if (!success) { /* The following call will print an error message */ @@ -3331,8 +3378,9 @@ fil_open_linked_file( /*===============*/ const char* tablename, /*!< in: database/tablename */ char** remote_filepath,/*!< out: remote filepath */ - os_file_t* remote_file) /*!< out: remote file handle */ - + os_file_t* remote_file, /*!< out: remote file handle */ + ulint atomic_writes) /*!< in: atomic writes table option + value */ { ibool success; @@ -3346,7 +3394,7 @@ fil_open_linked_file( *remote_file = os_file_create_simple_no_error_handling( innodb_file_data_key, *remote_filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, - &success); + &success, atomic_writes); if (!success) { char* link_filepath = fil_make_isl_name(tablename); @@ -3401,6 +3449,7 @@ fil_create_new_single_table_tablespace( /* TRUE if a table is created with CREATE TEMPORARY TABLE */ bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY); bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags); + bool atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); ut_a(space_id > 0); ut_ad(!srv_read_only_mode); @@ -3433,7 +3482,8 @@ fil_create_new_single_table_tablespace( OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL, OS_DATA_FILE, - &ret); + &ret, + atomic_writes); if (ret == FALSE) { /* The following call will print an error message */ @@ -3498,6 +3548,7 @@ fil_create_new_single_table_tablespace( flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE); fsp_header_init_fields(page, space_id, flags); mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id); + ut_ad(fsp_flags_is_valid(flags)); if (!(fsp_flags_is_compressed(flags))) { buf_flush_init_for_writing(page, NULL, 0); @@ -3685,6 +3736,7 @@ fil_open_single_table_tablespace( fsp_open_info remote; ulint tablespaces_found = 0; ulint valid_tablespaces_found = 0; + ibool atomic_writes = FALSE; #ifdef UNIV_SYNC_DEBUG ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); @@ -3719,7 +3771,7 @@ fil_open_single_table_tablespace( } link_file_found = fil_open_linked_file( - tablename, &remote.filepath, &remote.file); + tablename, &remote.filepath, 
&remote.file, atomic_writes); remote.success = link_file_found; if (remote.success) { /* possibility of multiple files. */ @@ -3747,7 +3799,7 @@ fil_open_single_table_tablespace( if (dict.filepath) { dict.file = os_file_create_simple_no_error_handling( innodb_file_data_key, dict.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &dict.success); + OS_FILE_READ_ONLY, &dict.success, atomic_writes); if (dict.success) { /* possibility of multiple files. */ validate = true; @@ -3759,7 +3811,7 @@ fil_open_single_table_tablespace( ut_a(def.filepath); def.file = os_file_create_simple_no_error_handling( innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &def.success); + OS_FILE_READ_ONLY, &def.success, atomic_writes); if (def.success) { tablespaces_found++; } @@ -4155,7 +4207,7 @@ fil_load_single_table_tablespace( /* Check for a link file which locates a remote tablespace. */ remote.success = fil_open_linked_file( - tablename, &remote.filepath, &remote.file); + tablename, &remote.filepath, &remote.file, FALSE); /* Read the first page of the remote tablespace */ if (remote.success) { @@ -4170,7 +4222,7 @@ fil_load_single_table_tablespace( /* Try to open the tablespace in the datadir. */ def.file = os_file_create_simple_no_error_handling( innodb_file_data_key, def.filepath, OS_FILE_OPEN, - OS_FILE_READ_ONLY, &def.success); + OS_FILE_READ_ONLY, &def.success, FALSE); /* Read the first page of the remote tablespace */ if (def.success) { @@ -4938,7 +4990,6 @@ retry: #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { - mutex_exit(&fil_system->mutex); success = os_file_set_size(node->name, node->handle, (size_after_extend - file_start_page_no) * page_size); @@ -4975,7 +5026,7 @@ retry: success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC, node->name, node->handle, buf, offset, page_size * n_pages, - NULL, NULL, space_id, NULL); + NULL, NULL, space_id, NULL, 0, 0, 0); #endif /* UNIV_HOTBACKUP */ if (success) { os_has_said_disk_full = FALSE; @@ -5361,7 +5412,12 @@ _fil_io( or from where to write; in aio this must be appropriately aligned */ void* message, /*!< in: message for aio handler if non-sync - aio used, else ignored */ + aio used, else ignored */ + ulint* write_size, /*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ trx_t* trx) { ulint mode; @@ -5372,6 +5428,8 @@ _fil_io( ulint wake_later; os_offset_t offset; ibool ignore_nonexistent_pages; + ibool page_compressed = FALSE; + ulint page_compression_level = 0; is_log = type & OS_FILE_LOG; type = type & ~OS_FILE_LOG; @@ -5425,6 +5483,9 @@ _fil_io( } else if (type == OS_FILE_WRITE) { ut_ad(!srv_read_only_mode); srv_stats.data_written.add(len); + if (fil_page_is_index_page((byte *)buf)) { + srv_stats.index_pages_written.inc(); + } } /* Reserve the fil_system mutex and make sure that we can open at @@ -5434,6 +5495,8 @@ _fil_io( space = fil_space_get_by_id(space_id); + page_compressed = fsp_flags_is_page_compressed(space->flags); + page_compression_level = fsp_flags_get_page_compression_level(space->flags); /* If we are deleting a tablespace we don't allow any read operations on that. However, we do allow write operations. 
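The write path above increments srv_stats.index_pages_written when fil_page_is_index_page() recognises the buffer as a B-tree page. A self-contained sketch of that kind of check, assuming the standard InnoDB page header layout (page type stored big-endian at byte offset 24, value 17855 for an index page); the names are stand-ins, not the real helpers:

    enum { PAGE_TYPE_OFFSET = 24, PAGE_TYPE_INDEX = 17855 };

    /* Hypothetical stand-in for fil_page_is_index_page(). */
    static int
    page_is_index_page(const unsigned char* frame)
    {
            unsigned type = ((unsigned) frame[PAGE_TYPE_OFFSET] << 8)
                            | frame[PAGE_TYPE_OFFSET + 1];

            return(type == PAGE_TYPE_INDEX);
    }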
*/ if (space == 0 || (type == OS_FILE_READ && space->stop_new_ops)) { @@ -5579,7 +5642,8 @@ _fil_io( /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset, len, node, message, space_id, trx); + offset, len, node, message, space_id, trx, + page_compressed, page_compression_level, write_size); #else /* In ibbackup do normal i/o, not aio */ @@ -6214,7 +6278,7 @@ fil_tablespace_iterate( file = os_file_create_simple_no_error_handling( innodb_file_data_key, filepath, - OS_FILE_OPEN, OS_FILE_READ_WRITE, &success); + OS_FILE_OPEN, OS_FILE_READ_WRITE, &success, FALSE); DBUG_EXECUTE_IF("fil_tablespace_iterate_failure", { @@ -6501,3 +6565,33 @@ fil_space_set_corrupt( mutex_exit(&fil_system->mutex); } + +/****************************************************************//** +Acquire fil_system mutex */ +void +fil_system_enter(void) +/*==================*/ +{ + ut_ad(!mutex_own(&fil_system->mutex)); + mutex_enter(&fil_system->mutex); +} + +/****************************************************************//** +Release fil_system mutex */ +void +fil_system_exit(void) +/*=================*/ +{ + ut_ad(mutex_own(&fil_system->mutex)); + mutex_exit(&fil_system->mutex); +} + +/*******************************************************************//** +Return space name */ +char* +fil_space_name( +/*===========*/ + fil_space_t* space) /*!< in: space */ +{ + return (space->name); +} diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc new file mode 100644 index 00000000000..10ac273955f --- /dev/null +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -0,0 +1,324 @@ +/***************************************************************************** + +Copyright (C) 2013, 2014, SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file fil/fil0pagecompress.cc +Implementation for page compressed file spaces. 
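fil_compress_page() below keeps the 38-byte file page header, reuses the FIL_PAGE_FILE_FLUSH_LSN field to record the compression algorithm, stores the compressed payload length right after the header, and pads the result up to the next I/O block so the unused tail can be trimmed. A minimal sketch of that size rounding, assuming a 512-byte block and a roughly 40-byte compressed-page header (example values, not the exact constants):

    static unsigned long
    aligned_write_size(unsigned long header_len, unsigned long payload_len,
                       unsigned long block_size)
    {
            unsigned long size = header_len + payload_len;

            if (size % block_size) {
                    size += block_size - (size % block_size);
            }
            return(size);
    }

    /* aligned_write_size(40, 3000, 512) == 3072, so on a 16K page roughly
       13K of the original page size never has to be written. */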
+ +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#include "fil0fil.h" +#include "fil0pagecompress.h" + +#include +#include + +#include "mem0mem.h" +#include "hash0hash.h" +#include "os0file.h" +#include "mach0data.h" +#include "buf0buf.h" +#include "buf0flu.h" +#include "log0recv.h" +#include "fsp0fsp.h" +#include "srv0srv.h" +#include "srv0start.h" +#include "mtr0mtr.h" +#include "mtr0log.h" +#include "dict0dict.h" +#include "page0page.h" +#include "page0zip.h" +#include "trx0sys.h" +#include "row0mysql.h" +#ifndef UNIV_HOTBACKUP +# include "buf0lru.h" +# include "ibuf0ibuf.h" +# include "sync0sync.h" +# include "os0sync.h" +#else /* !UNIV_HOTBACKUP */ +# include "srv0srv.h" +static ulint srv_data_read, srv_data_written; +#endif /* !UNIV_HOTBACKUP */ +#include "zlib.h" +#ifdef __linux__ +#include +#include +#include +#include +#endif +#include "row0mysql.h" +#include "lz4.h" + +/****************************************************************//** +For page compressed pages compress the page before actual write +operation. +@return compressed page to be written*/ +byte* +fil_compress_page( +/*==============*/ + ulint space_id, /*!< in: tablespace id of the + table. */ + byte* buf, /*!< in: buffer from which to write; in aio + this must be appropriately aligned */ + byte* out_buf, /*!< out: compressed buffer */ + ulint len, /*!< in: length of input buffer.*/ + ulint compression_level, /* in: compression level */ + ulint* out_len) /*!< out: actual length of compressed page */ +{ + int err = Z_OK; + int level = 0; + ulint header_len = FIL_PAGE_DATA + FIL_PAGE_COMPRESSED_SIZE; + ulint write_size=0; + + ut_ad(buf); + ut_ad(out_buf); + ut_ad(len); + ut_ad(out_len); + + level = compression_level; + ut_ad(fil_space_is_page_compressed(space_id)); + + fil_system_enter(); + fil_space_t* space = fil_space_get_by_id(space_id); + fil_system_exit(); + + /* If no compression level was provided to this table, use system + default level */ + if (level == 0) { + level = page_zip_level; + } + +#ifdef UNIV_DEBUG + fprintf(stderr, + "InnoDB: Note: Preparing for compress for space %lu name %s len %lu\n", + space_id, fil_space_name(space), len); +#endif + + write_size = UNIV_PAGE_SIZE - header_len; + + if (srv_use_lz4) { + err = LZ4_compress_limitedOutput((const char *)buf, (char *)out_buf+header_len, len, write_size); + write_size = err; + + if (err == 0) { + /* If error we leave the actual page as it was */ + + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", + space_id, fil_space_name(space), len, err, write_size); + + *out_len = len; + return (buf); + } + } else { + err = compress2(out_buf+header_len, &write_size, buf, len, level); + + if (err != Z_OK) { + /* If error we leave the actual page as it was */ + + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", + space_id, fil_space_name(space), len, err, write_size); + + *out_len = len; + return (buf); + } + } + + /* Set up the page header */ + memcpy(out_buf, buf, FIL_PAGE_DATA); + /* Set up the checksum */ + mach_write_to_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM, BUF_NO_CHECKSUM_MAGIC); + /* Set up the correct page type */ + mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); + /* Set up the flush lsn to be compression algorithm */ + if (srv_use_lz4) { + mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_LZ4); + } else { + 
mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_ZLIB); + } + /* Set up the actual payload lenght */ + mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); + +#ifdef UNIV_DEBUG + /* Verify */ + ut_ad(fil_page_is_compressed(out_buf)); + ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); + ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); + if (srv_use_lz4) { + ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_LZ4); + } else { + ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_ZLIB); + } +#endif + + write_size+=header_len; + /* Actual write needs to be alligned on block size */ + if (write_size % OS_FILE_LOG_BLOCK_SIZE) { + write_size = (write_size + (OS_FILE_LOG_BLOCK_SIZE - (write_size % OS_FILE_LOG_BLOCK_SIZE))); + } + +#ifdef UNIV_DEBUG + fprintf(stderr, + "InnoDB: Note: Compression succeeded for space %lu name %s len %lu out_len %lu\n", + space_id, fil_space_name(space), len, write_size); +#endif + +#define SECT_SIZE 512 + + srv_stats.page_compression_saved.add((len - write_size)); + if ((len - write_size) > 0) { + srv_stats.page_compression_trim_sect512.add(((len - write_size) / SECT_SIZE)); + srv_stats.page_compression_trim_sect4096.add(((len - write_size) / (SECT_SIZE*8))); + } + //srv_stats.page_compressed_trim_op.inc(); + srv_stats.pages_page_compressed.inc(); + *out_len = write_size; + + return(out_buf); + +} + +/****************************************************************//** +For page compressed pages decompress the page after actual read +operation. */ +void +fil_decompress_page( +/*================*/ + byte* page_buf, /*!< in: preallocated buffer or NULL */ + byte* buf, /*!< out: buffer from which to read; in aio + this must be appropriately aligned */ + ulint len) /*!< in: length of output buffer.*/ +{ + int err = 0; + ulint actual_size = 0; + ulint compression_alg = 0; + byte *in_buf; + + ut_ad(buf); + ut_ad(len); + + /* Before actual decompress, make sure that page type is correct */ + + if (mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM) != BUF_NO_CHECKSUM_MAGIC || + mach_read_from_2(buf+FIL_PAGE_TYPE) != FIL_PAGE_PAGE_COMPRESSED) { + fprintf(stderr, + "InnoDB: Corruption: We try to uncompress corrupted page\n" + "InnoDB: CRC %lu type %lu.\n" + "InnoDB: len %lu\n", + mach_read_from_4(buf+FIL_PAGE_SPACE_OR_CHKSUM), + mach_read_from_2(buf+FIL_PAGE_TYPE), len); + + fflush(stderr); + ut_error; + } + + /* Get compression algorithm */ + compression_alg = mach_read_from_8(buf+FIL_PAGE_FILE_FLUSH_LSN); + + // If no buffer was given, we need to allocate temporal buffer + if (page_buf == NULL) { +#ifdef UNIV_DEBUG + fprintf(stderr, + "InnoDB: Note: Compression buffer not given, allocating...\n"); +#endif + in_buf = static_cast(ut_malloc(UNIV_PAGE_SIZE)); + } else { + in_buf = page_buf; + } + + /* Get the actual size of compressed page */ + actual_size = mach_read_from_2(buf+FIL_PAGE_DATA); + /* Check if payload size is corrupted */ + if (actual_size == 0 || actual_size > UNIV_PAGE_SIZE) { + fprintf(stderr, + "InnoDB: Corruption: We try to uncompress corrupted page\n" + "InnoDB: actual size %lu compression %s\n", + actual_size, fil_get_compression_alg_name(compression_alg)); + fflush(stderr); + ut_error; + } + + if (compression_alg == FIL_PAGE_COMPRESSION_ZLIB) { + +#ifdef UNIV_DEBUG + fprintf(stderr, + "InnoDB: Note: Preparing for decompress for len %lu\n", + actual_size); +#endif + + err= uncompress(in_buf, &len, 
buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size); + + + /* If uncompress fails it means that page is corrupted */ + if (err != Z_OK) { + + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but uncompress failed with error %d.\n" + "InnoDB: size %lu len %lu\n", + err, actual_size, len); + + fflush(stderr); + + ut_error; + } + +#ifdef UNIV_DEBUG + fprintf(stderr, + "InnoDB: Note: Decompression succeeded for len %lu \n", + len); +#endif + } else if (compression_alg == FIL_PAGE_COMPRESSION_LZ4) { + err = LZ4_decompress_fast((const char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (char *)in_buf, UNIV_PAGE_SIZE); + + if (err != actual_size) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %d bytes.\n" + "InnoDB: size %lu len %lu\n", + err, actual_size, len); + fflush(stderr); + + ut_error; + } + } else { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but compression algorithm %s\n" + "InnoDB: is not known.\n" + ,fil_get_compression_alg_name(compression_alg)); + + fflush(stderr); + ut_error; + } + + srv_stats.pages_page_decompressed.inc(); + + /* Copy the uncompressed page to the buffer pool, not + really any other options. */ + memcpy(buf, in_buf, len); + + // Need to free temporal buffer if no buffer was given + if (page_buf == NULL) { + ut_free(in_buf); + } +} + + diff --git a/storage/xtradb/fil/lz4.c b/storage/xtradb/fil/lz4.c new file mode 100644 index 00000000000..4e864de67d3 --- /dev/null +++ b/storage/xtradb/fil/lz4.c @@ -0,0 +1,822 @@ +/* + LZ4 - Fast LZ compression algorithm + Copyright (C) 2011-2013, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + You can contact the author at : + - LZ4 source repository : http://code.google.com/p/lz4/ + - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c +*/ + +//************************************** +// Tuning parameters +//************************************** +// MEMORY_USAGE : +// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 
+// Increasing memory usage improves compression ratio +// Reduced memory usage can improve speed, due to cache effect +// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache +#define MEMORY_USAGE 14 + +// HEAPMODE : +// Select how default compression functions will allocate memory for their hash table, +// in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)). +#define HEAPMODE 0 + + +//************************************** +// CPU Feature Detection +//************************************** +// 32 or 64 bits ? +#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \ + || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \ + || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \ + || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) // Detects 64 bits mode +# define LZ4_ARCH64 1 +#else +# define LZ4_ARCH64 0 +#endif + +// Little Endian or Big Endian ? +// Overwrite the #define below if you know your architecture endianess +#if defined (__GLIBC__) +# include +# if (__BYTE_ORDER == __BIG_ENDIAN) +# define LZ4_BIG_ENDIAN 1 +# endif +#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) +# define LZ4_BIG_ENDIAN 1 +#elif defined(__sparc) || defined(__sparc__) \ + || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ + || defined(__hpux) || defined(__hppa) \ + || defined(_MIPSEB) || defined(__s390__) +# define LZ4_BIG_ENDIAN 1 +#else +// Little Endian assumed. PDP Endian and other very rare endian format are unsupported. +#endif + +// Unaligned memory access is automatically enabled for "common" CPU, such as x86. +// For others CPU, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to ensure aligned access property +// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance +#if defined(__ARM_FEATURE_UNALIGNED) +# define LZ4_FORCE_UNALIGNED_ACCESS 1 +#endif + +// Define this parameter if your target system or compiler does not support hardware bit count +#if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count +# define LZ4_FORCE_SW_BITCOUNT +#endif + +// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : +// This option may provide a small boost to performance for some big endian cpu, although probably modest. +// You may set this option to 1 if data will remain within closed environment. 
+// This option is useless on Little_Endian CPU (such as x86) +//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 + + +//************************************** +// Compiler Options +//************************************** +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) // C99 +/* "restrict" is a known keyword */ +#else +# define restrict // Disable restrict +#endif + +#ifdef _MSC_VER // Visual Studio +# define FORCE_INLINE static __forceinline +# include // For Visual 2005 +# if LZ4_ARCH64 // 64-bits +# pragma intrinsic(_BitScanForward64) // For Visual 2005 +# pragma intrinsic(_BitScanReverse64) // For Visual 2005 +# else // 32-bits +# pragma intrinsic(_BitScanForward) // For Visual 2005 +# pragma intrinsic(_BitScanReverse) // For Visual 2005 +# endif +# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant +#else +# ifdef __GNUC__ +# define FORCE_INLINE static inline __attribute__((always_inline)) +# else +# define FORCE_INLINE static inline +# endif +#endif + +#ifdef _MSC_VER +# define lz4_bswap16(x) _byteswap_ushort(x) +#else +# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) +#endif + +#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) +# define expect(expr,value) (__builtin_expect ((expr),(value)) ) +#else +# define expect(expr,value) (expr) +#endif + +#define likely(expr) expect((expr) != 0, 1) +#define unlikely(expr) expect((expr) != 0, 0) + + +//************************************** +// Memory routines +//************************************** +#include // malloc, calloc, free +#define ALLOCATOR(n,s) calloc(n,s) +#define FREEMEM free +#include // memset, memcpy +#define MEM_INIT memset + + +//************************************** +// Includes +//************************************** +#include "lz4.h" + + +//************************************** +// Basic Types +//************************************** +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 +# include + typedef uint8_t BYTE; + typedef uint16_t U16; + typedef uint32_t U32; + typedef int32_t S32; + typedef uint64_t U64; +#else + typedef unsigned char BYTE; + typedef unsigned short U16; + typedef unsigned int U32; + typedef signed int S32; + typedef unsigned long long U64; +#endif + +#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) +# define _PACKED __attribute__ ((packed)) +#else +# define _PACKED +#endif + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) +# pragma pack(1) +# else +# pragma pack(push, 1) +# endif +#endif + +typedef struct { U16 v; } _PACKED U16_S; +typedef struct { U32 v; } _PACKED U32_S; +typedef struct { U64 v; } _PACKED U64_S; +typedef struct {size_t v;} _PACKED size_t_S; + +#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) +# if defined(__SUNPRO_C) || defined(__SUNPRO_CC) +# pragma pack(0) +# else +# pragma pack(pop) +# endif +#endif + +#define A16(x) (((U16_S *)(x))->v) +#define A32(x) (((U32_S *)(x))->v) +#define A64(x) (((U64_S *)(x))->v) +#define AARCH(x) (((size_t_S *)(x))->v) + + +//************************************** +// Constants +//************************************** +#define LZ4_HASHLOG (MEMORY_USAGE-2) +#define HASHTABLESIZE (1 << MEMORY_USAGE) +#define HASHNBCELLS4 (1 << LZ4_HASHLOG) + +#define MINMATCH 4 + +#define COPYLENGTH 8 +#define LASTLITERALS 5 +#define MFLIMIT 
(COPYLENGTH+MINMATCH) +const int LZ4_minLength = (MFLIMIT+1); + +#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1)) +#define SKIPSTRENGTH 6 // Increasing this value will make the compression run slower on incompressible data + +#define MAXD_LOG 16 +#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) + +#define ML_BITS 4 +#define ML_MASK ((1U<=e; + + +//**************************** +// Private functions +//**************************** +#if LZ4_ARCH64 + +FORCE_INLINE int LZ4_NbCommonBytes (register U64 val) +{ +# if defined(LZ4_BIG_ENDIAN) +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanReverse64( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clzll(val) >> 3); +# else + int r; + if (!(val>>32)) { r=4; } else { r=0; val>>=32; } + if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } + r += (!val); + return r; +# endif +# else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanForward64( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctzll(val) >> 3); +# else + static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; + return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; +# endif +# endif +} + +#else + +FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) +{ +# if defined(LZ4_BIG_ENDIAN) +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r = 0; + _BitScanReverse( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_clz(val) >> 3); +# else + int r; + if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } + r += (!val); + return r; +# endif +# else +# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) + unsigned long r; + _BitScanForward( &r, val ); + return (int)(r>>3); +# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) + return (__builtin_ctz(val) >> 3); +# else + static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; + return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; +# endif +# endif +} + +#endif + + +//**************************** +// Compression functions +//**************************** +FORCE_INLINE int LZ4_hashSequence(U32 sequence, tableType_t tableType) +{ + if (tableType == byU16) + return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); + else + return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); +} + +FORCE_INLINE int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); } + +FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + switch (tableType) + { + case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; } + case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; } + case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; } + } +} + +FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, 
const BYTE* srcBase) +{ + U32 h = LZ4_hashPosition(p, tableType); + LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); +} + +FORCE_INLINE const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; } + if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; } + { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } // default, to ensure a return +} + +FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) +{ + U32 h = LZ4_hashPosition(p, tableType); + return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); +} + + +FORCE_INLINE int LZ4_compress_generic( + void* ctx, + const char* source, + char* dest, + int inputSize, + int maxOutputSize, + + limitedOutput_directive limitedOutput, + tableType_t tableType, + prefix64k_directive prefix) +{ + const BYTE* ip = (const BYTE*) source; + const BYTE* const base = (prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->base : (const BYTE*) source; + const BYTE* const lowLimit = ((prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->bufferStart : (const BYTE*)source); + const BYTE* anchor = (const BYTE*) source; + const BYTE* const iend = ip + inputSize; + const BYTE* const mflimit = iend - MFLIMIT; + const BYTE* const matchlimit = iend - LASTLITERALS; + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + maxOutputSize; + + int length; + const int skipStrength = SKIPSTRENGTH; + U32 forwardH; + + // Init conditions + if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; // Unsupported input size, too large (or negative) + if ((prefix==withPrefix) && (ip != ((LZ4_Data_Structure*)ctx)->nextBlock)) return 0; // must continue from end of previous block + if (prefix==withPrefix) ((LZ4_Data_Structure*)ctx)->nextBlock=iend; // do it now, due to potential early exit + if ((tableType == byU16) && (inputSize>=LZ4_64KLIMIT)) return 0; // Size too large (not within 64K limit) + if (inputSize> skipStrength; + ip = forwardIp; + forwardIp = ip + step; + + if unlikely(forwardIp > mflimit) { goto _last_literals; } + + forwardH = LZ4_hashPosition(forwardIp, tableType); + ref = LZ4_getPositionOnHash(h, ctx, tableType, base); + LZ4_putPositionOnHash(ip, h, ctx, tableType, base); + + } while ((ref + MAX_DISTANCE < ip) || (A32(ref) != A32(ip))); + + // Catch up + while ((ip>anchor) && (ref > lowLimit) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; } + + // Encode Literal length + length = (int)(ip - anchor); + token = op++; + if ((limitedOutput) && unlikely(op + length + (2 + 1 + LASTLITERALS) + (length/255) > oend)) return 0; // Check output limit + if (length>=(int)RUN_MASK) + { + int len = length-RUN_MASK; + *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; + *op++ = (BYTE)len; + } + else *token = (BYTE)(length<>8) > oend)) return 0; // Check output limit + if (length>=(int)ML_MASK) + { + *token += ML_MASK; + length -= ML_MASK; + for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } + if (length >= 255) { length-=255; *op++ = 255; } + *op++ = (BYTE)length; + } + else *token += (BYTE)(length); + + // Test end of chunk + if (ip > mflimit) { anchor = ip; break; } + + // Fill table + LZ4_putPosition(ip-2, ctx, tableType, base); + + // Test next position + ref = LZ4_getPosition(ip, ctx, tableType, base); + LZ4_putPosition(ip, ctx, tableType, base); + if ((ref + MAX_DISTANCE >= ip) && (A32(ref) == 
A32(ip))) { token = op++; *token=0; goto _next_match; } + + // Prepare next loop + anchor = ip++; + forwardH = LZ4_hashPosition(ip, tableType); + } + +_last_literals: + // Encode Last Literals + { + int lastRun = (int)(iend - anchor); + if ((limitedOutput) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; // Check output limit + if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } + else *op++ = (BYTE)(lastRun<hashTable, 0, sizeof(lz4ds->hashTable)); + lz4ds->bufferStart = base; + lz4ds->base = base; + lz4ds->nextBlock = base; +} + + +void* LZ4_create (const char* inputBuffer) +{ + void* lz4ds = ALLOCATOR(1, sizeof(LZ4_Data_Structure)); + LZ4_init ((LZ4_Data_Structure*)lz4ds, (const BYTE*)inputBuffer); + return lz4ds; +} + + +int LZ4_free (void* LZ4_Data) +{ + FREEMEM(LZ4_Data); + return (0); +} + + +char* LZ4_slideInputBuffer (void* LZ4_Data) +{ + LZ4_Data_Structure* lz4ds = (LZ4_Data_Structure*)LZ4_Data; + size_t delta = lz4ds->nextBlock - (lz4ds->bufferStart + 64 KB); + + if ( (lz4ds->base - delta > lz4ds->base) // underflow control + || ((size_t)(lz4ds->nextBlock - lz4ds->base) > 0xE0000000) ) // close to 32-bits limit + { + size_t deltaLimit = (lz4ds->nextBlock - 64 KB) - lz4ds->base; + int nH; + + for (nH=0; nH < HASHNBCELLS4; nH++) + { + if ((size_t)(lz4ds->hashTable[nH]) < deltaLimit) lz4ds->hashTable[nH] = 0; + else lz4ds->hashTable[nH] -= (U32)deltaLimit; + } + memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); + lz4ds->base = lz4ds->bufferStart; + lz4ds->nextBlock = lz4ds->base + 64 KB; + } + else + { + memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); + lz4ds->nextBlock -= delta; + lz4ds->base -= delta; + } + + return (char*)(lz4ds->nextBlock); +} + + +//**************************** +// Decompression functions +//**************************** + +// This generic decompression function cover all use cases. +// It shall be instanciated several times, using different sets of directives +// Note that it is essential this generic function is really inlined, +// in order to remove useless branches during compilation optimisation. +FORCE_INLINE int LZ4_decompress_generic( + const char* source, + char* dest, + int inputSize, // + int outputSize, // If endOnInput==endOnInputSize, this value is the max size of Output Buffer. + + int endOnInput, // endOnOutputSize, endOnInputSize + int prefix64k, // noPrefix, withPrefix + int partialDecoding, // full, partial + int targetOutputSize // only used if partialDecoding==partial + ) +{ + // Local Variables + const BYTE* restrict ip = (const BYTE*) source; + const BYTE* ref; + const BYTE* const iend = ip + inputSize; + + BYTE* op = (BYTE*) dest; + BYTE* const oend = op + outputSize; + BYTE* cpy; + BYTE* oexit = op + targetOutputSize; + + const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; // static reduces speed for LZ4_decompress_safe() on GCC64 + static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; + + + // Special cases + if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; // targetOutputSize too high => decode everything + if ((endOnInput) && unlikely(outputSize==0)) return ((inputSize==1) && (*ip==0)) ? 
0 : -1; // Empty output buffer + if ((!endOnInput) && unlikely(outputSize==0)) return (*ip==0?1:-1); + + + // Main Loop + while (1) + { + unsigned token; + size_t length; + + // get runlength + token = *ip++; + if ((length=(token>>ML_BITS)) == RUN_MASK) + { + unsigned s=255; + while (((endOnInput)?ip(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) + || ((!endOnInput) && (cpy>oend-COPYLENGTH))) + { + if (partialDecoding) + { + if (cpy > oend) goto _output_error; // Error : write attempt beyond end of output buffer + if ((endOnInput) && (ip+length > iend)) goto _output_error; // Error : read attempt beyond end of input buffer + } + else + { + if ((!endOnInput) && (cpy != oend)) goto _output_error; // Error : block decoding must stop exactly there + if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; // Error : input must be consumed + } + memcpy(op, ip, length); + ip += length; + op += length; + break; // Necessarily EOF, due to parsing restrictions + } + LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy; + + // get offset + LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; + if ((prefix64k==noPrefix) && unlikely(ref < (BYTE* const)dest)) goto _output_error; // Error : offset outside destination buffer + + // get matchlength + if ((length=(token&ML_MASK)) == ML_MASK) + { + while ((!endOnInput) || (ipoend-COPYLENGTH-(STEPSIZE-4)) + { + if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals + LZ4_SECURECOPY(op, ref, (oend-COPYLENGTH)); + while(op (unsigned int)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) +static inline int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } + +/* +LZ4_compressBound() : + Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible) + primarily useful for memory allocation of output buffer. + inline function is recommended for the general case, + macro is also provided when result needs to be evaluated at compilation (such as stack memory allocation). + + isize : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE + return : maximum output size in a "worst case" scenario + or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE) +*/ + + +int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); + +/* +LZ4_compress_limitedOutput() : + Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. + If it cannot achieve it, compression will stop, and result of the function will be zero. + This function never writes outside of provided output buffer. + + inputSize : Max supported value is LZ4_MAX_INPUT_VALUE + maxOutputSize : is the size of the destination buffer (which must be already allocated) + return : the number of bytes written in buffer 'dest' + or 0 if the compression fails +*/ + + +int LZ4_decompress_fast (const char* source, char* dest, int outputSize); + +/* +LZ4_decompress_fast() : + outputSize : is the original (uncompressed) size + return : the number of bytes read from the source buffer (in other words, the compressed size) + If the source stream is malformed, the function will stop decoding and return a negative result. + note : This function is a bit faster than LZ4_decompress_safe() + This function never writes outside of output buffers, but may read beyond input buffer in case of malicious data packet. + Use this function preferably into a trusted environment (data to decode comes from a trusted source). 
+ Destination buffer must be already allocated. Its size must be a minimum of 'outputSize' bytes. +*/ + +int LZ4_decompress_safe_partial (const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize); + +/* +LZ4_decompress_safe_partial() : + This function decompress a compressed block of size 'inputSize' at position 'source' + into output buffer 'dest' of size 'maxOutputSize'. + The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached, + reducing decompression time. + return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) + Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller. + Always control how many bytes were decoded. + If the source stream is detected malformed, the function will stop decoding and return a negative result. + This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets +*/ + + +//**************************** +// Stream Functions +//**************************** + +void* LZ4_create (const char* inputBuffer); +int LZ4_compress_continue (void* LZ4_Data, const char* source, char* dest, int inputSize); +int LZ4_compress_limitedOutput_continue (void* LZ4_Data, const char* source, char* dest, int inputSize, int maxOutputSize); +char* LZ4_slideInputBuffer (void* LZ4_Data); +int LZ4_free (void* LZ4_Data); + +/* +These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks. +In order to achieve this, it is necessary to start creating the LZ4 Data Structure, thanks to the function : + +void* LZ4_create (const char* inputBuffer); +The result of the function is the (void*) pointer on the LZ4 Data Structure. +This pointer will be needed in all other functions. +If the pointer returned is NULL, then the allocation has failed, and compression must be aborted. +The only parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. +The input buffer must be already allocated, and size at least 192KB. +'inputBuffer' will also be the 'const char* source' of the first block. + +All blocks are expected to lay next to each other within the input buffer, starting from 'inputBuffer'. +To compress each block, use either LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(). +Their behavior are identical to LZ4_compress() or LZ4_compress_limitedOutput(), +but require the LZ4 Data Structure as their first argument, and check that each block starts right after the previous one. +If next block does not begin immediately after the previous one, the compression will fail (return 0). + +When it's no longer possible to lay the next block after the previous one (not enough space left into input buffer), a call to : +char* LZ4_slideInputBuffer(void* LZ4_Data); +must be performed. It will typically copy the latest 64KB of input at the beginning of input buffer. +Note that, for this function to work properly, minimum size of an input buffer must be 192KB. +==> The memory position where the next input data block must start is provided as the result of the function. + +Compression can then resume, using LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(), as usual. + +When compression is completed, a call to LZ4_free() will release the memory used by the LZ4 Data Structure. 
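For the page compression code in fil0pagecompress.cc only the one-shot entry points matter: LZ4_compress_limitedOutput() with the page as input and LZ4_decompress_fast() with the known uncompressed page size. A hedged round-trip sketch of that pair (the helper name and buffers are illustrative; capacity should follow LZ4_compressBound(), which for a 16K InnoDB page is 16384 + 16384/255 + 16 = 16464 bytes):

    #include <string.h>
    #include "lz4.h"

    static int
    lz4_round_trip(const char* page, int page_size,
                   char* compressed, int compressed_capacity,
                   char* restored)
    {
            /* 0 means the output did not fit; the caller then keeps the
               page uncompressed, as fil_compress_page() does. */
            int csize = LZ4_compress_limitedOutput(page, compressed,
                                                   page_size,
                                                   compressed_capacity);
            if (csize == 0) {
                    return(0);
            }

            /* decompress_fast() takes the original size and returns the
               number of compressed bytes it consumed. */
            int rsize = LZ4_decompress_fast(compressed, restored, page_size);

            return(rsize == csize && !memcmp(page, restored, page_size));
    }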
+*/ + + +int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int inputSize, int maxOutputSize); +int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int outputSize); + +/* +*_withPrefix64k() : + These decoding functions work the same as their "normal name" versions, + but can use up to 64KB of data in front of 'char* dest'. + These functions are necessary to decode inter-dependant blocks. +*/ + + +//**************************** +// Obsolete Functions +//**************************** + +static inline int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); } +static inline int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } + +/* +These functions are deprecated and should no longer be used. +They are provided here for compatibility with existing user programs. +*/ + + + +#if defined (__cplusplus) +} +#endif diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 43cfa23a99f..ead0b0fc902 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -4,6 +4,7 @@ Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, 2014, SkySQL Ab. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -558,6 +559,27 @@ ib_cb_t innodb_api_cb[] = { (ib_cb_t) ib_cfg_bk_commit_interval }; +/** + Structure for CREATE TABLE options (table options). + It needs to be called ha_table_option_struct. + + The option values can be specified in the CREATE TABLE at the end: + CREATE TABLE ( ... ) *here* +*/ + +ha_create_table_option innodb_table_option_list[]= +{ + /* With this option user can enable page compression feature for the + table */ + HA_TOPTION_BOOL("PAGE_COMPRESSED", page_compressed, 0), + /* With this option user can set zip compression level for page + compression for this table*/ + HA_TOPTION_NUMBER("PAGE_COMPRESSION_LEVEL", page_compression_level, ULINT_UNDEFINED, 0, 9, 1), + /* With this option user can enable atomic writes feature for this table */ + HA_TOPTION_ENUM("ATOMIC_WRITES", atomic_writes, "DEFAULT,ON,OFF", 0), + HA_TOPTION_END +}; + /*************************************************************//** Check whether valid argument given to innodb_ft_*_stopword_table. This function is registered as a callback with MySQL. 
@@ -873,6 +895,25 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_x_lock_spin_rounds, SHOW_LONGLONG}, {"x_lock_spin_waits", (char*) &export_vars.innodb_x_lock_spin_waits, SHOW_LONGLONG}, + + /* Status variables for page compression */ + {"page_compression_saved", + (char*) &export_vars.innodb_page_compression_saved, SHOW_LONGLONG}, + {"page_compression_trim_sect512", + (char*) &export_vars.innodb_page_compression_trim_sect512, SHOW_LONGLONG}, + {"page_compression_trim_sect4096", + (char*) &export_vars.innodb_page_compression_trim_sect4096, SHOW_LONGLONG}, + {"num_index_pages_written", + (char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG}, + {"num_pages_page_compressed", + (char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG}, + {"num_page_compressed_trim_op", + (char*) &export_vars.innodb_page_compressed_trim_op, SHOW_LONGLONG}, + {"num_page_compressed_trim_op_saved", + (char*) &export_vars.innodb_page_compressed_trim_op_saved, SHOW_LONGLONG}, + {"num_pages_page_decompressed", + (char*) &export_vars.innodb_pages_page_decompressed, SHOW_LONGLONG}, + {NullS, NullS, SHOW_LONG} }; @@ -3156,6 +3197,8 @@ innobase_init( if (srv_file_per_table) innobase_hton->tablefile_extensions = ha_innobase_exts; + innobase_hton->table_options = innodb_table_option_list; + ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR); #ifndef DBUG_OFF @@ -10010,11 +10053,16 @@ innobase_table_flags( enum row_type row_format; rec_format_t innodb_row_format = REC_FORMAT_COMPACT; bool use_data_dir; + ha_table_option_struct *options= form->s->option_struct; /* Cache the value of innodb_file_format, in case it is modified by another thread while the table is being created. */ const ulint file_format_allowed = srv_file_format; + /* Cache the value of innobase_compression_level, in case it is + modified by another thread while the table is being created. */ + const ulint default_compression_level = page_zip_level; + *flags = 0; *flags2 = 0; @@ -10063,6 +10111,8 @@ index_bad: } } + row_format = form->s->row_type; + if (create_info->key_block_size) { /* The requested compressed page size (key_block_size) is given in kilobytes. If it is a valid number, store @@ -10110,8 +10160,6 @@ index_bad: } } - row_format = form->s->row_type; - if (zip_ssize && zip_allowed) { /* if ROW_FORMAT is set to default, automatically change it to COMPRESSED.*/ @@ -10166,10 +10214,18 @@ index_bad: " innodb_file_format > Antelope.", get_row_format_name(row_format)); } else { - innodb_row_format = (row_format == ROW_TYPE_DYNAMIC - ? REC_FORMAT_DYNAMIC - : REC_FORMAT_COMPRESSED); - break; + switch(row_format) { + case ROW_TYPE_COMPRESSED: + innodb_row_format = REC_FORMAT_COMPRESSED; + break; + case ROW_TYPE_DYNAMIC: + innodb_row_format = REC_FORMAT_DYNAMIC; + break; + default: + /* Not possible, avoid compiler warning */ + break; + } + break; /* Correct row_format */ } zip_allowed = FALSE; /* fall through to set row_format = COMPACT */ @@ -10196,7 +10252,15 @@ index_bad: && ((create_info->data_file_name != NULL) && !(create_info->options & HA_LEX_CREATE_TMP_TABLE)); - dict_tf_set(flags, innodb_row_format, zip_ssize, use_data_dir); + /* Set up table dictionary flags */ + dict_tf_set(flags, + innodb_row_format, + zip_ssize, + use_data_dir, + options->page_compressed, + (ulint)options->page_compression_level == ULINT_UNDEFINED ? 
+ default_compression_level : options->page_compression_level, + options->atomic_writes); if (create_info->options & HA_LEX_CREATE_TMP_TABLE) { *flags2 |= DICT_TF2_TEMPORARY; @@ -10209,6 +10273,112 @@ index_bad: DBUG_RETURN(true); } +/*****************************************************************//** +Check engine specific table options not handled by SQL-parser. +@return NULL if valid, string if not */ +UNIV_INTERN +const char* +ha_innobase::check_table_options( + THD *thd, /*!< in: thread handle */ + TABLE* table, /*!< in: information on table + columns and indexes */ + HA_CREATE_INFO* create_info, /*!< in: more information of the + created table, contains also the + create statement string */ + const bool use_tablespace, /*!< in: use file per table */ + const ulint file_format) +{ + enum row_type row_format = table->s->row_type; + ha_table_option_struct *options= table->s->option_struct; + atomic_writes_t awrites = (atomic_writes_t)options->atomic_writes; + + /* Check page compression requirements */ + if (options->page_compressed) { + if (!srv_compress_pages) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED requires" + " innodb_compress_pages to be enabled"); + return "PAGE_COMPRESSED"; + } + + if (row_format == ROW_TYPE_COMPRESSED) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED table can't have" + " ROW_TYPE=COMPRESSED"); + return "PAGE_COMPRESSED"; + } + + if (!use_tablespace) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED requires" + " innodb_file_per_table."); + return "PAGE_COMPRESSED"; + } + + if (file_format < UNIV_FORMAT_B) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED requires" + " innodb_file_format > Antelope."); + return "PAGE_COMPRESSED"; + } + + if (create_info->key_block_size) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSED table can't have" + " key_block_size"); + return "PAGE_COMPRESSED"; + } + } + + /* Check page compression level requirements, some of them are + already checked above */ + if ((ulint)options->page_compression_level != ULINT_UNDEFINED) { + if (options->page_compressed == false) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: PAGE_COMPRESSION_LEVEL requires" + " PAGE_COMPRESSED"); + return "PAGE_COMPRESSION_LEVEL"; + } + + if (options->page_compression_level < 0 || options->page_compression_level > 9) { + push_warning_printf( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu." + " Valid values are [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", + (ulint) options->page_compression_level); + return "PAGE_COMPRESSION_LEVEL"; + } + } + + /* Check atomic writes requirements */ + if (awrites == ATOMIC_WRITES_ON || + (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { + if (!use_tablespace) { + push_warning( + thd, Sql_condition::WARN_LEVEL_WARN, + HA_WRONG_CREATE_OPTION, + "InnoDB: ATOMIC_WRITES requires" + " innodb_file_per_table."); + return "ATOMIC_WRITES"; + } + } + + return 0; +} + /*****************************************************************//** Creates a new table to an InnoDB database. @return error number */ @@ -10240,6 +10410,7 @@ ha_innobase::create( while creating the table.
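The push_warning ladder in check_table_options() enforces a small set of rules; the following condensed sketch (not part of the patch, names illustrative) restates the same constraints on plain values, which may be easier to review at a glance.

/* Editorial sketch, not part of the patch: the constraints enforced by
check_table_options(), restated on plain values. */
static bool
example_table_options_are_valid(
	bool	page_compressed,	/* PAGE_COMPRESSED */
	ulint	page_compression_level,	/* PAGE_COMPRESSION_LEVEL, or
					ULINT_UNDEFINED if not given */
	bool	atomic_writes_requested,/* ATOMIC_WRITES=ON, or DEFAULT with
					srv_use_atomic_writes set */
	bool	row_format_compressed,	/* ROW_FORMAT=COMPRESSED requested */
	bool	key_block_size_given,	/* KEY_BLOCK_SIZE given */
	bool	file_per_table,		/* innodb_file_per_table */
	bool	compress_pages_enabled,	/* innodb_compress_pages */
	bool	barracuda)		/* innodb_file_format > Antelope */
{
	if (page_compressed
	    && (!compress_pages_enabled || !file_per_table || !barracuda
		|| row_format_compressed || key_block_size_given)) {
		return(false);
	}

	if (page_compression_level != ULINT_UNDEFINED
	    && (!page_compressed || page_compression_level > 9)) {
		return(false);
	}

	if (atomic_writes_requested && !file_per_table) {
		return(false);
	}

	return(true);
}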
So we read the current value here and make all further decisions based on this. */ bool use_tablespace = srv_file_per_table; + const ulint file_format = srv_file_format; /* Zip Shift Size - log2 - 9 of compressed page size, zero for uncompressed */ @@ -10263,6 +10434,12 @@ ha_innobase::create( /* Create the table definition in InnoDB */ + /* Validate table options not handled by the SQL-parser */ + if(check_table_options(thd, form, create_info, use_tablespace, + file_format)) { + DBUG_RETURN(HA_WRONG_CREATE_OPTION); + } + /* Validate create options if innodb_strict_mode is set. */ if (create_options_are_invalid( thd, form, create_info, use_tablespace)) { @@ -14578,6 +14755,12 @@ ha_innobase::check_if_incompatible_data( HA_CREATE_INFO* info, uint table_changes) { + ha_table_option_struct *param_old, *param_new; + + /* Cache engine specific options */ + param_new = info->option_struct; + param_old = table->s->option_struct; + innobase_copy_frm_flags_from_create_info(prebuilt->table, info); if (table_changes != IS_EQUAL_YES) { @@ -14604,6 +14787,13 @@ ha_innobase::check_if_incompatible_data( return(COMPATIBLE_DATA_NO); } + /* Changes on engine specific table options requests a rebuild of the table. */ + if (param_new->page_compressed != param_old->page_compressed || + param_new->page_compression_level != param_old->page_compression_level || + param_new->atomic_writes != param_old->atomic_writes) { + return(COMPATIBLE_DATA_NO); + } + return(COMPATIBLE_DATA_YES); } @@ -17079,12 +17269,6 @@ static MYSQL_SYSVAR_ULONG(replication_delay, srv_replication_delay, "innodb_thread_concurrency is reached (0 by default)", NULL, NULL, 0, 0, ~0UL, 0); -static MYSQL_SYSVAR_UINT(compression_level, page_zip_level, - PLUGIN_VAR_RQCMDARG, - "Compression level used for compressed row format. 0 is no compression" - ", 1 is fastest, 9 is best compression and default is 6.", - NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0); - static MYSQL_SYSVAR_BOOL(log_compressed_pages, page_zip_log_pages, PLUGIN_VAR_OPCMDARG, "Enables/disables the logging of entire compressed page images." @@ -17758,6 +17942,37 @@ static MYSQL_SYSVAR_BOOL(use_stacktrace, srv_use_stacktrace, "Print stacktrace on long semaphore wait (off by default supported only on linux)", NULL, NULL, FALSE); +static MYSQL_SYSVAR_BOOL(compress_pages, srv_compress_pages, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + "Use page compression.", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_LONG(trim_pct, srv_trim_pct, + PLUGIN_VAR_OPCMDARG , + "How many percent of compressed pages should be trimmed", + NULL, NULL, 100, 0, 100, 0); + +static MYSQL_SYSVAR_UINT(compression_level, page_zip_level, + PLUGIN_VAR_RQCMDARG, + "Compression level used for zlib compression. 
0 is no compression" + ", 1 is fastest, 9 is best compression and default is 6.", + NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0); + +static MYSQL_SYSVAR_BOOL(compress_index_pages, srv_page_compress_index_pages, + PLUGIN_VAR_OPCMDARG, + "Use page compression for only index pages.", + NULL, NULL, FALSE); + +static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, + PLUGIN_VAR_OPCMDARG, + "Use trim.", + NULL, NULL, TRUE); + +static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, + PLUGIN_VAR_OPCMDARG , + "Use LZ4 for page compression", + NULL, NULL, FALSE); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(log_block_size), MYSQL_SYSVAR(additional_mem_pool_size), @@ -17948,6 +18163,11 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(fake_changes), MYSQL_SYSVAR(locking_fake_changes), MYSQL_SYSVAR(use_stacktrace), + MYSQL_SYSVAR(compress_pages), + MYSQL_SYSVAR(trim_pct), + MYSQL_SYSVAR(compress_index_pages), + MYSQL_SYSVAR(use_trim), + MYSQL_SYSVAR(use_lz4), NULL }; diff --git a/storage/xtradb/handler/ha_innodb.h b/storage/xtradb/handler/ha_innodb.h index 773a9b6b04d..b4df711356c 100644 --- a/storage/xtradb/handler/ha_innodb.h +++ b/storage/xtradb/handler/ha_innodb.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2000, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -57,6 +58,21 @@ typedef struct st_innobase_share { /** Prebuilt structures in an InnoDB table handle used within MySQL */ struct row_prebuilt_t; +/** Engine specific table options are definined using this struct */ +struct ha_table_option_struct +{ + bool page_compressed; /*!< Table is using page compression + if this option is true. */ + int page_compression_level; /*!< Table page compression level + or UNIV_UNSPECIFIED. */ + uint atomic_writes; /*!< Use atomic writes for this + table if this options is ON or + in DEFAULT if + srv_use_atomic_writes=1. + Atomic writes are not used if + value OFF.*/ +}; + /** The class defining a handle to an Innodb table */ class ha_innobase: public handler { @@ -184,6 +200,8 @@ class ha_innobase: public handler char* norm_name, char* temp_path, char* remote_path); + const char* check_table_options(THD *thd, TABLE* table, + HA_CREATE_INFO* create_info, const bool use_tablespace, const ulint file_format); int create(const char *name, register TABLE *form, HA_CREATE_INFO *create_info); int truncate(); diff --git a/storage/xtradb/handler/handler0alter.cc b/storage/xtradb/handler/handler0alter.cc index 9c535285d1e..24dc1086cc5 100644 --- a/storage/xtradb/handler/handler0alter.cc +++ b/storage/xtradb/handler/handler0alter.cc @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -252,6 +253,22 @@ ha_innobase::check_if_supported_inplace_alter( update_thd(); trx_search_latch_release_if_reserved(prebuilt->trx); + /* Change on engine specific table options require rebuild of the + table */ + if (ha_alter_info->handler_flags + == Alter_inplace_info::CHANGE_CREATE_OPTION) { + ha_table_option_struct *new_options= ha_alter_info->create_info->option_struct; + ha_table_option_struct *old_options= table->s->option_struct; + + if (new_options->page_compressed != old_options->page_compressed || + new_options->page_compression_level != old_options->page_compression_level || + new_options->atomic_writes != old_options->atomic_writes) { + ha_alter_info->unsupported_reason = innobase_get_err_msg( + ER_ALTER_OPERATION_NOT_SUPPORTED_REASON); + DBUG_RETURN(HA_ALTER_INPLACE_NOT_SUPPORTED); + } + } + if (ha_alter_info->handler_flags & ~(INNOBASE_INPLACE_IGNORE | INNOBASE_ALTER_NOREBUILD @@ -3372,6 +3389,17 @@ ha_innobase::prepare_inplace_alter_table( if (ha_alter_info->handler_flags & Alter_inplace_info::CHANGE_CREATE_OPTION) { + /* Check engine specific table options */ + if (const char* invalid_tbopt = check_table_options( + user_thd, altered_table, + ha_alter_info->create_info, + prebuilt->table->space != 0, + srv_file_format)) { + my_error(ER_ILLEGAL_HA_CREATE_OPTION, MYF(0), + table_type(), invalid_tbopt); + goto err_exit_no_heap; + } + if (const char* invalid_opt = create_options_are_invalid( user_thd, altered_table, ha_alter_info->create_info, diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h index ba2f413429c..8fedeeaa832 100644 --- a/storage/xtradb/include/buf0buf.h +++ b/storage/xtradb/include/buf0buf.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -1489,6 +1490,12 @@ struct buf_page_t{ state == BUF_BLOCK_ZIP_PAGE and zip.data == NULL means an active buf_pool->watch */ + + ulint write_size; /* Write size is set when this + page is first time written and then + if written again we check is TRIM + operation needed. */ + #ifndef UNIV_HOTBACKUP buf_page_t* hash; /*!< node used in chaining to buf_pool->page_hash or @@ -2118,6 +2125,20 @@ struct CheckUnzipLRUAndLRUList { }; #endif /* UNIV_DEBUG || defined UNIV_BUF_DEBUG */ +/*********************************************************************//** +Aquire LRU list mutex */ +void +buf_pool_mutex_enter( +/*=================*/ + buf_pool_t* buf_pool); /*!< in: buffer pool */ +/*********************************************************************//** +Exit LRU list mutex */ +void +buf_pool_mutex_exit( +/*================*/ + buf_pool_t* buf_pool); /*!< in: buffer pool */ + + #ifndef UNIV_NONINL #include "buf0buf.ic" #endif diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h index f4542e7c206..6b2827e77a7 100644 --- a/storage/xtradb/include/buf0flu.h +++ b/storage/xtradb/include/buf0flu.h @@ -36,6 +36,13 @@ Created 11/5/1995 Heikki Tuuri /** Flag indicating if the page_cleaner is in active state. 
*/ extern ibool buf_page_cleaner_is_active; +/** Handled page counters for a single flush */ +struct flush_counters_t { + ulint flushed; /*!< number of dirty pages flushed */ + ulint evicted; /*!< number of clean pages evicted */ +}; + + /********************************************************************//** Remove a block from the flush list of modified blocks. */ UNIV_INTERN diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h index 6669f60b95a..8ab05c50dbd 100644 --- a/storage/xtradb/include/dict0dict.h +++ b/storage/xtradb/include/dict0dict.h @@ -2,6 +2,7 @@ Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -42,6 +43,8 @@ Created 1/8/1996 Heikki Tuuri #include "ut0byte.h" #include "trx0types.h" #include "row0types.h" +#include "fsp0fsp.h" +#include "dict0pagecompress.h" #ifndef UNIV_HOTBACKUP # include "sync0sync.h" @@ -904,7 +907,14 @@ dict_tf_set( ulint* flags, /*!< in/out: table */ rec_format_t format, /*!< in: file format */ ulint zip_ssize, /*!< in: zip shift size */ - bool remote_path) /*!< in: table uses DATA DIRECTORY */ + bool remote_path, /*!< in: table uses DATA DIRECTORY + */ + bool page_compressed,/*!< in: table uses page compressed + pages */ + ulint page_compression_level, /*!< in: table page compression + level */ + ulint atomic_writes) /*!< in: table atomic + writes option value*/ __attribute__((nonnull)); /********************************************************************//** Convert a 32 bit integer table flags to the 32 bit integer that is diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index c261d6a3aee..502b1d028d8 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -537,10 +538,27 @@ dict_tf_is_valid( ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(flags); ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(flags); ulint unused = DICT_TF_GET_UNUSED(flags); + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(flags); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags); + ulint data_dir = DICT_TF_HAS_DATA_DIR(flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(flags); /* Make sure there are no bits that we do not know about. */ if (unused != 0) { + fprintf(stderr, + "InnoDB: Error: table unused flags are %ld" + " in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + unused, + compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); + return(false); } else if (atomic_blobs) { @@ -550,12 +568,36 @@ dict_tf_is_valid( data stored off-page in the clustered index. 
*/ if (!compact) { + fprintf(stderr, + "InnoDB: Error: table compact flags are %ld" + " in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + compact, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); + return(false); } } else if (zip_ssize) { /* Antelope does not support COMPRESSED row format. */ + fprintf(stderr, + "InnoDB: Error: table flags are %ld" + " in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); + return(false); } @@ -568,6 +610,41 @@ dict_tf_is_valid( || !atomic_blobs || zip_ssize > PAGE_ZIP_SSIZE_MAX) { + fprintf(stderr, + "InnoDB: Error: table compact flags are %ld in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, + compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + + ); + return(false); + } + } + + if (page_compression || page_compression_level) { + /* Page compression format must have compact and + atomic_blobs and page_compression_level requires + page_compression */ + if (!compact + || !page_compression + || !atomic_blobs) { + + fprintf(stderr, + "InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); + return(false); } } @@ -594,6 +671,10 @@ dict_sys_tables_type_validate( ulint zip_ssize = DICT_TF_GET_ZIP_SSIZE(type); ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(type); ulint unused = DICT_TF_GET_UNUSED(type); + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(type); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type); + atomic_writes_t awrites = (atomic_writes_t)atomic_writes; /* The low order bit of SYS_TABLES.TYPE is always set to 1. If the format is UNIV_FORMAT_B or higher, this field is the same @@ -647,6 +728,24 @@ dict_sys_tables_type_validate( format, so the DATA_DIR flag is compatible with any other table flags. However, it is not used with TEMPORARY tables.*/ + if (page_compression || page_compression_level) { + /* page compressed row format must have low_order_bit and + atomic_blobs bits set and the DICT_N_COLS_COMPACT flag + should be in N_COLS, but we already know about the + low_order_bit and DICT_N_COLS_COMPACT flags. 
*/ + + if (!atomic_blobs || !page_compression) { + return(ULINT_UNDEFINED); + } + } + + if (awrites == ATOMIC_WRITES_ON || + (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { + if (!atomic_blobs) { + return(ULINT_UNDEFINED); + } + } + /* Return the validated SYS_TABLES.TYPE. */ return(type); } @@ -719,8 +818,16 @@ dict_tf_set( ulint* flags, /*!< in/out: table flags */ rec_format_t format, /*!< in: file format */ ulint zip_ssize, /*!< in: zip shift size */ - bool use_data_dir) /*!< in: table uses DATA DIRECTORY */ + bool use_data_dir, /*!< in: table uses DATA DIRECTORY + */ + bool page_compressed,/*!< in: table uses page compressed + pages */ + ulint page_compression_level, /*!< in: table page compression + level */ + ulint atomic_writes) /*!< in: table atomic writes setup */ { + atomic_writes_t awrites = (atomic_writes_t)atomic_writes; + switch (format) { case REC_FORMAT_REDUNDANT: *flags = 0; @@ -745,6 +852,28 @@ dict_tf_set( if (use_data_dir) { *flags |= (1 << DICT_TF_POS_DATA_DIR); } + + if (page_compressed) { + *flags = DICT_TF_COMPACT + | (1 << DICT_TF_POS_ATOMIC_BLOBS) + | (1 << DICT_TF_POS_PAGE_COMPRESSION) + | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL); + + ut_ad(zip_ssize == 0); + ut_ad(dict_tf_get_page_compression(*flags) == TRUE); + ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level); + } + + if (awrites != ATOMIC_WRITES_DEFAULT) { + *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); + ut_ad(dict_tf_get_atomic_writes(*flags) == awrites); + } + + if (awrites == ATOMIC_WRITES_ON || + (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes )) { + *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS); + } + } /********************************************************************//** @@ -765,6 +894,9 @@ dict_tf_to_fsp_flags( ulint table_flags) /*!< in: dict_table_t::flags */ { ulint fsp_flags; + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); DBUG_EXECUTE_IF("dict_tf_to_fsp_flags_failure", return(ULINT_UNDEFINED);); @@ -783,7 +915,20 @@ dict_tf_to_fsp_flags( fsp_flags |= DICT_TF_HAS_DATA_DIR(table_flags) ? FSP_FLAGS_MASK_DATA_DIR : 0; + /* In addition, tablespace flags also contain if the page + compression is used for this table. */ + fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION(fsp_flags, page_compression); + + /* In addition, tablespace flags also contain page compression level + if page compression is used for this table. */ + fsp_flags |= FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, page_compression_level); + + /* In addition, tablespace flags also contain flag if atomic writes + is used for this table */ + fsp_flags |= FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, atomic_writes); + ut_a(fsp_flags_is_valid(fsp_flags)); + ut_a(dict_tf_verify_flags(table_flags, fsp_flags)); return(fsp_flags); } @@ -811,10 +956,15 @@ dict_sys_tables_type_to_tf( /* Adjust bit zero. */ flags = redundant ? 0 : 1; - /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */ + /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION, + PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. 
*/ flags |= type & (DICT_TF_MASK_ZIP_SSIZE | DICT_TF_MASK_ATOMIC_BLOBS - | DICT_TF_MASK_DATA_DIR); + | DICT_TF_MASK_DATA_DIR + | DICT_TF_MASK_PAGE_COMPRESSION + | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL + | DICT_TF_MASK_ATOMIC_WRITES + ); return(flags); } @@ -842,10 +992,14 @@ dict_tf_to_sys_tables_type( /* Adjust bit zero. It is always 1 in SYS_TABLES.TYPE */ type = 1; - /* ZIP_SSIZE, ATOMIC_BLOBS & DATA_DIR are the same. */ + /* ZIP_SSIZE, ATOMIC_BLOBS, DATA_DIR, PAGE_COMPRESSION, + PAGE_COMPRESSION_LEVEL, ATOMIC_WRITES are the same. */ type |= flags & (DICT_TF_MASK_ZIP_SSIZE | DICT_TF_MASK_ATOMIC_BLOBS - | DICT_TF_MASK_DATA_DIR); + | DICT_TF_MASK_DATA_DIR + | DICT_TF_MASK_PAGE_COMPRESSION + | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL + | DICT_TF_MASK_ATOMIC_WRITES); return(type); } diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h index bde0ce16094..087fde0ccb7 100644 --- a/storage/xtradb/include/dict0mem.h +++ b/storage/xtradb/include/dict0mem.h @@ -2,6 +2,7 @@ Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -125,11 +126,26 @@ This flag prevents older engines from attempting to open the table and allows InnoDB to update_create_info() accordingly. */ #define DICT_TF_WIDTH_DATA_DIR 1 +/** +Width of the page compression flag +*/ +#define DICT_TF_WIDTH_PAGE_COMPRESSION 1 +#define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4 + +/** +Width of atomic writes flag +DEFAULT=0, ON = 1, OFF = 2 +*/ +#define DICT_TF_WIDTH_ATOMIC_WRITES 2 + /** Width of all the currently known table flags */ #define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \ + DICT_TF_WIDTH_ZIP_SSIZE \ + DICT_TF_WIDTH_ATOMIC_BLOBS \ - + DICT_TF_WIDTH_DATA_DIR) + + DICT_TF_WIDTH_DATA_DIR \ + + DICT_TF_WIDTH_PAGE_COMPRESSION \ + + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \ + + DICT_TF_WIDTH_ATOMIC_WRITES) /** A mask of all the known/used bits in table flags */ #define DICT_TF_BIT_MASK (~(~0 << DICT_TF_BITS)) @@ -145,9 +161,18 @@ allows InnoDB to update_create_info() accordingly. */ /** Zero relative shift position of the DATA_DIR field */ #define DICT_TF_POS_DATA_DIR (DICT_TF_POS_ATOMIC_BLOBS \ + DICT_TF_WIDTH_ATOMIC_BLOBS) +/** Zero relative shift position of the PAGE_COMPRESSION field */ +#define DICT_TF_POS_PAGE_COMPRESSION (DICT_TF_POS_DATA_DIR \ + + DICT_TF_WIDTH_DATA_DIR) +/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */ +#define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \ + + DICT_TF_WIDTH_PAGE_COMPRESSION) +/** Zero relative shift position of the ATOMIC_WRITES field */ +#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \ + + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL) /** Zero relative shift position of the start of the UNUSED bits */ -#define DICT_TF_POS_UNUSED (DICT_TF_POS_DATA_DIR \ - + DICT_TF_WIDTH_DATA_DIR) +#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_WRITES \ + + DICT_TF_WIDTH_ATOMIC_WRITES) /** Bit mask of the COMPACT field */ #define DICT_TF_MASK_COMPACT \ @@ -165,6 +190,18 @@ allows InnoDB to update_create_info() accordingly. 
*/ #define DICT_TF_MASK_DATA_DIR \ ((~(~0 << DICT_TF_WIDTH_DATA_DIR)) \ << DICT_TF_POS_DATA_DIR) +/** Bit mask of the PAGE_COMPRESSION field */ +#define DICT_TF_MASK_PAGE_COMPRESSION \ + ((~(~0 << DICT_TF_WIDTH_PAGE_COMPRESSION)) \ + << DICT_TF_POS_PAGE_COMPRESSION) +/** Bit mask of the PAGE_COMPRESSION_LEVEL field */ +#define DICT_TF_MASK_PAGE_COMPRESSION_LEVEL \ + ((~(~0 << DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)) \ + << DICT_TF_POS_PAGE_COMPRESSION_LEVEL) +/** Bit mask of the ATOMIC_WRITES field */ +#define DICT_TF_MASK_ATOMIC_WRITES \ + ((~(~0 << DICT_TF_WIDTH_ATOMIC_WRITES)) \ + << DICT_TF_POS_ATOMIC_WRITES) /** Return the value of the COMPACT field */ #define DICT_TF_GET_COMPACT(flags) \ @@ -185,6 +222,19 @@ allows InnoDB to update_create_info() accordingly. */ /** Return the contents of the UNUSED bits */ #define DICT_TF_GET_UNUSED(flags) \ (flags >> DICT_TF_POS_UNUSED) + +/** Return the value of the PAGE_COMPRESSION field */ +#define DICT_TF_GET_PAGE_COMPRESSION(flags) \ + ((flags & DICT_TF_MASK_PAGE_COMPRESSION) \ + >> DICT_TF_POS_PAGE_COMPRESSION) +/** Return the value of the PAGE_COMPRESSION_LEVEL field */ +#define DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags) \ + ((flags & DICT_TF_MASK_PAGE_COMPRESSION_LEVEL) \ + >> DICT_TF_POS_PAGE_COMPRESSION_LEVEL) +/** Return the value of the ATOMIC_WRITES field */ +#define DICT_TF_GET_ATOMIC_WRITES(flags) \ + ((flags & DICT_TF_MASK_ATOMIC_WRITES) \ + >> DICT_TF_POS_ATOMIC_WRITES) /* @} */ #ifndef UNIV_INNOCHECKSUM diff --git a/storage/xtradb/include/dict0pagecompress.h b/storage/xtradb/include/dict0pagecompress.h new file mode 100644 index 00000000000..19a2a6c52f3 --- /dev/null +++ b/storage/xtradb/include/dict0pagecompress.h @@ -0,0 +1,94 @@ +/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/dict0pagecompress.h +Helper functions for extracting/storing page compression information +to dictionary. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#ifndef dict0pagecompress_h +#define dict0pagecompress_h + +/********************************************************************//** +Extract the page compression level from table flags. 
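With the widths and positions defined above, the table-flag word lays out as COMPACT (bit 0), ZIP_SSIZE (bits 1-4), ATOMIC_BLOBS (bit 5), DATA_DIR (bit 6), PAGE_COMPRESSION (bit 7), PAGE_COMPRESSION_LEVEL (bits 8-11) and ATOMIC_WRITES (bits 12-13), so DICT_TF_BITS is 14. A minimal round-trip sketch (not part of the patch) using only the macros introduced here:

/* Editorial sketch, not part of the patch: packing and unpacking the new
DICT_TF fields with the shift/mask macros defined above. */
static void
example_dict_tf_round_trip(void)
{
	ulint	flags = DICT_TF_COMPACT
		| (1 << DICT_TF_POS_ATOMIC_BLOBS)
		| (1 << DICT_TF_POS_PAGE_COMPRESSION)
		| (6 << DICT_TF_POS_PAGE_COMPRESSION_LEVEL)
		| (ATOMIC_WRITES_ON << DICT_TF_POS_ATOMIC_WRITES);

	ut_ad(DICT_TF_GET_PAGE_COMPRESSION(flags) == 1);
	ut_ad(DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags) == 6);
	ut_ad(DICT_TF_GET_ATOMIC_WRITES(flags) == ATOMIC_WRITES_ON);
	ut_ad(DICT_TF_GET_UNUSED(flags) == 0);
}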
+@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_tf_get_page_compression_level( +/*===============================*/ + ulint flags) /*!< in: flags */ + __attribute__((const)); +/********************************************************************//** +Extract the page compression flag from table flags +@return page compression flag, or false if not compressed */ +UNIV_INLINE +ibool +dict_tf_get_page_compression( +/*==========================*/ + ulint flags) /*!< in: flags */ + __attribute__((const)); + +/********************************************************************//** +Check whether the table uses the page compressed page format. +@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_table_page_compression_level( +/*==============================*/ + const dict_table_t* table) /*!< in: table */ + __attribute__((const)); + +/********************************************************************//** +Verify that dictionary flags match tablespace flags +@return true if flags match, false if not */ +UNIV_INLINE +ibool +dict_tf_verify_flags( +/*=================*/ + ulint table_flags, /*!< in: dict_table_t::flags */ + ulint fsp_flags) /*!< in: fil_space_t::flags */ + __attribute__((const)); + +/********************************************************************//** +Extract the atomic writes flag from table flags. +@return true if atomic writes are used, false if not used */ +UNIV_INLINE +atomic_writes_t +dict_tf_get_atomic_writes( +/*======================*/ + ulint flags) /*!< in: flags */ + __attribute__((const)); + +/********************************************************************//** +Check whether the table uses the atomic writes. +@return true if atomic writes is used, false if not */ +UNIV_INLINE +atomic_writes_t +dict_table_get_atomic_writes( +/*=========================*/ + const dict_table_t* table); /*!< in: table */ + + +#ifndef UNIV_NONINL +#include "dict0pagecompress.ic" +#endif + +#endif diff --git a/storage/xtradb/include/dict0pagecompress.ic b/storage/xtradb/include/dict0pagecompress.ic new file mode 100644 index 00000000000..fb9581fc657 --- /dev/null +++ b/storage/xtradb/include/dict0pagecompress.ic @@ -0,0 +1,191 @@ +/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/dict0pagecompress.ic +Inline implementation for helper functions for extracting/storing +page compression and atomic writes information to dictionary. 
+ +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +/********************************************************************//** +Verify that dictionary flags match tablespace flags +@return true if flags match, false if not */ +UNIV_INLINE +ibool +dict_tf_verify_flags( +/*=================*/ + ulint table_flags, /*!< in: dict_table_t::flags */ + ulint fsp_flags) /*!< in: fil_space_t::flags */ +{ + ulint table_unused = DICT_TF_GET_UNUSED(table_flags); + ulint compact = DICT_TF_GET_COMPACT(table_flags); + ulint ssize = DICT_TF_GET_ZIP_SSIZE(table_flags); + ulint atomic_blobs = DICT_TF_HAS_ATOMIC_BLOBS(table_flags); + ulint data_dir = DICT_TF_HAS_DATA_DIR(table_flags); + ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(table_flags); + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(table_flags); + ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(table_flags); + ulint post_antelope = FSP_FLAGS_GET_POST_ANTELOPE(fsp_flags); + ulint zip_ssize = FSP_FLAGS_GET_ZIP_SSIZE(fsp_flags); + ulint fsp_atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(fsp_flags); + ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(fsp_flags); + ulint fsp_unused = FSP_FLAGS_GET_UNUSED(fsp_flags); + ulint fsp_page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(fsp_flags); + ulint fsp_page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(fsp_flags); + ulint fsp_atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(fsp_flags); + + DBUG_EXECUTE_IF("dict_tf_verify_flags_failure", + return(ULINT_UNDEFINED);); + + ut_ad(!table_unused); + ut_ad(!fsp_unused); + ut_ad(page_ssize == 0 || page_ssize != 0); /* silence compiler */ + ut_ad(compact == 0 || compact == 1); /* silence compiler */ + ut_ad(data_dir == 0 || data_dir == 1); /* silence compiler */ + ut_ad(post_antelope == 0 || post_antelope == 1); /* silence compiler */ + + if (ssize != zip_ssize) { + fprintf(stderr, + "InnoDB: Error: table flags has zip_ssize %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has zip_ssize %ld\n", + ssize, zip_ssize); + return (FALSE); + } + if (atomic_blobs != fsp_atomic_blobs) { + fprintf(stderr, + "InnoDB: Error: table flags has atomic_blobs %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has atomic_blobs %ld\n", + atomic_blobs, fsp_atomic_blobs); + + return (FALSE); + } + if (page_compression != fsp_page_compression) { + fprintf(stderr, + "InnoDB: Error: table flags has page_compression %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file ahas page_compression %ld\n", + page_compression, fsp_page_compression); + + return (FALSE); + } + if (page_compression_level != fsp_page_compression_level) { + fprintf(stderr, + "InnoDB: Error: table flags has page_compression_level %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has page_compression_level %ld\n", + page_compression_level, fsp_page_compression_level); + + return (FALSE); + } + + if (atomic_writes != fsp_atomic_writes) { + fprintf(stderr, + "InnoDB: Error: table flags has atomic writes %ld" + " in the data dictionary\n" + "InnoDB: but the flags in file has atomic_writes %ld\n", + atomic_writes, fsp_atomic_writes); + + return (FALSE); + } + + return(TRUE); +} + +/********************************************************************//** +Extract the page compression level from dict_table_t::flags. +These flags are in memory, so assert that they are valid. 
+@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_tf_get_page_compression_level( +/*===============================*/ + ulint flags) /*!< in: flags */ +{ + ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags); + + ut_ad(page_compression_level >= 0 && page_compression_level <= 9); + + return(page_compression_level); +} + +/********************************************************************//** +Check whether the table uses the page compression page format. +@return page compression level, or 0 if not compressed */ +UNIV_INLINE +ulint +dict_table_page_compression_level( +/*==============================*/ + const dict_table_t* table) /*!< in: table */ +{ + ut_ad(table); + ut_ad(dict_tf_get_page_compression(table->flags)); + + return(dict_tf_get_page_compression_level(table->flags)); +} + +/********************************************************************//** +Check whether the table uses the page compression page format. +@return true if page compressed, false if not */ +UNIV_INLINE +ibool +dict_tf_get_page_compression( +/*=========================*/ + ulint flags) /*!< in: flags */ +{ + return(DICT_TF_GET_PAGE_COMPRESSION(flags)); +} + +/********************************************************************//** +Check whether the table uses the page compression page format. +@return true if page compressed, false if not */ +UNIV_INLINE +ibool +dict_table_is_page_compressed( +/*==========================*/ + const dict_table_t* table) /*!< in: table */ +{ + return (dict_tf_get_page_compression(table->flags)); +} + +/********************************************************************//** +Extract the atomic writes flag from table flags. +@return enumerated value of atomic writes */ +UNIV_INLINE +atomic_writes_t +dict_tf_get_atomic_writes( +/*======================*/ + ulint flags) /*!< in: flags */ +{ + return((atomic_writes_t)DICT_TF_GET_ATOMIC_WRITES(flags)); +} + +/********************************************************************//** +Check whether the table uses the atomic writes. +@return enumerated value of atomic writes */ +UNIV_INLINE +atomic_writes_t +dict_table_get_atomic_writes( +/*=========================*/ + const dict_table_t* table) /*!< in: table */ +{ + return ((atomic_writes_t)dict_tf_get_atomic_writes(table->flags)); +} diff --git a/storage/xtradb/include/dict0types.h b/storage/xtradb/include/dict0types.h index 6acb6a2dcbe..9e210117580 100644 --- a/storage/xtradb/include/dict0types.h +++ b/storage/xtradb/include/dict0types.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -82,4 +83,12 @@ enum ib_quiesce_t { #define TEMP_TABLE_PREFIX "#sql" #define TEMP_TABLE_PATH_PREFIX "/" TEMP_TABLE_PREFIX +/** Enum values for atomic_writes table option */ +typedef enum { + ATOMIC_WRITES_DEFAULT = 0, + ATOMIC_WRITES_ON = 1, + ATOMIC_WRITES_OFF = 2 +} atomic_writes_t; + + #endif diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 472c57fcbfc..6b69a899690 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -129,6 +130,13 @@ extern fil_addr_t fil_addr_null; #define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /*!< starting from 4.1.x this contains the space id of the page */ #define FIL_PAGE_DATA 38 /*!< start of the data on the page */ +/* Following are used when page compression is used */ +#define FIL_PAGE_COMPRESSED_SIZE 2 /*!< Number of bytes used to store + actual payload data size on + compressed pages. */ +#define FIL_PAGE_COMPRESSION_ZLIB 1 /*!< Compression algorithm ZLIB. */ +#define FIL_PAGE_COMPRESSION_LZ4 2 /*!< Compression algorithm LZ4. */ + /* @} */ /** File page trailer @{ */ #define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /*!< the low 4 bytes of this are used @@ -139,6 +147,7 @@ extern fil_addr_t fil_addr_null; /* @} */ /** File page types (values of FIL_PAGE_TYPE) @{ */ +#define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< Page compressed page */ #define FIL_PAGE_INDEX 17855 /*!< B-tree node */ #define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */ #define FIL_PAGE_INODE 3 /*!< Index node */ @@ -721,8 +730,8 @@ fil_space_get_n_reserved_extents( Reads or writes data. This operation is asynchronous (aio). @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace which does not exist */ -#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message) \ - _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, NULL) +#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, write_size) \ + _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, write_size, NULL) UNIV_INTERN dberr_t _fil_io( @@ -752,7 +761,12 @@ _fil_io( or from where to write; in aio this must be appropriately aligned */ void* message, /*!< in: message for aio handler if non-sync - aio used, else ignored */ + aio used, else ignored */ + ulint* write_size, /*!< in/out: Actual write size initialized + after the first successful trim + operation for this page; once + initialized we do not trim again if + the actual page size does not decrease.
*/ trx_t* trx) __attribute__((nonnull(8))); /**********************************************************************//** @@ -1018,4 +1032,27 @@ fil_space_set_corrupt( /*==================*/ ulint space_id); +/****************************************************************//** +Acquire fil_system mutex */ +void +fil_system_enter(void); +/*==================*/ +/****************************************************************//** +Release fil_system mutex */ +void +fil_system_exit(void); +/*==================*/ +/*******************************************************************//** +Returns the table space by a given id, NULL if not found. */ +fil_space_t* +fil_space_get_by_id( +/*================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Return space name */ +char* +fil_space_name( +/*===========*/ + fil_space_t* space); /*!< in: space */ + #endif /* fil0fil_h */ diff --git a/storage/xtradb/include/fil0pagecompress.h b/storage/xtradb/include/fil0pagecompress.h new file mode 100644 index 00000000000..342b105401c --- /dev/null +++ b/storage/xtradb/include/fil0pagecompress.h @@ -0,0 +1,118 @@ +/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +#ifndef fil0pagecompress_h +#define fil0pagecompress_h + +#include "fsp0fsp.h" +#include "fsp0pagecompress.h" + +/******************************************************************//** +@file include/fil0pagecompress.h +Helper functions for extracting/storing page compression and +atomic writes information to table space. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +/*******************************************************************//** +Returns the page compression level flag of the space, or 0 if the space +is not compressed. The tablespace must be cached in the memory cache. +@return page compression level if page compressed, ULINT_UNDEFINED if space not found */ +ulint +fil_space_get_page_compression_level( +/*=================================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Returns the page compression flag of the space, or false if the space +is not compressed. The tablespace must be cached in the memory cache. +@return true if page compressed, false if not or space not found */ +ibool +fil_space_is_page_compressed( +/*=========================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Returns the atomic writes flag of the space, or false if the space +is not using atomic writes. The tablespace must be cached in the memory cache. 
+@return atomic write table option value */ +atomic_writes_t +fil_space_get_atomic_writes( +/*=========================*/ + ulint id); /*!< in: space id */ +/*******************************************************************//** +Find out wheather the page is index page or not +@return true if page type index page, false if not */ +ibool +fil_page_is_index_page( +/*===================*/ + byte *buf); /*!< in: page */ + +/****************************************************************//** +Get the name of the compression algorithm used for page +compression. +@return compression algorithm name or "UNKNOWN" if not known*/ +const char* +fil_get_compression_alg_name( +/*=========================*/ + ulint comp_alg); /*!> FSP_FLAGS_POS_UNUSED) +/** Return the value of the PAGE_COMPRESSION field */ +#define FSP_FLAGS_GET_PAGE_COMPRESSION(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION) \ + >> FSP_FLAGS_POS_PAGE_COMPRESSION) +/** Return the value of the PAGE_COMPRESSION_LEVEL field */ +#define FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags) \ + ((flags & FSP_FLAGS_MASK_PAGE_COMPRESSION_LEVEL) \ + >> FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL) +/** Return the value of the ATOMIC_WRITES field */ +#define FSP_FLAGS_GET_ATOMIC_WRITES(flags) \ + ((flags & FSP_FLAGS_MASK_ATOMIC_WRITES) \ + >> FSP_FLAGS_POS_ATOMIC_WRITES) /** Set a PAGE_SSIZE into the correct bits in a given tablespace flags. */ #define FSP_FLAGS_SET_PAGE_SSIZE(flags, ssize) \ (flags | (ssize << FSP_FLAGS_POS_PAGE_SSIZE)) +/** Set a PAGE_COMPRESSION into the correct bits in a given +tablespace flags. */ +#define FSP_FLAGS_SET_PAGE_COMPRESSION(flags, compression) \ + (flags | (compression << FSP_FLAGS_POS_PAGE_COMPRESSION)) + +/** Set a PAGE_COMPRESSION_LEVEL into the correct bits in a given +tablespace flags. */ +#define FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(flags, level) \ + (flags | (level << FSP_FLAGS_POS_PAGE_COMPRESSION_LEVEL)) +/** Set a ATOMIC_WRITES into the correct bits in a given +tablespace flags. */ +#define FSP_FLAGS_SET_ATOMIC_WRITES(flags, atomics) \ + (flags | (atomics << FSP_FLAGS_POS_ATOMIC_WRITES)) + /* @} */ /* @defgroup Tablespace Header Constants (moved from fsp0fsp.c) @{ */ diff --git a/storage/xtradb/include/fsp0fsp.ic b/storage/xtradb/include/fsp0fsp.ic index 0d81e817cc9..bc46967fab0 100644 --- a/storage/xtradb/include/fsp0fsp.ic +++ b/storage/xtradb/include/fsp0fsp.ic @@ -1,6 +1,7 @@ /***************************************************************************** Copyright (c) 1995, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -63,6 +64,10 @@ fsp_flags_is_valid( ulint atomic_blobs = FSP_FLAGS_HAS_ATOMIC_BLOBS(flags); ulint page_ssize = FSP_FLAGS_GET_PAGE_SSIZE(flags); ulint unused = FSP_FLAGS_GET_UNUSED(flags); + ulint page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(flags); + ulint page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags); + ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); + atomic_writes_t awrites = (atomic_writes_t)atomic_writes; DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false);); @@ -108,6 +113,20 @@ fsp_flags_is_valid( # error "UNIV_FORMAT_MAX != UNIV_FORMAT_B, Add more validations." 
#endif + /* Page compression level requires page compression and atomic blobs + to be set */ + if (page_compression_level || page_compression) { + if (!page_compression || !atomic_blobs) { + return(false); + } + } + + if ((awrites == ATOMIC_WRITES_ON || + (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) + && !atomic_blobs) { + return (false); + } + /* The DATA_DIR field can be used for any row type so there is nothing here to validate. */ diff --git a/storage/xtradb/include/fsp0pagecompress.h b/storage/xtradb/include/fsp0pagecompress.h new file mode 100644 index 00000000000..4913f1d6b29 --- /dev/null +++ b/storage/xtradb/include/fsp0pagecompress.h @@ -0,0 +1,73 @@ +/***************************************************************************** + +Copyright (C) 2013 SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fsp0pagecompress.h +Helper functions for extracting/storing page compression and +atomic writes information to file space. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#ifndef fsp0pagecompress_h +#define fsp0pagecompress_h + +/**********************************************************************//** +Reads the page compression level from the first page of a tablespace. +@return page compression level, or 0 if uncompressed */ +UNIV_INTERN +ulint +fsp_header_get_compression_level( +/*=============================*/ + const page_t* page); /*!< in: first page of a tablespace */ + +/********************************************************************//** +Determine if the tablespace is page compressed from dict_table_t::flags. +@return TRUE if page compressed, FALSE if not compressed */ +UNIV_INLINE +ibool +fsp_flags_is_page_compressed( +/*=========================*/ + ulint flags); /*!< in: tablespace flags */ + +/********************************************************************//** +Extract the page compression level from tablespace flags. +A tablespace has only one physical page compression level +whether that page is compressed or not. +@return page compression level of the file-per-table tablespace, +or zero if the table is not compressed. */ +UNIV_INLINE +ulint +fsp_flags_get_page_compression_level( +/*=================================*/ + ulint flags); /*!< in: tablespace flags */ + +/********************************************************************//** +Determine the tablespace is using atomic writes from dict_table_t::flags. 
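On the tablespace side the same information is mirrored into FSP flags: dict_tf_to_fsp_flags() (see dict0dict.ic earlier in this patch) ORs the values in with the FSP_FLAGS_SET_* macros, and fsp_flags_is_valid() above checks the allowed combinations. A minimal round-trip sketch (not part of the patch):

/* Editorial sketch, not part of the patch: storing and reading back page
compression and atomic writes information in tablespace flags. A real
flag word also carries POST_ANTELOPE/ATOMIC_BLOBS, which
dict_tf_to_fsp_flags() sets before these fields. */
static void
example_fsp_flags_round_trip(void)
{
	ulint	fsp_flags = 0;

	fsp_flags = FSP_FLAGS_SET_PAGE_COMPRESSION(fsp_flags, 1);
	fsp_flags = FSP_FLAGS_SET_PAGE_COMPRESSION_LEVEL(fsp_flags, 6);
	fsp_flags = FSP_FLAGS_SET_ATOMIC_WRITES(fsp_flags, ATOMIC_WRITES_ON);

	ut_ad(FSP_FLAGS_GET_PAGE_COMPRESSION(fsp_flags) == 1);
	ut_ad(FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(fsp_flags) == 6);
	ut_ad(FSP_FLAGS_GET_ATOMIC_WRITES(fsp_flags) == ATOMIC_WRITES_ON);
}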
+@return true if atomic writes is used, false if not */ +UNIV_INLINE +atomic_writes_t +fsp_flags_get_atomic_writes( +/*========================*/ + ulint flags); /*!< in: tablespace flags */ + +#ifndef UNIV_NONINL +#include "fsp0pagecompress.ic" +#endif + +#endif diff --git a/storage/xtradb/include/fsp0pagecompress.ic b/storage/xtradb/include/fsp0pagecompress.ic new file mode 100644 index 00000000000..873f6cd401d --- /dev/null +++ b/storage/xtradb/include/fsp0pagecompress.ic @@ -0,0 +1,177 @@ +/***************************************************************************** + +Copyright (C) 2013, 2014, SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/fsp0pagecompress.ic +Implementation for helper functions for extracting/storing page +compression and atomic writes information to file space. + +Created 11/12/2013 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#include "fsp0fsp.h" + + +/********************************************************************//** +Determine if the tablespace is page compressed from dict_table_t::flags. +@return TRUE if page compressed, FALSE if not page compressed */ +UNIV_INLINE +ibool +fsp_flags_is_page_compressed( +/*=========================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_PAGE_COMPRESSION(flags)); +} + +/********************************************************************//** +Determine the tablespace is page compression level from dict_table_t::flags. +@return page compression level or 0 if not compressed*/ +UNIV_INLINE +ulint +fsp_flags_get_page_compression_level( +/*=================================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return(FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags)); +} + +/********************************************************************//** +Determine the tablespace is using atomic writes from dict_table_t::flags. 
+@return true if atomic writes is used, false if not */ +UNIV_INLINE +atomic_writes_t +fsp_flags_get_atomic_writes( +/*========================*/ + ulint flags) /*!< in: tablespace flags */ +{ + return((atomic_writes_t)FSP_FLAGS_GET_ATOMIC_WRITES(flags)); +} + +/*******************************************************************//** +Find out wheather the page is index page or not +@return true if page type index page, false if not */ +UNIV_INLINE +ibool +fil_page_is_index_page( +/*===================*/ + byte *buf) /*!< in: page */ +{ + return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_INDEX); +} + +/*******************************************************************//** +Find out wheather the page is page compressed +@return true if page is page compressed, false if not */ +UNIV_INLINE +ibool +fil_page_is_compressed( +/*===================*/ + byte *buf) /*!< in: page */ +{ + return(mach_read_from_2(buf+FIL_PAGE_TYPE) == FIL_PAGE_PAGE_COMPRESSED); +} + +/*******************************************************************//** +Returns the page compression level of the space, or 0 if the space +is not compressed. The tablespace must be cached in the memory cache. +@return page compression level, ULINT_UNDEFINED if space not found */ +UNIV_INLINE +ulint +fil_space_get_page_compression_level( +/*=================================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return(fsp_flags_get_page_compression_level(flags)); + } + + return(flags); +} + +/*******************************************************************//** +Extract the page compression from space. +@return true if space is page compressed, false if space is not found +or space is not page compressed. */ +UNIV_INLINE +ibool +fil_space_is_page_compressed( +/*=========================*/ + ulint id) /*!< in: space id */ +{ + ulint flags; + + flags = fil_space_get_flags(id); + + if (flags && flags != ULINT_UNDEFINED) { + + return(fsp_flags_is_page_compressed(flags)); + } + + return(flags); +} + +/****************************************************************//** +Get the name of the compression algorithm used for page +compression. +@return compression algorithm name or "UNKNOWN" if not known*/ +UNIV_INLINE +const char* +fil_get_compression_alg_name( +/*=========================*/ + ulint comp_alg) /*!space_id, 0, (ulint) (next_offset / UNIV_PAGE_SIZE), (ulint) (next_offset % UNIV_PAGE_SIZE), write_len, buf, - group); + group, 0); srv_stats.os_log_pending_writes.dec(); @@ -1975,7 +1975,7 @@ log_group_checkpoint( write_offset / UNIV_PAGE_SIZE, write_offset % UNIV_PAGE_SIZE, OS_FILE_LOG_BLOCK_SIZE, - buf, ((byte*) group + 1)); + buf, ((byte*) group + 1), 0); ut_ad(((ulint) group & 0x1UL) == 0); } @@ -2055,7 +2055,7 @@ log_group_read_checkpoint_info( fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->space_id, 0, field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE, - OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL); + OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL, 0); } /******************************************************//** @@ -2438,7 +2438,7 @@ loop: fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, 0, (ulint) (source_offset / UNIV_PAGE_SIZE), (ulint) (source_offset % UNIV_PAGE_SIZE), - len, buf, (type == LOG_ARCHIVE) ? &log_archive_io : NULL); + len, buf, (type == LOG_ARCHIVE) ? 
&log_archive_io : NULL, 0); start_lsn += len; buf += len; @@ -2563,7 +2563,7 @@ log_group_archive_file_header_write( dest_offset / UNIV_PAGE_SIZE, dest_offset % UNIV_PAGE_SIZE, 2 * OS_FILE_LOG_BLOCK_SIZE, - buf, &log_archive_io); + buf, &log_archive_io, 0); } /******************************************************//** @@ -2600,7 +2600,7 @@ log_group_archive_completed_header_write( dest_offset % UNIV_PAGE_SIZE, OS_FILE_LOG_BLOCK_SIZE, buf + LOG_FILE_ARCH_COMPLETED, - &log_archive_io); + &log_archive_io, 0); } /******************************************************//** @@ -2663,12 +2663,12 @@ loop: file_handle = os_file_create(innodb_file_log_key, name, open_mode, OS_FILE_AIO, - OS_DATA_FILE, &ret); + OS_DATA_FILE, &ret, FALSE); if (!ret && (open_mode == OS_FILE_CREATE)) { file_handle = os_file_create( innodb_file_log_key, name, OS_FILE_OPEN, - OS_FILE_AIO, OS_DATA_FILE, &ret); + OS_FILE_AIO, OS_DATA_FILE, &ret, FALSE); } if (!ret) { @@ -2737,7 +2737,7 @@ loop: (ulint) (next_offset / UNIV_PAGE_SIZE), (ulint) (next_offset % UNIV_PAGE_SIZE), ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf, - &log_archive_io); + &log_archive_io, 0); start_lsn += len; next_offset += len; diff --git a/storage/xtradb/log/log0online.cc b/storage/xtradb/log/log0online.cc index 8c2bc5602a9..2438303043c 100644 --- a/storage/xtradb/log/log0online.cc +++ b/storage/xtradb/log/log0online.cc @@ -547,7 +547,7 @@ log_online_start_bitmap_file(void) log_bmp_sys->out.name, OS_FILE_CREATE, OS_FILE_READ_WRITE, - &success); + &success, FALSE); } if (UNIV_UNLIKELY(!success)) { @@ -707,7 +707,7 @@ log_online_read_init(void) log_bmp_sys->out.file = os_file_create_simple_no_error_handling (innodb_file_bmp_key, log_bmp_sys->out.name, OS_FILE_OPEN, - OS_FILE_READ_WRITE, &success); + OS_FILE_READ_WRITE, &success, FALSE); if (!success) { @@ -1491,7 +1491,7 @@ log_online_open_bitmap_file_read_only( bitmap_file->name, OS_FILE_OPEN, OS_FILE_READ_ONLY, - &success); + &success, FALSE); if (UNIV_UNLIKELY(!success)) { /* Here and below assume that bitmap file names do not diff --git a/storage/xtradb/log/log0recv.cc b/storage/xtradb/log/log0recv.cc index d0b833f2bba..1772def9f9b 100644 --- a/storage/xtradb/log/log0recv.cc +++ b/storage/xtradb/log/log0recv.cc @@ -2,6 +2,7 @@ Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -2131,7 +2132,7 @@ recv_apply_log_recs_for_backup(void) error = fil_io(OS_FILE_READ, true, recv_addr->space, zip_size, recv_addr->page_no, 0, zip_size, - block->page.zip.data, NULL); + block->page.zip.data, NULL, 0); if (error == DB_SUCCESS && !buf_zip_decompress(block, TRUE)) { exit(1); @@ -2141,7 +2142,7 @@ recv_apply_log_recs_for_backup(void) recv_addr->space, 0, recv_addr->page_no, 0, UNIV_PAGE_SIZE, - block->frame, NULL); + block->frame, NULL, 0); } if (error != DB_SUCCESS) { @@ -2170,13 +2171,13 @@ recv_apply_log_recs_for_backup(void) recv_addr->space, zip_size, recv_addr->page_no, 0, zip_size, - block->page.zip.data, NULL); + block->page.zip.data, NULL, 0); } else { error = fil_io(OS_FILE_WRITE, true, recv_addr->space, 0, recv_addr->page_no, 0, UNIV_PAGE_SIZE, - block->frame, NULL); + block->frame, NULL, 0); } skip_this_recv_addr: recv_addr = HASH_GET_NEXT(addr_hash, recv_addr); @@ -3144,7 +3145,7 @@ recv_recovery_from_checkpoint_start_func( fil_io(OS_FILE_READ | OS_FILE_LOG, true, max_cp_group->space_id, 0, 0, 0, LOG_FILE_HDR_SIZE, - log_hdr_buf, max_cp_group); + log_hdr_buf, max_cp_group, 0); if (0 == ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, (byte*)"ibbackup", (sizeof "ibbackup") - 1)) { @@ -3175,7 +3176,7 @@ recv_recovery_from_checkpoint_start_func( fil_io(OS_FILE_WRITE | OS_FILE_LOG, true, max_cp_group->space_id, 0, 0, 0, OS_FILE_LOG_BLOCK_SIZE, - log_hdr_buf, max_cp_group); + log_hdr_buf, max_cp_group, 0); } log_hdr_log_block_size @@ -3775,7 +3776,7 @@ try_open_again: file_handle = os_file_create(innodb_file_log_key, name, OS_FILE_OPEN, - OS_FILE_LOG, OS_FILE_AIO, &ret); + OS_FILE_LOG, OS_FILE_AIO, &ret, FALSE); if (ret == FALSE) { ask_again: @@ -3827,7 +3828,7 @@ ask_again: /* Read the archive file header */ fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, 0, 0, 0, - LOG_FILE_HDR_SIZE, buf, NULL); + LOG_FILE_HDR_SIZE, buf, NULL, 0); /* Check if the archive file header is consistent */ @@ -3901,7 +3902,7 @@ ask_again: fil_io(OS_FILE_READ | OS_FILE_LOG, true, group->archive_space_id, 0, read_offset / UNIV_PAGE_SIZE, - read_offset % UNIV_PAGE_SIZE, len, buf, NULL); + read_offset % UNIV_PAGE_SIZE, len, buf, NULL, 0); ret = recv_scan_log_recs( (buf_pool_get_n_pages() diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 38eb5241da1..43adf78c63c 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -2,6 +2,7 @@ Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. 
Those modifications are @@ -42,10 +43,16 @@ Created 10/21/1995 Heikki Tuuri #include "srv0srv.h" #include "srv0start.h" #include "fil0fil.h" +#include "fil0pagecompress.h" #include "buf0buf.h" #include "btr0types.h" #include "trx0trx.h" #include "srv0mon.h" +#include "srv0srv.h" +#ifdef HAVE_POSIX_FALLOCATE +#include "fcntl.h" +#include "linux/falloc.h" +#endif #ifndef UNIV_HOTBACKUP # include "os0sync.h" # include "os0thread.h" @@ -196,6 +203,28 @@ struct os_aio_slot_t{ and which can be used to identify which pending aio operation was completed */ + ulint bitmap; + + byte* page_compression_page; /*!< Memory allocated for + page compressed page and + freed after the write + has been completed */ + + ibool page_compression; + ulint page_compression_level; + + ulint* write_size; /*!< Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ + + byte* page_buf; /*!< Actual page buffer for + page compressed pages, do not + free this */ + + ibool page_compress_success; + #ifdef LINUX_NATIVE_AIO struct iocb control; /* Linux control block for aio */ int n_bytes; /* bytes written/read. */ @@ -301,6 +330,58 @@ UNIV_INTERN ulint os_n_pending_writes = 0; /** Number of pending read operations */ UNIV_INTERN ulint os_n_pending_reads = 0; +/** After first fallocate failure we will disable os_file_trim */ +UNIV_INTERN ibool os_fallocate_failed = FALSE; + +/**********************************************************************//** +Directly manipulate the allocated disk space by deallocating for the file referred to +by fd for the byte range starting at offset and continuing for len bytes. +Within the specified range, partial file system blocks are zeroed, and whole +file system blocks are removed from the file. After a successful call, +subsequent reads from this range will return zeroes. +@return true if success, false if error */ +UNIV_INTERN +ibool +os_file_trim( +/*=========*/ + os_file_t file, /*!< in: file to be trimmed */ + os_aio_slot_t* slot, /*!< in: slot structure */ + ulint len); /*!< in: length of area */ + +/**********************************************************************//** +Allocate memory for temporal buffer used for page compression. This +buffer is freed later. */ +UNIV_INTERN +void +os_slot_alloc_page_buf( +/*===================*/ + os_aio_slot_t* slot); /*!< in: slot structure */ + +/****************************************************************//** +Does error handling when a file operation fails. +@return TRUE if we should retry the operation */ +static +ibool +os_file_handle_error_no_exit( +/*=========================*/ + const char* name, /*!< in: name of a file or NULL */ + const char* operation, /*!< in: operation */ + ibool on_error_silent,/*!< in: if TRUE then don't print + any message to the log. */ + const char* file, /*!< in: file name */ + const ulint line); /*!< in: line */ + +/****************************************************************//** +Tries to enable the atomic write feature, if available, for the specified file +handle. 
+@return TRUE if success */ +static __attribute__((warn_unused_result)) +ibool +os_file_set_atomic_writes( +/*======================*/ + const char* name, /*!< in: name of the file */ + os_file_t file); /*!< in: handle to the file */ + #ifdef UNIV_DEBUG # ifndef UNIV_HOTBACKUP /**********************************************************************//** @@ -537,6 +618,16 @@ os_file_get_last_error_low( "InnoDB: because of either a thread exit" " or an application request.\n" "InnoDB: Retry attempt is made.\n"); + } else if (err == ECANCELED) { + fprintf(stderr, + "InnoDB: Operation canceled (%d):%s\n", + err, strerror(err)); + + if(srv_use_atomic_writes) { + fprintf(stderr, + "InnoDB: Error trying to enable atomic writes on " + "non-supported destination!\n"); + } } else { fprintf(stderr, "InnoDB: Some operating system error numbers" @@ -633,6 +724,8 @@ os_file_get_last_error_low( return(OS_FILE_AIO_RESOURCES_RESERVED); } break; + case ECANCELED: + return(OS_FILE_OPERATION_NOT_SUPPORTED); case EINTR: if (srv_use_native_aio) { return(OS_FILE_AIO_INTERRUPTED); @@ -672,9 +765,11 @@ os_file_handle_error_cond_exit( const char* operation, /*!< in: operation */ ibool should_exit, /*!< in: call exit(3) if unknown error and this parameter is TRUE */ - ibool on_error_silent)/*!< in: if TRUE then don't print + ibool on_error_silent,/*!< in: if TRUE then don't print any message to the log iff it is an unknown non-fatal error */ + const char* file, /*!< in: file name */ + const ulint line) /*!< in: line */ { ulint err; @@ -706,6 +801,9 @@ os_file_handle_error_cond_exit( os_has_said_disk_full = TRUE; + fprintf(stderr, + " InnoDB: at file %s and at line %ld\n", file, line); + fflush(stderr); return(FALSE); @@ -737,6 +835,9 @@ os_file_handle_error_cond_exit( is better to ignore on_error_silent and print an error message to the log. */ + fprintf(stderr, + " InnoDB: at file %s and at line %ld\n", file, line); + if (should_exit || !on_error_silent) { ib_logf(IB_LOG_LEVEL_ERROR, "File %s: '%s' returned OS " "error " ULINTPF ".%s", name ? name : "(unknown)", @@ -760,10 +861,12 @@ ibool os_file_handle_error( /*=================*/ const char* name, /*!< in: name of a file or NULL */ - const char* operation) /*!< in: operation */ + const char* operation, /*!< in: operation */ + const char* file, /*!< in: file name */ + const ulint line) /*!< in: line */ { /* exit in case of unknown error */ - return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE)); + return(os_file_handle_error_cond_exit(name, operation, TRUE, FALSE, file, line)); } /****************************************************************//** @@ -775,12 +878,14 @@ os_file_handle_error_no_exit( /*=========================*/ const char* name, /*!< in: name of a file or NULL */ const char* operation, /*!< in: operation */ - ibool on_error_silent)/*!< in: if TRUE then don't print + ibool on_error_silent,/*!< in: if TRUE then don't print any message to the log. 
*/ + const char* file, /*!< in: file name */ + const ulint line) /*!< in: line */ { /* don't exit in case of unknown error */ return(os_file_handle_error_cond_exit( - name, operation, FALSE, on_error_silent)); + name, operation, FALSE, on_error_silent, file, line)); } #undef USE_FILE_LOCK @@ -923,7 +1028,7 @@ os_file_opendir( if (dir == INVALID_HANDLE_VALUE) { if (error_is_fatal) { - os_file_handle_error(dirname, "opendir"); + os_file_handle_error(dirname, "opendir", __FILE__, __LINE__); } return(NULL); @@ -934,7 +1039,7 @@ os_file_opendir( dir = opendir(dirname); if (dir == NULL && error_is_fatal) { - os_file_handle_error(dirname, "opendir"); + os_file_handle_error(dirname, "opendir", __FILE__, __LINE__); } return(dir); @@ -956,7 +1061,7 @@ os_file_closedir( ret = FindClose(dir); if (!ret) { - os_file_handle_error_no_exit(NULL, "closedir", FALSE); + os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__); return(-1); } @@ -968,7 +1073,7 @@ os_file_closedir( ret = closedir(dir); if (ret) { - os_file_handle_error_no_exit(NULL, "closedir", FALSE); + os_file_handle_error_no_exit(NULL, "closedir", FALSE, __FILE__, __LINE__); } return(ret); @@ -1040,7 +1145,7 @@ next_file: return(1); } else { - os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE); + os_file_handle_error_no_exit(NULL, "readdir_next_file", FALSE, __FILE__, __LINE__); return(-1); } #else @@ -1126,7 +1231,7 @@ next_file: goto next_file; } - os_file_handle_error_no_exit(full_path, "stat", FALSE); + os_file_handle_error_no_exit(full_path, "stat", FALSE, __FILE__, __LINE__); ut_free(full_path); @@ -1177,7 +1282,7 @@ os_file_create_directory( && !fail_if_exists))) { os_file_handle_error_no_exit( - pathname, "CreateDirectory", FALSE); + pathname, "CreateDirectory", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -1190,7 +1295,7 @@ os_file_create_directory( if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) { /* failure */ - os_file_handle_error_no_exit(pathname, "mkdir", FALSE); + os_file_handle_error_no_exit(pathname, "mkdir", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -1300,7 +1405,7 @@ os_file_create_simple_func( retry = os_file_handle_error( name, create_mode == OS_FILE_OPEN ? - "open" : "create"); + "open" : "create", __FILE__, __LINE__); } else { *success = TRUE; @@ -1368,7 +1473,7 @@ os_file_create_simple_func( retry = os_file_handle_error( name, create_mode == OS_FILE_OPEN - ? "open" : "create"); + ? "open" : "create", __FILE__, __LINE__); } else { *success = TRUE; retry = false; @@ -1410,9 +1515,12 @@ os_file_create_simple_no_error_handling_func( OS_FILE_READ_WRITE, or OS_FILE_READ_ALLOW_DELETE; the last option is used by a backup program reading the file */ - ibool* success)/*!< out: TRUE if succeed, FALSE if error */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes) /*! 
in: atomic writes table option + value */ { os_file_t file; + atomic_writes_t awrites = (atomic_writes_t) atomic_writes; *success = FALSE; #ifdef __WIN__ @@ -1473,6 +1581,15 @@ os_file_create_simple_no_error_handling_func( attributes, NULL); // No template file + if (file != INVALID_HANDLE_VALUE + && (awrites == ATOMIC_WRITES_ON || + (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) + && !os_file_set_atomic_writes(name, file)) { + CloseHandle(file); + *success = FALSE; + file = INVALID_HANDLE_VALUE; + } + *success = (file != INVALID_HANDLE_VALUE); #else /* __WIN__ */ int create_flag; @@ -1533,6 +1650,15 @@ os_file_create_simple_no_error_handling_func( } #endif /* USE_FILE_LOCK */ + if (file != -1 + && (awrites == ATOMIC_WRITES_ON || + (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) + && !os_file_set_atomic_writes(name, file)) { + *success = FALSE; + close(file); + file = -1; + } + #endif /* __WIN__ */ return(file); @@ -1602,7 +1728,7 @@ os_file_set_atomic_writes( if (ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option)) { - os_file_handle_error_no_exit(name, "ioctl", FALSE); + os_file_handle_error_no_exit(name, "ioctl(DFS_IOCTL_ATOMIC_WRITE_SET)", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -1636,12 +1762,15 @@ os_file_create_func( async i/o or unbuffered i/o: look in the function source code for the exact rules */ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */ - ibool* success)/*!< out: TRUE if succeed, FALSE if error */ + ibool* success,/*!< out: TRUE if succeed, FALSE if error */ + ulint atomic_writes) /*! in: atomic writes table option + value */ { os_file_t file; ibool retry; ibool on_error_no_exit; ibool on_error_silent; + atomic_writes_t awrites = (atomic_writes_t) atomic_writes; #ifdef __WIN__ DBUG_EXECUTE_IF( @@ -1784,9 +1913,9 @@ os_file_create_func( if (on_error_no_exit) { retry = os_file_handle_error_no_exit( - name, operation, on_error_silent); + name, operation, on_error_silent, __FILE__, __LINE__); } else { - retry = os_file_handle_error(name, operation); + retry = os_file_handle_error(name, operation, __FILE__, __LINE__); } } else { *success = TRUE; @@ -1795,8 +1924,10 @@ os_file_create_func( } while (retry); - if (srv_use_atomic_writes && type == OS_DATA_FILE && - !os_file_set_atomic_writes(name, file)) { + if (file != INVALID_HANDLE_VALUE + && (awrites == ATOMIC_WRITES_ON || + (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) + && !os_file_set_atomic_writes(name, file)) { CloseHandle(file); *success = FALSE; file = INVALID_HANDLE_VALUE; @@ -1876,9 +2007,9 @@ os_file_create_func( if (on_error_no_exit) { retry = os_file_handle_error_no_exit( - name, operation, on_error_silent); + name, operation, on_error_silent, __FILE__, __LINE__); } else { - retry = os_file_handle_error(name, operation); + retry = os_file_handle_error(name, operation, __FILE__, __LINE__); } } else { *success = TRUE; @@ -1932,14 +2063,16 @@ os_file_create_func( } #endif /* USE_FILE_LOCK */ - if (srv_use_atomic_writes && type == OS_DATA_FILE + if (file != -1 + && (awrites == ATOMIC_WRITES_ON || + (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) && !os_file_set_atomic_writes(name, file)) { - *success = FALSE; close(file); file = -1; } + #endif /* __WIN__ */ return(file); @@ -1998,7 +2131,7 @@ loop: ret = unlink(name); if (ret != 0 && errno != ENOENT) { - os_file_handle_error_no_exit(name, "delete", FALSE); + os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__); return(false); } @@ -2062,7 +2195,7 @@ loop: ret = unlink(name); if 
(ret != 0) { - os_file_handle_error_no_exit(name, "delete", FALSE); + os_file_handle_error_no_exit(name, "delete", FALSE, __FILE__, __LINE__); return(false); } @@ -2106,7 +2239,7 @@ os_file_rename_func( return(TRUE); } - os_file_handle_error_no_exit(oldpath, "rename", FALSE); + os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__); return(FALSE); #else @@ -2115,7 +2248,7 @@ os_file_rename_func( ret = rename(oldpath, newpath); if (ret != 0) { - os_file_handle_error_no_exit(oldpath, "rename", FALSE); + os_file_handle_error_no_exit(oldpath, "rename", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -2146,7 +2279,7 @@ os_file_close_func( return(TRUE); } - os_file_handle_error(NULL, "close"); + os_file_handle_error(NULL, "close", __FILE__, __LINE__); return(FALSE); #else @@ -2155,7 +2288,7 @@ os_file_close_func( ret = close(file); if (ret == -1) { - os_file_handle_error(NULL, "close"); + os_file_handle_error(NULL, "close", __FILE__, __LINE__); return(FALSE); } @@ -2247,6 +2380,12 @@ os_file_set_size( current_size = 0; +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: Note: File %s current_size %lu extended_size %lu\n", + name, os_file_get_size(file), size); +#endif + + #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { @@ -2257,7 +2396,7 @@ os_file_set_size( INT64PF ", desired size " INT64PF "\n", name, current_size, size); os_file_handle_error_no_exit (name, "posix_fallocate", - FALSE); + FALSE, __FILE__, __LINE__); return(FALSE); } return(TRUE); @@ -2446,7 +2585,7 @@ os_file_flush_func( return(TRUE); } - os_file_handle_error(NULL, "flush"); + os_file_handle_error(NULL, "flush", __FILE__, __LINE__); /* It is a fatal error if a file flush does not succeed, because then the database can get corrupt on disk */ @@ -2500,7 +2639,7 @@ os_file_flush_func( ib_logf(IB_LOG_LEVEL_ERROR, "The OS said file flush did not succeed"); - os_file_handle_error(NULL, "flush"); + os_file_handle_error(NULL, "flush", __FILE__, __LINE__); /* It is a fatal error if a file flush does not succeed, because then the database can get corrupt on disk */ @@ -2855,6 +2994,9 @@ try_again: os_mutex_exit(os_file_count_mutex); if (ret && len == n) { + if (fil_page_is_compressed((byte *)buf)) { + fil_decompress_page(NULL, (byte *)buf, len); + } return(TRUE); } #else /* __WIN__ */ @@ -2868,6 +3010,10 @@ try_again: if ((ulint) ret == n) { + if (fil_page_is_compressed((byte *)buf)) { + fil_decompress_page(NULL, (byte *)buf, n); + } + return(TRUE); } @@ -2875,7 +3021,7 @@ try_again: "Tried to read "ULINTPF" bytes at offset " UINT64PF". 
" "Was only able to read %ld.", n, offset, (lint) ret); #endif /* __WIN__ */ - retry = os_file_handle_error(NULL, "read"); + retry = os_file_handle_error(NULL, "read", __FILE__, __LINE__); if (retry) { goto try_again; @@ -2968,10 +3114,14 @@ try_again: if ((ulint) ret == n) { + if (fil_page_is_compressed((byte *)buf)) { + fil_decompress_page(NULL, (byte *)buf, n); + } + return(TRUE); } #endif /* __WIN__ */ - retry = os_file_handle_error_no_exit(NULL, "read", FALSE); + retry = os_file_handle_error_no_exit(NULL, "read", FALSE, __FILE__, __LINE__); if (retry) { goto try_again; @@ -3183,7 +3333,7 @@ os_file_status( } else if (ret) { /* file exists, but stat call failed */ - os_file_handle_error_no_exit(path, "stat", FALSE); + os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -3211,7 +3361,7 @@ os_file_status( } else if (ret) { /* file exists, but stat call failed */ - os_file_handle_error_no_exit(path, "stat", FALSE); + os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -3260,7 +3410,7 @@ os_file_get_status( } else if (ret) { /* file exists, but stat call failed */ - os_file_handle_error_no_exit(path, "stat", FALSE); + os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); return(DB_FAIL); @@ -3313,7 +3463,7 @@ os_file_get_status( } else if (ret) { /* file exists, but stat call failed */ - os_file_handle_error_no_exit(path, "stat", FALSE); + os_file_handle_error_no_exit(path, "stat", FALSE, __FILE__, __LINE__); return(DB_FAIL); @@ -3866,7 +4016,7 @@ os_aio_array_create( array->slots = static_cast( ut_malloc(n * sizeof(*array->slots))); - memset(array->slots, 0x0, sizeof(n * sizeof(*array->slots))); + memset(array->slots, 0x0, n * sizeof(*array->slots)); #if defined(LINUX_NATIVE_AIO) array->aio_ctx = NULL; @@ -3941,6 +4091,8 @@ os_aio_array_free( /*==============*/ os_aio_array_t*& array) /*!< in, own: array to free */ { + ulint i; + os_mutex_free(array->mutex); os_event_free(array->not_full); os_event_free(array->is_empty); @@ -3952,6 +4104,14 @@ os_aio_array_free( } #endif /* LINUX_NATIVE_AIO */ + for (i = 0; i < array->n_slots; i++) { + os_aio_slot_t* slot = os_aio_array_get_nth_slot(array, i); + if (slot->page_compression_page) { + ut_free(slot->page_compression_page); + slot->page_compression_page = NULL; + } + } + ut_free(array->slots); ut_free(array); @@ -4296,7 +4456,16 @@ os_aio_array_reserve_slot( to write */ os_offset_t offset, /*!< in: file offset */ ulint len, /*!< in: length of the block to read or write */ - ulint space_id) + ulint space_id, + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level, /*!< page compression + level to be used */ + ulint* write_size)/*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. 
*/ { os_aio_slot_t* slot = NULL; #ifdef WIN_ASYNC_IO @@ -4388,6 +4557,55 @@ found: slot->io_already_done = FALSE; slot->space_id = space_id; + slot->page_compress_success = FALSE; + slot->write_size = write_size; + slot->page_compression_level = page_compression_level; + slot->page_compression = page_compression; + + /* If the space is page compressed and this is write operation + and if either only index pages compression is disabled or + page is index page and only index pages compression is enabled then + we compress the page */ + if (message1 && + type == OS_FILE_WRITE && + page_compression && + (srv_page_compress_index_pages == false || + (srv_page_compress_index_pages == true && fil_page_is_index_page(slot->buf)))) { + ulint real_len = len; + byte* tmp = NULL; + + /* Release the array mutex while compressing */ + os_mutex_exit(array->mutex); + + // We allocate memory for page compressed buffer if and only + // if it is not yet allocated. + if (slot->page_buf == NULL) { + os_slot_alloc_page_buf(slot); + } + + ut_ad(slot->page_buf); + + /* Write buffer full of zeros, this is needed for trim, + can't really avoid this now. */ + memset(slot->page_buf, 0, len); + + tmp = fil_compress_page(fil_node_get_space_id(slot->message1), (byte *)buf, slot->page_buf, len, page_compression_level, &real_len); + + /* If compression succeeded, set up the length and buffer */ + if (tmp != buf) { + len = real_len; + buf = slot->page_buf; + slot->len = real_len; + slot->page_compress_success = TRUE; + } else { + slot->page_compress_success = FALSE; + } + + /* Take array mutex back */ + os_mutex_enter(array->mutex); + + } + #ifdef WIN_ASYNC_IO control = &slot->control; control->Offset = (DWORD) offset & 0xFFFFFFFF; @@ -4663,7 +4881,16 @@ os_aio_func( aio operation); ignored if mode is OS_AIO_SYNC */ ulint space_id, - trx_t* trx) + trx_t* trx, + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level, /*!< page compression + level to be used */ + ulint* write_size)/*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ { os_aio_array_t* array; os_aio_slot_t* slot; @@ -4686,7 +4913,7 @@ os_aio_func( wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER); - if (mode == OS_AIO_SYNC) + if (mode == OS_AIO_SYNC) { ibool ret; /* This is actually an ordinary synchronous read or write: @@ -4753,7 +4980,8 @@ try_again: trx->io_read += n; } slot = os_aio_array_reserve_slot(type, array, message1, message2, file, - name, buf, offset, n, space_id); + name, buf, offset, n, space_id, + page_compression, page_compression_level, write_size); if (type == OS_FILE_READ) { if (srv_use_native_aio) { os_n_file_reads++; @@ -4811,7 +5039,7 @@ err_exit: os_aio_array_free_slot(array, slot); if (os_file_handle_error( - name,type == OS_FILE_READ ? "aio read" : "aio write")) { + name,type == OS_FILE_READ ? 
"aio read" : "aio write", __FILE__, __LINE__)) { goto try_again; } @@ -4911,7 +5139,7 @@ os_aio_windows_handle( if (ret && len == slot->len) { ret_val = TRUE; - } else if (os_file_handle_error(slot->name, "Windows aio")) { + } else if (os_file_handle_error(slot->name, "Windows aio", __FILE__, __LINE__)) { retry = TRUE; } else { @@ -4939,11 +5167,17 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: - ret_val = os_file_write(slot->name, slot->file, slot->buf, - slot->control.Offset, slot->control.OffsetHigh, slot->len); + if (slot->message1 && page_compression && slot->page_buf) { + ret_val = os_file_write(slot->name, slot->file, slot->page_buf, + slot->control.Offset, slot->control.OffsetHigh, slot->len); + } else { + + ret_val = os_file_write(slot->name, slot->file, slot->buf, + slot->control.Offset, slot->control.OffsetHigh, slot->len); + } break; case OS_FILE_READ: - ret_val = os_file_read(slot->file, slot->buf, + ret_val = os_file_read(slot->file, slot->buf, slot->control.Offset, slot->control.OffsetHigh, slot->len); break; default: @@ -4969,6 +5203,28 @@ os_aio_windows_handle( ret_val = ret && len == slot->len; } + if (slot->message1 && page_compression) { + // We allocate memory for page compressed buffer if and only + // if it is not yet allocated. + if (slot->page_buf == NULL) { + os_slot_alloc_page_buf(slot); + } + ut_ad(slot->page_buf); + + if (slot->type == OS_FILE_READ) { + if (fil_page_is_compressed(slot->buf)) { + fil_decompress_page(slot->page_buf, slot->buf, slot->len); + } + } else { + if (slot->page_compress_success && fil_page_is_compressed(slot->page_buf)) { + if (srv_use_trim && os_fallocate_failed == FALSE) { + // Deallocate unused blocks from file system + os_file_trim(slot->file, slot, slot->len); + } + } + } + } + os_aio_array_free_slot((os_aio_array_t *)slot->arr, slot); return(ret_val); @@ -5058,6 +5314,33 @@ retry: /* We have not overstepped to next segment. */ ut_a(slot->pos < end_pos); + /* If the table is page compressed and this is read, + we decompress before we annouce the read is + complete. For writes, we free the compressed page. */ + if (slot->message1 && slot->page_compression) { + // We allocate memory for page compressed buffer if and only + // if it is not yet allocated. + if (slot->page_buf == NULL) { + os_slot_alloc_page_buf(slot); + } + ut_ad(slot->page_buf); + + if (slot->type == OS_FILE_READ) { + if (fil_page_is_compressed(slot->buf)) { + fil_decompress_page(slot->page_buf, slot->buf, slot->len); + } + } else { + if (slot->page_compress_success && + fil_page_is_compressed(slot->page_buf)) { + ut_ad(slot->page_compression_page); + if (srv_use_trim && os_fallocate_failed == FALSE) { + // Deallocate unused blocks from file system + os_file_trim(slot->file, slot, slot->len); + } + } + } + } + /* Mark this request as completed. The error handling will be done in the calling function. */ os_mutex_enter(array->mutex); @@ -5203,6 +5486,13 @@ found: } else { errno = -slot->ret; + if (slot->ret == 0) { + fprintf(stderr, + "InnoDB: Number of bytes after aio %d requested %lu\n" + "InnoDB: from file %s\n", + slot->n_bytes, slot->len, slot->name); + } + /* os_file_handle_error does tell us if we should retry this IO. As it stands now, we don't do this retry when reaping requests from a different context than @@ -5210,7 +5500,7 @@ found: windows and linux native AIO. We should probably look into this to transparently re-submit the IO. 
*/ - os_file_handle_error(slot->name, "Linux aio"); + os_file_handle_error(slot->name, "Linux aio", __FILE__, __LINE__); ret = FALSE; } @@ -5884,3 +6174,162 @@ os_aio_all_slots_free(void) #endif /* UNIV_DEBUG */ #endif /* !UNIV_HOTBACKUP */ + +#ifdef _WIN32 +#include +#ifndef FSCTL_FILE_LEVEL_TRIM +#define FSCTL_FILE_LEVEL_TRIM CTL_CODE(FILE_DEVICE_FILE_SYSTEM, 130, METHOD_BUFFERED, FILE_WRITE_DATA) +typedef struct _FILE_LEVEL_TRIM_RANGE { + DWORDLONG Offset; + DWORDLONG Length; +} FILE_LEVEL_TRIM_RANGE, *PFILE_LEVEL_TRIM_RANGE; + +typedef struct _FILE_LEVEL_TRIM { + DWORD Key; + DWORD NumRanges; + FILE_LEVEL_TRIM_RANGE Ranges[1]; +} FILE_LEVEL_TRIM, *PFILE_LEVEL_TRIM; +#endif +#endif + +/**********************************************************************//** +Directly manipulate the allocated disk space by deallocating for the file referred to +by fd for the byte range starting at offset and continuing for len bytes. +Within the specified range, partial file system blocks are zeroed, and whole +file system blocks are removed from the file. After a successful call, +subsequent reads from this range will return zeroes. +@return true if success, false if error */ +UNIV_INTERN +ibool +os_file_trim( +/*=========*/ + os_file_t file, /*!< in: file to be trimmed */ + os_aio_slot_t* slot, /*!< in: slot structure */ + ulint len) /*!< in: length of area */ +{ + + size_t trim_len = UNIV_PAGE_SIZE - len; + os_offset_t off = slot->offset + len; + + // Nothing to do if trim length is zero or if actual write + // size is initialized and it is smaller than current write size. + // In first write if we trim we set write_size to actual bytes + // written and rest of the page is trimmed. In following writes + // there is no need to trim again if write_size only increases + // because rest of the page is already trimmed. If actual write + // size decreases we need to trim again. + if (trim_len == 0 || + (slot->write_size && + *slot->write_size > 0 && + len >= *slot->write_size)) { + +#ifdef UNIV_DEBUG + fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n", + *slot->write_size, trim_len, len); +#endif + + if (*slot->write_size > 0 && len >= *slot->write_size) { + srv_stats.page_compressed_trim_op_saved.inc(); + } + + *slot->write_size = len; + + return (TRUE); + } + +#ifdef __linux__ +#if defined(FALLOC_FL_PUNCH_HOLE) && defined (FALLOC_FL_KEEP_SIZE) + int ret = fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len); + + if (ret) { + /* After first failure do not try to trim again */ + os_fallocate_failed = TRUE; + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Warning] fallocate call failed with error code %d.\n" + " InnoDB: start: %lx len: %lu payload: %lu\n" + " InnoDB: Disabling fallocate for now.\n", ret, (slot->offset+len), trim_len, len); + + os_file_handle_error_no_exit(slot->name, + " fallocate(FALLOC_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE) ", + FALSE, __FILE__, __LINE__); + + if (slot->write_size) { + *slot->write_size = 0; + } + + return (FALSE); + } else { + if (slot->write_size) { + *slot->write_size = len; + } + } +#else + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Warning] fallocate not supported on this installation." + " InnoDB: Disabling fallocate for now."); + os_fallocate_failed = TRUE; + slot->write_size = NULL; + +#endif /* HAVE_FALLOCATE ... 
*/ + +#elif defined(_WIN32) + FILE_LEVEL_TRIM flt; + flt.Key = 0; + flt.NumRanges = 1; + flt.Ranges[0].Offset = off; + flt.Ranges[0].Length = trim_len; + + BOOL ret = DeviceIoControl(file,FSCTL_FILE_LEVEL_TRIM,&flt, sizeof(flt), NULL, NULL, NULL, NULL); + + if (!ret) { + /* After first failure do not try to trim again */ + os_fallocate_failed = TRUE; + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: [Warning] fallocate call failed with error.\n" + " InnoDB: start: %lx len: %du payload: %lu\n" + " InnoDB: Disabling fallocate for now.\n", (slot->offset+len), trim_len, len); + + os_file_handle_error_no_exit(slot->name, + " DeviceIOControl(FSCTL_FILE_LEVEL_TRIM) ", + FALSE, __FILE__, __LINE__); + + if (slot->write_size) { + slot->write_size = 0; + } + return (FALSE); + } else { + if (slot->write_size) { + slot->write_size = len; + } + } +#endif + +#define SECT_SIZE 512 + srv_stats.page_compression_trim_sect512.add((trim_len / SECT_SIZE)); + srv_stats.page_compression_trim_sect4096.add((trim_len / (SECT_SIZE*8))); + srv_stats.page_compressed_trim_op.inc(); + + return (TRUE); + +} + +/**********************************************************************//** +Allocate memory for temporal buffer used for page compression. This +buffer is freed later. */ +UNIV_INTERN +void +os_slot_alloc_page_buf( +/*===================*/ + os_aio_slot_t* slot) /*!< in: slot structure */ +{ + byte* cbuf2; + byte* cbuf; + + cbuf2 = static_cast(ut_malloc(UNIV_PAGE_SIZE*2)); + cbuf = static_cast(ut_align(cbuf2, UNIV_PAGE_SIZE)); + slot->page_compression_page = static_cast(cbuf2); + slot->page_buf = static_cast(cbuf); +} diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc index d98315ae9a2..0b5556ab61a 100644 --- a/storage/xtradb/srv/srv0mon.cc +++ b/storage/xtradb/srv/srv0mon.cc @@ -290,6 +290,12 @@ static monitor_info_t innodb_counter_info[] = MONITOR_EXISTING | MONITOR_DEFAULT_ON), MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_WRITTEN}, + {"buffer_index_pages_written", "buffer", + "Number of index pages written (innodb_index_pages_written)", + static_cast( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_PAGES_WRITTEN}, + {"buffer_pages_read", "buffer", "Number of pages read (innodb_pages_read)", static_cast( @@ -879,6 +885,41 @@ static monitor_info_t innodb_counter_info[] = MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_PAD_DECREMENTS}, + {"compress_saved", "compression", + "Number of bytes saved by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_SAVED}, + + {"compress_trim_sect512", "compression", + "Number of sect-512 TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512}, + + {"compress_trim_sect4096", "compression", + "Number of sect-4K TRIMed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096}, + + {"compress_pages_page_compressed", "compression", + "Number of pages compressed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSED}, + + {"compress_page_compressed_trim_op", "compression", + "Number of TRIM operation performed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP}, + + {"compress_page_compressed_trim_op_saved", "compression", + "Number of TRIM operation saved by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED}, + + 
{"compress_pages_page_decompressed", "compression", + "Number of pages decompressed by page compression", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED}, + /* ========== Counters for Index ========== */ {"module_index", "index", "Index Manager", MONITOR_MODULE, @@ -1532,6 +1573,11 @@ srv_mon_process_existing_counter( value = stat.n_pages_written; break; + /* innodb_index_pages_written, the number of page written */ + case MONITOR_OVLD_INDEX_PAGES_WRITTEN: + value = srv_stats.index_pages_written; + break; + /* innodb_pages_read */ case MONITOR_OVLD_PAGES_READ: buf_get_total_stat(&stat); @@ -1773,6 +1819,28 @@ srv_mon_process_existing_counter( value = btr_cur_n_non_sea; break; + case MONITOR_OVLD_PAGE_COMPRESS_SAVED: + value = srv_stats.page_compression_saved; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT512: + value = srv_stats.page_compression_trim_sect512; + break; + case MONITOR_OVLD_PAGE_COMPRESS_TRIM_SECT4096: + value = srv_stats.page_compression_trim_sect4096; + break; + case MONITOR_OVLD_PAGES_PAGE_COMPRESSED: + value = srv_stats.pages_page_compressed; + break; + case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP: + value = srv_stats.page_compressed_trim_op; + break; + case MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED: + value = srv_stats.page_compressed_trim_op_saved; + break; + case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED: + value = srv_stats.pages_page_decompressed; + break; + default: ut_error; } diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 953bbba11f7..92acf847ca1 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -3,6 +3,7 @@ Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2008, 2009 Google Inc. Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, 2014, SkySQL Ab. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -160,6 +161,26 @@ use simulated aio we build below with threads. Currently we support native aio on windows and linux */ UNIV_INTERN my_bool srv_use_native_aio = TRUE; +/* If this flag is TRUE, then we will use page compression +to the pages */ +UNIV_INTERN my_bool srv_compress_pages = FALSE; +/* If this flag is TRUE, then we will use page compression +only for index pages */ +UNIV_INTERN my_bool srv_page_compress_index_pages = FALSE; +UNIV_INTERN long srv_trim_pct = 100; +/* Default compression level if page compression is used and no compression +level is set for the table*/ +UNIV_INTERN long srv_compress_zlib_level = 6; +/* If this flag is TRUE, then we will use fallocate(PUCH_HOLE) +to the pages */ +UNIV_INTERN my_bool srv_use_trim = TRUE; +/* If this flag is TRUE, then we will use posix fallocate for file extentsion */ +UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE; +/* If this flag is TRUE, then we disable doublewrite buffer */ +UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; +/* If this flag IS TRUE, then we use lz4 to compress/decompress pages */ +UNIV_INTERN my_bool srv_use_lz4 = FALSE; + #ifdef __WIN__ /* Windows native condition variables. 
We use runtime loading / function pointers, because they are not available on Windows Server 2003 and @@ -454,10 +475,6 @@ UNIV_INTERN unsigned long long srv_stats_persistent_sample_pages = 20; UNIV_INTERN my_bool srv_stats_auto_recalc = TRUE; UNIV_INTERN ibool srv_use_doublewrite_buf = TRUE; -UNIV_INTERN ibool srv_use_atomic_writes = FALSE; -#ifdef HAVE_POSIX_FALLOCATE -UNIV_INTERN ibool srv_use_posix_fallocate = FALSE; -#endif /** doublewrite buffer is 1MB is size i.e.: it can hold 128 16K pages. The following parameter is the size of the buffer that is used for @@ -493,6 +510,15 @@ static ulint srv_n_rows_read_old = 0; UNIV_INTERN ulint srv_truncated_status_writes = 0; UNIV_INTERN ulint srv_available_undo_logs = 0; +UNIV_INTERN ib_uint64_t srv_page_compression_saved = 0; +UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect512 = 0; +UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect4096 = 0; +UNIV_INTERN ib_uint64_t srv_index_pages_written = 0; +UNIV_INTERN ib_uint64_t srv_pages_page_compressed = 0; +UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op = 0; +UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op_saved = 0; +UNIV_INTERN ib_uint64_t srv_index_page_decompressed = 0; + /* Ensure status variables are on separate cache lines */ #define CACHE_LINE_SIZE 64 @@ -1835,6 +1861,15 @@ srv_export_innodb_status(void) export_vars.innodb_descriptors_memory = os_atomic_increment_ulint(&srv_descriptors_memory, 0); + export_vars.innodb_page_compression_saved = srv_stats.page_compression_saved; + export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512; + export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096; + export_vars.innodb_index_pages_written = srv_stats.index_pages_written; + export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed; + export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op; + export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved; + export_vars.innodb_pages_page_decompressed = srv_stats.pages_page_decompressed; + #ifdef UNIV_DEBUG rw_lock_s_lock(&purge_sys->latch); trx_id_t done_trx_no = purge_sys->done.trx_no; diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index 3ddfd9ab3a4..faad8c3c133 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -3,6 +3,7 @@ Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2009, Percona Inc. +Copyright (c) 2013, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. 
Those modifications are gratefully acknowledged and are described @@ -64,6 +65,8 @@ Created 2/16/1996 Heikki Tuuri #include "ibuf0ibuf.h" #include "srv0start.h" #include "srv0srv.h" +#include "buf0flu.h" + #ifndef UNIV_HOTBACKUP # include "trx0rseg.h" # include "os0proc.h" @@ -128,8 +131,14 @@ static os_file_t files[1000]; /** io_handler_thread parameters for thread identification */ static ulint n[SRV_MAX_N_IO_THREADS + 6]; /** io_handler_thread identifiers, 32 is the maximum number of purge threads */ -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 - + SRV_MAX_N_PURGE_THREADS]; +/* + static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 +/ + SRV_MAX_N_PURGE_THREADS]; +*/ +/** pgcomp_thread are 16 total */ +#define START_PGCOMP_CNT (SRV_MAX_N_IO_THREADS + 6 + SRV_MAX_N_PURGE_THREADS) +#define PGCOMP_MAX_WORKER 16 +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + SRV_MAX_N_PURGE_THREADS + PGCOMP_MAX_WORKER]; /** We use this mutex to test the return value of pthread_mutex_trylock on successful locking. HP-UX does NOT return 0, though Linux et al do. */ @@ -537,7 +546,7 @@ create_log_file( *file = os_file_create( innodb_file_log_key, name, OS_FILE_CREATE|OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL, - OS_LOG_FILE, &ret); + OS_LOG_FILE, &ret, FALSE); if (!ret) { ib_logf(IB_LOG_LEVEL_ERROR, "Cannot create %s", name); @@ -754,7 +763,7 @@ open_log_file( *file = os_file_create(innodb_file_log_key, name, OS_FILE_OPEN, OS_FILE_AIO, - OS_LOG_FILE, &ret); + OS_LOG_FILE, &ret, FALSE); if (!ret) { ib_logf(IB_LOG_LEVEL_ERROR, "Unable to open '%s'", name); return(DB_ERROR); @@ -845,7 +854,7 @@ open_or_create_data_files( files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_CREATE, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); if (srv_read_only_mode) { @@ -888,7 +897,7 @@ open_or_create_data_files( files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN_RAW, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); if (!ret) { ib_logf(IB_LOG_LEVEL_ERROR, @@ -921,17 +930,17 @@ open_or_create_data_files( files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN_RAW, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); } else if (i == 0) { files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN_RETRY, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); } else { files[i] = os_file_create( innodb_file_data_key, name, OS_FILE_OPEN, OS_FILE_NORMAL, - OS_DATA_FILE, &ret); + OS_DATA_FILE, &ret, FALSE); } if (!ret) { @@ -1122,7 +1131,7 @@ srv_undo_tablespace_create( innodb_file_data_key, name, srv_read_only_mode ? OS_FILE_OPEN : OS_FILE_CREATE, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); + OS_FILE_NORMAL, OS_DATA_FILE, &ret, FALSE); if (srv_read_only_mode && ret) { ib_logf(IB_LOG_LEVEL_INFO, @@ -1209,7 +1218,8 @@ srv_undo_tablespace_open( | OS_FILE_ON_ERROR_SILENT, OS_FILE_NORMAL, OS_DATA_FILE, - &ret); + &ret, + FALSE); /* If the file open was successful then load the tablespace. 
*/ @@ -1503,6 +1513,694 @@ init_log_online(void) } } +/* JAN: TODO: */ +/**********************************************************************************/ +extern int timediff(struct timeval *g_time, struct timeval *s_time, struct timeval *d_time); +extern ibool buf_flush_start(buf_pool_t* buf_pool, buf_flush_t flush_type); +extern void buf_flush_end(buf_pool_t* buf_pool, buf_flush_t flush_type); +extern void buf_flush_common(buf_flush_t flush_type, ulint page_count); +extern ulint buf_flush_batch(buf_pool_t* buf_pool, buf_flush_t flush_type, ulint min_n, lsn_t lsn_limit, bool limited_lru_scan, flush_counters_t*); + +typedef enum wrk_status { + WRK_ITEM_SET=0, + WRK_ITEM_START=1, + WRK_ITEM_DONE=2, + WRK_ITEM_SUCCESS=2, + WRK_ITEM_FAILED=3, + WRK_ITEM_STATUS_UNDEFINED +} wrk_status_t; + +typedef enum wthr_status { + WTHR_NOT_INIT=0, + WTHR_INITIALIZED=1, + WTHR_SIG_WAITING=2, + WTHR_RUNNING=3, + WTHR_NO_WORK=4, + WTHR_KILL_IT=5, + WTHR_STATUS_UNDEFINED +} wthr_status_t; + +typedef struct wrk_itm +{ + /****************************/ + /* Need to group into struct*/ + buf_pool_t* buf_pool; //buffer-pool instance + int flush_type; //flush-type for buffer-pool flush operation + int min; //minimum number of pages requested to be flushed + unsigned long long lsn_limit; //lsn limit for the buffer-pool flush operation + /****************************/ + + unsigned long result; //flush pages count + unsigned long t_usec; //time-taken in usec + long id_usr; //thread-id currently working + wrk_status_t wi_status; //flag + struct wrk_itm *next; +} wrk_t; + +typedef enum op_q_status { + Q_NOT_INIT=0, + Q_EMPTY=1, + Q_INITIALIZED=2, + Q_PROCESS=3, + Q_DONE=4, + Q_ERROR=5, + Q_STATUS_UNDEFINED +} q_status_t; + +typedef struct op_queue +{ + pthread_mutex_t mtx; + pthread_cond_t cv; + q_status_t flag; + wrk_t *head; + wrk_t *tail; +} opq_t; + +opq_t wq, cq; + +typedef struct thread_sync +{ + int wthread_id; + pthread_t wthread; + opq_t *wq; + opq_t *cq; + wthr_status_t wt_status; + unsigned long stat_universal_num_processed; + unsigned long stat_cycle_num_processed; +} thread_sync_t; + +/* Global XXX:DD needs to be cleaned */ +int exit_flag; +ulint check_wrk_done_count; +static ulint done_cnt_flag; +static int pgc_n_threads = 8; + +thread_sync_t pc_sync[PGCOMP_MAX_WORKER]; +static wrk_t work_items[PGCOMP_MAX_WORKER]; +static int pgcomp_wrk_initialized = -1; + +int set_check_done_flag_count(int cnt) +{ + return(check_wrk_done_count = cnt); +} + +int set_pgcomp_wrk_init_done(void) +{ + pgcomp_wrk_initialized = 1; + return 0; +} + +int is_pgcomp_wrk_init_done(void) +{ + return(pgcomp_wrk_initialized == 1); +} + +ulint set_done_cnt_flag(ulint val) +{ + /* + * Assumption: The thread calling into set_done_cnt_flag + * needs to have "cq.mtx" acquired, else not safe. 
+ */ + done_cnt_flag = val; + return done_cnt_flag; +} + + +ulint cv_done_inc_flag_sig(thread_sync_t * ppc) +{ + pthread_mutex_lock(&ppc->cq->mtx); + ppc->stat_universal_num_processed++; + ppc->stat_cycle_num_processed++; + done_cnt_flag++; + if(!(done_cnt_flag <= check_wrk_done_count)) { + fprintf(stderr, "ERROR: done_cnt:%lu check_wrk_done_count:%lu\n", + done_cnt_flag, check_wrk_done_count); + } + assert(done_cnt_flag <= check_wrk_done_count); + pthread_mutex_unlock(&ppc->cq->mtx); + if(done_cnt_flag == check_wrk_done_count) { + ppc->wq->flag = Q_DONE; + pthread_mutex_lock(&ppc->cq->mtx); + ppc->cq->flag = Q_DONE; + pthread_cond_signal(&ppc->cq->cv); + pthread_mutex_unlock(&ppc->cq->mtx); + } + return(done_cnt_flag); +} + +int q_remove_wrk(opq_t *q, wrk_t **wi) +{ + int ret = 0; + + if(!wi || !q) { + return -1; + } + + pthread_mutex_lock(&q->mtx); + assert(!((q->tail == NULL) && (q->head != NULL))); + assert(!((q->tail != NULL) && (q->head == NULL))); + + /* get the first in the list*/ + *wi = q->head; + if(q->head) { + ret = 0; + q->head = q->head->next; + (*wi)->next = NULL; + if(!q->head) { + q->tail = NULL; + } + } else { + q->tail = NULL; + ret = 1; /* indicating remove from queue failed */ + } + pthread_mutex_unlock(&q->mtx); + return (ret); +} + +int is_busy_wrk_itm(wrk_t *wi) +{ + if(!wi) { + return -1; + } + return(!(wi->id_usr == -1)); +} + +int setup_wrk_itm(int items) +{ + int i; + for(i=0; imtx, NULL); + pthread_cond_init(&q->cv, NULL); + q->flag = Q_INITIALIZED; + q->head = q->tail = NULL; + + return 0; +} + +#if 0 +int drain_cq(opq_t *cq, int items) +{ + int i=0; + + if(!cq) { + return -1; + } + pthread_mutex_lock(&cq->mtx); + for(i=0; ihead = cq->tail = NULL; + pthread_mutex_unlock(&cq->mtx); + return 0; +} +#endif + +int q_insert_wrk_list(opq_t *q, wrk_t *w_list) +{ + if((!q) || (!w_list)) { + fprintf(stderr, "insert failed q:%p w:%p\n", q, w_list); + return -1; + } + + pthread_mutex_lock(&q->mtx); + + assert(!((q->tail == NULL) && (q->head != NULL))); + assert(!((q->tail != NULL) && (q->head == NULL))); + + /* list is empty */ + if(!q->tail) { + q->head = q->tail = w_list; + } else { + /* added the first of the node to list */ + assert(q->head != NULL); + q->tail->next = w_list; + } + + /* move tail to the last node */ + while(q->tail->next) { + q->tail = q->tail->next; + } + pthread_mutex_unlock(&q->mtx); + + return 0; +} + +int flush_pool_instance(wrk_t *wi) +{ + struct timeval p_start_time, p_end_time, d_time; + flush_counters_t n; + + if(!wi) { + fprintf(stderr, "work item invalid wi:%p\n", wi); + return -1; + } + + wi->t_usec = 0; + if (!buf_flush_start(wi->buf_pool, (buf_flush_t)wi->flush_type)) { + /* We have two choices here. If lsn_limit was + specified then skipping an instance of buffer + pool means we cannot guarantee that all pages + up to lsn_limit has been flushed. We can + return right now with failure or we can try + to flush remaining buffer pools up to the + lsn_limit. We attempt to flush other buffer + pools based on the assumption that it will + help in the retry which will follow the + failure. */ + fprintf(stderr, "flush_start Failed, flush_type:%d\n", + (buf_flush_t)wi->flush_type); + return -1; + } + +#ifdef UNIV_DEBUG + /* Record time taken for the OP in usec */ + gettimeofday(&p_start_time, 0x0); +#endif + + if((buf_flush_t)wi->flush_type == BUF_FLUSH_LRU) { + /* srv_LRU_scan_depth can be arbitrarily large value. + * We cap it with current LRU size. 
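+		 * The buffer pool mutex is held only while the LRU list
+		 * length is read; the flush batch below runs after it has
+		 * been released.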
+ */ + buf_pool_mutex_enter(wi->buf_pool); + wi->min = UT_LIST_GET_LEN(wi->buf_pool->LRU); + buf_pool_mutex_exit(wi->buf_pool); + wi->min = ut_min(srv_LRU_scan_depth,wi->min); + } + + buf_flush_batch(wi->buf_pool, + (buf_flush_t)wi->flush_type, + wi->min, wi->lsn_limit, false, &n); + + wi->result = n.flushed; + + buf_flush_end(wi->buf_pool, (buf_flush_t)wi->flush_type); + buf_flush_common((buf_flush_t)wi->flush_type, wi->result); + +#ifdef UNIV_DEBUG + gettimeofday(&p_end_time, 0x0); + timediff(&p_end_time, &p_start_time, &d_time); + + wi->t_usec = (unsigned long)(d_time.tv_usec+(d_time.tv_sec*1000000)); +#endif + + return 0; +} + +int service_page_comp_io(thread_sync_t * ppc) +{ + wrk_t *wi = NULL; + int ret=0; + + pthread_mutex_lock(&ppc->wq->mtx); + do{ + ppc->wt_status = WTHR_SIG_WAITING; + ret = pthread_cond_wait(&ppc->wq->cv, &ppc->wq->mtx); + ppc->wt_status = WTHR_RUNNING; + if(ret == ETIMEDOUT) { + fprintf(stderr, "ERROR ETIMEDOUT cnt_flag:[%lu] ret:%d\n", + done_cnt_flag, ret); + } else if(ret == EINVAL || ret == EPERM) { + fprintf(stderr, "ERROR EINVAL/EPERM cnt_flag:[%lu] ret:%d\n", + done_cnt_flag, ret); + } + if(ppc->wq->flag == Q_PROCESS) { + break; + } else { + pthread_mutex_unlock(&ppc->wq->mtx); + return -1; + } + } while (ppc->wq->flag == Q_PROCESS && ret == 0); + + pthread_mutex_unlock(&ppc->wq->mtx); + + while (ppc->cq->flag == Q_PROCESS) { + wi = NULL; + /* Get the work item */ + if (0 != (ret = q_remove_wrk(ppc->wq, &wi))) { + ppc->wt_status = WTHR_NO_WORK; + return -1; + } + + assert(ret==0); + assert(wi != NULL); + assert(0 == is_busy_wrk_itm(wi)); + assert(wi->id_usr == -1); + + wi->id_usr = ppc->wthread; + wi->wi_status = WRK_ITEM_START; + + /* Process work item */ + if(0 != (ret = flush_pool_instance(wi))) { + fprintf(stderr, "FLUSH op failed ret:%d\n", ret); + wi->wi_status = WRK_ITEM_FAILED; + } + + ret = q_insert_wrk_list(ppc->cq, wi); + + assert(0==ret); + assert(check_wrk_done_count >= done_cnt_flag); + wi->wi_status = WRK_ITEM_SUCCESS; + if(check_wrk_done_count == cv_done_inc_flag_sig(ppc)) { + break; + } + } + return(0); +} + +/******************************************************************//** +@return a dummy parameter*/ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(page_comp_io_thread)( +/*==========================================*/ + void * arg) +{ + thread_sync_t *ppc_io = ((thread_sync_t *)arg); + + while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { + service_page_comp_io(ppc_io); + ppc_io->stat_cycle_num_processed = 0; + } + os_thread_exit(NULL); + OS_THREAD_DUMMY_RETURN; +} + +int print_queue_wrk_itm(opq_t *q) +{ +#if UNIV_DEBUG + wrk_t *wi = NULL; + + if(!q) { + fprintf(stderr, "queue NULL\n"); + return -1; + } + + if(!q->head || !q->tail) { + assert(!(((q->tail==NULL) && (q->head!=NULL)) && ((q->tail != NULL) && (q->head == NULL)))); + fprintf(stderr, "queue empty (h:%p t:%p)\n", q->head, q->tail); + return 0; + } + + pthread_mutex_lock(&q->mtx); + for(wi = q->head; (wi != NULL) ; wi = wi->next) { + //fprintf(stderr, "- [%p] %p %lu %luus [%ld] >%p\n", + // wi, wi->buf_pool, wi->result, wi->t_usec, wi->id_usr, wi->next); + fprintf(stderr, "- [%p] [%s] >%p\n", + wi, (wi->id_usr == -1)?"free":"Busy", wi->next); + } + pthread_mutex_unlock(&q->mtx); +#endif + return(0); +} + +int print_wrk_list(wrk_t *wi_list) +{ + wrk_t *wi = wi_list; + int i=0; + + if(!wi_list) { + fprintf(stderr, "list NULL\n"); + } + + while(wi) { + fprintf(stderr, "-\t[%p]\t[%s]\t[%lu]\t[%luus] > %p\n", + wi, (wi->id_usr == -1)?"free":"Busy", wi->result, 
wi->t_usec, wi->next); + wi = wi->next; + i++; + } + fprintf(stderr, "list len: %d\n", i); + return 0; +} + +int pgcomp_handler(wrk_t *w_list) +{ + int ret=0; + opq_t *wrk_q=NULL, *comp_q=NULL; + + wrk_q=&wq; + comp_q=&cq; + + pthread_mutex_lock(&wrk_q->mtx); + /* setup work queue here.. */ + wrk_q->flag = Q_EMPTY; + pthread_mutex_unlock(&wrk_q->mtx); + + ret = q_insert_wrk_list(wrk_q, w_list); + if(ret != 0) { + fprintf(stderr, "%s():work-queue setup FAILED wq:%p w_list:%p \n", + __FUNCTION__, &wq, w_list); + return -1; + } + +retry_submit: + pthread_mutex_lock(&wrk_q->mtx); + /* setup work queue here.. */ + wrk_q->flag = Q_INITIALIZED; + pthread_mutex_unlock(&wrk_q->mtx); + + + pthread_mutex_lock(&comp_q->mtx); + if(0 != set_done_cnt_flag(0)) { + fprintf(stderr, "FAILED %s:%d\n", __FILE__, __LINE__); + pthread_mutex_unlock(&comp_q->mtx); + return -1; + } + comp_q->flag = Q_PROCESS; + pthread_mutex_unlock(&comp_q->mtx); + + /* if threads are waiting request them to start */ + pthread_mutex_lock(&wrk_q->mtx); + wrk_q->flag = Q_PROCESS; + pthread_cond_broadcast(&wrk_q->cv); + pthread_mutex_unlock(&wrk_q->mtx); + + /* Wait on all worker-threads to complete */ + pthread_mutex_lock(&comp_q->mtx); + if (comp_q->flag != Q_DONE) { + do { + pthread_cond_wait(&comp_q->cv, &comp_q->mtx); + if(comp_q->flag != Q_DONE) { + fprintf(stderr, "[1] cv wait on CQ failed flag:%d cnt:%lu\n", + comp_q->flag, done_cnt_flag); + if (done_cnt_flag != srv_buf_pool_instances) { + fprintf(stderr, "[2] cv wait on CQ failed flag:%d cnt:%lu\n", + comp_q->flag, done_cnt_flag); + fprintf(stderr, "============\n"); + print_wrk_list(w_list); + fprintf(stderr, "============\n"); + } + continue; + } else if (done_cnt_flag != srv_buf_pool_instances) { + fprintf(stderr, "[3]cv wait on CQ failed flag:%d cnt:%lu\n", + comp_q->flag, done_cnt_flag); + fprintf(stderr, "============\n"); + print_wrk_list(w_list); + fprintf(stderr, "============\n"); + comp_q->flag = Q_INITIALIZED; + pthread_mutex_unlock(&comp_q->mtx); + goto retry_submit; + + assert(!done_cnt_flag); + continue; + } + assert(done_cnt_flag == srv_buf_pool_instances); + + if ((comp_q->flag == Q_DONE) && + (done_cnt_flag == srv_buf_pool_instances)) { + break; + } + } while((comp_q->flag == Q_INITIALIZED) && + (done_cnt_flag != srv_buf_pool_instances)); + } else { + fprintf(stderr, "[4] cv wait on CQ failed flag:%d cnt:%lu\n", + comp_q->flag, done_cnt_flag); + if (!done_cnt_flag) { + fprintf(stderr, "============\n"); + print_wrk_list(w_list); + fprintf(stderr, "============\n"); + comp_q->flag = Q_INITIALIZED; + pthread_mutex_unlock(&comp_q->mtx); + goto retry_submit; + assert(!done_cnt_flag); + } + assert(done_cnt_flag == srv_buf_pool_instances); + } + + pthread_mutex_unlock(&comp_q->mtx); + pthread_mutex_lock(&wrk_q->mtx); + wrk_q->flag = Q_DONE; + pthread_mutex_unlock(&wrk_q->mtx); + + return 0; +} + +/******************************************************************//** +@return a dummy parameter*/ +int pgcomp_handler_init(int num_threads, int wrk_cnt, opq_t *wq, opq_t *cq) +{ + int i=0; + + if(is_pgcomp_wrk_init_done()) { + fprintf(stderr, "pgcomp_handler_init(): ERROR already initialized\n"); + return -1; + } + + if(!wq || !cq) { + fprintf(stderr, "%s() FAILED wq:%p cq:%p\n", __FUNCTION__, wq, cq); + return -1; + } + + /* work-item setup */ + setup_wrk_itm(wrk_cnt); + + /* wq & cq setup */ + init_queue(wq); + init_queue(cq); + + /* Mark each of the thread sync entires */ + for(i=0; i < PGCOMP_MAX_WORKER; i++) { + pc_sync[i].wthread_id = i; + } + + /* Create threads 
for page-compression-flush */ + for(i=0; i < num_threads; i++) { + pc_sync[i].wthread_id = i; + pc_sync[i].wq = wq; + pc_sync[i].cq = cq; + os_thread_create(page_comp_io_thread, ((void *)(pc_sync + i)), + thread_ids + START_PGCOMP_CNT + i); + //pc_sync[i].wthread = thread_ids[START_PGCOMP_CNT + i]; + pc_sync[i].wthread = (START_PGCOMP_CNT + i); + pc_sync[i].wt_status = WTHR_INITIALIZED; + } + + set_check_done_flag_count(wrk_cnt); + set_pgcomp_wrk_init_done(); + + return 0; +} + + +int wrk_thread_stat(thread_sync_t *wthr, unsigned int num_threads) +{ + long stat_tot=0; + unsigned int i=0; + for(i=0; i< num_threads;i++) { + stat_tot+=wthr[i].stat_universal_num_processed; + fprintf(stderr, "[%d] stat [%lu]\n", wthr[i].wthread_id, + wthr[i].stat_universal_num_processed); + } + fprintf(stderr, "Stat-Total:%lu\n", stat_tot); + return (0); +} + +int reset_wrk_itm(int items) +{ + int i; + + pthread_mutex_lock(&wq.mtx); + wq.head = wq.tail = NULL; + pthread_mutex_unlock(&wq.mtx); + + pthread_mutex_lock(&cq.mtx); + for(i=0;i Date: Tue, 4 Feb 2014 14:52:02 +0200 Subject: [PATCH 10/56] Fixed issue on atomic writes on startup, removed incorrect assert. Fixed issue on file space extend when posix_fallocate is used. Merged second iteration of multi-threaded flush code. --- .../r/innodb_monitor_disable_basic.result | 8 + storage/innobase/buf/buf0flu.cc | 122 +--- storage/innobase/fil/fil0fil.cc | 1 - storage/innobase/include/dict0dict.ic | 4 + storage/innobase/include/srv0srv.h | 4 + storage/innobase/srv/srv0start.cc | 670 +++++------------ storage/xtradb/buf/buf0flu.cc | 120 +--- storage/xtradb/fil/fil0fil.cc | 51 +- storage/xtradb/include/dict0dict.ic | 1 - storage/xtradb/include/srv0srv.h | 4 + storage/xtradb/srv/srv0start.cc | 675 +++++------------- 11 files changed, 515 insertions(+), 1145 deletions(-) diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result index ce57dbb2fdc..78d294e5f09 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result @@ -37,6 +37,7 @@ buffer_pool_bytes_dirty disabled buffer_pool_pages_free disabled buffer_pages_created disabled buffer_pages_written disabled +buffer_index_pages_written disabled buffer_pages_read disabled buffer_data_reads disabled buffer_data_written disabled @@ -160,6 +161,13 @@ compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled compression_pad_decrements disabled +compress_saved disabled +compress_trim_sect512 disabled +compress_trim_sect4096 disabled +compress_pages_page_compressed disabled +compress_page_compressed_trim_op disabled +compress_page_compressed_trim_op_saved disabled +compress_pages_page_decompressed disabled index_splits disabled index_merges disabled adaptive_hash_searches disabled diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index d159ddbe23f..ff1fab6eae7 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -46,6 +46,7 @@ Created 11/11/1995 Heikki Tuuri #include "ibuf0ibuf.h" #include "log0log.h" #include "os0file.h" +#include "os0sync.h" #include "trx0sys.h" #include "srv0mon.h" #include "mysql/plugin.h" @@ -1934,11 +1935,16 @@ buf_flush_LRU( /* JAN: TODO: */ /*******************************************************************//**/ extern int is_pgcomp_wrk_init_done(void); -extern int pgcomp_flush_work_items(int buf_pool_inst, int 
*pages_flushed, - int flush_type, int min_n, unsigned long long lsn_limit); +extern int pgcomp_flush_work_items( + int buf_pool_inst, + int *pages_flushed, + enum buf_flush flush_type, + int min_n, + lsn_t lsn_limit); #define MT_COMP_WATER_MARK 50 +#ifdef UNIV_DEBUG #include int timediff(struct timeval *g_time, struct timeval *s_time, struct timeval *d_time) { @@ -1959,8 +1965,15 @@ int timediff(struct timeval *g_time, struct timeval *s_time, struct timeval *d_t return 0; } +#endif + +static os_fast_mutex_t pgcomp_mtx; + +void pgcomp_init(void) +{ + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &pgcomp_mtx); +} -static pthread_mutex_t pgcomp_mtx = PTHREAD_MUTEX_INITIALIZER; /*******************************************************************//** Multi-threaded version of buf_flush_list */ @@ -1983,7 +1996,10 @@ pgcomp_buf_flush_list( { ulint i; bool success = true; +#ifdef UNIV_DEBUG struct timeval p_start_time, p_end_time, d_time; +#endif + int cnt_flush[MTFLUSH_MAX_WORKER]; if (n_processed) { *n_processed = 0; @@ -2001,96 +2017,34 @@ pgcomp_buf_flush_list( #ifdef UNIV_DEBUG gettimeofday(&p_start_time, 0x0); #endif - if(is_pgcomp_wrk_init_done() && (min_n > MT_COMP_WATER_MARK)) { - int cnt_flush[32]; + os_fast_mutex_lock(&pgcomp_mtx); + pgcomp_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LIST, + min_n, lsn_limit); + os_fast_mutex_unlock(&pgcomp_mtx); - //stack_trace(); - pthread_mutex_lock(&pgcomp_mtx); - //gettimeofday(&p_start_time, 0x0); - //fprintf(stderr, "Calling into wrk-pgcomp [min:%lu]", min_n); - pgcomp_flush_work_items(srv_buf_pool_instances, - cnt_flush, BUF_FLUSH_LIST, - min_n, lsn_limit); - - for (i = 0; i < srv_buf_pool_instances; i++) { - if (n_processed) { - *n_processed += cnt_flush[i]; - } - if (cnt_flush[i]) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_BATCH_TOTAL_PAGE, - MONITOR_FLUSH_BATCH_COUNT, - MONITOR_FLUSH_BATCH_PAGES, - cnt_flush[i]); - - } - } - - pthread_mutex_unlock(&pgcomp_mtx); - -#ifdef UNIV_DEBUG - gettimeofday(&p_end_time, 0x0); - timediff(&p_end_time, &p_start_time, &d_time); - fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu %llu usec]\n", ( - min_n * srv_buf_pool_instances), *n_processed, - (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); -#endif - return(success); - } - /* Flush to lsn_limit in all buffer pool instances */ for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - ulint page_count = 0; - - buf_pool = buf_pool_from_array(i); - - if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) { - /* We have two choices here. If lsn_limit was - specified then skipping an instance of buffer - pool means we cannot guarantee that all pages - up to lsn_limit has been flushed. We can - return right now with failure or we can try - to flush remaining buffer pools up to the - lsn_limit. We attempt to flush other buffer - pools based on the assumption that it will - help in the retry which will follow the - failure. 
*/ - success = false; - - continue; - } - - page_count = buf_flush_batch( - buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit); - - buf_flush_end(buf_pool, BUF_FLUSH_LIST); - - buf_flush_common(BUF_FLUSH_LIST, page_count); - if (n_processed) { - *n_processed += page_count; + *n_processed += cnt_flush[i]; } - - if (page_count) { + if (cnt_flush[i]) { MONITOR_INC_VALUE_CUMULATIVE( MONITOR_FLUSH_BATCH_TOTAL_PAGE, MONITOR_FLUSH_BATCH_COUNT, MONITOR_FLUSH_BATCH_PAGES, - page_count); + cnt_flush[i]); } } - -#if UNIV_DEBUG +#ifdef UNIV_DEBUG gettimeofday(&p_end_time, 0x0); timediff(&p_end_time, &p_start_time, &d_time); - - fprintf(stderr, "[2] [*n_processed: (min:%lu)%lu %llu usec]\n", ( - min_n * srv_buf_pool_instances), *n_processed, - (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); + fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu %llu usec]\n", + __FUNCTION__, (min_n * srv_buf_pool_instances), *n_processed, + (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); #endif return(success); } -#endif + /* JAN: TODO: END: */ /*******************************************************************//** @@ -2292,18 +2246,21 @@ ulint pgcomp_buf_flush_LRU_tail(void) /*====================*/ { +#ifdef UNIV_DEBUG struct timeval p_start_time, p_end_time, d_time; +#endif ulint total_flushed=0, i=0; int cnt_flush[32]; -#if UNIV_DEBUG +#ifdef UNIV_DEBUG gettimeofday(&p_start_time, 0x0); #endif - assert(is_pgcomp_wrk_init_done()); + ut_ad(is_pgcomp_wrk_init_done()); - pthread_mutex_lock(&pgcomp_mtx); + os_fast_mutex_lock(&pgcomp_mtx); pgcomp_flush_work_items(srv_buf_pool_instances, cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); + os_fast_mutex_unlock(&pgcomp_mtx); for (i = 0; i < srv_buf_pool_instances; i++) { if (cnt_flush[i]) { @@ -2317,8 +2274,6 @@ pgcomp_buf_flush_LRU_tail(void) } } - pthread_mutex_unlock(&pgcomp_mtx); - #if UNIV_DEBUG gettimeofday(&p_end_time, 0x0); timediff(&p_end_time, &p_start_time, &d_time); @@ -2894,6 +2849,7 @@ buf_flush_validate( } #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ +#endif /* !UNIV_HOTBACKUP */ #ifdef UNIV_DEBUG diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 3803d0a93aa..2430df2b386 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -1280,7 +1280,6 @@ fil_space_create( DBUG_EXECUTE_IF("fil_space_create_failure", return(false);); ut_a(fil_system); - ut_a(fsp_flags_is_valid(flags)); /* Look for a matching tablespace and if found free it. 
*/ do { diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index f9d548681a8..ed891a00fd4 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -859,6 +859,10 @@ dict_tf_set( if (awrites != ATOMIC_WRITES_DEFAULT) { *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); ut_ad(dict_tf_get_atomic_writes(*flags) == awrites); + } + + if (awrites == ATOMIC_WRITES_ON || + (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes )) { *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS); } diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index a11c213d534..008a77ddedf 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -257,6 +257,10 @@ extern my_bool srv_use_atomic_writes; /* If this flag IS TRUE, then we use lz4 to compress/decompress pages */ extern my_bool srv_use_lz4; +/* Number of flush threads */ +#define MTFLUSH_MAX_WORKER 64 +extern ulint srv_mtflush_threads; + #ifdef __WIN__ extern ibool srv_use_native_conditions; #endif /* __WIN__ */ diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 0517f4b1468..18d6cd109e7 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -3,7 +3,7 @@ Copyright (c) 1996, 2012, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. Those modifications are gratefully acknowledged and are described @@ -127,10 +127,9 @@ static os_file_t files[1000]; /** io_handler_thread parameters for thread identification */ static ulint n[SRV_MAX_N_IO_THREADS + 6]; /** io_handler_thread identifiers, 32 is the maximum number of purge threads */ -/** pgcomp_thread are 16 total */ -#define START_PGCOMP_CNT (SRV_MAX_N_IO_THREADS + 6 + 32) -#define PGCOMP_MAX_WORKER 16 -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32 + PGCOMP_MAX_WORKER]; +/** 6 is the ? */ +#define START_OLD_THREAD_CNT (SRV_MAX_N_IO_THREADS + 6 + 32) +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + 32 + MTFLUSH_MAX_WORKER]; /** We use this mutex to test the return value of pthread_mutex_trylock on successful locking. HP-UX does NOT return 0, though Linux et al do. 
*/ @@ -1442,89 +1441,79 @@ extern ibool buf_flush_start(buf_pool_t* buf_pool, enum buf_flush flush_type); extern void buf_flush_end(buf_pool_t* buf_pool, enum buf_flush flush_type); extern void buf_flush_common(enum buf_flush flush_type, ulint page_count); extern ulint buf_flush_batch(buf_pool_t* buf_pool, enum buf_flush flush_type, ulint min_n, lsn_t lsn_limit); +extern void pgcomp_init(void); typedef enum wrk_status { - WRK_ITEM_SET=0, - WRK_ITEM_START=1, - WRK_ITEM_DONE=2, - WRK_ITEM_SUCCESS=2, - WRK_ITEM_FAILED=3, - WRK_ITEM_STATUS_UNDEFINED + WRK_ITEM_SET=0, // wrk-item is set + WRK_ITEM_START=1, // processing of wrk-item has started + WRK_ITEM_DONE=2, // processing is done usually set to SUCCESS/FAILED + WRK_ITEM_SUCCESS=2, // Success processing the wrk-item + WRK_ITEM_FAILED=3, // status of failed + WRK_ITEM_EXIT=4, + WRK_ITEM_STATUS_UNDEFINED } wrk_status_t; +typedef enum mt_wrk_tsk { + MT_WRK_NONE=0, // Exit queue-wait + MT_WRK_WRITE=1, // Flush operation + MT_WRK_READ=2, // Decompress operation + MT_WRK_UNDEFINED +} mt_wrk_tsk_t; + typedef enum wthr_status { - WTHR_NOT_INIT=0, - WTHR_INITIALIZED=1, - WTHR_SIG_WAITING=2, - WTHR_RUNNING=3, - WTHR_NO_WORK=4, - WTHR_KILL_IT=5, - WTHR_STATUS_UNDEFINED + WTHR_NOT_INIT=0, + WTHR_INITIALIZED=1, + WTHR_SIG_WAITING=2, + WTHR_RUNNING=3, + WTHR_NO_WORK=4, + WTHR_KILL_IT=5, + WTHR_STATUS_UNDEFINED } wthr_status_t; +typedef struct wr_tsk { + buf_pool_t *buf_pool; // buffer-pool instance + enum buf_flush flush_type; // flush-type for buffer-pool flush operation + ulint min; //minimum number of pages requested to be flushed + lsn_t lsn_limit;//lsn limit for the buffer-pool flush operation +} wr_tsk_t; + + +typedef struct rd_tsk { + void *page_pool; //list of pages to decompress; +} rd_tsk_t; + typedef struct wrk_itm { - /****************************/ - /* Need to group into struct*/ - buf_pool_t* buf_pool; //buffer-pool instance - int flush_type; //flush-type for buffer-pool flush operation - int min; //minimum number of pages requested to be flushed - unsigned long long lsn_limit; //lsn limit for the buffer-pool flush operation - /****************************/ - - unsigned long result; //flush pages count - unsigned long t_usec; //time-taken in usec - long id_usr; //thread-id currently working - wrk_status_t wi_status; //flag - struct wrk_itm *next; + mt_wrk_tsk_t tsk; + /* based on task-type one of the entries wr_tsk/rd_tsk will be used */ + wr_tsk_t wr; //flush page list + rd_tsk_t rd; //decompress page list + unsigned long result; //flush pages count + unsigned long t_usec; //time-taken in usec + long id_usr; //thread-id currently working + wrk_status_t wi_status; //flag + struct wrk_itm *next; } wrk_t; -typedef enum op_q_status { - Q_NOT_INIT=0, - Q_EMPTY=1, - Q_INITIALIZED=2, - Q_PROCESS=3, - Q_DONE=4, - Q_ERROR=5, - Q_STATUS_UNDEFINED -} q_status_t; - -typedef struct op_queue -{ - pthread_mutex_t mtx; - pthread_cond_t cv; - q_status_t flag; - wrk_t *head; - wrk_t *tail; -} opq_t; - -opq_t wq, cq; - typedef struct thread_sync { - int wthread_id; - pthread_t wthread; - opq_t *wq; - opq_t *cq; - wthr_status_t wt_status; + int wthread_id; + os_thread_t wthread; + ib_wqueue_t *wq; // work Queue + ib_wqueue_t *wr_cq;// Write Completion Queue + ib_wqueue_t *rd_cq; // Read Completion Queue + wthr_status_t wt_status; // Worker Thread status unsigned long stat_universal_num_processed; unsigned long stat_cycle_num_processed; } thread_sync_t; /* Global XXX:DD needs to be cleaned */ -int exit_flag; -ulint check_wrk_done_count; -static ulint 
done_cnt_flag; -static int pgc_n_threads = 8; - -thread_sync_t pc_sync[PGCOMP_MAX_WORKER]; -static wrk_t work_items[PGCOMP_MAX_WORKER]; +ib_wqueue_t *wq=NULL, *wr_cq=NULL, *rd_cq=NULL; +mem_heap_t *heap_allocated=NULL; +thread_sync_t pc_sync[MTFLUSH_MAX_WORKER]; +static wrk_t work_items[MTFLUSH_MAX_WORKER]; static int pgcomp_wrk_initialized = -1; - -int set_check_done_flag_count(int cnt) -{ - return(check_wrk_done_count = cnt); -} +ulint srv_mtflush_threads = 0; int set_pgcomp_wrk_init_done(void) { @@ -1537,83 +1526,14 @@ int is_pgcomp_wrk_init_done(void) return(pgcomp_wrk_initialized == 1); } -ulint set_done_cnt_flag(ulint val) -{ - /* - * Assumption: The thread calling into set_done_cnt_flag - * needs to have "cq.mtx" acquired, else not safe. - */ - done_cnt_flag = val; - return done_cnt_flag; -} - - -ulint cv_done_inc_flag_sig(thread_sync_t * ppc) -{ - pthread_mutex_lock(&ppc->cq->mtx); - ppc->stat_universal_num_processed++; - ppc->stat_cycle_num_processed++; - done_cnt_flag++; - if(!(done_cnt_flag <= check_wrk_done_count)) { - fprintf(stderr, "ERROR: done_cnt:%lu check_wrk_done_count:%lu\n", - done_cnt_flag, check_wrk_done_count); - } - assert(done_cnt_flag <= check_wrk_done_count); - pthread_mutex_unlock(&ppc->cq->mtx); - if(done_cnt_flag == check_wrk_done_count) { - ppc->wq->flag = Q_DONE; - pthread_mutex_lock(&ppc->cq->mtx); - ppc->cq->flag = Q_DONE; - pthread_cond_signal(&ppc->cq->cv); - pthread_mutex_unlock(&ppc->cq->mtx); - } - return(done_cnt_flag); -} - -int q_remove_wrk(opq_t *q, wrk_t **wi) -{ - int ret = 0; - - if(!wi || !q) { - return -1; - } - - pthread_mutex_lock(&q->mtx); - assert(!((q->tail == NULL) && (q->head != NULL))); - assert(!((q->tail != NULL) && (q->head == NULL))); - - /* get the first in the list*/ - *wi = q->head; - if(q->head) { - ret = 0; - q->head = q->head->next; - (*wi)->next = NULL; - if(!q->head) { - q->tail = NULL; - } - } else { - q->tail = NULL; - ret = 1; /* indicating remove from queue failed */ - } - pthread_mutex_unlock(&q->mtx); - return (ret); -} - -int is_busy_wrk_itm(wrk_t *wi) -{ - if(!wi) { - return -1; - } - return(!(wi->id_usr == -1)); -} - int setup_wrk_itm(int items) { int i; for(i=0; imtx, NULL); - pthread_cond_init(&q->cv, NULL); - q->flag = Q_INITIALIZED; - q->head = q->tail = NULL; - - return 0; -} - -#if 0 -int drain_cq(opq_t *cq, int items) -{ - int i=0; - - if(!cq) { - return -1; - } - pthread_mutex_lock(&cq->mtx); - for(i=0; ihead = cq->tail = NULL; - pthread_mutex_unlock(&cq->mtx); - return 0; -} -#endif - -int q_insert_wrk_list(opq_t *q, wrk_t *w_list) -{ - if((!q) || (!w_list)) { - fprintf(stderr, "insert failed q:%p w:%p\n", q, w_list); - return -1; - } - - pthread_mutex_lock(&q->mtx); - - assert(!((q->tail == NULL) && (q->head != NULL))); - assert(!((q->tail != NULL) && (q->head == NULL))); - - /* list is empty */ - if(!q->tail) { - q->head = q->tail = w_list; - } else { - /* added the first of the node to list */ - assert(q->head != NULL); - q->tail->next = w_list; - } - - /* move tail to the last node */ - while(q->tail->next) { - q->tail = q->tail->next; - } - pthread_mutex_unlock(&q->mtx); - - return 0; -} - int flush_pool_instance(wrk_t *wi) { struct timeval p_start_time, p_end_time, d_time; - if(!wi) { + if (!wi) { fprintf(stderr, "work item invalid wi:%p\n", wi); return -1; } - wi->t_usec = 0; - if (!buf_flush_start(wi->buf_pool, (buf_flush)wi->flush_type)) { + if (!wi->wr.buf_pool) { + fprintf(stderr, "work-item wi->buf_pool:%p [likely thread exit]\n", + wi->wr.buf_pool); + return -1; + } + + wi->t_usec = 0; + if 
(!buf_flush_start(wi->wr.buf_pool, wi->wr.flush_type)) { /* We have two choices here. If lsn_limit was specified then skipping an instance of buffer pool means we cannot guarantee that all pages @@ -1709,39 +1571,34 @@ int flush_pool_instance(wrk_t *wi) help in the retry which will follow the failure. */ fprintf(stderr, "flush_start Failed, flush_type:%d\n", - (buf_flush)wi->flush_type); + wi->wr.flush_type); return -1; } -#ifdef UNIV_DEBUG /* Record time taken for the OP in usec */ gettimeofday(&p_start_time, 0x0); -#endif - if((buf_flush)wi->flush_type == BUF_FLUSH_LRU) { - /* srv_LRU_scan_depth can be arbitrarily large value. - * We cap it with current LRU size. - */ - buf_pool_mutex_enter(wi->buf_pool); - wi->min = UT_LIST_GET_LEN(wi->buf_pool->LRU); - buf_pool_mutex_exit(wi->buf_pool); - wi->min = ut_min(srv_LRU_scan_depth,wi->min); - } + if (wi->wr.flush_type == BUF_FLUSH_LRU) { + /* srv_LRU_scan_depth can be arbitrarily large value. + * We cap it with current LRU size. + */ + buf_pool_mutex_enter(wi->wr.buf_pool); + wi->wr.min = UT_LIST_GET_LEN(wi->wr.buf_pool->LRU); + buf_pool_mutex_exit(wi->wr.buf_pool); + wi->wr.min = ut_min(srv_LRU_scan_depth,wi->wr.min); + } - wi->result = buf_flush_batch(wi->buf_pool, - (buf_flush)wi->flush_type, - wi->min, wi->lsn_limit); + wi->result = buf_flush_batch(wi->wr.buf_pool, + wi->wr.flush_type, + wi->wr.min, wi->wr.lsn_limit); - buf_flush_end(wi->buf_pool, (buf_flush)wi->flush_type); - buf_flush_common((buf_flush)wi->flush_type, wi->result); + buf_flush_end(wi->wr.buf_pool, wi->wr.flush_type); + buf_flush_common(wi->wr.flush_type, wi->result); -#ifdef UNIV_DEBUG gettimeofday(&p_end_time, 0x0); timediff(&p_end_time, &p_start_time, &d_time); wi->t_usec = (unsigned long)(d_time.tv_usec+(d_time.tv_sec*1000000)); -#endif - return 0; } @@ -1750,68 +1607,75 @@ int service_page_comp_io(thread_sync_t * ppc) wrk_t *wi = NULL; int ret=0; - pthread_mutex_lock(&ppc->wq->mtx); - do{ - ppc->wt_status = WTHR_SIG_WAITING; - ret = pthread_cond_wait(&ppc->wq->cv, &ppc->wq->mtx); + ppc->wt_status = WTHR_SIG_WAITING; + wi = (wrk_t *)ib_wqueue_wait(ppc->wq); + + if (wi) { ppc->wt_status = WTHR_RUNNING; - if(ret == ETIMEDOUT) { - fprintf(stderr, "ERROR ETIMEDOUT cnt_flag:[%lu] ret:%d\n", - done_cnt_flag, ret); - } else if(ret == EINVAL || ret == EPERM) { - fprintf(stderr, "ERROR EINVAL/EPERM cnt_flag:[%lu] ret:%d\n", - done_cnt_flag, ret); - } - if(ppc->wq->flag == Q_PROCESS) { - break; - } else { - pthread_mutex_unlock(&ppc->wq->mtx); - return -1; - } - } while (ppc->wq->flag == Q_PROCESS && ret == 0); + } else { + fprintf(stderr, "%s:%d work-item is NULL\n", __FILE__, __LINE__); + ppc->wt_status = WTHR_NO_WORK; + return (0); + } - pthread_mutex_unlock(&ppc->wq->mtx); + assert(wi != NULL); + wi->id_usr = ppc->wthread; - while (ppc->cq->flag == Q_PROCESS) { - wi = NULL; - /* Get the work item */ - if (0 != (ret = q_remove_wrk(ppc->wq, &wi))) { - ppc->wt_status = WTHR_NO_WORK; - return -1; - } + switch(wi->tsk) { + case MT_WRK_NONE: + assert(wi->wi_status == WRK_ITEM_EXIT); + wi->wi_status = WRK_ITEM_SUCCESS; + ib_wqueue_add(ppc->wr_cq, wi, heap_allocated); + break; - assert(ret==0); - assert(wi != NULL); - assert(0 == is_busy_wrk_itm(wi)); - assert(wi->id_usr == -1); - - wi->id_usr = ppc->wthread; + case MT_WRK_WRITE: wi->wi_status = WRK_ITEM_START; - /* Process work item */ - if(0 != (ret = flush_pool_instance(wi))) { + if (0 != (ret = flush_pool_instance(wi))) { fprintf(stderr, "FLUSH op failed ret:%d\n", ret); wi->wi_status = WRK_ITEM_FAILED; } - - ret = 
q_insert_wrk_list(ppc->cq, wi); - - assert(0==ret); - assert(check_wrk_done_count >= done_cnt_flag); wi->wi_status = WRK_ITEM_SUCCESS; - if(check_wrk_done_count == cv_done_inc_flag_sig(ppc)) { - break; - } + ib_wqueue_add(ppc->wr_cq, wi, heap_allocated); + break; + + case MT_WRK_READ: + /* Need to also handle the read case */ + assert(0); + /* completed task get added to rd_cq */ + /* wi->wi_status = WRK_ITEM_SUCCESS; + ib_wqueue_add(ppc->rd_cq, wi, heap_allocated);*/ + break; + + default: + /* None other than Write/Read handling planned */ + assert(0); } + + ppc->wt_status = WTHR_NO_WORK; return(0); } +void page_comp_io_thread_exit() +{ + ulint i; + + fprintf(stderr, "signal page_comp_io_threads to exit [%lu]\n", srv_buf_pool_instances); + for (i=0; ihead || !q->tail) { - assert(!(((q->tail==NULL) && (q->head!=NULL)) && ((q->tail != NULL) && (q->head == NULL)))); - fprintf(stderr, "queue empty (h:%p t:%p)\n", q->head, q->tail); - return 0; - } - - pthread_mutex_lock(&q->mtx); - for(wi = q->head; (wi != NULL) ; wi = wi->next) { - //fprintf(stderr, "- [%p] %p %lu %luus [%ld] >%p\n", - // wi, wi->buf_pool, wi->result, wi->t_usec, wi->id_usr, wi->next); - fprintf(stderr, "- [%p] [%s] >%p\n", - wi, (wi->id_usr == -1)?"free":"Busy", wi->next); - } - pthread_mutex_unlock(&q->mtx); -#endif - return(0); -} - int print_wrk_list(wrk_t *wi_list) { wrk_t *wi = wi_list; @@ -1871,111 +1707,9 @@ int print_wrk_list(wrk_t *wi_list) return 0; } -int pgcomp_handler(wrk_t *w_list) -{ - int ret=0; - opq_t *wrk_q=NULL, *comp_q=NULL; - - wrk_q=&wq; - comp_q=&cq; - - pthread_mutex_lock(&wrk_q->mtx); - /* setup work queue here.. */ - wrk_q->flag = Q_EMPTY; - pthread_mutex_unlock(&wrk_q->mtx); - - ret = q_insert_wrk_list(wrk_q, w_list); - if(ret != 0) { - fprintf(stderr, "%s():work-queue setup FAILED wq:%p w_list:%p \n", - __FUNCTION__, &wq, w_list); - return -1; - } - -retry_submit: - pthread_mutex_lock(&wrk_q->mtx); - /* setup work queue here.. 
*/ - wrk_q->flag = Q_INITIALIZED; - pthread_mutex_unlock(&wrk_q->mtx); - - - pthread_mutex_lock(&comp_q->mtx); - if(0 != set_done_cnt_flag(0)) { - fprintf(stderr, "FAILED %s:%d\n", __FILE__, __LINE__); - pthread_mutex_unlock(&comp_q->mtx); - return -1; - } - comp_q->flag = Q_PROCESS; - pthread_mutex_unlock(&comp_q->mtx); - - /* if threads are waiting request them to start */ - pthread_mutex_lock(&wrk_q->mtx); - wrk_q->flag = Q_PROCESS; - pthread_cond_broadcast(&wrk_q->cv); - pthread_mutex_unlock(&wrk_q->mtx); - - /* Wait on all worker-threads to complete */ - pthread_mutex_lock(&comp_q->mtx); - if (comp_q->flag != Q_DONE) { - do { - pthread_cond_wait(&comp_q->cv, &comp_q->mtx); - if(comp_q->flag != Q_DONE) { - fprintf(stderr, "[1] cv wait on CQ failed flag:%d cnt:%lu\n", - comp_q->flag, done_cnt_flag); - if (done_cnt_flag != srv_buf_pool_instances) { - fprintf(stderr, "[2] cv wait on CQ failed flag:%d cnt:%lu\n", - comp_q->flag, done_cnt_flag); - fprintf(stderr, "============\n"); - print_wrk_list(w_list); - fprintf(stderr, "============\n"); - } - continue; - } else if (done_cnt_flag != srv_buf_pool_instances) { - fprintf(stderr, "[3]cv wait on CQ failed flag:%d cnt:%lu\n", - comp_q->flag, done_cnt_flag); - fprintf(stderr, "============\n"); - print_wrk_list(w_list); - fprintf(stderr, "============\n"); - comp_q->flag = Q_INITIALIZED; - pthread_mutex_unlock(&comp_q->mtx); - goto retry_submit; - - assert(!done_cnt_flag); - continue; - } - assert(done_cnt_flag == srv_buf_pool_instances); - - if ((comp_q->flag == Q_DONE) && - (done_cnt_flag == srv_buf_pool_instances)) { - break; - } - } while((comp_q->flag == Q_INITIALIZED) && - (done_cnt_flag != srv_buf_pool_instances)); - } else { - fprintf(stderr, "[4] cv wait on CQ failed flag:%d cnt:%lu\n", - comp_q->flag, done_cnt_flag); - if (!done_cnt_flag) { - fprintf(stderr, "============\n"); - print_wrk_list(w_list); - fprintf(stderr, "============\n"); - comp_q->flag = Q_INITIALIZED; - pthread_mutex_unlock(&comp_q->mtx); - goto retry_submit; - assert(!done_cnt_flag); - } - assert(done_cnt_flag == srv_buf_pool_instances); - } - - pthread_mutex_unlock(&comp_q->mtx); - pthread_mutex_lock(&wrk_q->mtx); - wrk_q->flag = Q_DONE; - pthread_mutex_unlock(&wrk_q->mtx); - - return 0; -} - /******************************************************************//** @return a dummy parameter*/ -int pgcomp_handler_init(int num_threads, int wrk_cnt, opq_t *wq, opq_t *cq) +int pgcomp_handler_init(int num_threads, int wrk_cnt, ib_wqueue_t *wq, ib_wqueue_t *wr_cq, ib_wqueue_t *rd_cq) { int i=0; @@ -1984,106 +1718,89 @@ int pgcomp_handler_init(int num_threads, int wrk_cnt, opq_t *wq, opq_t *cq) return -1; } - if(!wq || !cq) { - fprintf(stderr, "%s() FAILED wq:%p cq:%p\n", __FUNCTION__, wq, cq); + if(!wq || !wr_cq || !rd_cq) { + fprintf(stderr, "%s() FAILED wq:%p write-cq:%p read-cq:%p\n", + __FUNCTION__, wq, wr_cq, rd_cq); return -1; } /* work-item setup */ setup_wrk_itm(wrk_cnt); - /* wq & cq setup */ - init_queue(wq); - init_queue(cq); - /* Mark each of the thread sync entires */ - for(i=0; i < PGCOMP_MAX_WORKER; i++) { - pc_sync[i].wthread_id = i; + for(i=0; i < MTFLUSH_MAX_WORKER; i++) { + pc_sync[i].wthread_id = i; } /* Create threads for page-compression-flush */ for(i=0; i < num_threads; i++) { pc_sync[i].wthread_id = i; pc_sync[i].wq = wq; - pc_sync[i].cq = cq; + pc_sync[i].wr_cq = wr_cq; + pc_sync[i].rd_cq = rd_cq; + os_thread_create(page_comp_io_thread, ((void *)(pc_sync + i)), - thread_ids + START_PGCOMP_CNT + i); - //pc_sync[i].wthread = 
thread_ids[START_PGCOMP_CNT + i]; - pc_sync[i].wthread = (START_PGCOMP_CNT + i); + thread_ids + START_OLD_THREAD_CNT + i); + pc_sync[i].wthread = (START_OLD_THREAD_CNT + i); pc_sync[i].wt_status = WTHR_INITIALIZED; } - - set_check_done_flag_count(wrk_cnt); set_pgcomp_wrk_init_done(); - + fprintf(stderr, "%s() Worker-Threads created..\n", __FUNCTION__); return 0; } - int wrk_thread_stat(thread_sync_t *wthr, unsigned int num_threads) { - long stat_tot=0; - unsigned int i=0; - for(i=0; i< num_threads;i++) { + ulong stat_tot=0; + ulint i=0; + for(i=0; i int timediff(struct timeval *g_time, struct timeval *s_time, struct timeval *d_time) { @@ -2038,8 +2044,15 @@ int timediff(struct timeval *g_time, struct timeval *s_time, struct timeval *d_t return 0; } +#endif + +static os_fast_mutex_t pgcomp_mtx; + +void pgcomp_init(void) +{ + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &pgcomp_mtx); +} -static pthread_mutex_t pgcomp_mtx = PTHREAD_MUTEX_INITIALIZER; /*******************************************************************//** Multi-threaded version of buf_flush_list */ @@ -2062,8 +2075,10 @@ pgcomp_buf_flush_list( { ulint i; bool success = true; +#ifdef UNIV_DEBUG struct timeval p_start_time, p_end_time, d_time; - flush_counters_t n; +#endif + int cnt_flush[MTFLUSH_MAX_WORKER]; if (n_processed) { *n_processed = 0; @@ -2081,91 +2096,30 @@ pgcomp_buf_flush_list( #ifdef UNIV_DEBUG gettimeofday(&p_start_time, 0x0); #endif - if(is_pgcomp_wrk_init_done() && (min_n > MT_COMP_WATER_MARK)) { - int cnt_flush[32]; + os_fast_mutex_lock(&pgcomp_mtx); + pgcomp_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LIST, + min_n, lsn_limit); + os_fast_mutex_unlock(&pgcomp_mtx); - //stack_trace(); - pthread_mutex_lock(&pgcomp_mtx); - //gettimeofday(&p_start_time, 0x0); - //fprintf(stderr, "Calling into wrk-pgcomp [min:%lu]", min_n); - pgcomp_flush_work_items(srv_buf_pool_instances, - cnt_flush, BUF_FLUSH_LIST, - min_n, lsn_limit); - - for (i = 0; i < srv_buf_pool_instances; i++) { - if (n_processed) { - *n_processed += cnt_flush[i]; - } - if (cnt_flush[i]) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_BATCH_TOTAL_PAGE, - MONITOR_FLUSH_BATCH_COUNT, - MONITOR_FLUSH_BATCH_PAGES, - cnt_flush[i]); - - } - } - - pthread_mutex_unlock(&pgcomp_mtx); - -#ifdef UNIV_DEBUG - gettimeofday(&p_end_time, 0x0); - timediff(&p_end_time, &p_start_time, &d_time); - fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu %llu usec]\n", ( - min_n * srv_buf_pool_instances), *n_processed, - (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); -#endif - return(success); - } - /* Flush to lsn_limit in all buffer pool instances */ for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - - buf_pool = buf_pool_from_array(i); - - if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) { - /* We have two choices here. If lsn_limit was - specified then skipping an instance of buffer - pool means we cannot guarantee that all pages - up to lsn_limit has been flushed. We can - return right now with failure or we can try - to flush remaining buffer pools up to the - lsn_limit. We attempt to flush other buffer - pools based on the assumption that it will - help in the retry which will follow the - failure. 
*/ - success = false; - - continue; - } - - buf_flush_batch( - buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit, false, &n); - - buf_flush_end(buf_pool, BUF_FLUSH_LIST); - - buf_flush_common(BUF_FLUSH_LIST, n.flushed); - if (n_processed) { - *n_processed += n.flushed; + *n_processed += cnt_flush[i]; } - - if (n.flushed) { + if (cnt_flush[i]) { MONITOR_INC_VALUE_CUMULATIVE( MONITOR_FLUSH_BATCH_TOTAL_PAGE, MONITOR_FLUSH_BATCH_COUNT, MONITOR_FLUSH_BATCH_PAGES, - n.flushed); + cnt_flush[i]); } } - #ifdef UNIV_DEBUG gettimeofday(&p_end_time, 0x0); timediff(&p_end_time, &p_start_time, &d_time); - - fprintf(stderr, "[2] [*n_processed: (min:%lu)%lu %llu usec]\n", ( - min_n * srv_buf_pool_instances), *n_processed, - (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); + fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu %llu usec]\n", + __FUNCTION__, (min_n * srv_buf_pool_instances), *n_processed, + (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); #endif return(success); } @@ -2416,18 +2370,21 @@ ulint pgcomp_buf_flush_LRU_tail(void) /*====================*/ { +#ifdef UNIV_DEBUG struct timeval p_start_time, p_end_time, d_time; +#endif ulint total_flushed=0, i=0; int cnt_flush[32]; #ifdef UNIV_DEBUG gettimeofday(&p_start_time, 0x0); #endif - assert(is_pgcomp_wrk_init_done()); + ut_ad(is_pgcomp_wrk_init_done()); - pthread_mutex_lock(&pgcomp_mtx); + os_fast_mutex_lock(&pgcomp_mtx); pgcomp_flush_work_items(srv_buf_pool_instances, cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); + os_fast_mutex_unlock(&pgcomp_mtx); for (i = 0; i < srv_buf_pool_instances; i++) { if (cnt_flush[i]) { @@ -2441,9 +2398,7 @@ pgcomp_buf_flush_LRU_tail(void) } } - pthread_mutex_unlock(&pgcomp_mtx); - -#ifdef UNIV_DEBUG +#if UNIV_DEBUG gettimeofday(&p_end_time, 0x0); timediff(&p_end_time, &p_start_time, &d_time); @@ -2454,9 +2409,8 @@ pgcomp_buf_flush_LRU_tail(void) return(total_flushed); } + /* JAN: TODO: END: */ - - /*********************************************************************//** Clears up tail of the LRU lists: * Put replaceable pages at the tail of LRU to the free list diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index f3e952299ff..e170004cea1 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -1323,7 +1323,6 @@ fil_space_create( DBUG_EXECUTE_IF("fil_space_create_failure", return(false);); ut_a(fil_system); - ut_a(fsp_flags_is_valid(flags)); /* Look for a matching tablespace and if found free it. */ do { @@ -4989,21 +4988,42 @@ retry: #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { + ulint n_pages = size_after_extend; + + success = os_file_set_size(node->name, node->handle, n_pages * page_size); + + /* Temporal solution: In directFS using atomic writes + we must use posix_fallocate to extend the file because + pwrite past end of file fails but when compression is + used the file pages must be physically initialized with + zeroes, thus after file extend with posix_fallocate + we still write empty pages to file. 
*/ + if (success && + srv_use_atomic_writes && + srv_compress_pages) { + goto extend_file; + } - success = os_file_set_size(node->name, node->handle, - (size_after_extend - - file_start_page_no) * page_size); mutex_enter(&fil_system->mutex); + if (success) { - node->size += (size_after_extend - start_page_no); - space->size += (size_after_extend - start_page_no); + node->size += n_pages; + space->size += n_pages; os_has_said_disk_full = FALSE; } - node->being_extended = FALSE; + + /* If posix_fallocate was used to extent the file space + we need to complete the io. Because no actual writes were + dispatched read operation is enough here. Without this + there will be assertion at shutdown indicating that + all IO is not completed. */ + fil_node_complete_io(node, fil_system, OS_FILE_READ); goto complete_io; } #endif +extend_file: + /* Extend at most 64 pages at a time */ buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; buf2 = static_cast(mem_alloc(buf_size + page_size)); @@ -5057,24 +5077,11 @@ retry: space->size += pages_added; node->size += pages_added; - node->being_extended = FALSE; -#ifdef HAVE_POSIX_FALLOCATE -complete_io: - /* If posix_fallocate was used to extent the file space - we need to complete the io. Because no actual writes were - dispatched read operation is enough here. Without this - there will be assertion at shutdown indicating that - all IO is not completed. */ - if (srv_use_posix_fallocate) { - fil_node_complete_io(node, fil_system, OS_FILE_READ); - } else { - fil_node_complete_io(node, fil_system, OS_FILE_WRITE); - } -#else fil_node_complete_io(node, fil_system, OS_FILE_WRITE); -#endif +complete_io: + node->being_extended = FALSE; *actual_size = space->size; #ifndef UNIV_HOTBACKUP diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index 502b1d028d8..1ce4fe6a2f1 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -873,7 +873,6 @@ dict_tf_set( (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes )) { *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS); } - } /********************************************************************//** diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index cc2221fc3c6..c9a92f608d8 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -277,6 +277,10 @@ extern my_bool srv_use_atomic_writes; /* If this flag IS TRUE, then we use lz4 to compress/decompress pages */ extern my_bool srv_use_lz4; +/* Number of flush threads */ +#define MTFLUSH_MAX_WORKER 64 +extern ulint srv_mtflush_threads; + /** Server undo tablespaces directory, can be absolute path. */ extern char* srv_undo_dir; diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index faad8c3c133..7b2aebf6b83 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -3,7 +3,7 @@ Copyright (c) 1996, 2013, Oracle and/or its affiliates. All rights reserved. Copyright (c) 2008, Google Inc. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Google, Inc. 
Those modifications are gratefully acknowledged and are described @@ -131,14 +131,9 @@ static os_file_t files[1000]; /** io_handler_thread parameters for thread identification */ static ulint n[SRV_MAX_N_IO_THREADS + 6]; /** io_handler_thread identifiers, 32 is the maximum number of purge threads */ -/* - static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 -/ + SRV_MAX_N_PURGE_THREADS]; -*/ -/** pgcomp_thread are 16 total */ -#define START_PGCOMP_CNT (SRV_MAX_N_IO_THREADS + 6 + SRV_MAX_N_PURGE_THREADS) -#define PGCOMP_MAX_WORKER 16 -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + SRV_MAX_N_PURGE_THREADS + PGCOMP_MAX_WORKER]; +/** 6 is the ? */ +#define START_OLD_THREAD_CNT (SRV_MAX_N_IO_THREADS + 6 + SRV_MAX_N_PURGE_THREADS) +static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 6 + SRV_MAX_N_PURGE_THREADS + MTFLUSH_MAX_WORKER]; /** We use this mutex to test the return value of pthread_mutex_trylock on successful locking. HP-UX does NOT return 0, though Linux et al do. */ @@ -1519,90 +1514,81 @@ extern int timediff(struct timeval *g_time, struct timeval *s_time, struct timev extern ibool buf_flush_start(buf_pool_t* buf_pool, buf_flush_t flush_type); extern void buf_flush_end(buf_pool_t* buf_pool, buf_flush_t flush_type); extern void buf_flush_common(buf_flush_t flush_type, ulint page_count); -extern ulint buf_flush_batch(buf_pool_t* buf_pool, buf_flush_t flush_type, ulint min_n, lsn_t lsn_limit, bool limited_lru_scan, flush_counters_t*); +extern ulint buf_flush_batch(buf_pool_t* buf_pool, buf_flush_t flush_type, ulint min_n, lsn_t lsn_limit, bool limited_lru_scan, +flush_counters_t* n); +extern void pgcomp_init(void); typedef enum wrk_status { - WRK_ITEM_SET=0, - WRK_ITEM_START=1, - WRK_ITEM_DONE=2, - WRK_ITEM_SUCCESS=2, - WRK_ITEM_FAILED=3, - WRK_ITEM_STATUS_UNDEFINED + WRK_ITEM_SET=0, // wrk-item is set + WRK_ITEM_START=1, // processing of wrk-item has started + WRK_ITEM_DONE=2, // processing is done usually set to SUCCESS/FAILED + WRK_ITEM_SUCCESS=2, // Success processing the wrk-item + WRK_ITEM_FAILED=3, // status of failed + WRK_ITEM_EXIT=4, + WRK_ITEM_STATUS_UNDEFINED } wrk_status_t; +typedef enum mt_wrk_tsk { + MT_WRK_NONE=0, // Exit queue-wait + MT_WRK_WRITE=1, // Flush operation + MT_WRK_READ=2, // Decompress operation + MT_WRK_UNDEFINED +} mt_wrk_tsk_t; + typedef enum wthr_status { - WTHR_NOT_INIT=0, - WTHR_INITIALIZED=1, - WTHR_SIG_WAITING=2, - WTHR_RUNNING=3, - WTHR_NO_WORK=4, - WTHR_KILL_IT=5, - WTHR_STATUS_UNDEFINED + WTHR_NOT_INIT=0, + WTHR_INITIALIZED=1, + WTHR_SIG_WAITING=2, + WTHR_RUNNING=3, + WTHR_NO_WORK=4, + WTHR_KILL_IT=5, + WTHR_STATUS_UNDEFINED } wthr_status_t; +typedef struct wr_tsk { + buf_pool_t *buf_pool; // buffer-pool instance + buf_flush_t flush_type; // flush-type for buffer-pool flush operation + ulint min; //minimum number of pages requested to be flushed + lsn_t lsn_limit;//lsn limit for the buffer-pool flush operation +} wr_tsk_t; + + +typedef struct rd_tsk { + void *page_pool; //list of pages to decompress; +} rd_tsk_t; + typedef struct wrk_itm { - /****************************/ - /* Need to group into struct*/ - buf_pool_t* buf_pool; //buffer-pool instance - int flush_type; //flush-type for buffer-pool flush operation - int min; //minimum number of pages requested to be flushed - unsigned long long lsn_limit; //lsn limit for the buffer-pool flush operation - /****************************/ - - unsigned long result; //flush pages count - unsigned long t_usec; //time-taken in usec - long id_usr; //thread-id currently working - 
wrk_status_t wi_status; //flag - struct wrk_itm *next; + mt_wrk_tsk_t tsk; + /* based on task-type one of the entries wr_tsk/rd_tsk will be used */ + wr_tsk_t wr; //flush page list + rd_tsk_t rd; //decompress page list + unsigned long result; //flush pages count + unsigned long t_usec; //time-taken in usec + long id_usr; //thread-id currently working + wrk_status_t wi_status; //flag + struct wrk_itm *next; } wrk_t; -typedef enum op_q_status { - Q_NOT_INIT=0, - Q_EMPTY=1, - Q_INITIALIZED=2, - Q_PROCESS=3, - Q_DONE=4, - Q_ERROR=5, - Q_STATUS_UNDEFINED -} q_status_t; - -typedef struct op_queue -{ - pthread_mutex_t mtx; - pthread_cond_t cv; - q_status_t flag; - wrk_t *head; - wrk_t *tail; -} opq_t; - -opq_t wq, cq; - typedef struct thread_sync { - int wthread_id; - pthread_t wthread; - opq_t *wq; - opq_t *cq; - wthr_status_t wt_status; + int wthread_id; + os_thread_t wthread; + ib_wqueue_t *wq; // work Queue + ib_wqueue_t *wr_cq;// Write Completion Queue + ib_wqueue_t *rd_cq; // Read Completion Queue + wthr_status_t wt_status; // Worker Thread status unsigned long stat_universal_num_processed; unsigned long stat_cycle_num_processed; } thread_sync_t; /* Global XXX:DD needs to be cleaned */ -int exit_flag; -ulint check_wrk_done_count; -static ulint done_cnt_flag; -static int pgc_n_threads = 8; - -thread_sync_t pc_sync[PGCOMP_MAX_WORKER]; -static wrk_t work_items[PGCOMP_MAX_WORKER]; +ib_wqueue_t *wq=NULL, *wr_cq=NULL, *rd_cq=NULL; +mem_heap_t *heap_allocated=NULL; +thread_sync_t pc_sync[MTFLUSH_MAX_WORKER]; +static wrk_t work_items[MTFLUSH_MAX_WORKER]; static int pgcomp_wrk_initialized = -1; - -int set_check_done_flag_count(int cnt) -{ - return(check_wrk_done_count = cnt); -} +ulint srv_mtflush_threads = 0; int set_pgcomp_wrk_init_done(void) { @@ -1615,83 +1601,14 @@ int is_pgcomp_wrk_init_done(void) return(pgcomp_wrk_initialized == 1); } -ulint set_done_cnt_flag(ulint val) -{ - /* - * Assumption: The thread calling into set_done_cnt_flag - * needs to have "cq.mtx" acquired, else not safe. 
- */ - done_cnt_flag = val; - return done_cnt_flag; -} - - -ulint cv_done_inc_flag_sig(thread_sync_t * ppc) -{ - pthread_mutex_lock(&ppc->cq->mtx); - ppc->stat_universal_num_processed++; - ppc->stat_cycle_num_processed++; - done_cnt_flag++; - if(!(done_cnt_flag <= check_wrk_done_count)) { - fprintf(stderr, "ERROR: done_cnt:%lu check_wrk_done_count:%lu\n", - done_cnt_flag, check_wrk_done_count); - } - assert(done_cnt_flag <= check_wrk_done_count); - pthread_mutex_unlock(&ppc->cq->mtx); - if(done_cnt_flag == check_wrk_done_count) { - ppc->wq->flag = Q_DONE; - pthread_mutex_lock(&ppc->cq->mtx); - ppc->cq->flag = Q_DONE; - pthread_cond_signal(&ppc->cq->cv); - pthread_mutex_unlock(&ppc->cq->mtx); - } - return(done_cnt_flag); -} - -int q_remove_wrk(opq_t *q, wrk_t **wi) -{ - int ret = 0; - - if(!wi || !q) { - return -1; - } - - pthread_mutex_lock(&q->mtx); - assert(!((q->tail == NULL) && (q->head != NULL))); - assert(!((q->tail != NULL) && (q->head == NULL))); - - /* get the first in the list*/ - *wi = q->head; - if(q->head) { - ret = 0; - q->head = q->head->next; - (*wi)->next = NULL; - if(!q->head) { - q->tail = NULL; - } - } else { - q->tail = NULL; - ret = 1; /* indicating remove from queue failed */ - } - pthread_mutex_unlock(&q->mtx); - return (ret); -} - -int is_busy_wrk_itm(wrk_t *wi) -{ - if(!wi) { - return -1; - } - return(!(wi->id_usr == -1)); -} - int setup_wrk_itm(int items) { int i; for(i=0; imtx, NULL); - pthread_cond_init(&q->cv, NULL); - q->flag = Q_INITIALIZED; - q->head = q->tail = NULL; - - return 0; -} - -#if 0 -int drain_cq(opq_t *cq, int items) -{ - int i=0; - - if(!cq) { - return -1; - } - pthread_mutex_lock(&cq->mtx); - for(i=0; ihead = cq->tail = NULL; - pthread_mutex_unlock(&cq->mtx); - return 0; -} -#endif - -int q_insert_wrk_list(opq_t *q, wrk_t *w_list) -{ - if((!q) || (!w_list)) { - fprintf(stderr, "insert failed q:%p w:%p\n", q, w_list); - return -1; - } - - pthread_mutex_lock(&q->mtx); - - assert(!((q->tail == NULL) && (q->head != NULL))); - assert(!((q->tail != NULL) && (q->head == NULL))); - - /* list is empty */ - if(!q->tail) { - q->head = q->tail = w_list; - } else { - /* added the first of the node to list */ - assert(q->head != NULL); - q->tail->next = w_list; - } - - /* move tail to the last node */ - while(q->tail->next) { - q->tail = q->tail->next; - } - pthread_mutex_unlock(&q->mtx); - - return 0; -} - int flush_pool_instance(wrk_t *wi) { - struct timeval p_start_time, p_end_time, d_time; flush_counters_t n; +#ifdef UNIV_DEBUG + struct timeval p_start_time, p_end_time, d_time; +#endif - if(!wi) { + if (!wi) { fprintf(stderr, "work item invalid wi:%p\n", wi); return -1; } - wi->t_usec = 0; - if (!buf_flush_start(wi->buf_pool, (buf_flush_t)wi->flush_type)) { + if (!wi->wr.buf_pool) { + fprintf(stderr, "work-item wi->buf_pool:%p [likely thread exit]\n", + wi->wr.buf_pool); + return -1; + } + + wi->t_usec = 0; + if (!buf_flush_start(wi->wr.buf_pool, wi->wr.flush_type)) { /* We have two choices here. If lsn_limit was specified then skipping an instance of buffer pool means we cannot guarantee that all pages @@ -1788,7 +1649,7 @@ int flush_pool_instance(wrk_t *wi) help in the retry which will follow the failure. */ fprintf(stderr, "flush_start Failed, flush_type:%d\n", - (buf_flush_t)wi->flush_type); + wi->wr.flush_type); return -1; } @@ -1797,32 +1658,28 @@ int flush_pool_instance(wrk_t *wi) gettimeofday(&p_start_time, 0x0); #endif - if((buf_flush_t)wi->flush_type == BUF_FLUSH_LRU) { - /* srv_LRU_scan_depth can be arbitrarily large value. 
- * We cap it with current LRU size. - */ - buf_pool_mutex_enter(wi->buf_pool); - wi->min = UT_LIST_GET_LEN(wi->buf_pool->LRU); - buf_pool_mutex_exit(wi->buf_pool); - wi->min = ut_min(srv_LRU_scan_depth,wi->min); - } + if (wi->wr.flush_type == BUF_FLUSH_LRU) { + /* srv_LRU_scan_depth can be arbitrarily large value. + * We cap it with current LRU size. + */ + buf_pool_mutex_enter(wi->wr.buf_pool); + wi->wr.min = UT_LIST_GET_LEN(wi->wr.buf_pool->LRU); + buf_pool_mutex_exit(wi->wr.buf_pool); + wi->wr.min = ut_min(srv_LRU_scan_depth,wi->wr.min); + } - buf_flush_batch(wi->buf_pool, - (buf_flush_t)wi->flush_type, - wi->min, wi->lsn_limit, false, &n); + wi->result = buf_flush_batch(wi->wr.buf_pool, + wi->wr.flush_type, + wi->wr.min, wi->wr.lsn_limit, + false, &n); - wi->result = n.flushed; + buf_flush_end(wi->wr.buf_pool, wi->wr.flush_type); + buf_flush_common(wi->wr.flush_type, wi->result); - buf_flush_end(wi->buf_pool, (buf_flush_t)wi->flush_type); - buf_flush_common((buf_flush_t)wi->flush_type, wi->result); - -#ifdef UNIV_DEBUG gettimeofday(&p_end_time, 0x0); timediff(&p_end_time, &p_start_time, &d_time); wi->t_usec = (unsigned long)(d_time.tv_usec+(d_time.tv_sec*1000000)); -#endif - return 0; } @@ -1831,68 +1688,75 @@ int service_page_comp_io(thread_sync_t * ppc) wrk_t *wi = NULL; int ret=0; - pthread_mutex_lock(&ppc->wq->mtx); - do{ - ppc->wt_status = WTHR_SIG_WAITING; - ret = pthread_cond_wait(&ppc->wq->cv, &ppc->wq->mtx); + ppc->wt_status = WTHR_SIG_WAITING; + wi = (wrk_t *)ib_wqueue_wait(ppc->wq); + + if (wi) { ppc->wt_status = WTHR_RUNNING; - if(ret == ETIMEDOUT) { - fprintf(stderr, "ERROR ETIMEDOUT cnt_flag:[%lu] ret:%d\n", - done_cnt_flag, ret); - } else if(ret == EINVAL || ret == EPERM) { - fprintf(stderr, "ERROR EINVAL/EPERM cnt_flag:[%lu] ret:%d\n", - done_cnt_flag, ret); - } - if(ppc->wq->flag == Q_PROCESS) { - break; - } else { - pthread_mutex_unlock(&ppc->wq->mtx); - return -1; - } - } while (ppc->wq->flag == Q_PROCESS && ret == 0); + } else { + fprintf(stderr, "%s:%d work-item is NULL\n", __FILE__, __LINE__); + ppc->wt_status = WTHR_NO_WORK; + return (0); + } - pthread_mutex_unlock(&ppc->wq->mtx); + assert(wi != NULL); + wi->id_usr = ppc->wthread; - while (ppc->cq->flag == Q_PROCESS) { - wi = NULL; - /* Get the work item */ - if (0 != (ret = q_remove_wrk(ppc->wq, &wi))) { - ppc->wt_status = WTHR_NO_WORK; - return -1; - } + switch(wi->tsk) { + case MT_WRK_NONE: + assert(wi->wi_status == WRK_ITEM_EXIT); + wi->wi_status = WRK_ITEM_SUCCESS; + ib_wqueue_add(ppc->wr_cq, wi, heap_allocated); + break; - assert(ret==0); - assert(wi != NULL); - assert(0 == is_busy_wrk_itm(wi)); - assert(wi->id_usr == -1); - - wi->id_usr = ppc->wthread; + case MT_WRK_WRITE: wi->wi_status = WRK_ITEM_START; - /* Process work item */ - if(0 != (ret = flush_pool_instance(wi))) { + if (0 != (ret = flush_pool_instance(wi))) { fprintf(stderr, "FLUSH op failed ret:%d\n", ret); wi->wi_status = WRK_ITEM_FAILED; } - - ret = q_insert_wrk_list(ppc->cq, wi); - - assert(0==ret); - assert(check_wrk_done_count >= done_cnt_flag); wi->wi_status = WRK_ITEM_SUCCESS; - if(check_wrk_done_count == cv_done_inc_flag_sig(ppc)) { - break; - } + ib_wqueue_add(ppc->wr_cq, wi, heap_allocated); + break; + + case MT_WRK_READ: + /* Need to also handle the read case */ + assert(0); + /* completed task get added to rd_cq */ + /* wi->wi_status = WRK_ITEM_SUCCESS; + ib_wqueue_add(ppc->rd_cq, wi, heap_allocated);*/ + break; + + default: + /* None other than Write/Read handling planned */ + assert(0); } + + ppc->wt_status = WTHR_NO_WORK; 
return(0); } +void page_comp_io_thread_exit() +{ + ulint i; + + fprintf(stderr, "signal page_comp_io_threads to exit [%lu]\n", srv_buf_pool_instances); + for (i=0; ihead || !q->tail) { - assert(!(((q->tail==NULL) && (q->head!=NULL)) && ((q->tail != NULL) && (q->head == NULL)))); - fprintf(stderr, "queue empty (h:%p t:%p)\n", q->head, q->tail); - return 0; - } - - pthread_mutex_lock(&q->mtx); - for(wi = q->head; (wi != NULL) ; wi = wi->next) { - //fprintf(stderr, "- [%p] %p %lu %luus [%ld] >%p\n", - // wi, wi->buf_pool, wi->result, wi->t_usec, wi->id_usr, wi->next); - fprintf(stderr, "- [%p] [%s] >%p\n", - wi, (wi->id_usr == -1)?"free":"Busy", wi->next); - } - pthread_mutex_unlock(&q->mtx); -#endif - return(0); -} - int print_wrk_list(wrk_t *wi_list) { wrk_t *wi = wi_list; @@ -1952,111 +1788,9 @@ int print_wrk_list(wrk_t *wi_list) return 0; } -int pgcomp_handler(wrk_t *w_list) -{ - int ret=0; - opq_t *wrk_q=NULL, *comp_q=NULL; - - wrk_q=&wq; - comp_q=&cq; - - pthread_mutex_lock(&wrk_q->mtx); - /* setup work queue here.. */ - wrk_q->flag = Q_EMPTY; - pthread_mutex_unlock(&wrk_q->mtx); - - ret = q_insert_wrk_list(wrk_q, w_list); - if(ret != 0) { - fprintf(stderr, "%s():work-queue setup FAILED wq:%p w_list:%p \n", - __FUNCTION__, &wq, w_list); - return -1; - } - -retry_submit: - pthread_mutex_lock(&wrk_q->mtx); - /* setup work queue here.. */ - wrk_q->flag = Q_INITIALIZED; - pthread_mutex_unlock(&wrk_q->mtx); - - - pthread_mutex_lock(&comp_q->mtx); - if(0 != set_done_cnt_flag(0)) { - fprintf(stderr, "FAILED %s:%d\n", __FILE__, __LINE__); - pthread_mutex_unlock(&comp_q->mtx); - return -1; - } - comp_q->flag = Q_PROCESS; - pthread_mutex_unlock(&comp_q->mtx); - - /* if threads are waiting request them to start */ - pthread_mutex_lock(&wrk_q->mtx); - wrk_q->flag = Q_PROCESS; - pthread_cond_broadcast(&wrk_q->cv); - pthread_mutex_unlock(&wrk_q->mtx); - - /* Wait on all worker-threads to complete */ - pthread_mutex_lock(&comp_q->mtx); - if (comp_q->flag != Q_DONE) { - do { - pthread_cond_wait(&comp_q->cv, &comp_q->mtx); - if(comp_q->flag != Q_DONE) { - fprintf(stderr, "[1] cv wait on CQ failed flag:%d cnt:%lu\n", - comp_q->flag, done_cnt_flag); - if (done_cnt_flag != srv_buf_pool_instances) { - fprintf(stderr, "[2] cv wait on CQ failed flag:%d cnt:%lu\n", - comp_q->flag, done_cnt_flag); - fprintf(stderr, "============\n"); - print_wrk_list(w_list); - fprintf(stderr, "============\n"); - } - continue; - } else if (done_cnt_flag != srv_buf_pool_instances) { - fprintf(stderr, "[3]cv wait on CQ failed flag:%d cnt:%lu\n", - comp_q->flag, done_cnt_flag); - fprintf(stderr, "============\n"); - print_wrk_list(w_list); - fprintf(stderr, "============\n"); - comp_q->flag = Q_INITIALIZED; - pthread_mutex_unlock(&comp_q->mtx); - goto retry_submit; - - assert(!done_cnt_flag); - continue; - } - assert(done_cnt_flag == srv_buf_pool_instances); - - if ((comp_q->flag == Q_DONE) && - (done_cnt_flag == srv_buf_pool_instances)) { - break; - } - } while((comp_q->flag == Q_INITIALIZED) && - (done_cnt_flag != srv_buf_pool_instances)); - } else { - fprintf(stderr, "[4] cv wait on CQ failed flag:%d cnt:%lu\n", - comp_q->flag, done_cnt_flag); - if (!done_cnt_flag) { - fprintf(stderr, "============\n"); - print_wrk_list(w_list); - fprintf(stderr, "============\n"); - comp_q->flag = Q_INITIALIZED; - pthread_mutex_unlock(&comp_q->mtx); - goto retry_submit; - assert(!done_cnt_flag); - } - assert(done_cnt_flag == srv_buf_pool_instances); - } - - pthread_mutex_unlock(&comp_q->mtx); - pthread_mutex_lock(&wrk_q->mtx); - wrk_q->flag = 
Q_DONE; - pthread_mutex_unlock(&wrk_q->mtx); - - return 0; -} - /******************************************************************//** @return a dummy parameter*/ -int pgcomp_handler_init(int num_threads, int wrk_cnt, opq_t *wq, opq_t *cq) +int pgcomp_handler_init(int num_threads, int wrk_cnt, ib_wqueue_t *wq, ib_wqueue_t *wr_cq, ib_wqueue_t *rd_cq) { int i=0; @@ -2065,106 +1799,89 @@ int pgcomp_handler_init(int num_threads, int wrk_cnt, opq_t *wq, opq_t *cq) return -1; } - if(!wq || !cq) { - fprintf(stderr, "%s() FAILED wq:%p cq:%p\n", __FUNCTION__, wq, cq); + if(!wq || !wr_cq || !rd_cq) { + fprintf(stderr, "%s() FAILED wq:%p write-cq:%p read-cq:%p\n", + __FUNCTION__, wq, wr_cq, rd_cq); return -1; } /* work-item setup */ setup_wrk_itm(wrk_cnt); - /* wq & cq setup */ - init_queue(wq); - init_queue(cq); - /* Mark each of the thread sync entires */ - for(i=0; i < PGCOMP_MAX_WORKER; i++) { - pc_sync[i].wthread_id = i; + for(i=0; i < MTFLUSH_MAX_WORKER; i++) { + pc_sync[i].wthread_id = i; } /* Create threads for page-compression-flush */ for(i=0; i < num_threads; i++) { pc_sync[i].wthread_id = i; pc_sync[i].wq = wq; - pc_sync[i].cq = cq; + pc_sync[i].wr_cq = wr_cq; + pc_sync[i].rd_cq = rd_cq; + os_thread_create(page_comp_io_thread, ((void *)(pc_sync + i)), - thread_ids + START_PGCOMP_CNT + i); - //pc_sync[i].wthread = thread_ids[START_PGCOMP_CNT + i]; - pc_sync[i].wthread = (START_PGCOMP_CNT + i); + thread_ids + START_OLD_THREAD_CNT + i); + pc_sync[i].wthread = (START_OLD_THREAD_CNT + i); pc_sync[i].wt_status = WTHR_INITIALIZED; } - - set_check_done_flag_count(wrk_cnt); set_pgcomp_wrk_init_done(); - + fprintf(stderr, "%s() Worker-Threads created..\n", __FUNCTION__); return 0; } - int wrk_thread_stat(thread_sync_t *wthr, unsigned int num_threads) { - long stat_tot=0; - unsigned int i=0; - for(i=0; i< num_threads;i++) { + ulong stat_tot=0; + ulint i=0; + for(i=0; i Date: Tue, 4 Feb 2014 20:08:59 +0200 Subject: [PATCH 11/56] Fixed compiler errors. --- storage/innobase/srv/srv0start.cc | 10 ++++++++-- storage/tokudb/ft-index/ft/ft-ops.cc | 2 +- storage/xtradb/srv/srv0start.cc | 7 ++++++- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 18d6cd109e7..dd327769d68 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1436,7 +1436,9 @@ srv_start_wait_for_purge_to_start() /* JAN: TODO: */ /**********************************************************************************/ +#ifdef UNIV_DEBUG extern int timediff(struct timeval *g_time, struct timeval *s_time, struct timeval *d_time); +#endif extern ibool buf_flush_start(buf_pool_t* buf_pool, enum buf_flush flush_type); extern void buf_flush_end(buf_pool_t* buf_pool, enum buf_flush flush_type); extern void buf_flush_common(enum buf_flush flush_type, ulint page_count); @@ -1545,8 +1547,9 @@ int setup_wrk_itm(int items) int flush_pool_instance(wrk_t *wi) { +#ifdef UNIV_DEBUG struct timeval p_start_time, p_end_time, d_time; - +#endif if (!wi) { fprintf(stderr, "work item invalid wi:%p\n", wi); return -1; @@ -1575,8 +1578,10 @@ int flush_pool_instance(wrk_t *wi) return -1; } +#ifdef UNIV_DEBUG /* Record time taken for the OP in usec */ gettimeofday(&p_start_time, 0x0); +#endif if (wi->wr.flush_type == BUF_FLUSH_LRU) { /* srv_LRU_scan_depth can be arbitrarily large value. 
@@ -1595,10 +1600,11 @@ int flush_pool_instance(wrk_t *wi) buf_flush_end(wi->wr.buf_pool, wi->wr.flush_type); buf_flush_common(wi->wr.flush_type, wi->result); +#ifdef UNIV_DEBUG gettimeofday(&p_end_time, 0x0); timediff(&p_end_time, &p_start_time, &d_time); - wi->t_usec = (unsigned long)(d_time.tv_usec+(d_time.tv_sec*1000000)); +#endif return 0; } diff --git a/storage/tokudb/ft-index/ft/ft-ops.cc b/storage/tokudb/ft-index/ft/ft-ops.cc index 27ee6ec8000..4437f23b950 100644 --- a/storage/tokudb/ft-index/ft/ft-ops.cc +++ b/storage/tokudb/ft-index/ft/ft-ops.cc @@ -2330,7 +2330,7 @@ basement_node_gc_all_les(BASEMENTNODE bn, while (index < (num_leafentries_before = bn->data_buffer.omt_size())) { void* keyp = NULL; uint32_t keylen = 0; - LEAFENTRY leaf_entry; + LEAFENTRY leaf_entry = 0; bn->data_buffer.fetch_klpair(index, &leaf_entry, &keylen, &keyp); assert_zero(r); ft_basement_node_gc_once( diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index 7b2aebf6b83..4f3570249d7 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -1510,7 +1510,10 @@ init_log_online(void) /* JAN: TODO: */ /**********************************************************************************/ +#ifdef UNIV_DEBUG extern int timediff(struct timeval *g_time, struct timeval *s_time, struct timeval *d_time); +#endif + extern ibool buf_flush_start(buf_pool_t* buf_pool, buf_flush_t flush_type); extern void buf_flush_end(buf_pool_t* buf_pool, buf_flush_t flush_type); extern void buf_flush_common(buf_flush_t flush_type, ulint page_count); @@ -1676,10 +1679,12 @@ int flush_pool_instance(wrk_t *wi) buf_flush_end(wi->wr.buf_pool, wi->wr.flush_type); buf_flush_common(wi->wr.flush_type, wi->result); +#ifdef UNIV_DEBUG gettimeofday(&p_end_time, 0x0); timediff(&p_end_time, &p_start_time, &d_time); - wi->t_usec = (unsigned long)(d_time.tv_usec+(d_time.tv_sec*1000000)); +#endif + return 0; } From 921d87d47c779240ea30aec01fbfcab888e98261 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Wed, 5 Feb 2014 15:32:29 +0200 Subject: [PATCH 12/56] Fixed issue on xtradb shutdown merge error. Multi-threaded flush threads where not shut down properly. 
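The ordering matters because the flush workers block inside ib_wqueue_wait() on the shared work queue: shutdown has to post one exit work item per worker (tsk = MT_WRK_NONE, wi_status = WRK_ITEM_EXIT), each worker acknowledges it on the write completion queue (the MT_WRK_NONE branch of service_page_comp_io()), and only after the workers are gone may the queues and the pgcomp mutex be freed. The sketch below is a simplified standalone model of that handshake, not the InnoDB code itself: std::thread and std::condition_variable stand in for os_thread_create()/ib_wqueue_t/os_fast_mutex, and the WorkItem/Queue names are illustrative only.

// Simplified model of the multi-threaded-flush shutdown handshake:
// one "exit" work item per worker is queued, each worker acknowledges it
// on the completion queue, and only then is anything torn down.
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

struct WorkItem {
    bool exit_marker;    // models tsk == MT_WRK_NONE / wi_status == WRK_ITEM_EXIT
    int  pool_instance;  // models the buffer pool instance to flush
};

struct Queue {                               // models ib_wqueue_t
    std::mutex mtx;
    std::condition_variable cv;
    std::queue<WorkItem*> items;

    void add(WorkItem* wi) {                 // models ib_wqueue_add()
        std::lock_guard<std::mutex> lk(mtx);
        items.push(wi);
        cv.notify_one();
    }
    WorkItem* wait() {                       // models ib_wqueue_wait()
        std::unique_lock<std::mutex> lk(mtx);
        cv.wait(lk, [this] { return !items.empty(); });
        WorkItem* wi = items.front();
        items.pop();
        return wi;
    }
};

int main() {
    const int n_workers = 4;
    Queue work_queue, completion_queue;
    std::vector<std::thread> workers;
    std::vector<WorkItem> exit_items(n_workers, WorkItem{true, -1});

    for (int i = 0; i < n_workers; i++) {
        workers.emplace_back([&] {
            for (;;) {
                WorkItem* wi = work_queue.wait();
                const bool is_exit = wi->exit_marker;
                // A real worker would flush wi->pool_instance here (MT_WRK_WRITE).
                completion_queue.add(wi);    // acknowledge on the completion queue
                if (is_exit) {
                    return;                  // leave the service loop for good
                }
            }
        });
    }

    // Shutdown: post one exit item per worker ...
    for (int i = 0; i < n_workers; i++) {
        work_queue.add(&exit_items[i]);
    }
    // ... collect every acknowledgement, and only then release resources.
    for (int i = 0; i < n_workers; i++) {
        (void) completion_queue.wait();
    }
    for (auto& t : workers) {
        t.join();
    }
    std::puts("all flush workers exited cleanly");
    return 0;
}

This is the rule innobase_shutdown_for_mysql() follows after this patch: page_comp_io_thread_exit() first, pgcomp_deinit() only once the worker threads are no longer waiting on the queue.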
--- storage/innobase/buf/buf0flu.cc | 4 ++++ storage/innobase/srv/srv0start.cc | 4 ++++ storage/xtradb/buf/buf0flu.cc | 24 ++++++++++++++++++++++-- storage/xtradb/srv/srv0start.cc | 20 +++++++++++++++++--- 4 files changed, 47 insertions(+), 5 deletions(-) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index ff1fab6eae7..421d105b00f 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -1973,6 +1973,10 @@ void pgcomp_init(void) { os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &pgcomp_mtx); } +void pgcomp_deinit(void) +{ + os_fast_mutex_free(&pgcomp_mtx); +} /*******************************************************************//** Multi-threaded version of buf_flush_list diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index dd327769d68..318f6b0500c 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -1444,6 +1444,7 @@ extern void buf_flush_end(buf_pool_t* buf_pool, enum buf_flush flush_type); extern void buf_flush_common(enum buf_flush flush_type, ulint page_count); extern ulint buf_flush_batch(buf_pool_t* buf_pool, enum buf_flush flush_type, ulint min_n, lsn_t lsn_limit); extern void pgcomp_init(void); +extern void pgcomp_deinit(void); typedef enum wrk_status { WRK_ITEM_SET=0, // wrk-item is set @@ -3277,6 +3278,9 @@ innobase_shutdown_for_mysql(void) fprintf(stderr, "%s:%d os_thread_count:%lu \n", __FUNCTION__, __LINE__, os_thread_count); #endif + /* h. Remove the mutex */ + pgcomp_deinit(); + os_mutex_enter(os_sync_mutex); if (os_thread_count == 0) { diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index e85d1215422..b70dc23d7e0 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -1931,6 +1931,21 @@ buf_flush_wait_batch_end( } /* JAN: TODO: */ + +void buf_pool_enter_LRU_mutex( + buf_pool_t* buf_pool) +{ + ut_ad(!mutex_own(&buf_pool->LRU_list_mutex)); + mutex_enter(&buf_pool->LRU_list_mutex); +} + +void buf_pool_exit_LRU_mutex( + buf_pool_t* buf_pool) +{ + ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); + mutex_exit(&buf_pool->LRU_list_mutex); +} + /*******************************************************************//** This utility flushes dirty blocks from the end of the LRU list and also puts replaceable clean pages from the end of the LRU list to the free @@ -2053,6 +2068,11 @@ void pgcomp_init(void) os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &pgcomp_mtx); } +void pgcomp_deinit(void) +{ + os_fast_mutex_free(&pgcomp_mtx); +} + /*******************************************************************//** Multi-threaded version of buf_flush_list */ @@ -2096,11 +2116,11 @@ pgcomp_buf_flush_list( #ifdef UNIV_DEBUG gettimeofday(&p_start_time, 0x0); #endif - os_fast_mutex_lock(&pgcomp_mtx); + // os_fast_mutex_lock(&pgcomp_mtx); pgcomp_flush_work_items(srv_buf_pool_instances, cnt_flush, BUF_FLUSH_LIST, min_n, lsn_limit); - os_fast_mutex_unlock(&pgcomp_mtx); + // os_fast_mutex_unlock(&pgcomp_mtx); for (i = 0; i < srv_buf_pool_instances; i++) { if (n_processed) { diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index 4f3570249d7..37324118fc7 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -1520,6 +1520,9 @@ extern void buf_flush_common(buf_flush_t flush_type, ulint page_count); extern ulint buf_flush_batch(buf_pool_t* buf_pool, buf_flush_t flush_type, ulint min_n, lsn_t lsn_limit, bool limited_lru_scan, flush_counters_t* n); extern void pgcomp_init(void); 
+extern void pgcomp_deinit(void); +extern void buf_pool_enter_LRU_mutex(buf_pool_t*); +extern void buf_pool_exit_LRU_mutex(buf_pool_t*); typedef enum wrk_status { WRK_ITEM_SET=0, // wrk-item is set @@ -1554,7 +1557,6 @@ typedef struct wr_tsk { ulint min; //minimum number of pages requested to be flushed lsn_t lsn_limit;//lsn limit for the buffer-pool flush operation } wr_tsk_t; - typedef struct rd_tsk { void *page_pool; //list of pages to decompress; @@ -1665,9 +1667,9 @@ int flush_pool_instance(wrk_t *wi) /* srv_LRU_scan_depth can be arbitrarily large value. * We cap it with current LRU size. */ - buf_pool_mutex_enter(wi->wr.buf_pool); + buf_pool_enter_LRU_mutex(wi->wr.buf_pool); wi->wr.min = UT_LIST_GET_LEN(wi->wr.buf_pool->LRU); - buf_pool_mutex_exit(wi->wr.buf_pool); + buf_pool_exit_LRU_mutex(wi->wr.buf_pool); wi->wr.min = ut_min(srv_LRU_scan_depth,wi->wr.min); } @@ -3407,8 +3409,20 @@ innobase_shutdown_for_mysql(void) logs_empty_and_mark_files_at_shutdown() and should have already quit or is quitting right now. */ + /* g. Exit the multi threaded flush threads */ + + page_comp_io_thread_exit(); + +#ifdef UNIV_DEBUG + fprintf(stderr, "%s:%d os_thread_count:%lu \n", __FUNCTION__, __LINE__, os_thread_count); +#endif + + /* h. Remove the mutex */ + pgcomp_deinit(); + os_mutex_enter(os_sync_mutex); + if (os_thread_count == 0) { /* All the threads have exited or are just exiting; NOTE that the threads may not have completed their From 7f3950a2aedd55b299735645882b48917a380be3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Thu, 6 Feb 2014 17:25:26 +0200 Subject: [PATCH 13/56] Moved mt-flush code to buf0mtflu.[cc|h] and cleaned it up. This is for InnoDB. --- storage/innobase/CMakeLists.txt | 3 +- storage/innobase/buf/buf0flu.cc | 235 +--- storage/innobase/buf/buf0mtflu.cc | 1493 ++++++++++---------------- storage/innobase/include/buf0flu.h | 49 + storage/innobase/include/buf0mtflu.h | 95 ++ storage/innobase/include/srv0srv.h | 2 +- storage/innobase/include/srv0start.h | 3 +- storage/innobase/srv/srv0srv.cc | 4 +- storage/innobase/srv/srv0start.cc | 425 +------- storage/xtradb/buf/buf0flu.cc | 3 + 10 files changed, 727 insertions(+), 1585 deletions(-) create mode 100644 storage/innobase/include/buf0mtflu.h diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index 0b1043bc421..64c22f9f7df 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -278,8 +278,7 @@ SET(INNOBASE_SOURCES buf/buf0flu.cc buf/buf0lru.cc buf/buf0rea.cc -# TODO: JAN uncomment -# buf/buf0mtflu.cc + buf/buf0mtflu.cc data/data0data.cc data/data0type.cc dict/dict0boot.cc diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 421d105b00f..d131f2efb44 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -32,6 +32,7 @@ Created 11/11/1995 Heikki Tuuri #endif #include "buf0buf.h" +#include "buf0mtflu.h" #include "buf0checksum.h" #include "srv0start.h" #include "srv0srv.h" @@ -1680,7 +1681,6 @@ pages: to avoid deadlocks, this function must be written so that it cannot end up waiting for these latches! NOTE 2: in the case of a flush list flush, the calling thread is not allowed to own any latches on pages! 
@return number of blocks for which the write request was queued */ -//static ulint buf_flush_batch( /*============*/ @@ -1737,7 +1737,6 @@ buf_flush_batch( /******************************************************************//** Gather the aggregated stats for both flush list and LRU list flushing */ -//static void buf_flush_common( /*=============*/ @@ -1762,7 +1761,6 @@ buf_flush_common( /******************************************************************//** Start a buffer flush batch for LRU or flush list */ -//static ibool buf_flush_start( /*============*/ @@ -1791,7 +1789,6 @@ buf_flush_start( /******************************************************************//** End a buffer flush batch for LRU or flush list */ -//static void buf_flush_end( /*==========*/ @@ -1846,50 +1843,6 @@ buf_flush_wait_batch_end( } } -/* JAN: TODO: */ -/*******************************************************************//** -This utility flushes dirty blocks from the end of the LRU list and also -puts replaceable clean pages from the end of the LRU list to the free -list. -NOTE: The calling thread is not allowed to own any latches on pages! -@return true if a batch was queued successfully. false if another batch -of same type was already running. */ -static -bool -pgcomp_buf_flush_LRU( -/*==========*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - ulint* n_processed) /*!< out: the number of pages - which were processed is passed - back to caller. Ignored if NULL */ -{ - ulint page_count; - - if (n_processed) { - *n_processed = 0; - } - - if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) { - return(false); - } - - page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0); - - buf_flush_end(buf_pool, BUF_FLUSH_LRU); - - buf_flush_common(BUF_FLUSH_LRU, page_count); - - if (n_processed) { - *n_processed = page_count; - } - - return(true); -} -/* JAN: TODO: END: */ - /*******************************************************************//** This utility flushes dirty blocks from the end of the LRU list and also puts replaceable clean pages from the end of the LRU list to the free @@ -1932,125 +1885,6 @@ buf_flush_LRU( return(true); } -/* JAN: TODO: */ -/*******************************************************************//**/ -extern int is_pgcomp_wrk_init_done(void); -extern int pgcomp_flush_work_items( - int buf_pool_inst, - int *pages_flushed, - enum buf_flush flush_type, - int min_n, - lsn_t lsn_limit); - -#define MT_COMP_WATER_MARK 50 - -#ifdef UNIV_DEBUG -#include -int timediff(struct timeval *g_time, struct timeval *s_time, struct timeval *d_time) -{ - if (g_time->tv_usec < s_time->tv_usec) - { - int nsec = (s_time->tv_usec - g_time->tv_usec) / 1000000 + 1; - s_time->tv_usec -= 1000000 * nsec; - s_time->tv_sec += nsec; - } - if (g_time->tv_usec - s_time->tv_usec > 1000000) - { - int nsec = (s_time->tv_usec - g_time->tv_usec) / 1000000; - s_time->tv_usec += 1000000 * nsec; - s_time->tv_sec -= nsec; - } - d_time->tv_sec = g_time->tv_sec - s_time->tv_sec; - d_time->tv_usec = g_time->tv_usec - s_time->tv_usec; - - return 0; -} -#endif - -static os_fast_mutex_t pgcomp_mtx; - -void pgcomp_init(void) -{ - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &pgcomp_mtx); -} -void pgcomp_deinit(void) -{ - os_fast_mutex_free(&pgcomp_mtx); -} - -/*******************************************************************//** -Multi-threaded version of buf_flush_list -*/ -UNIV_INTERN 
-bool -pgcomp_buf_flush_list( -/*==================*/ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all - blocks whose oldest_modification is - smaller than this should be flushed - (if their number does not exceed - min_n), otherwise ignored */ - ulint* n_processed) /*!< out: the number of pages - which were processed is passed - back to caller. Ignored if NULL */ - -{ - ulint i; - bool success = true; -#ifdef UNIV_DEBUG - struct timeval p_start_time, p_end_time, d_time; -#endif - int cnt_flush[MTFLUSH_MAX_WORKER]; - - if (n_processed) { - *n_processed = 0; - } - - if (min_n != ULINT_MAX) { - /* Ensure that flushing is spread evenly amongst the - buffer pool instances. When min_n is ULINT_MAX - we need to flush everything up to the lsn limit - so no limit here. */ - min_n = (min_n + srv_buf_pool_instances - 1) - / srv_buf_pool_instances; - } - -#ifdef UNIV_DEBUG - gettimeofday(&p_start_time, 0x0); -#endif - os_fast_mutex_lock(&pgcomp_mtx); - pgcomp_flush_work_items(srv_buf_pool_instances, - cnt_flush, BUF_FLUSH_LIST, - min_n, lsn_limit); - os_fast_mutex_unlock(&pgcomp_mtx); - - for (i = 0; i < srv_buf_pool_instances; i++) { - if (n_processed) { - *n_processed += cnt_flush[i]; - } - if (cnt_flush[i]) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_BATCH_TOTAL_PAGE, - MONITOR_FLUSH_BATCH_COUNT, - MONITOR_FLUSH_BATCH_PAGES, - cnt_flush[i]); - } - } -#ifdef UNIV_DEBUG - gettimeofday(&p_end_time, 0x0); - timediff(&p_end_time, &p_start_time, &d_time); - fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu %llu usec]\n", - __FUNCTION__, (min_n * srv_buf_pool_instances), *n_processed, - (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); -#endif - return(success); -} - -/* JAN: TODO: END: */ - /*******************************************************************//** This utility flushes dirty blocks from the end of the flush list of all buffer pool instances. @@ -2078,11 +1912,9 @@ buf_flush_list( ulint i; bool success = true; - /* JAN: TODO: */ - if (is_pgcomp_wrk_init_done()) { - return(pgcomp_buf_flush_list(min_n, lsn_limit, n_processed)); + if (buf_mtflu_init_done()) { + return(buf_mtflu_flush_list(min_n, lsn_limit, n_processed)); } - /* JAN: TODO: END: */ if (n_processed) { *n_processed = 0; @@ -2237,60 +2069,6 @@ buf_flush_single_page_from_LRU( return(freed); } -/* JAN: TODO: */ -/*********************************************************************//** -pgcomp_Clears up tail of the LRU lists: -* Put replaceable pages at the tail of LRU to the free list -* Flush dirty pages at the tail of LRU to the disk -The depth to which we scan each buffer pool is controlled by dynamic -config parameter innodb_LRU_scan_depth. 
-@return total pages flushed */ -UNIV_INTERN -ulint -pgcomp_buf_flush_LRU_tail(void) -/*====================*/ -{ -#ifdef UNIV_DEBUG - struct timeval p_start_time, p_end_time, d_time; -#endif - ulint total_flushed=0, i=0; - int cnt_flush[32]; - -#ifdef UNIV_DEBUG - gettimeofday(&p_start_time, 0x0); -#endif - ut_ad(is_pgcomp_wrk_init_done()); - - os_fast_mutex_lock(&pgcomp_mtx); - pgcomp_flush_work_items(srv_buf_pool_instances, - cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); - os_fast_mutex_unlock(&pgcomp_mtx); - - for (i = 0; i < srv_buf_pool_instances; i++) { - if (cnt_flush[i]) { - total_flushed += cnt_flush[i]; - - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_BATCH_TOTAL_PAGE, - MONITOR_LRU_BATCH_COUNT, - MONITOR_LRU_BATCH_PAGES, - cnt_flush[i]); - } - } - -#if UNIV_DEBUG - gettimeofday(&p_end_time, 0x0); - timediff(&p_end_time, &p_start_time, &d_time); - - fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu %llu usec]\n", ( - srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed, - (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); -#endif - - return(total_flushed); -} -/* JAN: TODO: END: */ - /*********************************************************************//** Clears up tail of the LRU lists: * Put replaceable pages at the tail of LRU to the free list @@ -2304,12 +2082,11 @@ buf_flush_LRU_tail(void) /*====================*/ { ulint total_flushed = 0; - /* JAN: TODO: */ - if(is_pgcomp_wrk_init_done()) + + if(buf_mtflu_init_done()) { - return(pgcomp_buf_flush_LRU_tail()); + return(buf_mtflu_flush_LRU_tail()); } - /* JAN: TODO: END */ for (ulint i = 0; i < srv_buf_pool_instances; i++) { diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index 7abe0547877..901f766c472 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -1,7 +1,7 @@ /***************************************************************************** -Copyright (C) 2013 Fusion-io. All Rights Reserved. -Copyright (C) 2013 SkySQL Ab. All Rights Reserved. +Copyright (C) 2013, 2014, Fusion-io. All Rights Reserved. +Copyright (C) 2013, 2014, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -23,124 +23,536 @@ Multi-threaded flush method implementation Created 06/11/2013 Dhananjoy Das DDas@fusionio.com Modified 12/12/2013 Jan Lindström jan.lindstrom@skysql.com +Modified 03/02/2014 Dhananjoy Das DDas@fusionio.com +Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com ***********************************************************************/ -#include - -#ifdef UNIV_PFS_MUTEX -/* Key to register fil_system_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t mtflush_mutex_key; -#endif /* UNIV_PFS_MUTEX */ - -/* Mutex to protect critical sections during multi-threaded flush */ -ib_mutex_t mt_flush_mutex; +#include "buf0buf.h" +#include "buf0flu.h" +#include "buf0mtflu.h" +#include "buf0checksum.h" +#include "srv0start.h" +#include "srv0srv.h" +#include "page0zip.h" +#include "ut0byte.h" +#include "ut0lst.h" +#include "page0page.h" +#include "fil0fil.h" +#include "buf0lru.h" +#include "buf0rea.h" +#include "ibuf0ibuf.h" +#include "log0log.h" +#include "os0file.h" +#include "os0sync.h" +#include "trx0sys.h" +#include "srv0mon.h" +#include "mysql/plugin.h" +#include "mysql/service_thd_wait.h" +#include "fil0pagecompress.h" #define MT_COMP_WATER_MARK 50 /* Work item status */ -typedef enum { - WORK_ITEM_SET=0, /* Work item information set */ - WORK_ITEM_START=1, /* Work item assigned to thread and - execution started */ - WORK_ITEM_DONE=2, /* Work item execution done */ -} mtflu_witem_status_t; +typedef enum wrk_status { + WRK_ITEM_SET=0, /*!< Work item is set */ + WRK_ITEM_START=1, /*!< Processing of work item has started */ + WRK_ITEM_DONE=2, /*!< Processing is done usually set to + SUCCESS/FAILED */ + WRK_ITEM_SUCCESS=2, /*!< Work item successfully processed */ + WRK_ITEM_FAILED=3, /*!< Work item process failed */ + WRK_ITEM_EXIT=4, /*!< Exiting */ + WRK_ITEM_STATUS_UNDEFINED +} wrk_status_t; + +/* Work item task type */ +typedef enum mt_wrk_tsk { + MT_WRK_NONE=0, /*!< Exit queue-wait */ + MT_WRK_WRITE=1, /*!< Flush operation */ + MT_WRK_READ=2, /*!< Read operation */ + MT_WRK_UNDEFINED +} mt_wrk_tsk_t; /* Work thread status */ -typedef enum { - WORK_THREAD_NOT_INIT=0, /* Work thread not initialized */ - WORK_THREAD_INITIALIZED=1, /* Work thread initialized */ - WORK_THREAD_SIG_WAITING=2, /* Work thred signaled */ - WORK_THREAD_RUNNING=3, /* Work thread running */ - WORK_THREAD_NO_WORK=4, /* Work thread has no work to do */ -} mtflu_wthr_status_t; +typedef enum wthr_status { + WTHR_NOT_INIT=0, /*!< Work thread not initialized */ + WTHR_INITIALIZED=1, /*!< Work thread initialized */ + WTHR_SIG_WAITING=2, /*!< Work thread wating signal */ + WTHR_RUNNING=3, /*!< Work thread running */ + WTHR_NO_WORK=4, /*!< Work thread has no work */ + WTHR_KILL_IT=5, /*!< Work thread should exit */ + WTHR_STATUS_UNDEFINED +} wthr_status_t; -/* Structure containing multi-treaded flush thread information */ -typedef struct { - os_thread_t wthread_id; /* Thread id */ - opq_t *wq; /* Write queue ? */ - opq_t *cq; /* Commit queue ?*/ - ib_mutex_t thread_mutex; /* Mutex proecting below - structures */ - mtflu_wthr_status_t thread_status; /* Thread status */ - ib_uint64_t total_num_processed; /* Total number of - pages processed */ - ib_uint64_t cycle_num_processed; /* Numper of pages - processed on last - cycle */ - ulint check_wrk_done_count; /* Number of pages - to process in this - work item ? 
*/ - ulint done_cnt_flag; /* Number of pages - processed in this - work item ?*/ -} mtflu_thread_t; +/* Write work task */ +typedef struct wr_tsk { + buf_pool_t *buf_pool; /*!< buffer-pool instance */ + enum buf_flush flush_type; /*!< flush-type for buffer-pool + flush operation */ + ulint min; /*!< minimum number of pages + requested to be flushed */ + lsn_t lsn_limit; /*!< lsn limit for the buffer-pool + flush operation */ +} wr_tsk_t; -struct work_item_t { - /****************************/ - /* Need to group into struct*/ - buf_pool_t* buf_pool; //buffer-pool instance - int flush_type; //flush-type for buffer-pool flush operation - ulint min; //minimum number of pages requested to be flushed - lsn_t lsn_limit; //lsn limit for the buffer-pool flush operation - /****************************/ +/* Read work task */ +typedef struct rd_tsk { + buf_pool_t *page_pool; /*!< list of pages to decompress; */ +} rd_tsk_t; - unsigned long result; //flush pages count - unsigned long t_usec; //time-taken in usec - os_thread_t id_usr; /* thread-id - currently working , why ? */ - mtflu_witem_status_t wi_status; /* work item status */ - - UT_LIST_NODE_T(work_node_t) next; -}; - -/* Multi-threaded flush system structure */ -typedef struct { - int pgc_n_threads = 8;// ??? why what this is - - mtflu_thread_t pc_sync[PGCOMP_MAX_WORKER]; - wrk_t work_items[PGCOMP_MAX_WORKER]; - int pgcomp_wrk_initialized = -1; /* ???? */ - opq_t wq; /* write queue ? */ - opq_t cq; /* commit queue ? */ -} mtflu_system_t; - -typedef enum op_q_status { - Q_NOT_INIT=0, - Q_EMPTY=1, - Q_INITIALIZED=2, - Q_PROCESS=3, - Q_DONE=4, - Q_ERROR=5, - Q_STATUS_UNDEFINED -} q_status_t; - -// NOTE: jan: could we use ut/ut0wqueue.(h|cc) -// NOTE: jan: here ????, it would handle waiting, signaling -// and contains simple interface - -typedef struct op_queue +/* Work item */ +typedef struct wrk_itm { - ib_mutex_t mtx; /* Mutex protecting below variables - */ - os_cond_t cv; /* ? is waiting here ? */ - q_status_t flag; /* Operation queue status */ - UT_LIST_BASE_NODE_T(work_item_t) work_list; -} opq_t; + mt_wrk_tsk_t tsk; /*!< Task type. Based on task-type + one of the entries wr_tsk/rd_tsk + will be used */ + wr_tsk_t wr; /*!< Flush page list */ + rd_tsk_t rd; /*!< Decompress page list */ + ulint n_flushed; /*!< Flushed pages count */ + os_thread_t id_usr; /*!< Thread-id currently working */ + wrk_status_t wi_status; /*!< Work item status */ + struct wrk_itm *next; /*!< Next work item */ +} wrk_t; + +/* Thread syncronization data */ +typedef struct thread_sync +{ + os_thread_id_t wthread_id; /*!< Identifier */ + os_thread_t wthread; /*!< Thread id */ + ib_wqueue_t *wq; /*!< Work Queue */ + ib_wqueue_t *wr_cq; /*!< Write Completion Queue */ + ib_wqueue_t *rd_cq; /*!< Read Completion Queue */ + wthr_status_t wt_status; /*!< Worker thread status */ + ulint stat_universal_num_processed; + /*!< Total number of pages + processed by this thread */ + ulint stat_cycle_num_processed; + /*!< Number of pages processed + on this cycle */ + mem_heap_t* wheap; /*!< Work heap where memory + is allocated */ + wrk_t* work_item; /*!< Work items to be processed */ +} thread_sync_t; + +/* QUESTION: Is this array used from several threads concurrently ? */ +// static wrk_t work_items[MTFLUSH_MAX_WORKER]; + +/* TODO: REALLY NEEDED ? */ +static int mtflush_work_initialized = -1; +static os_fast_mutex_t mtflush_mtx; +static thread_sync_t* mtflush_ctx=NULL; + +/******************************************************************//** +Initialize work items. 
*/ +static +void +mtflu_setup_work_items( +/*===================*/ + wrk_t* work_items, /*!< inout: Work items */ + ulint n_items) /*!< in: Number of work items */ +{ + ulint i; + for(i=0; iwr.buf_pool != NULL); + + if (!buf_flush_start(work_item->wr.buf_pool, work_item->wr.flush_type)) { + /* We have two choices here. If lsn_limit was + specified then skipping an instance of buffer + pool means we cannot guarantee that all pages + up to lsn_limit has been flushed. We can + return right now with failure or we can try + to flush remaining buffer pools up to the + lsn_limit. We attempt to flush other buffer + pools based on the assumption that it will + help in the retry which will follow the + failure. */ +#ifdef UNIV_DEBUG + /* QUESTION: is this a really failure ? */ + fprintf(stderr, "flush_start Failed, flush_type:%d\n", + work_item->wr.flush_type); +#endif + return 0; + } -/*******************************************************************//** -Initialize multi-threaded flush. + if (work_item->wr.flush_type == BUF_FLUSH_LRU) { + /* srv_LRU_scan_depth can be arbitrarily large value. + * We cap it with current LRU size. + */ + buf_pool_mutex_enter(work_item->wr.buf_pool); + work_item->wr.min = UT_LIST_GET_LEN(work_item->wr.buf_pool->LRU); + buf_pool_mutex_exit(work_item->wr.buf_pool); + work_item->wr.min = ut_min(srv_LRU_scan_depth,work_item->wr.min); + } + + work_item->n_flushed = buf_flush_batch(work_item->wr.buf_pool, + work_item->wr.flush_type, + work_item->wr.min, + work_item->wr.lsn_limit); + + buf_flush_end(work_item->wr.buf_pool, work_item->wr.flush_type); + buf_flush_common(work_item->wr.flush_type, work_item->n_flushed); + + return 0; +} + +#ifdef UNIV_DEBUG +/******************************************************************//** +Output work item list status, +*/ +static +void +mtflu_print_work_list( +/*==================*/ + wrk_t* wi_list) /*!< in: Work item list */ +{ + wrk_t* wi = wi_list; + ulint i=0; + + if(!wi_list) { + fprintf(stderr, "list NULL\n"); + } + + while(wi) { + fprintf(stderr, "-\t[%p]\t[%s]\t[%lu] > %p\n", + wi, (wi->id_usr == -1)?"free":"Busy", wi->n_flushed, wi->next); + wi = wi->next; + i++; + } + fprintf(stderr, "list len: %d\n", i); +} +#endif /* UNIV_DEBUG */ + +/******************************************************************//** +Worker function to wait for work items and processing them and +sending reply back. +*/ +static +void +mtflush_service_io( +/*===============*/ + thread_sync_t* mtflush_io) /*!< inout: multi-threaded flush + syncronization data */ +{ + wrk_t *work_item = NULL; + ulint n_flushed=0; + ib_time_t max_wait_usecs = 5000000; + + mtflush_io->wt_status = WTHR_SIG_WAITING; + work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, max_wait_usecs); + +#ifdef UNIV_DEBUG + mtflu_print_work_list(mtflush_io->work_item); +#endif + + if (work_item) { + mtflush_io->wt_status = WTHR_RUNNING; + } else { + /* Because of timeout this thread did not get any work */ + mtflush_io->wt_status = WTHR_NO_WORK; + return; + } + + work_item->id_usr = mtflush_io->wthread; + + switch(work_item->tsk) { + case MT_WRK_NONE: + ut_a(work_item->wi_status == WRK_ITEM_EXIT); + work_item->wi_status = WRK_ITEM_SUCCESS; + /* QUESTION: Why completed work items are inserted to + completion queue ? */ + ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); + break; + + case MT_WRK_WRITE: + work_item->wi_status = WRK_ITEM_START; + /* Process work item */ + /* QUESTION: Is this a really a error ? 
*/ + if (0 != (n_flushed = buf_mtflu_flush_pool_instance(work_item))) { + fprintf(stderr, "FLUSH op failed ret:%lu\n", n_flushed); + work_item->wi_status = WRK_ITEM_FAILED; + } + work_item->wi_status = WRK_ITEM_SUCCESS; + ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); + break; + + case MT_WRK_READ: + /* Need to also handle the read case */ + /* TODO: ? */ + ut_a(0); + /* completed task get added to rd_cq */ + /* work_item->wi_status = WRK_ITEM_SUCCESS; + ib_wqueue_add(mtflush_io->rd_cq, work_item, mtflush_io->wheap);*/ + break; + + default: + /* None other than Write/Read handling planned */ + ut_a(0); + } + + mtflush_io->wt_status = WTHR_NO_WORK; +} + +/******************************************************************//** +Thead used to flush dirty pages when multi-threaded flush is +used. +@return a dummy parameter*/ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(mtflush_io_thread)( +/*==============================*/ + void * arg) +{ + thread_sync_t *mtflush_io = ((thread_sync_t *)arg); + + while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { + mtflush_service_io(mtflush_io); + mtflush_io->stat_cycle_num_processed = 0; + } + + /* This should make sure that all current work items are + processed before threads exit. */ + while (!ib_wqueue_is_empty(mtflush_io->wq)) { + mtflush_service_io(mtflush_io); + } + + os_thread_exit(NULL); + OS_THREAD_DUMMY_RETURN; +} + +/******************************************************************//** +Add exit work item to work queue to signal multi-threded flush +threads that they should exit. */ void -buf_mtflu_init(void) -/*================*/ +buf_mtflu_io_thread_exit(void) +/*==========================*/ { - mutex_create(mtflush_mutex_key, - &mt_flush_mutex, SYNC_ANY_LATCH); + ulint i; + thread_sync_t* mtflush_io = mtflush_ctx; + + ut_a(mtflush_io != NULL); + + fprintf(stderr, "signal page_comp_io_threads to exit [%lu]\n", + srv_buf_pool_instances); + + /* Send one exit work item/thread */ + for (i=0; i < srv_buf_pool_instances; i++) { + mtflush_io->work_item[i].wr.buf_pool = NULL; + mtflush_io->work_item[i].rd.page_pool = NULL; + mtflush_io->work_item[i].tsk = MT_WRK_NONE; + mtflush_io->work_item[i].wi_status = WRK_ITEM_EXIT; + + ib_wqueue_add(mtflush_io->wq, + (void *)&(mtflush_io->work_item[i]), + mtflush_io->wheap); + } + + /* Wait until all work items on a work queue are processed */ + while(!ib_wqueue_is_empty(mtflush_io->wq)) { + /* Wait about 1/2 sec */ + os_thread_sleep(50000); + } + + ut_a(ib_wqueue_is_empty(mtflush_io->wq)); + + /* Collect all work done items */ + for (i=0; i < srv_buf_pool_instances;) { + wrk_t* work_item; + + work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, 50000); + + if (work_item) { + i++; + } + } + + ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq)); + ut_a(ib_wqueue_is_empty(mtflush_io->rd_cq)); + + /* Free all queues */ + ib_wqueue_free(mtflush_io->wq); + ib_wqueue_free(mtflush_io->wr_cq); + ib_wqueue_free(mtflush_io->rd_cq); + + /* Free heap */ + mem_heap_free(mtflush_io->wheap); + + os_fast_mutex_free(&mtflush_mtx); +} + +/******************************************************************//** +Initialize multi-threaded flush thread syncronization data. +@return Initialized multi-threaded flush thread syncroniztion data. 
*/ +void* +buf_mtflu_handler_init( +/*===================*/ + ulint n_threads, /*!< in: Number of threads to create */ + ulint wrk_cnt) /*!< in: Number of work items */ +{ + ulint i; + mem_heap_t* mtflush_heap; + ib_wqueue_t* mtflush_work_queue; + ib_wqueue_t* mtflush_write_comp_queue; + ib_wqueue_t* mtflush_read_comp_queue; + wrk_t* work_items; + + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); + + /* Create heap, work queue, write completion queue, read + completion queue for multi-threaded flush, and init + handler. */ + mtflush_heap = mem_heap_create(0); + ut_a(mtflush_heap != NULL); + mtflush_work_queue = ib_wqueue_create(); + ut_a(mtflush_work_queue != NULL); + mtflush_write_comp_queue = ib_wqueue_create(); + ut_a(mtflush_write_comp_queue != NULL); + mtflush_read_comp_queue = ib_wqueue_create(); + ut_a(mtflush_read_comp_queue != NULL); + + mtflush_ctx = (thread_sync_t *)mem_heap_alloc(mtflush_heap, + MTFLUSH_MAX_WORKER * sizeof(thread_sync_t)); + ut_a(mtflush_ctx != NULL); + work_items = (wrk_t*)mem_heap_alloc(mtflush_heap, + MTFLUSH_MAX_WORKER * sizeof(wrk_t)); + ut_a(work_items != NULL); + + /* Initialize work items */ + mtflu_setup_work_items(work_items, MTFLUSH_MAX_WORKER); + + /* Create threads for page-compression-flush */ + for(i=0; i < n_threads; i++) { + os_thread_id_t new_thread_id; + mtflush_ctx[i].wq = mtflush_work_queue; + mtflush_ctx[i].wr_cq = mtflush_write_comp_queue; + mtflush_ctx[i].rd_cq = mtflush_read_comp_queue; + mtflush_ctx[i].wheap = mtflush_heap; + mtflush_ctx[i].wt_status = WTHR_INITIALIZED; + mtflush_ctx[i].work_item = work_items; + + mtflush_ctx[i].wthread = os_thread_create( + mtflush_io_thread, + ((void *)(mtflush_ctx + i)), + &new_thread_id); + + mtflush_ctx[i].wthread_id = new_thread_id; + } + + buf_mtflu_work_init(); + + return((void *)mtflush_ctx); +} + +/******************************************************************//** +Flush buffer pool instances. +@return number of pages flushed. 
*/ +ulint +buf_mtflu_flush_work_items( +/*=======================*/ + ulint buf_pool_inst, /*!< in: Number of buffer pool instances */ + ulint *per_pool_pages_flushed, /*!< out: Number of pages + flushed/instance */ + enum buf_flush flush_type, /*!< in: Type of flush */ + ulint min_n, /*!< in: Wished minimum number of + blocks to be flushed */ + lsn_t lsn_limit) /*!< in: All blocks whose + oldest_modification is smaller than + this should be flushed (if their + number does not exceed min_n) */ +{ + ulint n_flushed=0, i; + wrk_t *done_wi; + + for(i=0;iwork_item[i].tsk = MT_WRK_WRITE; + mtflush_ctx->work_item[i].rd.page_pool = NULL; + mtflush_ctx->work_item[i].wr.buf_pool = buf_pool_from_array(i); + mtflush_ctx->work_item[i].wr.flush_type = flush_type; + mtflush_ctx->work_item[i].wr.min = min_n; + mtflush_ctx->work_item[i].wr.lsn_limit = lsn_limit; + mtflush_ctx->work_item[i].id_usr = -1; + mtflush_ctx->work_item[i].wi_status = WRK_ITEM_SET; + + ib_wqueue_add(mtflush_ctx->wq, + (void *)(&(mtflush_ctx->work_item[i])), + mtflush_ctx->wheap); + } + + /* wait on the completion to arrive */ + for(i=0; i< buf_pool_inst;) { + done_wi = (wrk_t *)ib_wqueue_timedwait(mtflush_ctx->wr_cq, 50000); + + if (done_wi != NULL) { + if(done_wi->n_flushed == 0) { + per_pool_pages_flushed[i] = 0; + } else { + per_pool_pages_flushed[i] = done_wi->n_flushed; + } + + if(done_wi->id_usr == -1 && + done_wi->wi_status == WRK_ITEM_SET ) { + fprintf(stderr, + "**Set/Unused work_item[%d] flush_type=%lu\n", + i, + done_wi->wr.flush_type); + ut_a(0); + } + + n_flushed+= done_wi->n_flushed; + /* Reset for next round*/ + mtflush_ctx->work_item[i].id_usr = -1; + + i++; + } + } + + return(n_flushed); } /*******************************************************************//** -This utility flushes dirty blocks from the end of the LRU list and also +Flushes dirty blocks from the end of the LRU list and also puts replaceable clean pages from the end of the LRU list to the free list. NOTE: The calling thread is not allowed to own any latches on pages! @@ -180,44 +592,9 @@ buf_mtflu_flush_LRU( return(true); } -#ifdef UNIV_DEBUG /*******************************************************************//** -Utility function to calculate time difference between start time -and end time. -@return Time difference. +Multi-threaded version of buf_flush_list */ -UNIV_INTERN -void -mtflu_timediff( -/*===========*/ - struct timeval *g_time, /*!< in/out: Start time*/ - struct timeval *s_time, /*!< in/out: End time */ - struct timeval *d_time) /*!< out: Time difference */ -{ - if (g_time->tv_usec < s_time->tv_usec) - { - int nsec = (s_time->tv_usec - g_time->tv_usec) / 1000000 + 1; - s_time->tv_usec -= 1000000 * nsec; - s_time->tv_sec += nsec; - } - if (g_time->tv_usec - s_time->tv_usec > 1000000) - { - int nsec = (s_time->tv_usec - g_time->tv_usec) / 1000000; - s_time->tv_usec += 1000000 * nsec; - s_time->tv_sec -= nsec; - } - d_time->tv_sec = g_time->tv_sec - s_time->tv_sec; - d_time->tv_usec = g_time->tv_usec - s_time->tv_usec; -} -#endif - -/*******************************************************************//** -This utility flushes dirty blocks from the end of the flush list of -all buffer pool instances. This is multi-threaded version of buf_flush_list. -NOTE: The calling thread is not allowed to own any latches on pages! -@return true if a batch was queued successfully for each buffer pool -instance. 
false if another batch of same type was already running in -at least one of the buffer pool instance */ bool buf_mtflu_flush_list( /*=================*/ @@ -236,7 +613,7 @@ buf_mtflu_flush_list( { ulint i; bool success = true; - struct timeval p_start_time, p_end_time, d_time; + ulint cnt_flush[MTFLUSH_MAX_WORKER]; if (n_processed) { *n_processed = 0; @@ -251,853 +628,91 @@ buf_mtflu_flush_list( / srv_buf_pool_instances; } -#ifdef UNIV_DEBUG - gettimeofday(&p_start_time, 0x0); -#endif - if(is_pgcomp_wrk_init_done() && (min_n > MT_COMP_WATER_MARK)) { - int cnt_flush[32]; + /* QUESTION: What is procted by below mutex ? */ + os_fast_mutex_lock(&mtflush_mtx); + buf_mtflu_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LIST, + min_n, lsn_limit); + os_fast_mutex_unlock(&mtflush_mtx); - mutex_enter(&mt_flush_mutex); - -#ifdef UNIV_DEBUG - fprintf(stderr, "Calling into wrk-pgcomp [min:%lu]", min_n); -#endif - pgcomp_flush_work_items(srv_buf_pool_instances, - cnt_flush, BUF_FLUSH_LIST, - min_n, lsn_limit); - - for (i = 0; i < srv_buf_pool_instances; i++) { - if (n_processed) { - *n_processed += cnt_flush[i]; - } - if (cnt_flush[i]) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_BATCH_TOTAL_PAGE, - MONITOR_FLUSH_BATCH_COUNT, - MONITOR_FLUSH_BATCH_PAGES, - cnt_flush[i]); - - } - } - - mutex_exit(&pgcomp_mtx); - -#ifdef UNIV_DEBUG - gettimeofday(&p_end_time, 0x0); - timediff(&p_end_time, &p_start_time, &d_time); - fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu %llu usec]\n", ( - min_n * srv_buf_pool_instances), *n_processed, - (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); -#endif - return(success); - } - - /* Flush to lsn_limit in all buffer pool instances */ for (i = 0; i < srv_buf_pool_instances; i++) { - buf_pool_t* buf_pool; - ulint page_count = 0; - - buf_pool = buf_pool_from_array(i); - - if (!buf_flush_start(buf_pool, BUF_FLUSH_LIST)) { - /* We have two choices here. If lsn_limit was - specified then skipping an instance of buffer - pool means we cannot guarantee that all pages - up to lsn_limit has been flushed. We can - return right now with failure or we can try - to flush remaining buffer pools up to the - lsn_limit. We attempt to flush other buffer - pools based on the assumption that it will - help in the retry which will follow the - failure. 
*/ - success = false; - - continue; - } - - page_count = buf_flush_batch( - buf_pool, BUF_FLUSH_LIST, min_n, lsn_limit); - - buf_flush_end(buf_pool, BUF_FLUSH_LIST); - - buf_flush_common(BUF_FLUSH_LIST, page_count); - if (n_processed) { - *n_processed += page_count; + *n_processed += cnt_flush[i]; } - - if (page_count) { + if (cnt_flush[i]) { MONITOR_INC_VALUE_CUMULATIVE( MONITOR_FLUSH_BATCH_TOTAL_PAGE, MONITOR_FLUSH_BATCH_COUNT, MONITOR_FLUSH_BATCH_PAGES, - page_count); + cnt_flush[i]); } } - #ifdef UNIV_DEBUG - gettimeofday(&p_end_time, 0x0); - timediff(&p_end_time, &p_start_time, &d_time); - - fprintf(stderr, "[2] [*n_processed: (min:%lu)%lu %llu usec]\n", ( - min_n * srv_buf_pool_instances), *n_processed, - (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); + fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu ]\n", + __FUNCTION__, (min_n * srv_buf_pool_instances), *n_processed); #endif return(success); } /*********************************************************************//** -Clear up tail of the LRU lists: +Clears up tail of the LRU lists: * Put replaceable pages at the tail of LRU to the free list * Flush dirty pages at the tail of LRU to the disk The depth to which we scan each buffer pool is controlled by dynamic config parameter innodb_LRU_scan_depth. @return total pages flushed */ +UNIV_INTERN ulint buf_mtflu_flush_LRU_tail(void) /*==========================*/ { - ulint total_flushed=0, i=0; - int cnt_flush[32]; + ulint total_flushed=0, i; + ulint cnt_flush[MTFLUSH_MAX_WORKER]; -#ifdef UNIV_DEBUG - struct timeval p_start_time, p_end_time, d_time; - gettimeofday(&p_start_time, 0x0); -#endif - assert(is_pgcomp_wrk_init_done()); + ut_a(buf_mtflu_init_done()); - mutex_enter(&pgcomp_mtx); - pgcomp_flush_work_items(srv_buf_pool_instances, + /* QUESTION: What is protected by below mutex ? */ + os_fast_mutex_lock(&mtflush_mtx); + buf_mtflu_flush_work_items(srv_buf_pool_instances, cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); + os_fast_mutex_unlock(&mtflush_mtx); for (i = 0; i < srv_buf_pool_instances; i++) { if (cnt_flush[i]) { total_flushed += cnt_flush[i]; MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_BATCH_TOTAL_PAGE, + MONITOR_LRU_BATCH_TOTAL_PAGE, MONITOR_LRU_BATCH_COUNT, MONITOR_LRU_BATCH_PAGES, cnt_flush[i]); } } - mutex_exit(&pgcomp_mtx); - #if UNIV_DEBUG - gettimeofday(&p_end_time, 0x0); - timediff(&p_end_time, &p_start_time, &d_time); - - fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu %llu usec]\n", ( - srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed, - (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); + fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu ]\n", ( + srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed); #endif return(total_flushed); } -/*******************************************************************//** -Set work done count to given count. -@return 1 if still work to do, 0 if no work left */ -int -set_check_done_flag_count(int cnt) -/*================*/ +/*********************************************************************//** +Set correct thread identifiers to io thread array based on +information we have. 
*/ +void +buf_mtflu_set_thread_ids( +/*=====================*/ + ulint n_threads, /*!cq->mtx); - ppc->stat_universal_num_processed++; - ppc->stat_cycle_num_processed++; - done_cnt_flag++; - if(!(done_cnt_flag <= check_wrk_done_count)) { - fprintf(stderr, "ERROR: done_cnt:%d check_wrk_done_count:%d\n", - done_cnt_flag, check_wrk_done_count); - } - assert(done_cnt_flag <= check_wrk_done_count); - mutex_exit(&ppc->cq->mtx); - if(done_cnt_flag == check_wrk_done_count) { - // why below does not need mutex protection ? - ppc->wq->flag = Q_DONE; - mutex_enter(&ppc->cq->mtx); - ppc->cq->flag = Q_DONE; - os_cond_signal(&ppc->cq->cv); - mutex_exit(&ppc->cq->mtx); - } - return(done_cnt_flag); -} - -/*******************************************************************//** -Remove work item from queue, in my opinion not needed after we use -UT_LIST -@return number of pages flushed */ -int -q_remove_wrk(opq_t *q, wrk_t **wi) -/*================*/ -{ - int ret = 0; - - if(!wi || !q) { - return -1; - } - - mutex_enter(&q->mtx); - assert(!((q->tail == NULL) && (q->head != NULL))); - assert(!((q->tail != NULL) && (q->head == NULL))); - - /* get the first in the list*/ - *wi = q->head; - if(q->head) { - ret = 0; - q->head = q->head->next; - (*wi)->next = NULL; - if(!q->head) { - q->tail = NULL; - } - } else { - q->tail = NULL; - ret = 1; /* indicating remove from queue failed */ - } - mutex_exit(&q->mtx); - return (ret); -} - -/*******************************************************************//** -Return true if work item has being assigned to a thread or false -if work item is not assigned. -@return true if work is assigned, false if not */ -bool -is_busy_wrk_itm(wrk_t *wi) -/*================*/ -{ - if(!wi) { - return -1; - } - return(!(wi->id_usr == -1)); -} - -/*******************************************************************//** -Initialize work items. -@return why ? */ -int -setup_wrk_itm(int items) -/*================*/ -{ - int i; - for(i=0; imtx = os_mutex_create(); - os_cond_init(&q->cv); - q->flag = Q_INITIALIZED; - q->head = q->tail = NULL; - - return 0; -} - -/// NEEDED ? -#if 0 -int drain_cq(opq_t *cq, int items) -{ - int i=0; - - if(!cq) { - return -1; - } - mutex_enter(&cq->mtx); - for(i=0; ihead = cq->tail = NULL; - mutex_unlock(&cq->mtx); - return 0; -} -#endif - -/*******************************************************************//** -Insert work item list to queue, not needed with UT_LIST -@return why ? */ -int -q_insert_wrk_list(opq_t *q, wrk_t *w_list) -/*================*/ -{ - if((!q) || (!w_list)) { - fprintf(stderr, "insert failed q:%p w:%p\n", q, w_list); - return -1; - } - - mutex_enter(&q->mtx); - - assert(!((q->tail == NULL) && (q->head != NULL))); - assert(!((q->tail != NULL) && (q->head == NULL))); - - /* list is empty */ - if(!q->tail) { - q->head = q->tail = w_list; - } else { - /* added the first of the node to list */ - assert(q->head != NULL); - q->tail->next = w_list; - } - - /* move tail to the last node */ - while(q->tail->next) { - q->tail = q->tail->next; - } - mutex_exit(&q->mtx); - - return 0; -} - -/*******************************************************************//** -Flush ? -@return why ? */ -int -flush_pool_instance(wrk_t *wi) -/*================*/ -{ - struct timeval p_start_time, p_end_time, d_time; - - if(!wi) { - fprintf(stderr, "work item invalid wi:%p\n", wi); - return -1; - } - - wi->t_usec = 0; - if (!buf_flush_start(wi->buf_pool, (buf_flush_t)wi->flush_type)) { - /* We have two choices here. 
If lsn_limit was - specified then skipping an instance of buffer - pool means we cannot guarantee that all pages - up to lsn_limit has been flushed. We can - return right now with failure or we can try - to flush remaining buffer pools up to the - lsn_limit. We attempt to flush other buffer - pools based on the assumption that it will - help in the retry which will follow the - failure. */ - fprintf(stderr, "flush_start Failed, flush_type:%d\n", - (buf_flush_t)wi->flush_type); - return -1; - } - -#ifdef UNIV_DEBUG - /* Record time taken for the OP in usec */ - gettimeofday(&p_start_time, 0x0); -#endif - - if((buf_flush_t)wi->flush_type == BUF_FLUSH_LRU) { - /* srv_LRU_scan_depth can be arbitrarily large value. - * We cap it with current LRU size. - */ - buf_pool_mutex_enter(wi->buf_pool); - wi->min = UT_LIST_GET_LEN(wi->buf_pool->LRU); - buf_pool_mutex_exit(wi->buf_pool); - wi->min = ut_min(srv_LRU_scan_depth,wi->min); - } - - wi->result = buf_flush_batch(wi->buf_pool, - (buf_flush_t)wi->flush_type, - wi->min, wi->lsn_limit); - - buf_flush_end(wi->buf_pool, (buf_flush_t)wi->flush_type); - buf_flush_common((buf_flush_t)wi->flush_type, wi->result); - -#ifdef UNIV_DEBUG - gettimeofday(&p_end_time, 0x0); - timediff(&p_end_time, &p_start_time, &d_time); - - wi->t_usec = (unsigned long)(d_time.tv_usec+(d_time.tv_sec*1000000)); -#endif - return 0; -} - -/*******************************************************************//** -? -@return why ? */ -int -service_page_comp_io(thread_sync_t * ppc) -/*================*/ -{ - wrk_t *wi = NULL; - int ret=0; - struct timespec ts; - - mutex_enter(&ppc->wq->mtx); - do{ - ppc->wt_status = WTHR_SIG_WAITING; - ret = os_cond_wait(&ppc->wq->cv, &ppc->wq->mtx); - ppc->wt_status = WTHR_RUNNING; - if(ret == ETIMEDOUT) { - fprintf(stderr, "ERROR ETIMEDOUT cnt_flag:[%d] ret:%d\n", - done_cnt_flag, ret); - } else if(ret == EINVAL || ret == EPERM) { - fprintf(stderr, "ERROR EINVAL/EPERM cnt_flag:[%d] ret:%d\n", - done_cnt_flag, ret); - } - if(ppc->wq->flag == Q_PROCESS) { - break; - } else { - mutex_exit(&ppc->wq->mtx); - return -1; - } - } while (ppc->wq->flag == Q_PROCESS && ret == 0); - - mutex_exit(&ppc->wq->mtx); - - while (ppc->cq->flag == Q_PROCESS) { - wi = NULL; - /* Get the work item */ - if (0 != (ret = q_remove_wrk(ppc->wq, &wi))) { - ppc->wt_status = WTHR_NO_WORK; - return -1; - } - - assert(ret==0); - assert(wi != NULL); - assert(0 == is_busy_wrk_itm(wi)); - assert(wi->id_usr == -1); - - wi->id_usr = ppc->wthread; - wi->wi_status = WRK_ITEM_START; - - /* Process work item */ - if(0 != (ret = flush_pool_instance(wi))) { - fprintf(stderr, "FLUSH op failed ret:%d\n", ret); - wi->wi_status = WRK_ITEM_FAILED; - } - ret = q_insert_wrk_list(ppc->cq, wi); - - assert(0==ret); - assert(check_wrk_done_count >= done_cnt_flag); - wi->wi_status = WRK_ITEM_SUCCESS; - if(check_wrk_done_count == cv_done_inc_flag_sig(ppc)) { - break; - } - } - return(0); -} - -/******************************************************************//** -Thread main function for multi-threaded flush -@return a dummy parameter*/ -extern "C" UNIV_INTERN -os_thread_ret_t -DECLARE_THREAD(page_comp_io_thread)( -/*==========================================*/ - void * arg) -{ - thread_sync_t *ppc_io = ((thread_sync_t *)arg); - - while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { - service_page_comp_io(ppc_io); - ppc_io->stat_cycle_num_processed = 0; - } - os_thread_exit(NULL); - OS_THREAD_DUMMY_RETURN; -} - -/*******************************************************************//** -Print queue 
work item -@return why ? */ -int -print_queue_wrk_itm(opq_t *q) -/*================*/ -{ -#if UNIV_DEBUG - wrk_t *wi = NULL; - - if(!q) { - fprintf(stderr, "queue NULL\n"); - return -1; - } - - if(!q->head || !q->tail) { - assert(!(((q->tail==NULL) && (q->head!=NULL)) && ((q->tail != NULL) && (q->head == NULL)))); - fprintf(stderr, "queue empty (h:%p t:%p)\n", q->head, q->tail); - return 0; - } - - mutex_enter(&q->mtx); - for(wi = q->head; (wi != NULL) ; wi = wi->next) { - //fprintf(stderr, "- [%p] %p %lu %luus [%ld] >%p\n", - // wi, wi->buf_pool, wi->result, wi->t_usec, wi->id_usr, wi->next); - fprintf(stderr, "- [%p] [%s] >%p\n", - wi, (wi->id_usr == -1)?"free":"Busy", wi->next); - } - mutex_exit(&q->mtx); -#endif - return(0); -} - -/*******************************************************************//** -Print work list -@return why ? */ -int -print_wrk_list(wrk_t *wi_list) -/*================*/ -{ - wrk_t *wi = wi_list; - int i=0; - - if(!wi_list) { - fprintf(stderr, "list NULL\n"); - } - - while(wi) { - fprintf(stderr, "-\t[%p]\t[%s]\t[%lu]\t[%luus] > %p\n", - wi, (wi->id_usr == -1)?"free":"Busy", wi->result, wi->t_usec, wi->next); - wi = wi->next; - i++; - } - fprintf(stderr, "list len: %d\n", i); - return 0; -} - -/*******************************************************************//** -? -@return why ? */ -int -pgcomp_handler(wrk_t *w_list) -/*================*/ -{ - struct timespec ts; - int ret=0, t_flag=0; - opq_t *wrk_q=NULL, *comp_q=NULL; - wrk_t *tw_list=NULL; - - wrk_q=&wq; - comp_q=&cq; - - mutex_enter(&wrk_q->mtx); - /* setup work queue here.. */ - wrk_q->flag = Q_EMPTY; - mutex_exit(&wrk_q->mtx); - - ret = q_insert_wrk_list(wrk_q, w_list); - if(ret != 0) { - fprintf(stderr, "%s():work-queue setup FAILED wq:%p w_list:%p \n", - __FUNCTION__, &wq, w_list); - return -1; - } - -retry_submit: - mutex_enter(&wrk_q->mtx); - /* setup work queue here.. 
*/ - wrk_q->flag = Q_INITIALIZED; - mutex_exit(&wrk_q->mtx); - - - mutex_enter(&comp_q->mtx); - if(0 != set_done_cnt_flag(0)) { - fprintf(stderr, "FAILED %s:%d\n", __FILE__, __LINE__); - mutex_exit(&comp_q->mtx); - return -1; - } - comp_q->flag = Q_PROCESS; - mutex_enter(&comp_q->mtx); - - /* if threads are waiting request them to start */ - mutex_enter(&wrk_q->mtx); - wrk_q->flag = Q_PROCESS; - os_cond_broadcast(&wrk_q->cv); - mutex_exit(&wrk_q->mtx); - - /* Wait on all worker-threads to complete */ - mutex_enter(&comp_q->mtx); - if (comp_q->flag != Q_DONE) { - do { - os_cond_wait(&comp_q->cv, &comp_q->mtx); - if(comp_q->flag != Q_DONE) { - fprintf(stderr, "[1] cv wait on CQ failed flag:%d cnt:%d\n", - comp_q->flag, done_cnt_flag); - if (done_cnt_flag != srv_buf_pool_instances) { - fprintf(stderr, "[2] cv wait on CQ failed flag:%d cnt:%d\n", - comp_q->flag, done_cnt_flag); - fprintf(stderr, "============\n"); - print_wrk_list(w_list); - fprintf(stderr, "============\n"); - } - continue; - } else if (done_cnt_flag != srv_buf_pool_instances) { - fprintf(stderr, "[3]cv wait on CQ failed flag:%d cnt:%d\n", - comp_q->flag, done_cnt_flag); - fprintf(stderr, "============\n"); - print_wrk_list(w_list); - fprintf(stderr, "============\n"); - comp_q->flag = Q_INITIALIZED; - mutex_exit(&comp_q->mtx); - goto retry_submit; - - ut_ad(!done_cnt_flag); - continue; - } - ut_ad(done_cnt_flag == srv_buf_pool_instances); - - if ((comp_q->flag == Q_DONE) && - (done_cnt_flag == srv_buf_pool_instances)) { - break; - } - } while((comp_q->flag == Q_INITIALIZED) && - (done_cnt_flag != srv_buf_pool_instances)); - } else { - fprintf(stderr, "[4] cv wait on CQ failed flag:%d cnt:%d\n", - comp_q->flag, done_cnt_flag); - if (!done_cnt_flag) { - fprintf(stderr, "============\n"); - print_wrk_list(w_list); - fprintf(stderr, "============\n"); - comp_q->flag = Q_INITIALIZED; - mutex_enter(&comp_q->mtx); - goto retry_submit; - ut_ad(!done_cnt_flag); - } - ut_ad(done_cnt_flag == srv_buf_pool_instances); - } - - mutex_exit(&comp_q->mtx); - mutex_enter(&wrk_q->mtx); - wrk_q->flag = Q_DONE; - mutex_exit(&wrk_q->mtx); - - return 0; -} - -/******************************************************************//** -@return a dummy parameter*/ -int -pgcomp_handler_init( - int num_threads, - int wrk_cnt, - opq_t *wq, - opq_t *cq) -/*================*/ -{ - int i=0; - - if(is_pgcomp_wrk_init_done()) { - fprintf(stderr, "pgcomp_handler_init(): ERROR already initialized\n"); - return -1; - } - - if(!wq || !cq) { - fprintf(stderr, "%s() FAILED wq:%p cq:%p\n", __FUNCTION__, wq, cq); - return -1; - } - - /* work-item setup */ - setup_wrk_itm(wrk_cnt); - - /* wq & cq setup */ - init_queue(wq); - init_queue(cq); - - /* Mark each of the thread sync entires */ - for(i=0; i < PGCOMP_MAX_WORKER; i++) { - pc_sync[i].wthread_id = i; - } - - /* Create threads for page-compression-flush */ - for(i=0; i < num_threads; i++) { - pc_sync[i].wthread_id = i; - pc_sync[i].wq = wq; - pc_sync[i].cq = cq; - os_thread_create(page_comp_io_thread, ((void *)(pc_sync + i)), - thread_ids + START_PGCOMP_CNT + i); - //pc_sync[i].wthread = thread_ids[START_PGCOMP_CNT + i]; - pc_sync[i].wthread = (START_PGCOMP_CNT + i); - pc_sync[i].wt_status = WTHR_INITIALIZED; - } - - set_check_done_flag_count(wrk_cnt); - set_pgcomp_wrk_init_done(); - - return 0; -} - - -/*******************************************************************//** -Print work thread status information -@return why ? 
*/ -int -wrk_thread_stat( - thread_sync_t *wthr, - unsigned int num_threads) -/*================*/ -{ - long stat_tot=0; - int i=0; - for(i=0; iwr.buf_pool) { - fprintf(stderr, "work-item wi->buf_pool:%p [likely thread exit]\n", - wi->wr.buf_pool); - return -1; - } - - wi->t_usec = 0; - if (!buf_flush_start(wi->wr.buf_pool, wi->wr.flush_type)) { - /* We have two choices here. If lsn_limit was - specified then skipping an instance of buffer - pool means we cannot guarantee that all pages - up to lsn_limit has been flushed. We can - return right now with failure or we can try - to flush remaining buffer pools up to the - lsn_limit. We attempt to flush other buffer - pools based on the assumption that it will - help in the retry which will follow the - failure. */ - fprintf(stderr, "flush_start Failed, flush_type:%d\n", - wi->wr.flush_type); - return -1; - } - -#ifdef UNIV_DEBUG - /* Record time taken for the OP in usec */ - gettimeofday(&p_start_time, 0x0); -#endif - - if (wi->wr.flush_type == BUF_FLUSH_LRU) { - /* srv_LRU_scan_depth can be arbitrarily large value. - * We cap it with current LRU size. - */ - buf_pool_mutex_enter(wi->wr.buf_pool); - wi->wr.min = UT_LIST_GET_LEN(wi->wr.buf_pool->LRU); - buf_pool_mutex_exit(wi->wr.buf_pool); - wi->wr.min = ut_min(srv_LRU_scan_depth,wi->wr.min); - } - - wi->result = buf_flush_batch(wi->wr.buf_pool, - wi->wr.flush_type, - wi->wr.min, wi->wr.lsn_limit); - - buf_flush_end(wi->wr.buf_pool, wi->wr.flush_type); - buf_flush_common(wi->wr.flush_type, wi->result); - -#ifdef UNIV_DEBUG - gettimeofday(&p_end_time, 0x0); - timediff(&p_end_time, &p_start_time, &d_time); - wi->t_usec = (unsigned long)(d_time.tv_usec+(d_time.tv_sec*1000000)); -#endif - return 0; -} - -int service_page_comp_io(thread_sync_t * ppc) -{ - wrk_t *wi = NULL; - int ret=0; - - ppc->wt_status = WTHR_SIG_WAITING; - wi = (wrk_t *)ib_wqueue_wait(ppc->wq); - - if (wi) { - ppc->wt_status = WTHR_RUNNING; - } else { - fprintf(stderr, "%s:%d work-item is NULL\n", __FILE__, __LINE__); - ppc->wt_status = WTHR_NO_WORK; - return (0); - } - - assert(wi != NULL); - wi->id_usr = ppc->wthread; - - switch(wi->tsk) { - case MT_WRK_NONE: - assert(wi->wi_status == WRK_ITEM_EXIT); - wi->wi_status = WRK_ITEM_SUCCESS; - ib_wqueue_add(ppc->wr_cq, wi, heap_allocated); - break; - - case MT_WRK_WRITE: - wi->wi_status = WRK_ITEM_START; - /* Process work item */ - if (0 != (ret = flush_pool_instance(wi))) { - fprintf(stderr, "FLUSH op failed ret:%d\n", ret); - wi->wi_status = WRK_ITEM_FAILED; - } - wi->wi_status = WRK_ITEM_SUCCESS; - ib_wqueue_add(ppc->wr_cq, wi, heap_allocated); - break; - - case MT_WRK_READ: - /* Need to also handle the read case */ - assert(0); - /* completed task get added to rd_cq */ - /* wi->wi_status = WRK_ITEM_SUCCESS; - ib_wqueue_add(ppc->rd_cq, wi, heap_allocated);*/ - break; - - default: - /* None other than Write/Read handling planned */ - assert(0); - } - - ppc->wt_status = WTHR_NO_WORK; - return(0); -} - -void page_comp_io_thread_exit() -{ - ulint i; - - fprintf(stderr, "signal page_comp_io_threads to exit [%lu]\n", srv_buf_pool_instances); - for (i=0; istat_cycle_num_processed = 0; - } - os_thread_exit(NULL); - OS_THREAD_DUMMY_RETURN; -} - -int print_wrk_list(wrk_t *wi_list) -{ - wrk_t *wi = wi_list; - int i=0; - - if(!wi_list) { - fprintf(stderr, "list NULL\n"); - } - - while(wi) { - fprintf(stderr, "-\t[%p]\t[%s]\t[%lu]\t[%luus] > %p\n", - wi, (wi->id_usr == -1)?"free":"Busy", wi->result, wi->t_usec, wi->next); - wi = wi->next; - i++; - } - fprintf(stderr, "list len: %d\n", i); - 
return 0; -} - -/******************************************************************//** -@return a dummy parameter*/ -int pgcomp_handler_init(int num_threads, int wrk_cnt, ib_wqueue_t *wq, ib_wqueue_t *wr_cq, ib_wqueue_t *rd_cq) -{ - int i=0; - - if(is_pgcomp_wrk_init_done()) { - fprintf(stderr, "pgcomp_handler_init(): ERROR already initialized\n"); - return -1; - } - - if(!wq || !wr_cq || !rd_cq) { - fprintf(stderr, "%s() FAILED wq:%p write-cq:%p read-cq:%p\n", - __FUNCTION__, wq, wr_cq, rd_cq); - return -1; - } - - /* work-item setup */ - setup_wrk_itm(wrk_cnt); - - /* Mark each of the thread sync entires */ - for(i=0; i < MTFLUSH_MAX_WORKER; i++) { - pc_sync[i].wthread_id = i; - } - - /* Create threads for page-compression-flush */ - for(i=0; i < num_threads; i++) { - pc_sync[i].wthread_id = i; - pc_sync[i].wq = wq; - pc_sync[i].wr_cq = wr_cq; - pc_sync[i].rd_cq = rd_cq; - - os_thread_create(page_comp_io_thread, ((void *)(pc_sync + i)), - thread_ids + START_OLD_THREAD_CNT + i); - pc_sync[i].wthread = (START_OLD_THREAD_CNT + i); - pc_sync[i].wt_status = WTHR_INITIALIZED; - } - set_pgcomp_wrk_init_done(); - fprintf(stderr, "%s() Worker-Threads created..\n", __FUNCTION__); - return 0; -} - -int wrk_thread_stat(thread_sync_t *wthr, unsigned int num_threads) -{ - ulong stat_tot=0; - ulint i=0; - for(i=0; in_flush[flush_type], buf_pool->init_flush[flush_type]); + mutex_exit(&buf_pool->flush_state_mutex); return(FALSE); From 18353c6a4d8241ea45aeabb4a606953531c3c9dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Thu, 6 Feb 2014 17:49:55 +0200 Subject: [PATCH 14/56] Fixed issue on file space extension. File space should be extended from current offset to desired size if posix_fallocate is used. --- storage/innobase/fil/fil0fil.cc | 32 ++++++++++++++------------------ storage/xtradb/fil/fil0fil.cc | 29 ++++++++++++----------------- 2 files changed, 26 insertions(+), 35 deletions(-) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 2430df2b386..bb9a8699e3f 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -48,6 +48,7 @@ Created 10/25/1995 Heikki Tuuri #include "page0zip.h" #include "trx0sys.h" #include "row0mysql.h" +#include "os0file.h" #ifndef UNIV_HOTBACKUP # include "buf0lru.h" # include "ibuf0ibuf.h" @@ -4860,28 +4861,25 @@ retry: #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { - ulint n_pages = size_after_extend; + os_offset_t start_offset = start_page_no * page_size; + os_offset_t end_offset = (size_after_extend - start_page_no) * page_size; - success = os_file_set_size(node->name, node->handle, - n_pages * page_size); - - /* Temporal solution: In directFS using atomic writes - we must use posix_fallocate to extend the file because - pwrite past end of file fails but when compression is - used the file pages must be physically initialized with - zeroes, thus after file extend with posix_fallocate - we still write empty pages to file. */ - if (success && - srv_use_atomic_writes && - srv_compress_pages) { - goto extend_file; + if (posix_fallocate(node->handle, start_offset, end_offset) == -1) { + ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " + "space for file \'%s\' failed. 
Current size " + INT64PF ", desired size " INT64PF "\n", + node->name, start_offset, end_offset); + success = FALSE; + } else { + success = TRUE; } mutex_enter(&fil_system->mutex); if (success) { - node->size += n_pages; - space->size += n_pages; + node->size += (size_after_extend - start_page_no); + space->size += (size_after_extend - start_page_no); + os_has_said_disk_full = FALSE; } @@ -4895,8 +4893,6 @@ retry: } #endif -extend_file: - /* Extend at most 64 pages at a time */ buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; buf2 = static_cast(mem_alloc(buf_size + page_size)); diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index e170004cea1..0dae3a28690 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -4988,27 +4988,24 @@ retry: #ifdef HAVE_POSIX_FALLOCATE if (srv_use_posix_fallocate) { - ulint n_pages = size_after_extend; + os_offset_t start_offset = start_page_no * page_size; + os_offset_t end_offset = (size_after_extend - start_page_no) * page_size; - success = os_file_set_size(node->name, node->handle, n_pages * page_size); - - /* Temporal solution: In directFS using atomic writes - we must use posix_fallocate to extend the file because - pwrite past end of file fails but when compression is - used the file pages must be physically initialized with - zeroes, thus after file extend with posix_fallocate - we still write empty pages to file. */ - if (success && - srv_use_atomic_writes && - srv_compress_pages) { - goto extend_file; + if (posix_fallocate(node->handle, start_offset, end_offset) == -1) { + ib_logf(IB_LOG_LEVEL_ERROR, "preallocating file " + "space for file \'%s\' failed. Current size " + INT64PF ", desired size " INT64PF "\n", + node->name, start_offset, end_offset); + success = FALSE; + } else { + success = TRUE; } mutex_enter(&fil_system->mutex); if (success) { - node->size += n_pages; - space->size += n_pages; + node->size += (size_after_extend - start_page_no); + space->size += (size_after_extend - start_page_no); os_has_said_disk_full = FALSE; } @@ -5022,8 +5019,6 @@ retry: } #endif -extend_file: - /* Extend at most 64 pages at a time */ buf_size = ut_min(64, size_after_extend - start_page_no) * page_size; buf2 = static_cast(mem_alloc(buf_size + page_size)); From a5cf3a800e20e86a4469dff659e68cc1b21263e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Fri, 7 Feb 2014 15:31:31 +0200 Subject: [PATCH 15/56] Merged latest mt-flush code to xtradb. Cleaned up thread statistic output code. 
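The mt-flush code moved here follows a simple work-queue pattern: the coordinating thread posts one work item per buffer pool instance on a work queue, worker threads pop items, flush the corresponding instance and report the flushed page count on a write completion queue, and the coordinator sums the per-instance counts. The sketch below is illustration only: it uses plain pthreads as a stand-in for InnoDB's ib_wqueue_t/os_thread primitives, and all names in it (queue_t, wrk_item, worker, the page counts) are invented for the demo, not the actual implementation.

    /* Illustration-only sketch of the multi-threaded flush pattern
       (plain pthreads stand-in, not InnoDB's ib_wqueue/os_thread API). */
    #include <pthread.h>
    #include <stdio.h>

    #define N_INSTANCES 4               /* pretend buffer pool instances */
    #define N_THREADS   2               /* flush worker threads          */
    #define QCAP        (N_INSTANCES + N_THREADS)

    typedef struct { int instance; int n_flushed; } wrk_item;

    typedef struct {                    /* tiny blocking FIFO            */
        wrk_item       *items[QCAP];
        int             head, tail;
        pthread_mutex_t mtx;
        pthread_cond_t  cond;
    } queue_t;

    static queue_t work_q, done_q;      /* cf. wq and wr_cq              */

    static void q_init(queue_t *q) {
        q->head = q->tail = 0;
        pthread_mutex_init(&q->mtx, NULL);
        pthread_cond_init(&q->cond, NULL);
    }

    static void q_push(queue_t *q, wrk_item *wi) {
        pthread_mutex_lock(&q->mtx);
        q->items[q->tail++] = wi;       /* bounded by QCAP in this demo  */
        pthread_cond_signal(&q->cond);
        pthread_mutex_unlock(&q->mtx);
    }

    static wrk_item *q_pop(queue_t *q) {
        wrk_item *wi;
        pthread_mutex_lock(&q->mtx);
        while (q->head == q->tail)
            pthread_cond_wait(&q->cond, &q->mtx);
        wi = q->items[q->head++];
        pthread_mutex_unlock(&q->mtx);
        return wi;
    }

    /* Worker loop, cf. mtflush_io_thread()/mtflush_service_io(). */
    static void *worker(void *arg) {
        (void) arg;
        for (;;) {
            wrk_item *wi = q_pop(&work_q);
            if (wi->instance < 0)       /* exit item, cf. MT_WRK_NONE    */
                break;
            wi->n_flushed = 10 * (wi->instance + 1);   /* pretend flush  */
            q_push(&done_q, wi);        /* report on completion queue    */
        }
        return NULL;
    }

    int main(void) {
        pthread_t thr[N_THREADS];
        wrk_item  wi[N_INSTANCES], exit_wi[N_THREADS];
        int       i, total = 0;

        q_init(&work_q);
        q_init(&done_q);
        for (i = 0; i < N_THREADS; i++)
            pthread_create(&thr[i], NULL, worker, NULL);

        /* cf. buf_mtflu_flush_work_items(): one item per instance       */
        for (i = 0; i < N_INSTANCES; i++) {
            wi[i].instance = i;
            q_push(&work_q, &wi[i]);
        }
        /* wait for all completions and sum per-instance flush counts    */
        for (i = 0; i < N_INSTANCES; i++)
            total += q_pop(&done_q)->n_flushed;
        printf("flushed %d pages\n", total);

        /* one exit item per worker, then join,
           cf. buf_mtflu_io_thread_exit()                                */
        for (i = 0; i < N_THREADS; i++) {
            exit_wi[i].instance = -1;
            q_push(&work_q, &exit_wi[i]);
        }
        for (i = 0; i < N_THREADS; i++)
            pthread_join(thr[i], NULL);
        return 0;
    }

In the actual patch the work item additionally carries the buf_pool instance, flush type and lsn_limit, and success or failure is reported through wi_status on the completion queue rather than a return code.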
--- storage/innobase/buf/buf0mtflu.cc | 116 ++--- storage/xtradb/CMakeLists.txt | 3 +- storage/xtradb/buf/buf0flu.cc | 228 +--------- storage/xtradb/buf/buf0mtflu.cc | 694 +++++++++++++++++++++++++++++ storage/xtradb/include/buf0flu.h | 57 +++ storage/xtradb/include/buf0mtflu.h | 95 ++++ storage/xtradb/include/srv0srv.h | 2 +- storage/xtradb/srv/srv0srv.cc | 2 + storage/xtradb/srv/srv0start.cc | 431 +----------------- 9 files changed, 910 insertions(+), 718 deletions(-) create mode 100644 storage/xtradb/buf/buf0mtflu.cc create mode 100644 storage/xtradb/include/buf0mtflu.h diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index 901f766c472..a81ccee5650 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -116,18 +116,13 @@ typedef struct wrk_itm /* Thread syncronization data */ typedef struct thread_sync { + ulint n_threads; /*!< Number of threads */ os_thread_id_t wthread_id; /*!< Identifier */ os_thread_t wthread; /*!< Thread id */ ib_wqueue_t *wq; /*!< Work Queue */ ib_wqueue_t *wr_cq; /*!< Write Completion Queue */ ib_wqueue_t *rd_cq; /*!< Read Completion Queue */ wthr_status_t wt_status; /*!< Worker thread status */ - ulint stat_universal_num_processed; - /*!< Total number of pages - processed by this thread */ - ulint stat_cycle_num_processed; - /*!< Number of pages processed - on this cycle */ mem_heap_t* wheap; /*!< Work heap where memory is allocated */ wrk_t* work_item; /*!< Work items to be processed */ @@ -231,6 +226,7 @@ buf_mtflu_flush_pool_instance( work_item->wr.min, work_item->wr.lsn_limit); + buf_flush_end(work_item->wr.buf_pool, work_item->wr.flush_type); buf_flush_common(work_item->wr.flush_type, work_item->n_flushed); @@ -239,28 +235,29 @@ buf_mtflu_flush_pool_instance( #ifdef UNIV_DEBUG /******************************************************************//** -Output work item list status, +Print flush statistics of work items. 
*/ static void -mtflu_print_work_list( -/*==================*/ - wrk_t* wi_list) /*!< in: Work item list */ +mtflu_print_thread_stat( +/*====================*/ + wrk_t* work_item) /*!< in: Work items */ { - wrk_t* wi = wi_list; + ulint stat_tot=0; ulint i=0; - if(!wi_list) { - fprintf(stderr, "list NULL\n"); - } + for(i=0; i< MTFLUSH_MAX_WORKER; i++) { + stat_tot+=work_item[i].n_flushed; - while(wi) { - fprintf(stderr, "-\t[%p]\t[%s]\t[%lu] > %p\n", - wi, (wi->id_usr == -1)?"free":"Busy", wi->n_flushed, wi->next); - wi = wi->next; - i++; - } - fprintf(stderr, "list len: %d\n", i); + fprintf(stderr, "MTFLUSH: Thread[%lu] stat [%lu]\n", + work_item[i].id_usr, + work_item[i].n_flushed); + + if (work_item[i].next == NULL) { + break; /* No more filled work items */ + } + } + fprintf(stderr, "MTFLUSH: Stat-Total:%lu\n", stat_tot); } #endif /* UNIV_DEBUG */ @@ -282,10 +279,6 @@ mtflush_service_io( mtflush_io->wt_status = WTHR_SIG_WAITING; work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, max_wait_usecs); -#ifdef UNIV_DEBUG - mtflu_print_work_list(mtflush_io->work_item); -#endif - if (work_item) { mtflush_io->wt_status = WTHR_RUNNING; } else { @@ -345,10 +338,28 @@ DECLARE_THREAD(mtflush_io_thread)( void * arg) { thread_sync_t *mtflush_io = ((thread_sync_t *)arg); +#ifdef UNIV_DEBUG + ib_uint64_t stat_universal_num_processed = 0; + ib_uint64_t stat_cycle_num_processed = 0; + wrk_t* work_item = mtflush_io[0].work_item; + ulint i; +#endif while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { mtflush_service_io(mtflush_io); - mtflush_io->stat_cycle_num_processed = 0; + +#ifdef UNIV_DEBUG + for(i=0; i < MTFLUSH_MAX_WORKER; i++) { + stat_cycle_num_processed+= work_item[i].n_flushed; + } + + stat_universal_num_processed+=stat_cycle_num_processed; + stat_cycle_num_processed = 0; + fprintf(stderr, "MTFLUSH_IO_THREAD: total %lu cycle %lu\n", + stat_universal_num_processed, + stat_cycle_num_processed); + mtflu_print_thread_stat(work_item); +#endif } /* This should make sure that all current work items are @@ -458,13 +469,16 @@ buf_mtflu_handler_init( work_items = (wrk_t*)mem_heap_alloc(mtflush_heap, MTFLUSH_MAX_WORKER * sizeof(wrk_t)); ut_a(work_items != NULL); + memset(work_items, 0, sizeof(wrk_t) * MTFLUSH_MAX_WORKER); + memset(mtflush_ctx, 0, sizeof(thread_sync_t) * MTFLUSH_MAX_WORKER); /* Initialize work items */ - mtflu_setup_work_items(work_items, MTFLUSH_MAX_WORKER); + mtflu_setup_work_items(work_items, n_threads); /* Create threads for page-compression-flush */ for(i=0; i < n_threads; i++) { os_thread_id_t new_thread_id; + mtflush_ctx[i].n_threads = n_threads; mtflush_ctx[i].wq = mtflush_work_queue; mtflush_ctx[i].wr_cq = mtflush_write_comp_queue; mtflush_ctx[i].rd_cq = mtflush_read_comp_queue; @@ -531,19 +545,16 @@ buf_mtflu_flush_work_items( per_pool_pages_flushed[i] = done_wi->n_flushed; } - if(done_wi->id_usr == -1 && + if((int)done_wi->id_usr == -1 && done_wi->wi_status == WRK_ITEM_SET ) { fprintf(stderr, - "**Set/Unused work_item[%d] flush_type=%lu\n", + "**Set/Unused work_item[%lu] flush_type=%lu\n", i, done_wi->wr.flush_type); ut_a(0); } n_flushed+= done_wi->n_flushed; - /* Reset for next round*/ - mtflush_ctx->work_item[i].id_usr = -1; - i++; } } @@ -551,47 +562,6 @@ buf_mtflu_flush_work_items( return(n_flushed); } -/*******************************************************************//** -Flushes dirty blocks from the end of the LRU list and also -puts replaceable clean pages from the end of the LRU list to the free -list. 
-NOTE: The calling thread is not allowed to own any latches on pages! -@return true if a batch was queued successfully. false if another batch -of same type was already running. */ -bool -buf_mtflu_flush_LRU( -/*================*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - ulint* n_processed) /*!< out: the number of pages - which were processed is passed - back to caller. Ignored if NULL */ -{ - ulint page_count; - - if (n_processed) { - *n_processed = 0; - } - - if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) { - return(false); - } - - page_count = buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0); - - buf_flush_end(buf_pool, BUF_FLUSH_LRU); - - buf_flush_common(BUF_FLUSH_LRU, page_count); - - if (n_processed) { - *n_processed = page_count; - } - - return(true); -} - /*******************************************************************//** Multi-threaded version of buf_flush_list */ diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt index 5050ca34da9..14fbb14bdd7 100644 --- a/storage/xtradb/CMakeLists.txt +++ b/storage/xtradb/CMakeLists.txt @@ -284,8 +284,7 @@ SET(INNOBASE_SOURCES buf/buf0flu.cc buf/buf0lru.cc buf/buf0rea.cc -# TODO: JAN uncomment -# buf/buf0mtflu.cc + buf/buf0mtflu.cc data/data0data.cc data/data0type.cc dict/dict0boot.cc diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index 8ed11fd674a..a080ef0ee48 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -32,6 +32,7 @@ Created 11/11/1995 Heikki Tuuri #endif #include "buf0buf.h" +#include "buf0mtflu.h" #include "buf0checksum.h" #include "srv0start.h" #include "srv0srv.h" @@ -1949,47 +1950,6 @@ void buf_pool_exit_LRU_mutex( mutex_exit(&buf_pool->LRU_list_mutex); } -/*******************************************************************//** -This utility flushes dirty blocks from the end of the LRU list and also -puts replaceable clean pages from the end of the LRU list to the free -list. -NOTE: The calling thread is not allowed to own any latches on pages! -@return true if a batch was queued successfully. false if another batch -of same type was already running. */ -static -bool -pgcomp_buf_flush_LRU( -/*==========*/ - buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - ulint* n_processed) /*!< out: the number of pages - which were processed is passed - back to caller. 
Ignored if NULL */ -{ - flush_counters_t n; - - if (n_processed) { - *n_processed = 0; - } - - if (!buf_flush_start(buf_pool, BUF_FLUSH_LRU)) { - return(false); - } - - buf_flush_batch(buf_pool, BUF_FLUSH_LRU, min_n, 0, false, &n); - - buf_flush_end(buf_pool, BUF_FLUSH_LRU); - - buf_flush_common(BUF_FLUSH_LRU, n.flushed); - - if (n_processed) { - *n_processed = n.flushed; - } - - return(true); -} /* JAN: TODO: END: */ /*******************************************************************//** @@ -2029,126 +1989,6 @@ buf_flush_LRU( return(true); } -/* JAN: TODO: */ -/*******************************************************************//**/ -extern int is_pgcomp_wrk_init_done(void); -extern int pgcomp_flush_work_items( - int buf_pool_inst, - int *pages_flushed, - buf_flush_t flush_type, - int min_n, - lsn_t lsn_limit); - -#define MT_COMP_WATER_MARK 50 - -#ifdef UNIV_DEBUG -#include -int timediff(struct timeval *g_time, struct timeval *s_time, struct timeval *d_time) -{ - if (g_time->tv_usec < s_time->tv_usec) - { - int nsec = (s_time->tv_usec - g_time->tv_usec) / 1000000 + 1; - s_time->tv_usec -= 1000000 * nsec; - s_time->tv_sec += nsec; - } - if (g_time->tv_usec - s_time->tv_usec > 1000000) - { - int nsec = (s_time->tv_usec - g_time->tv_usec) / 1000000; - s_time->tv_usec += 1000000 * nsec; - s_time->tv_sec -= nsec; - } - d_time->tv_sec = g_time->tv_sec - s_time->tv_sec; - d_time->tv_usec = g_time->tv_usec - s_time->tv_usec; - - return 0; -} -#endif - -static os_fast_mutex_t pgcomp_mtx; - -void pgcomp_init(void) -{ - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &pgcomp_mtx); -} - -void pgcomp_deinit(void) -{ - os_fast_mutex_free(&pgcomp_mtx); -} - -/*******************************************************************//** -Multi-threaded version of buf_flush_list -*/ -UNIV_INTERN -bool -pgcomp_buf_flush_list( -/*==================*/ - ulint min_n, /*!< in: wished minimum mumber of blocks - flushed (it is not guaranteed that the - actual number is that big, though) */ - lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all - blocks whose oldest_modification is - smaller than this should be flushed - (if their number does not exceed - min_n), otherwise ignored */ - ulint* n_processed) /*!< out: the number of pages - which were processed is passed - back to caller. Ignored if NULL */ - -{ - ulint i; - bool success = true; -#ifdef UNIV_DEBUG - struct timeval p_start_time, p_end_time, d_time; -#endif - int cnt_flush[MTFLUSH_MAX_WORKER]; - - if (n_processed) { - *n_processed = 0; - } - - if (min_n != ULINT_MAX) { - /* Ensure that flushing is spread evenly amongst the - buffer pool instances. When min_n is ULINT_MAX - we need to flush everything up to the lsn limit - so no limit here. 
*/ - min_n = (min_n + srv_buf_pool_instances - 1) - / srv_buf_pool_instances; - } - -#ifdef UNIV_DEBUG - gettimeofday(&p_start_time, 0x0); -#endif - // os_fast_mutex_lock(&pgcomp_mtx); - pgcomp_flush_work_items(srv_buf_pool_instances, - cnt_flush, BUF_FLUSH_LIST, - min_n, lsn_limit); - // os_fast_mutex_unlock(&pgcomp_mtx); - - for (i = 0; i < srv_buf_pool_instances; i++) { - if (n_processed) { - *n_processed += cnt_flush[i]; - } - if (cnt_flush[i]) { - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_FLUSH_BATCH_TOTAL_PAGE, - MONITOR_FLUSH_BATCH_COUNT, - MONITOR_FLUSH_BATCH_PAGES, - cnt_flush[i]); - } - } -#ifdef UNIV_DEBUG - gettimeofday(&p_end_time, 0x0); - timediff(&p_end_time, &p_start_time, &d_time); - fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu %llu usec]\n", - __FUNCTION__, (min_n * srv_buf_pool_instances), *n_processed, - (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); -#endif - return(success); -} - -/* JAN: TODO: END: */ - /*******************************************************************//** This utility flushes dirty blocks from the end of the flush list of all buffer pool instances. @@ -2181,11 +2021,9 @@ buf_flush_list( bool timeout = false; ulint flush_start_time = 0; - /* JAN: TODO: */ - if (is_pgcomp_wrk_init_done()) { - return(pgcomp_buf_flush_list(min_n, lsn_limit, n_processed)); + if (buf_mtflu_init_done()) { + return(buf_mtflu_flush_list(min_n, lsn_limit, n_processed)); } - /* JAN: TODO: END: */ for (i = 0; i < srv_buf_pool_instances; i++) { requested_pages[i] = 0; @@ -2380,60 +2218,6 @@ buf_flush_single_page_from_LRU( return(freed); } -/* JAN: TODO: */ -/*********************************************************************//** -pgcomp_Clears up tail of the LRU lists: -* Put replaceable pages at the tail of LRU to the free list -* Flush dirty pages at the tail of LRU to the disk -The depth to which we scan each buffer pool is controlled by dynamic -config parameter innodb_LRU_scan_depth. 
-@return total pages flushed */ -UNIV_INTERN -ulint -pgcomp_buf_flush_LRU_tail(void) -/*====================*/ -{ -#ifdef UNIV_DEBUG - struct timeval p_start_time, p_end_time, d_time; -#endif - ulint total_flushed=0, i=0; - int cnt_flush[32]; - -#ifdef UNIV_DEBUG - gettimeofday(&p_start_time, 0x0); -#endif - ut_ad(is_pgcomp_wrk_init_done()); - - os_fast_mutex_lock(&pgcomp_mtx); - pgcomp_flush_work_items(srv_buf_pool_instances, - cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); - os_fast_mutex_unlock(&pgcomp_mtx); - - for (i = 0; i < srv_buf_pool_instances; i++) { - if (cnt_flush[i]) { - total_flushed += cnt_flush[i]; - - MONITOR_INC_VALUE_CUMULATIVE( - MONITOR_LRU_BATCH_TOTAL_PAGE, - MONITOR_LRU_BATCH_COUNT, - MONITOR_LRU_BATCH_PAGES, - cnt_flush[i]); - } - } - -#if UNIV_DEBUG - gettimeofday(&p_end_time, 0x0); - timediff(&p_end_time, &p_start_time, &d_time); - - fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu %llu usec]\n", ( - srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed, - (unsigned long long)(d_time.tv_usec+(d_time.tv_sec*1000000))); -#endif - - return(total_flushed); -} - -/* JAN: TODO: END: */ /*********************************************************************//** Clears up tail of the LRU lists: * Put replaceable pages at the tail of LRU to the free list @@ -2458,12 +2242,10 @@ buf_flush_LRU_tail(void) ulint free_list_lwm = srv_LRU_scan_depth / 100 * srv_cleaner_free_list_lwm; - /* JAN: TODO: */ - if(is_pgcomp_wrk_init_done()) + if(buf_mtflu_init_done()) { - return(pgcomp_buf_flush_LRU_tail()); + return(buf_mtflu_flush_LRU_tail()); } - /* JAN: TODO: END */ for (ulint i = 0; i < srv_buf_pool_instances; i++) { diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc new file mode 100644 index 00000000000..14ece48519f --- /dev/null +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -0,0 +1,694 @@ +/***************************************************************************** + +Copyright (C) 2013, 2014, Fusion-io. All Rights Reserved. +Copyright (C) 2013, 2014, SkySQL Ab. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., +51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*****************************************************************************/ + +/******************************************************************//** +@file buf/buf0mtflu.cc +Multi-threaded flush method implementation + +Created 06/11/2013 Dhananjoy Das DDas@fusionio.com +Modified 12/12/2013 Jan Lindström jan.lindstrom@skysql.com +Modified 03/02/2014 Dhananjoy Das DDas@fusionio.com +Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com +***********************************************************************/ + +#include "buf0buf.h" +#include "buf0flu.h" +#include "buf0mtflu.h" +#include "buf0checksum.h" +#include "srv0start.h" +#include "srv0srv.h" +#include "page0zip.h" +#include "ut0byte.h" +#include "ut0lst.h" +#include "page0page.h" +#include "fil0fil.h" +#include "buf0lru.h" +#include "buf0rea.h" +#include "ibuf0ibuf.h" +#include "log0log.h" +#include "os0file.h" +#include "os0sync.h" +#include "trx0sys.h" +#include "srv0mon.h" +#include "mysql/plugin.h" +#include "mysql/service_thd_wait.h" +#include "fil0pagecompress.h" + +#define MT_COMP_WATER_MARK 50 + +/* Work item status */ +typedef enum wrk_status { + WRK_ITEM_SET=0, /*!< Work item is set */ + WRK_ITEM_START=1, /*!< Processing of work item has started */ + WRK_ITEM_DONE=2, /*!< Processing is done usually set to + SUCCESS/FAILED */ + WRK_ITEM_SUCCESS=2, /*!< Work item successfully processed */ + WRK_ITEM_FAILED=3, /*!< Work item process failed */ + WRK_ITEM_EXIT=4, /*!< Exiting */ + WRK_ITEM_STATUS_UNDEFINED +} wrk_status_t; + +/* Work item task type */ +typedef enum mt_wrk_tsk { + MT_WRK_NONE=0, /*!< Exit queue-wait */ + MT_WRK_WRITE=1, /*!< Flush operation */ + MT_WRK_READ=2, /*!< Read operation */ + MT_WRK_UNDEFINED +} mt_wrk_tsk_t; + +/* Work thread status */ +typedef enum wthr_status { + WTHR_NOT_INIT=0, /*!< Work thread not initialized */ + WTHR_INITIALIZED=1, /*!< Work thread initialized */ + WTHR_SIG_WAITING=2, /*!< Work thread wating signal */ + WTHR_RUNNING=3, /*!< Work thread running */ + WTHR_NO_WORK=4, /*!< Work thread has no work */ + WTHR_KILL_IT=5, /*!< Work thread should exit */ + WTHR_STATUS_UNDEFINED +} wthr_status_t; + +/* Write work task */ +typedef struct wr_tsk { + buf_pool_t *buf_pool; /*!< buffer-pool instance */ + buf_flush_t flush_type; /*!< flush-type for buffer-pool + flush operation */ + ulint min; /*!< minimum number of pages + requested to be flushed */ + lsn_t lsn_limit; /*!< lsn limit for the buffer-pool + flush operation */ +} wr_tsk_t; + +/* Read work task */ +typedef struct rd_tsk { + buf_pool_t *page_pool; /*!< list of pages to decompress; */ +} rd_tsk_t; + +/* Work item */ +typedef struct wrk_itm +{ + mt_wrk_tsk_t tsk; /*!< Task type. 
Based on task-type + one of the entries wr_tsk/rd_tsk + will be used */ + wr_tsk_t wr; /*!< Flush page list */ + rd_tsk_t rd; /*!< Decompress page list */ + ulint n_flushed; /*!< Flushed pages count */ + os_thread_t id_usr; /*!< Thread-id currently working */ + wrk_status_t wi_status; /*!< Work item status */ + struct wrk_itm *next; /*!< Next work item */ +} wrk_t; + +/* Thread syncronization data */ +typedef struct thread_sync +{ + ulint n_threads; /*!< Number of threads */ + os_thread_id_t wthread_id; /*!< Identifier */ + os_thread_t wthread; /*!< Thread id */ + ib_wqueue_t *wq; /*!< Work Queue */ + ib_wqueue_t *wr_cq; /*!< Write Completion Queue */ + ib_wqueue_t *rd_cq; /*!< Read Completion Queue */ + wthr_status_t wt_status; /*!< Worker thread status */ + mem_heap_t* wheap; /*!< Work heap where memory + is allocated */ + wrk_t* work_item; /*!< Work items to be processed */ +} thread_sync_t; + +/* QUESTION: Is this array used from several threads concurrently ? */ +// static wrk_t work_items[MTFLUSH_MAX_WORKER]; + +/* TODO: REALLY NEEDED ? */ +static int mtflush_work_initialized = -1; +static os_fast_mutex_t mtflush_mtx; +static thread_sync_t* mtflush_ctx=NULL; + +/******************************************************************//** +Initialize work items. */ +static +void +mtflu_setup_work_items( +/*===================*/ + wrk_t* work_items, /*!< inout: Work items */ + ulint n_items) /*!< in: Number of work items */ +{ + ulint i; + for(i=0; iwr.buf_pool != NULL); + + if (!buf_flush_start(work_item->wr.buf_pool, work_item->wr.flush_type)) { + /* We have two choices here. If lsn_limit was + specified then skipping an instance of buffer + pool means we cannot guarantee that all pages + up to lsn_limit has been flushed. We can + return right now with failure or we can try + to flush remaining buffer pools up to the + lsn_limit. We attempt to flush other buffer + pools based on the assumption that it will + help in the retry which will follow the + failure. */ +#ifdef UNIV_DEBUG + /* QUESTION: is this a really failure ? */ + fprintf(stderr, "flush_start Failed, flush_type:%d\n", + work_item->wr.flush_type); +#endif + return 0; + } + + + if (work_item->wr.flush_type == BUF_FLUSH_LRU) { + /* srv_LRU_scan_depth can be arbitrarily large value. + * We cap it with current LRU size. 
+ */ + buf_pool_mutex_enter(work_item->wr.buf_pool); + work_item->wr.min = UT_LIST_GET_LEN(work_item->wr.buf_pool->LRU); + buf_pool_mutex_exit(work_item->wr.buf_pool); + work_item->wr.min = ut_min(srv_LRU_scan_depth,work_item->wr.min); + } + + buf_flush_batch(work_item->wr.buf_pool, + work_item->wr.flush_type, + work_item->wr.min, + work_item->wr.lsn_limit, + false, + &n); + + work_item->n_flushed = n.flushed; + + buf_flush_end(work_item->wr.buf_pool, work_item->wr.flush_type); + buf_flush_common(work_item->wr.flush_type, work_item->n_flushed); + + return 0; +} + +#ifdef UNIV_DEBUG +/******************************************************************//** +Print flush statistics of work items +*/ +static +void +mtflu_print_thread_stat( +/*====================*/ + wrk_t* work_item) /*!< in: Work items */ +{ + ulint stat_tot=0; + ulint i=0; + + for(i=0; i< MTFLUSH_MAX_WORKER; i++) { + stat_tot+=work_item[i].n_flushed; + + fprintf(stderr, "MTFLUSH: Thread[%lu] stat [%lu]\n", + work_item[i].id_usr, + work_item[i].n_flushed); + + if (work_item[i].next == NULL) { + break; /* No more filled work items */ + } + } + + fprintf(stderr, "MTFLUSH: Stat-Total:%lu\n", stat_tot); +} +#endif /* UNIV_DEBUG */ + +/******************************************************************//** +Worker function to wait for work items and processing them and +sending reply back. +*/ +static +void +mtflush_service_io( +/*===============*/ + thread_sync_t* mtflush_io) /*!< inout: multi-threaded flush + syncronization data */ +{ + wrk_t *work_item = NULL; + ulint n_flushed=0; + ib_time_t max_wait_usecs = 5000000; + + mtflush_io->wt_status = WTHR_SIG_WAITING; + work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, max_wait_usecs); + + if (work_item) { + mtflush_io->wt_status = WTHR_RUNNING; + } else { + /* Because of timeout this thread did not get any work */ + mtflush_io->wt_status = WTHR_NO_WORK; + return; + } + + work_item->id_usr = mtflush_io->wthread; + + switch(work_item->tsk) { + case MT_WRK_NONE: + ut_a(work_item->wi_status == WRK_ITEM_EXIT); + work_item->wi_status = WRK_ITEM_SUCCESS; + /* QUESTION: Why completed work items are inserted to + completion queue ? */ + ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); + break; + + case MT_WRK_WRITE: + work_item->wi_status = WRK_ITEM_START; + /* Process work item */ + /* QUESTION: Is this a really a error ? */ + if (0 != (n_flushed = buf_mtflu_flush_pool_instance(work_item))) { + fprintf(stderr, "FLUSH op failed ret:%lu\n", n_flushed); + work_item->wi_status = WRK_ITEM_FAILED; + } + work_item->wi_status = WRK_ITEM_SUCCESS; + ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); + break; + + case MT_WRK_READ: + /* Need to also handle the read case */ + /* TODO: ? */ + ut_a(0); + /* completed task get added to rd_cq */ + /* work_item->wi_status = WRK_ITEM_SUCCESS; + ib_wqueue_add(mtflush_io->rd_cq, work_item, mtflush_io->wheap);*/ + break; + + default: + /* None other than Write/Read handling planned */ + ut_a(0); + } + + mtflush_io->wt_status = WTHR_NO_WORK; +} + +/******************************************************************//** +Thead used to flush dirty pages when multi-threaded flush is +used. 
+@return a dummy parameter*/ +extern "C" UNIV_INTERN +os_thread_ret_t +DECLARE_THREAD(mtflush_io_thread)( +/*==============================*/ + void * arg) +{ + thread_sync_t *mtflush_io = ((thread_sync_t *)arg); +#ifdef UNIV_DEBUG + ib_uint64_t stat_universal_num_processed = 0; + ib_uint64_t stat_cycle_num_processed = 0; + wrk_t* work_item = mtflush_io[0].work_item; + ulint i; +#endif + + while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { + mtflush_service_io(mtflush_io); + +#ifdef UNIV_DEBUG + for(i=0; i < MTFLUSH_MAX_WORKER; i++) { + stat_cycle_num_processed+= work_item[i].n_flushed; + } + + stat_universal_num_processed+=stat_cycle_num_processed; + stat_cycle_num_processed = 0; + fprintf(stderr, "MTFLUSH_IO_THREAD: total %lu cycle %lu\n", + stat_universal_num_processed, + stat_cycle_num_processed); + mtflu_print_thread_stat(work_item); +#endif + } + + /* This should make sure that all current work items are + processed before threads exit. */ + while (!ib_wqueue_is_empty(mtflush_io->wq)) { + mtflush_service_io(mtflush_io); + } + + os_thread_exit(NULL); + OS_THREAD_DUMMY_RETURN; +} + +/******************************************************************//** +Add exit work item to work queue to signal multi-threded flush +threads that they should exit. +*/ +void +buf_mtflu_io_thread_exit(void) +/*==========================*/ +{ + ulint i; + thread_sync_t* mtflush_io = mtflush_ctx; + + ut_a(mtflush_io != NULL); + + fprintf(stderr, "signal page_comp_io_threads to exit [%lu]\n", + srv_buf_pool_instances); + + /* Send one exit work item/thread */ + for (i=0; i < srv_buf_pool_instances; i++) { + mtflush_io->work_item[i].wr.buf_pool = NULL; + mtflush_io->work_item[i].rd.page_pool = NULL; + mtflush_io->work_item[i].tsk = MT_WRK_NONE; + mtflush_io->work_item[i].wi_status = WRK_ITEM_EXIT; + + ib_wqueue_add(mtflush_io->wq, + (void *)&(mtflush_io->work_item[i]), + mtflush_io->wheap); + } + + /* Wait until all work items on a work queue are processed */ + while(!ib_wqueue_is_empty(mtflush_io->wq)) { + /* Wait about 1/2 sec */ + os_thread_sleep(50000); + } + + ut_a(ib_wqueue_is_empty(mtflush_io->wq)); + + /* Collect all work done items */ + for (i=0; i < srv_buf_pool_instances;) { + wrk_t* work_item; + + work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, 50000); + + if (work_item) { + i++; + } + } + + ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq)); + ut_a(ib_wqueue_is_empty(mtflush_io->rd_cq)); + + /* Free all queues */ + ib_wqueue_free(mtflush_io->wq); + ib_wqueue_free(mtflush_io->wr_cq); + ib_wqueue_free(mtflush_io->rd_cq); + + /* Free heap */ + mem_heap_free(mtflush_io->wheap); + + os_fast_mutex_free(&mtflush_mtx); +} + +/******************************************************************//** +Initialize multi-threaded flush thread syncronization data. +@return Initialized multi-threaded flush thread syncroniztion data. */ +void* +buf_mtflu_handler_init( +/*===================*/ + ulint n_threads, /*!< in: Number of threads to create */ + ulint wrk_cnt) /*!< in: Number of work items */ +{ + ulint i; + mem_heap_t* mtflush_heap; + ib_wqueue_t* mtflush_work_queue; + ib_wqueue_t* mtflush_write_comp_queue; + ib_wqueue_t* mtflush_read_comp_queue; + wrk_t* work_items; + + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); + + /* Create heap, work queue, write completion queue, read + completion queue for multi-threaded flush, and init + handler. 
*/ + mtflush_heap = mem_heap_create(0); + ut_a(mtflush_heap != NULL); + mtflush_work_queue = ib_wqueue_create(); + ut_a(mtflush_work_queue != NULL); + mtflush_write_comp_queue = ib_wqueue_create(); + ut_a(mtflush_write_comp_queue != NULL); + mtflush_read_comp_queue = ib_wqueue_create(); + ut_a(mtflush_read_comp_queue != NULL); + + mtflush_ctx = (thread_sync_t *)mem_heap_alloc(mtflush_heap, + MTFLUSH_MAX_WORKER * sizeof(thread_sync_t)); + ut_a(mtflush_ctx != NULL); + work_items = (wrk_t*)mem_heap_alloc(mtflush_heap, + MTFLUSH_MAX_WORKER * sizeof(wrk_t)); + ut_a(work_items != NULL); + memset(work_items, 0, sizeof(wrk_t) * MTFLUSH_MAX_WORKER); + memset(mtflush_ctx, 0, sizeof(thread_sync_t) * MTFLUSH_MAX_WORKER); + + /* Initialize work items */ + mtflu_setup_work_items(work_items, n_threads); + + /* Create threads for page-compression-flush */ + for(i=0; i < n_threads; i++) { + os_thread_id_t new_thread_id; + mtflush_ctx[i].n_threads = n_threads; + mtflush_ctx[i].wq = mtflush_work_queue; + mtflush_ctx[i].wr_cq = mtflush_write_comp_queue; + mtflush_ctx[i].rd_cq = mtflush_read_comp_queue; + mtflush_ctx[i].wheap = mtflush_heap; + mtflush_ctx[i].wt_status = WTHR_INITIALIZED; + mtflush_ctx[i].work_item = work_items; + + mtflush_ctx[i].wthread = os_thread_create( + mtflush_io_thread, + ((void *)(mtflush_ctx + i)), + &new_thread_id); + + mtflush_ctx[i].wthread_id = new_thread_id; + } + + buf_mtflu_work_init(); + + return((void *)mtflush_ctx); +} + +/******************************************************************//** +Flush buffer pool instances. +@return number of pages flushed. */ +ulint +buf_mtflu_flush_work_items( +/*=======================*/ + ulint buf_pool_inst, /*!< in: Number of buffer pool instances */ + ulint *per_pool_pages_flushed, /*!< out: Number of pages + flushed/instance */ + buf_flush_t flush_type, /*!< in: Type of flush */ + ulint min_n, /*!< in: Wished minimum number of + blocks to be flushed */ + lsn_t lsn_limit) /*!< in: All blocks whose + oldest_modification is smaller than + this should be flushed (if their + number does not exceed min_n) */ +{ + ulint n_flushed=0, i; + wrk_t *done_wi; + + for(i=0;iwork_item[i].tsk = MT_WRK_WRITE; + mtflush_ctx->work_item[i].rd.page_pool = NULL; + mtflush_ctx->work_item[i].wr.buf_pool = buf_pool_from_array(i); + mtflush_ctx->work_item[i].wr.flush_type = flush_type; + mtflush_ctx->work_item[i].wr.min = min_n; + mtflush_ctx->work_item[i].wr.lsn_limit = lsn_limit; + mtflush_ctx->work_item[i].id_usr = -1; + mtflush_ctx->work_item[i].wi_status = WRK_ITEM_SET; + + ib_wqueue_add(mtflush_ctx->wq, + (void *)(&(mtflush_ctx->work_item[i])), + mtflush_ctx->wheap); + } + + /* wait on the completion to arrive */ + for(i=0; i< buf_pool_inst;) { + done_wi = (wrk_t *)ib_wqueue_timedwait(mtflush_ctx->wr_cq, 50000); + + if (done_wi != NULL) { + if(done_wi->n_flushed == 0) { + per_pool_pages_flushed[i] = 0; + } else { + per_pool_pages_flushed[i] = done_wi->n_flushed; + } + + if((int)done_wi->id_usr == -1 && + done_wi->wi_status == WRK_ITEM_SET ) { + fprintf(stderr, + "**Set/Unused work_item[%lu] flush_type=%lu\n", + i, + done_wi->wr.flush_type); + ut_a(0); + } + + n_flushed+= done_wi->n_flushed; + i++; + } + } + + return(n_flushed); +} + +/*******************************************************************//** +Multi-threaded version of buf_flush_list +*/ +bool +buf_mtflu_flush_list( +/*=================*/ + ulint min_n, /*!< in: wished minimum mumber of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + lsn_t lsn_limit, 
/*!< in the case BUF_FLUSH_LIST all + blocks whose oldest_modification is + smaller than this should be flushed + (if their number does not exceed + min_n), otherwise ignored */ + ulint* n_processed) /*!< out: the number of pages + which were processed is passed + back to caller. Ignored if NULL */ + +{ + ulint i; + bool success = true; + ulint cnt_flush[MTFLUSH_MAX_WORKER]; + + if (n_processed) { + *n_processed = 0; + } + + if (min_n != ULINT_MAX) { + /* Ensure that flushing is spread evenly amongst the + buffer pool instances. When min_n is ULINT_MAX + we need to flush everything up to the lsn limit + so no limit here. */ + min_n = (min_n + srv_buf_pool_instances - 1) + / srv_buf_pool_instances; + } + + /* QUESTION: What is procted by below mutex ? */ + os_fast_mutex_lock(&mtflush_mtx); + buf_mtflu_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LIST, + min_n, lsn_limit); + os_fast_mutex_unlock(&mtflush_mtx); + + for (i = 0; i < srv_buf_pool_instances; i++) { + if (n_processed) { + *n_processed += cnt_flush[i]; + } + if (cnt_flush[i]) { + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_FLUSH_BATCH_TOTAL_PAGE, + MONITOR_FLUSH_BATCH_COUNT, + MONITOR_FLUSH_BATCH_PAGES, + cnt_flush[i]); + } + } +#ifdef UNIV_DEBUG + fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu ]\n", + __FUNCTION__, (min_n * srv_buf_pool_instances), *n_processed); +#endif + return(success); +} + +/*********************************************************************//** +Clears up tail of the LRU lists: +* Put replaceable pages at the tail of LRU to the free list +* Flush dirty pages at the tail of LRU to the disk +The depth to which we scan each buffer pool is controlled by dynamic +config parameter innodb_LRU_scan_depth. +@return total pages flushed */ +UNIV_INTERN +ulint +buf_mtflu_flush_LRU_tail(void) +/*==========================*/ +{ + ulint total_flushed=0, i; + ulint cnt_flush[MTFLUSH_MAX_WORKER]; + + ut_a(buf_mtflu_init_done()); + + /* QUESTION: What is protected by below mutex ? */ + os_fast_mutex_lock(&mtflush_mtx); + buf_mtflu_flush_work_items(srv_buf_pool_instances, + cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); + os_fast_mutex_unlock(&mtflush_mtx); + + for (i = 0; i < srv_buf_pool_instances; i++) { + if (cnt_flush[i]) { + total_flushed += cnt_flush[i]; + + MONITOR_INC_VALUE_CUMULATIVE( + MONITOR_LRU_BATCH_TOTAL_PAGE, + MONITOR_LRU_BATCH_COUNT, + MONITOR_LRU_BATCH_PAGES, + cnt_flush[i]); + } + } + +#if UNIV_DEBUG + fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu ]\n", ( + srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed); +#endif + + return(total_flushed); +} + +/*********************************************************************//** +Set correct thread identifiers to io thread array based on +information we have. */ +void +buf_mtflu_set_thread_ids( +/*=====================*/ + ulint n_threads, /*!wr.buf_pool) { - fprintf(stderr, "work-item wi->buf_pool:%p [likely thread exit]\n", - wi->wr.buf_pool); - return -1; - } - - wi->t_usec = 0; - if (!buf_flush_start(wi->wr.buf_pool, wi->wr.flush_type)) { - /* We have two choices here. If lsn_limit was - specified then skipping an instance of buffer - pool means we cannot guarantee that all pages - up to lsn_limit has been flushed. We can - return right now with failure or we can try - to flush remaining buffer pools up to the - lsn_limit. We attempt to flush other buffer - pools based on the assumption that it will - help in the retry which will follow the - failure. 
*/ - fprintf(stderr, "flush_start Failed, flush_type:%d\n", - wi->wr.flush_type); - return -1; - } - -#ifdef UNIV_DEBUG - /* Record time taken for the OP in usec */ - gettimeofday(&p_start_time, 0x0); -#endif - - if (wi->wr.flush_type == BUF_FLUSH_LRU) { - /* srv_LRU_scan_depth can be arbitrarily large value. - * We cap it with current LRU size. - */ - buf_pool_enter_LRU_mutex(wi->wr.buf_pool); - wi->wr.min = UT_LIST_GET_LEN(wi->wr.buf_pool->LRU); - buf_pool_exit_LRU_mutex(wi->wr.buf_pool); - wi->wr.min = ut_min(srv_LRU_scan_depth,wi->wr.min); - } - - wi->result = buf_flush_batch(wi->wr.buf_pool, - wi->wr.flush_type, - wi->wr.min, wi->wr.lsn_limit, - false, &n); - - buf_flush_end(wi->wr.buf_pool, wi->wr.flush_type); - buf_flush_common(wi->wr.flush_type, wi->result); - -#ifdef UNIV_DEBUG - gettimeofday(&p_end_time, 0x0); - timediff(&p_end_time, &p_start_time, &d_time); - wi->t_usec = (unsigned long)(d_time.tv_usec+(d_time.tv_sec*1000000)); -#endif - - return 0; -} - -int service_page_comp_io(thread_sync_t * ppc) -{ - wrk_t *wi = NULL; - int ret=0; - - ppc->wt_status = WTHR_SIG_WAITING; - wi = (wrk_t *)ib_wqueue_wait(ppc->wq); - - if (wi) { - ppc->wt_status = WTHR_RUNNING; - } else { - fprintf(stderr, "%s:%d work-item is NULL\n", __FILE__, __LINE__); - ppc->wt_status = WTHR_NO_WORK; - return (0); - } - - assert(wi != NULL); - wi->id_usr = ppc->wthread; - - switch(wi->tsk) { - case MT_WRK_NONE: - assert(wi->wi_status == WRK_ITEM_EXIT); - wi->wi_status = WRK_ITEM_SUCCESS; - ib_wqueue_add(ppc->wr_cq, wi, heap_allocated); - break; - - case MT_WRK_WRITE: - wi->wi_status = WRK_ITEM_START; - /* Process work item */ - if (0 != (ret = flush_pool_instance(wi))) { - fprintf(stderr, "FLUSH op failed ret:%d\n", ret); - wi->wi_status = WRK_ITEM_FAILED; - } - wi->wi_status = WRK_ITEM_SUCCESS; - ib_wqueue_add(ppc->wr_cq, wi, heap_allocated); - break; - - case MT_WRK_READ: - /* Need to also handle the read case */ - assert(0); - /* completed task get added to rd_cq */ - /* wi->wi_status = WRK_ITEM_SUCCESS; - ib_wqueue_add(ppc->rd_cq, wi, heap_allocated);*/ - break; - - default: - /* None other than Write/Read handling planned */ - assert(0); - } - - ppc->wt_status = WTHR_NO_WORK; - return(0); -} - -void page_comp_io_thread_exit() -{ - ulint i; - - fprintf(stderr, "signal page_comp_io_threads to exit [%lu]\n", srv_buf_pool_instances); - for (i=0; istat_cycle_num_processed = 0; - } - os_thread_exit(NULL); - OS_THREAD_DUMMY_RETURN; -} - -int print_wrk_list(wrk_t *wi_list) -{ - wrk_t *wi = wi_list; - int i=0; - - if(!wi_list) { - fprintf(stderr, "list NULL\n"); - } - - while(wi) { - fprintf(stderr, "-\t[%p]\t[%s]\t[%lu]\t[%luus] > %p\n", - wi, (wi->id_usr == -1)?"free":"Busy", wi->result, wi->t_usec, wi->next); - wi = wi->next; - i++; - } - fprintf(stderr, "list len: %d\n", i); - return 0; -} - -/******************************************************************//** -@return a dummy parameter*/ -int pgcomp_handler_init(int num_threads, int wrk_cnt, ib_wqueue_t *wq, ib_wqueue_t *wr_cq, ib_wqueue_t *rd_cq) -{ - int i=0; - - if(is_pgcomp_wrk_init_done()) { - fprintf(stderr, "pgcomp_handler_init(): ERROR already initialized\n"); - return -1; - } - - if(!wq || !wr_cq || !rd_cq) { - fprintf(stderr, "%s() FAILED wq:%p write-cq:%p read-cq:%p\n", - __FUNCTION__, wq, wr_cq, rd_cq); - return -1; - } - - /* work-item setup */ - setup_wrk_itm(wrk_cnt); - - /* Mark each of the thread sync entires */ - for(i=0; i < MTFLUSH_MAX_WORKER; i++) { - pc_sync[i].wthread_id = i; - } - - /* Create threads for page-compression-flush */ - 
for(i=0; i < num_threads; i++) { - pc_sync[i].wthread_id = i; - pc_sync[i].wq = wq; - pc_sync[i].wr_cq = wr_cq; - pc_sync[i].rd_cq = rd_cq; - - os_thread_create(page_comp_io_thread, ((void *)(pc_sync + i)), - thread_ids + START_OLD_THREAD_CNT + i); - pc_sync[i].wthread = (START_OLD_THREAD_CNT + i); - pc_sync[i].wt_status = WTHR_INITIALIZED; - } - set_pgcomp_wrk_init_done(); - fprintf(stderr, "%s() Worker-Threads created..\n", __FUNCTION__); - return 0; -} - -int wrk_thread_stat(thread_sync_t *wthr, unsigned int num_threads) -{ - ulong stat_tot=0; - ulint i=0; - for(i=0; i Date: Tue, 11 Feb 2014 20:05:09 +0200 Subject: [PATCH 16/56] Removed unnecessary files and set lz4 under HAVE_LZ4 compiler option using cmake find_library. Fixed bunch of compiler warnings. --- cmake/lz4.cmake | 35 + storage/innobase/CMakeLists.txt | 4 +- storage/innobase/buf/buf0flu.cc | 27 +- storage/innobase/buf/buf0mtflu.cc | 2 +- storage/innobase/fil/fil0pagecompress.cc | 35 +- storage/innobase/fil/lz4.c | 822 ------------------ storage/innobase/fil/lz4.h | 205 ----- storage/innobase/handler/ha_innodb.cc | 3 +- storage/innobase/include/dict0pagecompress.ic | 12 +- storage/xtradb/CMakeLists.txt | 4 +- storage/xtradb/buf/buf0flu.cc | 4 +- storage/xtradb/buf/buf0mtflu.cc | 2 +- storage/xtradb/fil/fil0pagecompress.cc | 34 +- storage/xtradb/fil/lz4.c | 822 ------------------ storage/xtradb/fil/lz4.h | 205 ----- storage/xtradb/handler/ha_innodb.cc | 2 + storage/xtradb/include/dict0pagecompress.ic | 12 +- 17 files changed, 121 insertions(+), 2109 deletions(-) create mode 100644 cmake/lz4.cmake delete mode 100644 storage/innobase/fil/lz4.c delete mode 100644 storage/innobase/fil/lz4.h delete mode 100644 storage/xtradb/fil/lz4.c delete mode 100644 storage/xtradb/fil/lz4.h diff --git a/cmake/lz4.cmake b/cmake/lz4.cmake new file mode 100644 index 00000000000..56120e2cdd0 --- /dev/null +++ b/cmake/lz4.cmake @@ -0,0 +1,35 @@ +# Copyright (C) 2014, SkySQL Ab. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +MACRO (MYSQL_CHECK_LZ4) + +CHECK_INCLUDE_FILES(lz4.h HAVE_LZ4_H) +CHECK_LIBRARY_EXISTS(liblz4.a LZ4_compress_limitedOutput "" HAVE_LZ4_LIB) + +IF(HAVE_LZ4_LIB AND HAVE_LZ4_H) + ADD_DEFINITIONS(-DHAVE_LZ4=1) + LINK_LIBRARIES(liblz4.a) +ENDIF() +ENDMACRO() + +MACRO (MYSQL_CHECK_SHARED_LZ4) + +CHECK_INCLUDE_FILES(lz4.h HAVE_LZ4_H) +CHECK_LIBRARY_EXISTS(lz4 LZ4_compress_limitedOutput "" HAVE_LZ4_SHARED_LIB) + +IF (HAVE_LZ4_SHARED_LIB AND HAVE_LZ4_H) + ADD_DEFINITIONS(-DHAVE_LZ4=1) + LINK_LIBRARIES(lz4) +ENDIF() +ENDMACRO() diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index 64c22f9f7df..136a7a2ae0b 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -18,6 +18,9 @@ INCLUDE(CheckFunctionExists) INCLUDE(CheckCSourceCompiles) INCLUDE(CheckCSourceRuns) +INCLUDE(lz4) + +MYSQL_CHECK_LZ4() # OS tests IF(UNIX) @@ -293,7 +296,6 @@ SET(INNOBASE_SOURCES eval/eval0proc.cc fil/fil0fil.cc fil/fil0pagecompress.cc - fil/lz4.c fsp/fsp0fsp.cc fut/fut0fut.cc fut/fut0lst.cc diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index d131f2efb44..2174699bd19 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -2390,7 +2390,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( ulint next_loop_time = ut_time_ms() + 1000; ulint n_flushed = 0; ulint last_activity = srv_get_activity_count(); - ulint n_lru=0, n_pgc_flush=0, n_pgc_batch=0; + ulint n_lru=0; ut_ad(!srv_read_only_mode); @@ -2429,17 +2429,12 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( #endif /* Flush pages from flush_list if required */ - n_flushed += n_pgc_flush = page_cleaner_flush_pages_if_needed(); + n_flushed += page_cleaner_flush_pages_if_needed(); -#ifdef UNIV_DEBUG - if (n_pgc_flush) { - fprintf(stderr,"n_pgc_flush:%lu ",n_pgc_flush); - } -#endif } else { - n_pgc_batch = n_flushed = page_cleaner_do_flush_batch( - PCT_IO(100), - LSN_MAX); + n_flushed = page_cleaner_do_flush_batch( + PCT_IO(100), + LSN_MAX); if (n_flushed) { MONITOR_INC_VALUE_CUMULATIVE( @@ -2448,21 +2443,11 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( MONITOR_FLUSH_BACKGROUND_PAGES, n_flushed); } -#ifdef UNIV_DEBUG - if (n_pgc_batch) { - fprintf(stderr,"n_pgc_batch:%lu ",n_pgc_batch); - } -#endif } -#ifdef UNIV_DEBUG - if (n_lru || n_pgc_flush || n_pgc_batch) { - fprintf(stderr,"\n"); - n_lru = n_pgc_flush = n_pgc_batch = 0; - } -#endif } ut_ad(srv_shutdown_state > 0); + if (srv_fast_shutdown == 2) { /* In very fast shutdown we simulate a crash of buffer pool. 
We are not required to do any flushing */ diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index a81ccee5650..a42e6158250 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -548,7 +548,7 @@ buf_mtflu_flush_work_items( if((int)done_wi->id_usr == -1 && done_wi->wi_status == WRK_ITEM_SET ) { fprintf(stderr, - "**Set/Unused work_item[%lu] flush_type=%lu\n", + "**Set/Unused work_item[%lu] flush_type=%d\n", i, done_wi->wr.flush_type); ut_a(0); diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index 10ac273955f..26e975bddf3 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -63,7 +63,9 @@ static ulint srv_data_read, srv_data_written; #include #endif #include "row0mysql.h" +#ifdef HAVE_LZ4 #include "lz4.h" +#endif /****************************************************************//** For page compressed pages compress the page before actual write @@ -108,10 +110,11 @@ fil_compress_page( fprintf(stderr, "InnoDB: Note: Preparing for compress for space %lu name %s len %lu\n", space_id, fil_space_name(space), len); -#endif +#endif /* UNIV_DEBUG */ write_size = UNIV_PAGE_SIZE - header_len; +#ifdef HAVE_LZ4 if (srv_use_lz4) { err = LZ4_compress_limitedOutput((const char *)buf, (char *)out_buf+header_len, len, write_size); write_size = err; @@ -127,6 +130,7 @@ fil_compress_page( return (buf); } } else { +#endif /* HAVE_LZ4 */ err = compress2(out_buf+header_len, &write_size, buf, len, level); if (err != Z_OK) { @@ -139,7 +143,9 @@ fil_compress_page( *out_len = len; return (buf); } +#ifdef HAVE_LZ4 } +#endif /* HAVE_LZ4 */ /* Set up the page header */ memcpy(out_buf, buf, FIL_PAGE_DATA); @@ -148,11 +154,18 @@ fil_compress_page( /* Set up the correct page type */ mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); /* Set up the flush lsn to be compression algorithm */ + +#ifdef HAVE_LZ4 if (srv_use_lz4) { mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_LZ4); } else { +#endif /* HAVE_LZ4 */ mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_ZLIB); + +#ifdef HAVE_LZ4 } +#endif /* HAVE_LZ4 */ + /* Set up the actual payload lenght */ mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); @@ -161,12 +174,18 @@ fil_compress_page( ut_ad(fil_page_is_compressed(out_buf)); ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); + +#ifdef HAVE_LZ4 if (srv_use_lz4) { ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_LZ4); } else { +#endif /* HAVE_LZ4 */ ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_ZLIB); + +#ifdef HAVE_LZ4 } -#endif +#endif /* HAVE_LZ4 */ +#endif /* UNIV_DEBUG */ write_size+=header_len; /* Actual write needs to be alligned on block size */ @@ -236,8 +255,8 @@ fil_decompress_page( if (page_buf == NULL) { #ifdef UNIV_DEBUG fprintf(stderr, - "InnoDB: Note: Compression buffer not given, allocating...\n"); -#endif + "InnoDB: Note: FIL: Compression buffer not given, allocating...\n"); +#endif /* UNIV_DEBUG */ in_buf = static_cast(ut_malloc(UNIV_PAGE_SIZE)); } else { in_buf = page_buf; @@ -261,7 +280,7 @@ fil_decompress_page( fprintf(stderr, "InnoDB: Note: Preparing for decompress for len %lu\n", actual_size); -#endif +#endif /* UNIV_DEBUG */ err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned 
long)actual_size); @@ -284,11 +303,12 @@ fil_decompress_page( fprintf(stderr, "InnoDB: Note: Decompression succeeded for len %lu \n", len); -#endif +#endif /* UNIV_DEBUG */ +#ifdef HAVE_LZ4 } else if (compression_alg == FIL_PAGE_COMPRESSION_LZ4) { err = LZ4_decompress_fast((const char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (char *)in_buf, UNIV_PAGE_SIZE); - if (err != actual_size) { + if (err != (int)actual_size) { fprintf(stderr, "InnoDB: Corruption: Page is marked as compressed\n" "InnoDB: but decompression read only %d bytes.\n" @@ -298,6 +318,7 @@ fil_decompress_page( ut_error; } +#endif /* HAVE_LZ4 */ } else { fprintf(stderr, "InnoDB: Corruption: Page is marked as compressed\n" diff --git a/storage/innobase/fil/lz4.c b/storage/innobase/fil/lz4.c deleted file mode 100644 index 4e864de67d3..00000000000 --- a/storage/innobase/fil/lz4.c +++ /dev/null @@ -1,822 +0,0 @@ -/* - LZ4 - Fast LZ compression algorithm - Copyright (C) 2011-2013, Yann Collet. - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -*/ - -//************************************** -// Tuning parameters -//************************************** -// MEMORY_USAGE : -// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) -// Increasing memory usage improves compression ratio -// Reduced memory usage can improve speed, due to cache effect -// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache -#define MEMORY_USAGE 14 - -// HEAPMODE : -// Select how default compression functions will allocate memory for their hash table, -// in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)). -#define HEAPMODE 0 - - -//************************************** -// CPU Feature Detection -//************************************** -// 32 or 64 bits ? 
-#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \ - || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \ - || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \ - || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) // Detects 64 bits mode -# define LZ4_ARCH64 1 -#else -# define LZ4_ARCH64 0 -#endif - -// Little Endian or Big Endian ? -// Overwrite the #define below if you know your architecture endianess -#if defined (__GLIBC__) -# include -# if (__BYTE_ORDER == __BIG_ENDIAN) -# define LZ4_BIG_ENDIAN 1 -# endif -#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) -# define LZ4_BIG_ENDIAN 1 -#elif defined(__sparc) || defined(__sparc__) \ - || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ - || defined(__hpux) || defined(__hppa) \ - || defined(_MIPSEB) || defined(__s390__) -# define LZ4_BIG_ENDIAN 1 -#else -// Little Endian assumed. PDP Endian and other very rare endian format are unsupported. -#endif - -// Unaligned memory access is automatically enabled for "common" CPU, such as x86. -// For others CPU, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to ensure aligned access property -// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance -#if defined(__ARM_FEATURE_UNALIGNED) -# define LZ4_FORCE_UNALIGNED_ACCESS 1 -#endif - -// Define this parameter if your target system or compiler does not support hardware bit count -#if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count -# define LZ4_FORCE_SW_BITCOUNT -#endif - -// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : -// This option may provide a small boost to performance for some big endian cpu, although probably modest. -// You may set this option to 1 if data will remain within closed environment. 
-// This option is useless on Little_Endian CPU (such as x86) -//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 - - -//************************************** -// Compiler Options -//************************************** -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) // C99 -/* "restrict" is a known keyword */ -#else -# define restrict // Disable restrict -#endif - -#ifdef _MSC_VER // Visual Studio -# define FORCE_INLINE static __forceinline -# include // For Visual 2005 -# if LZ4_ARCH64 // 64-bits -# pragma intrinsic(_BitScanForward64) // For Visual 2005 -# pragma intrinsic(_BitScanReverse64) // For Visual 2005 -# else // 32-bits -# pragma intrinsic(_BitScanForward) // For Visual 2005 -# pragma intrinsic(_BitScanReverse) // For Visual 2005 -# endif -# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant -#else -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -#endif - -#ifdef _MSC_VER -# define lz4_bswap16(x) _byteswap_ushort(x) -#else -# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) -#endif - -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) -# define expect(expr,value) (__builtin_expect ((expr),(value)) ) -#else -# define expect(expr,value) (expr) -#endif - -#define likely(expr) expect((expr) != 0, 1) -#define unlikely(expr) expect((expr) != 0, 0) - - -//************************************** -// Memory routines -//************************************** -#include // malloc, calloc, free -#define ALLOCATOR(n,s) calloc(n,s) -#define FREEMEM free -#include // memset, memcpy -#define MEM_INIT memset - - -//************************************** -// Includes -//************************************** -#include "lz4.h" - - -//************************************** -// Basic Types -//************************************** -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; -#endif - -#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) -# define _PACKED __attribute__ ((packed)) -#else -# define _PACKED -#endif - -#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) -# pragma pack(1) -# else -# pragma pack(push, 1) -# endif -#endif - -typedef struct { U16 v; } _PACKED U16_S; -typedef struct { U32 v; } _PACKED U32_S; -typedef struct { U64 v; } _PACKED U64_S; -typedef struct {size_t v;} _PACKED size_t_S; - -#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# if defined(__SUNPRO_C) || defined(__SUNPRO_CC) -# pragma pack(0) -# else -# pragma pack(pop) -# endif -#endif - -#define A16(x) (((U16_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A64(x) (((U64_S *)(x))->v) -#define AARCH(x) (((size_t_S *)(x))->v) - - -//************************************** -// Constants -//************************************** -#define LZ4_HASHLOG (MEMORY_USAGE-2) -#define HASHTABLESIZE (1 << MEMORY_USAGE) -#define HASHNBCELLS4 (1 << LZ4_HASHLOG) - -#define MINMATCH 4 - -#define COPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT 
(COPYLENGTH+MINMATCH) -const int LZ4_minLength = (MFLIMIT+1); - -#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1)) -#define SKIPSTRENGTH 6 // Increasing this value will make the compression run slower on incompressible data - -#define MAXD_LOG 16 -#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) - -#define ML_BITS 4 -#define ML_MASK ((1U<=e; - - -//**************************** -// Private functions -//**************************** -#if LZ4_ARCH64 - -FORCE_INLINE int LZ4_NbCommonBytes (register U64 val) -{ -# if defined(LZ4_BIG_ENDIAN) -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll(val) >> 3); -# else - int r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif -# else -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanForward64( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctzll(val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif -# endif -} - -#else - -FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) -{ -# if defined(LZ4_BIG_ENDIAN) -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz(val) >> 3); -# else - int r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif -# else -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctz(val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif -# endif -} - -#endif - - -//**************************** -// Compression functions -//**************************** -FORCE_INLINE int LZ4_hashSequence(U32 sequence, tableType_t tableType) -{ - if (tableType == byU16) - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); - else - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); -} - -FORCE_INLINE int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); } - -FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - switch (tableType) - { - case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; } - case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; } - case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; } - } -} - -FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, 
const BYTE* srcBase) -{ - U32 h = LZ4_hashPosition(p, tableType); - LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); -} - -FORCE_INLINE const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; } - if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; } - { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } // default, to ensure a return -} - -FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - U32 h = LZ4_hashPosition(p, tableType); - return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); -} - - -FORCE_INLINE int LZ4_compress_generic( - void* ctx, - const char* source, - char* dest, - int inputSize, - int maxOutputSize, - - limitedOutput_directive limitedOutput, - tableType_t tableType, - prefix64k_directive prefix) -{ - const BYTE* ip = (const BYTE*) source; - const BYTE* const base = (prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->base : (const BYTE*) source; - const BYTE* const lowLimit = ((prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->bufferStart : (const BYTE*)source); - const BYTE* anchor = (const BYTE*) source; - const BYTE* const iend = ip + inputSize; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = iend - LASTLITERALS; - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + maxOutputSize; - - int length; - const int skipStrength = SKIPSTRENGTH; - U32 forwardH; - - // Init conditions - if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; // Unsupported input size, too large (or negative) - if ((prefix==withPrefix) && (ip != ((LZ4_Data_Structure*)ctx)->nextBlock)) return 0; // must continue from end of previous block - if (prefix==withPrefix) ((LZ4_Data_Structure*)ctx)->nextBlock=iend; // do it now, due to potential early exit - if ((tableType == byU16) && (inputSize>=LZ4_64KLIMIT)) return 0; // Size too large (not within 64K limit) - if (inputSize> skipStrength; - ip = forwardIp; - forwardIp = ip + step; - - if unlikely(forwardIp > mflimit) { goto _last_literals; } - - forwardH = LZ4_hashPosition(forwardIp, tableType); - ref = LZ4_getPositionOnHash(h, ctx, tableType, base); - LZ4_putPositionOnHash(ip, h, ctx, tableType, base); - - } while ((ref + MAX_DISTANCE < ip) || (A32(ref) != A32(ip))); - - // Catch up - while ((ip>anchor) && (ref > lowLimit) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; } - - // Encode Literal length - length = (int)(ip - anchor); - token = op++; - if ((limitedOutput) && unlikely(op + length + (2 + 1 + LASTLITERALS) + (length/255) > oend)) return 0; // Check output limit - if (length>=(int)RUN_MASK) - { - int len = length-RUN_MASK; - *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (BYTE)(length<>8) > oend)) return 0; // Check output limit - if (length>=(int)ML_MASK) - { - *token += ML_MASK; - length -= ML_MASK; - for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } - if (length >= 255) { length-=255; *op++ = 255; } - *op++ = (BYTE)length; - } - else *token += (BYTE)(length); - - // Test end of chunk - if (ip > mflimit) { anchor = ip; break; } - - // Fill table - LZ4_putPosition(ip-2, ctx, tableType, base); - - // Test next position - ref = LZ4_getPosition(ip, ctx, tableType, base); - LZ4_putPosition(ip, ctx, tableType, base); - if ((ref + MAX_DISTANCE >= ip) && (A32(ref) == 
A32(ip))) { token = op++; *token=0; goto _next_match; } - - // Prepare next loop - anchor = ip++; - forwardH = LZ4_hashPosition(ip, tableType); - } - -_last_literals: - // Encode Last Literals - { - int lastRun = (int)(iend - anchor); - if ((limitedOutput) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; // Check output limit - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (BYTE)(lastRun<hashTable, 0, sizeof(lz4ds->hashTable)); - lz4ds->bufferStart = base; - lz4ds->base = base; - lz4ds->nextBlock = base; -} - - -void* LZ4_create (const char* inputBuffer) -{ - void* lz4ds = ALLOCATOR(1, sizeof(LZ4_Data_Structure)); - LZ4_init ((LZ4_Data_Structure*)lz4ds, (const BYTE*)inputBuffer); - return lz4ds; -} - - -int LZ4_free (void* LZ4_Data) -{ - FREEMEM(LZ4_Data); - return (0); -} - - -char* LZ4_slideInputBuffer (void* LZ4_Data) -{ - LZ4_Data_Structure* lz4ds = (LZ4_Data_Structure*)LZ4_Data; - size_t delta = lz4ds->nextBlock - (lz4ds->bufferStart + 64 KB); - - if ( (lz4ds->base - delta > lz4ds->base) // underflow control - || ((size_t)(lz4ds->nextBlock - lz4ds->base) > 0xE0000000) ) // close to 32-bits limit - { - size_t deltaLimit = (lz4ds->nextBlock - 64 KB) - lz4ds->base; - int nH; - - for (nH=0; nH < HASHNBCELLS4; nH++) - { - if ((size_t)(lz4ds->hashTable[nH]) < deltaLimit) lz4ds->hashTable[nH] = 0; - else lz4ds->hashTable[nH] -= (U32)deltaLimit; - } - memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); - lz4ds->base = lz4ds->bufferStart; - lz4ds->nextBlock = lz4ds->base + 64 KB; - } - else - { - memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); - lz4ds->nextBlock -= delta; - lz4ds->base -= delta; - } - - return (char*)(lz4ds->nextBlock); -} - - -//**************************** -// Decompression functions -//**************************** - -// This generic decompression function cover all use cases. -// It shall be instanciated several times, using different sets of directives -// Note that it is essential this generic function is really inlined, -// in order to remove useless branches during compilation optimisation. -FORCE_INLINE int LZ4_decompress_generic( - const char* source, - char* dest, - int inputSize, // - int outputSize, // If endOnInput==endOnInputSize, this value is the max size of Output Buffer. - - int endOnInput, // endOnOutputSize, endOnInputSize - int prefix64k, // noPrefix, withPrefix - int partialDecoding, // full, partial - int targetOutputSize // only used if partialDecoding==partial - ) -{ - // Local Variables - const BYTE* restrict ip = (const BYTE*) source; - const BYTE* ref; - const BYTE* const iend = ip + inputSize; - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + outputSize; - BYTE* cpy; - BYTE* oexit = op + targetOutputSize; - - const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; // static reduces speed for LZ4_decompress_safe() on GCC64 - static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; - - - // Special cases - if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; // targetOutputSize too high => decode everything - if ((endOnInput) && unlikely(outputSize==0)) return ((inputSize==1) && (*ip==0)) ? 
0 : -1; // Empty output buffer - if ((!endOnInput) && unlikely(outputSize==0)) return (*ip==0?1:-1); - - - // Main Loop - while (1) - { - unsigned token; - size_t length; - - // get runlength - token = *ip++; - if ((length=(token>>ML_BITS)) == RUN_MASK) - { - unsigned s=255; - while (((endOnInput)?ip(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) - || ((!endOnInput) && (cpy>oend-COPYLENGTH))) - { - if (partialDecoding) - { - if (cpy > oend) goto _output_error; // Error : write attempt beyond end of output buffer - if ((endOnInput) && (ip+length > iend)) goto _output_error; // Error : read attempt beyond end of input buffer - } - else - { - if ((!endOnInput) && (cpy != oend)) goto _output_error; // Error : block decoding must stop exactly there - if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; // Error : input must be consumed - } - memcpy(op, ip, length); - ip += length; - op += length; - break; // Necessarily EOF, due to parsing restrictions - } - LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy; - - // get offset - LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; - if ((prefix64k==noPrefix) && unlikely(ref < (BYTE* const)dest)) goto _output_error; // Error : offset outside destination buffer - - // get matchlength - if ((length=(token&ML_MASK)) == ML_MASK) - { - while ((!endOnInput) || (ipoend-COPYLENGTH-(STEPSIZE-4)) - { - if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals - LZ4_SECURECOPY(op, ref, (oend-COPYLENGTH)); - while(op (unsigned int)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) -static inline int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } - -/* -LZ4_compressBound() : - Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible) - primarily useful for memory allocation of output buffer. - inline function is recommended for the general case, - macro is also provided when result needs to be evaluated at compilation (such as stack memory allocation). - - isize : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE - return : maximum output size in a "worst case" scenario - or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE) -*/ - - -int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); - -/* -LZ4_compress_limitedOutput() : - Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. - If it cannot achieve it, compression will stop, and result of the function will be zero. - This function never writes outside of provided output buffer. - - inputSize : Max supported value is LZ4_MAX_INPUT_VALUE - maxOutputSize : is the size of the destination buffer (which must be already allocated) - return : the number of bytes written in buffer 'dest' - or 0 if the compression fails -*/ - - -int LZ4_decompress_fast (const char* source, char* dest, int outputSize); - -/* -LZ4_decompress_fast() : - outputSize : is the original (uncompressed) size - return : the number of bytes read from the source buffer (in other words, the compressed size) - If the source stream is malformed, the function will stop decoding and return a negative result. - note : This function is a bit faster than LZ4_decompress_safe() - This function never writes outside of output buffers, but may read beyond input buffer in case of malicious data packet. - Use this function preferably into a trusted environment (data to decode comes from a trusted source). 
- Destination buffer must be already allocated. Its size must be a minimum of 'outputSize' bytes. -*/ - -int LZ4_decompress_safe_partial (const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize); - -/* -LZ4_decompress_safe_partial() : - This function decompress a compressed block of size 'inputSize' at position 'source' - into output buffer 'dest' of size 'maxOutputSize'. - The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached, - reducing decompression time. - return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) - Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller. - Always control how many bytes were decoded. - If the source stream is detected malformed, the function will stop decoding and return a negative result. - This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets -*/ - - -//**************************** -// Stream Functions -//**************************** - -void* LZ4_create (const char* inputBuffer); -int LZ4_compress_continue (void* LZ4_Data, const char* source, char* dest, int inputSize); -int LZ4_compress_limitedOutput_continue (void* LZ4_Data, const char* source, char* dest, int inputSize, int maxOutputSize); -char* LZ4_slideInputBuffer (void* LZ4_Data); -int LZ4_free (void* LZ4_Data); - -/* -These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks. -In order to achieve this, it is necessary to start creating the LZ4 Data Structure, thanks to the function : - -void* LZ4_create (const char* inputBuffer); -The result of the function is the (void*) pointer on the LZ4 Data Structure. -This pointer will be needed in all other functions. -If the pointer returned is NULL, then the allocation has failed, and compression must be aborted. -The only parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. -The input buffer must be already allocated, and size at least 192KB. -'inputBuffer' will also be the 'const char* source' of the first block. - -All blocks are expected to lay next to each other within the input buffer, starting from 'inputBuffer'. -To compress each block, use either LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(). -Their behavior are identical to LZ4_compress() or LZ4_compress_limitedOutput(), -but require the LZ4 Data Structure as their first argument, and check that each block starts right after the previous one. -If next block does not begin immediately after the previous one, the compression will fail (return 0). - -When it's no longer possible to lay the next block after the previous one (not enough space left into input buffer), a call to : -char* LZ4_slideInputBuffer(void* LZ4_Data); -must be performed. It will typically copy the latest 64KB of input at the beginning of input buffer. -Note that, for this function to work properly, minimum size of an input buffer must be 192KB. -==> The memory position where the next input data block must start is provided as the result of the function. - -Compression can then resume, using LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(), as usual. - -When compression is completed, a call to LZ4_free() will release the memory used by the LZ4 Data Structure. 
-*/ - - -int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int inputSize, int maxOutputSize); -int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int outputSize); - -/* -*_withPrefix64k() : - These decoding functions work the same as their "normal name" versions, - but can use up to 64KB of data in front of 'char* dest'. - These functions are necessary to decode inter-dependant blocks. -*/ - - -//**************************** -// Obsolete Functions -//**************************** - -static inline int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); } -static inline int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } - -/* -These functions are deprecated and should no longer be used. -They are provided here for compatibility with existing user programs. -*/ - - - -#if defined (__cplusplus) -} -#endif diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index d4ce4eb9c4f..c284028c51c 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -16607,11 +16607,12 @@ static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, "Use trim.", NULL, NULL, TRUE); +#ifdef HAVE_LZ4 static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, PLUGIN_VAR_OPCMDARG , "Use LZ4 for page compression", NULL, NULL, FALSE); - +#endif /* HAVE_LZ4 */ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), diff --git a/storage/innobase/include/dict0pagecompress.ic b/storage/innobase/include/dict0pagecompress.ic index fb9581fc657..ea3c7546850 100644 --- a/storage/innobase/include/dict0pagecompress.ic +++ b/storage/innobase/include/dict0pagecompress.ic @@ -54,12 +54,12 @@ dict_tf_verify_flags( DBUG_EXECUTE_IF("dict_tf_verify_flags_failure", return(ULINT_UNDEFINED);); - ut_ad(!table_unused); - ut_ad(!fsp_unused); - ut_ad(page_ssize == 0 || page_ssize != 0); /* silence compiler */ - ut_ad(compact == 0 || compact == 1); /* silence compiler */ - ut_ad(data_dir == 0 || data_dir == 1); /* silence compiler */ - ut_ad(post_antelope == 0 || post_antelope == 1); /* silence compiler */ + ut_a(!table_unused); + ut_a(!fsp_unused); + ut_a(page_ssize == 0 || page_ssize != 0); /* silence compiler */ + ut_a(compact == 0 || compact == 1); /* silence compiler */ + ut_a(data_dir == 0 || data_dir == 1); /* silence compiler */ + ut_a(post_antelope == 0 || post_antelope == 1); /* silence compiler */ if (ssize != zip_ssize) { fprintf(stderr, diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt index 14fbb14bdd7..a13b19638af 100644 --- a/storage/xtradb/CMakeLists.txt +++ b/storage/xtradb/CMakeLists.txt @@ -18,6 +18,9 @@ INCLUDE(CheckFunctionExists) INCLUDE(CheckCSourceCompiles) INCLUDE(CheckCSourceRuns) +INCLUDE(lz4) + +MYSQL_CHECK_SHARED_LZ4() # OS tests IF(UNIX) @@ -299,7 +302,6 @@ SET(INNOBASE_SOURCES eval/eval0proc.cc fil/fil0fil.cc fil/fil0pagecompress.cc - fil/lz4.c fsp/fsp0fsp.cc fut/fut0fut.cc fut/fut0lst.cc diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index a080ef0ee48..04fe25afa01 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -1863,8 +1863,10 @@ buf_flush_start( /* There is already a flush batch of the same type running */ - fprintf(stderr, "Error: flush_type %d n_flush %lu init_flush\n", +#ifdef UNIV_DEBUG + fprintf(stderr, 
"Error: flush_type %d n_flush %lu init_flush %lu\n", flush_type, buf_pool->n_flush[flush_type], buf_pool->init_flush[flush_type]); +#endif mutex_exit(&buf_pool->flush_state_mutex); diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index 14ece48519f..31cf74e7f5a 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -554,7 +554,7 @@ buf_mtflu_flush_work_items( if((int)done_wi->id_usr == -1 && done_wi->wi_status == WRK_ITEM_SET ) { fprintf(stderr, - "**Set/Unused work_item[%lu] flush_type=%lu\n", + "**Set/Unused work_item[%lu] flush_type=%d\n", i, done_wi->wr.flush_type); ut_a(0); diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index 10ac273955f..8f835113b7f 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -63,7 +63,9 @@ static ulint srv_data_read, srv_data_written; #include #endif #include "row0mysql.h" +#ifdef HAVE_LZ4 #include "lz4.h" +#endif /****************************************************************//** For page compressed pages compress the page before actual write @@ -108,10 +110,11 @@ fil_compress_page( fprintf(stderr, "InnoDB: Note: Preparing for compress for space %lu name %s len %lu\n", space_id, fil_space_name(space), len); -#endif +#endif /* UNIV_DEBUG */ write_size = UNIV_PAGE_SIZE - header_len; +#ifdef HAVE_LZ4 if (srv_use_lz4) { err = LZ4_compress_limitedOutput((const char *)buf, (char *)out_buf+header_len, len, write_size); write_size = err; @@ -127,6 +130,7 @@ fil_compress_page( return (buf); } } else { +#endif /* HAVE_LZ4 */ err = compress2(out_buf+header_len, &write_size, buf, len, level); if (err != Z_OK) { @@ -139,7 +143,9 @@ fil_compress_page( *out_len = len; return (buf); } +#ifdef HAVE_LZ4 } +#endif /* HAVE_LZ4 */ /* Set up the page header */ memcpy(out_buf, buf, FIL_PAGE_DATA); @@ -148,11 +154,15 @@ fil_compress_page( /* Set up the correct page type */ mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); /* Set up the flush lsn to be compression algorithm */ +#ifdef HAVE_LZ4 if (srv_use_lz4) { mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_LZ4); } else { +#endif /* HAVE_LZ4 */ mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_ZLIB); +#ifdef HAVE_LZ4 } +#endif /* HAVE_LZ4 */ /* Set up the actual payload lenght */ mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); @@ -161,12 +171,17 @@ fil_compress_page( ut_ad(fil_page_is_compressed(out_buf)); ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); + +#ifdef HAVE_LZ4 if (srv_use_lz4) { ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_LZ4); } else { +#endif /* HAVE_LZ4 */ ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_ZLIB); +#ifdef HAVE_LZ4 } -#endif +#endif /* HAVE_LZ4 */ +#endif /* UNIV_DEBUG */ write_size+=header_len; /* Actual write needs to be alligned on block size */ @@ -178,7 +193,7 @@ fil_compress_page( fprintf(stderr, "InnoDB: Note: Compression succeeded for space %lu name %s len %lu out_len %lu\n", space_id, fil_space_name(space), len, write_size); -#endif +#endif /* UNIV_DEBUG */ #define SECT_SIZE 512 @@ -236,8 +251,8 @@ fil_decompress_page( if (page_buf == NULL) { #ifdef UNIV_DEBUG fprintf(stderr, - "InnoDB: Note: Compression buffer not given, allocating...\n"); -#endif + "InnoDB: FIL: Note: Compression buffer not given, allocating...\n"); 
+#endif /* UNIV_DEBUG */ in_buf = static_cast(ut_malloc(UNIV_PAGE_SIZE)); } else { in_buf = page_buf; @@ -261,11 +276,10 @@ fil_decompress_page( fprintf(stderr, "InnoDB: Note: Preparing for decompress for len %lu\n", actual_size); -#endif +#endif /* UNIV_DEBUG */ err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size); - /* If uncompress fails it means that page is corrupted */ if (err != Z_OK) { @@ -284,11 +298,12 @@ fil_decompress_page( fprintf(stderr, "InnoDB: Note: Decompression succeeded for len %lu \n", len); -#endif +#endif /* UNIV_DEBUG */ +#ifdef HAVE_LZ4 } else if (compression_alg == FIL_PAGE_COMPRESSION_LZ4) { err = LZ4_decompress_fast((const char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (char *)in_buf, UNIV_PAGE_SIZE); - if (err != actual_size) { + if (err != (int)actual_size) { fprintf(stderr, "InnoDB: Corruption: Page is marked as compressed\n" "InnoDB: but decompression read only %d bytes.\n" @@ -298,6 +313,7 @@ fil_decompress_page( ut_error; } +#endif /* HAVE_LZ4 */ } else { fprintf(stderr, "InnoDB: Corruption: Page is marked as compressed\n" diff --git a/storage/xtradb/fil/lz4.c b/storage/xtradb/fil/lz4.c deleted file mode 100644 index 4e864de67d3..00000000000 --- a/storage/xtradb/fil/lz4.c +++ /dev/null @@ -1,822 +0,0 @@ -/* - LZ4 - Fast LZ compression algorithm - Copyright (C) 2011-2013, Yann Collet. - BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above - copyright notice, this list of conditions and the following disclaimer - in the documentation and/or other materials provided with the - distribution. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - You can contact the author at : - - LZ4 source repository : http://code.google.com/p/lz4/ - - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c -*/ - -//************************************** -// Tuning parameters -//************************************** -// MEMORY_USAGE : -// Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) 
-// Increasing memory usage improves compression ratio -// Reduced memory usage can improve speed, due to cache effect -// Default value is 14, for 16KB, which nicely fits into Intel x86 L1 cache -#define MEMORY_USAGE 14 - -// HEAPMODE : -// Select how default compression functions will allocate memory for their hash table, -// in memory stack (0:default, fastest), or in memory heap (1:requires memory allocation (malloc)). -#define HEAPMODE 0 - - -//************************************** -// CPU Feature Detection -//************************************** -// 32 or 64 bits ? -#if (defined(__x86_64__) || defined(_M_X64) || defined(_WIN64) \ - || defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) \ - || defined(__64BIT__) || defined(_LP64) || defined(__LP64__) \ - || defined(__ia64) || defined(__itanium__) || defined(_M_IA64) ) // Detects 64 bits mode -# define LZ4_ARCH64 1 -#else -# define LZ4_ARCH64 0 -#endif - -// Little Endian or Big Endian ? -// Overwrite the #define below if you know your architecture endianess -#if defined (__GLIBC__) -# include -# if (__BYTE_ORDER == __BIG_ENDIAN) -# define LZ4_BIG_ENDIAN 1 -# endif -#elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) -# define LZ4_BIG_ENDIAN 1 -#elif defined(__sparc) || defined(__sparc__) \ - || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) \ - || defined(__hpux) || defined(__hppa) \ - || defined(_MIPSEB) || defined(__s390__) -# define LZ4_BIG_ENDIAN 1 -#else -// Little Endian assumed. PDP Endian and other very rare endian format are unsupported. -#endif - -// Unaligned memory access is automatically enabled for "common" CPU, such as x86. -// For others CPU, such as ARM, the compiler may be more cautious, inserting unnecessary extra code to ensure aligned access property -// If you know your target CPU supports unaligned memory access, you want to force this option manually to improve performance -#if defined(__ARM_FEATURE_UNALIGNED) -# define LZ4_FORCE_UNALIGNED_ACCESS 1 -#endif - -// Define this parameter if your target system or compiler does not support hardware bit count -#if defined(_MSC_VER) && defined(_WIN32_WCE) // Visual Studio for Windows CE does not support Hardware bit count -# define LZ4_FORCE_SW_BITCOUNT -#endif - -// BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE : -// This option may provide a small boost to performance for some big endian cpu, although probably modest. -// You may set this option to 1 if data will remain within closed environment. 
-// This option is useless on Little_Endian CPU (such as x86) -//#define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 - - -//************************************** -// Compiler Options -//************************************** -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) // C99 -/* "restrict" is a known keyword */ -#else -# define restrict // Disable restrict -#endif - -#ifdef _MSC_VER // Visual Studio -# define FORCE_INLINE static __forceinline -# include // For Visual 2005 -# if LZ4_ARCH64 // 64-bits -# pragma intrinsic(_BitScanForward64) // For Visual 2005 -# pragma intrinsic(_BitScanReverse64) // For Visual 2005 -# else // 32-bits -# pragma intrinsic(_BitScanForward) // For Visual 2005 -# pragma intrinsic(_BitScanReverse) // For Visual 2005 -# endif -# pragma warning(disable : 4127) // disable: C4127: conditional expression is constant -#else -# ifdef __GNUC__ -# define FORCE_INLINE static inline __attribute__((always_inline)) -# else -# define FORCE_INLINE static inline -# endif -#endif - -#ifdef _MSC_VER -# define lz4_bswap16(x) _byteswap_ushort(x) -#else -# define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | (((x) & 0xffu) << 8))) -#endif - -#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) - -#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) -# define expect(expr,value) (__builtin_expect ((expr),(value)) ) -#else -# define expect(expr,value) (expr) -#endif - -#define likely(expr) expect((expr) != 0, 1) -#define unlikely(expr) expect((expr) != 0, 0) - - -//************************************** -// Memory routines -//************************************** -#include // malloc, calloc, free -#define ALLOCATOR(n,s) calloc(n,s) -#define FREEMEM free -#include // memset, memcpy -#define MEM_INIT memset - - -//************************************** -// Includes -//************************************** -#include "lz4.h" - - -//************************************** -// Basic Types -//************************************** -#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 -# include - typedef uint8_t BYTE; - typedef uint16_t U16; - typedef uint32_t U32; - typedef int32_t S32; - typedef uint64_t U64; -#else - typedef unsigned char BYTE; - typedef unsigned short U16; - typedef unsigned int U32; - typedef signed int S32; - typedef unsigned long long U64; -#endif - -#if defined(__GNUC__) && !defined(LZ4_FORCE_UNALIGNED_ACCESS) -# define _PACKED __attribute__ ((packed)) -#else -# define _PACKED -#endif - -#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# if defined(__IBMC__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) -# pragma pack(1) -# else -# pragma pack(push, 1) -# endif -#endif - -typedef struct { U16 v; } _PACKED U16_S; -typedef struct { U32 v; } _PACKED U32_S; -typedef struct { U64 v; } _PACKED U64_S; -typedef struct {size_t v;} _PACKED size_t_S; - -#if !defined(LZ4_FORCE_UNALIGNED_ACCESS) && !defined(__GNUC__) -# if defined(__SUNPRO_C) || defined(__SUNPRO_CC) -# pragma pack(0) -# else -# pragma pack(pop) -# endif -#endif - -#define A16(x) (((U16_S *)(x))->v) -#define A32(x) (((U32_S *)(x))->v) -#define A64(x) (((U64_S *)(x))->v) -#define AARCH(x) (((size_t_S *)(x))->v) - - -//************************************** -// Constants -//************************************** -#define LZ4_HASHLOG (MEMORY_USAGE-2) -#define HASHTABLESIZE (1 << MEMORY_USAGE) -#define HASHNBCELLS4 (1 << LZ4_HASHLOG) - -#define MINMATCH 4 - -#define COPYLENGTH 8 -#define LASTLITERALS 5 -#define MFLIMIT 
(COPYLENGTH+MINMATCH) -const int LZ4_minLength = (MFLIMIT+1); - -#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT-1)) -#define SKIPSTRENGTH 6 // Increasing this value will make the compression run slower on incompressible data - -#define MAXD_LOG 16 -#define MAX_DISTANCE ((1 << MAXD_LOG) - 1) - -#define ML_BITS 4 -#define ML_MASK ((1U<=e; - - -//**************************** -// Private functions -//**************************** -#if LZ4_ARCH64 - -FORCE_INLINE int LZ4_NbCommonBytes (register U64 val) -{ -# if defined(LZ4_BIG_ENDIAN) -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse64( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clzll(val) >> 3); -# else - int r; - if (!(val>>32)) { r=4; } else { r=0; val>>=32; } - if (!(val>>16)) { r+=2; val>>=8; } else { val>>=24; } - r += (!val); - return r; -# endif -# else -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanForward64( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctzll(val) >> 3); -# else - static const int DeBruijnBytePos[64] = { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 }; - return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58]; -# endif -# endif -} - -#else - -FORCE_INLINE int LZ4_NbCommonBytes (register U32 val) -{ -# if defined(LZ4_BIG_ENDIAN) -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r = 0; - _BitScanReverse( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_clz(val) >> 3); -# else - int r; - if (!(val>>16)) { r=2; val>>=8; } else { r=0; val>>=24; } - r += (!val); - return r; -# endif -# else -# if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) - unsigned long r; - _BitScanForward( &r, val ); - return (int)(r>>3); -# elif defined(__GNUC__) && (GCC_VERSION >= 304) && !defined(LZ4_FORCE_SW_BITCOUNT) - return (__builtin_ctz(val) >> 3); -# else - static const int DeBruijnBytePos[32] = { 0, 0, 3, 0, 3, 1, 3, 0, 3, 2, 2, 1, 3, 2, 0, 1, 3, 3, 1, 2, 2, 2, 2, 0, 3, 1, 2, 0, 1, 0, 1, 1 }; - return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27]; -# endif -# endif -} - -#endif - - -//**************************** -// Compression functions -//**************************** -FORCE_INLINE int LZ4_hashSequence(U32 sequence, tableType_t tableType) -{ - if (tableType == byU16) - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-(LZ4_HASHLOG+1))); - else - return (((sequence) * 2654435761U) >> ((MINMATCH*8)-LZ4_HASHLOG)); -} - -FORCE_INLINE int LZ4_hashPosition(const BYTE* p, tableType_t tableType) { return LZ4_hashSequence(A32(p), tableType); } - -FORCE_INLINE void LZ4_putPositionOnHash(const BYTE* p, U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - switch (tableType) - { - case byPtr: { const BYTE** hashTable = (const BYTE**) tableBase; hashTable[h] = p; break; } - case byU32: { U32* hashTable = (U32*) tableBase; hashTable[h] = (U32)(p-srcBase); break; } - case byU16: { U16* hashTable = (U16*) tableBase; hashTable[h] = (U16)(p-srcBase); break; } - } -} - -FORCE_INLINE void LZ4_putPosition(const BYTE* p, void* tableBase, tableType_t tableType, 
const BYTE* srcBase) -{ - U32 h = LZ4_hashPosition(p, tableType); - LZ4_putPositionOnHash(p, h, tableBase, tableType, srcBase); -} - -FORCE_INLINE const BYTE* LZ4_getPositionOnHash(U32 h, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - if (tableType == byPtr) { const BYTE** hashTable = (const BYTE**) tableBase; return hashTable[h]; } - if (tableType == byU32) { U32* hashTable = (U32*) tableBase; return hashTable[h] + srcBase; } - { U16* hashTable = (U16*) tableBase; return hashTable[h] + srcBase; } // default, to ensure a return -} - -FORCE_INLINE const BYTE* LZ4_getPosition(const BYTE* p, void* tableBase, tableType_t tableType, const BYTE* srcBase) -{ - U32 h = LZ4_hashPosition(p, tableType); - return LZ4_getPositionOnHash(h, tableBase, tableType, srcBase); -} - - -FORCE_INLINE int LZ4_compress_generic( - void* ctx, - const char* source, - char* dest, - int inputSize, - int maxOutputSize, - - limitedOutput_directive limitedOutput, - tableType_t tableType, - prefix64k_directive prefix) -{ - const BYTE* ip = (const BYTE*) source; - const BYTE* const base = (prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->base : (const BYTE*) source; - const BYTE* const lowLimit = ((prefix==withPrefix) ? ((LZ4_Data_Structure*)ctx)->bufferStart : (const BYTE*)source); - const BYTE* anchor = (const BYTE*) source; - const BYTE* const iend = ip + inputSize; - const BYTE* const mflimit = iend - MFLIMIT; - const BYTE* const matchlimit = iend - LASTLITERALS; - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + maxOutputSize; - - int length; - const int skipStrength = SKIPSTRENGTH; - U32 forwardH; - - // Init conditions - if ((U32)inputSize > (U32)LZ4_MAX_INPUT_SIZE) return 0; // Unsupported input size, too large (or negative) - if ((prefix==withPrefix) && (ip != ((LZ4_Data_Structure*)ctx)->nextBlock)) return 0; // must continue from end of previous block - if (prefix==withPrefix) ((LZ4_Data_Structure*)ctx)->nextBlock=iend; // do it now, due to potential early exit - if ((tableType == byU16) && (inputSize>=LZ4_64KLIMIT)) return 0; // Size too large (not within 64K limit) - if (inputSize> skipStrength; - ip = forwardIp; - forwardIp = ip + step; - - if unlikely(forwardIp > mflimit) { goto _last_literals; } - - forwardH = LZ4_hashPosition(forwardIp, tableType); - ref = LZ4_getPositionOnHash(h, ctx, tableType, base); - LZ4_putPositionOnHash(ip, h, ctx, tableType, base); - - } while ((ref + MAX_DISTANCE < ip) || (A32(ref) != A32(ip))); - - // Catch up - while ((ip>anchor) && (ref > lowLimit) && unlikely(ip[-1]==ref[-1])) { ip--; ref--; } - - // Encode Literal length - length = (int)(ip - anchor); - token = op++; - if ((limitedOutput) && unlikely(op + length + (2 + 1 + LASTLITERALS) + (length/255) > oend)) return 0; // Check output limit - if (length>=(int)RUN_MASK) - { - int len = length-RUN_MASK; - *token=(RUN_MASK<= 255 ; len-=255) *op++ = 255; - *op++ = (BYTE)len; - } - else *token = (BYTE)(length<>8) > oend)) return 0; // Check output limit - if (length>=(int)ML_MASK) - { - *token += ML_MASK; - length -= ML_MASK; - for (; length > 509 ; length-=510) { *op++ = 255; *op++ = 255; } - if (length >= 255) { length-=255; *op++ = 255; } - *op++ = (BYTE)length; - } - else *token += (BYTE)(length); - - // Test end of chunk - if (ip > mflimit) { anchor = ip; break; } - - // Fill table - LZ4_putPosition(ip-2, ctx, tableType, base); - - // Test next position - ref = LZ4_getPosition(ip, ctx, tableType, base); - LZ4_putPosition(ip, ctx, tableType, base); - if ((ref + MAX_DISTANCE >= ip) && (A32(ref) == 
A32(ip))) { token = op++; *token=0; goto _next_match; } - - // Prepare next loop - anchor = ip++; - forwardH = LZ4_hashPosition(ip, tableType); - } - -_last_literals: - // Encode Last Literals - { - int lastRun = (int)(iend - anchor); - if ((limitedOutput) && (((char*)op - dest) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)maxOutputSize)) return 0; // Check output limit - if (lastRun>=(int)RUN_MASK) { *op++=(RUN_MASK<= 255 ; lastRun-=255) *op++ = 255; *op++ = (BYTE) lastRun; } - else *op++ = (BYTE)(lastRun<hashTable, 0, sizeof(lz4ds->hashTable)); - lz4ds->bufferStart = base; - lz4ds->base = base; - lz4ds->nextBlock = base; -} - - -void* LZ4_create (const char* inputBuffer) -{ - void* lz4ds = ALLOCATOR(1, sizeof(LZ4_Data_Structure)); - LZ4_init ((LZ4_Data_Structure*)lz4ds, (const BYTE*)inputBuffer); - return lz4ds; -} - - -int LZ4_free (void* LZ4_Data) -{ - FREEMEM(LZ4_Data); - return (0); -} - - -char* LZ4_slideInputBuffer (void* LZ4_Data) -{ - LZ4_Data_Structure* lz4ds = (LZ4_Data_Structure*)LZ4_Data; - size_t delta = lz4ds->nextBlock - (lz4ds->bufferStart + 64 KB); - - if ( (lz4ds->base - delta > lz4ds->base) // underflow control - || ((size_t)(lz4ds->nextBlock - lz4ds->base) > 0xE0000000) ) // close to 32-bits limit - { - size_t deltaLimit = (lz4ds->nextBlock - 64 KB) - lz4ds->base; - int nH; - - for (nH=0; nH < HASHNBCELLS4; nH++) - { - if ((size_t)(lz4ds->hashTable[nH]) < deltaLimit) lz4ds->hashTable[nH] = 0; - else lz4ds->hashTable[nH] -= (U32)deltaLimit; - } - memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); - lz4ds->base = lz4ds->bufferStart; - lz4ds->nextBlock = lz4ds->base + 64 KB; - } - else - { - memcpy((void*)(lz4ds->bufferStart), (const void*)(lz4ds->nextBlock - 64 KB), 64 KB); - lz4ds->nextBlock -= delta; - lz4ds->base -= delta; - } - - return (char*)(lz4ds->nextBlock); -} - - -//**************************** -// Decompression functions -//**************************** - -// This generic decompression function cover all use cases. -// It shall be instanciated several times, using different sets of directives -// Note that it is essential this generic function is really inlined, -// in order to remove useless branches during compilation optimisation. -FORCE_INLINE int LZ4_decompress_generic( - const char* source, - char* dest, - int inputSize, // - int outputSize, // If endOnInput==endOnInputSize, this value is the max size of Output Buffer. - - int endOnInput, // endOnOutputSize, endOnInputSize - int prefix64k, // noPrefix, withPrefix - int partialDecoding, // full, partial - int targetOutputSize // only used if partialDecoding==partial - ) -{ - // Local Variables - const BYTE* restrict ip = (const BYTE*) source; - const BYTE* ref; - const BYTE* const iend = ip + inputSize; - - BYTE* op = (BYTE*) dest; - BYTE* const oend = op + outputSize; - BYTE* cpy; - BYTE* oexit = op + targetOutputSize; - - const size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; // static reduces speed for LZ4_decompress_safe() on GCC64 - static const size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; - - - // Special cases - if ((partialDecoding) && (oexit> oend-MFLIMIT)) oexit = oend-MFLIMIT; // targetOutputSize too high => decode everything - if ((endOnInput) && unlikely(outputSize==0)) return ((inputSize==1) && (*ip==0)) ? 
0 : -1; // Empty output buffer - if ((!endOnInput) && unlikely(outputSize==0)) return (*ip==0?1:-1); - - - // Main Loop - while (1) - { - unsigned token; - size_t length; - - // get runlength - token = *ip++; - if ((length=(token>>ML_BITS)) == RUN_MASK) - { - unsigned s=255; - while (((endOnInput)?ip(partialDecoding?oexit:oend-MFLIMIT)) || (ip+length>iend-(2+1+LASTLITERALS))) ) - || ((!endOnInput) && (cpy>oend-COPYLENGTH))) - { - if (partialDecoding) - { - if (cpy > oend) goto _output_error; // Error : write attempt beyond end of output buffer - if ((endOnInput) && (ip+length > iend)) goto _output_error; // Error : read attempt beyond end of input buffer - } - else - { - if ((!endOnInput) && (cpy != oend)) goto _output_error; // Error : block decoding must stop exactly there - if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; // Error : input must be consumed - } - memcpy(op, ip, length); - ip += length; - op += length; - break; // Necessarily EOF, due to parsing restrictions - } - LZ4_WILDCOPY(op, ip, cpy); ip -= (op-cpy); op = cpy; - - // get offset - LZ4_READ_LITTLEENDIAN_16(ref,cpy,ip); ip+=2; - if ((prefix64k==noPrefix) && unlikely(ref < (BYTE* const)dest)) goto _output_error; // Error : offset outside destination buffer - - // get matchlength - if ((length=(token&ML_MASK)) == ML_MASK) - { - while ((!endOnInput) || (ipoend-COPYLENGTH-(STEPSIZE-4)) - { - if (cpy > oend-LASTLITERALS) goto _output_error; // Error : last 5 bytes must be literals - LZ4_SECURECOPY(op, ref, (oend-COPYLENGTH)); - while(op (unsigned int)LZ4_MAX_INPUT_SIZE ? 0 : (isize) + ((isize)/255) + 16) -static inline int LZ4_compressBound(int isize) { return LZ4_COMPRESSBOUND(isize); } - -/* -LZ4_compressBound() : - Provides the maximum size that LZ4 may output in a "worst case" scenario (input data not compressible) - primarily useful for memory allocation of output buffer. - inline function is recommended for the general case, - macro is also provided when result needs to be evaluated at compilation (such as stack memory allocation). - - isize : is the input size. Max supported value is LZ4_MAX_INPUT_SIZE - return : maximum output size in a "worst case" scenario - or 0, if input size is too large ( > LZ4_MAX_INPUT_SIZE) -*/ - - -int LZ4_compress_limitedOutput (const char* source, char* dest, int inputSize, int maxOutputSize); - -/* -LZ4_compress_limitedOutput() : - Compress 'inputSize' bytes from 'source' into an output buffer 'dest' of maximum size 'maxOutputSize'. - If it cannot achieve it, compression will stop, and result of the function will be zero. - This function never writes outside of provided output buffer. - - inputSize : Max supported value is LZ4_MAX_INPUT_VALUE - maxOutputSize : is the size of the destination buffer (which must be already allocated) - return : the number of bytes written in buffer 'dest' - or 0 if the compression fails -*/ - - -int LZ4_decompress_fast (const char* source, char* dest, int outputSize); - -/* -LZ4_decompress_fast() : - outputSize : is the original (uncompressed) size - return : the number of bytes read from the source buffer (in other words, the compressed size) - If the source stream is malformed, the function will stop decoding and return a negative result. - note : This function is a bit faster than LZ4_decompress_safe() - This function never writes outside of output buffers, but may read beyond input buffer in case of malicious data packet. - Use this function preferably into a trusted environment (data to decode comes from a trusted source). 
- Destination buffer must be already allocated. Its size must be a minimum of 'outputSize' bytes. -*/ - -int LZ4_decompress_safe_partial (const char* source, char* dest, int inputSize, int targetOutputSize, int maxOutputSize); - -/* -LZ4_decompress_safe_partial() : - This function decompress a compressed block of size 'inputSize' at position 'source' - into output buffer 'dest' of size 'maxOutputSize'. - The function tries to stop decompressing operation as soon as 'targetOutputSize' has been reached, - reducing decompression time. - return : the number of bytes decoded in the destination buffer (necessarily <= maxOutputSize) - Note : this number can be < 'targetOutputSize' should the compressed block to decode be smaller. - Always control how many bytes were decoded. - If the source stream is detected malformed, the function will stop decoding and return a negative result. - This function never writes outside of output buffer, and never reads outside of input buffer. It is therefore protected against malicious data packets -*/ - - -//**************************** -// Stream Functions -//**************************** - -void* LZ4_create (const char* inputBuffer); -int LZ4_compress_continue (void* LZ4_Data, const char* source, char* dest, int inputSize); -int LZ4_compress_limitedOutput_continue (void* LZ4_Data, const char* source, char* dest, int inputSize, int maxOutputSize); -char* LZ4_slideInputBuffer (void* LZ4_Data); -int LZ4_free (void* LZ4_Data); - -/* -These functions allow the compression of dependent blocks, where each block benefits from prior 64 KB within preceding blocks. -In order to achieve this, it is necessary to start creating the LZ4 Data Structure, thanks to the function : - -void* LZ4_create (const char* inputBuffer); -The result of the function is the (void*) pointer on the LZ4 Data Structure. -This pointer will be needed in all other functions. -If the pointer returned is NULL, then the allocation has failed, and compression must be aborted. -The only parameter 'const char* inputBuffer' must, obviously, point at the beginning of input buffer. -The input buffer must be already allocated, and size at least 192KB. -'inputBuffer' will also be the 'const char* source' of the first block. - -All blocks are expected to lay next to each other within the input buffer, starting from 'inputBuffer'. -To compress each block, use either LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(). -Their behavior are identical to LZ4_compress() or LZ4_compress_limitedOutput(), -but require the LZ4 Data Structure as their first argument, and check that each block starts right after the previous one. -If next block does not begin immediately after the previous one, the compression will fail (return 0). - -When it's no longer possible to lay the next block after the previous one (not enough space left into input buffer), a call to : -char* LZ4_slideInputBuffer(void* LZ4_Data); -must be performed. It will typically copy the latest 64KB of input at the beginning of input buffer. -Note that, for this function to work properly, minimum size of an input buffer must be 192KB. -==> The memory position where the next input data block must start is provided as the result of the function. - -Compression can then resume, using LZ4_compress_continue() or LZ4_compress_limitedOutput_continue(), as usual. - -When compression is completed, a call to LZ4_free() will release the memory used by the LZ4 Data Structure. 
-*/ - - -int LZ4_decompress_safe_withPrefix64k (const char* source, char* dest, int inputSize, int maxOutputSize); -int LZ4_decompress_fast_withPrefix64k (const char* source, char* dest, int outputSize); - -/* -*_withPrefix64k() : - These decoding functions work the same as their "normal name" versions, - but can use up to 64KB of data in front of 'char* dest'. - These functions are necessary to decode inter-dependant blocks. -*/ - - -//**************************** -// Obsolete Functions -//**************************** - -static inline int LZ4_uncompress (const char* source, char* dest, int outputSize) { return LZ4_decompress_fast(source, dest, outputSize); } -static inline int LZ4_uncompress_unknownOutputSize (const char* source, char* dest, int isize, int maxOutputSize) { return LZ4_decompress_safe(source, dest, isize, maxOutputSize); } - -/* -These functions are deprecated and should no longer be used. -They are provided here for compatibility with existing user programs. -*/ - - - -#if defined (__cplusplus) -} -#endif diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index ead0b0fc902..2b23526da5d 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -17968,10 +17968,12 @@ static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, "Use trim.", NULL, NULL, TRUE); +#ifdef HAVE_LZ4 static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, PLUGIN_VAR_OPCMDARG , "Use LZ4 for page compression", NULL, NULL, FALSE); +#endif /* HAVE_LZ4 */ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(log_block_size), diff --git a/storage/xtradb/include/dict0pagecompress.ic b/storage/xtradb/include/dict0pagecompress.ic index fb9581fc657..ea3c7546850 100644 --- a/storage/xtradb/include/dict0pagecompress.ic +++ b/storage/xtradb/include/dict0pagecompress.ic @@ -54,12 +54,12 @@ dict_tf_verify_flags( DBUG_EXECUTE_IF("dict_tf_verify_flags_failure", return(ULINT_UNDEFINED);); - ut_ad(!table_unused); - ut_ad(!fsp_unused); - ut_ad(page_ssize == 0 || page_ssize != 0); /* silence compiler */ - ut_ad(compact == 0 || compact == 1); /* silence compiler */ - ut_ad(data_dir == 0 || data_dir == 1); /* silence compiler */ - ut_ad(post_antelope == 0 || post_antelope == 1); /* silence compiler */ + ut_a(!table_unused); + ut_a(!fsp_unused); + ut_a(page_ssize == 0 || page_ssize != 0); /* silence compiler */ + ut_a(compact == 0 || compact == 1); /* silence compiler */ + ut_a(data_dir == 0 || data_dir == 1); /* silence compiler */ + ut_a(post_antelope == 0 || post_antelope == 1); /* silence compiler */ if (ssize != zip_ssize) { fprintf(stderr, From 184e302ab471ebf47662221eba883cb47a3fa84c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Wed, 12 Feb 2014 07:09:06 +0200 Subject: [PATCH 17/56] Fix compiler error if lz4 is not found on the system. 
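
The compile failure happens because the earlier page-compression patch made the
definition of the use_lz4 system variable conditional on HAVE_LZ4, while the
MYSQL_SYSVAR(use_lz4) entry in innobase_system_variables[] was still compiled
unconditionally, so a build on a system without liblz4 references a symbol that
no longer exists. The hunks below guard the registration with the same #ifdef.
A minimal stand-alone illustration of the pattern (plain C++ with made-up names
standing in for the MYSQL_SYSVAR machinery, not the server's actual code):

    #include <cstdio>

    /* Defined only when the optional library is available, exactly like
       srv_use_lz4 behind HAVE_LZ4 in the real code. */
    #ifdef HAVE_LZ4
    static bool srv_use_lz4 = false;
    #endif

    /* Every reference must sit behind the same guard as the definition,
       otherwise builds without the library fail. */
    static bool* system_variables[] = {
    #ifdef HAVE_LZ4
        &srv_use_lz4,
    #endif
        nullptr        /* terminator, mirroring innobase_system_variables[] */
    };

    int main()
    {
        std::printf("%zu optional settings registered\n",
                    sizeof(system_variables) / sizeof(system_variables[0]) - 1);
        return 0;
    }

Either both the definition and every reference sit behind the guard, or neither
does; guarding only one side is what broke the build this commit repairs.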
--- storage/innobase/handler/ha_innodb.cc | 2 ++ storage/xtradb/handler/ha_innodb.cc | 2 ++ 2 files changed, 4 insertions(+) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index c284028c51c..812aa0cfe83 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -16763,7 +16763,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(trim_pct), MYSQL_SYSVAR(compress_index_pages), MYSQL_SYSVAR(use_trim), +#ifdef HAVE_LZ4 MYSQL_SYSVAR(use_lz4), +#endif NULL }; diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 2b23526da5d..557872abdf0 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -18169,7 +18169,9 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(trim_pct), MYSQL_SYSVAR(compress_index_pages), MYSQL_SYSVAR(use_trim), +#ifdef HAVE_LZ4 MYSQL_SYSVAR(use_lz4), +#endif NULL }; From f6ad325883dafdcdf1645d198bfe1a59e5a2b44b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Wed, 12 Feb 2014 10:55:45 +0200 Subject: [PATCH 18/56] Code cleanup. Removed those questions that are now addressed. --- storage/innobase/buf/buf0mtflu.cc | 41 +++++++++++++++++-------------- storage/xtradb/buf/buf0mtflu.cc | 41 +++++++++++++++++-------------- 2 files changed, 46 insertions(+), 36 deletions(-) diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index a42e6158250..9cf5a66fc72 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -108,7 +108,7 @@ typedef struct wrk_itm wr_tsk_t wr; /*!< Flush page list */ rd_tsk_t rd; /*!< Decompress page list */ ulint n_flushed; /*!< Flushed pages count */ - os_thread_t id_usr; /*!< Thread-id currently working */ + os_thread_id_t id_usr; /*!< Thread-id currently working */ wrk_status_t wi_status; /*!< Work item status */ struct wrk_itm *next; /*!< Next work item */ } wrk_t; @@ -125,12 +125,12 @@ typedef struct thread_sync wthr_status_t wt_status; /*!< Worker thread status */ mem_heap_t* wheap; /*!< Work heap where memory is allocated */ - wrk_t* work_item; /*!< Work items to be processed */ + wrk_t* work_item; /*!< Array of work-items that are + individually accessed by multiple + threads. Items are accessed in a + thread safe manner.*/ } thread_sync_t; -/* QUESTION: Is this array used from several threads concurrently ? */ -// static wrk_t work_items[MTFLUSH_MAX_WORKER]; - /* TODO: REALLY NEEDED ? */ static int mtflush_work_initialized = -1; static os_fast_mutex_t mtflush_mtx; @@ -203,9 +203,7 @@ buf_mtflu_flush_pool_instance( help in the retry which will follow the failure. */ #ifdef UNIV_DEBUG - /* QUESTION: is this a really failure ? 
*/ - fprintf(stderr, "flush_start Failed, flush_type:%d\n", - work_item->wr.flush_type); + fprintf(stderr, "flush start failed.\n"); #endif return 0; } @@ -230,7 +228,7 @@ buf_mtflu_flush_pool_instance( buf_flush_end(work_item->wr.buf_pool, work_item->wr.flush_type); buf_flush_common(work_item->wr.flush_type, work_item->n_flushed); - return 0; + return work_item->n_flushed; } #ifdef UNIV_DEBUG @@ -287,23 +285,30 @@ mtflush_service_io( return; } - work_item->id_usr = mtflush_io->wthread; + work_item->id_usr = os_thread_get_curr_id(); + + /* This works as a producer/consumer model, where in tasks are + * inserted into the work-queue (wq) and completions are based + * on the type of operations performed and as a result the WRITE/ + * compression/flush operation completions get posted to wr_cq. + * And READ/decompress operations completions get posted to rd_cq. + * in future we may have others. + */ switch(work_item->tsk) { case MT_WRK_NONE: ut_a(work_item->wi_status == WRK_ITEM_EXIT); work_item->wi_status = WRK_ITEM_SUCCESS; - /* QUESTION: Why completed work items are inserted to - completion queue ? */ ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); break; case MT_WRK_WRITE: work_item->wi_status = WRK_ITEM_START; /* Process work item */ - /* QUESTION: Is this a really a error ? */ - if (0 != (n_flushed = buf_mtflu_flush_pool_instance(work_item))) { - fprintf(stderr, "FLUSH op failed ret:%lu\n", n_flushed); + if (0 == (n_flushed = buf_mtflu_flush_pool_instance(work_item))) { +#ifdef UNIV_DEBUG + fprintf(stderr, "No pages flushed\n"); +#endif work_item->wi_status = WRK_ITEM_FAILED; } work_item->wi_status = WRK_ITEM_SUCCESS; @@ -551,7 +556,7 @@ buf_mtflu_flush_work_items( "**Set/Unused work_item[%lu] flush_type=%d\n", i, done_wi->wr.flush_type); - ut_a(0); + ut_ad(0); } n_flushed+= done_wi->n_flushed; @@ -598,7 +603,7 @@ buf_mtflu_flush_list( / srv_buf_pool_instances; } - /* QUESTION: What is procted by below mutex ? */ + /* This lock is to safequard against re-entry if any. */ os_fast_mutex_lock(&mtflush_mtx); buf_mtflu_flush_work_items(srv_buf_pool_instances, cnt_flush, BUF_FLUSH_LIST, @@ -641,7 +646,7 @@ buf_mtflu_flush_LRU_tail(void) ut_a(buf_mtflu_init_done()); - /* QUESTION: What is protected by below mutex ? */ + /* This lock is to safeguard against re-entry if any */ os_fast_mutex_lock(&mtflush_mtx); buf_mtflu_flush_work_items(srv_buf_pool_instances, cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index 31cf74e7f5a..f98d99228af 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -108,7 +108,7 @@ typedef struct wrk_itm wr_tsk_t wr; /*!< Flush page list */ rd_tsk_t rd; /*!< Decompress page list */ ulint n_flushed; /*!< Flushed pages count */ - os_thread_t id_usr; /*!< Thread-id currently working */ + os_thread_id_t id_usr; /*!< Thread-id currently working */ wrk_status_t wi_status; /*!< Work item status */ struct wrk_itm *next; /*!< Next work item */ } wrk_t; @@ -125,12 +125,12 @@ typedef struct thread_sync wthr_status_t wt_status; /*!< Worker thread status */ mem_heap_t* wheap; /*!< Work heap where memory is allocated */ - wrk_t* work_item; /*!< Work items to be processed */ + wrk_t* work_item; /*!< Array of work-items that are + individually accessed by multiple + threads. Items are accessed in a + thread safe manner.*/ } thread_sync_t; -/* QUESTION: Is this array used from several threads concurrently ? 
*/ -// static wrk_t work_items[MTFLUSH_MAX_WORKER]; - /* TODO: REALLY NEEDED ? */ static int mtflush_work_initialized = -1; static os_fast_mutex_t mtflush_mtx; @@ -205,9 +205,7 @@ buf_mtflu_flush_pool_instance( help in the retry which will follow the failure. */ #ifdef UNIV_DEBUG - /* QUESTION: is this a really failure ? */ - fprintf(stderr, "flush_start Failed, flush_type:%d\n", - work_item->wr.flush_type); + fprintf(stderr, "flush start failed.\n"); #endif return 0; } @@ -235,7 +233,7 @@ buf_mtflu_flush_pool_instance( buf_flush_end(work_item->wr.buf_pool, work_item->wr.flush_type); buf_flush_common(work_item->wr.flush_type, work_item->n_flushed); - return 0; + return work_item->n_flushed; } #ifdef UNIV_DEBUG @@ -293,23 +291,30 @@ mtflush_service_io( return; } - work_item->id_usr = mtflush_io->wthread; + work_item->id_usr = os_thread_get_curr_id(); + + /* This works as a producer/consumer model, where in tasks are + * inserted into the work-queue (wq) and completions are based + * on the type of operations performed and as a result the WRITE/ + * compression/flush operation completions get posted to wr_cq. + * And READ/decompress operations completions get posted to rd_cq. + * in future we may have others. + */ switch(work_item->tsk) { case MT_WRK_NONE: ut_a(work_item->wi_status == WRK_ITEM_EXIT); work_item->wi_status = WRK_ITEM_SUCCESS; - /* QUESTION: Why completed work items are inserted to - completion queue ? */ ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); break; case MT_WRK_WRITE: work_item->wi_status = WRK_ITEM_START; /* Process work item */ - /* QUESTION: Is this a really a error ? */ - if (0 != (n_flushed = buf_mtflu_flush_pool_instance(work_item))) { - fprintf(stderr, "FLUSH op failed ret:%lu\n", n_flushed); + if (0 == (n_flushed = buf_mtflu_flush_pool_instance(work_item))) { +#ifdef UNIV_DEBUG + fprintf(stderr, "No pages flushed\n"); +#endif work_item->wi_status = WRK_ITEM_FAILED; } work_item->wi_status = WRK_ITEM_SUCCESS; @@ -557,7 +562,7 @@ buf_mtflu_flush_work_items( "**Set/Unused work_item[%lu] flush_type=%d\n", i, done_wi->wr.flush_type); - ut_a(0); + ut_ad(0); } n_flushed+= done_wi->n_flushed; @@ -604,7 +609,7 @@ buf_mtflu_flush_list( / srv_buf_pool_instances; } - /* QUESTION: What is procted by below mutex ? */ + /* This lock is to safequard against re-entry if any. */ os_fast_mutex_lock(&mtflush_mtx); buf_mtflu_flush_work_items(srv_buf_pool_instances, cnt_flush, BUF_FLUSH_LIST, @@ -647,7 +652,7 @@ buf_mtflu_flush_LRU_tail(void) ut_a(buf_mtflu_init_done()); - /* QUESTION: What is protected by below mutex ? */ + /* This lock is to safeguard against re-entry if any */ os_fast_mutex_lock(&mtflush_mtx); buf_mtflu_flush_work_items(srv_buf_pool_instances, cnt_flush, BUF_FLUSH_LRU, srv_LRU_scan_depth, 0); From 1fa19bf777cb435e6630694fae029802260b5f6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Wed, 12 Feb 2014 12:52:34 +0200 Subject: [PATCH 19/56] Fixed issue on atomic writes setup and atomic blobs setup on system tables. 
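Two problems in dict_tf_set() are addressed below: the PAGE_COMPRESSED branch overwrote the whole flag word instead of OR-ing its bits into it, and the atomic-writes branch set only ATOMIC_BLOBS without also recording the ATOMIC_WRITES field. The validation routines additionally gain fprintf() diagnostics so a rejected SYS_TABLES.TYPE is reported rather than silently returning ULINT_UNDEFINED. As a rough sketch of how the flag word is composed (bit positions come from the DICT_TF_POS_* constants used in the hunks; the variables are illustrative):

    ulint flags = DICT_TF_COMPACT;

    flags |= 1 << DICT_TF_POS_ATOMIC_BLOBS;
    flags |= 1 << DICT_TF_POS_PAGE_COMPRESSION;
    flags |= page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL;
    flags |= atomic_writes << DICT_TF_POS_ATOMIC_WRITES;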
--- storage/innobase/include/dict0dict.ic | 28 ++++++++++++++++++++++----- storage/xtradb/include/dict0dict.ic | 28 ++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index ed891a00fd4..7cc0404e0eb 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -681,12 +681,16 @@ dict_sys_tables_type_validate( if (redundant) { if (zip_ssize || atomic_blobs) { + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=Redundant, zip_ssize %lu atomic_blobs %lu\n", + zip_ssize, atomic_blobs); return(ULINT_UNDEFINED); } } /* Make sure there are no bits that we do not know about. */ if (unused) { + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, unused %lu\n", + type, unused); return(ULINT_UNDEFINED); } @@ -701,6 +705,8 @@ dict_sys_tables_type_validate( } else if (zip_ssize) { /* Antelope does not support COMPRESSED format. */ + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu\n", + type, zip_ssize); return(ULINT_UNDEFINED); } @@ -710,11 +716,15 @@ dict_sys_tables_type_validate( should be in N_COLS, but we already know about the low_order_bit and DICT_N_COLS_COMPACT flags. */ if (!atomic_blobs) { + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu atomic_blobs %lu\n", + type, zip_ssize, atomic_blobs); return(ULINT_UNDEFINED); } /* Validate that the number is within allowed range. */ if (zip_ssize > PAGE_ZIP_SSIZE_MAX) { + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu max %d\n", + type, zip_ssize, PAGE_ZIP_SSIZE_MAX); return(ULINT_UNDEFINED); } } @@ -731,6 +741,9 @@ dict_sys_tables_type_validate( low_order_bit and DICT_N_COLS_COMPACT flags. */ if (!atomic_blobs || !page_compression) { + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, page_compression %lu page_compression_level %lu\n" + "InnoDB: Error: atomic_blobs %lu\n", + type, page_compression, page_compression_level, atomic_blobs); return(ULINT_UNDEFINED); } } @@ -738,6 +751,9 @@ dict_sys_tables_type_validate( if (awrites == ATOMIC_WRITES_ON || (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { if (!atomic_blobs) { + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, atomic_writes %lu atomic_blobs %lu\n", + type, atomic_writes, atomic_blobs); + return(ULINT_UNDEFINED); } } @@ -846,10 +862,9 @@ dict_tf_set( } if (page_compressed) { - *flags = DICT_TF_COMPACT - | (1 << DICT_TF_POS_ATOMIC_BLOBS) - | (1 << DICT_TF_POS_PAGE_COMPRESSION) - | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL); + *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS) + | (1 << DICT_TF_POS_PAGE_COMPRESSION) + | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL); ut_ad(zip_ssize == 0); ut_ad(dict_tf_get_page_compression(*flags) == TRUE); @@ -863,7 +878,8 @@ dict_tf_set( if (awrites == ATOMIC_WRITES_ON || (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes )) { - *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS); + *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES) + | (1 << DICT_TF_POS_ATOMIC_BLOBS); } if (use_data_dir) { @@ -996,6 +1012,8 @@ dict_tf_to_sys_tables_type( | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL | DICT_TF_MASK_ATOMIC_WRITES); + ut_a(dict_sys_tables_type_validate(type, 0)); + return(type); } diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index 1ce4fe6a2f1..3f6d56fab1f 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -685,12 +685,16 @@ 
dict_sys_tables_type_validate( if (redundant) { if (zip_ssize || atomic_blobs) { + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=Redundant, zip_ssize %lu atomic_blobs %lu\n", + zip_ssize, atomic_blobs); return(ULINT_UNDEFINED); } } /* Make sure there are no bits that we do not know about. */ if (unused) { + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, unused %lu\n", + type, unused); return(ULINT_UNDEFINED); } @@ -705,6 +709,8 @@ dict_sys_tables_type_validate( } else if (zip_ssize) { /* Antelope does not support COMPRESSED format. */ + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu\n", + type, zip_ssize); return(ULINT_UNDEFINED); } @@ -714,11 +720,15 @@ dict_sys_tables_type_validate( should be in N_COLS, but we already know about the low_order_bit and DICT_N_COLS_COMPACT flags. */ if (!atomic_blobs) { + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu atomic_blobs %lu\n", + type, zip_ssize, atomic_blobs); return(ULINT_UNDEFINED); } /* Validate that the number is within allowed range. */ if (zip_ssize > PAGE_ZIP_SSIZE_MAX) { + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu max %d\n", + type, zip_ssize, PAGE_ZIP_SSIZE_MAX); return(ULINT_UNDEFINED); } } @@ -735,6 +745,9 @@ dict_sys_tables_type_validate( low_order_bit and DICT_N_COLS_COMPACT flags. */ if (!atomic_blobs || !page_compression) { + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, page_compression %lu page_compression_level %lu\n" + "InnoDB: Error: atomic_blobs %lu\n", + type, page_compression, page_compression_level, atomic_blobs); return(ULINT_UNDEFINED); } } @@ -742,6 +755,9 @@ dict_sys_tables_type_validate( if (awrites == ATOMIC_WRITES_ON || (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { if (!atomic_blobs) { + fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, atomic_writes %lu atomic_blobs %lu\n", + type, atomic_writes, atomic_blobs); + return(ULINT_UNDEFINED); } } @@ -854,10 +870,9 @@ dict_tf_set( } if (page_compressed) { - *flags = DICT_TF_COMPACT - | (1 << DICT_TF_POS_ATOMIC_BLOBS) - | (1 << DICT_TF_POS_PAGE_COMPRESSION) - | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL); + *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS) + | (1 << DICT_TF_POS_PAGE_COMPRESSION) + | (page_compression_level << DICT_TF_POS_PAGE_COMPRESSION_LEVEL); ut_ad(zip_ssize == 0); ut_ad(dict_tf_get_page_compression(*flags) == TRUE); @@ -871,7 +886,8 @@ dict_tf_set( if (awrites == ATOMIC_WRITES_ON || (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes )) { - *flags |= (1 << DICT_TF_POS_ATOMIC_BLOBS); + *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES) + | (1 << DICT_TF_POS_ATOMIC_BLOBS); } } @@ -1000,6 +1016,8 @@ dict_tf_to_sys_tables_type( | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL | DICT_TF_MASK_ATOMIC_WRITES); + ut_a(dict_sys_tables_type_validate(type, 0)); + return(type); } From da927da04def025f91f6d71172d6b525513a6cd7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Wed, 12 Feb 2014 18:00:03 +0200 Subject: [PATCH 20/56] Fixed issue on atomic writes and system tables. Atomic writes can be used also on system tables but not per table. 
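Whether the system tables use atomic writes is governed by the global srv_use_atomic_writes setting, not by per-table flags, so the per-table checks below no longer demand ATOMIC_BLOBS whenever atomic writes are requested; they only verify that the stored atomic-writes field carries one of its defined values. A minimal sketch of that rule, equivalent in effect to the range checks added in these hunks:

    switch ((atomic_writes_t) atomic_writes) {
    case ATOMIC_WRITES_DEFAULT:
    case ATOMIC_WRITES_ON:
    case ATOMIC_WRITES_OFF:
            break;                   /* acceptable field value */
    default:
            return(ULINT_UNDEFINED); /* corrupted SYS_TABLES.TYPE */
    }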
--- storage/innobase/buf/buf0mtflu.cc | 14 ++++++--- storage/innobase/include/dict0dict.ic | 45 ++++++++++++++------------- storage/xtradb/buf/buf0mtflu.cc | 14 ++++++--- storage/xtradb/include/dict0dict.ic | 45 ++++++++++++++------------- 4 files changed, 66 insertions(+), 52 deletions(-) diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index 9cf5a66fc72..a28b1885fe4 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -298,7 +298,7 @@ mtflush_service_io( switch(work_item->tsk) { case MT_WRK_NONE: ut_a(work_item->wi_status == WRK_ITEM_EXIT); - work_item->wi_status = WRK_ITEM_SUCCESS; + work_item->wi_status = WRK_ITEM_EXIT; ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); break; @@ -419,11 +419,17 @@ buf_mtflu_io_thread_exit(void) work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, 50000); - if (work_item) { + /* If we receive reply to work item and it's status is exit, + thead has processed this message and existed */ + if (work_item && work_item->wi_status == WRK_ITEM_EXIT) { i++; } } + /* Wait about 1/2 sec to allow threads really exit */ + os_thread_sleep(50000); + + ut_a(ib_wqueue_is_empty(mtflush_io->wq)); ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq)); ut_a(ib_wqueue_is_empty(mtflush_io->rd_cq)); @@ -432,10 +438,10 @@ buf_mtflu_io_thread_exit(void) ib_wqueue_free(mtflush_io->wr_cq); ib_wqueue_free(mtflush_io->rd_cq); + os_fast_mutex_free(&mtflush_mtx); + /* Free heap */ mem_heap_free(mtflush_io->wheap); - - os_fast_mutex_free(&mtflush_mtx); } /******************************************************************//** diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 7cc0404e0eb..73fc9ac56fd 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -645,6 +645,24 @@ dict_tf_is_valid( } } + if (atomic_writes) { + + if(atomic_writes < 0 || atomic_writes > ATOMIC_WRITES_OFF) { + + fprintf(stderr, + "InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); + return(false); + } + } + /* CREATE TABLE ... DATA DIRECTORY is supported for any row format, so the DATA_DIR flag is compatible with all other table flags. */ @@ -670,7 +688,8 @@ dict_sys_tables_type_validate( ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(type); ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type); ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type); - atomic_writes_t awrites = (atomic_writes_t)atomic_writes; + + ut_a(atomic_writes >= 0 && atomic_writes <= ATOMIC_WRITES_OFF); /* The low order bit of SYS_TABLES.TYPE is always set to 1. 
If the format is UNIV_FORMAT_B or higher, this field is the same @@ -748,16 +767,6 @@ dict_sys_tables_type_validate( } } - if (awrites == ATOMIC_WRITES_ON || - (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { - if (!atomic_blobs) { - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, atomic_writes %lu atomic_blobs %lu\n", - type, atomic_writes, atomic_blobs); - - return(ULINT_UNDEFINED); - } - } - /* Return the validated SYS_TABLES.TYPE. */ return(type); } @@ -871,16 +880,8 @@ dict_tf_set( ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level); } - if (awrites != ATOMIC_WRITES_DEFAULT) { - *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); - ut_ad(dict_tf_get_atomic_writes(*flags) == awrites); - } - - if (awrites == ATOMIC_WRITES_ON || - (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes )) { - *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES) - | (1 << DICT_TF_POS_ATOMIC_BLOBS); - } + *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); + ut_ad(dict_tf_get_atomic_writes(*flags) == awrites); if (use_data_dir) { *flags |= (1 << DICT_TF_POS_DATA_DIR); diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index f98d99228af..5b4d285be21 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -304,7 +304,7 @@ mtflush_service_io( switch(work_item->tsk) { case MT_WRK_NONE: ut_a(work_item->wi_status == WRK_ITEM_EXIT); - work_item->wi_status = WRK_ITEM_SUCCESS; + work_item->wi_status = WRK_ITEM_EXIT; ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); break; @@ -425,11 +425,17 @@ buf_mtflu_io_thread_exit(void) work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, 50000); - if (work_item) { + /* If we receive reply to work item and it's status is exit, + thead has processed this message and existed */ + if (work_item && work_item->wi_status == WRK_ITEM_EXIT) { i++; } } + /* Wait about 1/2 sec to allow threads really exit */ + os_thread_sleep(50000); + + ut_a(ib_wqueue_is_empty(mtflush_io->wq)); ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq)); ut_a(ib_wqueue_is_empty(mtflush_io->rd_cq)); @@ -438,10 +444,10 @@ buf_mtflu_io_thread_exit(void) ib_wqueue_free(mtflush_io->wr_cq); ib_wqueue_free(mtflush_io->rd_cq); + os_fast_mutex_free(&mtflush_mtx); + /* Free heap */ mem_heap_free(mtflush_io->wheap); - - os_fast_mutex_free(&mtflush_mtx); } /******************************************************************//** diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index 3f6d56fab1f..d0fbb0d33d2 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -1,7 +1,7 @@ /***************************************************************************** Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. -Copyright (c) 2013, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -649,6 +649,24 @@ dict_tf_is_valid( } } + if (atomic_writes) { + + if(atomic_writes < 0 || atomic_writes > ATOMIC_WRITES_OFF) { + + fprintf(stderr, + "InnoDB: Error: table flags are %ld in the data dictionary and are corrupted\n" + "InnoDB: Error: data dictionary flags are\n" + "InnoDB: compact %ld atomic_blobs %ld\n" + "InnoDB: unused %ld data_dir %ld zip_ssize %ld\n" + "InnoDB: page_compression %ld page_compression_level %ld\n" + "InnoDB: atomic_writes %ld\n", + flags, compact, atomic_blobs, unused, data_dir, zip_ssize, + page_compression, page_compression_level, atomic_writes + ); + return(false); + } + } + /* CREATE TABLE ... DATA DIRECTORY is supported for any row format, so the DATA_DIR flag is compatible with all other table flags. */ @@ -674,7 +692,8 @@ dict_sys_tables_type_validate( ulint page_compression = DICT_TF_GET_PAGE_COMPRESSION(type); ulint page_compression_level = DICT_TF_GET_PAGE_COMPRESSION_LEVEL(type); ulint atomic_writes = DICT_TF_GET_ATOMIC_WRITES(type); - atomic_writes_t awrites = (atomic_writes_t)atomic_writes; + + ut_a(atomic_writes >= 0 && atomic_writes <= ATOMIC_WRITES_OFF); /* The low order bit of SYS_TABLES.TYPE is always set to 1. If the format is UNIV_FORMAT_B or higher, this field is the same @@ -752,16 +771,6 @@ dict_sys_tables_type_validate( } } - if (awrites == ATOMIC_WRITES_ON || - (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) { - if (!atomic_blobs) { - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, atomic_writes %lu atomic_blobs %lu\n", - type, atomic_writes, atomic_blobs); - - return(ULINT_UNDEFINED); - } - } - /* Return the validated SYS_TABLES.TYPE. */ return(type); } @@ -879,16 +888,8 @@ dict_tf_set( ut_ad(dict_tf_get_page_compression_level(*flags) == page_compression_level); } - if (awrites != ATOMIC_WRITES_DEFAULT) { - *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); - ut_ad(dict_tf_get_atomic_writes(*flags) == awrites); - } - - if (awrites == ATOMIC_WRITES_ON || - (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes )) { - *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES) - | (1 << DICT_TF_POS_ATOMIC_BLOBS); - } + *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); + ut_ad(dict_tf_get_atomic_writes(*flags) == awrites); } /********************************************************************//** From d17ecff410180adf96dcd7f261157d52e7f62af2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Thu, 13 Feb 2014 09:13:56 +0200 Subject: [PATCH 21/56] Fixed issue on data dictionary corruption. Fixed issue on multi-threaded flush at shutdown. Removed unnecessary startup option innodb_compress_pages. Added a new startup option innodb_mtflush_threads, default 8. 
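The shutdown path of the multi-threaded flush is reworked below: a worker no longer watches srv_shutdown_state but runs until it receives an MT_WRK_NONE ("exit") work item, posts that item back on the completion queue and stops, while buf_mtflu_io_thread_exit() sends exactly one such item per flush thread and collects the acknowledgements before freeing the queues, the mutex and the heap. A simplified sketch of the sending side of that handshake (the rd/wr payload resets and error handling are omitted):

    /* one exit item per configured flush thread */
    for (long i = 0; i < srv_mtflush_threads; i++) {
            mtflush_io->work_item[i].tsk = MT_WRK_NONE;
            mtflush_io->work_item[i].wi_status = WRK_ITEM_EXIT;
            ib_wqueue_add(mtflush_io->wq,
                          &mtflush_io->work_item[i],
                          mtflush_io->wheap);
    }
    /* each worker echoes its item to wr_cq, sets WTHR_KILL_IT and returns */

The thread count is also decoupled from the number of buffer pool instances: it now comes from the new innodb_mtflush_threads option (default MTFLUSH_DEFAULT_WORKER = 8, upper limit MTFLUSH_MAX_WORKER = 64).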
--- storage/innobase/buf/buf0mtflu.cc | 37 +++++++++++++++------------ storage/innobase/handler/ha_innodb.cc | 24 ++++++++--------- storage/innobase/include/dict0dict.ic | 19 +++++++++----- storage/innobase/include/fsp0fsp.ic | 22 +++++++++++++--- storage/innobase/include/srv0srv.h | 4 +-- storage/innobase/os/os0file.cc | 3 ++- storage/innobase/srv/srv0srv.cc | 5 +--- storage/innobase/srv/srv0start.cc | 11 +++----- storage/xtradb/buf/buf0mtflu.cc | 37 +++++++++++++++------------ storage/xtradb/handler/ha_innodb.cc | 24 ++++++++--------- storage/xtradb/include/dict0dict.ic | 19 +++++++++----- storage/xtradb/include/fsp0fsp.ic | 22 +++++++++++++--- storage/xtradb/include/srv0srv.h | 4 +-- storage/xtradb/os/os0file.cc | 3 ++- storage/xtradb/srv/srv0srv.cc | 5 +--- storage/xtradb/srv/srv0start.cc | 12 ++++----- 16 files changed, 141 insertions(+), 110 deletions(-) diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index a28b1885fe4..fb1d1ce54ae 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -272,7 +272,7 @@ mtflush_service_io( { wrk_t *work_item = NULL; ulint n_flushed=0; - ib_time_t max_wait_usecs = 5000000; + ib_time_t max_wait_usecs = 50000; mtflush_io->wt_status = WTHR_SIG_WAITING; work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, max_wait_usecs); @@ -300,7 +300,8 @@ mtflush_service_io( ut_a(work_item->wi_status == WRK_ITEM_EXIT); work_item->wi_status = WRK_ITEM_EXIT; ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); - break; + mtflush_io->wt_status = WTHR_KILL_IT; + return; case MT_WRK_WRITE: work_item->wi_status = WRK_ITEM_START; @@ -346,11 +347,11 @@ DECLARE_THREAD(mtflush_io_thread)( #ifdef UNIV_DEBUG ib_uint64_t stat_universal_num_processed = 0; ib_uint64_t stat_cycle_num_processed = 0; - wrk_t* work_item = mtflush_io[0].work_item; + wrk_t* work_item = mtflush_io[0].work_item; ulint i; #endif - while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { + while (TRUE) { mtflush_service_io(mtflush_io); #ifdef UNIV_DEBUG @@ -365,12 +366,9 @@ DECLARE_THREAD(mtflush_io_thread)( stat_cycle_num_processed); mtflu_print_thread_stat(work_item); #endif - } - - /* This should make sure that all current work items are - processed before threads exit. 
*/ - while (!ib_wqueue_is_empty(mtflush_io->wq)) { - mtflush_service_io(mtflush_io); + if (mtflush_io->wt_status == WTHR_KILL_IT) { + break; + } } os_thread_exit(NULL); @@ -385,16 +383,21 @@ void buf_mtflu_io_thread_exit(void) /*==========================*/ { - ulint i; + long i; thread_sync_t* mtflush_io = mtflush_ctx; ut_a(mtflush_io != NULL); - fprintf(stderr, "signal page_comp_io_threads to exit [%lu]\n", + /* Confirm if the io-thread KILL is in progress, bailout */ + if (mtflush_io->wt_status == WTHR_KILL_IT) { + return; + } + + fprintf(stderr, "signal mtflush_io_threads to exit [%lu]\n", srv_buf_pool_instances); /* Send one exit work item/thread */ - for (i=0; i < srv_buf_pool_instances; i++) { + for (i=0; i < srv_mtflush_threads; i++) { mtflush_io->work_item[i].wr.buf_pool = NULL; mtflush_io->work_item[i].rd.page_pool = NULL; mtflush_io->work_item[i].tsk = MT_WRK_NONE; @@ -407,14 +410,14 @@ buf_mtflu_io_thread_exit(void) /* Wait until all work items on a work queue are processed */ while(!ib_wqueue_is_empty(mtflush_io->wq)) { - /* Wait about 1/2 sec */ - os_thread_sleep(50000); + /* Wait */ + os_thread_sleep(500000); } ut_a(ib_wqueue_is_empty(mtflush_io->wq)); /* Collect all work done items */ - for (i=0; i < srv_buf_pool_instances;) { + for (i=0; i < srv_mtflush_threads;) { wrk_t* work_item; work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, 50000); @@ -558,11 +561,13 @@ buf_mtflu_flush_work_items( if((int)done_wi->id_usr == -1 && done_wi->wi_status == WRK_ITEM_SET ) { +#ifdef UNIV_DEBUG fprintf(stderr, "**Set/Unused work_item[%lu] flush_type=%d\n", i, done_wi->wr.flush_type); ut_ad(0); +#endif } n_flushed+= done_wi->n_flushed; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 812aa0cfe83..4999a202bd6 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -9769,14 +9769,6 @@ ha_innobase::check_table_options( /* Check page compression requirements */ if (options->page_compressed) { - if (!srv_compress_pages) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSED requires" - "innodb_compress_pages not enabled"); - return "PAGE_COMPRESSED"; - } if (row_format == ROW_TYPE_COMPRESSED) { push_warning( @@ -16587,11 +16579,6 @@ static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug, NULL, NULL, FALSE); #endif /* UNIV_DEBUG */ -static MYSQL_SYSVAR_BOOL(compress_pages, srv_compress_pages, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Use page compression.", - NULL, NULL, FALSE); - static MYSQL_SYSVAR_LONG(trim_pct, srv_trim_pct, PLUGIN_VAR_OPCMDARG , "How many percent of compressed pages should be trimmed", @@ -16614,6 +16601,15 @@ static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, NULL, NULL, FALSE); #endif /* HAVE_LZ4 */ +static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, + PLUGIN_VAR_RQCMDARG, + "Number of multi-threaded flush threads", + NULL, NULL, + MTFLUSH_DEFAULT_WORKER, /* Default setting */ + 1, /* Minimum setting */ + MTFLUSH_MAX_WORKER, /* Max setting */ + 0); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(api_trx_level), @@ -16759,13 +16755,13 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(limit_optimistic_insert_debug), MYSQL_SYSVAR(trx_purge_view_update_only_debug), #endif /* UNIV_DEBUG */ - MYSQL_SYSVAR(compress_pages), MYSQL_SYSVAR(trim_pct), MYSQL_SYSVAR(compress_index_pages), MYSQL_SYSVAR(use_trim), #ifdef 
HAVE_LZ4 MYSQL_SYSVAR(use_lz4), #endif + MYSQL_SYSVAR(mtflush_threads), NULL }; diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 73fc9ac56fd..2be68e37dc8 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -700,7 +700,7 @@ dict_sys_tables_type_validate( if (redundant) { if (zip_ssize || atomic_blobs) { - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=Redundant, zip_ssize %lu atomic_blobs %lu\n", + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=Redundant, zip_ssize %lu atomic_blobs %lu\n", zip_ssize, atomic_blobs); return(ULINT_UNDEFINED); } @@ -708,7 +708,7 @@ dict_sys_tables_type_validate( /* Make sure there are no bits that we do not know about. */ if (unused) { - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, unused %lu\n", + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, unused %lu\n", type, unused); return(ULINT_UNDEFINED); } @@ -724,7 +724,7 @@ dict_sys_tables_type_validate( } else if (zip_ssize) { /* Antelope does not support COMPRESSED format. */ - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu\n", + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu\n", type, zip_ssize); return(ULINT_UNDEFINED); } @@ -735,14 +735,14 @@ dict_sys_tables_type_validate( should be in N_COLS, but we already know about the low_order_bit and DICT_N_COLS_COMPACT flags. */ if (!atomic_blobs) { - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu atomic_blobs %lu\n", + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu atomic_blobs %lu\n", type, zip_ssize, atomic_blobs); return(ULINT_UNDEFINED); } /* Validate that the number is within allowed range. */ if (zip_ssize > PAGE_ZIP_SSIZE_MAX) { - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu max %d\n", + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu max %d\n", type, zip_ssize, PAGE_ZIP_SSIZE_MAX); return(ULINT_UNDEFINED); } @@ -760,13 +760,20 @@ dict_sys_tables_type_validate( low_order_bit and DICT_N_COLS_COMPACT flags. */ if (!atomic_blobs || !page_compression) { - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, page_compression %lu page_compression_level %lu\n" + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, page_compression %lu page_compression_level %lu\n" "InnoDB: Error: atomic_blobs %lu\n", type, page_compression, page_compression_level, atomic_blobs); return(ULINT_UNDEFINED); } } + /* Validate that the atomic writes number is within allowed range. */ + if (atomic_writes < 0 || atomic_writes > ATOMIC_WRITES_OFF) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, atomic_writes %lu\n", + type, atomic_writes); + return(ULINT_UNDEFINED); + } + /* Return the validated SYS_TABLES.TYPE. */ return(type); } diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic index cb12d556ec4..fb253370b6e 100644 --- a/storage/innobase/include/fsp0fsp.ic +++ b/storage/innobase/include/fsp0fsp.ic @@ -67,13 +67,14 @@ fsp_flags_is_valid( ulint page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(flags); ulint page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags); ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); - atomic_writes_t awrites = (atomic_writes_t)atomic_writes; DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false);); /* fsp_flags is zero unless atomic_blobs is set. */ /* Make sure there are no bits that we do not know about. 
*/ if (unused != 0 || flags == 1) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted unused %lu\n", + flags, unused); return(false); } else if (post_antelope) { /* The Antelope row formats REDUNDANT and COMPACT did @@ -81,6 +82,8 @@ fsp_flags_is_valid( 4-byte field is zero for Antelope row formats. */ if (!atomic_blobs) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_blobs %lu\n", + flags, atomic_blobs); return(false); } } @@ -92,10 +95,14 @@ fsp_flags_is_valid( externally stored parts. */ if (post_antelope || zip_ssize != 0) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu atomic_blobs %lu\n", + flags, zip_ssize, atomic_blobs); return(false); } } else if (!post_antelope || zip_ssize > PAGE_ZIP_SSIZE_MAX) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu max %d\n", + flags, zip_ssize, PAGE_ZIP_SSIZE_MAX); return(false); } else if (page_ssize > UNIV_PAGE_SSIZE_MAX) { @@ -103,9 +110,13 @@ fsp_flags_is_valid( be zero for an original 16k page size. Validate the page shift size is within allowed range. */ + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu\n", + flags, page_ssize, UNIV_PAGE_SSIZE_MAX); return(false); } else if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_ORIG && !page_ssize) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu:%d\n", + flags, page_ssize, UNIV_PAGE_SIZE, UNIV_PAGE_SIZE_ORIG); return(false); } @@ -113,13 +124,16 @@ fsp_flags_is_valid( to be set */ if (page_compression_level || page_compression) { if (!page_compression || !atomic_blobs) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_compression %lu\n" + "InnoDB: Error: page_compression_level %lu atomic_blobs %lu\n", + flags, page_compression, page_compression_level, atomic_blobs); return(false); } } - if ((awrites == ATOMIC_WRITES_ON || - (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) - && !atomic_blobs) { + if (atomic_writes < 0 || atomic_writes > ATOMIC_WRITES_OFF) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_writes %lu\n", + flags, atomic_writes); return (false); } diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 0ffb966d9a3..725aaf9553d 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -236,9 +236,6 @@ use simulated aio we build below with threads. 
Currently we support native aio on windows and linux */ extern my_bool srv_use_native_aio; -/* Is page compression used */ -extern my_bool srv_compress_pages; - /* Is page compression used only for index pages */ extern my_bool srv_page_compress_index_pages; @@ -259,6 +256,7 @@ extern my_bool srv_use_lz4; /* Number of flush threads */ #define MTFLUSH_MAX_WORKER 64 +#define MTFLUSH_DEFAULT_WORKER 8 extern long srv_mtflush_threads; #ifdef __WIN__ diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 06c1a8c6ed4..683cd78b901 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -6153,6 +6153,7 @@ os_file_trim( ulint len) /*!< in: length of area */ { +#define SECT_SIZE 512 size_t trim_len = UNIV_PAGE_SIZE - len; os_offset_t off = slot->offset + len; @@ -6184,6 +6185,7 @@ os_file_trim( #ifdef __linux__ #if defined(FALLOC_FL_PUNCH_HOLE) && defined (FALLOC_FL_KEEP_SIZE) + trim_len = (trim_len & ~(SECT_SIZE - 1)) + SECT_SIZE; int ret = fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len); if (ret) { @@ -6252,7 +6254,6 @@ os_file_trim( } #endif -#define SECT_SIZE 512 srv_stats.page_compression_trim_sect512.add((trim_len / SECT_SIZE)); srv_stats.page_compression_trim_sect4096.add((trim_len / (SECT_SIZE*8))); srv_stats.page_compressed_trim_op.inc(); diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index fa1675f7a17..92cfda1c65e 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -146,9 +146,6 @@ use simulated aio we build below with threads. Currently we support native aio on windows and linux */ UNIV_INTERN my_bool srv_use_native_aio = TRUE; -/* If this flag is TRUE, then we will use page compression -to the pages */ -UNIV_INTERN my_bool srv_compress_pages = FALSE; /* If this flag is TRUE, then we will use page compression only for index pages */ UNIV_INTERN my_bool srv_page_compress_index_pages = FALSE; @@ -163,7 +160,7 @@ UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; /* If this flag IS TRUE, then we use lz4 to compress/decompress pages */ UNIV_INTERN my_bool srv_use_lz4 = FALSE; /* Number of threads used for multi-threaded flush */ -UNIV_INTERN long srv_mtflush_threads = 0; +UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER; #ifdef __WIN__ /* Windows native condition variables. 
We use runtime loading / function diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 879b2335720..a469dac8296 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -2593,11 +2593,7 @@ files_checked: if (!srv_read_only_mode) { - if (srv_buf_pool_instances <= MTFLUSH_MAX_WORKER) { - srv_mtflush_threads = srv_buf_pool_instances; - } - /* else we default to 8 worker-threads */ - + /* Start multi-threaded flush threads */ mtflush_ctx = buf_mtflu_handler_init(srv_mtflush_threads, srv_buf_pool_instances); @@ -2607,7 +2603,8 @@ files_checked: (thread_ids + 6 + 32)); #if UNIV_DEBUG - fprintf(stderr, "%s:%d buf-pool-instances:%lu\n", __FILE__, __LINE__, srv_buf_pool_instances); + fprintf(stderr, "InnoDB: Note: %s:%d buf-pool-instances:%lu mtflush_threads %lu\n", + __FILE__, __LINE__, srv_buf_pool_instances, srv_mtflush_threads); #endif os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL); @@ -2879,7 +2876,7 @@ innobase_shutdown_for_mysql(void) buf_mtflu_io_thread_exit(); #ifdef UNIV_DEBUG - fprintf(stderr, "%s:%d os_thread_count:%lu \n", __FUNCTION__, __LINE__, os_thread_count); + fprintf(stderr, "InnoDB: Note: %s:%d os_thread_count:%lu \n", __FUNCTION__, __LINE__, os_thread_count); #endif os_mutex_enter(os_sync_mutex); diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index 5b4d285be21..beb46cc2813 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -278,7 +278,7 @@ mtflush_service_io( { wrk_t *work_item = NULL; ulint n_flushed=0; - ib_time_t max_wait_usecs = 5000000; + ib_time_t max_wait_usecs = 50000; mtflush_io->wt_status = WTHR_SIG_WAITING; work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, max_wait_usecs); @@ -306,7 +306,8 @@ mtflush_service_io( ut_a(work_item->wi_status == WRK_ITEM_EXIT); work_item->wi_status = WRK_ITEM_EXIT; ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); - break; + mtflush_io->wt_status = WTHR_KILL_IT; + return; case MT_WRK_WRITE: work_item->wi_status = WRK_ITEM_START; @@ -352,11 +353,11 @@ DECLARE_THREAD(mtflush_io_thread)( #ifdef UNIV_DEBUG ib_uint64_t stat_universal_num_processed = 0; ib_uint64_t stat_cycle_num_processed = 0; - wrk_t* work_item = mtflush_io[0].work_item; + wrk_t* work_item = mtflush_io[0].work_item; ulint i; #endif - while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) { + while (TRUE) { mtflush_service_io(mtflush_io); #ifdef UNIV_DEBUG @@ -371,12 +372,9 @@ DECLARE_THREAD(mtflush_io_thread)( stat_cycle_num_processed); mtflu_print_thread_stat(work_item); #endif - } - - /* This should make sure that all current work items are - processed before threads exit. 
*/ - while (!ib_wqueue_is_empty(mtflush_io->wq)) { - mtflush_service_io(mtflush_io); + if (mtflush_io->wt_status == WTHR_KILL_IT) { + break; + } } os_thread_exit(NULL); @@ -391,16 +389,21 @@ void buf_mtflu_io_thread_exit(void) /*==========================*/ { - ulint i; + long i; thread_sync_t* mtflush_io = mtflush_ctx; ut_a(mtflush_io != NULL); - fprintf(stderr, "signal page_comp_io_threads to exit [%lu]\n", + /* Confirm if the io-thread KILL is in progress, bailout */ + if (mtflush_io->wt_status == WTHR_KILL_IT) { + return; + } + + fprintf(stderr, "signal mtflush_io_threads to exit [%lu]\n", srv_buf_pool_instances); /* Send one exit work item/thread */ - for (i=0; i < srv_buf_pool_instances; i++) { + for (i=0; i < srv_mtflush_threads; i++) { mtflush_io->work_item[i].wr.buf_pool = NULL; mtflush_io->work_item[i].rd.page_pool = NULL; mtflush_io->work_item[i].tsk = MT_WRK_NONE; @@ -413,14 +416,14 @@ buf_mtflu_io_thread_exit(void) /* Wait until all work items on a work queue are processed */ while(!ib_wqueue_is_empty(mtflush_io->wq)) { - /* Wait about 1/2 sec */ - os_thread_sleep(50000); + /* Wait */ + os_thread_sleep(500000); } ut_a(ib_wqueue_is_empty(mtflush_io->wq)); /* Collect all work done items */ - for (i=0; i < srv_buf_pool_instances;) { + for (i=0; i < srv_mtflush_threads;) { wrk_t* work_item; work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, 50000); @@ -564,11 +567,13 @@ buf_mtflu_flush_work_items( if((int)done_wi->id_usr == -1 && done_wi->wi_status == WRK_ITEM_SET ) { +#ifdef UNIV_DEBUG fprintf(stderr, "**Set/Unused work_item[%lu] flush_type=%d\n", i, done_wi->wr.flush_type); ut_ad(0); +#endif } n_flushed+= done_wi->n_flushed; diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 557872abdf0..f26ad436190 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -10294,14 +10294,6 @@ ha_innobase::check_table_options( /* Check page compression requirements */ if (options->page_compressed) { - if (!srv_compress_pages) { - push_warning( - thd, Sql_condition::WARN_LEVEL_WARN, - HA_WRONG_CREATE_OPTION, - "InnoDB: PAGE_COMPRESSED requires" - "innodb_compress_pages not enabled"); - return "PAGE_COMPRESSED"; - } if (row_format == ROW_TYPE_COMPRESSED) { push_warning( @@ -17942,11 +17934,6 @@ static MYSQL_SYSVAR_BOOL(use_stacktrace, srv_use_stacktrace, "Print stacktrace on long semaphore wait (off by default supported only on linux)", NULL, NULL, FALSE); -static MYSQL_SYSVAR_BOOL(compress_pages, srv_compress_pages, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Use page compression.", - NULL, NULL, FALSE); - static MYSQL_SYSVAR_LONG(trim_pct, srv_trim_pct, PLUGIN_VAR_OPCMDARG , "How many percent of compressed pages should be trimmed", @@ -17975,6 +17962,15 @@ static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, NULL, NULL, FALSE); #endif /* HAVE_LZ4 */ +static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, + PLUGIN_VAR_RQCMDARG, + "Number of multi-threaded flush threads", + NULL, NULL, + MTFLUSH_DEFAULT_WORKER, /* Default setting */ + 1, /* Minimum setting */ + MTFLUSH_MAX_WORKER, /* Max setting */ + 0); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(log_block_size), MYSQL_SYSVAR(additional_mem_pool_size), @@ -18165,13 +18161,13 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(fake_changes), MYSQL_SYSVAR(locking_fake_changes), MYSQL_SYSVAR(use_stacktrace), - MYSQL_SYSVAR(compress_pages), MYSQL_SYSVAR(trim_pct), 
MYSQL_SYSVAR(compress_index_pages), MYSQL_SYSVAR(use_trim), #ifdef HAVE_LZ4 MYSQL_SYSVAR(use_lz4), #endif + MYSQL_SYSVAR(mtflush_threads), NULL }; diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index d0fbb0d33d2..d37db209beb 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -704,7 +704,7 @@ dict_sys_tables_type_validate( if (redundant) { if (zip_ssize || atomic_blobs) { - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=Redundant, zip_ssize %lu atomic_blobs %lu\n", + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=Redundant, zip_ssize %lu atomic_blobs %lu\n", zip_ssize, atomic_blobs); return(ULINT_UNDEFINED); } @@ -712,7 +712,7 @@ dict_sys_tables_type_validate( /* Make sure there are no bits that we do not know about. */ if (unused) { - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, unused %lu\n", + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, unused %lu\n", type, unused); return(ULINT_UNDEFINED); } @@ -728,7 +728,7 @@ dict_sys_tables_type_validate( } else if (zip_ssize) { /* Antelope does not support COMPRESSED format. */ - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu\n", + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu\n", type, zip_ssize); return(ULINT_UNDEFINED); } @@ -739,14 +739,14 @@ dict_sys_tables_type_validate( should be in N_COLS, but we already know about the low_order_bit and DICT_N_COLS_COMPACT flags. */ if (!atomic_blobs) { - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu atomic_blobs %lu\n", + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu atomic_blobs %lu\n", type, zip_ssize, atomic_blobs); return(ULINT_UNDEFINED); } /* Validate that the number is within allowed range. */ if (zip_ssize > PAGE_ZIP_SSIZE_MAX) { - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu max %d\n", + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, zip_ssize %lu max %d\n", type, zip_ssize, PAGE_ZIP_SSIZE_MAX); return(ULINT_UNDEFINED); } @@ -764,13 +764,20 @@ dict_sys_tables_type_validate( low_order_bit and DICT_N_COLS_COMPACT flags. */ if (!atomic_blobs || !page_compression) { - fprintf(stderr, "InnoDB Error: SYS_TABLES::TYPE=%lu, page_compression %lu page_compression_level %lu\n" + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, page_compression %lu page_compression_level %lu\n" "InnoDB: Error: atomic_blobs %lu\n", type, page_compression, page_compression_level, atomic_blobs); return(ULINT_UNDEFINED); } } + /* Validate that the atomic writes number is within allowed range. */ + if (atomic_writes < 0 || atomic_writes > ATOMIC_WRITES_OFF) { + fprintf(stderr, "InnoDB: Error: SYS_TABLES::TYPE=%lu, atomic_writes %lu\n", + type, atomic_writes); + return(ULINT_UNDEFINED); + } + /* Return the validated SYS_TABLES.TYPE. */ return(type); } diff --git a/storage/xtradb/include/fsp0fsp.ic b/storage/xtradb/include/fsp0fsp.ic index bc46967fab0..3563f5ef372 100644 --- a/storage/xtradb/include/fsp0fsp.ic +++ b/storage/xtradb/include/fsp0fsp.ic @@ -67,13 +67,14 @@ fsp_flags_is_valid( ulint page_compression = FSP_FLAGS_GET_PAGE_COMPRESSION(flags); ulint page_compression_level = FSP_FLAGS_GET_PAGE_COMPRESSION_LEVEL(flags); ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); - atomic_writes_t awrites = (atomic_writes_t)atomic_writes; DBUG_EXECUTE_IF("fsp_flags_is_valid_failure", return(false);); /* fsp_flags is zero unless atomic_blobs is set. */ /* Make sure there are no bits that we do not know about. 
*/ if (unused != 0 || flags == 1) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted unused %lu\n", + flags, unused); return(false); } else if (post_antelope) { /* The Antelope row formats REDUNDANT and COMPACT did @@ -81,6 +82,8 @@ fsp_flags_is_valid( 4-byte field is zero for Antelope row formats. */ if (!atomic_blobs) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_blobs %lu\n", + flags, atomic_blobs); return(false); } } @@ -92,10 +95,14 @@ fsp_flags_is_valid( externally stored parts. */ if (post_antelope || zip_ssize != 0) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu atomic_blobs %lu\n", + flags, zip_ssize, atomic_blobs); return(false); } } else if (!post_antelope || zip_ssize > PAGE_ZIP_SSIZE_MAX) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted zip_ssize %lu max %d\n", + flags, zip_ssize, PAGE_ZIP_SSIZE_MAX); return(false); } else if (page_ssize > UNIV_PAGE_SSIZE_MAX) { @@ -103,9 +110,13 @@ fsp_flags_is_valid( be zero for an original 16k page size. Validate the page shift size is within allowed range. */ + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu\n", + flags, page_ssize, UNIV_PAGE_SSIZE_MAX); return(false); } else if (UNIV_PAGE_SIZE != UNIV_PAGE_SIZE_ORIG && !page_ssize) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_ssize %lu max %lu:%d\n", + flags, page_ssize, UNIV_PAGE_SIZE, UNIV_PAGE_SIZE_ORIG); return(false); } @@ -117,13 +128,16 @@ fsp_flags_is_valid( to be set */ if (page_compression_level || page_compression) { if (!page_compression || !atomic_blobs) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted page_compression %lu\n" + "InnoDB: Error: page_compression_level %lu atomic_blobs %lu\n", + flags, page_compression, page_compression_level, atomic_blobs); return(false); } } - if ((awrites == ATOMIC_WRITES_ON || - (awrites == ATOMIC_WRITES_DEFAULT && srv_use_atomic_writes)) - && !atomic_blobs) { + if (atomic_writes < 0 || atomic_writes > ATOMIC_WRITES_OFF) { + fprintf(stderr, "InnoDB: Error: Tablespace flags %lu corrupted atomic_writes %lu\n", + flags, atomic_writes); return (false); } diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index f7de92d2288..bfb59865841 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -256,9 +256,6 @@ extern ibool srv_use_native_conditions; #endif /* __WIN__ */ #endif /* !UNIV_HOTBACKUP */ -/* Is page compression used */ -extern my_bool srv_compress_pages; - /* Is page compression used only for index pages */ extern my_bool srv_page_compress_index_pages; @@ -279,6 +276,7 @@ extern my_bool srv_use_lz4; /* Number of flush threads */ #define MTFLUSH_MAX_WORKER 64 +#define MTFLUSH_DEFAULT_WORKER 8 extern long srv_mtflush_threads; /** Server undo tablespaces directory, can be absolute path. 
*/ diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 43adf78c63c..d9a5be2b049 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -6208,6 +6208,7 @@ os_file_trim( ulint len) /*!< in: length of area */ { +#define SECT_SIZE 512 size_t trim_len = UNIV_PAGE_SIZE - len; os_offset_t off = slot->offset + len; @@ -6239,6 +6240,7 @@ os_file_trim( #ifdef __linux__ #if defined(FALLOC_FL_PUNCH_HOLE) && defined (FALLOC_FL_KEEP_SIZE) + trim_len = (trim_len & ~(SECT_SIZE - 1)) + SECT_SIZE; int ret = fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len); if (ret) { @@ -6307,7 +6309,6 @@ os_file_trim( } #endif -#define SECT_SIZE 512 srv_stats.page_compression_trim_sect512.add((trim_len / SECT_SIZE)); srv_stats.page_compression_trim_sect4096.add((trim_len / (SECT_SIZE*8))); srv_stats.page_compressed_trim_op.inc(); diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 7bd1ef52951..d6801b701ae 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -161,9 +161,6 @@ use simulated aio we build below with threads. Currently we support native aio on windows and linux */ UNIV_INTERN my_bool srv_use_native_aio = TRUE; -/* If this flag is TRUE, then we will use page compression -to the pages */ -UNIV_INTERN my_bool srv_compress_pages = FALSE; /* If this flag is TRUE, then we will use page compression only for index pages */ UNIV_INTERN my_bool srv_page_compress_index_pages = FALSE; @@ -181,7 +178,7 @@ UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; /* If this flag IS TRUE, then we use lz4 to compress/decompress pages */ UNIV_INTERN my_bool srv_use_lz4 = FALSE; /* Number of threads used for multi-threaded flush */ -UNIV_INTERN long srv_mtflush_threads = 0; +UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER; #ifdef __WIN__ /* Windows native condition variables. We use runtime loading / function diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index 29afd0d0c98..bb539569e9a 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -2718,22 +2718,20 @@ files_checked: } if (!srv_read_only_mode) { - if (srv_buf_pool_instances <= MTFLUSH_MAX_WORKER) { - srv_mtflush_threads = srv_buf_pool_instances; - } + /* Start multi-threaded flush threads */ mtflush_ctx = buf_mtflu_handler_init(srv_mtflush_threads, srv_buf_pool_instances); /* Set up the thread ids */ buf_mtflu_set_thread_ids(srv_mtflush_threads, mtflush_ctx, - (thread_ids + 6 + 32)); + (thread_ids + 6 + SRV_MAX_N_PURGE_THREADS)); #if UNIV_DEBUG - fprintf(stderr, "%s:%d buf-pool-instances:%lu\n", __FILE__, __LINE__, srv_buf_pool_instances); + fprintf(stderr, "InnoDB: Note: %s:%d buf-pool-instances:%lu mtflush_threads %lu\n", + __FILE__, __LINE__, srv_buf_pool_instances, srv_mtflush_threads); #endif - /* JAN: TODO: END */ os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL); } @@ -3011,7 +3009,7 @@ innobase_shutdown_for_mysql(void) buf_mtflu_io_thread_exit(); #ifdef UNIV_DEBUG - fprintf(stderr, "%s:%d os_thread_count:%lu \n", __FUNCTION__, __LINE__, os_thread_count); + fprintf(stderr, "InnoDB: Note: %s:%d os_thread_count:%lu \n", __FUNCTION__, __LINE__, os_thread_count); #endif os_mutex_enter(os_sync_mutex); From dfc295035609c669e699f1df07d60495d6b8dbb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Thu, 13 Feb 2014 12:23:55 +0200 Subject: [PATCH 22/56] Fixed small issue with dictionary. 
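The small issue is the ut_a(dict_sys_tables_type_validate(type, 0)) assertion that an earlier commit in this series put into dict_tf_to_sys_tables_type(): the validator's second argument is meant to be the matching SYS_TABLES.N_COLS value (it is what tells the Redundant and Compact formats apart), so passing a constant 0 does not describe the table being converted, and the assertion is simply removed. Where validation is wanted, a sketch of the caller-side check, assuming n_cols is the value read from SYS_TABLES:

    if (dict_sys_tables_type_validate(type, n_cols) == ULINT_UNDEFINED) {
            /* corrupted dictionary entry: refuse to use these flags */
    }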
--- storage/innobase/include/dict0dict.ic | 2 -- storage/xtradb/include/dict0dict.ic | 2 -- 2 files changed, 4 deletions(-) diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 2be68e37dc8..045d1185ebd 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -1020,8 +1020,6 @@ dict_tf_to_sys_tables_type( | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL | DICT_TF_MASK_ATOMIC_WRITES); - ut_a(dict_sys_tables_type_validate(type, 0)); - return(type); } diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index d37db209beb..ab9241c29a7 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -1024,8 +1024,6 @@ dict_tf_to_sys_tables_type( | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL | DICT_TF_MASK_ATOMIC_WRITES); - ut_a(dict_sys_tables_type_validate(type, 0)); - return(type); } From 9c614665ee78028b9cf2edfe043373b4f6f0ff3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Thu, 13 Feb 2014 12:35:37 +0200 Subject: [PATCH 23/56] Fixed compiler warnings. --- storage/innobase/include/dict0dict.ic | 2 +- storage/xtradb/include/dict0dict.ic | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic index 045d1185ebd..d1cfdb0b8f7 100644 --- a/storage/innobase/include/dict0dict.ic +++ b/storage/innobase/include/dict0dict.ic @@ -888,7 +888,7 @@ dict_tf_set( } *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); - ut_ad(dict_tf_get_atomic_writes(*flags) == awrites); + ut_a(dict_tf_get_atomic_writes(*flags) == awrites); if (use_data_dir) { *flags |= (1 << DICT_TF_POS_DATA_DIR); diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic index ab9241c29a7..2dc449bac4d 100644 --- a/storage/xtradb/include/dict0dict.ic +++ b/storage/xtradb/include/dict0dict.ic @@ -896,7 +896,7 @@ dict_tf_set( } *flags |= (atomic_writes << DICT_TF_POS_ATOMIC_WRITES); - ut_ad(dict_tf_get_atomic_writes(*flags) == awrites); + ut_a(dict_tf_get_atomic_writes(*flags) == awrites); } /********************************************************************//** From cae21c52f604ba804f07f858edae5a930978d820 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Fri, 14 Feb 2014 15:02:26 +0200 Subject: [PATCH 24/56] Fix timing on queues, this could clearly lead to starvation. --- storage/innobase/buf/buf0mtflu.cc | 11 ++++++----- storage/xtradb/buf/buf0mtflu.cc | 11 ++++++----- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index fb1d1ce54ae..ee53e52a9cf 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -51,6 +51,8 @@ Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com #include "fil0pagecompress.h" #define MT_COMP_WATER_MARK 50 +/** Time to wait for a message. 
*/ +#define MT_WAIT_IN_USECS 5000000 /* Work item status */ typedef enum wrk_status { @@ -272,10 +274,9 @@ mtflush_service_io( { wrk_t *work_item = NULL; ulint n_flushed=0; - ib_time_t max_wait_usecs = 50000; mtflush_io->wt_status = WTHR_SIG_WAITING; - work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, max_wait_usecs); + work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, MT_WAIT_IN_USECS); if (work_item) { mtflush_io->wt_status = WTHR_RUNNING; @@ -411,7 +412,7 @@ buf_mtflu_io_thread_exit(void) /* Wait until all work items on a work queue are processed */ while(!ib_wqueue_is_empty(mtflush_io->wq)) { /* Wait */ - os_thread_sleep(500000); + os_thread_sleep(MT_WAIT_IN_USECS * 2); } ut_a(ib_wqueue_is_empty(mtflush_io->wq)); @@ -420,7 +421,7 @@ buf_mtflu_io_thread_exit(void) for (i=0; i < srv_mtflush_threads;) { wrk_t* work_item; - work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, 50000); + work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, MT_WAIT_IN_USECS); /* If we receive reply to work item and it's status is exit, thead has processed this message and existed */ @@ -550,7 +551,7 @@ buf_mtflu_flush_work_items( /* wait on the completion to arrive */ for(i=0; i< buf_pool_inst;) { - done_wi = (wrk_t *)ib_wqueue_timedwait(mtflush_ctx->wr_cq, 50000); + done_wi = (wrk_t *)ib_wqueue_timedwait(mtflush_ctx->wr_cq, MT_WAIT_IN_USECS); if (done_wi != NULL) { if(done_wi->n_flushed == 0) { diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index beb46cc2813..1f43e84a12f 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -51,6 +51,8 @@ Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com #include "fil0pagecompress.h" #define MT_COMP_WATER_MARK 50 +/** Time to wait for a message. */ +#define MT_WAIT_IN_USECS 5000000 /* Work item status */ typedef enum wrk_status { @@ -278,10 +280,9 @@ mtflush_service_io( { wrk_t *work_item = NULL; ulint n_flushed=0; - ib_time_t max_wait_usecs = 50000; mtflush_io->wt_status = WTHR_SIG_WAITING; - work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, max_wait_usecs); + work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, MT_WAIT_IN_USECS); if (work_item) { mtflush_io->wt_status = WTHR_RUNNING; @@ -417,7 +418,7 @@ buf_mtflu_io_thread_exit(void) /* Wait until all work items on a work queue are processed */ while(!ib_wqueue_is_empty(mtflush_io->wq)) { /* Wait */ - os_thread_sleep(500000); + os_thread_sleep(MT_WAIT_IN_USECS * 2); } ut_a(ib_wqueue_is_empty(mtflush_io->wq)); @@ -426,7 +427,7 @@ buf_mtflu_io_thread_exit(void) for (i=0; i < srv_mtflush_threads;) { wrk_t* work_item; - work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, 50000); + work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, MT_WAIT_IN_USECS); /* If we receive reply to work item and it's status is exit, thead has processed this message and existed */ @@ -556,7 +557,7 @@ buf_mtflu_flush_work_items( /* wait on the completion to arrive */ for(i=0; i< buf_pool_inst;) { - done_wi = (wrk_t *)ib_wqueue_timedwait(mtflush_ctx->wr_cq, 50000); + done_wi = (wrk_t *)ib_wqueue_timedwait(mtflush_ctx->wr_cq, MT_WAIT_IN_USECS); if (done_wi != NULL) { if(done_wi->n_flushed == 0) { From 25318038a92872492036e8eb5da9363f22d1b7c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Sat, 15 Feb 2014 09:51:06 +0200 Subject: [PATCH 25/56] Fixed hang seen on TPC-C measure phase. We should not use timedwait on threads waiting for a job. They should sleep and let other threads to their work. 
At shutdown, we know that we put "work" and that is handled as soon as possible. --- storage/innobase/buf/buf0mtflu.cc | 6 +++--- storage/xtradb/buf/buf0mtflu.cc | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index ee53e52a9cf..3750dbaa13e 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -276,12 +276,12 @@ mtflush_service_io( ulint n_flushed=0; mtflush_io->wt_status = WTHR_SIG_WAITING; - work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, MT_WAIT_IN_USECS); + work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq); if (work_item) { mtflush_io->wt_status = WTHR_RUNNING; } else { - /* Because of timeout this thread did not get any work */ + /* Thread did not get any work */ mtflush_io->wt_status = WTHR_NO_WORK; return; } @@ -551,7 +551,7 @@ buf_mtflu_flush_work_items( /* wait on the completion to arrive */ for(i=0; i< buf_pool_inst;) { - done_wi = (wrk_t *)ib_wqueue_timedwait(mtflush_ctx->wr_cq, MT_WAIT_IN_USECS); + done_wi = (wrk_t *)ib_wqueue_wait(mtflush_ctx->wr_cq); if (done_wi != NULL) { if(done_wi->n_flushed == 0) { diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index 1f43e84a12f..55902cc7a58 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -282,12 +282,12 @@ mtflush_service_io( ulint n_flushed=0; mtflush_io->wt_status = WTHR_SIG_WAITING; - work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, MT_WAIT_IN_USECS); + work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq); if (work_item) { mtflush_io->wt_status = WTHR_RUNNING; } else { - /* Because of timeout this thread did not get any work */ + /* Thread did not get any work */ mtflush_io->wt_status = WTHR_NO_WORK; return; } @@ -557,7 +557,7 @@ buf_mtflu_flush_work_items( /* wait on the completion to arrive */ for(i=0; i< buf_pool_inst;) { - done_wi = (wrk_t *)ib_wqueue_timedwait(mtflush_ctx->wr_cq, MT_WAIT_IN_USECS); + done_wi = (wrk_t *)ib_wqueue_wait(mtflush_ctx->wr_cq); if (done_wi != NULL) { if(done_wi->n_flushed == 0) { From 24bc0314c2b8ba373d970f15d5fba52c02cd01d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Wed, 19 Feb 2014 20:25:55 +0200 Subject: [PATCH 26/56] Removed unnecessary memory initialization of page compressed buffer and added guard against unalligned trim size. --- storage/innobase/os/os0file.cc | 7 ++----- storage/xtradb/os/os0file.cc | 6 ++---- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 683cd78b901..32f469ac240 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -4482,10 +4482,6 @@ found: ut_ad(slot->page_buf); - /* Write buffer full of zeros, this is needed for trim, - can't really avoid this now. 
*/ - memset(slot->page_buf, 0, len); - tmp = fil_compress_page(fil_node_get_space_id(slot->message1), (byte *)buf, slot->page_buf, len, page_compression_level, &real_len); /* If compression succeeded, set up the length and buffer */ @@ -6155,6 +6151,8 @@ os_file_trim( #define SECT_SIZE 512 size_t trim_len = UNIV_PAGE_SIZE - len; + // len here should be alligned to sector size + ut_a(trim_len == ((trim_len + SECT_SIZE-1) & ~(SECT_SIZE-1))); os_offset_t off = slot->offset + len; // Nothing to do if trim length is zero or if actual write @@ -6185,7 +6183,6 @@ os_file_trim( #ifdef __linux__ #if defined(FALLOC_FL_PUNCH_HOLE) && defined (FALLOC_FL_KEEP_SIZE) - trim_len = (trim_len & ~(SECT_SIZE - 1)) + SECT_SIZE; int ret = fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len); if (ret) { diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index d9a5be2b049..2dec28b71f3 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -4585,10 +4585,6 @@ found: ut_ad(slot->page_buf); - /* Write buffer full of zeros, this is needed for trim, - can't really avoid this now. */ - memset(slot->page_buf, 0, len); - tmp = fil_compress_page(fil_node_get_space_id(slot->message1), (byte *)buf, slot->page_buf, len, page_compression_level, &real_len); /* If compression succeeded, set up the length and buffer */ @@ -6210,6 +6206,8 @@ os_file_trim( #define SECT_SIZE 512 size_t trim_len = UNIV_PAGE_SIZE - len; + // len here should be alligned to sector size + ut_a(trim_len == ((trim_len + SECT_SIZE-1) & ~(SECT_SIZE-1))); os_offset_t off = slot->offset + len; // Nothing to do if trim length is zero or if actual write From 3c7714301718cc1b18847829582b3e3b71be940e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Fri, 21 Feb 2014 10:20:18 +0200 Subject: [PATCH 27/56] Write size was not correctly alligned to SECT_SIZE. This lead to situation where trim corrupted the database. Fixed the issue and added temporal guards against unalligned write/trim. 
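The fix described here is the standard power-of-two round-up: the compressed payload length is rounded up to the next multiple of the 512-byte sector before the write is issued, so that both the write and the trim of the remaining page tail fall on sector boundaries. A minimal standalone sketch of that rounding and of the guard this patch adds follows; the helper name and the sample length are invented for illustration, only the SECT_SIZE constant and the mask expression come from the patch itself.

#include <cassert>
#include <cstdio>

/* Illustrative helper: round len up to the next multiple of sect_size.
   This works because sect_size (512 in the patch) is a power of two. */
static unsigned long align_up(unsigned long len, unsigned long sect_size)
{
    return (len + sect_size - 1) & ~(sect_size - 1);
}

int main()
{
    const unsigned long SECT_SIZE = 512;  /* same constant as in the patch */
    unsigned long write_size = 6421;      /* hypothetical compressed length */

    if (write_size % SECT_SIZE) {
        write_size = align_up(write_size, SECT_SIZE);
    }

    /* Mirrors the ut_a((write_size % SECT_SIZE) == 0) guard added here. */
    assert(write_size % SECT_SIZE == 0);
    printf("aligned write size: %lu\n", write_size);  /* prints 6656 */
    return 0;
}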
--- storage/innobase/fil/fil0pagecompress.cc | 9 ++++++--- storage/innobase/os/os0file.cc | 5 +++-- storage/xtradb/fil/fil0pagecompress.cc | 9 ++++++--- storage/xtradb/os/os0file.cc | 5 +++-- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index 26e975bddf3..dfa216d0ae2 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -188,9 +188,13 @@ fil_compress_page( #endif /* UNIV_DEBUG */ write_size+=header_len; + +#define SECT_SIZE 512 + /* Actual write needs to be alligned on block size */ - if (write_size % OS_FILE_LOG_BLOCK_SIZE) { - write_size = (write_size + (OS_FILE_LOG_BLOCK_SIZE - (write_size % OS_FILE_LOG_BLOCK_SIZE))); + if (write_size % SECT_SIZE) { + write_size = (write_size + SECT_SIZE-1) & ~(SECT_SIZE-1); + ut_a((write_size % SECT_SIZE) == 0); } #ifdef UNIV_DEBUG @@ -199,7 +203,6 @@ fil_compress_page( space_id, fil_space_name(space), len, write_size); #endif -#define SECT_SIZE 512 srv_stats.page_compression_saved.add((len - write_size)); if ((len - write_size) > 0) { diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 32f469ac240..cdd8a68b4d4 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -6151,9 +6151,10 @@ os_file_trim( #define SECT_SIZE 512 size_t trim_len = UNIV_PAGE_SIZE - len; - // len here should be alligned to sector size - ut_a(trim_len == ((trim_len + SECT_SIZE-1) & ~(SECT_SIZE-1))); os_offset_t off = slot->offset + len; + // len here should be alligned to sector size + ut_a((trim_len % SECT_SIZE) == 0); + ut_a((len % SECT_SIZE) == 0); // Nothing to do if trim length is zero or if actual write // size is initialized and it is smaller than current write size. diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index 8f835113b7f..05dcf372112 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -184,9 +184,13 @@ fil_compress_page( #endif /* UNIV_DEBUG */ write_size+=header_len; + +#define SECT_SIZE 512 + /* Actual write needs to be alligned on block size */ - if (write_size % OS_FILE_LOG_BLOCK_SIZE) { - write_size = (write_size + (OS_FILE_LOG_BLOCK_SIZE - (write_size % OS_FILE_LOG_BLOCK_SIZE))); + if (write_size % SECT_SIZE) { + write_size = (write_size + SECT_SIZE-1) & ~(SECT_SIZE-1); + ut_a((write_size % SECT_SIZE) == 0); } #ifdef UNIV_DEBUG @@ -195,7 +199,6 @@ fil_compress_page( space_id, fil_space_name(space), len, write_size); #endif /* UNIV_DEBUG */ -#define SECT_SIZE 512 srv_stats.page_compression_saved.add((len - write_size)); if ((len - write_size) > 0) { diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 2dec28b71f3..72ceed1debc 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -6206,9 +6206,10 @@ os_file_trim( #define SECT_SIZE 512 size_t trim_len = UNIV_PAGE_SIZE - len; - // len here should be alligned to sector size - ut_a(trim_len == ((trim_len + SECT_SIZE-1) & ~(SECT_SIZE-1))); os_offset_t off = slot->offset + len; + // len here should be alligned to sector size + ut_a((trim_len % SECT_SIZE) == 0); + ut_a((len % SECT_SIZE) == 0); // Nothing to do if trim length is zero or if actual write // size is initialized and it is smaller than current write size. 
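Patches 26 and 27 both protect the trim path: os_file_trim() punches out the unused tail of the page on Linux with fallocate(FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE), deallocating the trimmed range while leaving the file size unchanged. The standalone sketch below shows that call in isolation, assuming a Linux filesystem with hole-punching support (for example ext4 or XFS); the file name and the sizes are made up for illustration, and error handling is reduced to perror().

#include <cstdio>
#include <cstring>
#include <fcntl.h>
#include <unistd.h>
#include <sys/stat.h>
#ifdef __linux__
# include <linux/falloc.h>   /* FALLOC_FL_PUNCH_HOLE, FALLOC_FL_KEEP_SIZE */
#endif

int main()
{
    const size_t page_size  = 16384;   /* illustrative full page size */
    const size_t write_size = 6656;    /* sector-aligned compressed length */
    const size_t trim_len   = page_size - write_size;

    char buf[16384];                   /* matches page_size above */
    memset(buf, 'x', sizeof(buf));

    int fd = open("trim_demo.ibd", O_CREAT | O_RDWR | O_TRUNC, 0644);
    if (fd < 0) { perror("open"); return 1; }

    /* Write one full page, then punch a hole over its unused tail while
       keeping the file size unchanged, as os_file_trim() does. */
    if (pwrite(fd, buf, page_size, 0) != (ssize_t) page_size) {
        perror("pwrite");
        return 1;
    }

#if defined(__linux__) && defined(FALLOC_FL_PUNCH_HOLE) && defined(FALLOC_FL_KEEP_SIZE)
    if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
                  (off_t) write_size, (off_t) trim_len)) {
        perror("fallocate");           /* e.g. filesystem without hole punching */
    }
#endif

    struct stat st;
    fstat(fd, &st);
    printf("file size %lld, allocated blocks %lld\n",
           (long long) st.st_size, (long long) st.st_blocks);
    close(fd);
    return 0;
}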
From 38471a6d6aa6ed96cac056794a1c5ee22d861c93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Fri, 21 Feb 2014 12:51:03 +0200 Subject: [PATCH 28/56] Remove incorrect trim_len calculation. We have already alligned actual page data write. --- storage/xtradb/os/os0file.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 72ceed1debc..945b4e788c5 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -6239,7 +6239,6 @@ os_file_trim( #ifdef __linux__ #if defined(FALLOC_FL_PUNCH_HOLE) && defined (FALLOC_FL_KEEP_SIZE) - trim_len = (trim_len & ~(SECT_SIZE - 1)) + SECT_SIZE; int ret = fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, off, trim_len); if (ret) { From 24235e99d83170f1802875f020179cc5dcda3182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 25 Feb 2014 13:15:55 +0200 Subject: [PATCH 29/56] Fixed memory leak on queue nodes by using local memory heap on normal execution and global memory heap on shutdown. Added a funcition to get work items from queue without waiting and additional info when there is no work to do for a extended periods. --- storage/innobase/buf/buf0mtflu.cc | 156 +++++++---------------- storage/innobase/include/ut0wqueue.h | 9 ++ storage/innobase/ut/ut0wqueue.cc | 32 +++++ storage/xtradb/buf/buf0mtflu.cc | 177 ++++++++++----------------- storage/xtradb/include/ut0list.h | 9 ++ storage/xtradb/include/ut0list.ic | 20 +++ storage/xtradb/include/ut0wqueue.h | 17 +++ storage/xtradb/ut/ut0wqueue.cc | 49 ++++++++ 8 files changed, 248 insertions(+), 221 deletions(-) diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index 3750dbaa13e..19dfc883ca0 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -113,6 +113,8 @@ typedef struct wrk_itm os_thread_id_t id_usr; /*!< Thread-id currently working */ wrk_status_t wi_status; /*!< Work item status */ struct wrk_itm *next; /*!< Next work item */ + mem_heap_t *wheap; /*!< Heap were to allocate memory + for queue nodes */ } wrk_t; /* Thread syncronization data */ @@ -127,39 +129,12 @@ typedef struct thread_sync wthr_status_t wt_status; /*!< Worker thread status */ mem_heap_t* wheap; /*!< Work heap where memory is allocated */ - wrk_t* work_item; /*!< Array of work-items that are - individually accessed by multiple - threads. Items are accessed in a - thread safe manner.*/ } thread_sync_t; -/* TODO: REALLY NEEDED ? */ static int mtflush_work_initialized = -1; static os_fast_mutex_t mtflush_mtx; static thread_sync_t* mtflush_ctx=NULL; -/******************************************************************//** -Initialize work items. */ -static -void -mtflu_setup_work_items( -/*===================*/ - wrk_t* work_items, /*!< inout: Work items */ - ulint n_items) /*!< in: Number of work items */ -{ - ulint i; - for(i=0; in_flushed; } -#ifdef UNIV_DEBUG -/******************************************************************//** -Print flush statistics of work items. 
-*/ -static -void -mtflu_print_thread_stat( -/*====================*/ - wrk_t* work_item) /*!< in: Work items */ -{ - ulint stat_tot=0; - ulint i=0; - - for(i=0; i< MTFLUSH_MAX_WORKER; i++) { - stat_tot+=work_item[i].n_flushed; - - fprintf(stderr, "MTFLUSH: Thread[%lu] stat [%lu]\n", - work_item[i].id_usr, - work_item[i].n_flushed); - - if (work_item[i].next == NULL) { - break; /* No more filled work items */ - } - } - fprintf(stderr, "MTFLUSH: Stat-Total:%lu\n", stat_tot); -} -#endif /* UNIV_DEBUG */ - /******************************************************************//** Worker function to wait for work items and processing them and sending reply back. @@ -276,7 +223,12 @@ mtflush_service_io( ulint n_flushed=0; mtflush_io->wt_status = WTHR_SIG_WAITING; - work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq); + + work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq); + + if (work_item == NULL) { + work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq); + } if (work_item) { mtflush_io->wt_status = WTHR_RUNNING; @@ -300,7 +252,7 @@ mtflush_service_io( case MT_WRK_NONE: ut_a(work_item->wi_status == WRK_ITEM_EXIT); work_item->wi_status = WRK_ITEM_EXIT; - ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); + ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->wheap); mtflush_io->wt_status = WTHR_KILL_IT; return; @@ -314,16 +266,11 @@ mtflush_service_io( work_item->wi_status = WRK_ITEM_FAILED; } work_item->wi_status = WRK_ITEM_SUCCESS; - ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); + ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->wheap); break; case MT_WRK_READ: - /* Need to also handle the read case */ - /* TODO: ? */ ut_a(0); - /* completed task get added to rd_cq */ - /* work_item->wi_status = WRK_ITEM_SUCCESS; - ib_wqueue_add(mtflush_io->rd_cq, work_item, mtflush_io->wheap);*/ break; default: @@ -348,25 +295,12 @@ DECLARE_THREAD(mtflush_io_thread)( #ifdef UNIV_DEBUG ib_uint64_t stat_universal_num_processed = 0; ib_uint64_t stat_cycle_num_processed = 0; - wrk_t* work_item = mtflush_io[0].work_item; ulint i; #endif while (TRUE) { mtflush_service_io(mtflush_io); -#ifdef UNIV_DEBUG - for(i=0; i < MTFLUSH_MAX_WORKER; i++) { - stat_cycle_num_processed+= work_item[i].n_flushed; - } - - stat_universal_num_processed+=stat_cycle_num_processed; - stat_cycle_num_processed = 0; - fprintf(stderr, "MTFLUSH_IO_THREAD: total %lu cycle %lu\n", - stat_universal_num_processed, - stat_cycle_num_processed); - mtflu_print_thread_stat(work_item); -#endif if (mtflush_io->wt_status == WTHR_KILL_IT) { break; } @@ -386,26 +320,31 @@ buf_mtflu_io_thread_exit(void) { long i; thread_sync_t* mtflush_io = mtflush_ctx; + wrk_t* work_item; ut_a(mtflush_io != NULL); + /* Allocate work items for shutdown message */ + work_item = (wrk_t*)mem_heap_alloc(mtflush_io->wheap, sizeof(wrk_t)*srv_mtflush_threads); + /* Confirm if the io-thread KILL is in progress, bailout */ if (mtflush_io->wt_status == WTHR_KILL_IT) { return; } fprintf(stderr, "signal mtflush_io_threads to exit [%lu]\n", - srv_buf_pool_instances); + srv_mtflush_threads); /* Send one exit work item/thread */ for (i=0; i < srv_mtflush_threads; i++) { - mtflush_io->work_item[i].wr.buf_pool = NULL; - mtflush_io->work_item[i].rd.page_pool = NULL; - mtflush_io->work_item[i].tsk = MT_WRK_NONE; - mtflush_io->work_item[i].wi_status = WRK_ITEM_EXIT; + work_item[i].wr.buf_pool = NULL; + work_item[i].rd.page_pool = NULL; + work_item[i].tsk = MT_WRK_NONE; + work_item[i].wi_status = WRK_ITEM_EXIT; + work_item[i].wheap = mtflush_io->wheap; 
ib_wqueue_add(mtflush_io->wq, - (void *)&(mtflush_io->work_item[i]), + (void *)&(work_item[i]), mtflush_io->wheap); } @@ -431,7 +370,7 @@ buf_mtflu_io_thread_exit(void) } /* Wait about 1/2 sec to allow threads really exit */ - os_thread_sleep(50000); + os_thread_sleep(5000000); ut_a(ib_wqueue_is_empty(mtflush_io->wq)); ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq)); @@ -462,7 +401,6 @@ buf_mtflu_handler_init( ib_wqueue_t* mtflush_work_queue; ib_wqueue_t* mtflush_write_comp_queue; ib_wqueue_t* mtflush_read_comp_queue; - wrk_t* work_items; os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); @@ -481,14 +419,6 @@ buf_mtflu_handler_init( mtflush_ctx = (thread_sync_t *)mem_heap_alloc(mtflush_heap, MTFLUSH_MAX_WORKER * sizeof(thread_sync_t)); ut_a(mtflush_ctx != NULL); - work_items = (wrk_t*)mem_heap_alloc(mtflush_heap, - MTFLUSH_MAX_WORKER * sizeof(wrk_t)); - ut_a(work_items != NULL); - memset(work_items, 0, sizeof(wrk_t) * MTFLUSH_MAX_WORKER); - memset(mtflush_ctx, 0, sizeof(thread_sync_t) * MTFLUSH_MAX_WORKER); - - /* Initialize work items */ - mtflu_setup_work_items(work_items, n_threads); /* Create threads for page-compression-flush */ for(i=0; i < n_threads; i++) { @@ -499,7 +429,6 @@ buf_mtflu_handler_init( mtflush_ctx[i].rd_cq = mtflush_read_comp_queue; mtflush_ctx[i].wheap = mtflush_heap; mtflush_ctx[i].wt_status = WTHR_INITIALIZED; - mtflush_ctx[i].work_item = work_items; mtflush_ctx[i].wthread = os_thread_create( mtflush_io_thread, @@ -533,20 +462,28 @@ buf_mtflu_flush_work_items( { ulint n_flushed=0, i; wrk_t *done_wi; + mem_heap_t* work_heap; + wrk_t* work_item; + + /* Allocate heap where all work items used and queue + node items areallocated */ + work_heap = mem_heap_create(0); + work_item = (wrk_t*)mem_heap_alloc(work_heap, sizeof(wrk_t)*buf_pool_inst); for(i=0;iwork_item[i].tsk = MT_WRK_WRITE; - mtflush_ctx->work_item[i].rd.page_pool = NULL; - mtflush_ctx->work_item[i].wr.buf_pool = buf_pool_from_array(i); - mtflush_ctx->work_item[i].wr.flush_type = flush_type; - mtflush_ctx->work_item[i].wr.min = min_n; - mtflush_ctx->work_item[i].wr.lsn_limit = lsn_limit; - mtflush_ctx->work_item[i].id_usr = -1; - mtflush_ctx->work_item[i].wi_status = WRK_ITEM_SET; + work_item[i].tsk = MT_WRK_WRITE; + work_item[i].rd.page_pool = NULL; + work_item[i].wr.buf_pool = buf_pool_from_array(i); + work_item[i].wr.flush_type = flush_type; + work_item[i].wr.min = min_n; + work_item[i].wr.lsn_limit = lsn_limit; + work_item[i].id_usr = -1; + work_item[i].wi_status = WRK_ITEM_SET; + work_item[i].wheap = work_heap; ib_wqueue_add(mtflush_ctx->wq, - (void *)(&(mtflush_ctx->work_item[i])), - mtflush_ctx->wheap); + (void *)(&(work_item[i])), + work_heap); } /* wait on the completion to arrive */ @@ -554,21 +491,15 @@ buf_mtflu_flush_work_items( done_wi = (wrk_t *)ib_wqueue_wait(mtflush_ctx->wr_cq); if (done_wi != NULL) { - if(done_wi->n_flushed == 0) { - per_pool_pages_flushed[i] = 0; - } else { - per_pool_pages_flushed[i] = done_wi->n_flushed; - } + per_pool_pages_flushed[i] = done_wi->n_flushed; if((int)done_wi->id_usr == -1 && done_wi->wi_status == WRK_ITEM_SET ) { -#ifdef UNIV_DEBUG fprintf(stderr, "**Set/Unused work_item[%lu] flush_type=%d\n", i, done_wi->wr.flush_type); - ut_ad(0); -#endif + ut_a(0); } n_flushed+= done_wi->n_flushed; @@ -576,6 +507,9 @@ buf_mtflu_flush_work_items( } } + /* Release used work_items and queue nodes */ + mem_heap_free(work_heap); + return(n_flushed); } diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h index 33385ddf2d4..bbbbd3b146b 100644 
--- a/storage/innobase/include/ut0wqueue.h +++ b/storage/innobase/include/ut0wqueue.h @@ -95,6 +95,15 @@ ib_wqueue_timedwait( ib_wqueue_t* wq, /* in: work queue */ ib_time_t wait_in_usecs); /* in: wait time in micro seconds */ +/******************************************************************** +Return first item on work queue or NULL if queue is empty +@return work item or NULL */ +void* +ib_wqueue_nowait( +/*=============*/ + ib_wqueue_t* wq); /*data : NULL); } +/******************************************************************** +Return first item on work queue or NULL if queue is empty +@return work item or NULL */ +void* +ib_wqueue_nowait( +/*=============*/ + ib_wqueue_t* wq) /*mutex); + + if(!ib_list_is_empty(wq->items)) { + node = ib_list_get_first(wq->items); + + if (node) { + ib_list_remove(wq->items, node); + + } + } + + /* We must reset the event when the list + gets emptied. */ + if(ib_list_is_empty(wq->items)) { + os_event_reset(wq->event); + } + + mutex_exit(&wq->mutex); + + return (node ? node->data : NULL); +} + /******************************************************************** Check if queue is empty. */ diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index 55902cc7a58..35a15bd5a14 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -113,6 +113,8 @@ typedef struct wrk_itm os_thread_id_t id_usr; /*!< Thread-id currently working */ wrk_status_t wi_status; /*!< Work item status */ struct wrk_itm *next; /*!< Next work item */ + mem_heap_t *wheap; /*!< Heap were to allocate memory + for queue nodes */ } wrk_t; /* Thread syncronization data */ @@ -127,39 +129,12 @@ typedef struct thread_sync wthr_status_t wt_status; /*!< Worker thread status */ mem_heap_t* wheap; /*!< Work heap where memory is allocated */ - wrk_t* work_item; /*!< Array of work-items that are - individually accessed by multiple - threads. Items are accessed in a - thread safe manner.*/ } thread_sync_t; -/* TODO: REALLY NEEDED ? */ static int mtflush_work_initialized = -1; static os_fast_mutex_t mtflush_mtx; static thread_sync_t* mtflush_ctx=NULL; -/******************************************************************//** -Initialize work items. */ -static -void -mtflu_setup_work_items( -/*===================*/ - wrk_t* work_items, /*!< inout: Work items */ - ulint n_items) /*!< in: Number of work items */ -{ - ulint i; - for(i=0; in_flushed; } -#ifdef UNIV_DEBUG -/******************************************************************//** -Print flush statistics of work items -*/ -static -void -mtflu_print_thread_stat( -/*====================*/ - wrk_t* work_item) /*!< in: Work items */ -{ - ulint stat_tot=0; - ulint i=0; - - for(i=0; i< MTFLUSH_MAX_WORKER; i++) { - stat_tot+=work_item[i].n_flushed; - - fprintf(stderr, "MTFLUSH: Thread[%lu] stat [%lu]\n", - work_item[i].id_usr, - work_item[i].n_flushed); - - if (work_item[i].next == NULL) { - break; /* No more filled work items */ - } - } - - fprintf(stderr, "MTFLUSH: Stat-Total:%lu\n", stat_tot); -} -#endif /* UNIV_DEBUG */ - /******************************************************************//** Worker function to wait for work items and processing them and sending reply back. 
@@ -282,7 +228,12 @@ mtflush_service_io( ulint n_flushed=0; mtflush_io->wt_status = WTHR_SIG_WAITING; - work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq); + + work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq); + + if (work_item == NULL) { + work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, MT_WAIT_IN_USECS); + } if (work_item) { mtflush_io->wt_status = WTHR_RUNNING; @@ -306,7 +257,7 @@ mtflush_service_io( case MT_WRK_NONE: ut_a(work_item->wi_status == WRK_ITEM_EXIT); work_item->wi_status = WRK_ITEM_EXIT; - ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); + ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->wheap); mtflush_io->wt_status = WTHR_KILL_IT; return; @@ -320,16 +271,11 @@ mtflush_service_io( work_item->wi_status = WRK_ITEM_FAILED; } work_item->wi_status = WRK_ITEM_SUCCESS; - ib_wqueue_add(mtflush_io->wr_cq, work_item, mtflush_io->wheap); + ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->wheap); break; case MT_WRK_READ: - /* Need to also handle the read case */ - /* TODO: ? */ ut_a(0); - /* completed task get added to rd_cq */ - /* work_item->wi_status = WRK_ITEM_SUCCESS; - ib_wqueue_add(mtflush_io->rd_cq, work_item, mtflush_io->wheap);*/ break; default: @@ -351,28 +297,36 @@ DECLARE_THREAD(mtflush_io_thread)( void * arg) { thread_sync_t *mtflush_io = ((thread_sync_t *)arg); + ulint n_timeout = 0; #ifdef UNIV_DEBUG ib_uint64_t stat_universal_num_processed = 0; ib_uint64_t stat_cycle_num_processed = 0; - wrk_t* work_item = mtflush_io[0].work_item; ulint i; #endif while (TRUE) { + fprintf(stderr, "InnoDB: Note. Thread %lu work queue len %lu return queue len %lu\n", + os_thread_get_curr_id(), + ib_wqueue_len(mtflush_io->wq), + ib_wqueue_len(mtflush_io->wr_cq)); + mtflush_service_io(mtflush_io); -#ifdef UNIV_DEBUG - for(i=0; i < MTFLUSH_MAX_WORKER; i++) { - stat_cycle_num_processed+= work_item[i].n_flushed; + if (mtflush_io->wt_status == WTHR_NO_WORK) { + n_timeout++; + + if (n_timeout > 10) { + fprintf(stderr, "InnoDB: Note: Thread %lu has not received " + " work queue len %lu return queue len %lu\n", + os_thread_get_curr_id(), + ib_wqueue_len(mtflush_io->wq), + ib_wqueue_len(mtflush_io->wr_cq)); + n_timeout = 0; + } + } else { + n_timeout = 0; } - stat_universal_num_processed+=stat_cycle_num_processed; - stat_cycle_num_processed = 0; - fprintf(stderr, "MTFLUSH_IO_THREAD: total %lu cycle %lu\n", - stat_universal_num_processed, - stat_cycle_num_processed); - mtflu_print_thread_stat(work_item); -#endif if (mtflush_io->wt_status == WTHR_KILL_IT) { break; } @@ -392,26 +346,31 @@ buf_mtflu_io_thread_exit(void) { long i; thread_sync_t* mtflush_io = mtflush_ctx; + wrk_t* work_item; ut_a(mtflush_io != NULL); + /* Allocate work items for shutdown message */ + work_item = (wrk_t*)mem_heap_alloc(mtflush_io->wheap, sizeof(wrk_t)*srv_mtflush_threads); + /* Confirm if the io-thread KILL is in progress, bailout */ if (mtflush_io->wt_status == WTHR_KILL_IT) { return; } fprintf(stderr, "signal mtflush_io_threads to exit [%lu]\n", - srv_buf_pool_instances); + srv_mtflush_threads); /* Send one exit work item/thread */ for (i=0; i < srv_mtflush_threads; i++) { - mtflush_io->work_item[i].wr.buf_pool = NULL; - mtflush_io->work_item[i].rd.page_pool = NULL; - mtflush_io->work_item[i].tsk = MT_WRK_NONE; - mtflush_io->work_item[i].wi_status = WRK_ITEM_EXIT; + work_item[i].wr.buf_pool = NULL; + work_item[i].rd.page_pool = NULL; + work_item[i].tsk = MT_WRK_NONE; + work_item[i].wi_status = WRK_ITEM_EXIT; + work_item[i].wheap = mtflush_io->wheap; ib_wqueue_add(mtflush_io->wq, 
- (void *)&(mtflush_io->work_item[i]), + (void *)&(work_item[i]), mtflush_io->wheap); } @@ -437,7 +396,7 @@ buf_mtflu_io_thread_exit(void) } /* Wait about 1/2 sec to allow threads really exit */ - os_thread_sleep(50000); + os_thread_sleep(5000000); ut_a(ib_wqueue_is_empty(mtflush_io->wq)); ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq)); @@ -468,7 +427,6 @@ buf_mtflu_handler_init( ib_wqueue_t* mtflush_work_queue; ib_wqueue_t* mtflush_write_comp_queue; ib_wqueue_t* mtflush_read_comp_queue; - wrk_t* work_items; os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); @@ -487,14 +445,6 @@ buf_mtflu_handler_init( mtflush_ctx = (thread_sync_t *)mem_heap_alloc(mtflush_heap, MTFLUSH_MAX_WORKER * sizeof(thread_sync_t)); ut_a(mtflush_ctx != NULL); - work_items = (wrk_t*)mem_heap_alloc(mtflush_heap, - MTFLUSH_MAX_WORKER * sizeof(wrk_t)); - ut_a(work_items != NULL); - memset(work_items, 0, sizeof(wrk_t) * MTFLUSH_MAX_WORKER); - memset(mtflush_ctx, 0, sizeof(thread_sync_t) * MTFLUSH_MAX_WORKER); - - /* Initialize work items */ - mtflu_setup_work_items(work_items, n_threads); /* Create threads for page-compression-flush */ for(i=0; i < n_threads; i++) { @@ -505,7 +455,6 @@ buf_mtflu_handler_init( mtflush_ctx[i].rd_cq = mtflush_read_comp_queue; mtflush_ctx[i].wheap = mtflush_heap; mtflush_ctx[i].wt_status = WTHR_INITIALIZED; - mtflush_ctx[i].work_item = work_items; mtflush_ctx[i].wthread = os_thread_create( mtflush_io_thread, @@ -539,20 +488,28 @@ buf_mtflu_flush_work_items( { ulint n_flushed=0, i; wrk_t *done_wi; + mem_heap_t* work_heap; + wrk_t* work_item; + + /* Allocate heap where all work items used and queue + node items areallocated */ + work_heap = mem_heap_create(0); + work_item = (wrk_t*)mem_heap_alloc(work_heap, sizeof(wrk_t)*buf_pool_inst); for(i=0;iwork_item[i].tsk = MT_WRK_WRITE; - mtflush_ctx->work_item[i].rd.page_pool = NULL; - mtflush_ctx->work_item[i].wr.buf_pool = buf_pool_from_array(i); - mtflush_ctx->work_item[i].wr.flush_type = flush_type; - mtflush_ctx->work_item[i].wr.min = min_n; - mtflush_ctx->work_item[i].wr.lsn_limit = lsn_limit; - mtflush_ctx->work_item[i].id_usr = -1; - mtflush_ctx->work_item[i].wi_status = WRK_ITEM_SET; + work_item[i].tsk = MT_WRK_WRITE; + work_item[i].rd.page_pool = NULL; + work_item[i].wr.buf_pool = buf_pool_from_array(i); + work_item[i].wr.flush_type = flush_type; + work_item[i].wr.min = min_n; + work_item[i].wr.lsn_limit = lsn_limit; + work_item[i].id_usr = -1; + work_item[i].wi_status = WRK_ITEM_SET; + work_item[i].wheap = work_heap; ib_wqueue_add(mtflush_ctx->wq, - (void *)(&(mtflush_ctx->work_item[i])), - mtflush_ctx->wheap); + (void *)(&(work_item[i])), + work_heap); } /* wait on the completion to arrive */ @@ -560,21 +517,15 @@ buf_mtflu_flush_work_items( done_wi = (wrk_t *)ib_wqueue_wait(mtflush_ctx->wr_cq); if (done_wi != NULL) { - if(done_wi->n_flushed == 0) { - per_pool_pages_flushed[i] = 0; - } else { - per_pool_pages_flushed[i] = done_wi->n_flushed; - } + per_pool_pages_flushed[i] = done_wi->n_flushed; if((int)done_wi->id_usr == -1 && done_wi->wi_status == WRK_ITEM_SET ) { -#ifdef UNIV_DEBUG fprintf(stderr, "**Set/Unused work_item[%lu] flush_type=%d\n", i, done_wi->wr.flush_type); - ut_ad(0); -#endif + ut_a(0); } n_flushed+= done_wi->n_flushed; @@ -582,6 +533,12 @@ buf_mtflu_flush_work_items( } } + ut_a(ib_wqueue_is_empty(mtflush_ctx->wq)); + ut_a(ib_wqueue_is_empty(mtflush_ctx->wr_cq)); + + /* Release used work_items and queue nodes */ + mem_heap_free(work_heap); + return(n_flushed); } diff --git a/storage/xtradb/include/ut0list.h 
b/storage/xtradb/include/ut0list.h index 29fc8669ce4..b1035bad099 100644 --- a/storage/xtradb/include/ut0list.h +++ b/storage/xtradb/include/ut0list.h @@ -150,6 +150,15 @@ ib_list_is_empty( /* out: TRUE if empty else */ const ib_list_t* list); /* in: list */ +/******************************************************************** +Get number of items on list. +@return number of items on list */ +UNIV_INLINE +ulint +ib_list_len( +/*========*/ + const ib_list_t* list); /*first || list->last)); } + +/******************************************************************** +Get number of items on list. +@return number of items on list */ +UNIV_INLINE +ulint +ib_list_len( +/*========*/ + const ib_list_t* list) /*first; + + while(node) { + len++; + node = node->next; + } + + return (len); +} diff --git a/storage/xtradb/include/ut0wqueue.h b/storage/xtradb/include/ut0wqueue.h index 33385ddf2d4..6513f4982c0 100644 --- a/storage/xtradb/include/ut0wqueue.h +++ b/storage/xtradb/include/ut0wqueue.h @@ -95,6 +95,23 @@ ib_wqueue_timedwait( ib_wqueue_t* wq, /* in: work queue */ ib_time_t wait_in_usecs); /* in: wait time in micro seconds */ +/******************************************************************** +Return first item on work queue or NULL if queue is empty +@return work item or NULL */ +void* +ib_wqueue_nowait( +/*=============*/ + ib_wqueue_t* wq); /*data : NULL); } +/******************************************************************** +Return first item on work queue or NULL if queue is empty +@return work item or NULL */ +void* +ib_wqueue_nowait( +/*=============*/ + ib_wqueue_t* wq) /*mutex); + + if(!ib_list_is_empty(wq->items)) { + node = ib_list_get_first(wq->items); + + if (node) { + ib_list_remove(wq->items, node); + + } + } + + /* We must reset the event when the list + gets emptied. */ + if(ib_list_is_empty(wq->items)) { + os_event_reset(wq->event); + } + + mutex_exit(&wq->mutex); + + return (node ? node->data : NULL); +} + /******************************************************************** Check if queue is empty. */ @@ -173,3 +205,20 @@ ib_wqueue_is_empty( { return(ib_list_is_empty(wq->items)); } + +/******************************************************************** +Get number of items on queue. +@return number of items on queue */ +ulint +ib_wqueue_len( +/*==========*/ + ib_wqueue_t* wq) /*mutex); + len = ib_list_len(wq->items); + mutex_exit(&wq->mutex); + + return(len); +} From b620e7368f05af52f3fa1a759bc446140baf7b56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Wed, 26 Feb 2014 19:00:24 +0200 Subject: [PATCH 30/56] Small fixes to work_item handling. 
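The work_item handling that this patch and the previous one adjust follows a simple producer/consumer protocol: the flush coordinator allocates work items from a per-call heap, marks them WRK_ITEM_SET and enqueues them, while each worker thread first polls the work queue without blocking (ib_wqueue_nowait()) and only falls back to a blocking wait when the queue is empty. The sketch below shows the same poll-then-block consumer pattern with the C++ standard library; it illustrates the idea only, all names in it are invented and it is not InnoDB code.

#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>

/* Toy work queue: try_pop() plays the role of ib_wqueue_nowait(),
   wait_pop() the role of ib_wqueue_wait(). Names are illustrative. */
struct work_queue {
    std::mutex              mtx;
    std::condition_variable cv;
    std::queue<int>         items;

    void push(int wi) {
        std::lock_guard<std::mutex> g(mtx);
        items.push(wi);
        cv.notify_one();
    }
    bool try_pop(int& wi) {            /* non-blocking poll */
        std::lock_guard<std::mutex> g(mtx);
        if (items.empty()) return false;
        wi = items.front(); items.pop();
        return true;
    }
    int wait_pop() {                   /* blocking wait for work */
        std::unique_lock<std::mutex> l(mtx);
        cv.wait(l, [this]{ return !items.empty(); });
        int wi = items.front(); items.pop();
        return wi;
    }
};

int main()
{
    work_queue wq;

    std::thread worker([&wq] {
        for (;;) {
            int wi;
            /* Poll first, block only if the queue was empty. */
            if (!wq.try_pop(wi)) wi = wq.wait_pop();
            if (wi < 0) break;         /* exit message, like MT_WRK_NONE */
            printf("processed work item %d\n", wi);
        }
    });

    for (int i = 0; i < 4; i++) wq.push(i);
    wq.push(-1);                       /* ask the worker to exit */
    worker.join();
    return 0;
}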
--- storage/innobase/buf/buf0mtflu.cc | 19 ++++++++----------- storage/xtradb/buf/buf0mtflu.cc | 19 ++++++++----------- 2 files changed, 16 insertions(+), 22 deletions(-) diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index 19dfc883ca0..62ed3f539e2 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -56,13 +56,14 @@ Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com /* Work item status */ typedef enum wrk_status { - WRK_ITEM_SET=0, /*!< Work item is set */ + WRK_ITEM_UNSET=0, /*!< Work item is not set */ WRK_ITEM_START=1, /*!< Processing of work item has started */ WRK_ITEM_DONE=2, /*!< Processing is done usually set to SUCCESS/FAILED */ WRK_ITEM_SUCCESS=2, /*!< Work item successfully processed */ WRK_ITEM_FAILED=3, /*!< Work item process failed */ WRK_ITEM_EXIT=4, /*!< Exiting */ + WRK_ITEM_SET=5, /*!< Work item is set */ WRK_ITEM_STATUS_UNDEFINED } wrk_status_t; @@ -179,9 +180,7 @@ buf_mtflu_flush_pool_instance( pools based on the assumption that it will help in the retry which will follow the failure. */ -#ifdef UNIV_DEBUG - fprintf(stderr, "flush start failed.\n"); -#endif + fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n"); return 0; } @@ -257,12 +256,10 @@ mtflush_service_io( return; case MT_WRK_WRITE: + ut_a(work_item->wi_status == WRK_ITEM_SET); work_item->wi_status = WRK_ITEM_START; /* Process work item */ if (0 == (n_flushed = buf_mtflu_flush_pool_instance(work_item))) { -#ifdef UNIV_DEBUG - fprintf(stderr, "No pages flushed\n"); -#endif work_item->wi_status = WRK_ITEM_FAILED; } work_item->wi_status = WRK_ITEM_SUCCESS; @@ -320,7 +317,7 @@ buf_mtflu_io_thread_exit(void) { long i; thread_sync_t* mtflush_io = mtflush_ctx; - wrk_t* work_item; + wrk_t* work_item = NULL; ut_a(mtflush_io != NULL); @@ -358,7 +355,7 @@ buf_mtflu_io_thread_exit(void) /* Collect all work done items */ for (i=0; i < srv_mtflush_threads;) { - wrk_t* work_item; + wrk_t* work_item = NULL; work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, MT_WAIT_IN_USECS); @@ -461,9 +458,8 @@ buf_mtflu_flush_work_items( number does not exceed min_n) */ { ulint n_flushed=0, i; - wrk_t *done_wi; mem_heap_t* work_heap; - wrk_t* work_item; + wrk_t* work_item=NULL; /* Allocate heap where all work items used and queue node items areallocated */ @@ -488,6 +484,7 @@ buf_mtflu_flush_work_items( /* wait on the completion to arrive */ for(i=0; i< buf_pool_inst;) { + wrk_t *done_wi = NULL; done_wi = (wrk_t *)ib_wqueue_wait(mtflush_ctx->wr_cq); if (done_wi != NULL) { diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index 35a15bd5a14..eeb9bf36c86 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -56,13 +56,14 @@ Modified 06/02/2014 Jan Lindström jan.lindstrom@skysql.com /* Work item status */ typedef enum wrk_status { - WRK_ITEM_SET=0, /*!< Work item is set */ + WRK_ITEM_UNSET=0, /*!< Work item is not set */ WRK_ITEM_START=1, /*!< Processing of work item has started */ WRK_ITEM_DONE=2, /*!< Processing is done usually set to SUCCESS/FAILED */ WRK_ITEM_SUCCESS=2, /*!< Work item successfully processed */ WRK_ITEM_FAILED=3, /*!< Work item process failed */ WRK_ITEM_EXIT=4, /*!< Exiting */ + WRK_ITEM_SET=5, /*!< Work item is set */ WRK_ITEM_STATUS_UNDEFINED } wrk_status_t; @@ -181,9 +182,7 @@ buf_mtflu_flush_pool_instance( pools based on the assumption that it will help in the retry which will follow the failure. 
*/ -#ifdef UNIV_DEBUG - fprintf(stderr, "flush start failed.\n"); -#endif + fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n"); return 0; } @@ -262,12 +261,10 @@ mtflush_service_io( return; case MT_WRK_WRITE: + ut_a(work_item->wi_status == WRK_ITEM_SET); work_item->wi_status = WRK_ITEM_START; /* Process work item */ if (0 == (n_flushed = buf_mtflu_flush_pool_instance(work_item))) { -#ifdef UNIV_DEBUG - fprintf(stderr, "No pages flushed\n"); -#endif work_item->wi_status = WRK_ITEM_FAILED; } work_item->wi_status = WRK_ITEM_SUCCESS; @@ -346,7 +343,7 @@ buf_mtflu_io_thread_exit(void) { long i; thread_sync_t* mtflush_io = mtflush_ctx; - wrk_t* work_item; + wrk_t* work_item = NULL; ut_a(mtflush_io != NULL); @@ -384,7 +381,7 @@ buf_mtflu_io_thread_exit(void) /* Collect all work done items */ for (i=0; i < srv_mtflush_threads;) { - wrk_t* work_item; + wrk_t* work_item = NULL; work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wr_cq, MT_WAIT_IN_USECS); @@ -487,9 +484,8 @@ buf_mtflu_flush_work_items( number does not exceed min_n) */ { ulint n_flushed=0, i; - wrk_t *done_wi; mem_heap_t* work_heap; - wrk_t* work_item; + wrk_t* work_item=NULL; /* Allocate heap where all work items used and queue node items areallocated */ @@ -514,6 +510,7 @@ buf_mtflu_flush_work_items( /* wait on the completion to arrive */ for(i=0; i< buf_pool_inst;) { + wrk_t *done_wi = NULL; done_wi = (wrk_t *)ib_wqueue_wait(mtflush_ctx->wr_cq); if (done_wi != NULL) { From c88a0d48c6624466d058282bf7e2e8279660564e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Fri, 28 Feb 2014 08:53:09 +0200 Subject: [PATCH 31/56] Temporal fix for flush thread hang. Added option to disable multi-threaded flush with innodb_use_mtflush = 0 option, by default multi-threaded flush is used. Updated innochecksum tool, still it does not support new checksums. --- extra/CMakeLists.txt | 16 +- extra/innochecksum.c | 325 --------------------- extra/innochecksum.cc | 396 ++++++++++++++++++++++++++ storage/innobase/buf/buf0mtflu.cc | 55 +++- storage/innobase/handler/ha_innodb.cc | 6 + storage/innobase/include/fil0fil.h | 4 +- storage/innobase/include/srv0srv.h | 5 + storage/innobase/include/ut0list.h | 9 + storage/innobase/include/ut0list.ic | 20 ++ storage/innobase/include/ut0wqueue.h | 8 + storage/innobase/srv/srv0srv.cc | 4 +- storage/innobase/srv/srv0start.cc | 29 +- storage/innobase/ut/ut0wqueue.cc | 17 ++ storage/xtradb/buf/buf0mtflu.cc | 34 ++- storage/xtradb/handler/ha_innodb.cc | 6 + storage/xtradb/include/fil0fil.h | 4 +- storage/xtradb/include/srv0srv.h | 5 + storage/xtradb/include/ut0list.h | 2 +- storage/xtradb/include/ut0list.ic | 2 +- storage/xtradb/include/ut0wqueue.h | 2 +- storage/xtradb/srv/srv0srv.cc | 4 +- storage/xtradb/srv/srv0start.cc | 31 +- 22 files changed, 614 insertions(+), 370 deletions(-) delete mode 100644 extra/innochecksum.c create mode 100644 extra/innochecksum.cc diff --git a/extra/CMakeLists.txt b/extra/CMakeLists.txt index f8f71b00743..cf3a35cb1dd 100644 --- a/extra/CMakeLists.txt +++ b/extra/CMakeLists.txt @@ -72,10 +72,24 @@ IF(CMAKE_SYSTEM_NAME STREQUAL "SunOS") ENDIF() ENDIF() +IF(WITH_INNOBASE_STORAGE_ENGINE) + # Add path to the InnoDB headers + INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/storage/innobase/include) + # We use the InnoDB code directly in case the code changes. 
+ ADD_DEFINITIONS("-DUNIV_INNOCHECKSUM") + SET(INNOBASE_SOURCES + ../storage/innobase/buf/buf0checksum.cc + ../storage/innobase/ut/ut0crc32.cc + ../storage/innobase/ut/ut0ut.cc + ) + MYSQL_ADD_EXECUTABLE(innochecksum innochecksum.cc ${INNOBASE_SOURCES}) + TARGET_LINK_LIBRARIES(innochecksum mysys mysys_ssl) +ENDIF() + MYSQL_ADD_EXECUTABLE(replace replace.c COMPONENT Server) TARGET_LINK_LIBRARIES(replace mysys) + IF(UNIX) - MYSQL_ADD_EXECUTABLE(innochecksum innochecksum.c) MYSQL_ADD_EXECUTABLE(resolve_stack_dump resolve_stack_dump.c) TARGET_LINK_LIBRARIES(resolve_stack_dump mysys) diff --git a/extra/innochecksum.c b/extra/innochecksum.c deleted file mode 100644 index ed4dfc48789..00000000000 --- a/extra/innochecksum.c +++ /dev/null @@ -1,325 +0,0 @@ -/* - Copyright (c) 2005, 2011, Oracle and/or its affiliates - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA -*/ - -/* - InnoDB offline file checksum utility. 85% of the code in this file - was taken wholesale fron the InnoDB codebase. - - The final 15% was originally written by Mark Smith of Danga - Interactive, Inc. - - Published with a permission. -*/ - -#include -#include -#include -#include -#include -#include -#include - -/* all of these ripped from InnoDB code from MySQL 4.0.22 */ -#define UT_HASH_RANDOM_MASK 1463735687 -#define UT_HASH_RANDOM_MASK2 1653893711 -#define FIL_PAGE_LSN 16 -#define FIL_PAGE_FILE_FLUSH_LSN 26 -#define FIL_PAGE_OFFSET 4 -#define FIL_PAGE_DATA 38 -#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 -#define FIL_PAGE_SPACE_OR_CHKSUM 0 -#define UNIV_PAGE_SIZE (2 * 8192) - -/* command line argument to do page checks (that's it) */ -/* another argument to specify page ranges... seek to right spot and go from there */ - -typedef unsigned long int ulint; - -/* innodb function in name; modified slightly to not have the ASM version (lots of #ifs that didn't apply) */ -ulint mach_read_from_4(uchar *b) -{ - return( ((ulint)(b[0]) << 24) - + ((ulint)(b[1]) << 16) - + ((ulint)(b[2]) << 8) - + (ulint)(b[3]) - ); -} - -ulint -ut_fold_ulint_pair( -/*===============*/ - /* out: folded value */ - ulint n1, /* in: ulint */ - ulint n2) /* in: ulint */ -{ - return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1) - ^ UT_HASH_RANDOM_MASK) + n2); -} - -ulint -ut_fold_binary( -/*===========*/ - /* out: folded value */ - uchar* str, /* in: string of bytes */ - ulint len) /* in: length */ -{ - ulint i; - ulint fold= 0; - - for (i= 0; i < len; i++) - { - fold= ut_fold_ulint_pair(fold, (ulint)(*str)); - - str++; - } - - return(fold); -} - -ulint -buf_calc_page_new_checksum( -/*=======================*/ - /* out: checksum */ - uchar* page) /* in: buffer page */ -{ - ulint checksum; - - /* Since the fields FIL_PAGE_FILE_FLUSH_LSN and ..._ARCH_LOG_NO - are written outside the buffer pool to the first pages of data - files, we have to skip them in the page checksum calculation. 
- We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the - checksum is stored, and also the last 8 bytes of page because - there we store the old formula checksum. */ - - checksum= ut_fold_binary(page + FIL_PAGE_OFFSET, - FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET) - + ut_fold_binary(page + FIL_PAGE_DATA, - UNIV_PAGE_SIZE - FIL_PAGE_DATA - - FIL_PAGE_END_LSN_OLD_CHKSUM); - checksum= checksum & 0xFFFFFFFF; - - return(checksum); -} - -ulint -buf_calc_page_old_checksum( -/*=======================*/ - /* out: checksum */ - uchar* page) /* in: buffer page */ -{ - ulint checksum; - - checksum= ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN); - - checksum= checksum & 0xFFFFFFFF; - - return(checksum); -} - - -int main(int argc, char **argv) -{ - FILE *f; /* our input file */ - uchar *p; /* storage of pages read */ - int bytes; /* bytes read count */ - ulint ct; /* current page number (0 based) */ - int now; /* current time */ - int lastt; /* last time */ - ulint oldcsum, oldcsumfield, csum, csumfield, logseq, logseqfield; /* ulints for checksum storage */ - struct stat st; /* for stat, if you couldn't guess */ - unsigned long long int size; /* size of file (has to be 64 bits) */ - ulint pages; /* number of pages in file */ - ulint start_page= 0, end_page= 0, use_end_page= 0; /* for starting and ending at certain pages */ - off_t offset= 0; - int just_count= 0; /* if true, just print page count */ - int verbose= 0; - int debug= 0; - int c; - int fd; - - /* remove arguments */ - while ((c= getopt(argc, argv, "cvds:e:p:")) != -1) - { - switch (c) - { - case 'v': - verbose= 1; - break; - case 'c': - just_count= 1; - break; - case 's': - start_page= atoi(optarg); - break; - case 'e': - end_page= atoi(optarg); - use_end_page= 1; - break; - case 'p': - start_page= atoi(optarg); - end_page= atoi(optarg); - use_end_page= 1; - break; - case 'd': - debug= 1; - break; - case ':': - fprintf(stderr, "option -%c requires an argument\n", optopt); - return 1; - break; - case '?': - fprintf(stderr, "unrecognized option: -%c\n", optopt); - return 1; - break; - } - } - - /* debug implies verbose... */ - if (debug) verbose= 1; - - /* make sure we have the right arguments */ - if (optind >= argc) - { - printf("InnoDB offline file checksum utility.\n"); - printf("usage: %s [-c] [-s ] [-e ] [-p ] [-v] [-d] \n", argv[0]); - printf("\t-c\tprint the count of pages in the file\n"); - printf("\t-s n\tstart on this page number (0 based)\n"); - printf("\t-e n\tend at this page number (0 based)\n"); - printf("\t-p n\tcheck only this page (0 based)\n"); - printf("\t-v\tverbose (prints progress every 5 seconds)\n"); - printf("\t-d\tdebug mode (prints checksums for each page)\n"); - return 1; - } - - /* stat the file to get size and page count */ - if (stat(argv[optind], &st)) - { - perror("error statting file"); - return 1; - } - size= st.st_size; - pages= size / UNIV_PAGE_SIZE; - if (just_count) - { - printf("%lu\n", pages); - return 0; - } - else if (verbose) - { - printf("file %s = %llu bytes (%lu pages)...\n", argv[optind], size, pages); - printf("checking pages in range %lu to %lu\n", start_page, use_end_page ? 
end_page : (pages - 1)); - } - - /* open the file for reading */ - f= fopen(argv[optind], "r"); - if (!f) - { - perror("error opening file"); - return 1; - } - - /* seek to the necessary position */ - if (start_page) - { - fd= fileno(f); - if (!fd) - { - perror("unable to obtain file descriptor number"); - return 1; - } - - offset= (off_t)start_page * (off_t)UNIV_PAGE_SIZE; - - if (lseek(fd, offset, SEEK_SET) != offset) - { - perror("unable to seek to necessary offset"); - return 1; - } - } - - /* allocate buffer for reading (so we don't realloc every time) */ - p= (uchar *)malloc(UNIV_PAGE_SIZE); - - /* main checksumming loop */ - ct= start_page; - lastt= 0; - while (!feof(f)) - { - bytes= fread(p, 1, UNIV_PAGE_SIZE, f); - if (!bytes && feof(f)) return 0; - if (bytes != UNIV_PAGE_SIZE) - { - fprintf(stderr, "bytes read (%d) doesn't match universal page size (%d)\n", bytes, UNIV_PAGE_SIZE); - return 1; - } - - /* check the "stored log sequence numbers" */ - logseq= mach_read_from_4(p + FIL_PAGE_LSN + 4); - logseqfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM + 4); - if (debug) - printf("page %lu: log sequence number: first = %lu; second = %lu\n", ct, logseq, logseqfield); - if (logseq != logseqfield) - { - fprintf(stderr, "page %lu invalid (fails log sequence number check)\n", ct); - return 1; - } - - /* check old method of checksumming */ - oldcsum= buf_calc_page_old_checksum(p); - oldcsumfield= mach_read_from_4(p + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM); - if (debug) - printf("page %lu: old style: calculated = %lu; recorded = %lu\n", ct, oldcsum, oldcsumfield); - if (oldcsumfield != mach_read_from_4(p + FIL_PAGE_LSN) && oldcsumfield != oldcsum) - { - fprintf(stderr, "page %lu invalid (fails old style checksum)\n", ct); - return 1; - } - - /* now check the new method */ - csum= buf_calc_page_new_checksum(p); - csumfield= mach_read_from_4(p + FIL_PAGE_SPACE_OR_CHKSUM); - if (debug) - printf("page %lu: new style: calculated = %lu; recorded = %lu\n", ct, csum, csumfield); - if (csumfield != 0 && csum != csumfield) - { - fprintf(stderr, "page %lu invalid (fails new style checksum)\n", ct); - return 1; - } - - /* end if this was the last page we were supposed to check */ - if (use_end_page && (ct >= end_page)) - return 0; - - /* do counter increase and progress printing */ - ct++; - if (verbose) - { - if (ct % 64 == 0) - { - now= time(0); - if (!lastt) lastt= now; - if (now - lastt >= 1) - { - printf("page %lu okay: %.3f%% done\n", (ct - 1), (float) ct / pages * 100); - lastt= now; - } - } - } - } - return 0; -} - diff --git a/extra/innochecksum.cc b/extra/innochecksum.cc new file mode 100644 index 00000000000..c89196b1eee --- /dev/null +++ b/extra/innochecksum.cc @@ -0,0 +1,396 @@ +/* + Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; version 2 of the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +/* + InnoDB offline file checksum utility. 
85% of the code in this utility + is included from the InnoDB codebase. + + The final 15% was originally written by Mark Smith of Danga + Interactive, Inc. + + Published with a permission. +*/ + +#include +#include +#include +#include +#include +#include +#ifndef __WIN__ +# include +#endif +#include +#include +#include /* ORACLE_WELCOME_COPYRIGHT_NOTICE */ + +/* Only parts of these files are included from the InnoDB codebase. +The parts not included are excluded by #ifndef UNIV_INNOCHECKSUM. */ + +#include "univ.i" /* include all of this */ + +#include "buf0checksum.h" /* buf_calc_page_*() */ +#include "fil0fil.h" /* FIL_* */ +#include "fsp0fsp.h" /* fsp_flags_get_page_size() & + fsp_flags_get_zip_size() */ +#include "mach0data.h" /* mach_read_from_4() */ +#include "ut0crc32.h" /* ut_crc32_init() */ + +#ifdef UNIV_NONINL +# include "fsp0fsp.ic" +# include "mach0data.ic" +# include "ut0rnd.ic" +#endif + +/* Global variables */ +static my_bool verbose; +static my_bool debug; +static my_bool just_count; +static ulong start_page; +static ulong end_page; +static ulong do_page; +static my_bool use_end_page; +static my_bool do_one_page; +ulong srv_page_size; /* replaces declaration in srv0srv.c */ +static ulong physical_page_size; /* Page size in bytes on disk. */ +static ulong logical_page_size; /* Page size when uncompressed. */ + +/* Get the page size of the filespace from the filespace header. */ +static +my_bool +get_page_size( +/*==========*/ + FILE* f, /*!< in: file pointer, must be open + and set to start of file */ + byte* buf, /*!< in: buffer used to read the page */ + ulong* logical_page_size, /*!< out: Logical/Uncompressed page size */ + ulong* physical_page_size) /*!< out: Physical/Commpressed page size */ +{ + ulong flags; + + int bytes= fread(buf, 1, UNIV_PAGE_SIZE_MIN, f); + + if (ferror(f)) + { + perror("Error reading file header"); + return FALSE; + } + + if (bytes != UNIV_PAGE_SIZE_MIN) + { + fprintf(stderr, "Error; Was not able to read the minimum page size "); + fprintf(stderr, "of %d bytes. Bytes read was %d\n", UNIV_PAGE_SIZE_MIN, bytes); + return FALSE; + } + + rewind(f); + + flags = mach_read_from_4(buf + FIL_PAGE_DATA + FSP_SPACE_FLAGS); + + /* srv_page_size is used by InnoDB code as UNIV_PAGE_SIZE */ + srv_page_size = *logical_page_size = fsp_flags_get_page_size(flags); + + /* fsp_flags_get_zip_size() will return zero if not compressed. */ + *physical_page_size = fsp_flags_get_zip_size(flags); + if (*physical_page_size == 0) + *physical_page_size= *logical_page_size; + + return TRUE; +} + + +/* command line argument to do page checks (that's it) */ +/* another argument to specify page ranges... 
seek to right spot and go from there */ + +static struct my_option innochecksum_options[] = +{ + {"help", '?', "Displays this help and exits.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"info", 'I', "Synonym for --help.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"version", 'V', "Displays version information and exits.", + 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"verbose", 'v', "Verbose (prints progress every 5 seconds).", + &verbose, &verbose, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"debug", 'd', "Debug mode (prints checksums for each page, implies verbose).", + &debug, &debug, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"count", 'c', "Print the count of pages in the file.", + &just_count, &just_count, 0, GET_BOOL, NO_ARG, 0, 0, 0, 0, 0, 0}, + {"start_page", 's', "Start on this page number (0 based).", + &start_page, &start_page, 0, GET_ULONG, REQUIRED_ARG, + 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0}, + {"end_page", 'e', "End at this page number (0 based).", + &end_page, &end_page, 0, GET_ULONG, REQUIRED_ARG, + 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0}, + {"page", 'p', "Check only this page (0 based).", + &do_page, &do_page, 0, GET_ULONG, REQUIRED_ARG, + 0, 0, (longlong) 2L*1024L*1024L*1024L, 0, 1, 0}, + + {0, 0, 0, 0, 0, 0, GET_NO_ARG, NO_ARG, 0, 0, 0, 0, 0, 0} +}; + +static void print_version(void) +{ + printf("%s Ver %s, for %s (%s)\n", + my_progname, INNODB_VERSION_STR, + SYSTEM_TYPE, MACHINE_TYPE); +} + +static void usage(void) +{ + print_version(); + puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000")); + printf("InnoDB offline file checksum utility.\n"); + printf("Usage: %s [-c] [-s ] [-e ] [-p ] [-v] [-d] \n", my_progname); + my_print_help(innochecksum_options); + my_print_variables(innochecksum_options); +} + +extern "C" my_bool +innochecksum_get_one_option( +/*========================*/ + int optid, + const struct my_option *opt __attribute__((unused)), + char *argument __attribute__((unused))) +{ + switch (optid) { + case 'd': + verbose=1; /* debug implies verbose... */ + break; + case 'e': + use_end_page= 1; + break; + case 'p': + end_page= start_page= do_page; + use_end_page= 1; + do_one_page= 1; + break; + case 'V': + print_version(); + exit(0); + break; + case 'I': + case '?': + usage(); + exit(0); + break; + } + return 0; +} + +static int get_options( +/*===================*/ + int *argc, + char ***argv) +{ + int ho_error; + + if ((ho_error=handle_options(argc, argv, innochecksum_options, innochecksum_get_one_option))) + exit(ho_error); + + /* The next arg must be the filename */ + if (!*argc) + { + usage(); + return 1; + } + return 0; +} /* get_options */ + + +int main(int argc, char **argv) +{ + FILE* f; /* our input file */ + char* filename; /* our input filename. 
*/ + unsigned char buf[UNIV_PAGE_SIZE_MAX]; /* Buffer to store pages read */ + ulong bytes; /* bytes read count */ + ulint ct; /* current page number (0 based) */ + time_t now; /* current time */ + time_t lastt; /* last time */ + ulint oldcsum, oldcsumfield, csum, csumfield, crc32, logseq, logseqfield; + /* ulints for checksum storage */ + struct stat st; /* for stat, if you couldn't guess */ + unsigned long long int size; /* size of file (has to be 64 bits) */ + ulint pages; /* number of pages in file */ + off_t offset= 0; + int fd; + + printf("InnoDB offline file checksum utility.\n"); + + ut_crc32_init(); + + MY_INIT(argv[0]); + + if (get_options(&argc,&argv)) + exit(1); + + if (verbose) + my_print_variables(innochecksum_options); + + /* The file name is not optional */ + filename = *argv; + if (*filename == '\0') + { + fprintf(stderr, "Error; File name missing\n"); + return 1; + } + + /* stat the file to get size and page count */ + if (stat(filename, &st)) + { + fprintf(stderr, "Error; %s cannot be found\n", filename); + return 1; + } + size= st.st_size; + + /* Open the file for reading */ + f= fopen(filename, "rb"); + if (f == NULL) + { + fprintf(stderr, "Error; %s cannot be opened", filename); + perror(" "); + return 1; + } + + if (!get_page_size(f, buf, &logical_page_size, &physical_page_size)) + { + return 1; + } + + /* This tool currently does not support Compressed tables */ + if (logical_page_size != physical_page_size) + { + fprintf(stderr, "Error; This file contains compressed pages\n"); + return 1; + } + + pages= (ulint) (size / physical_page_size); + + if (just_count) + { + if (verbose) + printf("Number of pages: "); + printf("%lu\n", pages); + return 0; + } + else if (verbose) + { + printf("file %s = %llu bytes (%lu pages)...\n", filename, size, pages); + if (do_one_page) + printf("InnoChecksum; checking page %lu\n", do_page); + else + printf("InnoChecksum; checking pages in range %lu to %lu\n", start_page, use_end_page ? 
end_page : (pages - 1)); + } + + /* seek to the necessary position */ + if (start_page) + { + fd= fileno(f); + if (!fd) + { + perror("Error; Unable to obtain file descriptor number"); + return 1; + } + + offset= (off_t)start_page * (off_t)physical_page_size; + + if (lseek(fd, offset, SEEK_SET) != offset) + { + perror("Error; Unable to seek to necessary offset"); + return 1; + } + } + + /* main checksumming loop */ + ct= start_page; + lastt= 0; + while (!feof(f)) + { + bytes= fread(buf, 1, physical_page_size, f); + if (!bytes && feof(f)) + return 0; + + if (ferror(f)) + { + fprintf(stderr, "Error reading %lu bytes", physical_page_size); + perror(" "); + return 1; + } + if (bytes != physical_page_size) + { + fprintf(stderr, "Error; bytes read (%lu) doesn't match page size (%lu)\n", bytes, physical_page_size); + return 1; + } + + /* check the "stored log sequence numbers" */ + logseq= mach_read_from_4(buf + FIL_PAGE_LSN + 4); + logseqfield= mach_read_from_4(buf + logical_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM + 4); + if (debug) + printf("page %lu: log sequence number: first = %lu; second = %lu\n", ct, logseq, logseqfield); + if (logseq != logseqfield) + { + fprintf(stderr, "Fail; page %lu invalid (fails log sequence number check)\n", ct); + return 1; + } + + /* check old method of checksumming */ + oldcsum= buf_calc_page_old_checksum(buf); + oldcsumfield= mach_read_from_4(buf + logical_page_size - FIL_PAGE_END_LSN_OLD_CHKSUM); + if (debug) + printf("page %lu: old style: calculated = %lu; recorded = %lu\n", ct, oldcsum, oldcsumfield); + if (oldcsumfield != mach_read_from_4(buf + FIL_PAGE_LSN) && oldcsumfield != oldcsum) + { + fprintf(stderr, "Fail; page %lu invalid (fails old style checksum)\n", ct); + return 1; + } + + /* now check the new method */ + csum= buf_calc_page_new_checksum(buf); + crc32= buf_calc_page_crc32(buf); + csumfield= mach_read_from_4(buf + FIL_PAGE_SPACE_OR_CHKSUM); + if (debug) + printf("page %lu: new style: calculated = %lu; crc32 = %lu; recorded = %lu\n", + ct, csum, crc32, csumfield); + if (csumfield != 0 && crc32 != csumfield && csum != csumfield) + { + fprintf(stderr, "Fail; page %lu invalid (fails innodb and crc32 checksum)\n", ct); + return 1; + } + + /* end if this was the last page we were supposed to check */ + if (use_end_page && (ct >= end_page)) + return 0; + + /* do counter increase and progress printing */ + ct++; + if (verbose) + { + if (ct % 64 == 0) + { + now= time(0); + if (!lastt) lastt= now; + if (now - lastt >= 1) + { + printf("page %lu okay: %.3f%% done\n", (ct - 1), (float) ct / pages * 100); + lastt= now; + } + } + } + } + return 0; +} + diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index 62ed3f539e2..5b591024922 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -134,6 +134,7 @@ typedef struct thread_sync static int mtflush_work_initialized = -1; static os_fast_mutex_t mtflush_mtx; +static os_fast_mutex_t mtflush_mtx_wait; static thread_sync_t* mtflush_ctx=NULL; /******************************************************************//** @@ -180,7 +181,9 @@ buf_mtflu_flush_pool_instance( pools based on the assumption that it will help in the retry which will follow the failure. */ +#ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n"); +#endif return 0; } @@ -223,12 +226,16 @@ mtflush_service_io( mtflush_io->wt_status = WTHR_SIG_WAITING; + /* TODO: Temporal fix for the hang bug. This needs a real fix. 
*/ + os_fast_mutex_lock(&mtflush_mtx_wait); work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq); if (work_item == NULL) { work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq); } + os_fast_mutex_unlock(&mtflush_mtx_wait); + if (work_item) { mtflush_io->wt_status = WTHR_RUNNING; } else { @@ -237,6 +244,10 @@ mtflush_service_io( return; } + if (work_item->wi_status != WRK_ITEM_EXIT) { + work_item->wi_status = WRK_ITEM_SET; + } + work_item->id_usr = os_thread_get_curr_id(); /* This works as a producer/consumer model, where in tasks are @@ -253,7 +264,7 @@ mtflush_service_io( work_item->wi_status = WRK_ITEM_EXIT; ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->wheap); mtflush_io->wt_status = WTHR_KILL_IT; - return; + break; case MT_WRK_WRITE: ut_a(work_item->wi_status == WRK_ITEM_SET); @@ -273,9 +284,9 @@ mtflush_service_io( default: /* None other than Write/Read handling planned */ ut_a(0); + break; } - mtflush_io->wt_status = WTHR_NO_WORK; } /******************************************************************//** @@ -289,6 +300,7 @@ DECLARE_THREAD(mtflush_io_thread)( void * arg) { thread_sync_t *mtflush_io = ((thread_sync_t *)arg); + ulint n_timeout = 0; #ifdef UNIV_DEBUG ib_uint64_t stat_universal_num_processed = 0; ib_uint64_t stat_cycle_num_processed = 0; @@ -296,8 +308,32 @@ DECLARE_THREAD(mtflush_io_thread)( #endif while (TRUE) { +#ifdef UNIV_DEBUG + fprintf(stderr, "InnoDB: Note. Thread %lu work queue len %lu return queue len %lu\n", + os_thread_get_curr_id(), + ib_wqueue_len(mtflush_io->wq), + ib_wqueue_len(mtflush_io->wr_cq)); +#endif /* UNIV_DEBUG */ + mtflush_service_io(mtflush_io); +#ifdef UNIV_DEBUG + if (mtflush_io->wt_status == WTHR_NO_WORK) { + n_timeout++; + + if (n_timeout > 10) { + fprintf(stderr, "InnoDB: Note: Thread %lu has not received " + " work queue len %lu return queue len %lu\n", + os_thread_get_curr_id(), + ib_wqueue_len(mtflush_io->wq), + ib_wqueue_len(mtflush_io->wr_cq)); + n_timeout = 0; + } + } else { + n_timeout = 0; + } +#endif /* UNIV_DEBUG */ + if (mtflush_io->wt_status == WTHR_KILL_IT) { break; } @@ -379,6 +415,7 @@ buf_mtflu_io_thread_exit(void) ib_wqueue_free(mtflush_io->rd_cq); os_fast_mutex_free(&mtflush_mtx); + os_fast_mutex_free(&mtflush_mtx_wait); /* Free heap */ mem_heap_free(mtflush_io->wheap); @@ -400,6 +437,7 @@ buf_mtflu_handler_init( ib_wqueue_t* mtflush_read_comp_queue; os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx_wait); /* Create heap, work queue, write completion queue, read completion queue for multi-threaded flush, and init @@ -465,16 +503,15 @@ buf_mtflu_flush_work_items( node items areallocated */ work_heap = mem_heap_create(0); work_item = (wrk_t*)mem_heap_alloc(work_heap, sizeof(wrk_t)*buf_pool_inst); + memset(work_item, 0, sizeof(wrk_t)*buf_pool_inst); for(i=0;iwq, @@ -490,14 +527,18 @@ buf_mtflu_flush_work_items( if (done_wi != NULL) { per_pool_pages_flushed[i] = done_wi->n_flushed; - if((int)done_wi->id_usr == -1 && - done_wi->wi_status == WRK_ITEM_SET ) { +#ifdef UNIV_DEBUG + /* TODO: Temporal fix for hang. This is really a bug. 
*/ + if((int)done_wi->id_usr == 0 && + (done_wi->wi_status == WRK_ITEM_SET || + done_wi->wi_status == WRK_ITEM_UNSET)) { fprintf(stderr, "**Set/Unused work_item[%lu] flush_type=%d\n", i, done_wi->wr.flush_type); ut_a(0); } +#endif n_flushed+= done_wi->n_flushed; i++; diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 4999a202bd6..6b44cb96677 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -16610,6 +16610,11 @@ static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, MTFLUSH_MAX_WORKER, /* Max setting */ 0); +static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush, + PLUGIN_VAR_OPCMDARG , + "Use multi-threaded flush. Default TRUE.", + NULL, NULL, TRUE); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), MYSQL_SYSVAR(api_trx_level), @@ -16762,6 +16767,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(use_lz4), #endif MYSQL_SYSVAR(mtflush_threads), + MYSQL_SYSVAR(use_mtflush), NULL }; diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h index 918a92fa811..37bc9ba5c86 100644 --- a/storage/innobase/include/fil0fil.h +++ b/storage/innobase/include/fil0fil.h @@ -1008,6 +1008,8 @@ Release fil_system mutex */ void fil_system_exit(void); /*==================*/ + +#ifndef UNIV_INNOCHECKSUM /*******************************************************************//** Returns the table space by a given id, NULL if not found. */ fil_space_t* @@ -1020,5 +1022,5 @@ char* fil_space_name( /*===========*/ fil_space_t* space); /*!< in: space */ - +#endif #endif /* fil0fil_h */ diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 725aaf9553d..b4bb9c09ef6 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -257,8 +257,13 @@ extern my_bool srv_use_lz4; /* Number of flush threads */ #define MTFLUSH_MAX_WORKER 64 #define MTFLUSH_DEFAULT_WORKER 8 + +/* Number of threads used for multi-threaded flush */ extern long srv_mtflush_threads; +/* If this flag is TRUE, then we will use multi threaded flush. */ +extern my_bool srv_use_mtflush; + #ifdef __WIN__ extern ibool srv_use_native_conditions; #endif /* __WIN__ */ diff --git a/storage/innobase/include/ut0list.h b/storage/innobase/include/ut0list.h index 29fc8669ce4..796a272db59 100644 --- a/storage/innobase/include/ut0list.h +++ b/storage/innobase/include/ut0list.h @@ -150,6 +150,15 @@ ib_list_is_empty( /* out: TRUE if empty else */ const ib_list_t* list); /* in: list */ +/******************************************************************** +Get number of items on list. +@return number of items on list */ +UNIV_INLINE +ulint +ib_list_len( +/*========*/ + const ib_list_t* list); /*first || list->last)); } + +/******************************************************************** +Get number of items on list. 
+@return number of items on list */ +UNIV_INLINE +ulint +ib_list_len( +/*========*/ + const ib_list_t* list) /*first; + + while(node) { + len++; + node = node->next; + } + + return (len); +} diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h index bbbbd3b146b..9906e299808 100644 --- a/storage/innobase/include/ut0wqueue.h +++ b/storage/innobase/include/ut0wqueue.h @@ -103,6 +103,14 @@ ib_wqueue_nowait( /*=============*/ ib_wqueue_t* wq); /*items)); } + +/******************************************************************** +Get number of items on queue. +@return number of items on queue */ +ulint +ib_wqueue_len( +/*==========*/ + ib_wqueue_t* wq) /*mutex); + len = ib_list_len(wq->items); + mutex_exit(&wq->mutex); + + return(len); +} diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index eeb9bf36c86..f7da4c1c7a9 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -134,6 +134,7 @@ typedef struct thread_sync static int mtflush_work_initialized = -1; static os_fast_mutex_t mtflush_mtx; +static os_fast_mutex_t mtflush_mtx_wait; static thread_sync_t* mtflush_ctx=NULL; /******************************************************************//** @@ -182,7 +183,9 @@ buf_mtflu_flush_pool_instance( pools based on the assumption that it will help in the retry which will follow the failure. */ +#ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n"); +#endif return 0; } @@ -228,12 +231,16 @@ mtflush_service_io( mtflush_io->wt_status = WTHR_SIG_WAITING; + /* TODO: Temporal fix for the hang bug. This needs a real fix. */ + os_fast_mutex_lock(&mtflush_mtx_wait); work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq); if (work_item == NULL) { work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, MT_WAIT_IN_USECS); } + os_fast_mutex_unlock(&mtflush_mtx_wait); + if (work_item) { mtflush_io->wt_status = WTHR_RUNNING; } else { @@ -242,6 +249,10 @@ mtflush_service_io( return; } + if (work_item->wi_status != WRK_ITEM_EXIT) { + work_item->wi_status = WRK_ITEM_SET; + } + work_item->id_usr = os_thread_get_curr_id(); /* This works as a producer/consumer model, where in tasks are @@ -258,7 +269,7 @@ mtflush_service_io( work_item->wi_status = WRK_ITEM_EXIT; ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->wheap); mtflush_io->wt_status = WTHR_KILL_IT; - return; + break; case MT_WRK_WRITE: ut_a(work_item->wi_status == WRK_ITEM_SET); @@ -278,9 +289,9 @@ mtflush_service_io( default: /* None other than Write/Read handling planned */ ut_a(0); + break; } - mtflush_io->wt_status = WTHR_NO_WORK; } /******************************************************************//** @@ -302,13 +313,16 @@ DECLARE_THREAD(mtflush_io_thread)( #endif while (TRUE) { +#ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: Note. 
Thread %lu work queue len %lu return queue len %lu\n", os_thread_get_curr_id(), ib_wqueue_len(mtflush_io->wq), ib_wqueue_len(mtflush_io->wr_cq)); +#endif /* UNIV_DEBUG */ mtflush_service_io(mtflush_io); +#ifdef UNIV_DEBUG if (mtflush_io->wt_status == WTHR_NO_WORK) { n_timeout++; @@ -323,6 +337,7 @@ DECLARE_THREAD(mtflush_io_thread)( } else { n_timeout = 0; } +#endif /* UNIV_DEBUG */ if (mtflush_io->wt_status == WTHR_KILL_IT) { break; @@ -405,6 +420,7 @@ buf_mtflu_io_thread_exit(void) ib_wqueue_free(mtflush_io->rd_cq); os_fast_mutex_free(&mtflush_mtx); + os_fast_mutex_free(&mtflush_mtx_wait); /* Free heap */ mem_heap_free(mtflush_io->wheap); @@ -426,6 +442,7 @@ buf_mtflu_handler_init( ib_wqueue_t* mtflush_read_comp_queue; os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx_wait); /* Create heap, work queue, write completion queue, read completion queue for multi-threaded flush, and init @@ -491,16 +508,15 @@ buf_mtflu_flush_work_items( node items areallocated */ work_heap = mem_heap_create(0); work_item = (wrk_t*)mem_heap_alloc(work_heap, sizeof(wrk_t)*buf_pool_inst); + memset(work_item, 0, sizeof(wrk_t)*buf_pool_inst); for(i=0;iwq, @@ -516,14 +532,18 @@ buf_mtflu_flush_work_items( if (done_wi != NULL) { per_pool_pages_flushed[i] = done_wi->n_flushed; - if((int)done_wi->id_usr == -1 && - done_wi->wi_status == WRK_ITEM_SET ) { +#ifdef UNIV_DEBUG + /* TODO: Temporal fix for hang. This is really a bug. */ + if((int)done_wi->id_usr == 0 && + (done_wi->wi_status == WRK_ITEM_SET || + done_wi->wi_status == WRK_ITEM_UNSET)) { fprintf(stderr, "**Set/Unused work_item[%lu] flush_type=%d\n", i, done_wi->wr.flush_type); ut_a(0); } +#endif n_flushed+= done_wi->n_flushed; i++; diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index f26ad436190..f35ec84fd12 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -17971,6 +17971,11 @@ static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, MTFLUSH_MAX_WORKER, /* Max setting */ 0); +static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush, + PLUGIN_VAR_OPCMDARG , + "Use multi-threaded flush. Default TRUE.", + NULL, NULL, TRUE); + static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(log_block_size), MYSQL_SYSVAR(additional_mem_pool_size), @@ -18168,6 +18173,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(use_lz4), #endif MYSQL_SYSVAR(mtflush_threads), + MYSQL_SYSVAR(use_mtflush), NULL }; diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h index 6b69a899690..e42063f6335 100644 --- a/storage/xtradb/include/fil0fil.h +++ b/storage/xtradb/include/fil0fil.h @@ -1042,6 +1042,8 @@ Release fil_system mutex */ void fil_system_exit(void); /*==================*/ + +#ifndef UNIV_INNOCHECKSUM /*******************************************************************//** Returns the table space by a given id, NULL if not found. 
*/ fil_space_t* @@ -1054,5 +1056,5 @@ char* fil_space_name( /*===========*/ fil_space_t* space); /*!< in: space */ - +#endif #endif /* fil0fil_h */ diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index bfb59865841..879989770e6 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -277,8 +277,13 @@ extern my_bool srv_use_lz4; /* Number of flush threads */ #define MTFLUSH_MAX_WORKER 64 #define MTFLUSH_DEFAULT_WORKER 8 + +/* Number of threads used for multi-threaded flush */ extern long srv_mtflush_threads; +/* If this flag is TRUE, then we will use multi threaded flush. */ +extern my_bool srv_use_mtflush; + /** Server undo tablespaces directory, can be absolute path. */ extern char* srv_undo_dir; diff --git a/storage/xtradb/include/ut0list.h b/storage/xtradb/include/ut0list.h index b1035bad099..796a272db59 100644 --- a/storage/xtradb/include/ut0list.h +++ b/storage/xtradb/include/ut0list.h @@ -151,7 +151,7 @@ ib_list_is_empty( const ib_list_t* list); /* in: list */ /******************************************************************** -Get number of items on list. +Get number of items on list. @return number of items on list */ UNIV_INLINE ulint diff --git a/storage/xtradb/include/ut0list.ic b/storage/xtradb/include/ut0list.ic index eaf2577b16c..7a7f53adb2f 100644 --- a/storage/xtradb/include/ut0list.ic +++ b/storage/xtradb/include/ut0list.ic @@ -60,7 +60,7 @@ ib_list_is_empty( } /******************************************************************** -Get number of items on list. +Get number of items on list. @return number of items on list */ UNIV_INLINE ulint diff --git a/storage/xtradb/include/ut0wqueue.h b/storage/xtradb/include/ut0wqueue.h index 6513f4982c0..e6b9891aed1 100644 --- a/storage/xtradb/include/ut0wqueue.h +++ b/storage/xtradb/include/ut0wqueue.h @@ -105,7 +105,7 @@ ib_wqueue_nowait( /******************************************************************** -Get number of items on queue. +Get number of items on queue. @return number of items on queue */ ulint ib_wqueue_len( diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index d6801b701ae..f7469e29911 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -176,9 +176,11 @@ UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE; /* If this flag is TRUE, then we disable doublewrite buffer */ UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; /* If this flag IS TRUE, then we use lz4 to compress/decompress pages */ -UNIV_INTERN my_bool srv_use_lz4 = FALSE; +UNIV_INTERN my_bool srv_use_lz4 = FALSE; /* Number of threads used for multi-threaded flush */ UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER; +/* If this flag is TRUE, then we will use multi threaded flush. */ +UNIV_INTERN my_bool srv_use_mtflush = TRUE; #ifdef __WIN__ /* Windows native condition variables. 
We use runtime loading / function diff --git a/storage/xtradb/srv/srv0start.cc b/storage/xtradb/srv/srv0start.cc index bb539569e9a..4d97632f818 100644 --- a/storage/xtradb/srv/srv0start.cc +++ b/storage/xtradb/srv/srv0start.cc @@ -2719,19 +2719,23 @@ files_checked: if (!srv_read_only_mode) { - /* Start multi-threaded flush threads */ - mtflush_ctx = buf_mtflu_handler_init(srv_mtflush_threads, - srv_buf_pool_instances); - - /* Set up the thread ids */ - buf_mtflu_set_thread_ids(srv_mtflush_threads, - mtflush_ctx, - (thread_ids + 6 + SRV_MAX_N_PURGE_THREADS)); + if (srv_use_mtflush) { + /* Start multi-threaded flush threads */ + mtflush_ctx = buf_mtflu_handler_init( + srv_mtflush_threads, + srv_buf_pool_instances); + /* Set up the thread ids */ + buf_mtflu_set_thread_ids( + srv_mtflush_threads, + mtflush_ctx, + (thread_ids + 6 + SRV_MAX_N_PURGE_THREADS)); #if UNIV_DEBUG - fprintf(stderr, "InnoDB: Note: %s:%d buf-pool-instances:%lu mtflush_threads %lu\n", - __FILE__, __LINE__, srv_buf_pool_instances, srv_mtflush_threads); + fprintf(stderr, "InnoDB: Note: %s:%d buf-pool-instances:%lu mtflush_threads %lu\n", + __FILE__, __LINE__, srv_buf_pool_instances, srv_mtflush_threads); #endif + } + os_thread_create(buf_flush_page_cleaner_thread, NULL, NULL); } @@ -3004,9 +3008,12 @@ innobase_shutdown_for_mysql(void) logs_empty_and_mark_files_at_shutdown() and should have already quit or is quitting right now. */ - /* g. Exit the multi threaded flush threads */ - buf_mtflu_io_thread_exit(); + if (srv_use_mtflush) { + /* g. Exit the multi threaded flush threads */ + + buf_mtflu_io_thread_exit(); + } #ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: Note: %s:%d os_thread_count:%lu \n", __FUNCTION__, __LINE__, os_thread_count); From e667c0f926deaa7ce7f093cd4679431599c55b1f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Fri, 28 Feb 2014 09:05:36 +0200 Subject: [PATCH 32/56] Fix compiler error. --- storage/innobase/srv/srv0start.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/srv/srv0start.cc b/storage/innobase/srv/srv0start.cc index 9ad5c960e5f..76587822bb1 100644 --- a/storage/innobase/srv/srv0start.cc +++ b/storage/innobase/srv/srv0start.cc @@ -2603,7 +2603,7 @@ files_checked: buf_mtflu_set_thread_ids( srv_mtflush_threads, mtflush_ctx, - (thread_ids + 6 + SRV_MAX_N_PURGE_THREADS)); + (thread_ids + 6 + 32)); #if UNIV_DEBUG fprintf(stderr, "InnoDB: Note: %s:%d buf-pool-instances:%lu mtflush_threads %lu\n", From b67892cf59872867514709784c54526434784ea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Mon, 3 Mar 2014 12:34:33 +0200 Subject: [PATCH 33/56] Turn all new features off by default. --- storage/innobase/handler/ha_innodb.cc | 8 ++++---- storage/innobase/srv/srv0srv.cc | 4 ++-- storage/xtradb/handler/ha_innodb.cc | 8 ++++---- storage/xtradb/srv/srv0srv.cc | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 6b44cb96677..2ec17049434 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -16591,8 +16591,8 @@ static MYSQL_SYSVAR_BOOL(compress_index_pages, srv_page_compress_index_pages, static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, PLUGIN_VAR_OPCMDARG, - "Use trim.", - NULL, NULL, TRUE); + "Use trim. 
Default FALSE.", + NULL, NULL, FALSE); #ifdef HAVE_LZ4 static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, @@ -16612,8 +16612,8 @@ static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush, PLUGIN_VAR_OPCMDARG , - "Use multi-threaded flush. Default TRUE.", - NULL, NULL, TRUE); + "Use multi-threaded flush. Default FALSE.", + NULL, NULL, FALSE); static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(additional_mem_pool_size), diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 7d3e7bf8108..6a0abdbf148 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -152,7 +152,7 @@ UNIV_INTERN my_bool srv_page_compress_index_pages = FALSE; UNIV_INTERN long srv_trim_pct = 100; /* If this flag is TRUE, then we will use fallocate(PUCH_HOLE) to the pages */ -UNIV_INTERN my_bool srv_use_trim = TRUE; +UNIV_INTERN my_bool srv_use_trim = FALSE; /* If this flag is TRUE, then we will use posix fallocate for file extentsion */ UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE; /* If this flag is TRUE, then we disable doublewrite buffer */ @@ -162,7 +162,7 @@ UNIV_INTERN my_bool srv_use_lz4 = FALSE; /* Number of threads used for multi-threaded flush */ UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER; /* If this flag is TRUE, then we will use multi threaded flush. */ -UNIV_INTERN my_bool srv_use_mtflush = TRUE; +UNIV_INTERN my_bool srv_use_mtflush = FALSE; #ifdef __WIN__ /* Windows native condition variables. We use runtime loading / function diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index f35ec84fd12..160ca6b8181 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -17952,8 +17952,8 @@ static MYSQL_SYSVAR_BOOL(compress_index_pages, srv_page_compress_index_pages, static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, PLUGIN_VAR_OPCMDARG, - "Use trim.", - NULL, NULL, TRUE); + "Use trim. Default FALSE.", + NULL, NULL, FALSE); #ifdef HAVE_LZ4 static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, @@ -17973,8 +17973,8 @@ static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush, PLUGIN_VAR_OPCMDARG , - "Use multi-threaded flush. Default TRUE.", - NULL, NULL, TRUE); + "Use multi-threaded flush. Default FALSE.", + NULL, NULL, FALSE); static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(log_block_size), diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index f7469e29911..f1ee459efd7 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -170,7 +170,7 @@ level is set for the table*/ UNIV_INTERN long srv_compress_zlib_level = 6; /* If this flag is TRUE, then we will use fallocate(PUCH_HOLE) to the pages */ -UNIV_INTERN my_bool srv_use_trim = TRUE; +UNIV_INTERN my_bool srv_use_trim = FALSE; /* If this flag is TRUE, then we will use posix fallocate for file extentsion */ UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE; /* If this flag is TRUE, then we disable doublewrite buffer */ @@ -180,7 +180,7 @@ UNIV_INTERN my_bool srv_use_lz4 = FALSE; /* Number of threads used for multi-threaded flush */ UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER; /* If this flag is TRUE, then we will use multi threaded flush. */ -UNIV_INTERN my_bool srv_use_mtflush = TRUE; +UNIV_INTERN my_bool srv_use_mtflush = FALSE; #ifdef __WIN__ /* Windows native condition variables. 
We use runtime loading / function From be50724d89d141360472326f4fad006ba6e377b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Mon, 3 Mar 2014 14:45:45 +0200 Subject: [PATCH 34/56] Fix compiler error on windows. --- storage/innobase/include/os0file.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 89cc7597375..8baa207855c 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -354,7 +354,7 @@ to original un-instrumented file I/O APIs */ # define os_file_close(file) os_file_close_func(file) -# define os_aio(type, mode, name, file, buf, offset, n, message1, message2, write_size) \ +# define os_aio(type, mode, name, file, buf, offset, n, message1, message2, write_size, page_compression, page_compression_level) \ os_aio_func(type, mode, name, file, buf, offset, n, \ message1, message2, write_size, page_compression, page_compression_level) From ec45160e3b8cb5fb4dc1118fc7c539f5f256d85c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Mon, 3 Mar 2014 15:02:39 +0200 Subject: [PATCH 35/56] Fix windows compiler erros. --- storage/innobase/include/os0file.h | 11 ++++++++++- storage/xtradb/include/os0file.h | 12 +++++++++++- storage/xtradb/os/os0file.cc | 11 ++++++++++- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 8baa207855c..18a3f6a5ccd 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -1204,7 +1204,16 @@ os_aio_windows_handle( parameters are valid and can be used to restart the operation, for example */ void** message2, - ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ + ulint* write_size,/*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level) /*!< page compression + level to be used */ #endif /**********************************************************************//** diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index e5abd4e2961..e4df03a1c8a 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -1243,7 +1243,17 @@ os_aio_windows_handle( restart the operation, for example */ void** message2, ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ - ulint* space_id); + ulint* space_id, + ulint* write_size,/*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. 
*/ + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level) /*!< page compression + level to be used */ + #endif /**********************************************************************//** diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index cd1efc21061..158485ed7a2 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -5090,7 +5090,16 @@ os_aio_windows_handle( restart the operation, for example */ void** message2, ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ - ulint* space_id) + ulint* space_id, + ulint* write_size,/*!< in/out: Actual write size initialized + after fist successfull trim + operation for this page and if + initialized we do not trim again if + actual page size does not decrease. */ + ibool page_compression, /*!< in: is page compression used + on this file space */ + ulint page_compression_level) /*!< page compression + level to be used */ { ulint orig_seg = segment; os_aio_slot_t* slot; From 6cde211d8ddefb98945904967cb028d6e3844bd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Mon, 3 Mar 2014 15:15:00 +0200 Subject: [PATCH 36/56] Fix typo. --- storage/innobase/include/os0file.h | 2 +- storage/xtradb/include/os0file.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 18a3f6a5ccd..6e32a64ca48 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -1212,7 +1212,7 @@ os_aio_windows_handle( actual page size does not decrease. */ ibool page_compression, /*!< in: is page compression used on this file space */ - ulint page_compression_level) /*!< page compression + ulint page_compression_level); /*!< page compression level to be used */ #endif diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index e4df03a1c8a..1cb19e57516 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -1251,7 +1251,7 @@ os_aio_windows_handle( actual page size does not decrease. */ ibool page_compression, /*!< in: is page compression used on this file space */ - ulint page_compression_level) /*!< page compression + ulint page_compression_level); /*!< page compression level to be used */ #endif From b8e0bc3a67557290aaee67e9b6f59b782eebd59e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Mon, 3 Mar 2014 15:26:58 +0200 Subject: [PATCH 37/56] Additional windows fixes. --- storage/innobase/include/os0file.h | 11 +---------- storage/innobase/os/os0file.cc | 18 ++++-------------- storage/xtradb/include/os0file.h | 11 +---------- storage/xtradb/os/os0file.cc | 15 +++------------ 4 files changed, 9 insertions(+), 46 deletions(-) diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 6e32a64ca48..8baa207855c 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -1204,16 +1204,7 @@ os_aio_windows_handle( parameters are valid and can be used to restart the operation, for example */ void** message2, - ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ - ulint* write_size,/*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. 
*/ - ibool page_compression, /*!< in: is page compression used - on this file space */ - ulint page_compression_level); /*!< page compression - level to be used */ + ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */ #endif /**********************************************************************//** diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 35e1cd47e37..2ca7f2009c6 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -4949,8 +4949,7 @@ try_again: retval = os_aio_windows_handle( ULINT_UNDEFINED, slot->pos, &dummy_mess1, &dummy_mess2, - &dummy_type, - write_size, page_compression, page_compression_level); + &dummy_type); return(retval); } @@ -5007,16 +5006,7 @@ os_aio_windows_handle( parameters are valid and can be used to restart the operation, for example */ void** message2, - ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ - ulint* write_size,/*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. */ - ibool page_compression, /*!< in: is page compression used - on this file space */ - ulint page_compression_level) /*!< page compression - level to be used */ + ulint* type) /*!< out: OS_FILE_WRITE or ..._READ */ { ulint orig_seg = segment; os_aio_array_t* array; @@ -5123,7 +5113,7 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: if (slot->message1 && - page_compression && + slot->page_compression && slot->page_buf) { ret = WriteFile(slot->file, slot->page_buf, (DWORD) slot->len, &len, @@ -5164,7 +5154,7 @@ os_aio_windows_handle( ret_val = ret && len == slot->len; } - if (slot->message1 && page_compression) { + if (slot->message1 && slot->page_compression) { // We allocate memory for page compressed buffer if and only // if it is not yet allocated. if (slot->page_buf == NULL) { diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h index 1cb19e57516..d355bfdf081 100644 --- a/storage/xtradb/include/os0file.h +++ b/storage/xtradb/include/os0file.h @@ -1243,16 +1243,7 @@ os_aio_windows_handle( restart the operation, for example */ void** message2, ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ - ulint* space_id, - ulint* write_size,/*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. */ - ibool page_compression, /*!< in: is page compression used - on this file space */ - ulint page_compression_level); /*!< page compression - level to be used */ + ulint* space_id); #endif diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 158485ed7a2..e4530f0f338 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -5090,16 +5090,7 @@ os_aio_windows_handle( restart the operation, for example */ void** message2, ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */ - ulint* space_id, - ulint* write_size,/*!< in/out: Actual write size initialized - after fist successfull trim - operation for this page and if - initialized we do not trim again if - actual page size does not decrease. 
*/ - ibool page_compression, /*!< in: is page compression used - on this file space */ - ulint page_compression_level) /*!< page compression - level to be used */ + ulint* space_id) { ulint orig_seg = segment; os_aio_slot_t* slot; @@ -5186,7 +5177,7 @@ os_aio_windows_handle( switch (slot->type) { case OS_FILE_WRITE: - if (slot->message1 && page_compression && slot->page_buf) { + if (slot->message1 && slot->page_compression && slot->page_buf) { ret_val = os_file_write(slot->name, slot->file, slot->page_buf, slot->control.Offset, slot->control.OffsetHigh, slot->len); } else { @@ -5222,7 +5213,7 @@ os_aio_windows_handle( ret_val = ret && len == slot->len; } - if (slot->message1 && page_compression) { + if (slot->message1 && slot->page_compression) { // We allocate memory for page compressed buffer if and only // if it is not yet allocated. if (slot->page_buf == NULL) { From e656a8a92791944420c3793f6686357f584788bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Mon, 3 Mar 2014 15:43:38 +0200 Subject: [PATCH 38/56] Fix windows os_file_write. --- storage/xtradb/os/os0file.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index e4530f0f338..c56a625a84c 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -5179,11 +5179,11 @@ os_aio_windows_handle( case OS_FILE_WRITE: if (slot->message1 && slot->page_compression && slot->page_buf) { ret_val = os_file_write(slot->name, slot->file, slot->page_buf, - slot->control.Offset, slot->control.OffsetHigh, slot->len); + slot->offset, slot->len); } else { ret_val = os_file_write(slot->name, slot->file, slot->buf, - slot->control.Offset, slot->control.OffsetHigh, slot->len); + slot->offset, slot->len); } break; case OS_FILE_READ: From 81318f04c8dd935d994d5ade3aed95f0059d5a83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Mon, 3 Mar 2014 15:51:54 +0200 Subject: [PATCH 39/56] Yet more windows fixes. --- storage/innobase/os/os0file.cc | 4 ++-- storage/xtradb/os/os0file.cc | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 2ca7f2009c6..f0ca05b7faa 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -6246,12 +6246,12 @@ os_file_trim( FALSE, __FILE__, __LINE__); if (slot->write_size) { - slot->write_size = 0; + *slot->write_size = 0; } return (FALSE); } else { if (slot->write_size) { - slot->write_size = len; + *slot->write_size = len; } } #endif diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index c56a625a84c..933690dfefa 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -5188,7 +5188,7 @@ os_aio_windows_handle( break; case OS_FILE_READ: ret_val = os_file_read(slot->file, slot->buf, - slot->control.Offset, slot->control.OffsetHigh, slot->len); + slot->offset, slot->len); break; default: ut_error; @@ -6311,12 +6311,12 @@ os_file_trim( FALSE, __FILE__, __LINE__); if (slot->write_size) { - slot->write_size = 0; + *slot->write_size = 0; } return (FALSE); } else { if (slot->write_size) { - slot->write_size = len; + *slot->write_size = len; } } #endif From fd38dca5d580eafcdd6c521be686601d5efa4c85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Mon, 3 Mar 2014 18:14:29 +0200 Subject: [PATCH 40/56] Fixed a hang. The core issues is with the heap-thrashing by the individual queue's. 
Tried to minimize memory allocation from heap whenever it is unnecessary. --- storage/innobase/buf/buf0mtflu.cc | 167 +++++++++++++++-------------- storage/xtradb/buf/buf0mtflu.cc | 169 ++++++++++++++++-------------- 2 files changed, 184 insertions(+), 152 deletions(-) diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index d249c1af15d..ea10d09e934 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -116,26 +116,40 @@ typedef struct wrk_itm struct wrk_itm *next; /*!< Next work item */ mem_heap_t *wheap; /*!< Heap were to allocate memory for queue nodes */ + mem_heap_t *rheap; } wrk_t; +typedef struct thread_data +{ + os_thread_id_t wthread_id; /*!< Identifier */ + os_thread_t wthread; /*!< Thread id */ + wthr_status_t wt_status; /*!< Worker thread status */ +} thread_data_t; + /* Thread syncronization data */ typedef struct thread_sync { + /* Global variables used by all threads */ + os_fast_mutex_t thread_global_mtx; /*!< Mutex used protecting below + variables */ ulint n_threads; /*!< Number of threads */ - os_thread_id_t wthread_id; /*!< Identifier */ - os_thread_t wthread; /*!< Thread id */ ib_wqueue_t *wq; /*!< Work Queue */ ib_wqueue_t *wr_cq; /*!< Write Completion Queue */ ib_wqueue_t *rd_cq; /*!< Read Completion Queue */ - wthr_status_t wt_status; /*!< Worker thread status */ mem_heap_t* wheap; /*!< Work heap where memory is allocated */ + mem_heap_t* rheap; /*!< Work heap where memory + is allocated */ + wthr_status_t gwt_status; /*!< Global thread status */ + + /* Variables used by only one thread at a time */ + thread_data_t* thread_data; /*!< Thread specific data */ + } thread_sync_t; static int mtflush_work_initialized = -1; -static os_fast_mutex_t mtflush_mtx; -static os_fast_mutex_t mtflush_mtx_wait; static thread_sync_t* mtflush_ctx=NULL; +static os_fast_mutex_t mtflush_mtx; /******************************************************************//** Set multi-threaded flush work initialized. */ @@ -218,29 +232,29 @@ static void mtflush_service_io( /*===============*/ - thread_sync_t* mtflush_io) /*!< inout: multi-threaded flush + thread_sync_t* mtflush_io, /*!< inout: multi-threaded flush syncronization data */ + thread_data_t* thread_data) /* Thread status data */ { wrk_t *work_item = NULL; ulint n_flushed=0; - mtflush_io->wt_status = WTHR_SIG_WAITING; + ut_a(mtflush_io != NULL); + ut_a(thread_data != NULL); + + thread_data->wt_status = WTHR_SIG_WAITING; - /* TODO: Temporal fix for the hang bug. This needs a real fix. 
*/ - os_fast_mutex_lock(&mtflush_mtx_wait); work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq); if (work_item == NULL) { work_item = (wrk_t *)ib_wqueue_wait(mtflush_io->wq); } - os_fast_mutex_unlock(&mtflush_mtx_wait); - if (work_item) { - mtflush_io->wt_status = WTHR_RUNNING; + thread_data->wt_status = WTHR_RUNNING; } else { /* Thread did not get any work */ - mtflush_io->wt_status = WTHR_NO_WORK; + thread_data->wt_status = WTHR_NO_WORK; return; } @@ -262,8 +276,8 @@ mtflush_service_io( case MT_WRK_NONE: ut_a(work_item->wi_status == WRK_ITEM_EXIT); work_item->wi_status = WRK_ITEM_EXIT; - ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->wheap); - mtflush_io->wt_status = WTHR_KILL_IT; + ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap); + thread_data->wt_status = WTHR_KILL_IT; break; case MT_WRK_WRITE: @@ -274,7 +288,7 @@ mtflush_service_io( work_item->wi_status = WRK_ITEM_FAILED; } work_item->wi_status = WRK_ITEM_SUCCESS; - ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->wheap); + ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap); break; case MT_WRK_READ: @@ -286,7 +300,6 @@ mtflush_service_io( ut_a(0); break; } - } /******************************************************************//** @@ -300,14 +313,23 @@ DECLARE_THREAD(mtflush_io_thread)( void * arg) { thread_sync_t *mtflush_io = ((thread_sync_t *)arg); - ulint n_timeout = 0; -#ifdef UNIV_DEBUG - ib_uint64_t stat_universal_num_processed = 0; - ib_uint64_t stat_cycle_num_processed = 0; + thread_data_t *this_thread_data = NULL; ulint i; -#endif + + /* Find correct slot for this thread */ + os_fast_mutex_lock(&(mtflush_io->thread_global_mtx)); + for(i=0; i < mtflush_io->n_threads; i ++) { + if (mtflush_io->thread_data[i].wthread_id == os_thread_get_curr_id()) { + break; + } + } + + ut_a(i <= mtflush_io->n_threads); + this_thread_data = &mtflush_io->thread_data[i]; + os_fast_mutex_unlock(&(mtflush_io->thread_global_mtx)); while (TRUE) { + #ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: Note. 
Thread %lu work queue len %lu return queue len %lu\n", os_thread_get_curr_id(), @@ -315,26 +337,10 @@ DECLARE_THREAD(mtflush_io_thread)( ib_wqueue_len(mtflush_io->wr_cq)); #endif /* UNIV_DEBUG */ - mtflush_service_io(mtflush_io); + mtflush_service_io(mtflush_io, this_thread_data); -#ifdef UNIV_DEBUG - if (mtflush_io->wt_status == WTHR_NO_WORK) { - n_timeout++; - if (n_timeout > 10) { - fprintf(stderr, "InnoDB: Note: Thread %lu has not received " - " work queue len %lu return queue len %lu\n", - os_thread_get_curr_id(), - ib_wqueue_len(mtflush_io->wq), - ib_wqueue_len(mtflush_io->wr_cq)); - n_timeout = 0; - } - } else { - n_timeout = 0; - } -#endif /* UNIV_DEBUG */ - - if (mtflush_io->wt_status == WTHR_KILL_IT) { + if (this_thread_data->wt_status == WTHR_KILL_IT) { break; } } @@ -359,22 +365,24 @@ buf_mtflu_io_thread_exit(void) /* Allocate work items for shutdown message */ work_item = (wrk_t*)mem_heap_alloc(mtflush_io->wheap, sizeof(wrk_t)*srv_mtflush_threads); + memset(work_item, 0, sizeof(wrk_t)*srv_mtflush_threads); /* Confirm if the io-thread KILL is in progress, bailout */ - if (mtflush_io->wt_status == WTHR_KILL_IT) { + if (mtflush_io->gwt_status == WTHR_KILL_IT) { return; } + mtflush_io->gwt_status = WTHR_KILL_IT; + fprintf(stderr, "signal mtflush_io_threads to exit [%lu]\n", srv_mtflush_threads); /* Send one exit work item/thread */ for (i=0; i < srv_mtflush_threads; i++) { - work_item[i].wr.buf_pool = NULL; - work_item[i].rd.page_pool = NULL; work_item[i].tsk = MT_WRK_NONE; work_item[i].wi_status = WRK_ITEM_EXIT; work_item[i].wheap = mtflush_io->wheap; + work_item[i].rheap = mtflush_io->rheap; ib_wqueue_add(mtflush_io->wq, (void *)&(work_item[i]), @@ -384,7 +392,7 @@ buf_mtflu_io_thread_exit(void) /* Wait until all work items on a work queue are processed */ while(!ib_wqueue_is_empty(mtflush_io->wq)) { /* Wait */ - os_thread_sleep(MT_WAIT_IN_USECS * 2); + os_thread_sleep(MT_WAIT_IN_USECS); } ut_a(ib_wqueue_is_empty(mtflush_io->wq)); @@ -403,7 +411,7 @@ buf_mtflu_io_thread_exit(void) } /* Wait about 1/2 sec to allow threads really exit */ - os_thread_sleep(5000000); + os_thread_sleep(MT_WAIT_IN_USECS); ut_a(ib_wqueue_is_empty(mtflush_io->wq)); ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq)); @@ -415,10 +423,11 @@ buf_mtflu_io_thread_exit(void) ib_wqueue_free(mtflush_io->rd_cq); os_fast_mutex_free(&mtflush_mtx); - os_fast_mutex_free(&mtflush_mtx_wait); + os_fast_mutex_free(&mtflush_io->thread_global_mtx); /* Free heap */ mem_heap_free(mtflush_io->wheap); + mem_heap_free(mtflush_io->rheap); } /******************************************************************//** @@ -432,45 +441,50 @@ buf_mtflu_handler_init( { ulint i; mem_heap_t* mtflush_heap; - ib_wqueue_t* mtflush_work_queue; - ib_wqueue_t* mtflush_write_comp_queue; - ib_wqueue_t* mtflush_read_comp_queue; - - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx_wait); + mem_heap_t* mtflush_heap2; /* Create heap, work queue, write completion queue, read completion queue for multi-threaded flush, and init handler. 
*/ mtflush_heap = mem_heap_create(0); ut_a(mtflush_heap != NULL); - mtflush_work_queue = ib_wqueue_create(); - ut_a(mtflush_work_queue != NULL); - mtflush_write_comp_queue = ib_wqueue_create(); - ut_a(mtflush_write_comp_queue != NULL); - mtflush_read_comp_queue = ib_wqueue_create(); - ut_a(mtflush_read_comp_queue != NULL); + mtflush_heap2 = mem_heap_create(0); + ut_a(mtflush_heap2 != NULL); mtflush_ctx = (thread_sync_t *)mem_heap_alloc(mtflush_heap, - MTFLUSH_MAX_WORKER * sizeof(thread_sync_t)); + sizeof(thread_sync_t)); + memset(mtflush_ctx, 0, sizeof(thread_sync_t)); ut_a(mtflush_ctx != NULL); + mtflush_ctx->thread_data = (thread_data_t*)mem_heap_alloc( + mtflush_heap, sizeof(thread_data_t) * n_threads); + ut_a(mtflush_ctx->thread_data); + memset(mtflush_ctx->thread_data, 0, sizeof(thread_data_t) * n_threads); + + mtflush_ctx->n_threads = n_threads; + mtflush_ctx->wq = ib_wqueue_create(); + ut_a(mtflush_ctx->wq); + mtflush_ctx->wr_cq = ib_wqueue_create(); + ut_a(mtflush_ctx->wr_cq); + mtflush_ctx->rd_cq = ib_wqueue_create(); + ut_a(mtflush_ctx->rd_cq); + mtflush_ctx->wheap = mtflush_heap; + mtflush_ctx->rheap = mtflush_heap2; + + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_ctx->thread_global_mtx); + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); /* Create threads for page-compression-flush */ for(i=0; i < n_threads; i++) { os_thread_id_t new_thread_id; - mtflush_ctx[i].n_threads = n_threads; - mtflush_ctx[i].wq = mtflush_work_queue; - mtflush_ctx[i].wr_cq = mtflush_write_comp_queue; - mtflush_ctx[i].rd_cq = mtflush_read_comp_queue; - mtflush_ctx[i].wheap = mtflush_heap; - mtflush_ctx[i].wt_status = WTHR_INITIALIZED; - mtflush_ctx[i].wthread = os_thread_create( + mtflush_ctx->thread_data[i].wt_status = WTHR_INITIALIZED; + + mtflush_ctx->thread_data[i].wthread = os_thread_create( mtflush_io_thread, - ((void *)(mtflush_ctx + i)), + ((void *) mtflush_ctx), &new_thread_id); - mtflush_ctx[i].wthread_id = new_thread_id; + mtflush_ctx->thread_data[i].wthread_id = new_thread_id; } buf_mtflu_work_init(); @@ -497,13 +511,15 @@ buf_mtflu_flush_work_items( { ulint n_flushed=0, i; mem_heap_t* work_heap; - wrk_t* work_item=NULL; + mem_heap_t* reply_heap; + wrk_t work_item[MTFLUSH_MAX_WORKER]; /* Allocate heap where all work items used and queue node items areallocated */ work_heap = mem_heap_create(0); - work_item = (wrk_t*)mem_heap_alloc(work_heap, sizeof(wrk_t)*buf_pool_inst); - memset(work_item, 0, sizeof(wrk_t)*buf_pool_inst); + reply_heap = mem_heap_create(0); + memset(work_item, 0, sizeof(wrk_t)*MTFLUSH_MAX_WORKER); + for(i=0;iwq, - (void *)(&(work_item[i])), + (void *)(work_item + i), work_heap); } @@ -527,8 +544,6 @@ buf_mtflu_flush_work_items( if (done_wi != NULL) { per_pool_pages_flushed[i] = done_wi->n_flushed; -#ifdef UNIV_DEBUG - /* TODO: Temporal fix for hang. This is really a bug. 
*/ if((int)done_wi->id_usr == 0 && (done_wi->wi_status == WRK_ITEM_SET || done_wi->wi_status == WRK_ITEM_UNSET)) { @@ -538,7 +553,6 @@ buf_mtflu_flush_work_items( done_wi->wr.flush_type); ut_a(0); } -#endif n_flushed+= done_wi->n_flushed; i++; @@ -547,6 +561,7 @@ buf_mtflu_flush_work_items( /* Release used work_items and queue nodes */ mem_heap_free(work_heap); + mem_heap_free(reply_heap); return(n_flushed); } @@ -672,6 +687,6 @@ buf_mtflu_set_thread_ids( ut_a(thread_ids != NULL); for(i = 0; i < n_threads; i++) { - thread_ids[i] = mtflush_io[i].wthread_id; + thread_ids[i] = mtflush_io->thread_data[i].wthread_id; } } diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index f7da4c1c7a9..d1ec9979f51 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -116,26 +116,40 @@ typedef struct wrk_itm struct wrk_itm *next; /*!< Next work item */ mem_heap_t *wheap; /*!< Heap were to allocate memory for queue nodes */ + mem_heap_t *rheap; } wrk_t; +typedef struct thread_data +{ + os_thread_id_t wthread_id; /*!< Identifier */ + os_thread_t wthread; /*!< Thread id */ + wthr_status_t wt_status; /*!< Worker thread status */ +} thread_data_t; + /* Thread syncronization data */ typedef struct thread_sync { + /* Global variables used by all threads */ + os_fast_mutex_t thread_global_mtx; /*!< Mutex used protecting below + variables */ ulint n_threads; /*!< Number of threads */ - os_thread_id_t wthread_id; /*!< Identifier */ - os_thread_t wthread; /*!< Thread id */ ib_wqueue_t *wq; /*!< Work Queue */ ib_wqueue_t *wr_cq; /*!< Write Completion Queue */ ib_wqueue_t *rd_cq; /*!< Read Completion Queue */ - wthr_status_t wt_status; /*!< Worker thread status */ mem_heap_t* wheap; /*!< Work heap where memory is allocated */ + mem_heap_t* rheap; /*!< Work heap where memory + is allocated */ + wthr_status_t gwt_status; /*!< Global thread status */ + + /* Variables used by only one thread at a time */ + thread_data_t* thread_data; /*!< Thread specific data */ + } thread_sync_t; static int mtflush_work_initialized = -1; -static os_fast_mutex_t mtflush_mtx; -static os_fast_mutex_t mtflush_mtx_wait; static thread_sync_t* mtflush_ctx=NULL; +static os_fast_mutex_t mtflush_mtx; /******************************************************************//** Set multi-threaded flush work initialized. */ @@ -172,6 +186,8 @@ buf_mtflu_flush_pool_instance( ut_a(work_item != NULL); ut_a(work_item->wr.buf_pool != NULL); + memset(&n, 0, sizeof(flush_counters_t)); + if (!buf_flush_start(work_item->wr.buf_pool, work_item->wr.flush_type)) { /* We have two choices here. If lsn_limit was specified then skipping an instance of buffer @@ -223,29 +239,29 @@ static void mtflush_service_io( /*===============*/ - thread_sync_t* mtflush_io) /*!< inout: multi-threaded flush + thread_sync_t* mtflush_io, /*!< inout: multi-threaded flush syncronization data */ + thread_data_t* thread_data) /* Thread status data */ { wrk_t *work_item = NULL; ulint n_flushed=0; - mtflush_io->wt_status = WTHR_SIG_WAITING; + ut_a(mtflush_io != NULL); + ut_a(thread_data != NULL); + + thread_data->wt_status = WTHR_SIG_WAITING; - /* TODO: Temporal fix for the hang bug. This needs a real fix. 
*/ - os_fast_mutex_lock(&mtflush_mtx_wait); work_item = (wrk_t *)ib_wqueue_nowait(mtflush_io->wq); if (work_item == NULL) { work_item = (wrk_t *)ib_wqueue_timedwait(mtflush_io->wq, MT_WAIT_IN_USECS); } - os_fast_mutex_unlock(&mtflush_mtx_wait); - if (work_item) { - mtflush_io->wt_status = WTHR_RUNNING; + thread_data->wt_status = WTHR_RUNNING; } else { /* Thread did not get any work */ - mtflush_io->wt_status = WTHR_NO_WORK; + thread_data->wt_status = WTHR_NO_WORK; return; } @@ -267,8 +283,8 @@ mtflush_service_io( case MT_WRK_NONE: ut_a(work_item->wi_status == WRK_ITEM_EXIT); work_item->wi_status = WRK_ITEM_EXIT; - ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->wheap); - mtflush_io->wt_status = WTHR_KILL_IT; + ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap); + thread_data->wt_status = WTHR_KILL_IT; break; case MT_WRK_WRITE: @@ -279,7 +295,7 @@ mtflush_service_io( work_item->wi_status = WRK_ITEM_FAILED; } work_item->wi_status = WRK_ITEM_SUCCESS; - ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->wheap); + ib_wqueue_add(mtflush_io->wr_cq, work_item, work_item->rheap); break; case MT_WRK_READ: @@ -291,7 +307,6 @@ mtflush_service_io( ut_a(0); break; } - } /******************************************************************//** @@ -305,14 +320,23 @@ DECLARE_THREAD(mtflush_io_thread)( void * arg) { thread_sync_t *mtflush_io = ((thread_sync_t *)arg); - ulint n_timeout = 0; -#ifdef UNIV_DEBUG - ib_uint64_t stat_universal_num_processed = 0; - ib_uint64_t stat_cycle_num_processed = 0; + thread_data_t *this_thread_data = NULL; ulint i; -#endif + + /* Find correct slot for this thread */ + os_fast_mutex_lock(&(mtflush_io->thread_global_mtx)); + for(i=0; i < mtflush_io->n_threads; i ++) { + if (mtflush_io->thread_data[i].wthread_id == os_thread_get_curr_id()) { + break; + } + } + + ut_a(i <= mtflush_io->n_threads); + this_thread_data = &mtflush_io->thread_data[i]; + os_fast_mutex_unlock(&(mtflush_io->thread_global_mtx)); while (TRUE) { + #ifdef UNIV_DEBUG fprintf(stderr, "InnoDB: Note. 
Thread %lu work queue len %lu return queue len %lu\n", os_thread_get_curr_id(), @@ -320,26 +344,10 @@ DECLARE_THREAD(mtflush_io_thread)( ib_wqueue_len(mtflush_io->wr_cq)); #endif /* UNIV_DEBUG */ - mtflush_service_io(mtflush_io); + mtflush_service_io(mtflush_io, this_thread_data); -#ifdef UNIV_DEBUG - if (mtflush_io->wt_status == WTHR_NO_WORK) { - n_timeout++; - if (n_timeout > 10) { - fprintf(stderr, "InnoDB: Note: Thread %lu has not received " - " work queue len %lu return queue len %lu\n", - os_thread_get_curr_id(), - ib_wqueue_len(mtflush_io->wq), - ib_wqueue_len(mtflush_io->wr_cq)); - n_timeout = 0; - } - } else { - n_timeout = 0; - } -#endif /* UNIV_DEBUG */ - - if (mtflush_io->wt_status == WTHR_KILL_IT) { + if (this_thread_data->wt_status == WTHR_KILL_IT) { break; } } @@ -364,22 +372,24 @@ buf_mtflu_io_thread_exit(void) /* Allocate work items for shutdown message */ work_item = (wrk_t*)mem_heap_alloc(mtflush_io->wheap, sizeof(wrk_t)*srv_mtflush_threads); + memset(work_item, 0, sizeof(wrk_t)*srv_mtflush_threads); /* Confirm if the io-thread KILL is in progress, bailout */ - if (mtflush_io->wt_status == WTHR_KILL_IT) { + if (mtflush_io->gwt_status == WTHR_KILL_IT) { return; } + mtflush_io->gwt_status = WTHR_KILL_IT; + fprintf(stderr, "signal mtflush_io_threads to exit [%lu]\n", srv_mtflush_threads); /* Send one exit work item/thread */ for (i=0; i < srv_mtflush_threads; i++) { - work_item[i].wr.buf_pool = NULL; - work_item[i].rd.page_pool = NULL; work_item[i].tsk = MT_WRK_NONE; work_item[i].wi_status = WRK_ITEM_EXIT; work_item[i].wheap = mtflush_io->wheap; + work_item[i].rheap = mtflush_io->rheap; ib_wqueue_add(mtflush_io->wq, (void *)&(work_item[i]), @@ -389,7 +399,7 @@ buf_mtflu_io_thread_exit(void) /* Wait until all work items on a work queue are processed */ while(!ib_wqueue_is_empty(mtflush_io->wq)) { /* Wait */ - os_thread_sleep(MT_WAIT_IN_USECS * 2); + os_thread_sleep(MT_WAIT_IN_USECS); } ut_a(ib_wqueue_is_empty(mtflush_io->wq)); @@ -408,7 +418,7 @@ buf_mtflu_io_thread_exit(void) } /* Wait about 1/2 sec to allow threads really exit */ - os_thread_sleep(5000000); + os_thread_sleep(MT_WAIT_IN_USECS); ut_a(ib_wqueue_is_empty(mtflush_io->wq)); ut_a(ib_wqueue_is_empty(mtflush_io->wr_cq)); @@ -420,10 +430,11 @@ buf_mtflu_io_thread_exit(void) ib_wqueue_free(mtflush_io->rd_cq); os_fast_mutex_free(&mtflush_mtx); - os_fast_mutex_free(&mtflush_mtx_wait); + os_fast_mutex_free(&mtflush_io->thread_global_mtx); /* Free heap */ mem_heap_free(mtflush_io->wheap); + mem_heap_free(mtflush_io->rheap); } /******************************************************************//** @@ -437,45 +448,50 @@ buf_mtflu_handler_init( { ulint i; mem_heap_t* mtflush_heap; - ib_wqueue_t* mtflush_work_queue; - ib_wqueue_t* mtflush_write_comp_queue; - ib_wqueue_t* mtflush_read_comp_queue; - - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); - os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx_wait); + mem_heap_t* mtflush_heap2; /* Create heap, work queue, write completion queue, read completion queue for multi-threaded flush, and init handler. 
*/ mtflush_heap = mem_heap_create(0); ut_a(mtflush_heap != NULL); - mtflush_work_queue = ib_wqueue_create(); - ut_a(mtflush_work_queue != NULL); - mtflush_write_comp_queue = ib_wqueue_create(); - ut_a(mtflush_write_comp_queue != NULL); - mtflush_read_comp_queue = ib_wqueue_create(); - ut_a(mtflush_read_comp_queue != NULL); + mtflush_heap2 = mem_heap_create(0); + ut_a(mtflush_heap2 != NULL); mtflush_ctx = (thread_sync_t *)mem_heap_alloc(mtflush_heap, - MTFLUSH_MAX_WORKER * sizeof(thread_sync_t)); + sizeof(thread_sync_t)); + memset(mtflush_ctx, 0, sizeof(thread_sync_t)); ut_a(mtflush_ctx != NULL); + mtflush_ctx->thread_data = (thread_data_t*)mem_heap_alloc( + mtflush_heap, sizeof(thread_data_t) * n_threads); + ut_a(mtflush_ctx->thread_data); + memset(mtflush_ctx->thread_data, 0, sizeof(thread_data_t) * n_threads); + + mtflush_ctx->n_threads = n_threads; + mtflush_ctx->wq = ib_wqueue_create(); + ut_a(mtflush_ctx->wq); + mtflush_ctx->wr_cq = ib_wqueue_create(); + ut_a(mtflush_ctx->wr_cq); + mtflush_ctx->rd_cq = ib_wqueue_create(); + ut_a(mtflush_ctx->rd_cq); + mtflush_ctx->wheap = mtflush_heap; + mtflush_ctx->rheap = mtflush_heap2; + + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_ctx->thread_global_mtx); + os_fast_mutex_init(PFS_NOT_INSTRUMENTED, &mtflush_mtx); /* Create threads for page-compression-flush */ for(i=0; i < n_threads; i++) { os_thread_id_t new_thread_id; - mtflush_ctx[i].n_threads = n_threads; - mtflush_ctx[i].wq = mtflush_work_queue; - mtflush_ctx[i].wr_cq = mtflush_write_comp_queue; - mtflush_ctx[i].rd_cq = mtflush_read_comp_queue; - mtflush_ctx[i].wheap = mtflush_heap; - mtflush_ctx[i].wt_status = WTHR_INITIALIZED; - mtflush_ctx[i].wthread = os_thread_create( + mtflush_ctx->thread_data[i].wt_status = WTHR_INITIALIZED; + + mtflush_ctx->thread_data[i].wthread = os_thread_create( mtflush_io_thread, - ((void *)(mtflush_ctx + i)), + ((void *) mtflush_ctx), &new_thread_id); - mtflush_ctx[i].wthread_id = new_thread_id; + mtflush_ctx->thread_data[i].wthread_id = new_thread_id; } buf_mtflu_work_init(); @@ -502,13 +518,15 @@ buf_mtflu_flush_work_items( { ulint n_flushed=0, i; mem_heap_t* work_heap; - wrk_t* work_item=NULL; + mem_heap_t* reply_heap; + wrk_t work_item[MTFLUSH_MAX_WORKER]; /* Allocate heap where all work items used and queue node items areallocated */ work_heap = mem_heap_create(0); - work_item = (wrk_t*)mem_heap_alloc(work_heap, sizeof(wrk_t)*buf_pool_inst); - memset(work_item, 0, sizeof(wrk_t)*buf_pool_inst); + reply_heap = mem_heap_create(0); + memset(work_item, 0, sizeof(wrk_t)*MTFLUSH_MAX_WORKER); + for(i=0;iwq, - (void *)(&(work_item[i])), + (void *)(work_item + i), work_heap); } @@ -532,8 +551,6 @@ buf_mtflu_flush_work_items( if (done_wi != NULL) { per_pool_pages_flushed[i] = done_wi->n_flushed; -#ifdef UNIV_DEBUG - /* TODO: Temporal fix for hang. This is really a bug. 
*/ if((int)done_wi->id_usr == 0 && (done_wi->wi_status == WRK_ITEM_SET || done_wi->wi_status == WRK_ITEM_UNSET)) { @@ -543,7 +560,6 @@ buf_mtflu_flush_work_items( done_wi->wr.flush_type); ut_a(0); } -#endif n_flushed+= done_wi->n_flushed; i++; @@ -555,6 +571,7 @@ buf_mtflu_flush_work_items( /* Release used work_items and queue nodes */ mem_heap_free(work_heap); + mem_heap_free(reply_heap); return(n_flushed); } @@ -680,6 +697,6 @@ buf_mtflu_set_thread_ids( ut_a(thread_ids != NULL); for(i = 0; i < n_threads; i++) { - thread_ids[i] = mtflush_io[i].wthread_id; + thread_ids[i] = mtflush_io->thread_data[i].wthread_id; } } From 7322270a0514883b62f4148e6acc039a5e1b7fd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 4 Mar 2014 17:14:08 +0200 Subject: [PATCH 41/56] Set actual compressed page size also on read code path to buffer pool so that we can later use it to avoid unnecessary trim operations. --- storage/innobase/buf/buf0rea.cc | 2 +- storage/innobase/fil/fil0pagecompress.cc | 10 +++++++++- storage/innobase/include/fil0pagecompress.h | 4 +++- storage/innobase/os/os0file.cc | 12 +++++++----- storage/xtradb/buf/buf0rea.cc | 2 +- storage/xtradb/fil/fil0pagecompress.cc | 10 +++++++++- storage/xtradb/include/fil0pagecompress.h | 4 +++- storage/xtradb/os/os0file.cc | 14 ++++++++------ 8 files changed, 41 insertions(+), 17 deletions(-) diff --git a/storage/innobase/buf/buf0rea.cc b/storage/innobase/buf/buf0rea.cc index e2578b7f6b7..ec76c9923fe 100644 --- a/storage/innobase/buf/buf0rea.cc +++ b/storage/innobase/buf/buf0rea.cc @@ -185,7 +185,7 @@ buf_read_page_low( *err = fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, zip_size, offset, 0, zip_size, - bpage->zip.data, bpage, 0); + bpage->zip.data, bpage, &bpage->write_size); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index dfa216d0ae2..8ecb5317088 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -226,7 +226,9 @@ fil_decompress_page( byte* page_buf, /*!< in: preallocated buffer or NULL */ byte* buf, /*!< out: buffer from which to read; in aio this must be appropriately aligned */ - ulint len) /*!< in: length of output buffer.*/ + ulint len, /*!< in: length of output buffer.*/ + ulint* write_size) /*!< in/out: Actual payload size of + the compressed data. */ { int err = 0; ulint actual_size = 0; @@ -277,6 +279,12 @@ fil_decompress_page( ut_error; } + /* Store actual payload size of the compressed data. This pointer + points to buffer pool. */ + if (write_size) { + *write_size = actual_size; + } + if (compression_alg == FIL_PAGE_COMPRESSION_ZLIB) { #ifdef UNIV_DEBUG diff --git a/storage/innobase/include/fil0pagecompress.h b/storage/innobase/include/fil0pagecompress.h index 342b105401c..c362c0ddcd2 100644 --- a/storage/innobase/include/fil0pagecompress.h +++ b/storage/innobase/include/fil0pagecompress.h @@ -97,7 +97,9 @@ fil_decompress_page( byte* page_buf, /*!< in: preallocated buffer or NULL */ byte* buf, /*!< out: buffer from which to read; in aio this must be appropriately aligned */ - ulint len); /*!< in: length of output buffer.*/ + ulint len, /*!< in: length of output buffer.*/ + ulint* write_size); /*!< in/out: Actual payload size of + the compressed data. 
*/ /****************************************************************//** Get space id from fil node diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index f0ca05b7faa..376aa244bc9 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -2821,7 +2821,7 @@ try_again: if (ret && len == n) { if (fil_page_is_compressed((byte *)buf)) { - fil_decompress_page(NULL, (byte *)buf, len); + fil_decompress_page(NULL, (byte *)buf, len, NULL); } return(TRUE); } @@ -2836,7 +2836,7 @@ try_again: if ((ulint) ret == n) { if (fil_page_is_compressed((byte *)buf)) { - fil_decompress_page(NULL, (byte *)buf, n); + fil_decompress_page(NULL, (byte *)buf, n, NULL); } return(TRUE); @@ -5164,7 +5164,7 @@ os_aio_windows_handle( if (slot->type == OS_FILE_READ) { if (fil_page_is_compressed(slot->buf)) { - fil_decompress_page(slot->page_buf, slot->buf, slot->len); + fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size); } } else { if (slot->page_compress_success && fil_page_is_compressed(slot->page_buf)) { @@ -5278,7 +5278,7 @@ retry: if (slot->type == OS_FILE_READ) { if (fil_page_is_compressed(slot->buf)) { - fil_decompress_page(slot->page_buf, slot->buf, slot->len); + fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size); } } else { if (slot->page_compress_success && @@ -6219,7 +6219,9 @@ os_file_trim( " InnoDB: [Warning] fallocate not supported on this installation." " InnoDB: Disabling fallocate for now."); os_fallocate_failed = TRUE; - slot->write_size = NULL; + if (slot->write_size) { + *slot->write_size = 0; + } #endif /* HAVE_FALLOCATE ... */ diff --git a/storage/xtradb/buf/buf0rea.cc b/storage/xtradb/buf/buf0rea.cc index 3dec3df6f2b..7a79958c136 100644 --- a/storage/xtradb/buf/buf0rea.cc +++ b/storage/xtradb/buf/buf0rea.cc @@ -237,7 +237,7 @@ not_to_recover: *err = _fil_io(OS_FILE_READ | wake_later | ignore_nonexistent_pages, sync, space, 0, offset, 0, UNIV_PAGE_SIZE, - ((buf_block_t*) bpage)->frame, bpage, 0, trx); + ((buf_block_t*) bpage)->frame, bpage, &bpage->write_size, trx); } if (sync) { diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index 05dcf372112..eac889cf7c6 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -222,7 +222,9 @@ fil_decompress_page( byte* page_buf, /*!< in: preallocated buffer or NULL */ byte* buf, /*!< out: buffer from which to read; in aio this must be appropriately aligned */ - ulint len) /*!< in: length of output buffer.*/ + ulint len, /*!< in: length of output buffer.*/ + ulint* write_size) /*!< in/out: Actual payload size of + the compressed data. */ { int err = 0; ulint actual_size = 0; @@ -273,6 +275,12 @@ fil_decompress_page( ut_error; } + /* Store actual payload size of the compressed data. This pointer + points to buffer pool. 
*/ + if (write_size) { + *write_size = actual_size; + } + if (compression_alg == FIL_PAGE_COMPRESSION_ZLIB) { #ifdef UNIV_DEBUG diff --git a/storage/xtradb/include/fil0pagecompress.h b/storage/xtradb/include/fil0pagecompress.h index 342b105401c..c362c0ddcd2 100644 --- a/storage/xtradb/include/fil0pagecompress.h +++ b/storage/xtradb/include/fil0pagecompress.h @@ -97,7 +97,9 @@ fil_decompress_page( byte* page_buf, /*!< in: preallocated buffer or NULL */ byte* buf, /*!< out: buffer from which to read; in aio this must be appropriately aligned */ - ulint len); /*!< in: length of output buffer.*/ + ulint len, /*!< in: length of output buffer.*/ + ulint* write_size); /*!< in/out: Actual payload size of + the compressed data. */ /****************************************************************//** Get space id from fil node diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 933690dfefa..1b094bfa1f3 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -3009,7 +3009,7 @@ try_again: if (ret && len == n) { if (fil_page_is_compressed((byte *)buf)) { - fil_decompress_page(NULL, (byte *)buf, len); + fil_decompress_page(NULL, (byte *)buf, len, NULL); } return(TRUE); } @@ -3025,7 +3025,7 @@ try_again: if ((ulint) ret == n) { if (fil_page_is_compressed((byte *)buf)) { - fil_decompress_page(NULL, (byte *)buf, n); + fil_decompress_page(NULL, (byte *)buf, n, NULL); } return(TRUE); @@ -3129,7 +3129,7 @@ try_again: if ((ulint) ret == n) { if (fil_page_is_compressed((byte *)buf)) { - fil_decompress_page(NULL, (byte *)buf, n); + fil_decompress_page(NULL, (byte *)buf, n, NULL); } return(TRUE); @@ -5223,7 +5223,7 @@ os_aio_windows_handle( if (slot->type == OS_FILE_READ) { if (fil_page_is_compressed(slot->buf)) { - fil_decompress_page(slot->page_buf, slot->buf, slot->len); + fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size); } } else { if (slot->page_compress_success && fil_page_is_compressed(slot->page_buf)) { @@ -5337,7 +5337,7 @@ retry: if (slot->type == OS_FILE_READ) { if (fil_page_is_compressed(slot->buf)) { - fil_decompress_page(slot->page_buf, slot->buf, slot->len); + fil_decompress_page(slot->page_buf, slot->buf, slot->len, slot->write_size); } } else { if (slot->page_compress_success && @@ -6284,7 +6284,9 @@ os_file_trim( " InnoDB: [Warning] fallocate not supported on this installation." " InnoDB: Disabling fallocate for now."); os_fallocate_failed = TRUE; - slot->write_size = NULL; + if (slot->write_size) { + *slot->write_size = 0; + } #endif /* HAVE_FALLOCATE ... */ From 3a4b8879e5250eeac3e0a6c770fdf235111c8171 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 4 Mar 2014 20:12:32 +0200 Subject: [PATCH 42/56] Set index page page compression on by default and remove innodb_trim_pct as it is not used/implemented. 
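
Reviewer note (not part of the original commit message): at this point in the series srv_page_compress_index_pages still carries its original meaning of "compress only index pages"; this patch merely flips its default to TRUE and comments out innodb_trim_pct. Below is a minimal sketch of the write-path predicate that the flag feeds, with the variable and helper names taken from the os0file.cc hunks elsewhere in this series; the standalone function itself is illustrative and does not exist in the tree.

    /* Sketch only: when an asynchronous write gets page-compressed while
    srv_page_compress_index_pages still means "compress index pages only".
    This restates the condition used in the os0file.cc write path. */
    static bool
    page_should_be_compressed(bool page_compression, byte* page)
    {
            /* Compress when the tablespace asks for page compression and
            either the index-only restriction is off, or the page really
            is an index page. */
            return(page_compression
                   && (!srv_page_compress_index_pages
                       || fil_page_is_index_page(page)));
    }
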
--- storage/innobase/handler/ha_innodb.cc | 8 +++++--- storage/innobase/srv/srv0srv.cc | 2 +- storage/xtradb/handler/ha_innodb.cc | 8 +++++--- storage/xtradb/srv/srv0srv.cc | 2 +- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 98e27f4e02c..a65937d9490 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -16786,15 +16786,17 @@ static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug, NULL, NULL, FALSE); #endif /* UNIV_DEBUG */ +/* static MYSQL_SYSVAR_LONG(trim_pct, srv_trim_pct, PLUGIN_VAR_OPCMDARG , "How many percent of compressed pages should be trimmed", NULL, NULL, 100, 0, 100, 0); +*/ static MYSQL_SYSVAR_BOOL(compress_index_pages, srv_page_compress_index_pages, PLUGIN_VAR_OPCMDARG, - "Use page compression for only index pages.", - NULL, NULL, FALSE); + "Use page compression for only index pages. Default TRUE.", + NULL, NULL, TRUE); static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, PLUGIN_VAR_OPCMDARG, @@ -16974,7 +16976,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(limit_optimistic_insert_debug), MYSQL_SYSVAR(trx_purge_view_update_only_debug), #endif /* UNIV_DEBUG */ - MYSQL_SYSVAR(trim_pct), + // MYSQL_SYSVAR(trim_pct), MYSQL_SYSVAR(compress_index_pages), MYSQL_SYSVAR(use_trim), #ifdef HAVE_LZ4 diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index dcef4a03b76..11e6ffd31d3 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -148,7 +148,7 @@ UNIV_INTERN my_bool srv_use_native_aio = TRUE; /* If this flag is TRUE, then we will use page compression only for index pages */ -UNIV_INTERN my_bool srv_page_compress_index_pages = FALSE; +UNIV_INTERN my_bool srv_page_compress_index_pages = TRUE; UNIV_INTERN long srv_trim_pct = 100; /* If this flag is TRUE, then we will use fallocate(PUCH_HOLE) to the pages */ diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 1f92db64ddc..046fdfa45a9 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -17934,10 +17934,12 @@ static MYSQL_SYSVAR_BOOL(use_stacktrace, srv_use_stacktrace, "Print stacktrace on long semaphore wait (off by default supported only on linux)", NULL, NULL, FALSE); +/* static MYSQL_SYSVAR_LONG(trim_pct, srv_trim_pct, PLUGIN_VAR_OPCMDARG , "How many percent of compressed pages should be trimmed", NULL, NULL, 100, 0, 100, 0); +*/ static MYSQL_SYSVAR_UINT(compression_level, page_zip_level, PLUGIN_VAR_RQCMDARG, @@ -17947,8 +17949,8 @@ static MYSQL_SYSVAR_UINT(compression_level, page_zip_level, static MYSQL_SYSVAR_BOOL(compress_index_pages, srv_page_compress_index_pages, PLUGIN_VAR_OPCMDARG, - "Use page compression for only index pages.", - NULL, NULL, FALSE); + "Use page compression for only index pages. 
Default TRUE.", + NULL, NULL, TRUE); static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, PLUGIN_VAR_OPCMDARG, @@ -18166,7 +18168,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(fake_changes), MYSQL_SYSVAR(locking_fake_changes), MYSQL_SYSVAR(use_stacktrace), - MYSQL_SYSVAR(trim_pct), + // MYSQL_SYSVAR(trim_pct), MYSQL_SYSVAR(compress_index_pages), MYSQL_SYSVAR(use_trim), #ifdef HAVE_LZ4 diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index f1ee459efd7..5706c354ada 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -163,7 +163,7 @@ UNIV_INTERN my_bool srv_use_native_aio = TRUE; /* If this flag is TRUE, then we will use page compression only for index pages */ -UNIV_INTERN my_bool srv_page_compress_index_pages = FALSE; +UNIV_INTERN my_bool srv_page_compress_index_pages = TRUE; UNIV_INTERN long srv_trim_pct = 100; /* Default compression level if page compression is used and no compression level is set for the table*/ From c556b9d8176107ba892ac218dd72e35d53e0c4f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Fri, 7 Mar 2014 08:20:43 +0200 Subject: [PATCH 43/56] Changed so that innodb_compress_index pages means that if true also index pages are compressed if false index pages are not compressed. Fixed small output error when page_compression_level was incorrectly given. --- storage/innobase/handler/ha_innodb.cc | 6 +++--- storage/innobase/os/os0file.cc | 8 +++----- storage/innobase/srv/srv0srv.cc | 2 +- storage/xtradb/handler/ha_innodb.cc | 6 +++--- storage/xtradb/os/os0file.cc | 8 +++----- storage/xtradb/srv/srv0srv.cc | 2 +- 6 files changed, 14 insertions(+), 18 deletions(-) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index a65937d9490..1273a25a5f5 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -9907,7 +9907,7 @@ ha_innobase::check_table_options( HA_WRONG_CREATE_OPTION, "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu." " Valid values are [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", - create_info->key_block_size); + options->page_compression_level); return "PAGE_COMPRESSION_LEVEL"; } } @@ -16795,8 +16795,8 @@ static MYSQL_SYSVAR_LONG(trim_pct, srv_trim_pct, static MYSQL_SYSVAR_BOOL(compress_index_pages, srv_page_compress_index_pages, PLUGIN_VAR_OPCMDARG, - "Use page compression for only index pages. Default TRUE.", - NULL, NULL, TRUE); + "Use page compression also for index pages. 
Default FALSE.", + NULL, NULL, FALSE); static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, PLUGIN_VAR_OPCMDARG, diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 376aa244bc9..0093dd8e266 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -4464,14 +4464,12 @@ found: slot->page_compression = page_compression; /* If the space is page compressed and this is write operation - and if either only index pages compression is disabled or - page is index page and only index pages compression is enabled then - we compress the page */ + and either index compression is enabled or page is not a index + page then we compress the page */ if (message1 && type == OS_FILE_WRITE && page_compression && - (srv_page_compress_index_pages == false || - (srv_page_compress_index_pages == true && fil_page_is_index_page(slot->buf)))) { + (srv_page_compress_index_pages == true || !fil_page_is_index_page(slot->buf))) { ulint real_len = len; byte* tmp = NULL; diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index 11e6ffd31d3..dcef4a03b76 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -148,7 +148,7 @@ UNIV_INTERN my_bool srv_use_native_aio = TRUE; /* If this flag is TRUE, then we will use page compression only for index pages */ -UNIV_INTERN my_bool srv_page_compress_index_pages = TRUE; +UNIV_INTERN my_bool srv_page_compress_index_pages = FALSE; UNIV_INTERN long srv_trim_pct = 100; /* If this flag is TRUE, then we will use fallocate(PUCH_HOLE) to the pages */ diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 046fdfa45a9..fc92cc828f7 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -10420,7 +10420,7 @@ ha_innobase::check_table_options( HA_WRONG_CREATE_OPTION, "InnoDB: invalid PAGE_COMPRESSION_LEVEL = %lu." " Valid values are [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]", - create_info->key_block_size); + options->page_compression_level); return "PAGE_COMPRESSION_LEVEL"; } } @@ -17949,8 +17949,8 @@ static MYSQL_SYSVAR_UINT(compression_level, page_zip_level, static MYSQL_SYSVAR_BOOL(compress_index_pages, srv_page_compress_index_pages, PLUGIN_VAR_OPCMDARG, - "Use page compression for only index pages. Default TRUE.", - NULL, NULL, TRUE); + "Use page compression also for index pages. 
Default FALSE.", + NULL, NULL, FALSE); static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, PLUGIN_VAR_OPCMDARG, diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 38be419e2ad..525310025da 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -4580,14 +4580,12 @@ found: slot->page_compression = page_compression; /* If the space is page compressed and this is write operation - and if either only index pages compression is disabled or - page is index page and only index pages compression is enabled then - we compress the page */ + and either index compression is enabled or page is not a index + page then we compress the page */ if (message1 && type == OS_FILE_WRITE && page_compression && - (srv_page_compress_index_pages == false || - (srv_page_compress_index_pages == true && fil_page_is_index_page(slot->buf)))) { + (srv_page_compress_index_pages == true || !fil_page_is_index_page(slot->buf))) { ulint real_len = len; byte* tmp = NULL; diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 5706c354ada..f1ee459efd7 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -163,7 +163,7 @@ UNIV_INTERN my_bool srv_use_native_aio = TRUE; /* If this flag is TRUE, then we will use page compression only for index pages */ -UNIV_INTERN my_bool srv_page_compress_index_pages = TRUE; +UNIV_INTERN my_bool srv_page_compress_index_pages = FALSE; UNIV_INTERN long srv_trim_pct = 100; /* Default compression level if page compression is used and no compression level is set for the table*/ From 3ea72a2ba9deb9e3da7efe57a74ce9b34b346dfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Wed, 12 Mar 2014 14:47:38 +0200 Subject: [PATCH 44/56] Removed options innodb_compress_index_pages and innodb_trim_pct. Both are unnecessary. There is a lot more index pages than there is normal pages. Earlier all pages were compressed and this provided best performance and compression ratio. Added status variable to show how many non index pages are written. 
--- storage/innobase/fil/fil0fil.cc | 2 ++ storage/innobase/handler/ha_innodb.cc | 16 ++-------------- storage/innobase/include/srv0mon.h | 1 + storage/innobase/include/srv0srv.h | 10 ++++------ storage/innobase/os/os0file.cc | 8 ++------ storage/innobase/srv/srv0mon.cc | 13 ++++++++++++- storage/innobase/srv/srv0srv.cc | 6 ++---- storage/xtradb/fil/fil0fil.cc | 2 ++ storage/xtradb/handler/ha_innodb.cc | 16 ++-------------- storage/xtradb/include/srv0mon.h | 1 + storage/xtradb/include/srv0srv.h | 10 ++++------ storage/xtradb/os/os0file.cc | 8 ++------ storage/xtradb/srv/srv0mon.cc | 13 ++++++++++++- storage/xtradb/srv/srv0srv.cc | 6 ++---- 14 files changed, 50 insertions(+), 62 deletions(-) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 3678442417a..cee9c7e0534 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -5487,6 +5487,8 @@ fil_io( srv_stats.data_written.add(len); if (fil_page_is_index_page((byte *)buf)) { srv_stats.index_pages_written.inc(); + } else { + srv_stats.non_index_pages_written.inc(); } } diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 1273a25a5f5..b790ae76121 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -711,6 +711,8 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_page_compression_trim_sect4096, SHOW_LONGLONG}, {"num_index_pages_written", (char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG}, + {"num_non_index_pages_written", + (char*) &export_vars.innodb_non_index_pages_written, SHOW_LONGLONG}, {"num_pages_page_compressed", (char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG}, {"num_page_compressed_trim_op", @@ -16786,18 +16788,6 @@ static MYSQL_SYSVAR_BOOL(trx_purge_view_update_only_debug, NULL, NULL, FALSE); #endif /* UNIV_DEBUG */ -/* -static MYSQL_SYSVAR_LONG(trim_pct, srv_trim_pct, - PLUGIN_VAR_OPCMDARG , - "How many percent of compressed pages should be trimmed", - NULL, NULL, 100, 0, 100, 0); -*/ - -static MYSQL_SYSVAR_BOOL(compress_index_pages, srv_page_compress_index_pages, - PLUGIN_VAR_OPCMDARG, - "Use page compression also for index pages. Default FALSE.", - NULL, NULL, FALSE); - static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, PLUGIN_VAR_OPCMDARG, "Use trim. 
Default FALSE.", @@ -16976,8 +16966,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(limit_optimistic_insert_debug), MYSQL_SYSVAR(trx_purge_view_update_only_debug), #endif /* UNIV_DEBUG */ - // MYSQL_SYSVAR(trim_pct), - MYSQL_SYSVAR(compress_index_pages), MYSQL_SYSVAR(use_trim), #ifdef HAVE_LZ4 MYSQL_SYSVAR(use_lz4), diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h index d7c2d6ce531..4d0379a2643 100644 --- a/storage/innobase/include/srv0mon.h +++ b/storage/innobase/include/srv0mon.h @@ -165,6 +165,7 @@ enum monitor_id_t { MONITOR_OVLD_PAGE_CREATED, MONITOR_OVLD_PAGES_WRITTEN, MONITOR_OVLD_INDEX_PAGES_WRITTEN, + MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN, MONITOR_OVLD_PAGES_READ, MONITOR_OVLD_BYTE_READ, MONITOR_OVLD_BYTE_WRITTEN, diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index b4bb9c09ef6..ac264a7d597 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -111,6 +111,8 @@ struct srv_stats_t { ulint_ctr_64_t page_compression_trim_sect4096; /* Number of index pages written */ ulint_ctr_64_t index_pages_written; + /* Number of non index pages written */ + ulint_ctr_64_t non_index_pages_written; /* Number of pages compressed with page compression */ ulint_ctr_64_t pages_page_compressed; /* Number of TRIM operations induced by page compression */ @@ -236,12 +238,6 @@ use simulated aio we build below with threads. Currently we support native aio on windows and linux */ extern my_bool srv_use_native_aio; -/* Is page compression used only for index pages */ -extern my_bool srv_page_compress_index_pages; - -/* Frequency of trim operations */ -extern long srv_trim_pct; - /* Use trim operation */ extern my_bool srv_use_trim; @@ -901,6 +897,8 @@ struct export_var_t{ by page compression */ ib_int64_t innodb_index_pages_written; /*!< Number of index pages written */ + ib_int64_t innodb_non_index_pages_written; /*!< Number of non index pages + written */ ib_int64_t innodb_pages_page_compressed;/*!< Number of pages compressed by page compression */ ib_int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 0093dd8e266..09340cca68d 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -4464,12 +4464,8 @@ found: slot->page_compression = page_compression; /* If the space is page compressed and this is write operation - and either index compression is enabled or page is not a index - page then we compress the page */ - if (message1 && - type == OS_FILE_WRITE && - page_compression && - (srv_page_compress_index_pages == true || !fil_page_is_index_page(slot->buf))) { + then we compress the page */ + if (message1 && type == OS_FILE_WRITE && page_compression ) { ulint real_len = len; byte* tmp = NULL; diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index 8ba0b977c98..32171182cf9 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -296,6 +296,12 @@ static monitor_info_t innodb_counter_info[] = MONITOR_EXISTING | MONITOR_DEFAULT_ON), MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_PAGES_WRITTEN}, + {"buffer_non_index_pages_written", "buffer", + "Number of non index pages written (innodb_non_index_pages_written)", + static_cast( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN}, + {"buffer_pages_read", "buffer", "Number of pages read 
(innodb_pages_read)", static_cast( @@ -1593,11 +1599,16 @@ srv_mon_process_existing_counter( value = stat.n_pages_written; break; - /* innodb_index_pages_written, the number of page written */ + /* innodb_index_pages_written, the number of index pages written */ case MONITOR_OVLD_INDEX_PAGES_WRITTEN: value = srv_stats.index_pages_written; break; + /* innodb_non_index_pages_written, the number of non index pages written */ + case MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN: + value = srv_stats.non_index_pages_written; + break; + /* innodb_pages_read */ case MONITOR_OVLD_PAGES_READ: buf_get_total_stat(&stat); diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index dcef4a03b76..fe3af72e150 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -146,10 +146,6 @@ use simulated aio we build below with threads. Currently we support native aio on windows and linux */ UNIV_INTERN my_bool srv_use_native_aio = TRUE; -/* If this flag is TRUE, then we will use page compression -only for index pages */ -UNIV_INTERN my_bool srv_page_compress_index_pages = FALSE; -UNIV_INTERN long srv_trim_pct = 100; /* If this flag is TRUE, then we will use fallocate(PUCH_HOLE) to the pages */ UNIV_INTERN my_bool srv_use_trim = FALSE; @@ -393,6 +389,7 @@ UNIV_INTERN ib_uint64_t srv_page_compression_saved = 0; UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect512 = 0; UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect4096 = 0; UNIV_INTERN ib_uint64_t srv_index_pages_written = 0; +UNIV_INTERN ib_uint64_t srv_non_index_pages_written = 0; UNIV_INTERN ib_uint64_t srv_pages_page_compressed = 0; UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op = 0; UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op_saved = 0; @@ -1485,6 +1482,7 @@ srv_export_innodb_status(void) export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512; export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096; export_vars.innodb_index_pages_written = srv_stats.index_pages_written; + export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written; export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed; export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op; export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved; diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index b38b80d9ef2..8e788e71983 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -5488,6 +5488,8 @@ _fil_io( srv_stats.data_written.add(len); if (fil_page_is_index_page((byte *)buf)) { srv_stats.index_pages_written.inc(); + } else { + srv_stats.non_index_pages_written.inc(); } } diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index fc92cc828f7..4436dc3d0e1 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -906,6 +906,8 @@ static SHOW_VAR innodb_status_variables[]= { (char*) &export_vars.innodb_page_compression_trim_sect4096, SHOW_LONGLONG}, {"num_index_pages_written", (char*) &export_vars.innodb_index_pages_written, SHOW_LONGLONG}, + {"num_non_index_pages_written", + (char*) &export_vars.innodb_non_index_pages_written, SHOW_LONGLONG}, {"num_pages_page_compressed", (char*) &export_vars.innodb_pages_page_compressed, SHOW_LONGLONG}, {"num_page_compressed_trim_op", @@ -17934,24 +17936,12 @@ static MYSQL_SYSVAR_BOOL(use_stacktrace, 
srv_use_stacktrace, "Print stacktrace on long semaphore wait (off by default supported only on linux)", NULL, NULL, FALSE); -/* -static MYSQL_SYSVAR_LONG(trim_pct, srv_trim_pct, - PLUGIN_VAR_OPCMDARG , - "How many percent of compressed pages should be trimmed", - NULL, NULL, 100, 0, 100, 0); -*/ - static MYSQL_SYSVAR_UINT(compression_level, page_zip_level, PLUGIN_VAR_RQCMDARG, "Compression level used for zlib compression. 0 is no compression" ", 1 is fastest, 9 is best compression and default is 6.", NULL, NULL, DEFAULT_COMPRESSION_LEVEL, 0, 9, 0); -static MYSQL_SYSVAR_BOOL(compress_index_pages, srv_page_compress_index_pages, - PLUGIN_VAR_OPCMDARG, - "Use page compression also for index pages. Default FALSE.", - NULL, NULL, FALSE); - static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, PLUGIN_VAR_OPCMDARG, "Use trim. Default FALSE.", @@ -18168,8 +18158,6 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(fake_changes), MYSQL_SYSVAR(locking_fake_changes), MYSQL_SYSVAR(use_stacktrace), - // MYSQL_SYSVAR(trim_pct), - MYSQL_SYSVAR(compress_index_pages), MYSQL_SYSVAR(use_trim), #ifdef HAVE_LZ4 MYSQL_SYSVAR(use_lz4), diff --git a/storage/xtradb/include/srv0mon.h b/storage/xtradb/include/srv0mon.h index 5e5de2c2e0f..10e1fa6188a 100644 --- a/storage/xtradb/include/srv0mon.h +++ b/storage/xtradb/include/srv0mon.h @@ -165,6 +165,7 @@ enum monitor_id_t { MONITOR_OVLD_PAGE_CREATED, MONITOR_OVLD_PAGES_WRITTEN, MONITOR_OVLD_INDEX_PAGES_WRITTEN, + MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN, MONITOR_OVLD_PAGES_READ, MONITOR_OVLD_BYTE_READ, MONITOR_OVLD_BYTE_WRITTEN, diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index 879989770e6..a532f90ec24 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -111,6 +111,8 @@ struct srv_stats_t { ulint_ctr_64_t page_compression_trim_sect4096; /* Number of index pages written */ ulint_ctr_64_t index_pages_written; + /* Number of non index pages written */ + ulint_ctr_64_t non_index_pages_written; /* Number of pages compressed with page compression */ ulint_ctr_64_t pages_page_compressed; /* Number of TRIM operations induced by page compression */ @@ -256,12 +258,6 @@ extern ibool srv_use_native_conditions; #endif /* __WIN__ */ #endif /* !UNIV_HOTBACKUP */ -/* Is page compression used only for index pages */ -extern my_bool srv_page_compress_index_pages; - -/* Frequency of trim operations */ -extern long srv_trim_pct; - /* Use trim operation */ extern my_bool srv_use_trim; @@ -1110,6 +1106,8 @@ struct export_var_t{ by page compression */ ib_int64_t innodb_index_pages_written; /*!< Number of index pages written */ + ib_int64_t innodb_non_index_pages_written; /*!< Number of non index pages + written */ ib_int64_t innodb_pages_page_compressed;/*!< Number of pages compressed by page compression */ ib_int64_t innodb_page_compressed_trim_op;/*!< Number of TRIM operations diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 525310025da..fc2f5d78c9a 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -4580,12 +4580,8 @@ found: slot->page_compression = page_compression; /* If the space is page compressed and this is write operation - and either index compression is enabled or page is not a index - page then we compress the page */ - if (message1 && - type == OS_FILE_WRITE && - page_compression && - (srv_page_compress_index_pages == true || !fil_page_is_index_page(slot->buf))) { + then we compress the page */ + if (message1 && type == 
OS_FILE_WRITE && page_compression ) { ulint real_len = len; byte* tmp = NULL; diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc index 8ba0b977c98..32171182cf9 100644 --- a/storage/xtradb/srv/srv0mon.cc +++ b/storage/xtradb/srv/srv0mon.cc @@ -296,6 +296,12 @@ static monitor_info_t innodb_counter_info[] = MONITOR_EXISTING | MONITOR_DEFAULT_ON), MONITOR_DEFAULT_START, MONITOR_OVLD_INDEX_PAGES_WRITTEN}, + {"buffer_non_index_pages_written", "buffer", + "Number of non index pages written (innodb_non_index_pages_written)", + static_cast( + MONITOR_EXISTING | MONITOR_DEFAULT_ON), + MONITOR_DEFAULT_START, MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN}, + {"buffer_pages_read", "buffer", "Number of pages read (innodb_pages_read)", static_cast( @@ -1593,11 +1599,16 @@ srv_mon_process_existing_counter( value = stat.n_pages_written; break; - /* innodb_index_pages_written, the number of page written */ + /* innodb_index_pages_written, the number of index pages written */ case MONITOR_OVLD_INDEX_PAGES_WRITTEN: value = srv_stats.index_pages_written; break; + /* innodb_non_index_pages_written, the number of non index pages written */ + case MONITOR_OVLD_NON_INDEX_PAGES_WRITTEN: + value = srv_stats.non_index_pages_written; + break; + /* innodb_pages_read */ case MONITOR_OVLD_PAGES_READ: buf_get_total_stat(&stat); diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index f1ee459efd7..386dbfddf0b 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -161,10 +161,6 @@ use simulated aio we build below with threads. Currently we support native aio on windows and linux */ UNIV_INTERN my_bool srv_use_native_aio = TRUE; -/* If this flag is TRUE, then we will use page compression -only for index pages */ -UNIV_INTERN my_bool srv_page_compress_index_pages = FALSE; -UNIV_INTERN long srv_trim_pct = 100; /* Default compression level if page compression is used and no compression level is set for the table*/ UNIV_INTERN long srv_compress_zlib_level = 6; @@ -515,6 +511,7 @@ UNIV_INTERN ib_uint64_t srv_page_compression_saved = 0; UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect512 = 0; UNIV_INTERN ib_uint64_t srv_page_compression_trim_sect4096 = 0; UNIV_INTERN ib_uint64_t srv_index_pages_written = 0; +UNIV_INTERN ib_uint64_t srv_non_index_pages_written = 0; UNIV_INTERN ib_uint64_t srv_pages_page_compressed = 0; UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op = 0; UNIV_INTERN ib_uint64_t srv_page_compressed_trim_op_saved = 0; @@ -1866,6 +1863,7 @@ srv_export_innodb_status(void) export_vars.innodb_page_compression_trim_sect512 = srv_stats.page_compression_trim_sect512; export_vars.innodb_page_compression_trim_sect4096 = srv_stats.page_compression_trim_sect4096; export_vars.innodb_index_pages_written = srv_stats.index_pages_written; + export_vars.innodb_non_index_pages_written = srv_stats.non_index_pages_written; export_vars.innodb_pages_page_compressed = srv_stats.pages_page_compressed; export_vars.innodb_page_compressed_trim_op = srv_stats.page_compressed_trim_op; export_vars.innodb_page_compressed_trim_op_saved = srv_stats.page_compressed_trim_op_saved; From 6a756b3a44cbe849a3a5a41b0e134e820d567c6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Fri, 21 Mar 2014 15:46:36 +0200 Subject: [PATCH 45/56] Code cleanup: Removed some unnecessary outputs from standard builds (available on special builds UNIV_PAGECOMPRESS_DEBUG and UNIV_MTFLUSH_DEBUG). 
Added a new status variable compress_pages_page_compression_error to count possible compression errors. --- storage/innobase/buf/buf0flu.cc | 2 +- storage/innobase/buf/buf0mtflu.cc | 14 ++++++++------ storage/innobase/fil/fil0pagecompress.cc | 24 +++++++++++++----------- storage/innobase/include/srv0mon.h | 1 + storage/innobase/include/srv0srv.h | 4 ++++ storage/innobase/os/os0file.cc | 2 +- storage/innobase/srv/srv0mon.cc | 8 ++++++++ storage/xtradb/buf/buf0flu.cc | 14 +++++++------- storage/xtradb/buf/buf0mtflu.cc | 14 ++++++++------ storage/xtradb/fil/fil0pagecompress.cc | 24 +++++++++++++----------- storage/xtradb/include/srv0mon.h | 1 + storage/xtradb/include/srv0srv.h | 4 ++++ storage/xtradb/os/os0file.cc | 2 +- storage/xtradb/srv/srv0mon.cc | 8 ++++++++ 14 files changed, 78 insertions(+), 44 deletions(-) diff --git a/storage/innobase/buf/buf0flu.cc b/storage/innobase/buf/buf0flu.cc index 07bff922e76..280f8cc39a9 100644 --- a/storage/innobase/buf/buf0flu.cc +++ b/storage/innobase/buf/buf0flu.cc @@ -2461,7 +2461,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( /* Flush pages from end of LRU if required */ n_lru = n_flushed = buf_flush_LRU_tail(); -#ifdef UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG if (n_lru) { fprintf(stderr,"n_lru:%lu ",n_lru); } diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index ea10d09e934..a5937caaf57 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -195,7 +195,7 @@ buf_mtflu_flush_pool_instance( pools based on the assumption that it will help in the retry which will follow the failure. */ -#ifdef UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n"); #endif return 0; @@ -330,12 +330,12 @@ DECLARE_THREAD(mtflush_io_thread)( while (TRUE) { -#ifdef UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG fprintf(stderr, "InnoDB: Note. 
Thread %lu work queue len %lu return queue len %lu\n", os_thread_get_curr_id(), ib_wqueue_len(mtflush_io->wq), ib_wqueue_len(mtflush_io->wr_cq)); -#endif /* UNIV_DEBUG */ +#endif /* UNIV_MTFLUSH_DEBUG */ mtflush_service_io(mtflush_io, this_thread_data); @@ -374,7 +374,7 @@ buf_mtflu_io_thread_exit(void) mtflush_io->gwt_status = WTHR_KILL_IT; - fprintf(stderr, "signal mtflush_io_threads to exit [%lu]\n", + fprintf(stderr, "InnoDB: [Note]: Signal mtflush_io_threads to exit [%lu]\n", srv_mtflush_threads); /* Send one exit work item/thread */ @@ -544,6 +544,7 @@ buf_mtflu_flush_work_items( if (done_wi != NULL) { per_pool_pages_flushed[i] = done_wi->n_flushed; +#if UNIV_DEBUG if((int)done_wi->id_usr == 0 && (done_wi->wi_status == WRK_ITEM_SET || done_wi->wi_status == WRK_ITEM_UNSET)) { @@ -553,6 +554,7 @@ buf_mtflu_flush_work_items( done_wi->wr.flush_type); ut_a(0); } +#endif n_flushed+= done_wi->n_flushed; i++; @@ -621,7 +623,7 @@ buf_mtflu_flush_list( cnt_flush[i]); } } -#ifdef UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu ]\n", __FUNCTION__, (min_n * srv_buf_pool_instances), *n_processed); #endif @@ -663,7 +665,7 @@ buf_mtflu_flush_LRU_tail(void) } } -#if UNIV_DEBUG +#if UNIV_MTFLUSH_DEBUG fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu ]\n", ( srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed); #endif diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index 8ecb5317088..dfd52d36b8e 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -106,11 +106,11 @@ fil_compress_page( level = page_zip_level; } -#ifdef UNIV_DEBUG +#ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, "InnoDB: Note: Preparing for compress for space %lu name %s len %lu\n", space_id, fil_space_name(space), len); -#endif /* UNIV_DEBUG */ +#endif /* UNIV_PAGECOMPRESS_DEBUG */ write_size = UNIV_PAGE_SIZE - header_len; @@ -126,6 +126,7 @@ fil_compress_page( "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", space_id, fil_space_name(space), len, err, write_size); + srv_stats.pages_page_compression_error.inc(); *out_len = len; return (buf); } @@ -140,6 +141,7 @@ fil_compress_page( "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", space_id, fil_space_name(space), len, err, write_size); + srv_stats.pages_page_compression_error.inc(); *out_len = len; return (buf); } @@ -197,11 +199,11 @@ fil_compress_page( ut_a((write_size % SECT_SIZE) == 0); } -#ifdef UNIV_DEBUG +#ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, "InnoDB: Note: Compression succeeded for space %lu name %s len %lu out_len %lu\n", space_id, fil_space_name(space), len, write_size); -#endif +#endif /* UNIV_PAGECOMPRESS_DEBUG */ srv_stats.page_compression_saved.add((len - write_size)); @@ -209,7 +211,7 @@ fil_compress_page( srv_stats.page_compression_trim_sect512.add(((len - write_size) / SECT_SIZE)); srv_stats.page_compression_trim_sect4096.add(((len - write_size) / (SECT_SIZE*8))); } - //srv_stats.page_compressed_trim_op.inc(); + srv_stats.pages_page_compressed.inc(); *out_len = write_size; @@ -258,10 +260,10 @@ fil_decompress_page( // If no buffer was given, we need to allocate temporal buffer if (page_buf == NULL) { -#ifdef UNIV_DEBUG +#ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, "InnoDB: Note: FIL: Compression buffer not given, allocating...\n"); -#endif /* UNIV_DEBUG */ +#endif /* UNIV_PAGECOMPRESS_DEBUG */ in_buf = 
static_cast(ut_malloc(UNIV_PAGE_SIZE)); } else { in_buf = page_buf; @@ -287,11 +289,11 @@ fil_decompress_page( if (compression_alg == FIL_PAGE_COMPRESSION_ZLIB) { -#ifdef UNIV_DEBUG +#ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, "InnoDB: Note: Preparing for decompress for len %lu\n", actual_size); -#endif /* UNIV_DEBUG */ +#endif /* UNIV_PAGECOMPRESS_DEBUG */ err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size); @@ -310,11 +312,11 @@ fil_decompress_page( ut_error; } -#ifdef UNIV_DEBUG +#ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, "InnoDB: Note: Decompression succeeded for len %lu \n", len); -#endif /* UNIV_DEBUG */ +#endif /* UNIV_PAGECOMPRESS_DEBUG */ #ifdef HAVE_LZ4 } else if (compression_alg == FIL_PAGE_COMPRESSION_LZ4) { err = LZ4_decompress_fast((const char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (char *)in_buf, UNIV_PAGE_SIZE); diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h index 4d0379a2643..2b02428bfb6 100644 --- a/storage/innobase/include/srv0mon.h +++ b/storage/innobase/include/srv0mon.h @@ -315,6 +315,7 @@ enum monitor_id_t { MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED, + MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR, /* Index related counters */ MONITOR_MODULE_INDEX, diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index ac264a7d597..1d01c7821d0 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -121,6 +121,8 @@ struct srv_stats_t { ulint_ctr_64_t page_compressed_trim_op_saved; /* Number of pages decompressed with page compression */ ulint_ctr_64_t pages_page_decompressed; + /* Number of page compression errors */ + ulint_ctr_64_t pages_page_compression_error; /** Number of data read in total (in bytes) */ ulint_ctr_1_t data_read; @@ -908,6 +910,8 @@ struct export_var_t{ ib_int64_t innodb_pages_page_decompressed;/*!< Number of pages decompressed by page compression */ + ib_int64_t innodb_pages_page_compression_error;/*!< Number of page + compression errors */ }; /** Thread slot in the thread table. 
*/ diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 09340cca68d..8068e05573c 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -6166,7 +6166,7 @@ os_file_trim( *slot->write_size > 0 && len >= *slot->write_size)) { -#ifdef UNIV_DEBUG +#ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n", *slot->write_size, trim_len, len); #endif diff --git a/storage/innobase/srv/srv0mon.cc b/storage/innobase/srv/srv0mon.cc index 32171182cf9..f276efdc021 100644 --- a/storage/innobase/srv/srv0mon.cc +++ b/storage/innobase/srv/srv0mon.cc @@ -926,6 +926,11 @@ static monitor_info_t innodb_counter_info[] = MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED}, + {"compress_pages_page_compression_error", "compression", + "Number of page compression errors", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR}, + /* ========== Counters for Index ========== */ {"module_index", "index", "Index Manager", MONITOR_MODULE, @@ -1871,6 +1876,9 @@ srv_mon_process_existing_counter( case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED: value = srv_stats.pages_page_decompressed; break; + case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR: + value = srv_stats.pages_page_compression_error; + break; default: ut_error; diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index 04fe25afa01..7b502ae3eea 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -721,7 +721,7 @@ buf_flush_write_complete( buf_pool->n_flush[flush_type]--; -#ifdef UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG fprintf(stderr, "n pending flush %lu\n", buf_pool->n_flush[flush_type]); #endif @@ -1863,7 +1863,7 @@ buf_flush_start( /* There is already a flush batch of the same type running */ -#ifdef UNIV_DEBUG +#ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, "Error: flush_type %d n_flush %lu init_flush %lu\n", flush_type, buf_pool->n_flush[flush_type], buf_pool->init_flush[flush_type]); #endif @@ -2732,7 +2732,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( /* Flush pages from end of LRU if required */ n_lru = n_flushed = buf_flush_LRU_tail(); -#ifdef UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG if (n_lru) { fprintf(stderr,"n_lru:%lu ",n_lru); } @@ -2743,7 +2743,7 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( /* Flush pages from flush_list if required */ n_flushed += n_pgc_flush = page_cleaner_flush_pages_if_needed(); -#ifdef UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG if (n_pgc_flush) { fprintf(stderr,"n_pgc_flush:%lu ",n_pgc_flush); } @@ -2760,16 +2760,16 @@ DECLARE_THREAD(buf_flush_page_cleaner_thread)( MONITOR_FLUSH_BACKGROUND_PAGES, n_flushed); } -#ifdef UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG if (n_pgc_batch) { fprintf(stderr,"n_pgc_batch:%lu ",n_pgc_batch); } #endif } -#ifdef UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG if (n_lru || n_pgc_flush || n_pgc_batch) { - fprintf(stderr,"\n"); + fprintf1(stderr,"\n"); n_lru = n_pgc_flush = n_pgc_batch = 0; } #endif diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index d1ec9979f51..5df4a96d42e 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -199,7 +199,7 @@ buf_mtflu_flush_pool_instance( pools based on the assumption that it will help in the retry which will follow the failure. 
*/ -#ifdef UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG fprintf(stderr, "InnoDB: Note: buf flush start failed there is already active flush for this buffer pool.\n"); #endif return 0; @@ -337,12 +337,12 @@ DECLARE_THREAD(mtflush_io_thread)( while (TRUE) { -#ifdef UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG fprintf(stderr, "InnoDB: Note. Thread %lu work queue len %lu return queue len %lu\n", os_thread_get_curr_id(), ib_wqueue_len(mtflush_io->wq), ib_wqueue_len(mtflush_io->wr_cq)); -#endif /* UNIV_DEBUG */ +#endif /* UNIV_MTFLUSH_DEBUG */ mtflush_service_io(mtflush_io, this_thread_data); @@ -381,7 +381,7 @@ buf_mtflu_io_thread_exit(void) mtflush_io->gwt_status = WTHR_KILL_IT; - fprintf(stderr, "signal mtflush_io_threads to exit [%lu]\n", + fprintf(stderr, "InnoDB: [Note]: Signal mtflush_io_threads to exit [%lu]\n", srv_mtflush_threads); /* Send one exit work item/thread */ @@ -551,6 +551,7 @@ buf_mtflu_flush_work_items( if (done_wi != NULL) { per_pool_pages_flushed[i] = done_wi->n_flushed; +#if UNIV_DEBUG if((int)done_wi->id_usr == 0 && (done_wi->wi_status == WRK_ITEM_SET || done_wi->wi_status == WRK_ITEM_UNSET)) { @@ -560,6 +561,7 @@ buf_mtflu_flush_work_items( done_wi->wr.flush_type); ut_a(0); } +#endif n_flushed+= done_wi->n_flushed; i++; @@ -631,7 +633,7 @@ buf_mtflu_flush_list( cnt_flush[i]); } } -#ifdef UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG fprintf(stderr, "%s: [1] [*n_processed: (min:%lu)%lu ]\n", __FUNCTION__, (min_n * srv_buf_pool_instances), *n_processed); #endif @@ -673,7 +675,7 @@ buf_mtflu_flush_LRU_tail(void) } } -#if UNIV_DEBUG +#if UNIV_MTFLUSH_DEBUG fprintf(stderr, "[1] [*n_processed: (min:%lu)%lu ]\n", ( srv_LRU_scan_depth * srv_buf_pool_instances), total_flushed); #endif diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index eac889cf7c6..2acdf85b100 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -106,11 +106,11 @@ fil_compress_page( level = page_zip_level; } -#ifdef UNIV_DEBUG +#ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, "InnoDB: Note: Preparing for compress for space %lu name %s len %lu\n", space_id, fil_space_name(space), len); -#endif /* UNIV_DEBUG */ +#endif /* UNIV_PAGECOMPRESS_DEBUG */ write_size = UNIV_PAGE_SIZE - header_len; @@ -126,6 +126,7 @@ fil_compress_page( "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", space_id, fil_space_name(space), len, err, write_size); + srv_stats.pages_page_compression_error.inc(); *out_len = len; return (buf); } @@ -140,6 +141,7 @@ fil_compress_page( "InnoDB: Warning: Compression failed for space %lu name %s len %lu rt %d write %lu\n", space_id, fil_space_name(space), len, err, write_size); + srv_stats.pages_page_compression_error.inc(); *out_len = len; return (buf); } @@ -193,11 +195,11 @@ fil_compress_page( ut_a((write_size % SECT_SIZE) == 0); } -#ifdef UNIV_DEBUG +#ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, "InnoDB: Note: Compression succeeded for space %lu name %s len %lu out_len %lu\n", space_id, fil_space_name(space), len, write_size); -#endif /* UNIV_DEBUG */ +#endif /* UNIV_PAGECOMPRESS_DEBUG */ srv_stats.page_compression_saved.add((len - write_size)); @@ -205,7 +207,7 @@ fil_compress_page( srv_stats.page_compression_trim_sect512.add(((len - write_size) / SECT_SIZE)); srv_stats.page_compression_trim_sect4096.add(((len - write_size) / (SECT_SIZE*8))); } - //srv_stats.page_compressed_trim_op.inc(); + srv_stats.pages_page_compressed.inc(); *out_len = write_size; @@ -254,10 +256,10 @@ 
fil_decompress_page( // If no buffer was given, we need to allocate temporal buffer if (page_buf == NULL) { -#ifdef UNIV_DEBUG +#ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, "InnoDB: FIL: Note: Compression buffer not given, allocating...\n"); -#endif /* UNIV_DEBUG */ +#endif /* UNIV_PAGECOMPRESS_DEBUG */ in_buf = static_cast(ut_malloc(UNIV_PAGE_SIZE)); } else { in_buf = page_buf; @@ -283,11 +285,11 @@ fil_decompress_page( if (compression_alg == FIL_PAGE_COMPRESSION_ZLIB) { -#ifdef UNIV_DEBUG +#ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, "InnoDB: Note: Preparing for decompress for len %lu\n", actual_size); -#endif /* UNIV_DEBUG */ +#endif /* UNIV_PAGECOMPRESS_DEBUG */ err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size); @@ -305,11 +307,11 @@ fil_decompress_page( ut_error; } -#ifdef UNIV_DEBUG +#ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, "InnoDB: Note: Decompression succeeded for len %lu \n", len); -#endif /* UNIV_DEBUG */ +#endif /* UNIV_PAGECOMPRESS_DEBUG */ #ifdef HAVE_LZ4 } else if (compression_alg == FIL_PAGE_COMPRESSION_LZ4) { err = LZ4_decompress_fast((const char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (char *)in_buf, UNIV_PAGE_SIZE); diff --git a/storage/xtradb/include/srv0mon.h b/storage/xtradb/include/srv0mon.h index 10e1fa6188a..8e6975ed68f 100644 --- a/storage/xtradb/include/srv0mon.h +++ b/storage/xtradb/include/srv0mon.h @@ -316,6 +316,7 @@ enum monitor_id_t { MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP, MONITOR_OVLD_PAGE_COMPRESSED_TRIM_OP_SAVED, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED, + MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR, /* Index related counters */ MONITOR_MODULE_INDEX, diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index a532f90ec24..be16dfddc72 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -121,6 +121,8 @@ struct srv_stats_t { ulint_ctr_64_t page_compressed_trim_op_saved; /* Number of pages decompressed with page compression */ ulint_ctr_64_t pages_page_decompressed; + /* Number of page compression errors */ + ulint_ctr_64_t pages_page_compression_error; /** Number of data read in total (in bytes) */ ulint_ctr_1_t data_read; @@ -1117,6 +1119,8 @@ struct export_var_t{ ib_int64_t innodb_pages_page_decompressed;/*!< Number of pages decompressed by page compression */ + ib_int64_t innodb_pages_page_compression_error;/*!< Number of page + compression errors */ }; /** Thread slot in the thread table. 
*/ diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index fc2f5d78c9a..646f8a87cbc 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -6234,7 +6234,7 @@ os_file_trim( *slot->write_size > 0 && len >= *slot->write_size)) { -#ifdef UNIV_DEBUG +#ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, "Note: TRIM: write_size %lu trim_len %lu len %lu\n", *slot->write_size, trim_len, len); #endif diff --git a/storage/xtradb/srv/srv0mon.cc b/storage/xtradb/srv/srv0mon.cc index 32171182cf9..f276efdc021 100644 --- a/storage/xtradb/srv/srv0mon.cc +++ b/storage/xtradb/srv/srv0mon.cc @@ -926,6 +926,11 @@ static monitor_info_t innodb_counter_info[] = MONITOR_NONE, MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED}, + {"compress_pages_page_compression_error", "compression", + "Number of page compression errors", + MONITOR_NONE, + MONITOR_DEFAULT_START, MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR}, + /* ========== Counters for Index ========== */ {"module_index", "index", "Index Manager", MONITOR_MODULE, @@ -1871,6 +1876,9 @@ srv_mon_process_existing_counter( case MONITOR_OVLD_PAGES_PAGE_DECOMPRESSED: value = srv_stats.pages_page_decompressed; break; + case MONITOR_OVLD_PAGES_PAGE_COMPRESSION_ERROR: + value = srv_stats.pages_page_compression_error; + break; default: ut_error; From a81f8fd5804e84b0679fe11a079d2ced641ee1f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Sat, 22 Mar 2014 11:30:03 +0200 Subject: [PATCH 46/56] Fix test cases to contain new status variables introduced. --- .../sys_vars/r/innodb_monitor_disable_basic.result | 2 ++ .../sys_vars/r/innodb_monitor_enable_basic.result | 10 ++++++++++ .../sys_vars/r/innodb_monitor_reset_all_basic.result | 10 ++++++++++ .../suite/sys_vars/r/innodb_monitor_reset_basic.result | 10 ++++++++++ 4 files changed, 32 insertions(+) diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result index 1f3d38a0420..aee118aced2 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_disable_basic.result @@ -38,6 +38,7 @@ buffer_pool_pages_free disabled buffer_pages_created disabled buffer_pages_written disabled buffer_index_pages_written disabled +buffer_non_index_pages_written disabled buffer_pages_read disabled buffer_data_reads disabled buffer_data_written disabled @@ -168,6 +169,7 @@ compress_pages_page_compressed disabled compress_page_compressed_trim_op disabled compress_page_compressed_trim_op_saved disabled compress_pages_page_decompressed disabled +compress_pages_page_compression_error disabled index_page_splits disabled index_page_merge_attempts disabled index_page_merge_successful disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result index 6f1c4c21d17..aee118aced2 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_enable_basic.result @@ -37,6 +37,8 @@ buffer_pool_bytes_dirty disabled buffer_pool_pages_free disabled buffer_pages_created disabled buffer_pages_written disabled +buffer_index_pages_written disabled +buffer_non_index_pages_written disabled buffer_pages_read disabled buffer_data_reads disabled buffer_data_written disabled @@ -160,6 +162,14 @@ compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled 
compression_pad_decrements disabled +compress_saved disabled +compress_trim_sect512 disabled +compress_trim_sect4096 disabled +compress_pages_page_compressed disabled +compress_page_compressed_trim_op disabled +compress_page_compressed_trim_op_saved disabled +compress_pages_page_decompressed disabled +compress_pages_page_compression_error disabled index_page_splits disabled index_page_merge_attempts disabled index_page_merge_successful disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result index 6f1c4c21d17..aee118aced2 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_all_basic.result @@ -37,6 +37,8 @@ buffer_pool_bytes_dirty disabled buffer_pool_pages_free disabled buffer_pages_created disabled buffer_pages_written disabled +buffer_index_pages_written disabled +buffer_non_index_pages_written disabled buffer_pages_read disabled buffer_data_reads disabled buffer_data_written disabled @@ -160,6 +162,14 @@ compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled compression_pad_decrements disabled +compress_saved disabled +compress_trim_sect512 disabled +compress_trim_sect4096 disabled +compress_pages_page_compressed disabled +compress_page_compressed_trim_op disabled +compress_page_compressed_trim_op_saved disabled +compress_pages_page_decompressed disabled +compress_pages_page_compression_error disabled index_page_splits disabled index_page_merge_attempts disabled index_page_merge_successful disabled diff --git a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result index 6f1c4c21d17..aee118aced2 100644 --- a/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result +++ b/mysql-test/suite/sys_vars/r/innodb_monitor_reset_basic.result @@ -37,6 +37,8 @@ buffer_pool_bytes_dirty disabled buffer_pool_pages_free disabled buffer_pages_created disabled buffer_pages_written disabled +buffer_index_pages_written disabled +buffer_non_index_pages_written disabled buffer_pages_read disabled buffer_data_reads disabled buffer_data_written disabled @@ -160,6 +162,14 @@ compress_pages_compressed disabled compress_pages_decompressed disabled compression_pad_increments disabled compression_pad_decrements disabled +compress_saved disabled +compress_trim_sect512 disabled +compress_trim_sect4096 disabled +compress_pages_page_compressed disabled +compress_page_compressed_trim_op disabled +compress_page_compressed_trim_op_saved disabled +compress_pages_page_decompressed disabled +compress_pages_page_compression_error disabled index_page_splits disabled index_page_merge_attempts disabled index_page_merge_successful disabled From f761835b5c13158fd958a5239b346daa09b06cc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 25 Mar 2014 21:31:27 +0200 Subject: [PATCH 47/56] Fix candidate for XtraDB and row compressed tables. 
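
Reviewer note (not part of the original commit message): the one-line change below corrects which counter buf_do_LRU_batch() accumulates for pages actually written out by the LRU scan; the fuller explanation and the separate unzip_LRU eviction counter follow in the next patch. A minimal sketch of the intended accounting, assuming the flush_counter_t fields (flushed, evicted) that appear in the diffs of this series; the helper function is illustrative only.

    /* Sketch only: unzip_LRU work is an eviction, pages written out by
    the LRU list scan are flushes.  Before this patch the written count
    was added to the eviction counter, so buf_flush_LRU_tail()
    undercounted the flushing actually done. */
    static void
    lru_batch_account(flush_counter_t* n, ulint n_unzip_evicted, ulint n_written)
    {
            n->evicted += n_unzip_evicted;
            n->flushed += n_written;        /* was: n->evicted += n_written */
    }
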
--- storage/xtradb/buf/buf0flu.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index 7b502ae3eea..53ac9bb9cc7 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -1674,7 +1674,7 @@ buf_do_LRU_batch( n->flushed = 0; } - n->evicted += count; + n->flushed += count; } /*******************************************************************//** From 502733803979e2109b6dcdcb3d8c5a0ddd6d2363 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Thu, 27 Mar 2014 09:35:24 +0200 Subject: [PATCH 48/56] Fix bug https://code.launchpad.net/~laurynas-biveinis/percona-server/bug1295268 (Inadequate background LRU flushing for write workloads with InnoDB compression). If InnoDB compression is used and the workload has writes, the following situation is possible. The LRU flusher issues an LRU flush request for an instance. buf_do_LRU_batch decides to perform unzip_LRU eviction and this eviction might fully satisfy the request. Then buf_flush_LRU_tail checks the number of flushed pages in the last iteration, finds it to be zero, and wrongly decides not to flush that instance anymore. Fixed by maintaining unzip_LRU eviction counter in struct flush_counter_t variables, and checking it in buf_flush_LRU_tail when deciding whether to stop flushing the current instance. Added test cases for new configuration files to get mysql-test-run suite sys_vars to pass. Fix some small errors. --- .../r/innodb_mtflush_threads_basic.result | 21 +++++++++++ .../sys_vars/r/innodb_use_lz4_basic.result | 3 ++ .../r/innodb_use_mtflush_basic.result | 21 +++++++++++ .../sys_vars/r/innodb_use_trim_basic.result | 33 +++++++++++++++++ .../t/innodb_mtflush_threads_basic.test | 21 +++++++++++ .../sys_vars/t/innodb_use_lz4_basic.test | 5 +++ .../sys_vars/t/innodb_use_mtflush_basic.test | 22 ++++++++++++ .../sys_vars/t/innodb_use_trim_basic.test | 36 +++++++++++++++++++ storage/innobase/handler/ha_innodb.cc | 4 +-- storage/xtradb/buf/buf0flu.cc | 24 ++++++++----- storage/xtradb/handler/ha_innodb.cc | 4 +-- storage/xtradb/include/buf0flu.h | 2 ++ 12 files changed, 184 insertions(+), 12 deletions(-) create mode 100644 mysql-test/suite/sys_vars/r/innodb_mtflush_threads_basic.result create mode 100644 mysql-test/suite/sys_vars/r/innodb_use_lz4_basic.result create mode 100644 mysql-test/suite/sys_vars/r/innodb_use_mtflush_basic.result create mode 100644 mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result create mode 100644 mysql-test/suite/sys_vars/t/innodb_mtflush_threads_basic.test create mode 100644 mysql-test/suite/sys_vars/t/innodb_use_lz4_basic.test create mode 100644 mysql-test/suite/sys_vars/t/innodb_use_mtflush_basic.test create mode 100644 mysql-test/suite/sys_vars/t/innodb_use_trim_basic.test diff --git a/mysql-test/suite/sys_vars/r/innodb_mtflush_threads_basic.result b/mysql-test/suite/sys_vars/r/innodb_mtflush_threads_basic.result new file mode 100644 index 00000000000..75a1cc5262e --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_mtflush_threads_basic.result @@ -0,0 +1,21 @@ +select @@global.innodb_mtflush_threads; +@@global.innodb_mtflush_threads +8 +select @@session.innodb_mtflush_threads; +ERROR HY000: Variable 'innodb_mtflush_threads' is a GLOBAL variable +show global variables like 'innodb_mtflush_threads'; +Variable_name Value +innodb_mtflush_threads 8 +show session variables like 'innodb_mtflush_threads'; +Variable_name Value +innodb_mtflush_threads 8 +select * from 
information_schema.global_variables where variable_name='innodb_mtflush_threads'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_MTFLUSH_THREADS 8 +select * from information_schema.session_variables where variable_name='innodb_mtflush_threads'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_MTFLUSH_THREADS 8 +set global innodb_mtflush_threads=1; +ERROR HY000: Variable 'innodb_mtflush_threads' is a read only variable +set session innodb_mtflush_threads=1; +ERROR HY000: Variable 'innodb_mtflush_threads' is a read only variable diff --git a/mysql-test/suite/sys_vars/r/innodb_use_lz4_basic.result b/mysql-test/suite/sys_vars/r/innodb_use_lz4_basic.result new file mode 100644 index 00000000000..4c3cfa524af --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_use_lz4_basic.result @@ -0,0 +1,3 @@ +select @@global.innodb_use_fallocate; +@@global.innodb_use_fallocate +0 diff --git a/mysql-test/suite/sys_vars/r/innodb_use_mtflush_basic.result b/mysql-test/suite/sys_vars/r/innodb_use_mtflush_basic.result new file mode 100644 index 00000000000..f77abba7ac9 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_use_mtflush_basic.result @@ -0,0 +1,21 @@ +select @@global.innodb_use_mtflush; +@@global.innodb_use_mtflush +0 +select @@session.innodb_use_mtflush; +ERROR HY000: Variable 'innodb_use_mtflush' is a GLOBAL variable +show global variables like 'innodb_use_mtflush'; +Variable_name Value +innodb_use_mtflush OFF +show session variables like 'innodb_use_mtflush'; +Variable_name Value +innodb_use_mtflush OFF +select * from information_schema.global_variables where variable_name='innodb_use_mtflush'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_USE_MTFLUSH OFF +select * from information_schema.session_variables where variable_name='innodb_use_mtflush'; +VARIABLE_NAME VARIABLE_VALUE +INNODB_USE_MTFLUSH OFF +set global innodb_use_mtflush=1; +ERROR HY000: Variable 'innodb_use_mtflush' is a read only variable +set session innodb_use_mtflush=1; +ERROR HY000: Variable 'innodb_use_mtflush' is a read only variable diff --git a/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result b/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result new file mode 100644 index 00000000000..63292f5d3c8 --- /dev/null +++ b/mysql-test/suite/sys_vars/r/innodb_use_trim_basic.result @@ -0,0 +1,33 @@ +SET @start_use_trim = @@global.innodb_use_trim; +SELECT @start_use_trim; +@start_use_trim +0 +SELECT COUNT(@@GLOBAL.innodb_use_trim); +COUNT(@@GLOBAL.innodb_use_trim) +1 +1 Expected +SET @@GLOBAL.innodb_use_trim=1; +SELECT COUNT(@@GLOBAL.innodb_use_trim); +COUNT(@@GLOBAL.innodb_use_trim) +1 +1 Expected +SELECT IF(@@GLOBAL.innodb_use_trim, 'ON', 'OFF') = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_use_trim'; +IF(@@GLOBAL.innodb_use_trim, 'ON', 'OFF') = VARIABLE_VALUE +1 +1 Expected +SELECT COUNT(@@GLOBAL.innodb_use_trim); +COUNT(@@GLOBAL.innodb_use_trim) +1 +1 Expected +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_use_trim'; +COUNT(VARIABLE_VALUE) +1 +1 Expected +SET @@global.innodb_use_trim = @start_use_trim; +SELECT @@global.innodb_use_trim; +@@global.innodb_use_trim +0 diff --git a/mysql-test/suite/sys_vars/t/innodb_mtflush_threads_basic.test b/mysql-test/suite/sys_vars/t/innodb_mtflush_threads_basic.test new file mode 100644 index 00000000000..c8412f969eb --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_mtflush_threads_basic.test @@ -0,0 +1,21 @@ +--source include/have_innodb.inc +# bool readonly + +# +# show values; +# +select 
@@global.innodb_mtflush_threads; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_mtflush_threads; +show global variables like 'innodb_mtflush_threads'; +show session variables like 'innodb_mtflush_threads'; +select * from information_schema.global_variables where variable_name='innodb_mtflush_threads'; +select * from information_schema.session_variables where variable_name='innodb_mtflush_threads'; + +# +# show that it's read-only +# +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +set global innodb_mtflush_threads=1; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +set session innodb_mtflush_threads=1; diff --git a/mysql-test/suite/sys_vars/t/innodb_use_lz4_basic.test b/mysql-test/suite/sys_vars/t/innodb_use_lz4_basic.test new file mode 100644 index 00000000000..aefa276dcee --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_use_lz4_basic.test @@ -0,0 +1,5 @@ +--source include/have_innodb.inc +# bool readonly +# not on all compilations +select @@global.innodb_use_fallocate; + diff --git a/mysql-test/suite/sys_vars/t/innodb_use_mtflush_basic.test b/mysql-test/suite/sys_vars/t/innodb_use_mtflush_basic.test new file mode 100644 index 00000000000..a9c40b9e522 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_use_mtflush_basic.test @@ -0,0 +1,22 @@ +--source include/have_innodb.inc +# bool readonly + +# +# show values; +# +select @@global.innodb_use_mtflush; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +select @@session.innodb_use_mtflush; +show global variables like 'innodb_use_mtflush'; +show session variables like 'innodb_use_mtflush'; +select * from information_schema.global_variables where variable_name='innodb_use_mtflush'; +select * from information_schema.session_variables where variable_name='innodb_use_mtflush'; + +# +# show that it's read-only +# +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +set global innodb_use_mtflush=1; +--error ER_INCORRECT_GLOBAL_LOCAL_VAR +set session innodb_use_mtflush=1; + diff --git a/mysql-test/suite/sys_vars/t/innodb_use_trim_basic.test b/mysql-test/suite/sys_vars/t/innodb_use_trim_basic.test new file mode 100644 index 00000000000..c1b0f142179 --- /dev/null +++ b/mysql-test/suite/sys_vars/t/innodb_use_trim_basic.test @@ -0,0 +1,36 @@ +--source include/have_innodb.inc + +SET @start_use_trim = @@global.innodb_use_trim; +SELECT @start_use_trim; + +SELECT COUNT(@@GLOBAL.innodb_use_trim); +--echo 1 Expected + +#################################################################### +# Check if Value can set # +#################################################################### + +SET @@GLOBAL.innodb_use_trim=1; + +SELECT COUNT(@@GLOBAL.innodb_use_trim); +--echo 1 Expected + +################################################################# +# Check if the value in GLOBAL Table matches value in variable # +################################################################# + +SELECT IF(@@GLOBAL.innodb_use_trim, 'ON', 'OFF') = VARIABLE_VALUE +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_use_trim'; +--echo 1 Expected + +SELECT COUNT(@@GLOBAL.innodb_use_trim); +--echo 1 Expected + +SELECT COUNT(VARIABLE_VALUE) +FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES +WHERE VARIABLE_NAME='innodb_use_trim'; +--echo 1 Expected + +SET @@global.innodb_use_trim = @start_use_trim; +SELECT @@global.innodb_use_trim; \ No newline at end of file diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index b790ae76121..16e33c8901f 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -16801,7 
+16801,7 @@ static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, #endif /* HAVE_LZ4 */ static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, - PLUGIN_VAR_RQCMDARG, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Number of multi-threaded flush threads", NULL, NULL, MTFLUSH_DEFAULT_WORKER, /* Default setting */ @@ -16810,7 +16810,7 @@ static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, 0); static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush, - PLUGIN_VAR_OPCMDARG , + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Use multi-threaded flush. Default FALSE.", NULL, NULL, FALSE); diff --git a/storage/xtradb/buf/buf0flu.cc b/storage/xtradb/buf/buf0flu.cc index 53ac9bb9cc7..f4ba0f10761 100644 --- a/storage/xtradb/buf/buf0flu.cc +++ b/storage/xtradb/buf/buf0flu.cc @@ -1549,6 +1549,7 @@ buf_flush_LRU_list_batch( n->flushed = 0; n->evicted = 0; + n->unzip_LRU_evicted = 0; ut_ad(mutex_own(&buf_pool->LRU_list_mutex)); @@ -1660,21 +1661,22 @@ buf_do_LRU_batch( flush_counters_t* n) /*!< out: flushed/evicted page counts */ { - ulint count = 0; - if (buf_LRU_evict_from_unzip_LRU(buf_pool)) { - count += buf_free_from_unzip_LRU_list_batch(buf_pool, max); + n->unzip_LRU_evicted + += buf_free_from_unzip_LRU_list_batch(buf_pool, max); + } else { + n->unzip_LRU_evicted = 0; } - if (max > count) { - buf_flush_LRU_list_batch(buf_pool, max - count, limited_scan, - n); + if (max > n->unzip_LRU_evicted) { + buf_flush_LRU_list_batch(buf_pool, max - n->unzip_LRU_evicted, + limited_scan, n); } else { n->evicted = 0; n->flushed = 0; } - n->flushed += count; + n->evicted += n->unzip_LRU_evicted; } /*******************************************************************//** @@ -2306,9 +2308,15 @@ buf_flush_LRU_tail(void) requested_pages[i] += lru_chunk_size; + /* If we failed to flush or evict this + instance, do not bother anymore. But take into + account that we might have zero flushed pages + because the flushing request was fully + satisfied by unzip_LRU evictions. */ if (requested_pages[i] >= scan_depth[i] || !(srv_cleaner_eviction_factor - ? n.evicted : n.flushed)) { + ? n.evicted + : (n.flushed + n.unzip_LRU_evicted))) { active_instance[i] = false; remaining_instances--; diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 4436dc3d0e1..09416a990d7 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -17955,7 +17955,7 @@ static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, #endif /* HAVE_LZ4 */ static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, - PLUGIN_VAR_RQCMDARG, + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Number of multi-threaded flush threads", NULL, NULL, MTFLUSH_DEFAULT_WORKER, /* Default setting */ @@ -17964,7 +17964,7 @@ static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, 0); static MYSQL_SYSVAR_BOOL(use_mtflush, srv_use_mtflush, - PLUGIN_VAR_OPCMDARG , + PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, "Use multi-threaded flush. 
Default FALSE.", NULL, NULL, FALSE); diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h index 528ec7b3f64..4cb1446036b 100644 --- a/storage/xtradb/include/buf0flu.h +++ b/storage/xtradb/include/buf0flu.h @@ -40,6 +40,8 @@ extern ibool buf_page_cleaner_is_active; struct flush_counters_t { ulint flushed; /*!< number of dirty pages flushed */ ulint evicted; /*!< number of clean pages evicted */ + ulint unzip_LRU_evicted;/*!< number of uncompressed page images + evicted */ }; From 0b92fe9c188109c980444114f36bc56c119b84e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Thu, 27 Mar 2014 12:21:16 +0200 Subject: [PATCH 49/56] Fixed windows compiler errors. --- storage/innobase/fil/fil0pagecompress.cc | 2 +- storage/xtradb/fil/fil0pagecompress.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index dfd52d36b8e..75da02a22a4 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -132,7 +132,7 @@ fil_compress_page( } } else { #endif /* HAVE_LZ4 */ - err = compress2(out_buf+header_len, &write_size, buf, len, level); + err = compress2(out_buf+header_len, (ulong*)&write_size, buf, len, level); if (err != Z_OK) { /* If error we leave the actual page as it was */ diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index 2acdf85b100..96c019e3723 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -132,7 +132,7 @@ fil_compress_page( } } else { #endif /* HAVE_LZ4 */ - err = compress2(out_buf+header_len, &write_size, buf, len, level); + err = compress2(out_buf+header_len, (ulong *)&write_size, buf, len, level); if (err != Z_OK) { /* If error we leave the actual page as it was */ From 3b61030dc19cdd63e376db1db91f771051b1ac3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Fri, 28 Mar 2014 08:42:53 +0200 Subject: [PATCH 50/56] Fix error on innodb_mtflush_threads parameter. 
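PLUGIN_VAR_NOCMDARG made the option take no argument, so this numeric, read-only variable could no longer be given a value on the command line or in an option file; PLUGIN_VAR_RQCMDARG restores that while keeping the variable read only. Illustrative option-file snippet (variable names as added by this series, used together with innodb_use_mtflush):

  [mysqld]
  innodb_mtflush_threads = 8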
--- storage/innobase/handler/ha_innodb.cc | 2 +- storage/xtradb/handler/ha_innodb.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 16e33c8901f..2d3ac405cbe 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -16801,7 +16801,7 @@ static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, #endif /* HAVE_LZ4 */ static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Number of multi-threaded flush threads", NULL, NULL, MTFLUSH_DEFAULT_WORKER, /* Default setting */ diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 09416a990d7..83fd8b28394 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -17955,7 +17955,7 @@ static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, #endif /* HAVE_LZ4 */ static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, + PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, "Number of multi-threaded flush threads", NULL, NULL, MTFLUSH_DEFAULT_WORKER, /* Default setting */ From 88765c3b4d7357ed5a063abb46cabf72c26e7b32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Sat, 29 Mar 2014 16:51:28 +0200 Subject: [PATCH 51/56] Disable failing test cases that fail because of upstream. --- mysql-test/disabled.def | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mysql-test/disabled.def b/mysql-test/disabled.def index e5fa24786e1..d2e839fa39a 100644 --- a/mysql-test/disabled.def +++ b/mysql-test/disabled.def @@ -20,3 +20,5 @@ mysql_embedded : Bug#12561297 2011-05-14 Anitha Dependent on PB2 chang ssl_crl_clients_valid : broken upstream ssl_crl : broken upstream ssl_crl_clrpath : broken upstream +innodb-wl5522-debug-zip : broken upstream +innodb_bug12902967 : broken upstream \ No newline at end of file From 13c73c31c320877bb3a7b7035631ccdd6eee4c2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Tue, 15 Apr 2014 14:28:25 +0300 Subject: [PATCH 52/56] Added support for LZO compression method. 
Removed: innodb_use_lz4 configuration parameter Added: innodb_compression_algorithm configuration parameter 0 = no compression, 1 = ZLIB, 2 = LZ4, 3 = LZO Fixed issue with incorrect trim calculations --- cmake/lz4.cmake | 24 ++-- cmake/lzo.cmake | 35 ++++++ storage/innobase/CMakeLists.txt | 4 +- storage/innobase/fil/fil0pagecompress.cc | 118 ++++++++++++-------- storage/innobase/handler/ha_innodb.cc | 25 +++-- storage/innobase/include/fil0pagecompress.h | 12 +- storage/innobase/include/srv0srv.h | 4 +- storage/innobase/os/os0file.cc | 22 +++- storage/innobase/srv/srv0srv.cc | 3 +- storage/xtradb/CMakeLists.txt | 4 +- storage/xtradb/fil/fil0pagecompress.cc | 117 +++++++++++-------- storage/xtradb/handler/ha_innodb.cc | 25 +++-- storage/xtradb/include/fil0pagecompress.h | 12 +- storage/xtradb/include/srv0srv.h | 4 +- storage/xtradb/os/os0file.cc | 22 +++- storage/xtradb/srv/srv0srv.cc | 3 +- 16 files changed, 297 insertions(+), 137 deletions(-) create mode 100644 cmake/lzo.cmake diff --git a/cmake/lz4.cmake b/cmake/lz4.cmake index 56120e2cdd0..bb2300891eb 100644 --- a/cmake/lz4.cmake +++ b/cmake/lz4.cmake @@ -14,22 +14,22 @@ MACRO (MYSQL_CHECK_LZ4) -CHECK_INCLUDE_FILES(lz4.h HAVE_LZ4_H) -CHECK_LIBRARY_EXISTS(liblz4.a LZ4_compress_limitedOutput "" HAVE_LZ4_LIB) - -IF(HAVE_LZ4_LIB AND HAVE_LZ4_H) - ADD_DEFINITIONS(-DHAVE_LZ4=1) - LINK_LIBRARIES(liblz4.a) -ENDIF() -ENDMACRO() - -MACRO (MYSQL_CHECK_SHARED_LZ4) - CHECK_INCLUDE_FILES(lz4.h HAVE_LZ4_H) CHECK_LIBRARY_EXISTS(lz4 LZ4_compress_limitedOutput "" HAVE_LZ4_SHARED_LIB) IF (HAVE_LZ4_SHARED_LIB AND HAVE_LZ4_H) ADD_DEFINITIONS(-DHAVE_LZ4=1) - LINK_LIBRARIES(lz4) + LINK_LIBRARIES(lz4) ENDIF() ENDMACRO() + +MACRO (MYSQL_CHECK_LZ4_STATIC) + + CHECK_INCLUDE_FILES(lz4.h HAVE_LZ4_H) + CHECK_LIBRARY_EXISTS(liblz4.a LZ4_compress_limitedOutput "" HAVE_LZ4_LIB) + + IF(HAVE_LZ4_LIB AND HAVE_LZ4_H) + ADD_DEFINITIONS(-DHAVE_LZ4=1) + LINK_LIBRARIES(liblz4.a) + ENDIF() +ENDMACRO() \ No newline at end of file diff --git a/cmake/lzo.cmake b/cmake/lzo.cmake new file mode 100644 index 00000000000..596dfdcde8b --- /dev/null +++ b/cmake/lzo.cmake @@ -0,0 +1,35 @@ +# Copyright (C) 2014, SkySQL Ab. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; version 2 of the License. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +MACRO (MYSQL_CHECK_LZO_STATIC) + +CHECK_INCLUDE_FILES(lzo/lzo1x.h HAVE_LZO_H) +CHECK_LIBRARY_EXISTS(liblzo2.a lzo1x_1_compress "" HAVE_LZO_LIB) + +IF(HAVE_LZO_LIB AND HAVE_LZO_H) + ADD_DEFINITIONS(-DHAVE_LZO=1) + LINK_LIBRARIES(liblzo2.a) +ENDIF() +ENDMACRO() + +MACRO (MYSQL_CHECK_LZO) + +CHECK_INCLUDE_FILES(lzo/lzo1x.h HAVE_LZO_H) +CHECK_LIBRARY_EXISTS(lzo2 lzo1x_1_compress "" HAVE_LZO_LIB) + +IF(HAVE_LZO_LIB AND HAVE_LZO_H) + ADD_DEFINITIONS(-DHAVE_LZO=1) + LINK_LIBRARIES(lzo2) +ENDIF() +ENDMACRO() diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt index fa948c449c2..ca64c730051 100644 --- a/storage/innobase/CMakeLists.txt +++ b/storage/innobase/CMakeLists.txt @@ -19,8 +19,10 @@ INCLUDE(CheckFunctionExists) INCLUDE(CheckCSourceCompiles) INCLUDE(CheckCSourceRuns) INCLUDE(lz4) +INCLUDE(lzo) -MYSQL_CHECK_SHARED_LZ4() +MYSQL_CHECK_LZ4() +MYSQL_CHECK_LZO() # OS tests IF(UNIX) diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index 75da02a22a4..e06a789e37b 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -66,6 +66,10 @@ static ulint srv_data_read, srv_data_written; #ifdef HAVE_LZ4 #include "lz4.h" #endif +#ifdef HAVE_LZO +#include "lzo/lzo1x.h" +#endif + /****************************************************************//** For page compressed pages compress the page before actual write @@ -81,7 +85,9 @@ fil_compress_page( byte* out_buf, /*!< out: compressed buffer */ ulint len, /*!< in: length of input buffer.*/ ulint compression_level, /* in: compression level */ - ulint* out_len) /*!< out: actual length of compressed page */ + ulint* out_len, /*!< out: actual length of compressed + page */ + byte* lzo_mem) /*!< in: temporal memory used by LZO */ { int err = Z_OK; int level = 0; @@ -114,9 +120,11 @@ fil_compress_page( write_size = UNIV_PAGE_SIZE - header_len; + switch(innodb_compression_algorithm) { #ifdef HAVE_LZ4 - if (srv_use_lz4) { - err = LZ4_compress_limitedOutput((const char *)buf, (char *)out_buf+header_len, len, write_size); + case PAGE_LZ4_ALGORITHM: + err = LZ4_compress_limitedOutput((const char *)buf, + (char *)out_buf+header_len, len, write_size); write_size = err; if (err == 0) { @@ -130,8 +138,25 @@ fil_compress_page( *out_len = len; return (buf); } - } else { + break; #endif /* HAVE_LZ4 */ +#ifdef HAVE_LZO + case PAGE_LZO_ALGORITHM: + err = lzo1x_1_15_compress( + buf, len, out_buf+header_len, &write_size, lzo_mem); + + if (err != LZO_E_OK || write_size > len) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu", + space_id, fil_space_name(space), len, err, write_size); + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + + break; +#endif /* HAVE_LZO */ + case PAGE_ZLIB_ALGORITHM: err = compress2(out_buf+header_len, (ulong*)&write_size, buf, len, level); if (err != Z_OK) { @@ -145,9 +170,12 @@ fil_compress_page( *out_len = len; return (buf); } -#ifdef HAVE_LZ4 + break; + + default: + ut_error; + break; } -#endif /* HAVE_LZ4 */ /* Set up the page header */ memcpy(out_buf, buf, FIL_PAGE_DATA); @@ -156,18 +184,7 @@ fil_compress_page( /* Set up the correct page type */ mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); /* Set up the 
flush lsn to be compression algorithm */ - -#ifdef HAVE_LZ4 - if (srv_use_lz4) { - mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_LZ4); - } else { -#endif /* HAVE_LZ4 */ - mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_ZLIB); - -#ifdef HAVE_LZ4 - } -#endif /* HAVE_LZ4 */ - + mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, innodb_compression_algorithm); /* Set up the actual payload lenght */ mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); @@ -176,17 +193,7 @@ fil_compress_page( ut_ad(fil_page_is_compressed(out_buf)); ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); - -#ifdef HAVE_LZ4 - if (srv_use_lz4) { - ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_LZ4); - } else { -#endif /* HAVE_LZ4 */ - ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_ZLIB); - -#ifdef HAVE_LZ4 - } -#endif /* HAVE_LZ4 */ + ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == innodb_compression_algorithm); #endif /* UNIV_DEBUG */ write_size+=header_len; @@ -207,11 +214,6 @@ fil_compress_page( srv_stats.page_compression_saved.add((len - write_size)); - if ((len - write_size) > 0) { - srv_stats.page_compression_trim_sect512.add(((len - write_size) / SECT_SIZE)); - srv_stats.page_compression_trim_sect4096.add(((len - write_size) / (SECT_SIZE*8))); - } - srv_stats.pages_page_compressed.inc(); *out_len = write_size; @@ -236,6 +238,7 @@ fil_decompress_page( ulint actual_size = 0; ulint compression_alg = 0; byte *in_buf; + ulint olen=0; ut_ad(buf); ut_ad(len); @@ -287,16 +290,16 @@ fil_decompress_page( *write_size = actual_size; } - if (compression_alg == FIL_PAGE_COMPRESSION_ZLIB) { - #ifdef UNIV_PAGECOMPRESS_DEBUG - fprintf(stderr, - "InnoDB: Note: Preparing for decompress for len %lu\n", - actual_size); + fprintf(stderr, + "InnoDB: Note: Preparing for decompress for len %lu\n", + actual_size); #endif /* UNIV_PAGECOMPRESS_DEBUG */ - err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size); + switch(compression_alg) { + case PAGE_ZLIB_ALGORITHM: + err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size); /* If uncompress fails it means that page is corrupted */ if (err != Z_OK) { @@ -311,14 +314,10 @@ fil_decompress_page( ut_error; } + break; -#ifdef UNIV_PAGECOMPRESS_DEBUG - fprintf(stderr, - "InnoDB: Note: Decompression succeeded for len %lu \n", - len); -#endif /* UNIV_PAGECOMPRESS_DEBUG */ #ifdef HAVE_LZ4 - } else if (compression_alg == FIL_PAGE_COMPRESSION_LZ4) { + case PAGE_LZ4_ALGORITHM: err = LZ4_decompress_fast((const char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (char *)in_buf, UNIV_PAGE_SIZE); if (err != (int)actual_size) { @@ -331,8 +330,26 @@ fil_decompress_page( ut_error; } + break; #endif /* HAVE_LZ4 */ - } else { +#ifdef HAVE_LZO + case PAGE_LZO_ALGORITHM: + err = lzo1x_decompress((const unsigned char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, + actual_size,(unsigned char *)in_buf, &olen, NULL); + + if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %d bytes.\n" + "InnoDB: size %lu len %lu\n", + olen, actual_size, len); + fflush(stderr); + + ut_error; + } + break; +#endif + default: fprintf(stderr, "InnoDB: Corruption: Page is marked as compressed\n" "InnoDB: but compression 
algorithm %s\n" @@ -341,8 +358,15 @@ fflush(stderr); ut_error; + break; } +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: Decompression succeeded for len %lu \n", + len); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + srv_stats.pages_page_decompressed.inc(); /* Copy the uncompressed page to the buffer pool, not diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc index 2d3ac405cbe..c82c15193ee 100644 --- a/storage/innobase/handler/ha_innodb.cc +++ b/storage/innobase/handler/ha_innodb.cc @@ -101,6 +101,7 @@ this program; if not, write to the Free Software Foundation, Inc., #endif /* UNIV_DEBUG */ #include "fts0priv.h" #include "page0zip.h" +#include "fil0pagecompress.h" #define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X)) @@ -16793,12 +16794,20 @@ static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, "Use trim. Default FALSE.", NULL, NULL, FALSE); -#ifdef HAVE_LZ4 -static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, - PLUGIN_VAR_OPCMDARG , - "Use LZ4 for page compression", - NULL, NULL, FALSE); -#endif /* HAVE_LZ4 */ +static MYSQL_SYSVAR_LONG(compression_algorithm, innodb_compression_algorithm, + PLUGIN_VAR_OPCMDARG, + "Compression algorithm used on page compression. 1 for zlib, 2 for lz4, 3 for lzo", + NULL, NULL, + PAGE_ZLIB_ALGORITHM, + 0, +#if defined(HAVE_LZO) && defined(HAVE_LZ4) + PAGE_ALGORITHM_LAST, +#elif defined(HAVE_LZ4) && !defined(HAVE_LZO) + PAGE_LZ4_ALGORITHM, +#else + PAGE_ZLIB_ALGORITHM, +#endif + 0); static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -16967,9 +16976,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(trx_purge_view_update_only_debug), #endif /* UNIV_DEBUG */ MYSQL_SYSVAR(use_trim), -#ifdef HAVE_LZ4 - MYSQL_SYSVAR(use_lz4), -#endif + MYSQL_SYSVAR(compression_algorithm), MYSQL_SYSVAR(mtflush_threads), MYSQL_SYSVAR(use_mtflush), NULL diff --git a/storage/innobase/include/fil0pagecompress.h b/storage/innobase/include/fil0pagecompress.h index c362c0ddcd2..0cc5aeb4678 100644 --- a/storage/innobase/include/fil0pagecompress.h +++ b/storage/innobase/include/fil0pagecompress.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (C) 2013 SkySQL Ab. All Rights Reserved. +Copyright (C) 2013, 2014 SkySQL Ab. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -22,6 +22,12 @@ this program; if not, write to the Free Software Foundation, Inc., #include "fsp0fsp.h" #include "fsp0pagecompress.h" +#define PAGE_UNCOMPRESSED 0 +#define PAGE_ZLIB_ALGORITHM 1 +#define PAGE_LZ4_ALGORITHM 2 +#define PAGE_LZO_ALGORITHM 3 +#define PAGE_ALGORITHM_LAST PAGE_LZO_ALGORITHM + /******************************************************************//** @file include/fil0pagecompress.h Helper functions for extracting/storing page compression and @@ -85,7 +91,9 @@ fil_compress_page( byte* out_buf, /*!< out: compressed buffer */ ulint len, /*!< in: length of input buffer.*/ ulint compression_level, /*!< in: compression level */ - ulint* out_len); /*!< out: actual length of compressed page */ + ulint* out_len, /*!< out: actual length of compressed + page */ + byte* lzo_mem); /*!< in: temporal memory used by LZO */ /****************************************************************//** For page compressed pages decompress the page after actual read diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h index 1d01c7821d0..cfa94242200 100644 --- a/storage/innobase/include/srv0srv.h +++ b/storage/innobase/include/srv0srv.h @@ -249,8 +249,8 @@ extern my_bool srv_use_posix_fallocate; /* Use atomic writes i.e disable doublewrite buffer */ extern my_bool srv_use_atomic_writes; -/* If this flag IS TRUE, then we use lz4 to compress/decompress pages */ -extern my_bool srv_use_lz4; +/* Compression algorithm*/ +extern long innodb_compression_algorithm; /* Number of flush threads */ #define MTFLUSH_MAX_WORKER 64 diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index 8068e05573c..ce1b42e670e 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -2,7 +2,7 @@ Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Percona Inc.. Those modifications are @@ -74,6 +74,10 @@ Created 10/21/1995 Heikki Tuuri # endif #endif +#ifdef HAVE_LZO +#include "lzo/lzo1x.h" +#endif + /** Insert buffer segment id */ static const ulint IO_IBUF_SEGMENT = 0; @@ -221,6 +225,12 @@ struct os_aio_slot_t{ int n_bytes; /* bytes written/read. 
*/ int ret; /* AIO return code */ #endif /* WIN_ASYNC_IO */ +#ifdef HAVE_LZO + byte lzo_mem[LZO1X_1_15_MEM_COMPRESS]; +#else + byte lzo_mem; /* Temporal memory used by LZO */ +#endif + }; /** The asynchronous i/o array structure */ @@ -4480,7 +4490,15 @@ found: ut_ad(slot->page_buf); - tmp = fil_compress_page(fil_node_get_space_id(slot->message1), (byte *)buf, slot->page_buf, len, page_compression_level, &real_len); + /* Call page compression */ + tmp = fil_compress_page(fil_node_get_space_id(slot->message1), + (byte *)buf, + slot->page_buf, + len, + page_compression_level, + &real_len, + slot->lzo_mem + ); /* If compression succeeded, set up the length and buffer */ if (tmp != buf) { diff --git a/storage/innobase/srv/srv0srv.cc b/storage/innobase/srv/srv0srv.cc index fe3af72e150..a9cc7beb6b0 100644 --- a/storage/innobase/srv/srv0srv.cc +++ b/storage/innobase/srv/srv0srv.cc @@ -71,6 +71,7 @@ Created 10/8/1995 Heikki Tuuri #include "mysql/plugin.h" #include "mysql/service_thd_wait.h" +#include "fil0pagecompress.h" /* The following is the maximum allowed duration of a lock wait. */ UNIV_INTERN ulint srv_fatal_semaphore_wait_threshold = 600; @@ -154,7 +155,7 @@ UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE; /* If this flag is TRUE, then we disable doublewrite buffer */ UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; /* If this flag IS TRUE, then we use lz4 to compress/decompress pages */ -UNIV_INTERN my_bool srv_use_lz4 = FALSE; +UNIV_INTERN long innodb_compression_algorithm = PAGE_ZLIB_ALGORITHM; /* Number of threads used for multi-threaded flush */ UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER; /* If this flag is TRUE, then we will use multi threaded flush. */ diff --git a/storage/xtradb/CMakeLists.txt b/storage/xtradb/CMakeLists.txt index 10118cce0c1..7e6e5a048e2 100644 --- a/storage/xtradb/CMakeLists.txt +++ b/storage/xtradb/CMakeLists.txt @@ -19,8 +19,10 @@ INCLUDE(CheckFunctionExists) INCLUDE(CheckCSourceCompiles) INCLUDE(CheckCSourceRuns) INCLUDE(lz4) +INCLUDE(lzo) -MYSQL_CHECK_LZ4() +MYSQL_CHECK_LZ4_STATIC() +MYSQL_CHECK_LZO_STATIC() # OS tests IF(UNIX) diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index 96c019e3723..e06a789e37b 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -66,6 +66,10 @@ static ulint srv_data_read, srv_data_written; #ifdef HAVE_LZ4 #include "lz4.h" #endif +#ifdef HAVE_LZO +#include "lzo/lzo1x.h" +#endif + /****************************************************************//** For page compressed pages compress the page before actual write @@ -81,7 +85,9 @@ fil_compress_page( byte* out_buf, /*!< out: compressed buffer */ ulint len, /*!< in: length of input buffer.*/ ulint compression_level, /* in: compression level */ - ulint* out_len) /*!< out: actual length of compressed page */ + ulint* out_len, /*!< out: actual length of compressed + page */ + byte* lzo_mem) /*!< in: temporal memory used by LZO */ { int err = Z_OK; int level = 0; @@ -114,9 +120,11 @@ fil_compress_page( write_size = UNIV_PAGE_SIZE - header_len; + switch(innodb_compression_algorithm) { #ifdef HAVE_LZ4 - if (srv_use_lz4) { - err = LZ4_compress_limitedOutput((const char *)buf, (char *)out_buf+header_len, len, write_size); + case PAGE_LZ4_ALGORITHM: + err = LZ4_compress_limitedOutput((const char *)buf, + (char *)out_buf+header_len, len, write_size); write_size = err; if (err == 0) { @@ -130,9 +138,26 @@ fil_compress_page( *out_len = len; return (buf); } - } else { + break; #endif 
/* HAVE_LZ4 */ - err = compress2(out_buf+header_len, (ulong *)&write_size, buf, len, level); +#ifdef HAVE_LZO + case PAGE_LZO_ALGORITHM: + err = lzo1x_1_15_compress( + buf, len, out_buf+header_len, &write_size, lzo_mem); + + if (err != LZO_E_OK || write_size > len) { + fprintf(stderr, + "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu", + space_id, fil_space_name(space), len, err, write_size); + srv_stats.pages_page_compression_error.inc(); + *out_len = len; + return (buf); + } + + break; +#endif /* HAVE_LZO */ + case PAGE_ZLIB_ALGORITHM: + err = compress2(out_buf+header_len, (ulong*)&write_size, buf, len, level); if (err != Z_OK) { /* If error we leave the actual page as it was */ @@ -145,9 +170,12 @@ fil_compress_page( *out_len = len; return (buf); } -#ifdef HAVE_LZ4 + break; + + default: + ut_error; + break; } -#endif /* HAVE_LZ4 */ /* Set up the page header */ memcpy(out_buf, buf, FIL_PAGE_DATA); @@ -156,15 +184,7 @@ fil_compress_page( /* Set up the correct page type */ mach_write_to_2(out_buf+FIL_PAGE_TYPE, FIL_PAGE_PAGE_COMPRESSED); /* Set up the flush lsn to be compression algorithm */ -#ifdef HAVE_LZ4 - if (srv_use_lz4) { - mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_LZ4); - } else { -#endif /* HAVE_LZ4 */ - mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, FIL_PAGE_COMPRESSION_ZLIB); -#ifdef HAVE_LZ4 - } -#endif /* HAVE_LZ4 */ + mach_write_to_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN, innodb_compression_algorithm); /* Set up the actual payload lenght */ mach_write_to_2(out_buf+FIL_PAGE_DATA, write_size); @@ -173,16 +193,7 @@ fil_compress_page( ut_ad(fil_page_is_compressed(out_buf)); ut_ad(mach_read_from_4(out_buf+FIL_PAGE_SPACE_OR_CHKSUM) == BUF_NO_CHECKSUM_MAGIC); ut_ad(mach_read_from_2(out_buf+FIL_PAGE_DATA) == write_size); - -#ifdef HAVE_LZ4 - if (srv_use_lz4) { - ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_LZ4); - } else { -#endif /* HAVE_LZ4 */ - ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == FIL_PAGE_COMPRESSION_ZLIB); -#ifdef HAVE_LZ4 - } -#endif /* HAVE_LZ4 */ + ut_ad(mach_read_from_8(out_buf+FIL_PAGE_FILE_FLUSH_LSN) == innodb_compression_algorithm); #endif /* UNIV_DEBUG */ write_size+=header_len; @@ -203,11 +214,6 @@ fil_compress_page( srv_stats.page_compression_saved.add((len - write_size)); - if ((len - write_size) > 0) { - srv_stats.page_compression_trim_sect512.add(((len - write_size) / SECT_SIZE)); - srv_stats.page_compression_trim_sect4096.add(((len - write_size) / (SECT_SIZE*8))); - } - srv_stats.pages_page_compressed.inc(); *out_len = write_size; @@ -232,6 +238,7 @@ fil_decompress_page( ulint actual_size = 0; ulint compression_alg = 0; byte *in_buf; + ulint olen=0; ut_ad(buf); ut_ad(len); @@ -258,7 +265,7 @@ fil_decompress_page( if (page_buf == NULL) { #ifdef UNIV_PAGECOMPRESS_DEBUG fprintf(stderr, - "InnoDB: FIL: Note: Compression buffer not given, allocating...\n"); + "InnoDB: Note: FIL: Compression buffer not given, allocating...\n"); #endif /* UNIV_PAGECOMPRESS_DEBUG */ in_buf = static_cast(ut_malloc(UNIV_PAGE_SIZE)); } else { @@ -283,14 +290,15 @@ fil_decompress_page( *write_size = actual_size; } - if (compression_alg == FIL_PAGE_COMPRESSION_ZLIB) { - #ifdef UNIV_PAGECOMPRESS_DEBUG - fprintf(stderr, - "InnoDB: Note: Preparing for decompress for len %lu\n", - actual_size); + fprintf(stderr, + "InnoDB: Note: Preparing for decompress for len %lu\n", + actual_size); #endif /* UNIV_PAGECOMPRESS_DEBUG */ + + switch(compression_alg) { + case PAGE_ZLIB_ALGORITHM: 
err= uncompress(in_buf, &len, buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (unsigned long)actual_size); /* If uncompress fails it means that page is corrupted */ @@ -306,14 +314,10 @@ fil_decompress_page( ut_error; } + break; -#ifdef UNIV_PAGECOMPRESS_DEBUG - fprintf(stderr, - "InnoDB: Note: Decompression succeeded for len %lu \n", - len); -#endif /* UNIV_PAGECOMPRESS_DEBUG */ #ifdef HAVE_LZ4 - } else if (compression_alg == FIL_PAGE_COMPRESSION_LZ4) { + case PAGE_LZ4_ALGORITHM: err = LZ4_decompress_fast((const char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, (char *)in_buf, UNIV_PAGE_SIZE); if (err != (int)actual_size) { @@ -326,8 +330,26 @@ fil_decompress_page( ut_error; } + break; #endif /* HAVE_LZ4 */ - } else { +#ifdef HAVE_LZO + case PAGE_LZO_ALGORITHM: + err = lzo1x_decompress((const unsigned char *)buf+FIL_PAGE_DATA+FIL_PAGE_COMPRESSED_SIZE, + actual_size,(unsigned char *)in_buf, &olen, NULL); + + if (err != LZO_E_OK || (olen == 0 || olen > UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Corruption: Page is marked as compressed\n" + "InnoDB: but decompression read only %d bytes.\n" + "InnoDB: size %lu len %lu\n", + olen, actual_size, len); + fflush(stderr); + + ut_error; + } + break; +#endif + default: fprintf(stderr, "InnoDB: Corruption: Page is marked as compressed\n" "InnoDB: but compression algorithm %s\n" @@ -336,8 +358,15 @@ fil_decompress_page( fflush(stderr); ut_error; + break; } +#ifdef UNIV_PAGECOMPRESS_DEBUG + fprintf(stderr, + "InnoDB: Note: Decompression succeeded for len %lu \n", + len); +#endif /* UNIV_PAGECOMPRESS_DEBUG */ + srv_stats.pages_page_decompressed.inc(); /* Copy the uncompressed page to the buffer pool, not diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 83fd8b28394..25b96be43b7 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -103,6 +103,7 @@ this program; if not, write to the Free Software Foundation, Inc., #endif /* UNIV_DEBUG */ #include "fts0priv.h" #include "page0zip.h" +#include "fil0pagecompress.h" #define thd_get_trx_isolation(X) ((enum_tx_isolation)thd_tx_isolation(X)) @@ -17947,12 +17948,20 @@ static MYSQL_SYSVAR_BOOL(use_trim, srv_use_trim, "Use trim. Default FALSE.", NULL, NULL, FALSE); -#ifdef HAVE_LZ4 -static MYSQL_SYSVAR_BOOL(use_lz4, srv_use_lz4, - PLUGIN_VAR_OPCMDARG , - "Use LZ4 for page compression", - NULL, NULL, FALSE); -#endif /* HAVE_LZ4 */ +static MYSQL_SYSVAR_LONG(compression_algorithm, innodb_compression_algorithm, + PLUGIN_VAR_OPCMDARG, + "Compression algorithm used on page compression. 
1 for zlib, 2 for lz4, 3 for lzo", + NULL, NULL, + PAGE_ZLIB_ALGORITHM, + 0, +#if defined(HAVE_LZO) && defined(HAVE_LZ4) + PAGE_ALGORITHM_LAST, +#elif defined(HAVE_LZ4) && !defined(HAVE_LZO) + PAGE_LZ4_ALGORITHM, +#else + PAGE_ZLIB_ALGORITHM, +#endif + 0); static MYSQL_SYSVAR_LONG(mtflush_threads, srv_mtflush_threads, PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, @@ -18159,9 +18168,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= { MYSQL_SYSVAR(locking_fake_changes), MYSQL_SYSVAR(use_stacktrace), MYSQL_SYSVAR(use_trim), -#ifdef HAVE_LZ4 - MYSQL_SYSVAR(use_lz4), -#endif + MYSQL_SYSVAR(compression_algorithm), MYSQL_SYSVAR(mtflush_threads), MYSQL_SYSVAR(use_mtflush), NULL diff --git a/storage/xtradb/include/fil0pagecompress.h b/storage/xtradb/include/fil0pagecompress.h index c362c0ddcd2..0cc5aeb4678 100644 --- a/storage/xtradb/include/fil0pagecompress.h +++ b/storage/xtradb/include/fil0pagecompress.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (C) 2013 SkySQL Ab. All Rights Reserved. +Copyright (C) 2013, 2014 SkySQL Ab. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -22,6 +22,12 @@ this program; if not, write to the Free Software Foundation, Inc., #include "fsp0fsp.h" #include "fsp0pagecompress.h" +#define PAGE_UNCOMPRESSED 0 +#define PAGE_ZLIB_ALGORITHM 1 +#define PAGE_LZ4_ALGORITHM 2 +#define PAGE_LZO_ALGORITHM 3 +#define PAGE_ALGORITHM_LAST PAGE_LZO_ALGORITHM + /******************************************************************//** @file include/fil0pagecompress.h Helper functions for extracting/storing page compression and @@ -85,7 +91,9 @@ fil_compress_page( byte* out_buf, /*!< out: compressed buffer */ ulint len, /*!< in: length of input buffer.*/ ulint compression_level, /*!< in: compression level */ - ulint* out_len); /*!< out: actual length of compressed page */ + ulint* out_len, /*!< out: actual length of compressed + page */ + byte* lzo_mem); /*!< in: temporal memory used by LZO */ /****************************************************************//** For page compressed pages decompress the page after actual read diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h index be16dfddc72..ea8afd450dd 100644 --- a/storage/xtradb/include/srv0srv.h +++ b/storage/xtradb/include/srv0srv.h @@ -269,8 +269,8 @@ extern my_bool srv_use_posix_fallocate; /* Use atomic writes i.e disable doublewrite buffer */ extern my_bool srv_use_atomic_writes; -/* If this flag IS TRUE, then we use lz4 to compress/decompress pages */ -extern my_bool srv_use_lz4; +/* Compression algorithm*/ +extern long innodb_compression_algorithm; /* Number of flush threads */ #define MTFLUSH_MAX_WORKER 64 diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 646f8a87cbc..a3307fa0ba2 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -2,7 +2,7 @@ Copyright (c) 1995, 2013, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2009, Percona Inc. -Copyright (c) 2013, SkySQL Ab. All Rights Reserved. +Copyright (c) 2013, 2014, SkySQL Ab. All Rights Reserved. Portions of this file contain modifications contributed and copyrighted by Percona Inc..
Those modifications are @@ -80,6 +80,10 @@ Created 10/21/1995 Heikki Tuuri # endif #endif +#ifdef HAVE_LZO +#include "lzo/lzo1x.h" +#endif + /** Insert buffer segment id */ static const ulint IO_IBUF_SEGMENT = 0; @@ -230,6 +234,12 @@ struct os_aio_slot_t{ int n_bytes; /* bytes written/read. */ int ret; /* AIO return code */ #endif /* WIN_ASYNC_IO */ +#ifdef HAVE_LZO + byte lzo_mem[LZO1X_1_15_MEM_COMPRESS]; +#else + byte lzo_mem; /* Temporal memory used by LZO */ +#endif + }; /** The asynchronous i/o array structure */ @@ -4596,7 +4606,15 @@ found: ut_ad(slot->page_buf); - tmp = fil_compress_page(fil_node_get_space_id(slot->message1), (byte *)buf, slot->page_buf, len, page_compression_level, &real_len); + /* Call page compression */ + tmp = fil_compress_page(fil_node_get_space_id(slot->message1), + (byte *)buf, + slot->page_buf, + len, + page_compression_level, + &real_len, + slot->lzo_mem + ); /* If compression succeeded, set up the length and buffer */ if (tmp != buf) { diff --git a/storage/xtradb/srv/srv0srv.cc b/storage/xtradb/srv/srv0srv.cc index 386dbfddf0b..e70a2bd0dab 100644 --- a/storage/xtradb/srv/srv0srv.cc +++ b/storage/xtradb/srv/srv0srv.cc @@ -73,6 +73,7 @@ Created 10/8/1995 Heikki Tuuri #include "mysql/plugin.h" #include "mysql/service_thd_wait.h" +#include "fil0pagecompress.h" /* prototypes of new functions added to ha_innodb.cc for kill_idle_transaction */ ibool innobase_thd_is_idle(const void* thd); @@ -172,7 +173,7 @@ UNIV_INTERN my_bool srv_use_posix_fallocate = FALSE; /* If this flag is TRUE, then we disable doublewrite buffer */ UNIV_INTERN my_bool srv_use_atomic_writes = FALSE; /* If this flag IS TRUE, then we use lz4 to compress/decompress pages */ -UNIV_INTERN my_bool srv_use_lz4 = FALSE; +UNIV_INTERN long innodb_compression_algorithm = PAGE_ZLIB_ALGORITHM; /* Number of threads used for multi-threaded flush */ UNIV_INTERN long srv_mtflush_threads = MTFLUSH_DEFAULT_WORKER; /* If this flag is TRUE, then we will use multi threaded flush. */ From 2f46e5b9fc51f0c427634f935b7d922047023628 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Wed, 16 Apr 2014 16:55:36 +0300 Subject: [PATCH 53/56] MDEV-6070: FusionIO: Failure to create a table with ATOMIC_WRITES option leaves the database in inconsistent state. Analysis: The problem was that the atomic writes variable had an incorrect type in some places, leading to e.g. the OFF option not being recognized. Furthermore, some error checking code was missing from both the InnoDB and XtraDB engines. Finally, when a table is created we have already created the .ibd file, and if we can't set atomic writes it stays there. Fix: Change the atomic writes variable type to ulint as it should be. Fix: Add proper error code checking of OS errors in both InnoDB and XtraDB. Fix: Remove the .ibd file when atomic writes can't be enabled for a new table.
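A minimal illustration of the scenario this fixes, assuming the ATOMIC_WRITES table option takes the form shown here:

  CREATE TABLE t1 (a INT PRIMARY KEY) ENGINE=InnoDB ATOMIC_WRITES=ON;

On a device where the atomic-write ioctl is not supported the CREATE fails, but previously the newly created t1.ibd was left behind; with this change the file is removed and the OS error is reported.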
--- storage/innobase/fil/fil0fil.cc | 6 +-- storage/innobase/os/os0file.cc | 83 ++++++++++++++++++++++++++------- storage/xtradb/fil/fil0fil.cc | 15 ++++-- storage/xtradb/os/os0file.cc | 81 ++++++++++++++++++++++++++------ 4 files changed, 146 insertions(+), 39 deletions(-) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index cee9c7e0534..888b2c659b9 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -745,7 +745,7 @@ fil_node_open_file( ulint space_id; ulint flags=0; ulint page_size; - ibool atomic_writes=FALSE; + ulint atomic_writes=0; ut_ad(mutex_own(&(system->mutex))); ut_a(node->n_pending == 0); @@ -3425,7 +3425,7 @@ fil_create_new_single_table_tablespace( /* TRUE if a table is created with CREATE TEMPORARY TABLE */ bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY); bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags); - bool atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); + ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); ut_a(space_id > 0); ut_ad(!srv_read_only_mode); @@ -3720,7 +3720,7 @@ fil_open_single_table_tablespace( fsp_open_info remote; ulint tablespaces_found = 0; ulint valid_tablespaces_found = 0; - ibool atomic_writes = FALSE; + ulint atomic_writes = 0; #ifdef UNIV_SYNC_DEBUG ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index ce1b42e670e..3020e7b1a53 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -399,9 +399,8 @@ os_file_set_atomic_writes( if (ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option)) { - fprintf(stderr, "InnoDB: Error: trying to enable atomic writes on " - "file %s on non-supported platform! Please restart with " - "innodb_use_atomic_writes disabled.\n", name); + fprintf(stderr, "InnoDB: Warning:Trying to enable atomic writes on " + "file %s on non-supported platform!\n", name); os_file_handle_error_no_exit(name, "ioctl", FALSE, __FILE__, __LINE__); return(FALSE); } @@ -409,8 +408,7 @@ os_file_set_atomic_writes( return(TRUE); #else fprintf(stderr, "InnoDB: Error: trying to enable atomic writes on " - "non-supported platform! 
Please restart with " - "innodb_use_atomic_writes disabled.\n"); + "file %s on non-supported platform!\n", name); return(FALSE); #endif } @@ -561,6 +559,19 @@ os_file_get_last_error_low( "InnoDB: because of either a thread exit" " or an application request.\n" "InnoDB: Retry attempt is made.\n"); + } else if (err == ECANCELED || err == ENOTTY) { + if (strerror(err) != NULL) { + fprintf(stderr, + "InnoDB: Error number %d" + " means '%s'.\n", + err, strerror(err)); + } + + if(srv_use_atomic_writes) { + fprintf(stderr, + "InnoDB: Error trying to enable atomic writes on " + "non-supported destination!\n"); + } } else { fprintf(stderr, "InnoDB: Some operating system error numbers" @@ -620,11 +631,14 @@ os_file_get_last_error_low( fprintf(stderr, "InnoDB: The error means mysqld does not have" " the access rights to\n" - "InnoDECANCELEDB: the directory.\n"); - } else if (err == ECANCELED) { - fprintf(stderr, - "InnoDB: Operation canceled (%d):%s\n", - err, strerror(err)); + "InnoDB: the directory.\n"); + } else if (err == ECANCELED || err == ENOTTY) { + if (strerror(err) != NULL) { + fprintf(stderr, + "InnoDB: Error number %d" + " means '%s'.\n", + err, strerror(err)); + } if(srv_use_atomic_writes) { fprintf(stderr, @@ -663,6 +677,7 @@ os_file_get_last_error_low( case EISDIR: return(OS_FILE_PATH_ERROR); case ECANCELED: + case ENOTTY: return(OS_FILE_OPERATION_NOT_SUPPORTED); case EAGAIN: if (srv_use_native_aio) { @@ -1521,13 +1536,21 @@ os_file_create_simple_no_error_handling_func( attributes, NULL); // No template file + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ if (file != INVALID_HANDLE_VALUE && (awrites == ATOMIC_WRITES_ON || (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) && !os_file_set_atomic_writes(name, file)) { - CloseHandle(file); + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + CloseHandle(file); + os_file_delete_if_exists_func(name); *success = FALSE; file = INVALID_HANDLE_VALUE; + } } *success = (file != INVALID_HANDLE_VALUE); @@ -1590,13 +1613,21 @@ os_file_create_simple_no_error_handling_func( } #endif /* USE_FILE_LOCK */ + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ if (file != -1 && (awrites == ATOMIC_WRITES_ON || (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) && !os_file_set_atomic_writes(name, file)) { - *success = FALSE; - close(file); - file = -1; + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + close(file); + os_file_delete_if_exists_func(name); + *success = FALSE; + file = -1; + } } @@ -1836,13 +1867,21 @@ os_file_create_func( } while (retry); + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. 
If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ if (file != INVALID_HANDLE_VALUE && (awrites == ATOMIC_WRITES_ON || (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) && !os_file_set_atomic_writes(name, file)) { - CloseHandle(file); + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + CloseHandle(file); + os_file_delete_if_exists_func(name); *success = FALSE; file = INVALID_HANDLE_VALUE; + } } #else /* __WIN__ */ int create_flag; @@ -1972,13 +2011,21 @@ os_file_create_func( } #endif /* USE_FILE_LOCK */ + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ if (file != -1 && (awrites == ATOMIC_WRITES_ON || (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) && !os_file_set_atomic_writes(name, file)) { - *success = FALSE; - close(file); - file = -1; + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + close(file); + os_file_delete_if_exists_func(name); + *success = FALSE; + file = -1; + } } #endif /* __WIN__ */ diff --git a/storage/xtradb/fil/fil0fil.cc b/storage/xtradb/fil/fil0fil.cc index 8e788e71983..b30a85a8597 100644 --- a/storage/xtradb/fil/fil0fil.cc +++ b/storage/xtradb/fil/fil0fil.cc @@ -746,7 +746,7 @@ fil_node_open_file( ulint space_id; ulint flags=0; ulint page_size; - ibool atomic_writes=FALSE; + ulint atomic_writes=0; ut_ad(mutex_own(&(system->mutex))); ut_a(node->n_pending == 0); @@ -3288,6 +3288,8 @@ fil_create_link_file( } else if (error == OS_FILE_DISK_FULL) { err = DB_OUT_OF_FILE_SPACE; + } else if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { + err = DB_UNSUPPORTED; } else { err = DB_ERROR; } @@ -3448,7 +3450,7 @@ fil_create_new_single_table_tablespace( /* TRUE if a table is created with CREATE TEMPORARY TABLE */ bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY); bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags); - bool atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); + ulint atomic_writes = FSP_FLAGS_GET_ATOMIC_WRITES(flags); ut_a(space_id > 0); ut_ad(!srv_read_only_mode); @@ -3509,6 +3511,11 @@ fil_create_new_single_table_tablespace( goto error_exit_3; } + if (error == OS_FILE_OPERATION_NOT_SUPPORTED) { + err = DB_UNSUPPORTED; + goto error_exit_3; + } + if (error == OS_FILE_DISK_FULL) { err = DB_OUT_OF_FILE_SPACE; goto error_exit_3; @@ -3735,7 +3742,7 @@ fil_open_single_table_tablespace( fsp_open_info remote; ulint tablespaces_found = 0; ulint valid_tablespaces_found = 0; - ibool atomic_writes = FALSE; + ulint atomic_writes = 0; #ifdef UNIV_SYNC_DEBUG ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); @@ -3746,6 +3753,8 @@ fil_open_single_table_tablespace( return(DB_CORRUPTION); } + atomic_writes = fsp_flags_get_atomic_writes(flags); + /* If the tablespace was relocated, we do not compare the DATA_DIR flag */ ulint mod_flags = flags & ~FSP_FLAGS_MASK_DATA_DIR; diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index a3307fa0ba2..f7677140c9a 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -628,10 +628,13 @@ os_file_get_last_error_low( "InnoDB: because of either a thread exit" " or an application request.\n" "InnoDB: Retry attempt is made.\n"); - } else if (err == ECANCELED) { - fprintf(stderr, - 
"InnoDB: Operation canceled (%d):%s\n", - err, strerror(err)); + } else if (err == ECANCELED || err == ENOTTY) { + if (strerror(err) != NULL) { + fprintf(stderr, + "InnoDB: Error number %d" + " means '%s'.\n", + err, strerror(err)); + } if(srv_use_atomic_writes) { fprintf(stderr, @@ -698,6 +701,20 @@ os_file_get_last_error_low( "InnoDB: The error means mysqld does not have" " the access rights to\n" "InnoDB: the directory.\n"); + } else if (err == ECANCELED || err == ENOTTY) { + if (strerror(err) != NULL) { + fprintf(stderr, + "InnoDB: Error number %d" + " means '%s'.\n", + err, strerror(err)); + } + + + if(srv_use_atomic_writes) { + fprintf(stderr, + "InnoDB: Error trying to enable atomic writes on " + "non-supported destination!\n"); + } } else { if (strerror(err) != NULL) { fprintf(stderr, @@ -735,6 +752,7 @@ os_file_get_last_error_low( } break; case ECANCELED: + case ENOTTY: return(OS_FILE_OPERATION_NOT_SUPPORTED); case EINTR: if (srv_use_native_aio) { @@ -1591,13 +1609,21 @@ os_file_create_simple_no_error_handling_func( attributes, NULL); // No template file + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ if (file != INVALID_HANDLE_VALUE && (awrites == ATOMIC_WRITES_ON || (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) && !os_file_set_atomic_writes(name, file)) { - CloseHandle(file); + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + CloseHandle(file); + os_file_delete_if_exists_func(name); *success = FALSE; file = INVALID_HANDLE_VALUE; + } } *success = (file != INVALID_HANDLE_VALUE); @@ -1660,13 +1686,21 @@ os_file_create_simple_no_error_handling_func( } #endif /* USE_FILE_LOCK */ + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ if (file != -1 && (awrites == ATOMIC_WRITES_ON || (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) && !os_file_set_atomic_writes(name, file)) { - *success = FALSE; - close(file); - file = -1; + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + close(file); + os_file_delete_if_exists_func(name); + *success = FALSE; + file = -1; + } } #endif /* __WIN__ */ @@ -1752,15 +1786,16 @@ os_file_set_atomic_writes( if (ioctl(file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic_option)) { + fprintf(stderr, "InnoDB: Warning:Trying to enable atomic writes on " + "file %s on non-supported platform!\n", name); os_file_handle_error_no_exit(name, "ioctl(DFS_IOCTL_ATOMIC_WRITE_SET)", FALSE, __FILE__, __LINE__); return(FALSE); } return(TRUE); #else - ib_logf(IB_LOG_LEVEL_ERROR, - "trying to enable atomic writes on non-supported platform! " - "Please restart with innodb_use_atomic_writes disabled.\n"); + fprintf(stderr, "InnoDB: Error: trying to enable atomic writes on " + "file %s on non-supported platform!\n", name); return(FALSE); #endif } @@ -1951,13 +1986,21 @@ os_file_create_func( } while (retry); + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. 
If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ if (file != INVALID_HANDLE_VALUE && (awrites == ATOMIC_WRITES_ON || (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) && !os_file_set_atomic_writes(name, file)) { - CloseHandle(file); + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + CloseHandle(file); + os_file_delete_if_exists_func(name); *success = FALSE; file = INVALID_HANDLE_VALUE; + } } #else /* __WIN__ */ @@ -2090,13 +2133,21 @@ os_file_create_func( } #endif /* USE_FILE_LOCK */ + /* If we have proper file handle and atomic writes should be used, + try to set atomic writes and if that fails when creating a new + table, produce a error. If atomic writes are used on existing + file, ignore error and use traditional writes for that file */ if (file != -1 && (awrites == ATOMIC_WRITES_ON || (srv_use_atomic_writes && awrites == ATOMIC_WRITES_DEFAULT)) && !os_file_set_atomic_writes(name, file)) { - *success = FALSE; - close(file); - file = -1; + if (create_mode == OS_FILE_CREATE) { + fprintf(stderr, "InnoDB: Error: Can't create file using atomic writes\n"); + close(file); + os_file_delete_if_exists_func(name); + *success = FALSE; + file = -1; + } } From 2d340f9a677bb8dc24e9e1601c613a6c10f5c3c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Wed, 23 Apr 2014 19:23:11 +0300 Subject: [PATCH 54/56] Fixed bug on free buffer space calculation when LZO is used. Fixed bug on function call when InnoDB plugin is used. --- storage/innobase/fil/fil0fil.cc | 2 +- storage/innobase/fil/fil0pagecompress.cc | 4 +++- storage/innobase/include/os0file.h | 4 +++- storage/innobase/os/os0file.cc | 2 +- storage/xtradb/fil/fil0pagecompress.cc | 4 +++- storage/xtradb/os/os0file.cc | 1 + 6 files changed, 12 insertions(+), 5 deletions(-) diff --git a/storage/innobase/fil/fil0fil.cc b/storage/innobase/fil/fil0fil.cc index 8c48adf7c66..9658b9ddcb0 100644 --- a/storage/innobase/fil/fil0fil.cc +++ b/storage/innobase/fil/fil0fil.cc @@ -4997,7 +4997,7 @@ retry: "space for file \'%s\' failed. 
Current size " INT64PF ", desired size " INT64PF "\n", node->name, start_offset, len+start_offset); - os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE); + os_file_handle_error_no_exit(node->name, "posix_fallocate", FALSE, __FILE__, __LINE__); success = FALSE; } else { success = TRUE; diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index e06a789e37b..b2d201e6a59 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -70,6 +70,8 @@ static ulint srv_data_read, srv_data_written; #include "lzo/lzo1x.h" #endif +/* Used for debugging */ +//#define UNIV_PAGECOMPRESS_DEBUG 1 /****************************************************************//** For page compressed pages compress the page before actual write @@ -145,7 +147,7 @@ fil_compress_page( err = lzo1x_1_15_compress( buf, len, out_buf+header_len, &write_size, lzo_mem); - if (err != LZO_E_OK || write_size > len) { + if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) { fprintf(stderr, "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu", space_id, fil_space_name(space), len, err, write_size); diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h index 530fc536f01..2f22aa73508 100644 --- a/storage/innobase/include/os0file.h +++ b/storage/innobase/include/os0file.h @@ -1318,8 +1318,10 @@ os_file_handle_error_no_exit( /*=========================*/ const char* name, /*!< in: name of a file or NULL */ const char* operation, /*!< in: operation */ - ibool on_error_silent);/*!< in: if TRUE then don't print + ibool on_error_silent,/*!< in: if TRUE then don't print any message to the log. */ + const char* file, /*!< in: file name */ + const ulint line); /*!< in: line */ #ifndef UNIV_NONINL #include "os0file.ic" diff --git a/storage/innobase/os/os0file.cc b/storage/innobase/os/os0file.cc index c33829b7fe1..cd7b4161cb2 100644 --- a/storage/innobase/os/os0file.cc +++ b/storage/innobase/os/os0file.cc @@ -370,7 +370,6 @@ os_slot_alloc_page_buf( /****************************************************************//** Does error handling when a file operation fails. 
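
The write_size bound changed in the two fil_compress_page() hunks above exists because LZO1X output can be larger than its input: the library's documented worst case is in_len + in_len/16 + 64 + 3 bytes. The output buffer therefore has to be sized for that worst case, and "compression succeeded" has to mean "the result plus the page-compression header still fits in one page", not "the result is no longer than the input". A sketch of both rules follows; buf, len, header_len and lzo_mem follow the surrounding code, out_cap and out_buf are illustrative names.

    /* Documented LZO1X worst case: output can grow to
    in_len + in_len/16 + 64 + 3 bytes. */
    ulint       out_cap = UNIV_PAGE_SIZE + UNIV_PAGE_SIZE / 16 + 64 + 3;
    byte*       out_buf = static_cast<byte*>(ut_malloc(header_len + out_cap));
    lzo_uint    write_size = 0;

    int err = lzo1x_1_15_compress(buf, len, out_buf + header_len,
                                  &write_size, lzo_mem);

    if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE - header_len) {
            /* Not compressible enough to be worth it: write the page
            out uncompressed instead. */
            memcpy(out_buf, buf, len);
            write_size = len;
    }
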
@return TRUE if we should retry the operation */ -static ibool os_file_handle_error_no_exit( /*=========================*/ @@ -6337,6 +6336,7 @@ os_slot_alloc_page_buf( byte* cbuf2; byte* cbuf; + /* We allocate extra to avoid memory overwrite on compression */ cbuf2 = static_cast(ut_malloc(UNIV_PAGE_SIZE*2)); cbuf = static_cast(ut_align(cbuf2, UNIV_PAGE_SIZE)); slot->page_compression_page = static_cast(cbuf2); diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index e06a789e37b..b2d201e6a59 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -70,6 +70,8 @@ static ulint srv_data_read, srv_data_written; #include "lzo/lzo1x.h" #endif +/* Used for debugging */ +//#define UNIV_PAGECOMPRESS_DEBUG 1 /****************************************************************//** For page compressed pages compress the page before actual write @@ -145,7 +147,7 @@ fil_compress_page( err = lzo1x_1_15_compress( buf, len, out_buf+header_len, &write_size, lzo_mem); - if (err != LZO_E_OK || write_size > len) { + if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) { fprintf(stderr, "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu", space_id, fil_space_name(space), len, err, write_size); diff --git a/storage/xtradb/os/os0file.cc b/storage/xtradb/os/os0file.cc index 36136614814..657a3a8d050 100644 --- a/storage/xtradb/os/os0file.cc +++ b/storage/xtradb/os/os0file.cc @@ -6406,6 +6406,7 @@ os_slot_alloc_page_buf( byte* cbuf2; byte* cbuf; + /* We allocate extra to avoid memory overwrite on compression */ cbuf2 = static_cast(ut_malloc(UNIV_PAGE_SIZE*2)); cbuf = static_cast(ut_align(cbuf2, UNIV_PAGE_SIZE)); slot->page_compression_page = static_cast(cbuf2); From d6afa8004ec48e4c25d5dfed804d0556cdec587f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Mon, 28 Apr 2014 07:52:41 +0300 Subject: [PATCH 55/56] Fixed small error on compression failure error text. 
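
The os_slot_alloc_page_buf() hunks above supply the slack that makes the previous fix safe: each AIO slot reserves two pages, keeps the raw pointer so the allocation can be freed later, and hands only the page-aligned pointer to the compression and I/O code. (In the original source the casts read static_cast<byte*>; the template arguments were lost in this rendering of the patch.) A sketch of the pattern; slot->page_buf is an assumed name for wherever the aligned pointer is stored.

    /* Reserve two pages so that alignment plus a worst-case compression
    result cannot run past the end of the buffer. */
    byte*   raw  = static_cast<byte*>(ut_malloc(UNIV_PAGE_SIZE * 2));
    byte*   page = static_cast<byte*>(ut_align(raw, UNIV_PAGE_SIZE));

    slot->page_compression_page = raw;  /* original allocation, kept
                                        for ut_free() */
    slot->page_buf = page;              /* aligned buffer used for
                                        compression and async I/O */
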
--- storage/innobase/fil/fil0pagecompress.cc | 2 +- storage/xtradb/fil/fil0pagecompress.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/storage/innobase/fil/fil0pagecompress.cc b/storage/innobase/fil/fil0pagecompress.cc index b2d201e6a59..ce7063bc688 100644 --- a/storage/innobase/fil/fil0pagecompress.cc +++ b/storage/innobase/fil/fil0pagecompress.cc @@ -149,7 +149,7 @@ fil_compress_page( if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) { fprintf(stderr, - "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu", + "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n", space_id, fil_space_name(space), len, err, write_size); srv_stats.pages_page_compression_error.inc(); *out_len = len; diff --git a/storage/xtradb/fil/fil0pagecompress.cc b/storage/xtradb/fil/fil0pagecompress.cc index b2d201e6a59..ce7063bc688 100644 --- a/storage/xtradb/fil/fil0pagecompress.cc +++ b/storage/xtradb/fil/fil0pagecompress.cc @@ -149,7 +149,7 @@ fil_compress_page( if (err != LZO_E_OK || write_size > UNIV_PAGE_SIZE-header_len) { fprintf(stderr, - "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu", + "InnoDB: Warning: Compression failed for space %lu name %s len %lu err %d write_size %lu\n", space_id, fil_space_name(space), len, err, write_size); srv_stats.pages_page_compression_error.inc(); *out_len = len; From 972a14b59a0ec12b01c9a7f5c8867294fd4f40db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Lindstr=C3=B6m?= Date: Fri, 16 May 2014 15:30:13 +0300 Subject: [PATCH 56/56] Code cleanup after review. --- storage/innobase/buf/buf0mtflu.cc | 11 +++++++---- storage/xtradb/buf/buf0mtflu.cc | 11 +++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/storage/innobase/buf/buf0mtflu.cc b/storage/innobase/buf/buf0mtflu.cc index a5937caaf57..5a1769e3b70 100644 --- a/storage/innobase/buf/buf0mtflu.cc +++ b/storage/innobase/buf/buf0mtflu.cc @@ -113,7 +113,6 @@ typedef struct wrk_itm ulint n_flushed; /*!< Flushed pages count */ os_thread_id_t id_usr; /*!< Thread-id currently working */ wrk_status_t wi_status; /*!< Work item status */ - struct wrk_itm *next; /*!< Next work item */ mem_heap_t *wheap; /*!< Heap were to allocate memory for queue nodes */ mem_heap_t *rheap; @@ -262,6 +261,9 @@ mtflush_service_io( work_item->wi_status = WRK_ITEM_SET; } +#ifdef UNIV_MTFLUSH_DEBUG + ut_a(work_item->id_usr == 0); +#endif work_item->id_usr = os_thread_get_curr_id(); /* This works as a producer/consumer model, where in tasks are @@ -365,7 +367,6 @@ buf_mtflu_io_thread_exit(void) /* Allocate work items for shutdown message */ work_item = (wrk_t*)mem_heap_alloc(mtflush_io->wheap, sizeof(wrk_t)*srv_mtflush_threads); - memset(work_item, 0, sizeof(wrk_t)*srv_mtflush_threads); /* Confirm if the io-thread KILL is in progress, bailout */ if (mtflush_io->gwt_status == WTHR_KILL_IT) { @@ -383,6 +384,7 @@ buf_mtflu_io_thread_exit(void) work_item[i].wi_status = WRK_ITEM_EXIT; work_item[i].wheap = mtflush_io->wheap; work_item[i].rheap = mtflush_io->rheap; + work_item[i].id_usr = 0; ib_wqueue_add(mtflush_io->wq, (void *)&(work_item[i]), @@ -518,7 +520,6 @@ buf_mtflu_flush_work_items( node items areallocated */ work_heap = mem_heap_create(0); reply_heap = mem_heap_create(0); - memset(work_item, 0, sizeof(wrk_t)*MTFLUSH_MAX_WORKER); for(i=0;iwq, (void *)(work_item + i), @@ -544,7 +547,7 @@ buf_mtflu_flush_work_items( if (done_wi != NULL) { per_pool_pages_flushed[i] = done_wi->n_flushed; 
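
The buf0mtflu.cc cleanup above drops the unused wrk_itm::next link and the blanket memset() of the work-item array; instead id_usr is reset explicitly when an item is queued, and the new UNIV_MTFLUSH_DEBUG assertion in mtflush_service_io() catches items that reach a worker without that reset. A sketch of the hand-off those pieces describe, using only names visible in the hunks; ib_wqueue_wait() is assumed to be the blocking pop that pairs with the ib_wqueue_add() calls shown above.

    /* Producer: publish one work item per buffer pool instance. */
    work_item[i].wi_status = WRK_ITEM_SET;
    work_item[i].id_usr    = 0;         /* no worker owns it yet */
    ib_wqueue_add(mtflush_io->wq, (void*) &work_item[i], work_heap);

    /* Worker: claim the item before doing the flush. */
    wrk_t*  wi = static_cast<wrk_t*>(ib_wqueue_wait(mtflush_io->wq));
    #ifdef UNIV_MTFLUSH_DEBUG
    ut_a(wi->id_usr == 0);              /* must arrive unclaimed */
    #endif
    wi->id_usr = os_thread_get_curr_id();
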
-#if UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG if((int)done_wi->id_usr == 0 && (done_wi->wi_status == WRK_ITEM_SET || done_wi->wi_status == WRK_ITEM_UNSET)) { diff --git a/storage/xtradb/buf/buf0mtflu.cc b/storage/xtradb/buf/buf0mtflu.cc index 5df4a96d42e..b14b83aa5d0 100644 --- a/storage/xtradb/buf/buf0mtflu.cc +++ b/storage/xtradb/buf/buf0mtflu.cc @@ -113,7 +113,6 @@ typedef struct wrk_itm ulint n_flushed; /*!< Flushed pages count */ os_thread_id_t id_usr; /*!< Thread-id currently working */ wrk_status_t wi_status; /*!< Work item status */ - struct wrk_itm *next; /*!< Next work item */ mem_heap_t *wheap; /*!< Heap were to allocate memory for queue nodes */ mem_heap_t *rheap; @@ -269,6 +268,9 @@ mtflush_service_io( work_item->wi_status = WRK_ITEM_SET; } +#ifdef UNIV_MTFLUSH_DEBUG + ut_a(work_item->id_usr == 0); +#endif work_item->id_usr = os_thread_get_curr_id(); /* This works as a producer/consumer model, where in tasks are @@ -372,7 +374,6 @@ buf_mtflu_io_thread_exit(void) /* Allocate work items for shutdown message */ work_item = (wrk_t*)mem_heap_alloc(mtflush_io->wheap, sizeof(wrk_t)*srv_mtflush_threads); - memset(work_item, 0, sizeof(wrk_t)*srv_mtflush_threads); /* Confirm if the io-thread KILL is in progress, bailout */ if (mtflush_io->gwt_status == WTHR_KILL_IT) { @@ -390,6 +391,7 @@ buf_mtflu_io_thread_exit(void) work_item[i].wi_status = WRK_ITEM_EXIT; work_item[i].wheap = mtflush_io->wheap; work_item[i].rheap = mtflush_io->rheap; + work_item[i].id_usr = 0; ib_wqueue_add(mtflush_io->wq, (void *)&(work_item[i]), @@ -525,7 +527,6 @@ buf_mtflu_flush_work_items( node items areallocated */ work_heap = mem_heap_create(0); reply_heap = mem_heap_create(0); - memset(work_item, 0, sizeof(wrk_t)*MTFLUSH_MAX_WORKER); for(i=0;iwq, (void *)(work_item + i), @@ -551,7 +554,7 @@ buf_mtflu_flush_work_items( if (done_wi != NULL) { per_pool_pages_flushed[i] = done_wi->n_flushed; -#if UNIV_DEBUG +#ifdef UNIV_MTFLUSH_DEBUG if((int)done_wi->id_usr == 0 && (done_wi->wi_status == WRK_ITEM_SET || done_wi->wi_status == WRK_ITEM_UNSET)) {
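
The last pair of hunks replaces "#if UNIV_DEBUG" with "#ifdef UNIV_MTFLUSH_DEBUG", so the check on items coming back from the reply queue is tied to the dedicated multi-threaded-flush debug flag, tested by definition like the other UNIV_MTFLUSH_DEBUG blocks this patch adds, rather than running in every debug build. A sketch of the guarded check; the condition follows the hunk, the diagnostic message is hypothetical.

    #ifdef UNIV_MTFLUSH_DEBUG
            if ((int) done_wi->id_usr == 0
                && (done_wi->wi_status == WRK_ITEM_SET
                    || done_wi->wi_status == WRK_ITEM_UNSET)) {
                    /* The reply came back although no worker thread
                    ever claimed the item. */
                    fprintf(stderr, "InnoDB: Warning: work item %lu was"
                            " never processed\n", (ulong) i);
            }
    #endif /* UNIV_MTFLUSH_DEBUG */
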